diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htfuzzy/Synonym.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htfuzzy/Synonym.cc | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htfuzzy/Synonym.cc b/debian/htdig/htdig-3.2.0b6/htfuzzy/Synonym.cc new file mode 100644 index 00000000..234312a4 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htfuzzy/Synonym.cc @@ -0,0 +1,225 @@ +// +// Synonym.cc +// +// Synonym: A fuzzy matching algorithm to create a database of related words +// (or misspellings) that should be searched together. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: Synonym.cc,v 1.16 2004/05/28 13:15:20 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <fcntl.h> + +#include "Synonym.h" +#include "htfuzzy.h" +#include "List.h" +#include "StringList.h" +#include "HtConfiguration.h" + +#include "filecopy.h" + +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> + +#ifdef HAVE_STD +#include <fstream> +#ifdef HAVE_NAMESPACES +using namespace std; +#endif +#else +#include <fstream.h> +#endif /* HAVE_STD */ + +//***************************************************************************** +Synonym::Synonym(const HtConfiguration& config_arg) : + Fuzzy(config_arg) +{ + name = "synonyms"; + db = 0; +} + + +//***************************************************************************** +Synonym::~Synonym() +{ + if (db) + { + db->Close(); + delete db; + db = 0; + } +} + + +//***************************************************************************** +int +Synonym::createDB(const HtConfiguration &config) +{ + String tmpdir = getenv("TMPDIR"); + String dbFile; + +#if defined(LIBHTDIG) || defined(LIBHTDIGPHP) || defined(_MSC_VER) //WIN32 + int ret = -1; + char * source = NULL; + char * dest = NULL; +#endif + + if (tmpdir.length()) + dbFile = tmpdir; + else + dbFile = "/tmp"; + + dbFile << "/synonyms.db"; + + char input[1000]; + FILE *fl; + + const String sourceFile = config["synonym_dictionary"]; + + fl = fopen(sourceFile, "r"); + if (fl == NULL) + { + cout << "htfuzzy/synonyms: unable to open " << sourceFile << endl; + cout << "htfuzzy/synonyms: Use the 'synonym_dictionary' attribute\n"; + cout << "htfuzzy/synonyms: to specify the file that contains the synonyms\n"; + return NOTOK; + } + + Database *db = Database::getDatabaseInstance(DB_HASH); + + if (db->OpenReadWrite(dbFile.get(), 0664) == NOTOK) + { + delete db; + db = 0; + return NOTOK; + } + + String data; + String word; + int count = 0; + while (fgets(input, sizeof(input), fl)) + { + StringList sl(input, " \t\r\n"); + if (sl.Count() < 2) + { // Avoid segfault caused by calling Database::Put() + if (debug) // with negative length for data field + { + cout<<"htfuzzy/synonyms: Rejected line with less than 2 words: " + << input << endl; + cout.flush(); + } + continue; + } + for (int i = 0; i < sl.Count(); i++) + { + data = 0; + for (int j = 0; j < sl.Count(); j++) + { + if (i != j) + data << sl[j] << ' '; + } + word = sl[i]; + word.lowercase(); + data.lowercase(); + db->Put(word, String(data.get(), data.length() - 1)); + if (debug && (count % 10) == 0) + { + cout << "htfuzzy/synonyms: " << count << ' ' << word << "\n"; + cout.flush(); + } + count++; + } + } + fclose(fl); + db->Close(); + delete db; + +#if defined(LIBHTDIG) || defined(LIBHTDIGPHP) || defined(_MSC_VER) //WIN32 + + //Uses file_copy function - works on Unix/Linux & WinNT + source = dbFile.get(); + dest = (char *)config["synonym_db"].get(); + + //Attempt rename, if fail attempt copy & delete. + ret = rename(source, dest); + if (ret < 0) + { + ret = file_copy(source, dest, FILECOPY_OVERWRITE_ON); + if (ret == TRUE) + unlink(source); + else + return NOTOK; + } + + if (debug) + { + cout << "htfuzzy/synonyms: " << count << ' ' << word << "\n"; + cout << "htfuzzy/synonyms: Done.\n"; + } + +#else //This code uses a system call - Phase this out + + struct stat stat_buf; + String mv("mv"); // assume it's in the PATH if predefined setting fails + if ((stat(MV, &stat_buf) != -1) && S_ISREG(stat_buf.st_mode)) + mv = MV; + system(form("%s %s %s", + mv.get(), dbFile.get(), config["synonym_db"].get())); + +#endif + + return OK; +} + + +//***************************************************************************** +int +Synonym::openIndex() +{ + const String dbFile = config["synonym_db"]; + + if (db) + { + db->Close(); + delete db; + db = 0; + } + db = Database::getDatabaseInstance(DB_HASH); + if (db->OpenRead(dbFile) == NOTOK) + { + delete db; + db = 0; + return NOTOK; + } + return OK; +} + + +//***************************************************************************** +void +Synonym::getWords(char *originalWord, List &words) +{ + String data; + String stripped = originalWord; + HtStripPunctuation(stripped); + + if (db && db->Get(stripped, data) == OK) + { + char *token = strtok(data.get(), " "); + while (token) + { + words.Add(new String(token)); + token = strtok(0, " "); + } + } +} |