diff options
author | Slávek Banko <[email protected]> | 2021-11-05 13:28:23 +0100 |
---|---|---|
committer | Slávek Banko <[email protected]> | 2021-11-05 13:28:23 +0100 |
commit | 8c787c3591c1c885b91a54128835b400858c5cca (patch) | |
tree | eca1b776912a305c4d45b3964038278a2fae1ead /debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc | |
parent | fe188b907cdf30dfdfe0eba9412e7f8749fec158 (diff) | |
download | extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.tar.gz extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.zip |
DEB htdig: Added to repository.
Signed-off-by: Slávek Banko <[email protected]>
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc b/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc new file mode 100644 index 00000000..2b5a7c36 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc @@ -0,0 +1,116 @@ +// +// Substring.cc +// +// Substring: The substring fuzzy algorithm. Currently a rather slow, naive approach +// that checks the substring against every word in the word db. +// It does not generate a separate database. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: Substring.cc,v 1.15 2004/05/28 13:15:20 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <fcntl.h> + +#include "Substring.h" +#include "htString.h" +#include "List.h" +#include "StringMatch.h" +#include "HtConfiguration.h" + +//***************************************************************************** +// Substring::Substring(const HtConfiguration& config_arg) +// +Substring::Substring(const HtConfiguration& config_arg) : + Fuzzy(config_arg) +{ + name = "substring"; +} + + +//***************************************************************************** +// Substring::~Substring() +// +Substring::~Substring() +{ +} + + +//***************************************************************************** +// A very simplistic and inefficient substring search. For every word +// that is looked for we do a complete linear search through the word +// database. +// Maybe a better method of doing this would be to mmap a list of words +// to memory and then run the StringMatch on it. It would still be a +// linear search, but with much less overhead. +// +void +Substring::getWords(char *w, List &words) +{ + // First strip the punctuation + String stripped = w; + HtStripPunctuation(stripped); + + // Now set up the StringMatch object + StringMatch match; + match.Pattern(stripped); + + // And get the list of all possible words + HtWordList wordDB(config); + List *wordList; + String *key; + wordDB.Open(config["word_db"], O_RDONLY); + wordList = wordDB.Words(); + + int wordCount = 0; + int maximumWords = config.Value("substring_max_words", 25); + + wordList->Start_Get(); + while (wordCount < maximumWords && (key = (String *) wordList->Get_Next())) + { + if (match.FindFirst((char*)*key) >= 0) + { + words.Add(new String(*key)); + wordCount++; + } + } + if (wordList) { + wordList->Destroy(); + delete wordList; + } + wordDB.Close(); +} + + +//***************************************************************************** +int +Substring::openIndex() +{ + return 0; +} + + +//***************************************************************************** +void +Substring::generateKey(char *, String &) +{ +} + + +//***************************************************************************** +void +Substring::addWord(char *) +{ +} + + + + |