summary refs log tree commit diff stats
path: root/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc
diff options
context:
space:
mode:
author Slávek Banko <[email protected]> 2021-11-05 13:28:23 +0100
committer Slávek Banko <[email protected]> 2021-11-05 13:28:23 +0100
commit 8c787c3591c1c885b91a54128835b400858c5cca (patch)
tree eca1b776912a305c4d45b3964038278a2fae1ead /debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc
parent fe188b907cdf30dfdfe0eba9412e7f8749fec158 (diff)
download extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.tar.gz
extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.zip
DEB htdig: Added to repository.
Signed-off-by: Slávek Banko <[email protected]>
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc')
-rw-r--r-- debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc 116
1 files changed, 116 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc b/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc
new file mode 100644
index 00000000..2b5a7c36
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc
@@ -0,0 +1,116 @@
+//
+// Substring.cc
+//
+// Substring: The substring fuzzy algorithm. Currently a rather slow, naive approach
+// that checks the substring against every word in the word db.
+// It does not generate a separate database.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Substring.cc,v 1.15 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+
+#include "Substring.h"
+#include "htString.h"
+#include "List.h"
+#include "StringMatch.h"
+#include "HtConfiguration.h"
+
+//*****************************************************************************
+// Constructor: initialises Fuzzy with config_arg and sets name to "substring".
+//
+Substring::Substring(const HtConfiguration& config_arg)
+    : Fuzzy(config_arg)
+{
+    name = "substring";
+}
+
+
+//*****************************************************************************
+// Destructor.  The body is empty.
+Substring::~Substring()
+{
+    // Nothing to do.
+}
+
+
+//*****************************************************************************
+// A very simplistic and inefficient substring search: each call walks the
+// complete word database linearly.  For every entry on which FindFirst()
+// reports >= 0 for the punctuation-stripped form of w, a new String copy
+// is added to words; the count is capped by "substring_max_words".
+// Maybe a better method would be to mmap a list of words to memory and
+// run the StringMatch on it: still linear, but with much less overhead.
+//
+void
+Substring::getWords(char *w, List &words)
+{
+    // First strip the punctuation
+    String stripped = w;
+    HtStripPunctuation(stripped);
+
+    // Now set up the StringMatch object
+    StringMatch match;
+    match.Pattern(stripped);
+
+    // And get the list of all possible words
+    HtWordList wordDB(config);
+    wordDB.Open(config["word_db"], O_RDONLY);
+    List *wordList = wordDB.Words();
+    const int maximumWords = config.Value("substring_max_words", 25);
+
+    // Check the pointer before the first dereference, not just before cleanup.
+    if (wordList)
+    {
+        int wordCount = 0;
+        String *key;
+        wordList->Start_Get();
+        while (wordCount < maximumWords && (key = (String *) wordList->Get_Next()))
+        {
+            if (match.FindFirst((char*)*key) >= 0)
+            {
+                words.Add(new String(*key));
+                wordCount++;
+            }
+        }
+        wordList->Destroy();
+        delete wordList;
+    }
+    wordDB.Close();
+}
+
+
+//*****************************************************************************
+// openIndex: opens nothing and always returns 0.
+int Substring::openIndex()
+{
+    return 0;
+}
+
+
+//*****************************************************************************
+// generateKey: no-op; both arguments are ignored.
+void Substring::generateKey(char *, String &)
+{
+}
+
+
+//*****************************************************************************
+// addWord: no-op; the argument is ignored.
+void Substring::addWord(char *)
+{
+}
+
+
+
+