From 8c787c3591c1c885b91a54128835b400858c5cca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sl=C3=A1vek=20Banko?= Date: Fri, 5 Nov 2021 13:28:23 +0100 Subject: DEB htdig: Added to repository. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Slávek Banko --- debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc | 116 ++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc (limited to 'debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc') diff --git a/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc b/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc new file mode 100644 index 00000000..2b5a7c36 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htfuzzy/Substring.cc @@ -0,0 +1,116 @@ +// +// Substring.cc +// +// Substring: The substring fuzzy algorithm. Currently a rather slow, naive approach +// that checks the substring against every word in the word db. +// It does not generate a separate database. +// +// Part of the ht://Dig package +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// +// +// $Id: Substring.cc,v 1.15 2004/05/28 13:15:20 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include + +#include "Substring.h" +#include "htString.h" +#include "List.h" +#include "StringMatch.h" +#include "HtConfiguration.h" + +//***************************************************************************** +// Substring::Substring(const HtConfiguration& config_arg) +// +Substring::Substring(const HtConfiguration& config_arg) : + Fuzzy(config_arg) +{ + name = "substring"; +} + + +//***************************************************************************** +// Substring::~Substring() +// +Substring::~Substring() +{ +} + + +//***************************************************************************** +// A very simplistic and inefficient substring search. For every word +// that is looked for we do a complete linear search through the word +// database. +// Maybe a better method of doing this would be to mmap a list of words +// to memory and then run the StringMatch on it. It would still be a +// linear search, but with much less overhead. +// +void +Substring::getWords(char *w, List &words) +{ + // First strip the punctuation + String stripped = w; + HtStripPunctuation(stripped); + + // Now set up the StringMatch object + StringMatch match; + match.Pattern(stripped); + + // And get the list of all possible words + HtWordList wordDB(config); + List *wordList; + String *key; + wordDB.Open(config["word_db"], O_RDONLY); + wordList = wordDB.Words(); + + int wordCount = 0; + int maximumWords = config.Value("substring_max_words", 25); + + wordList->Start_Get(); + while (wordCount < maximumWords && (key = (String *) wordList->Get_Next())) + { + if (match.FindFirst((char*)*key) >= 0) + { + words.Add(new String(*key)); + wordCount++; + } + } + if (wordList) { + wordList->Destroy(); + delete wordList; + } + wordDB.Close(); +} + + +//***************************************************************************** +int +Substring::openIndex() +{ + return 0; +} + + +//***************************************************************************** +void +Substring::generateKey(char *, String &) +{ +} + + +//***************************************************************************** +void +Substring::addWord(char *) +{ +} + + + + -- cgit v1.2.1