summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htfuzzy/Soundex.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htfuzzy/Soundex.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htfuzzy/Soundex.cc167
1 files changed, 167 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htfuzzy/Soundex.cc b/debian/htdig/htdig-3.2.0b6/htfuzzy/Soundex.cc
new file mode 100644
index 00000000..ed903e9d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htfuzzy/Soundex.cc
@@ -0,0 +1,167 @@
+//
+// Soundex.cc
+//
+// Soundex: A fuzzy matching algorithm on the principal of the
+// Soundex method for last names used by the U.S. INS
+// and described by Knuth and others.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: Soundex.cc,v 1.11 2004/05/28 13:15:20 lha Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <fcntl.h>
+
+#include "Soundex.h"
+#include "Dictionary.h"
+
+#include <ctype.h>
+
+//*****************************************************************************
+// Soundex::Soundex(const HtConfiguration& config_arg)
+//
+Soundex::Soundex(const HtConfiguration& config_arg) :
+ Fuzzy(config_arg)
+{
+ name = "soundex";
+}
+
+
+//*****************************************************************************
+// Soundex::~Soundex()
+//
+Soundex::~Soundex()
+{
+}
+
+
+//*****************************************************************************
+// void Soundex::generateKey(char *word, String &key)
+//
+void
+Soundex::generateKey(char *word, String &key)
+{
+ int code = 0;
+ int lastcode = 0;
+
+ key = 0;
+ if (!word)
+ {
+ key = '0';
+ return;
+ }
+
+ while (*word && !isalpha(*word))
+ word++;
+
+ if (*word)
+ {
+ key << *word++;
+ }
+ else
+ {
+ key = '0';
+ return;
+ }
+
+
+ while (key.length() < 6)
+ {
+ switch (*word)
+ {
+ case 'b':
+ case 'p':
+ case 'f':
+ case 'v':
+ code = 1;
+ break;
+
+ case 'c':
+ case 's':
+ case 'k':
+ case 'g':
+ case 'j':
+ case 'q':
+ case 'x':
+ case 'z':
+ code = 2;
+ break;
+
+ case 'd':
+ case 't':
+ code = 3;
+ break;
+
+ case 'l':
+ code = 4;
+ break;
+
+ case 'm':
+ case 'n':
+ code = 5;
+ break;
+
+ case 'r':
+ code = 6;
+ break;
+
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'y':
+ case 'w':
+ case 'h':
+ code = 0;
+ break;
+
+ default:
+ break;
+ }
+ if (code && code != lastcode)
+ {
+ key << code;
+ lastcode = code;
+ }
+ if (*word)
+ word++;
+ else
+ break;
+ }
+}
+
+
+//*****************************************************************************
+// void Soundex::addWord(char *word)
+//
+void
+Soundex::addWord(char *word)
+{
+ if (!dict)
+ {
+ dict = new Dictionary;
+ }
+
+ String key;
+ generateKey(word, key);
+
+ String *s = (String *) dict->Find(key);
+ if (s)
+ {
+ // if (mystrcasestr(s->get(), word) != 0)
+ (*s) << ' ' << word;
+ }
+ else
+ {
+ dict->Add(key, new String(word));
+ }
+}