1 files changed, 505 insertions, 0 deletions
diff --git a/tdespell2/plugins/ispell/ispell_checker.cpp b/tdespell2/plugins/ispell/ispell_checker.cpp
new file mode 100644
index 000000000..c07d9a55f
--- /dev/null
+++ b/tdespell2/plugins/ispell/ispell_checker.cpp
@@ -0,0 +1,505 @@
+/* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* tdespell2 - adopted from Enchant
+ * Copyright (C) 2003 Dom Lachowicz
+ * Copyright (C) 2004 Zack Rusin <[email protected]>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * In addition, as a special exception, Dom Lachowicz
+ * gives permission to link the code of this program with
+ * non-LGPL Spelling Provider libraries (eg: a MSFT Office
+ * spell checker backend) and distribute linked combinations including
+ * the two.  You must obey the GNU Lesser General Public License in all
+ * respects for all of the code used other than said providers.  If you modify
+ * this file, you may extend this exception to your version of the
+ * file, but you are not obligated to do so.  If you do not wish to
+ * do so, delete this exception statement from your version.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "sp_spell.h"
+#include "ispell_checker.h"
+
+#include <tqmap.h>
+#include <tqdir.h>
+#include <tqfileinfo.h>
+
+/***************************************************************************/
+
+typedef struct str_ispell_map
+{
+	const char * lang;
+	const char * dict;
+	const char * enc;
+} IspellMap;
+
+static const char *ispell_dirs [] = {
+	"/usr/lib/ispell",
+	"/usr/local/lib/ispell",
+	"/usr/local/share/ispell",
+	"/usr/share/ispell",
+	"/usr/pkg/lib",
+	0
+};
+static const IspellMap ispell_map [] = {
+	{"ca"    ,"catala.hash"         ,"iso-8859-1" },
+	{"ca_ES" ,"catala.hash"         ,"iso-8859-1" },
+	{"cs"    ,"czech.hash"          ,"iso-8859-2" },
+	{"cs_CZ" ,"czech.hash"          ,"iso-8859-2" },
+	{"da"    ,"dansk.hash"          ,"iso-8859-1" },
+	{"da_DK" ,"dansk.hash"          ,"iso-8859-1" },
+	{"de"    ,"deutsch.hash"        ,"iso-8859-1" },
+	{"de_CH" ,"swiss.hash"          ,"iso-8859-1" },
+	{"de_AT" ,"deutsch.hash"        ,"iso-8859-1" },
+	{"de_DE" ,"deutsch.hash"        ,"iso-8859-1" },
+	{"el"    ,"ellhnika.hash"       ,"iso-8859-7" },
+	{"el_GR" ,"ellhnika.hash"       ,"iso-8859-7" },
+	{"en"    ,"british.hash"        ,"iso-8859-1" },
+	{"en_AU" ,"british.hash"        ,"iso-8859-1" },
+	{"en_BZ" ,"british.hash"        ,"iso-8859-1" },
+	{"en_CA" ,"british.hash"        ,"iso-8859-1" },
+	{"en_GB" ,"british.hash"        ,"iso-8859-1" },
+	{"en_IE" ,"british.hash"        ,"iso-8859-1" },
+	{"en_JM" ,"british.hash"        ,"iso-8859-1" },
+	{"en_NZ" ,"british.hash"        ,"iso-8859-1" },
+	{"en_TT" ,"british.hash"        ,"iso-8859-1" },
+	{"en_ZA" ,"british.hash"        ,"iso-8859-1" },
+	{"en_ZW" ,"british.hash"        ,"iso-8859-1" },
+	{"en_PH" ,"american.hash"       ,"iso-8859-1" },
+	{"en_US" ,"american.hash"       ,"iso-8859-1" },
+	{"eo"    ,"esperanto.hash"      ,"iso-8859-3" },
+	{"es"    ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_AR" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_BO" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_CL" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_CO" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_CR" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_DO" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_EC" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_ES" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_GT" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_HN" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_MX" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_NI" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_PA" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_PE" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_PR" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_PY" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_SV" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_UY" ,"espanol.hash"        ,"iso-8859-1" },
+	{"es_VE" ,"espanol.hash"        ,"iso-8859-1" },
+	{"fi"    ,"finnish.hash"        ,"iso-8859-1" },
+	{"fi_FI" ,"finnish.hash"        ,"iso-8859-1" },
+	{"fr"    ,"francais.hash"       ,"iso-8859-1" },
+	{"fr_BE" ,"francais.hash"       ,"iso-8859-1" },
+	{"fr_CA" ,"francais.hash"       ,"iso-8859-1" },
+	{"fr_CH" ,"francais.hash"       ,"iso-8859-1" },
+	{"fr_FR" ,"francais.hash"       ,"iso-8859-1" },
+	{"fr_LU" ,"francais.hash"       ,"iso-8859-1" },
+	{"fr_MC" ,"francais.hash"       ,"iso-8859-1" },
+	{"hu"    ,"hungarian.hash"      ,"iso-8859-2" },
+	{"hu_HU" ,"hungarian.hash"      ,"iso-8859-2" },
+	{"ga"    ,"irish.hash"          ,"iso-8859-1" },
+	{"ga_IE" ,"irish.hash"          ,"iso-8859-1" },
+	{"gl"    ,"galician.hash"       ,"iso-8859-1" },
+	{"gl_ES" ,"galician.hash"       ,"iso-8859-1" },
+	{"ia"    ,"interlingua.hash"    ,"iso-8859-1" },
+	{"it"    ,"italian.hash"        ,"iso-8859-1" },
+	{"it_IT" ,"italian.hash"        ,"iso-8859-1" },
+	{"it_CH" ,"italian.hash"        ,"iso-8859-1" },
+	{"la"    ,"mlatin.hash"         ,"iso-8859-1" },
+	{"la_IT" ,"mlatin.hash"         ,"iso-8859-1" },
+	{"lt"    ,"lietuviu.hash"       ,"iso-8859-13" },
+	{"lt_LT" ,"lietuviu.hash"       ,"iso-8859-13" },
+	{"nl"    ,"nederlands.hash"     ,"iso-8859-1" },
+	{"nl_NL" ,"nederlands.hash"     ,"iso-8859-1" },
+	{"nl_BE" ,"nederlands.hash"     ,"iso-8859-1" },
+	{"nb"    ,"norsk.hash"          ,"iso-8859-1" },
+	{"nb_NO" ,"norsk.hash"          ,"iso-8859-1" },
+	{"nn"    ,"nynorsk.hash"        ,"iso-8859-1" },
+	{"nn_NO" ,"nynorsk.hash"        ,"iso-8859-1" },
+	{"no"    ,"norsk.hash"          ,"iso-8859-1" },
+	{"no_NO" ,"norsk.hash"          ,"iso-8859-1" },
+	{"pl"    ,"polish.hash"         ,"iso-8859-2" },
+	{"pl_PL" ,"polish.hash"         ,"iso-8859-2" },
+	{"pt"    ,"brazilian.hash"      ,"iso-8859-1" },
+	{"pt_BR" ,"brazilian.hash"      ,"iso-8859-1" },
+	{"pt_PT" ,"portugues.hash"      ,"iso-8859-1" },
+	{"ru"    ,"russian.hash"        ,"koi8-r" },
+	{"ru_MD" ,"russian.hash"        ,"koi8-r" },
+	{"ru_RU" ,"russian.hash"        ,"koi8-r" },
+	{"sc"    ,"sardinian.hash"      ,"iso-8859-1" },
+	{"sc_IT" ,"sardinian.hash"      ,"iso-8859-1" },
+	{"sk"    ,"slovak.hash"         ,"iso-8859-2" },
+	{"sk_SK" ,"slovak.hash"         ,"iso-8859-2" },
+	{"sl"    ,"slovensko.hash"      ,"iso-8859-2" },
+	{"sl_SI" ,"slovensko.hash"      ,"iso-8859-2" },
+	{"sv"    ,"svenska.hash"        ,"iso-8859-1" },
+	{"sv_SE" ,"svenska.hash"        ,"iso-8859-1" },
+	{"uk"    ,"ukrainian.hash"      ,"koi8-u" },
+	{"uk_UA" ,"ukrainian.hash"      ,"koi8-u" },
+	{"yi"    ,"yiddish-yivo.hash"   ,"utf-8" }
+};
+
+static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
+static TQMap<TQString, TQString> ispell_dict_map;
+
+
+void
+ISpellChecker::try_autodetect_charset(const char * const inEncoding)
+{
+	if (inEncoding && strlen(inEncoding))
+		{
+			m_translate_in = TQTextCodec::codecForName(inEncoding);
+		}
+}
+
+/***************************************************************************/
+/***************************************************************************/
+
+ISpellChecker::ISpellChecker()
+	: deftflag(-1),
+     prefstringchar(-1),
+     m_bSuccessfulInit(false),
+     m_BC(NULL),
+     m_cd(NULL),
+     m_cl(NULL),
+     m_cm(NULL),
+     m_ho(NULL),
+     m_nd(NULL),
+     m_so(NULL),
+     m_se(NULL),
+     m_ti(NULL),
+     m_te(NULL),
+     m_hashstrings(NULL),
+     m_hashtbl(NULL),
+     m_pflaglist(NULL),
+     m_sflaglist(NULL),
+     m_chartypes(NULL),
+     m_infile(NULL),
+     m_outfile(NULL),
+     m_askfilename(NULL),
+     m_Trynum(0),
+     m_translate_in(0)
+{
+	memset(m_sflagindex,0,sizeof(m_sflagindex));
+	memset(m_pflagindex,0,sizeof(m_pflagindex));
+}
+
+#ifndef FREEP
+#define FREEP(p)        do { if (p) free(p); } while (0)
+#endif
+
+ISpellChecker::~ISpellChecker()
+{
+	if (m_bSuccessfulInit) {
+		// only cleanup our mess if we were successfully initialized
+
+		clearindex (m_pflagindex);
+		clearindex (m_sflagindex);
+	}
+
+	FREEP(m_hashtbl);
+	FREEP(m_hashstrings);
+	FREEP(m_sflaglist);
+	FREEP(m_chartypes);
+
+	delete m_translate_in;
+	m_translate_in = 0;
+}
+
+bool
+ISpellChecker::checkWord( const TQString& utf8Word )
+{
+	ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
+	if (!m_bSuccessfulInit)
+		return false;
+
+	if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
+		return false;
+
+	bool retVal = false;
+	TQCString out;
+	if (!m_translate_in)
+		return false;
+	else {
+		/* convert to 8bit string and null terminate */
+		int len_out = utf8Word.length();
+
+		out = m_translate_in->fromUnicode( utf8Word, len_out );
+	}
+
+	if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
+		{
+			if (good(iWord, 0, 0, 1, 0) == 1 ||
+			    compoundgood(iWord, 1) == 1)
+				{
+					retVal = true;
+				}
+		}
+
+	return retVal;
+}
+
+TQStringList
+ISpellChecker::suggestWord(const TQString& utf8Word)
+{
+	ichar_t  iWord[INPUTWORDLEN + MAXAFFIXLEN];
+	int  c;
+
+	if (!m_bSuccessfulInit)
+		return TQStringList();
+
+	if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
+			utf8Word.length() == 0)
+		return TQStringList();
+
+	TQCString out;
+	if (!m_translate_in)
+		return TQStringList();
+	else
+		{
+			/* convert to 8bit string and null terminate */
+
+			int len_out = utf8Word.length();
+			out = m_translate_in->fromUnicode( utf8Word, len_out );
+		}
+
+	if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
+		makepossibilities(iWord);
+	else
+		return TQStringList();
+
+	TQStringList sugg_arr;
+	for (c = 0; c < m_pcount; c++)
+	{
+		TQString utf8Word;
+
+		if (!m_translate_in)
+		{
+			/* copy to 8bit string and null terminate */
+			utf8Word = TQString::fromUtf8( m_possibilities[c] );
+		}
+		else
+		{
+			/* convert to 32bit string and null terminate */
+			utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
+		}
+
+		sugg_arr.append( utf8Word );
+	}
+
+	return sugg_arr;
+}
+
+static void
+s_buildHashNames (std::vector<std::string> & names, const char * dict)
+{
+	const char * tmp = 0;
+	int i = 0;
+
+	names.clear ();
+
+	while ( (tmp = ispell_dirs[i++]) ) {
+		TQCString maybeFile = TQCString( tmp ) + '/';
+		maybeFile += dict;
+		names.push_back( maybeFile.data() );
+	}
+}
+
+static void
+s_allDics()
+{
+	const char * tmp = 0;
+	int i = 0;
+
+	while ( (tmp = ispell_dirs[i++]) ) {
+		TQDir dir( tmp );
+		TQStringList lst = dir.entryList( "*.hash" );
+		for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
+			TQFileInfo info( *it );
+			for (size_t i = 0; i < size_ispell_map; i++)
+			{
+				const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
+				if (!strcmp (info.fileName().latin1(), mapping->dict))
+				{
+					ispell_dict_map.insert( mapping->lang, *it );
+				}
+			}
+		}
+	}
+}
+
+TQValueList<TQString>
+ISpellChecker::allDics()
+{
+	if ( ispell_dict_map.empty() )
+		s_allDics();
+
+	return ispell_dict_map.keys();
+}
+
+TQString
+ISpellChecker::loadDictionary (const char * szdict)
+{
+	std::vector<std::string> dict_names;
+
+	s_buildHashNames (dict_names, szdict);
+
+	for (size_t i = 0; i < dict_names.size(); i++)
+		{
+			if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
+				return dict_names[i].c_str();
+		}
+
+	return TQString::null;
+}
+
+/*!
+ * Load ispell dictionary hash file for given language.
+ *
+ * \param szLang -  The language tag ("en-US") we want to use
+ * \return The name of the dictionary file
+ */
+bool
+ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
+{
+	TQString hashname;
+
+	const char * encoding = NULL;
+	const char * szFile = NULL;
+
+	for (size_t i = 0; i < size_ispell_map; i++)
+		{
+			const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
+			if (!strcmp (szLang, mapping->lang))
+				{
+					szFile = mapping->dict;
+					encoding = mapping->enc;
+					break;
+				}
+		}
+
+	if (!szFile || !strlen(szFile))
+		return false;
+
+	alloc_ispell_struct();
+
+	hashname = loadDictionary(szFile);
+	if (hashname.isEmpty())
+		return false;
+
+	// one of the two above calls succeeded
+	setDictionaryEncoding (hashname, encoding);
+
+	return true;
+}
+
+void
+ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding )
+{
+	/* Get Hash encoding from XML file. This should always work! */
+	try_autodetect_charset(encoding);
+
+	if (m_translate_in)
+		{
+			/* We still have to setup prefstringchar*/
+			prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
+						      : static_cast<int *>(NULL));
+
+			if (prefstringchar < 0)
+				{
+					std::string teststring;
+					for(int n1 = 1; n1 <= 15; n1++)
+						{
+							teststring = "latin" + n1;
+							prefstringchar = findfiletype(teststring.c_str(), 1,
+										      deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
+							if (prefstringchar >= 0)
+								break;
+						}
+				}
+
+			return; /* success */
+		}
+
+	/* Test for UTF-8 first */
+	prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
+	if (prefstringchar >= 0)
+		{
+			m_translate_in = TQTextCodec::codecForName("utf8");
+		}
+
+	if (m_translate_in)
+		return; /* success */
+
+	/* Test for "latinN" */
+	if (!m_translate_in)
+		{
+			/* Look for "altstringtype" names from latin1 to latin15 */
+			for(int n1 = 1; n1 <= 15; n1++)
+				{
+					TQString teststring = TQString("latin%1").arg(n1);
+					prefstringchar = findfiletype(teststring.latin1(), 1,
+								      deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
+					if (prefstringchar >= 0)
+						{
+							//FIXME: latin1 might be wrong
+							m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
+							break;
+						}
+				}
+		}
+
+	/* If nothing found, use latin1 */
+	if (!m_translate_in)
+		{
+			m_translate_in = TQTextCodec::codecForName("latin1");
+		}
+}
+
+bool
+ISpellChecker::requestDictionary(const char *szLang)
+{
+	if (!loadDictionaryForLanguage (szLang))
+		{
+			// handle a shortened version of the language tag: en_US => en
+			std::string shortened_dict (szLang);
+			size_t uscore_pos;
+
+			if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
+				shortened_dict = shortened_dict.substr(0, uscore_pos);
+				if (!loadDictionaryForLanguage (shortened_dict.c_str()))
+					return false;
+			} else
+				return false;
+		}
+
+	m_bSuccessfulInit = true;
+
+	if (prefstringchar < 0)
+		m_defdupchar = 0;
+	else
+		m_defdupchar = prefstringchar;
+
+	return true;
+}