// tdespell2/plugins/ispell/ispell_checker.h

/* tdespell2 - adopted from Enchant
 * Copyright (C) 2003 Dom Lachowicz
 * Copyright (C) 2004 Zack Rusin <[email protected]>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * In addition, as a special exception, Dom Lachowicz
 * gives permission to link the code of this program with
 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
 * spell checker backend) and distribute linked combinations including
 * the two.  You must obey the GNU Lesser General Public License in all
 * respects for all of the code used other than said providers.  If you modify
 * this file, you may extend this exception to your version of the
 * file, but you are not obligated to do so.  If you do not wish to
 * do so, delete this exception statement from your version.
 */

#ifndef ISPELL_CHECKER_H
#define ISPELL_CHECKER_H

#include "ispell.h"

#include <tqstringlist.h>
#include <tqvaluelist.h>
#include <tqtextcodec.h>
#include <tqstring.h>


/*
 * ISpellChecker wraps the ispell spell-checking engine in a single object.
 *
 * The public interface is small: check a word, ask for suggestions, select a
 * dictionary, and list dictionaries.  Everything in the private section is
 * the legacy ispell code: per the section comments below, its former global
 * functions became member functions and its former global variables became
 * m_* data members.
 *
 * Many legacy prototypes are written as `name P ((args))`.  P is not defined
 * in this header.
 * NOTE(review): presumably P comes from "ispell.h" and expands to its
 * argument list (the classic K&R/ANSI prototype switch) -- confirm there.
 *
 * Instances are non-copyable: the copy constructor and assignment operator
 * are declared private and deliberately left unimplemented.
 */
class ISpellChecker
{
public:
	ISpellChecker();
	~ISpellChecker();

	// Word-level entry points taking TQString input.
	// NOTE(review): presumably checkWord() returns true when the word is
	// accepted by the loaded dictionary and suggestWord() returns candidate
	// corrections -- confirm against the implementation file.
	bool checkWord(const TQString& word);
	TQStringList suggestWord(const TQString& word);

	// NOTE(review): presumably selects/loads the dictionary for the language
	// tag szLang and reports success -- confirm against the implementation.
	bool requestDictionary (const char * szLang);
	// Static, so callable without an instance; returns a list of strings.
	// NOTE(review): presumably the names of the available dictionaries -- confirm.
	static TQValueList<TQString> allDics();
private:
	// Copying is disabled: declared private and never defined.
	ISpellChecker(const ISpellChecker&);	// no impl
	void operator=(const ISpellChecker&);	// no impl

	// Dictionary loading helpers used internally; szLang is a C string
	// language identifier, hashname/enc are a hash file name and an encoding
	// name respectively (per the parameter names).
	TQString loadDictionary (const char * szLang );
	bool   loadDictionaryForLanguage ( const char * szLang );
	void   setDictionaryEncoding ( const TQString& hashname, const char * enc );

	//
	// The member functions after this point were formerly global functions
	//  passed a context structure pointer...
	//

	void try_autodetect_charset(const char * inEncoding);

	//
	// From ispell correct.c
	//

	int		casecmp P ((char * a, char * b, int canonical));
	void		makepossibilities P ((ichar_t * word));
	int	insert P ((ichar_t * word));
#ifndef NO_CAPITALIZATION_SUPPORT
	void	wrongcapital P ((ichar_t * word));
#endif /* NO_CAPITALIZATION_SUPPORT */
	void	wrongletter P ((ichar_t * word));
	void	extraletter P ((ichar_t * word));
	void	missingletter P ((ichar_t * word));
	void	missingspace P ((ichar_t * word));
	int		compoundgood P ((ichar_t * word, int pfxopts));
	void	transposedletter P ((ichar_t * word));
	int	ins_cap P ((ichar_t * word, ichar_t * pattern));
	int	save_cap P ((ichar_t * word, ichar_t * pattern,
			  ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]));
	int		ins_root_cap P ((ichar_t * word, ichar_t * pattern,
			  int prestrip, int preadd, int sufstrip, int sufadd,
			  struct dent * firstdent, struct flagent * pfxent,
			  struct flagent * sufent));
	void	save_root_cap P ((ichar_t * word, ichar_t * pattern,
			  int prestrip, int preadd, int sufstrip, int sufadd,
			  struct dent * firstdent, struct flagent * pfxent,
			  struct flagent * sufent,
			  ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
			  int * nsaved));

	//
	// From ispell good.c
	//

	int good (ichar_t* w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
	void chk_aff (ichar_t* word, ichar_t* ucword, int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
	int linit(char*);
	struct dent * ispell_lookup (ichar_t* s, int dotree);
	// Conversions between char strings and ichar_t strings; outlen bounds
	// the output buffer (per the parameter name).
	int strtoichar (ichar_t* out, char* in, int outlen, int canonical);
	int ichartostr (char* out, ichar_t* in, int outlen, int canonical);
	char * ichartosstr (ichar_t* in, int canonical);
	int	findfiletype (const char * name, int searchnames, int * deformatter);
	long whatcap (ichar_t* word);

	/*
		HACK: macros replaced with function implementations
		so we could do a side-effect-free check for unicode
		characters which aren't in hashheader
	*/
	char myupper(ichar_t c);
	char mylower(ichar_t c);
	int myspace(ichar_t c);
	char iswordch(ichar_t c);
	char isboundarych(ichar_t c);
	char isstringstart(ichar_t c);
	ichar_t mytolower(ichar_t c);
	ichar_t mytoupper(ichar_t c);

	// cap_ok() and hash() are only declared when ICHAR_IS_CHAR is not defined.
#ifndef ICHAR_IS_CHAR
	int cap_ok (ichar_t* word, struct success* hit, int len);

	int hash (ichar_t* s, int hashtblsize);
#endif

	//
	// From ispell lookup.c
	//

	void	clearindex P ((struct flagptr * indexp));
	void     initckch P ((char *));

	void alloc_ispell_struct();
	void free_ispell_struct();

	//
	// From ispell makedent.c
	//

	int		addvheader P ((struct dent * ent));
	void		upcase P ((ichar_t * string));
	void		lowcase P ((ichar_t * string));
	void		chupcase P ((char * s));

	int		stringcharlen P ((char * bufp, int canonical));
	ichar_t *	strtosichar P ((char * in, int canonical));
	char *		printichar P ((int in));

	//
	// From ispell tgood.c
	//

	void	pfx_list_chk P ((ichar_t * word, ichar_t * ucword,
			  int len, int optflags, int sfxopts, struct flagptr * ind,
			  int ignoreflagbits, int allhits));
	void	chk_suf P ((ichar_t * word, ichar_t * ucword, int len,
			  int optflags, struct flagent * pfxent, int ignoreflagbits,
			  int allhits));
	void	suf_list_chk P ((ichar_t * word, ichar_t * ucword, int len,
			  struct flagptr * ind, int optflags, struct flagent * pfxent,
			  int ignoreflagbits, int allhits));
	int		expand_pre P ((char * croot, ichar_t * rootword,
			  MASKTYPE mask[], int option, char * extra));
	int	pr_pre_expansion P ((char * croot, ichar_t * rootword,
			  struct flagent * flent, MASKTYPE mask[], int option,
			  char * extra));
	int		expand_suf P ((char * croot, ichar_t * rootword,
			  MASKTYPE mask[], int optflags, int option, char * extra));
	int	pr_suf_expansion P ((char * croot, ichar_t * rootword,
			  struct flagent * flent, int option, char * extra));
	void	forcelc P ((ichar_t * dst, int len));

	/* this is used for converting from unsigned short to UCS-4 */
	/* NOTE(review): no declaration follows the comment above in this header;
	 * it looks like a leftover from a removed member -- confirm and drop. */

	int deftflag;              /* NZ for TeX mode by default */
	int prefstringchar;        /* Preferred string character type */
	bool m_bSuccessfulInit;

	//
	// The members after this point were formerly global variables
	//  in the original ispell code
	//

	char *	m_BC;	/* backspace if not ^H */
	char *	m_cd;	/* clear to end of display */
	char *	m_cl;	/* clear display */
	char *	m_cm;	/* cursor movement */
	char *	m_ho;	/* home */
	char *	m_nd;	/* non-destructive space */
	char *	m_so;	/* standout */
	char *	m_se;	/* standout end */
	int	m_sg;	/* space taken by so/se */
	char *	m_ti;	/* terminal initialization sequence */
	char *	m_te;	/* terminal termination sequence */
	int	m_li;	/* lines */
	int	m_co;	/* columns */

	char	m_ctoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Current token as char */
	ichar_t	m_itoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Ctoken as ichar_t str */

	int	m_numhits;	/* number of hits in dictionary lookups */
	struct success
			m_hits[MAX_HITS]; /* table of hits gotten in lookup */

	char *	m_hashstrings;	/* Strings in hash table */
	struct hashheader
			m_hashheader;	/* Header of hash table */
	struct dent *
			m_hashtbl;	/* Main hash table, for dictionary */
	int	m_hashsize;	/* Size of main hash table */

	char	m_hashname[MAXPATHLEN]; /* Name of hash table file */

	int	m_aflag;		/* NZ if -a or -A option specified */
	int	m_cflag;		/* NZ if -c (crunch) option */
	int	m_lflag;		/* NZ if -l (list) option */
	int	m_incfileflag;	/* whether xgets() acts exactly like gets() */
	int	m_nodictflag;	/* NZ if dictionary not needed */

	int	m_uerasechar;	/* User's erase character, from stty */
	int	m_ukillchar;	/* User's kill character */

	unsigned int m_laststringch; /* Number of last string character */
	int	m_defdupchar;	/* Default duplicate string type */

	int	m_numpflags;		/* Number of prefix flags in table */
	int	m_numsflags;		/* Number of suffix flags in table */
	struct flagptr m_pflagindex[SET_SIZE + MAXSTRINGCHARS];
						/* Fast index to pflaglist */
	struct flagent *	m_pflaglist;	/* Prefix flag control list */
	struct flagptr m_sflagindex[SET_SIZE + MAXSTRINGCHARS];
						/* Fast index to sflaglist */
	struct flagent *	m_sflaglist;	/* Suffix flag control list */

	struct strchartype *		/* String character type collection */
			m_chartypes;

	FILE *	m_infile;			/* File being corrected */
	FILE *	m_outfile;		/* Corrected copy of infile */

	char *	m_askfilename;		/* File specified in -f option */

	int	m_changes;		/* NZ if changes made to cur. file */
	int	m_readonly;		/* NZ if current file is readonly */
	int	m_quit;			/* NZ if we're done with this file */

	/* Being a preprocessor macro, MAXPOSSIBLE is not scoped to this class:
	 * it stays defined for everything that follows this header's inclusion. */
#define MAXPOSSIBLE	100	/* Max no. of possibilities to generate */

	char	m_possibilities[MAXPOSSIBLE][INPUTWORDLEN + MAXAFFIXLEN];
					/* Table of possible corrections */
	int	m_pcount;		/* Count of possibilities generated */
	int	m_maxposslen;	/* Length of longest possibility */
	int	m_easypossibilities; /* Number of "easy" corrections found */
					/* ..(defined as those using legal affixes) */

	/*
	 * The following array contains a list of characters that should be tried
	 * in "missingletter."  Note that lowercase characters are omitted.
	 */
	int	m_Trynum;		/* Size of "Try" array */
	ichar_t	m_Try[SET_SIZE + MAXSTRINGCHARS];

	TQTextCodec   *m_translate_in; /* Selected translation from/to Unicode */
};

#endif /* ISPELL_CHECKER_H */