summaryrefslogtreecommitdiffstats
path: root/kooka/ksaneocr.h
blob: 1a94a71888050e8368a71e094de5dfd4b118a838 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
/***************************************************************************
                          ksaneocr.h  - ocr-engine class
                             -------------------
    begin                : Fri Jun 30 2000
    copyright            : (C) 2000 by Klaas Freitag
    email                : [email protected]
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *  This file may be distributed and/or modified under the terms of the    *
 *  GNU General Public License version 2 as published by the Free Software *
 *  Foundation and appearing in the file COPYING included in the           *
 *  packaging of this file.                                                *
 *
 *  As a special exception, permission is given to link this program       *
 *  with any version of the KADMOS ocr/icr engine of reRecognition GmbH,   *
 *  Kreuzlingen and distribute the resulting executable without            *
 *  including the source code for KADMOS in the source distribution.       *
 *
 *  As a special exception, permission is given to link this program       *
 *  with any edition of TQt, and distribute the resulting executable,       *
 *  without including the source code for TQt in the source distribution.   *
 *                                                                         *
 ***************************************************************************/

#ifndef KSANEOCR_H
#define KSANEOCR_H
#include <tqwidget.h>
#include <tqobject.h>

#include "ocrword.h"

/*
 * String constants naming OCR related settings.
 * NOTE(review): presumably these are entry names used with the KConfig
 * object handed to KSANEOCR - confirm against the implementation file.
 */
#define CFG_OCR_ENGINE    "ocrEngine"
#define CFG_OCR_CLEANUP   "unlinkORF"  /* delete orf file? */

/* Settings related to the KSpell spell checker. */
#define CFG_OCR_KSPELL    "ocrSpellSettings"
#define CFG_WANT_KSPELL   "ocrKSpellEnabled"
#define CFG_KS_NOROOTAFFIX  "KSpell_NoRootAffix"
#define CFG_KS_RUNTOGETHER  "KSpell_RunTogether"
#define CFG_KS_DICTIONARY   "KSpell_Dictionary"
#define CFG_KS_DICTFROMLIST "KSpell_DictFromList"
#define CFG_KS_ENCODING     "KSpell_Encoding"
#define CFG_KS_CLIENT       "KSpell_Client"


/* NOTE(review): judging by its value, this controls hiding the OCR dialog
 * while the spell check runs - confirm against the implementation. */
#define HIDE_BASE_DIALOG "hideOCRDialogWhileSpellCheck"
/**
  *@author Klaas Freitag
  */

/*
 * Forward declarations of types that are referenced by the KSANEOCR
 * declaration below.
 */
class KOCRBase;
class KookaImage;
class KTempFile;
class KProcess;
class TQRect;
class TQPixmap;
class TQStringList;
class KSpell;
class KSpellConfig;
class ImageCanvas;
class KConfig;
/* ocrWord is not forward declared here; "ocrword.h" is included above instead. */
// class ocrWord;
// class ocrPage;

#ifdef HAVE_KADMOS
#include "kadmosocr.h"
#endif

/*
 * Error classifier used to report errors caused by a bad engine setup.
 */
enum EngineError
{
    ENG_ERROR,
    ENG_OK,
    ENG_DATA_MISSING,
    ENG_BAD_SETUP
};

/**
 * OCR engine class (see the file header). It is a TQObject that publishes
 * OCR results and spell checking events through signals and accepts the
 * image to process through slots.
 *
 * The engines the class distinguishes are listed in OCREngines. Support
 * for the KADMOS engine is only declared if HAVE_KADMOS is defined.
 */
class KSANEOCR : public TQObject
{
    Q_OBJECT
  
public:
    /** Known OCR engines; the selected one is kept in m_ocrEngine. */
    enum OCREngines{ GOCR, OCRAD, KADMOS };

    /**
     * NOTE(review): the parameters are unnamed in this declaration;
     * presumably the parent widget (cf. m_parent) and the configuration
     * to read the CFG_* settings from - confirm in the implementation.
     */
    KSANEOCR( TQWidget*, KConfig *);
    ~KSANEOCR();

    /**
     * NOTE(review): presumably starts an OCR run with a visible dialog
     * (cf. m_ocrProcessDia, visibleOCRRunning); the meaning of the return
     * value is not visible here - confirm in the implementation.
     *
     * @param parent  optional widget, defaults to 0
     */
    bool startOCRVisible( TQWidget* parent=0);

    /**
     * NOTE(review): counterpart of startOCRVisible; the meaning of the
     * bool argument is not visible in this header.
     */
    void finishedOCRVisible( bool );

    /**
     * checks after an OCR run if the line number exists in the result
     */
    bool lineValid( int line );

#ifdef HAVE_KADMOS
    /** Only available when built with KADMOS support (HAVE_KADMOS). */
    bool startKadmosOCR();
#endif

    /**
     * return the final ocr result
     */

    TQString ocrResultText();

    /**
     * @return the current spell config.
     */
    KSpellConfig* ocrSpellConfig() const
        { return m_spellInitialConfig; }


    /**
     * Sets an image canvas that displays the result image of ocr. If this
     * is set to zero (or never set) no result image is displayed.
     * The ocr factory passes a new image to the canvas which is a copy of
     * the image to ocr.
     */
    void setImageCanvas( ImageCanvas* canvas );

signals:
    /** Carries OCR result text. */
    void newOCRResultText( const TQString& );
    /** NOTE(review): presumably asks receivers to drop previously sent result text. */
    void clearOCRResultText();
    /** Carries a pixmap of the OCR result. */
    void newOCRResultPixmap( const TQPixmap& );

    /**
     * progress of the ocr process. The first integer is the main progress,
     * the second the sub progress. If there is only one progress, it is the
     * first parameter, the second is always -1 then.
     * Both have a range from 0..100.
     * Note that this signal may not be emitted if the engine does not support
     * progress.
     */
    void ocrProgress(int, int);

    /**
     * select a word in the editor in line line.
     */
     void selectWord( int line, const ocrWord& word );

    /**
     * signal to indicate that an ocr text must be updated due to better results
     * retrieved from spell check. The internal ocr data structure is already
     * updated when this signal is fired.
     *
     * @param line      the line in which the word must be changed (start at 0)
     * @param wordFrom  the original word
     * @param wordTo    the new word(s).
     */
    void updateWord( int line, const TQString& wordFrom, const TQString& wordTo );

    /**
     * signal to indicate that a word was ignored by the user. This should result
     * in a special coloring in the editor.
     */
    void ignoreWord( int, const ocrWord& );

    /**
     * signal that comes if a word is considered to be wrong in the editor.
     * The word should be marked in any way, e.g. with a signal color.
     **/
    void markWordWrong( int, const ocrWord& );

    /**
     * signal that tells that the result image was modified.
     */
    void repaintOCRResImage( );

    /**
     * indicates that the text editor holding the text that came through
     * newOCRResultText should be set to readonly or not. Can be connected
     * to TQTextEdit::setReadOnly directly.
     */
    void readOnlyEditor( bool );

public slots:
    /** NOTE(review): presumably sets the image to run OCR on (cf. m_img). */
    void slSetImage( KookaImage* );

    /** NOTE(review): receives a rectangle; its exact use is not visible in this header. */
    void slLineBox( const TQRect& );

protected:
    /**
     *  Start spell checking on a specific line that is stored in m_ocrCurrLine.
     *  This method starts the spell checking.
     **/
    void startLineSpellCheck();
    /**
     * NOTE(review): presumably maps a word reported by KSpell for the given
     * line back to the matching ocrWord - confirm in the implementation.
     */
    ocrWord ocrWordFromKSpellWord( int line, const TQString& word );

    /**
     * Eventhandler to handle the mouse events to the image viewer showing the
     * ocr result image
     */
    bool eventFilter( TQObject *object, TQEvent *event );

    /** NOTE(review): presumably launches the OCRAD engine (cf. OCREngines::OCRAD). */
    void startOCRAD();
protected slots:
    void slotClose ();
    void slotStopOCR();

    /* Slots dealing with the KSpell spell checker. */
    void slSpellReady( KSpell* );
    void slSpellDead( );
    /**
     * a new list of ocr results of the current ocr process arrived and is available
     * in the member m_ocrPage[line]
     */
    // void gotOCRLine( int line );

    void slMisspelling( const TQString& originalword,
                        const TQStringList& suggestions,
                        unsigned int pos );
    void slSpellCorrected( const TQString& originalword,
                           const TQString& newword,
                           unsigned int pos );

    void slSpellIgnoreWord( const TQString& word );

    void slCheckListDone( bool );

    bool  slUpdateWord( int line, int spellWordIndx,
                        const TQString& origWord,
                        const TQString& newWord );

private slots:

    /* NOTE(review): declared unconditionally although the other KADMOS
     * members are guarded by HAVE_KADMOS - confirm this is intended. */
    void slotKadmosResult();
    void startOCRProcess( void );
    /* Slots taking a KProcess and a character buffer for the gocr engine. */
    void gocrStdIn(KProcess*, char* buffer, int buflen);
    void gocrStdErr(KProcess*, char* buffer, int buflen);
    void gocrExited(KProcess*);

    /* The same set of slots for the ocrad engine. */
    void ocradStdIn(KProcess*, char* buffer, int buflen);
    void ocradStdErr(KProcess*, char* buffer, int buflen);
    void ocradExited(KProcess*);

    /*
     * reads orf files from a file and fills the result structures
     * accordingly.
     */
    bool readORF( const TQString&, TQString& );

private:
    void     cleanUpFiles( void );


    KOCRBase        *m_ocrProcessDia;
    KProcess        *daemon;
    bool             visibleOCRRunning;
    KTempFile       *m_tmpFile;

    KookaImage      *m_img;
    TQString         m_ocrResultText;
    TQString         m_ocrResultImage;
    TQString         m_ocrImagePBM;
    TQString         m_tmpOrfName;
    TQImage          *m_resultImage;

    /* engine in use, one of OCREngines */
    OCREngines      m_ocrEngine;
    TQPixmap         m_resPixmap;
    TQPixmap         m_storePixmap;

    /* canvas set through setImageCanvas */
    ImageCanvas     *m_imgCanvas;

    KSpell          *m_spell;
    bool             m_wantKSpell;
    bool             m_kspellVisible;
    bool             m_hideDiaWhileSpellcheck;
    /* returned by ocrSpellConfig() */
    KSpellConfig    *m_spellInitialConfig;

    /* ValueVector of wordLists for every line of ocr results */
    ocrBlock         m_ocrPage; /* one block contains all lines of the page */
    TQWidget          *m_parent;
    /* current processed line to speed kspell correction */
    unsigned         m_ocrCurrLine;
    TQStringList      m_checkStrings;

    int              m_currHighlight;
    bool             m_applyFilter;

    /* NOTE(review): presumably mirrors the CFG_OCR_CLEANUP ("unlinkORF") setting */
    bool             m_unlinkORF;
    rectList         m_blocks;   // dimensions of blocks

    static char UndetectedChar;
#ifdef HAVE_KADMOS
    Kadmos::CRep   m_rep;
#endif
};

#endif