summaryrefslogtreecommitdiffstats
path: root/kexi/plugins/importexport/csv/kexicsvimportdialog.h
blob: 2e8648608e1563b8f79f347ff6e732013acc7c53 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/* This file is part of the KDE project
   Copyright (C) 2005-2006 Jaroslaw Staniek <[email protected]>

   This work is based on kspread/dialogs/kspread_dlg_csv.cpp
   and will be merged back with KOffice libraries.

   Copyright (C) 2002-2003 Norbert Andres <[email protected]>
   Copyright (C) 2002-2003 Ariya Hidayat <[email protected]>
   Copyright (C) 2002 Laurent Montel <[email protected]>
   Copyright (C) 1999 David Faure <[email protected]>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
*/

#ifndef KEXI_CSVDIALOG_H
#define KEXI_CSVDIALOG_H

#include <tqvaluevector.h>
#include <tqvaluelist.h>
#include <tqptrvector.h>
#include <tqregexp.h>
#include <tqbitarray.h>

#include <kdialogbase.h>

#include <kexiutils/tristate.h>
#include <kexidb/connection.h>

#include "kexicsvimportoptionsdlg.h"

// Forward declarations of TQ*/K* classes. In this header these names appear
// only as pointer types (or are not referenced at all), so a declaration is
// sufficient here and no further widget headers are included.
class TQVBoxLayout;
class TQHBoxLayout;
class TQGridLayout;
class TQButtonGroup;
class TQCheckBox;
class TQLabel;
class TQLineEdit;
class TQPushButton;
class TQRadioButton;
class TQTable;
class TQFile;
class KComboBox;
class KIntSpinBox;
class KProgressDialog;

// Forward declarations of Kexi classes; the dialog below refers to them
// through pointers only.
class KexiMainWindow;
class KexiCSVDelimiterWidget;
class KexiCSVTextQuoteComboBox;
class KexiCSVInfoLabel;

/**
 * @short Kexi CSV import dialog
 *
 * This is a temporary solution for Kexi CSV import,
 * based on kspread/dialogs/kspread_dlg_csv.h, cc.
 *
 * Provides a dialog for managing CSV (comma separated value) data.
 *
 * KexiCSVImportDialog is meant for converting text into columns,
 * inserting a text file and pasting text from the clipboard, i.e. cases where
 * conversion from CSV (comma separated value) data is all that is required.
 * The purpose is selected with the Mode value passed to the constructor.
 * Note that Mode currently declares only Clipboard and File; the Column
 * entry is commented out in the enum.
*/
class KexiCSVImportDialog : public KDialogBase
{
	TQ_OBJECT
  

	public:
		//! Source of the CSV data. The Column entry is commented out.
		enum Mode { Clipboard, File /*, Column*/ };
		//! Kinds of column header.
		//! NOTE(review): presumably related to the value returned by getHeader() - confirm.
		enum Header { TEXT, NUMBER, DATE, CURRENCY };

		//! Creates the dialog for the given \a mode.
		//! \a mainWin is the Kexi main window, \a parent the parent widget and
		//! \a name the object name (0 by default).
		//! @todo what about making it kexidb-independent?
		KexiCSVImportDialog( Mode mode, KexiMainWindow* mainWin, TQWidget * parent, 
		  const char * name = 0/*, TQRect const & rect*/);

		virtual ~KexiCSVImportDialog();

		//! NOTE(review): presumably reports the m_cancelled flag - confirm in the implementation.
		bool cancelled() const;
		//! Event filter taking the \a watched object and the event \a e.
		//! Which objects and events are handled is decided in the implementation.
		virtual bool eventFilter ( TQObject * watched, TQEvent * e );

	protected:
		//! NOTE(review): presumably opens the input selected by the mode
		//! (file or clipboard) - confirm; \return value semantics are defined in the implementation.
		bool openData();
		//! Virtual accept handler; the actions taken on acceptance are defined in the implementation.
		virtual void accept();

	private:
		// Widgets and layouts of the dialog (pointers only).
		TQGridLayout* MyDialogLayout;
		TQHBoxLayout* Layout1;
		TQTable* m_table;
		KexiCSVDelimiterWidget* m_delimiterWidget;
		bool m_detectDelimiter; //!< true if delimiter should be detected 
		                        //!< (true by default, set to false if the user sets a delimiter)
		TQString m_formatComboText;
		TQLabel* m_formatLabel;
		KComboBox* m_formatCombo;
		KIntSpinBox *m_startAtLineSpinBox;
		KexiCSVTextQuoteComboBox* m_comboQuote;
		TQLabel* m_startAtLineLabel;
		TQLabel* TextLabel2;
		TQCheckBox* m_ignoreDuplicates;
		TQCheckBox* m_1stRowForFieldNames;
		TQCheckBox* m_primaryKeyField;

		//! Main window pointer (same type as the constructor's mainWin argument).
		KexiMainWindow* m_mainWin;

		//! NOTE(review): going by the name, inspects \a text of the cell at (\a row, \a col)
		//! to update type and uniqueness detection for the column - confirm.
		void detectTypeAndUniqueness(int row, int col, const TQString& text);
		//! Sets \a text for the cell at (\a row, \a col).
		//! NOTE(review): \a inGUI presumably tells whether the preview table is the target - confirm.
		void setText(int row, int col, const TQString& text, bool inGUI);

		/*! Parses a date from \a text and stores it into \a date. 
		 m_dateRegExp is used for clever detection; 
		 if a '/' separator is found, the format is assumed to be American mm/dd/yyyy.
		 This function supports omitted zeros, so 1/2/2006 is parsed properly too.
		 \return true on success. */
		bool parseDate(const TQString& text, TQDate& date);

		/*! Parses a time from \a text and stores it into \a time. 
		 m_timeRegExp1 and m_timeRegExp2 are used for clever detection; 
		 both hh:mm:ss and hh:mm are supported.
		 This function supports omitted zeros, so 1:2:3 is parsed properly too.
		 \return true on success. */
		bool parseTime(const TQString& text, TQTime& time);

		/*! Called after the first fillTable() when the number of rows is unknown. */
		void adjustRows(int iRows);

		//! NOTE(review): presumably returns a Header value for column \a col - confirm.
		int  getHeader(int col);
		//! \return text for the cell at (\a row, \a col).
		TQString getText(int row, int col);
		void updateColumnText(int col);
		void updateRowCountInfo();
		//! Loads rows; \a field, \a row, \a columnm and \a maxColumn are passed by
		//! reference, so they can be updated by the call. The result is a tristate.
		//! NOTE(review): the parameter name 'columnm' looks like a typo for 'column'.
		tristate loadRows(TQString &field, int &row, int &columnm, int &maxColumn, bool inGUI);

		/*! Detects the delimiter by looking at the first 4K bytes of the data. Used by loadRows().
		The algorithm used:
		1. Look byte by byte and locate special characters that can be delimiters.
		  A special fact is taken into account: if '"' quotes are used for text values,
		  delimiters that directly follow the closing quote have higher priority than ones
		  that follow another character. We do not assume that every text value is quoted.
		  Summing up, there is the following hierarchy (from highest to lowest): 
		  quote+tab, quote+semicolon, quote+comma, tab, semicolon, comma.
		  Space characters are skipped. Text inside quotes is skipped, as well as double 
		  (escaped) quotes.
		2. While scanning the data, for every row the numbers of tabs, semicolons and commas
		  (only those outside of the quotes) are computed. On every line the values are appended
		  to a separate list (TQValueList<int>).
		3. After scanning, all the values are checked on the TQValueList<int> of tabs. 
		  If the list has more than one element (so there was more than one row) and all the values 
		  (numbers of tabs) are equal, it's very probable that the tab is the delimiter. 
		  So, this character is returned as the delimiter.
		  3a. The same algorithm as in 3. is performed for the semicolon character.
		  3b. The same algorithm as in 3. is performed for the comma character.
		4. If step 3. did not return a delimiter, the character found in step 1. with 
		  the highest priority is returned as the delimiter. */
		TQString detectDelimiterByLookingAtFirstBytesOfFile(TQTextStream& inputStream);

		/*! Callback, called whenever a row is loaded in loadRows(). When \a inGUI is true, 
		nothing is performed, else the database buffer is written back to the database. */
		bool saveRow(bool inGUI);

		bool m_cancelled;
		bool m_adjustRows;
		int m_startline;
		TQChar m_textquote;
		TQString m_clipboardData;
		TQByteArray m_fileArray;
		Mode m_mode;
		int m_prevSelectedCol;

		//! vector of detected types, 0==text (the default), 1==number, 2==date
		//! @todo more types
		TQValueVector<int> m_detectedTypes;

		//! One list of ints per column, used for the uniqueness test
		//! (only for numeric type).
		//! NOTE(review): the previous comment described "m_detectedUniqueColumns[i]==true
		//! means that i-th column has unique values", but no member of that name is
		//! declared in this header - confirm how uniqueness is read from this vector.
		TQPtrVector< TQValueList<int> > m_uniquenessTest;

		TQRegExp m_dateRegExp, m_timeRegExp1, m_timeRegExp2, m_fpNumberRegExp;
		TQValueVector<TQString> m_typeNames, m_columnNames;
		TQBitArray m_changedColumnNames;
		// The four flags below are 1-bit bitfields.
		bool m_columnsAdjusted : 1; //!< to call adjustColumn() only once
		bool m_1stRowForFieldNamesDetected : 1; //!< used to force rerun fillTable() after 1st row
		bool m_firstFillTableCall : 1; //!< used to know whether it's 1st fillTable() call
		bool m_blockUserEvents : 1;
		int m_primaryKeyColumn; //!< index of column with PK assigned (-1 if none)
		int m_maximumRowsForPreview;
		int m_maximumBytesForPreview;
		TQPixmap m_pkIcon;
		TQString m_fname;
		TQFile* m_file;
		TQTextStream *m_inputStream; //!< used in loadData() - NOTE(review): loadData() is not declared in this header; possibly openData() or loadRows() is meant
		KexiCSVImportOptions m_options;
		KProgressDialog *m_loadingProgressDlg, *m_importingProgressDlg;
		bool m_dialogCancelled;
		KexiCSVInfoLabel *m_infoLbl;
		KexiDB::Connection *m_conn; //!< (temp) database connection used for importing
		KexiDB::TableSchema *m_destinationTableSchema;  //!< (temp) dest. table schema used for importing
		KexiDB::PreparedStatement::Ptr m_importingStatement;
		TQValueList<TQVariant> m_dbRowBuffer; //!< (temp) used for importing
		bool m_implicitPrimaryKeyAdded; //!< (temp) used for importing
		bool m_allRowsLoadedInPreview; //!< we need to know whether all rows were loaded or it's just a partial data preview
		bool m_stoppedAt_MAX_BYTES_TO_PREVIEW; //!< used to compute m_allRowsLoadedInPreview

	// Slots are only declared here; what they are connected to and what they do
	// is defined outside this header.
	private slots:
		void fillTable();
		void fillTableLater();
		void initLater();
		void formatChanged(int id);
		void delimiterChanged(const TQString& delimiter);
		void startlineSelected(int line);
		void textquoteSelected(int);
		void currentCellChanged(int, int col);
		void ignoreDuplicatesChanged(int);
		void slot1stRowForFieldNamesChanged(int);
		void cellValueChanged(int row,int col);
		void optionsButtonClicked();
		void slotPrimaryKeyFieldToggled(bool on);
};

#endif