1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
|
/***************************************************************************
pseudoDtd.cpp
copyright : (C) 2001-2002 by Daniel Naber
email : [email protected]
***************************************************************************/
/***************************************************************************
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or ( at your option ) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include "pseudo_dtd.h"
#include <assert.h>
#include <qdom.h>
#include <qregexp.h>
#include <klocale.h>
#include <kmessagebox.h>
/**
 * Construct a PseudoDTD with "SGML support" switched on.
 *
 * "SGML support" only means case-insensitivity, because HTML is
 * case-insensitive up to version 4.
 */
PseudoDTD::PseudoDTD()
  : m_sgmlSupport( true ) // TODO: make this a run-time option ( maybe automatically set )
{
}
/** Destructor; there is nothing to release explicitly here. */
PseudoDTD::~PseudoDTD()
{
  // intentionally empty
}
/**
 * Parse a meta DTD given as XML text and fill the lookup tables for
 * entities, elements, attributes and attribute values.
 *
 * An error box is shown, and nothing is parsed, if the text cannot be loaded
 * as XML or if its doctype name is not "dtd". While parsing, a progress
 * dialog is updated; the analysis stops as soon as one of the parse steps
 * returns false.
 *
 * @param metaDtdUrl  only used to name the file in the error messages
 * @param metaDtd     the XML text of the meta DTD
 */
void PseudoDTD::analyzeDTD( QString &metaDtdUrl, QString &metaDtd )
{
  QDomDocument doc( "dtdIn_xml" );

  const bool loaded = doc.setContent( metaDtd );
  if ( ! loaded )
  {
    const QString message = i18n("The file '%1' could not be parsed. "
        "Please check that the file is well-formed XML.").arg( metaDtdUrl );
    KMessageBox::error( 0, message, i18n( "XML Plugin Error") );
    return;
  }

  const bool isMetaDtd = ( doc.doctype().name() == "dtd" );
  if ( ! isMetaDtd )
  {
    const QString message = i18n("The file '%1' is not in the expected format. "
        "Please check that the file is of this type:\n"
        "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
        "You can produce such files with dtdparse. "
        "See the Kate Plugin documentation for more information.").arg( metaDtdUrl );
    KMessageBox::error( 0, message, i18n("XML Plugin Error") );
    return;
  }

  // One progress step per node that the parse functions will visit.
  const uint entityCount = doc.elementsByTagName( "entity" ).count();
  const uint elementCount = doc.elementsByTagName( "element" ).count();
  const uint attlistCount = doc.elementsByTagName( "attlist" ).count();
  // attlists are counted twice, as they will be iterated twice ( TODO: optimize that? ):
  const uint totalSteps = entityCount + elementCount + attlistCount * 2;

  QProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), totalSteps,
      0, "progress", TRUE );
  progress.setMinimumDuration( 400 );
  progress.setProgress(0);

  // Get information from meta DTD and put it in Qt data structures for fast
  // access. The && chain stops at the first step that returns false.
  const bool completed = parseEntities( &doc, &progress )
      && parseElements( &doc, &progress )
      && parseAttributes( &doc, &progress )
      && parseAttributeValues( &doc, &progress );

  if ( completed )
    progress.setProgress( totalSteps ); // just to make sure the dialog disappears
}
// ========================================================================
// DOM stuff:
/**
 * Iterate through the XML to get a mapping which sub-elements are allowed for
 * all elements. The result replaces the previous contents of m_elementsList.
 *
 * For every <element> node the names found below <content-model-expanded>
 * are collected, "__EMPTY" is added if the element contains an <empty> node,
 * and the names listed below <exclusions> are removed again.
 *
 * @param doc       the parsed meta DTD
 * @param progress  advanced by one step per <element> node and polled for
 *                  cancellation
 * @return false if the progress dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseElements( QDomDocument *doc, QProgressDialog *progress )
{
  m_elementsList.clear();

  QDomNodeList elementNodes = doc->elementsByTagName( "element" );
  const uint elementCount = elementNodes.count(); // speedup (really! )
  for( uint i = 0; i < elementCount; ++i )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    // FIXME!:
    //qApp->processEvents();

    QDomElement elem = elementNodes.item( i ).toElement();
    if( elem.isNull() )
      continue;

    // We only display a list, i.e. we pretend that the content model is just
    // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head
    // element, which would otherwise display some elements twice.
    QMap<QString,bool> allowed; // the bool is not used

    // Enter the expanded content model, which may also include stuff not allowed.
    // We do not care if it's a <sequence-group> or whatever.
    QDomElement contentModel =
        elem.elementsByTagName( "content-model-expanded" ).item( 0 ).toElement();
    if( ! contentModel.isNull() )
    {
      QDomNodeList nameNodes = contentModel.elementsByTagName( "element-name" );
      const uint nameCount = nameNodes.count();
      for( uint n = 0; n < nameCount; ++n )
      {
        QDomElement nameElem = nameNodes.item( n ).toElement();
        if( ! nameElem.isNull() )
          allowed[nameElem.attribute( "name" )] = true;
      }
      // anders: check if this is an EMPTY element, and put "__EMPTY" in the
      // sub list, so that we can insert tags in empty form if required.
      if( elem.elementsByTagName( "empty" ).count() > 0 )
        allowed["__EMPTY"] = true;
    }

    // Now remove the elements not allowed (e.g. <a> is explicitly not allowed
    // in <a> in the HTML 4.01 Strict DTD). Sometimes there are no exclusions
    // ( e.g. in XML DTDs there are never exclusions ); item( 0 ) then yields a
    // null node and this part is skipped.
    QDomElement exclusions = elem.elementsByTagName( "exclusions" ).item( 0 ).toElement();
    if( ! exclusions.isNull() )
    {
      QDomNodeList excludedNodes = exclusions.elementsByTagName( "element-name" );
      const uint excludedCount = excludedNodes.count();
      for( uint n = 0; n < excludedCount; ++n )
      {
        QDomElement excludedElem = excludedNodes.item( n ).toElement();
        if( ! excludedElem.isNull() )
          allowed.remove( excludedElem.attribute( "name" ) ); // no-op if the name is absent
      }
    }

    // turn the map into a list:
    QStringList allowedNames;
    QMap<QString,bool>::ConstIterator it;
    for( it = allowed.begin(); it != allowed.end(); ++it )
      allowedNames.append( it.key() );
    m_elementsList.insert( elem.attribute( "name" ), allowedNames );
  } // end iteration over all <element> nodes
  return true;
}
/**
 * Check which elements are allowed inside a parent element. This returns
 * a list of allowed elements, but it doesn't care about order or if only a certain
 * number of occurrences is allowed.
 *
 * With SGML support enabled the parent name is compared case-insensitively
 * and the first matching entry wins; otherwise an exact lookup is done.
 *
 * @param parentElement  name of the enclosing element
 * @return the allowed sub-element names, or an empty list if the parent is unknown
 */
QStringList PseudoDTD::allowedElements( QString parentElement )
{
  if( m_sgmlSupport )
  {
    // find the matching element, ignoring case (lower-case the argument only once):
    const QString wanted = parentElement.lower();
    QMap<QString,QStringList>::Iterator it;
    for( it = m_elementsList.begin(); it != m_elementsList.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data();
    }
    return QStringList();
  }

  // case-sensitive: a single lookup is enough
  QMap<QString,QStringList>::Iterator hit = m_elementsList.find( parentElement );
  if( hit != m_elementsList.end() )
    return hit.data();
  return QStringList();
}
/**
 * Iterate through the XML to get a mapping which attributes are allowed inside
 * all elements. The result replaces the previous contents of m_attributesList.
 *
 * An <attribute> whose "type" is "#REQUIRED" is stored as required, every
 * other one as optional.
 *
 * @param doc       the parsed meta DTD
 * @param progress  advanced by one step per <attlist> node and polled for
 *                  cancellation
 * @return false if the progress dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseAttributes( QDomDocument *doc, QProgressDialog *progress )
{
  m_attributesList.clear();

  QDomNodeList attlistNodes = doc->elementsByTagName( "attlist" );
  const uint attlistCount = attlistNodes.count();
  for( uint idx = 0; idx < attlistCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    // FIXME!!
    //qApp->processEvents();

    QDomElement attlist = attlistNodes.item( idx ).toElement();
    if( attlist.isNull() )
      continue;

    ElementAttributes collected;
    QDomNodeList declaredNodes = attlist.elementsByTagName( "attribute" );
    const uint declaredCount = declaredNodes.count();
    for( uint a = 0; a < declaredCount; ++a )
    {
      QDomElement declared = declaredNodes.item( a ).toElement();
      if( declared.isNull() )
        continue;

      const QString attributeName = declared.attribute("name");
      if ( declared.attribute("type") != "#REQUIRED" )
        collected.optionalAttributes.append( attributeName );
      else
        collected.requiredAttributes.append( attributeName );
    }
    m_attributesList.insert( attlist.attribute("name"), collected );
  }
  return true;
}
/**
 * Check which attributes are allowed for an element.
 *
 * With SGML support enabled the element name is compared case-insensitively
 * and the first matching entry wins; otherwise an exact lookup is done.
 *
 * @param element  name of the element
 * @return the optional attributes followed by the required ones, or an empty
 *         list if the element is unknown
 */
QStringList PseudoDTD::allowedAttributes( QString element )
{
  if( m_sgmlSupport )
  {
    // find the matching element, ignoring case (lower-case the argument only once):
    const QString wanted = element.lower();
    QMap<QString,ElementAttributes>::Iterator it;
    for( it = m_attributesList.begin(); it != m_attributesList.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data().optionalAttributes + it.data().requiredAttributes;
    }
    return QStringList();
  }

  // case-sensitive: one lookup instead of contains() plus two operator[] calls
  QMap<QString,ElementAttributes>::Iterator hit = m_attributesList.find( element );
  if( hit != m_attributesList.end() )
    return hit.data().optionalAttributes + hit.data().requiredAttributes;
  return QStringList();
}
/**
 * Check which attributes are required for an element.
 *
 * With SGML support enabled the element name is compared case-insensitively
 * and the first matching entry wins; otherwise an exact lookup is done.
 *
 * @param element  name of the element
 * @return the required attribute names, or an empty list if the element is unknown
 */
QStringList PseudoDTD::requiredAttributes( const QString &element ) const
{
  if ( m_sgmlSupport )
  {
    // lower-case the argument only once, not on every iteration
    const QString wanted = element.lower();
    QMap<QString,ElementAttributes>::ConstIterator it;
    for( it = m_attributesList.begin(); it != m_attributesList.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data().requiredAttributes;
    }
    return QStringList();
  }

  // case-sensitive: a single lookup is enough
  QMap<QString,ElementAttributes>::ConstIterator hit = m_attributesList.find( element );
  if( hit != m_attributesList.end() )
    return hit.data().requiredAttributes;
  return QStringList();
}
/**
 * Iterate through the XML to get a mapping which attribute values are allowed
 * for all attributes inside all elements. The result replaces the previous
 * contents of m_attributevaluesList.
 *
 * The "value" of every <attribute> is split at single spaces to obtain the
 * list of values.
 *
 * @param doc       the parsed meta DTD
 * @param progress  advanced by one step per <attlist> node and polled for
 *                  cancellation
 * @return false if the progress dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseAttributeValues( QDomDocument *doc, QProgressDialog *progress )
{
  m_attributevaluesList.clear(); // 1 element : n possible attributes

  const QRegExp separator(" ");
  QDomNodeList attlistNodes = doc->elementsByTagName( "attlist" );
  const uint attlistCount = attlistNodes.count();
  for( uint idx = 0; idx < attlistCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    // FIXME!
    //qApp->processEvents();

    QDomElement attlist = attlistNodes.item( idx ).toElement();
    if( attlist.isNull() )
      continue;

    QMap<QString,QStringList> valuesByAttribute; // 1 attribute : n possible values

    // Enter the list of <attribute>:
    QDomNodeList declaredNodes = attlist.elementsByTagName( "attribute" );
    const uint declaredCount = declaredNodes.count();
    for( uint a = 0; a < declaredCount; ++a )
    {
      QDomElement declared = declaredNodes.item( a ).toElement();
      if( declared.isNull() )
        continue;

      const QString rawValues = declared.attribute( "value" );
      valuesByAttribute.insert( declared.attribute("name"),
                                QStringList::split( separator, rawValues ) );
    }
    m_attributevaluesList.insert( attlist.attribute("name"), valuesByAttribute );
  }
  return true;
}
/**
 * Check which attribute values are allowed for an attribute in an element
 * (the element is necessary because e.g. "href" inside <a> could be different
 * to an "href" inside <link>).
 *
 * With SGML support enabled both names are compared case-insensitively;
 * otherwise exact lookups are done.
 *
 * @param element    name of the element
 * @param attribute  name of the attribute inside that element
 * @return the predefined values, or an empty list if none are available
 */
QStringList PseudoDTD::attributeValues( QString element, QString attribute )
{
  // Read through const references so that iterating does not detach (deep-copy)
  // the implicitly shared maps.
  const QMap< QString,QMap<QString,QStringList> > &table = m_attributevaluesList;

  // Direct access would be faster than iteration of course but not always correct,
  // because we need to be case-insensitive.
  if( m_sgmlSupport )
  {
    // lower-case the arguments only once, not on every iteration
    const QString wantedElement = element.lower();
    const QString wantedAttribute = attribute.lower();

    // first find the matching element, ignoring case:
    QMap< QString,QMap<QString,QStringList> >::ConstIterator it;
    for( it = table.begin(); it != table.end(); ++it )
    {
      if( it.key().lower() != wantedElement )
        continue;

      // then find the matching attribute for that element, ignoring case:
      const QMap<QString,QStringList> &attrVals = it.data();
      QMap<QString,QStringList>::ConstIterator itV;
      for( itV = attrVals.begin(); itV != attrVals.end(); ++itV )
      {
        if( itV.key().lower() == wantedAttribute )
          return itV.data();
      }
      // no such attribute here: keep looking, another entry may differ only in case
    }
    return QStringList();
  }

  // case-sensitive: one lookup per level
  QMap< QString,QMap<QString,QStringList> >::ConstIterator elemIt = table.find( element );
  if( elemIt != table.end() )
  {
    const QMap<QString,QStringList> &attrVals = elemIt.data();
    QMap<QString,QStringList>::ConstIterator attrIt = attrVals.find( attribute );
    if( attrIt != attrVals.end() )
      return attrIt.data();
  }
  // no predefined values available:
  return QStringList();
}
/**
 * Iterate through the XML to get a mapping of all entity names and their expanded
 * version, e.g. nbsp => &#160;. Parameter entities are ignored. The result
 * replaces the previous contents of m_entityList.
 *
 * An entity without a <text-expanded> child is stored with a null string.
 *
 * @param doc       the parsed meta DTD
 * @param progress  advanced by one step per <entity> node and polled for
 *                  cancellation
 * @return false if the progress dialog was cancelled, true otherwise
 */
bool PseudoDTD::parseEntities( QDomDocument *doc, QProgressDialog *progress )
{
  m_entityList.clear();

  QDomNodeList entityNodes = doc->elementsByTagName( "entity" );
  const uint entityCount = entityNodes.count();
  for( uint idx = 0; idx < entityCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;
    progress->setProgress( progress->progress()+1 );
    //FIXME!!
    //qApp->processEvents();

    QDomElement entity = entityNodes.item( idx ).toElement();
    // skip non-elements and parameter entities
    // TODO: what's cdata <-> gen ?
    if( entity.isNull() || entity.attribute( "type" ) == "param" )
      continue;

    QString expansion; // stays null if there is no <text-expanded>
    QDomElement expanded = entity.elementsByTagName( "text-expanded" ).item( 0 ).toElement();
    if( ! expanded.isNull() )
    {
      expansion = expanded.text();
      // TODO: support more than one &#...; in the expanded text
      /* TODO include do this when the unicode font problem is solved:
      if( expansion.contains(QRegExp("^&#x[a-zA-Z0-9]+;$")) ) {
        // hexadecimal numbers, e.g. "&#x236;"
        uint end = expansion.find( ";" );
        expansion = expansion.mid( 3, end-3 );
        expansion = QChar();
      } else if( expansion.contains(QRegExp("^&#[0-9]+;$")) ) {
        // decimal numbers, e.g. "&#236;"
        uint end = expansion.find( ";" );
        expansion = expansion.mid( 2, end-2 );
        expansion = QChar( expansion.toInt() );
      }
      */
    }
    m_entityList.insert( entity.attribute("name"), expansion );
  }
  return true;
}
/**
 * Get a list of all ( non-parameter ) entities whose name starts with a
 * certain string. The comparison is case-sensitive.
 *
 * @param start  prefix the entity names have to begin with; an empty string
 *               matches every entity
 * @return the names of the matching entities
 */
QStringList PseudoDTD::entities( QString start )
{
  QStringList matches;
  QMap<QString,QString>::Iterator it;
  for( it = m_entityList.begin(); it != m_entityList.end(); ++it )
  {
    // Compare against the entity name, i.e. the key. Dereferencing the
    // iterator would yield the expanded text instead of the name.
    if( ! it.key().startsWith( start ) )
      continue;

    QString str = it.key();
    /* TODO: show entities as unicode character
    if( !it.data().isEmpty() ) {
      //str += " -- " + it.data();
      QRegExp re( "&#(\\d+);" );
      if( re.search(it.data()) != -1 ) {
        uint ch = re.cap( 1).toUInt();
        str += " -- " + QChar( ch).decomposition();
      }
      //kdDebug() << "#" << it.data() << endl;
    }
    */
    matches.append( str );
    // TODO: later use a table view
  }
  return matches;
}
// kate: space-indent on; indent-width 2; replace-tabs on; mixed-indent off;
|