1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
/***************************************************************************
copyright : (C) 2006 by Robby Stephenson
email : [email protected]
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of version 2 of the GNU General Public License as *
* published by the Free Software Foundation; *
* *
***************************************************************************/
#include "dcimporter.h"
#include "../collections/bookcollection.h"
#include "tellico_xml.h"
#include "../tellico_debug.h"
using Tellico::Import::DCImporter;
// Construct from a URL; the argument is handed straight to the XMLImporter base.
DCImporter::DCImporter(const KURL& url_)
    : XMLImporter(url_)
{
}
// Construct from a text string; the argument is handed straight to the XMLImporter base.
DCImporter::DCImporter(const TQString& text_)
    : XMLImporter(text_)
{
}
// Construct from an already-built DOM document; the argument is handed straight to the XMLImporter base.
DCImporter::DCImporter(const TQDomDocument& dom_)
    : XMLImporter(dom_)
{
}
// Builds a book collection from the Dublin Core records in the DOM document.
//
// Every "recordData" element in the Zing namespace is treated as one record.
// From each record the title, creator, publisher, subject, date and
// description elements are copied into the "title", "author", "publisher",
// "keyword", "pub_year" and "comments" fields of a new entry. A record
// without any title element is logged and skipped.
//
// Child elements are first looked up in the Dublin Core namespace. Until the
// lookup mode has been decided, a record whose namespaced title lookup comes
// back empty is retried without a namespace; the first record that yields a
// title fixes the mode (namespaced or not) for every later lookup.
//
// Returns the collection holding one entry per record that had a title.
Tellico::Data::CollPtr DCImporter::collection() {
  const TQString& dcNS = XML::nsDublinCore;
  const TQString& zingNS = XML::nsZing;

  Data::CollPtr coll = new Data::BookCollection(true);

  TQDomDocument doc = domDocument();

  // matches a trailing date (or date range) on a creator; the second capture
  // holds whatever text follows that date
  TQRegExp authorDateRX(TQString::tqfromLatin1(",?(\\s+\\d{4}-?(?:\\d{4})?\\.?)(.*)$"));
  // first run of four digits is taken as the publication year
  TQRegExp dateRX(TQString::tqfromLatin1("\\d{4}"));

  // separator used when several values are stored in a single field
  const TQString sep = TQString::tqfromLatin1("; ");

  TQDomNodeList records = doc.elementsByTagNameNS(zingNS, TQString::tqfromLatin1("recordData"));
  const uint recordCount = records.count();
  myDebug() << "DCImporter::collection() - number of records: " << recordCount << endl;

  enum { UnknownNS, UseNS, NoNS } useNS = UnknownNS;

// looks up the children of the current record, honouring the namespace mode
#define GETELEMENTS(s) ((useNS == NoNS) \
    ? record.elementsByTagName(TQString::tqfromLatin1(s)) \
    : record.elementsByTagNameNS(dcNS, TQString::tqfromLatin1(s)))

  for(uint i = 0; i < recordCount; ++i) {
    Data::EntryPtr entry = new Data::Entry(coll);
    TQDomElement record = records.item(i).toElement();

    TQDomNodeList nodes = GETELEMENTS("title");
    if(nodes.count() == 0 && useNS == UnknownNS) {
      // mode not decided yet: give the non-namespaced lookup a chance
      nodes = record.elementsByTagName(TQString::tqfromLatin1("title"));
      if(nodes.count() > 0) {
        useNS = NoNS;
      }
    }
    if(nodes.count() == 0) {
      myDebug() << "DCImporter::collection() - no title, skipping" << endl;
      continue;
    }
    if(useNS == UnknownNS) {
      // the namespaced lookup worked, stick with it from now on
      useNS = UseNS;
    }

    // title: only the first element is used, with whitespace collapsed
    TQString title = nodes.item(0).toElement().text();
    title.replace('\n', ' ');
    title = title.simplifyWhiteSpace();
    entry->setField(TQString::tqfromLatin1("title"), title);

    // creators -> author
    nodes = GETELEMENTS("creator");
    TQStringList creators;
    for(uint j = 0; j < nodes.count(); ++j) {
      TQString creator = nodes.item(j).toElement().text();
      if(authorDateRX.search(creator) < 0) {
        // no trailing date, take the text as it is
        creators << creator;
        continue;
      }
      // something following the date, like [publisher], makes the value suspect
      if(!authorDateRX.cap(2).stripWhiteSpace().isEmpty()) {
        myDebug() << "DCImporter::collection() - weird creator, skipping: " << creator << endl;
        continue;
      }
      creator.remove(authorDateRX);
      creators << creator.simplifyWhiteSpace();
    }
    entry->setField(TQString::tqfromLatin1("author"), creators.join(sep));

    // publishers
    nodes = GETELEMENTS("publisher");
    TQStringList publishers;
    for(uint j = 0; j < nodes.count(); ++j) {
      publishers << nodes.item(j).toElement().text();
    }
    entry->setField(TQString::tqfromLatin1("publisher"), publishers.join(sep));

    // subjects -> keyword
    nodes = GETELEMENTS("subject");
    TQStringList keywords;
    for(uint j = 0; j < nodes.count(); ++j) {
      keywords << nodes.item(j).toElement().text();
    }
    entry->setField(TQString::tqfromLatin1("keyword"), keywords.join(sep));

    // date -> pub_year, only when the first date element contains four digits
    nodes = GETELEMENTS("date");
    if(nodes.count() > 0) {
      const TQString dateText = nodes.item(0).toElement().text();
      if(dateRX.search(dateText) > -1) {
        entry->setField(TQString::tqfromLatin1("pub_year"), dateRX.cap());
      }
    }

    // description -> comments, only the first element is used
    nodes = GETELEMENTS("description");
    if(nodes.count() > 0) {
      entry->setField(TQString::tqfromLatin1("comments"), nodes.item(0).toElement().text());
    }

    coll->addEntries(entry);
  }
#undef GETELEMENTS

  return coll;
}
|