summaryrefslogtreecommitdiffstats
path: root/src/translators/dcimporter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/translators/dcimporter.cpp')
-rw-r--r--src/translators/dcimporter.cpp128
1 files changed, 128 insertions, 0 deletions
diff --git a/src/translators/dcimporter.cpp b/src/translators/dcimporter.cpp
new file mode 100644
index 0000000..c8bb59f
--- /dev/null
+++ b/src/translators/dcimporter.cpp
@@ -0,0 +1,128 @@
+/***************************************************************************
+ copyright : (C) 2006 by Robby Stephenson
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "dcimporter.h"
+#include "../collections/bookcollection.h"
+#include "tellico_xml.h"
+#include "../tellico_debug.h"
+
+using Tellico::Import::DCImporter;
+
+DCImporter::DCImporter(const KURL& url_) : XMLImporter(url_) {
+}
+
+DCImporter::DCImporter(const QString& text_) : XMLImporter(text_) {
+}
+
+DCImporter::DCImporter(const QDomDocument& dom_) : XMLImporter(dom_) {
+}
+
+Tellico::Data::CollPtr DCImporter::collection() {
+ const QString& dc = XML::nsDublinCore;
+ const QString& zing = XML::nsZing;
+
+ Data::CollPtr c = new Data::BookCollection(true);
+
+ QDomDocument doc = domDocument();
+
+ QRegExp authorDateRX(QString::fromLatin1(",?(\\s+\\d{4}-?(?:\\d{4})?\\.?)(.*)$"));
+ QRegExp dateRX(QString::fromLatin1("\\d{4}"));
+
+ QDomNodeList recordList = doc.elementsByTagNameNS(zing, QString::fromLatin1("recordData"));
+ myDebug() << "DCImporter::collection() - number of records: " << recordList.count() << endl;
+
+ enum { UnknownNS, UseNS, NoNS } useNS = UnknownNS;
+
+#define GETELEMENTS(s) (useNS == NoNS) \
+ ? elem.elementsByTagName(QString::fromLatin1(s)) \
+ : elem.elementsByTagNameNS(dc, QString::fromLatin1(s))
+
+ for(uint i = 0; i < recordList.count(); ++i) {
+ Data::EntryPtr e = new Data::Entry(c);
+
+ QDomElement elem = recordList.item(i).toElement();
+
+ QDomNodeList nodeList = GETELEMENTS("title");
+ if(nodeList.count() == 0) { // no title, skip
+ if(useNS == UnknownNS) {
+ nodeList = elem.elementsByTagName(QString::fromLatin1("title"));
+ if(nodeList.count() > 0) {
+ useNS = NoNS;
+ } else {
+ myDebug() << "DCImporter::collection() - no title, skipping" << endl;
+ continue;
+ }
+ } else {
+ myDebug() << "DCImporter::collection() - no title, skipping" << endl;
+ continue;
+ }
+ } else if(useNS == UnknownNS) {
+ useNS = UseNS;
+ }
+ QString s = nodeList.item(0).toElement().text();
+ s.replace('\n', ' ');
+ s = s.simplifyWhiteSpace();
+ e->setField(QString::fromLatin1("title"), s);
+
+ nodeList = GETELEMENTS("creator");
+ QStringList creators;
+ for(uint j = 0; j < nodeList.count(); ++j) {
+ QString s = nodeList.item(j).toElement().text();
+ if(authorDateRX.search(s) > -1) {
+ // check if anything after date like [publisher]
+ if(authorDateRX.cap(2).stripWhiteSpace().isEmpty()) {
+ s.remove(authorDateRX);
+ s = s.simplifyWhiteSpace();
+ creators << s;
+ } else {
+ myDebug() << "DCImporter::collection() - weird creator, skipping: " << s << endl;
+ }
+ } else {
+ creators << s;
+ }
+ }
+ e->setField(QString::fromLatin1("author"), creators.join(QString::fromLatin1("; ")));
+
+ nodeList = GETELEMENTS("publisher");
+ QStringList publishers;
+ for(uint j = 0; j < nodeList.count(); ++j) {
+ publishers << nodeList.item(j).toElement().text();
+ }
+ e->setField(QString::fromLatin1("publisher"), publishers.join(QString::fromLatin1("; ")));
+
+ nodeList = GETELEMENTS("subject");
+ QStringList keywords;
+ for(uint j = 0; j < nodeList.count(); ++j) {
+ keywords << nodeList.item(j).toElement().text();
+ }
+ e->setField(QString::fromLatin1("keyword"), keywords.join(QString::fromLatin1("; ")));
+
+ nodeList = GETELEMENTS("date");
+ if(nodeList.count() > 0) {
+ QString s = nodeList.item(0).toElement().text();
+ if(dateRX.search(s) > -1) {
+ e->setField(QString::fromLatin1("pub_year"), dateRX.cap());
+ }
+ }
+
+ nodeList = GETELEMENTS("description");
+ if(nodeList.count() > 0) { // no title, skip
+ e->setField(QString::fromLatin1("comments"), nodeList.item(0).toElement().text());
+ }
+
+ c->addEntries(e);
+ }
+#undef GETELEMENTS
+
+ return c;
+}