summaryrefslogtreecommitdiffstats
path: root/src/translators/dcimporter.cpp
blob: 583925548dbffe99ad9659602e37458b75b80a39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/***************************************************************************
    copyright            : (C) 2006 by Robby Stephenson
    email                : [email protected]
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "dcimporter.h"
#include "../collections/bookcollection.h"
#include "tellico_xml.h"
#include "../tellico_debug.h"

using Tellico::Import::DCImporter;

// Constructs an importer from a URL. The URL is handed unchanged to the
// XMLImporter base class; this class adds no initialization of its own.
DCImporter::DCImporter(const KURL& url_)
  : XMLImporter(url_)
{
}

// Constructs an importer from a text string. The string is handed unchanged
// to the XMLImporter base class; this class adds no initialization of its own.
DCImporter::DCImporter(const TQString& text_)
  : XMLImporter(text_)
{
}

// Constructs an importer from an already-built DOM document. The document is
// handed unchanged to the XMLImporter base class; this class adds no
// initialization of its own.
DCImporter::DCImporter(const TQDomDocument& dom_)
  : XMLImporter(dom_)
{
}

// Converts the Dublin Core records found in the importer's DOM document into
// a newly created book collection and returns it.
//
// Every <recordData> element in the XML::nsZing namespace is treated as one
// record. Each record that has a title contributes one entry, filled as
// follows:
//   title     <- first <title>, with whitespace normalized
//   author    <- all <creator> values, joined with "; "
//   publisher <- all <publisher> values, joined with "; "
//   keyword   <- all <subject> values, joined with "; "
//   pub_year  <- first run of four digits in the first <date>
//   comments  <- first <description>
// Records without a title are skipped (a debug message is emitted).
//
// Element lookups start out namespace-aware (XML::nsDublinCore). The first
// record that yields a title decides the mode for all remaining lookups:
// if the namespaced lookup found it, namespaces stay on; if only the plain
// tag-name lookup found it, namespaces are turned off.
Tellico::Data::CollPtr DCImporter::collection() {
  const TQString& dc = XML::nsDublinCore;
  const TQString& zing = XML::nsZing;

  Data::CollPtr coll = new Data::BookCollection(true);

  TQDomDocument doc = domDocument();

  // matches a trailing date such as ", 1920-1999." after a creator name;
  // cap(2) holds whatever text follows that date
  TQRegExp authorDateRX(TQString::tqfromLatin1(",?(\\s+\\d{4}-?(?:\\d{4})?\\.?)(.*)$"));
  // any four consecutive digits are taken as a year
  TQRegExp dateRX(TQString::tqfromLatin1("\\d{4}"));

  // separator used for every multi-valued field
  const TQString separator = TQString::tqfromLatin1("; ");

  TQDomNodeList records = doc.elementsByTagNameNS(zing, TQString::tqfromLatin1("recordData"));
  const uint recordCount = records.count();
  myDebug() << "DCImporter::collection() - number of records: " << recordCount << endl;

  // lookup mode; stays UnknownNS until some record yields a title
  enum { UnknownNS, UseNS, NoNS } useNS = UnknownNS;

// Expands to a lookup of child elements named s on the current record,
// namespace-aware unless the mode has been settled as NoNS.
#define GETELEMENTS(s) ((useNS == NoNS) \
                          ? record.elementsByTagName(TQString::tqfromLatin1(s)) \
                          : record.elementsByTagNameNS(dc, TQString::tqfromLatin1(s)))

  for(uint recIdx = 0; recIdx < recordCount; ++recIdx) {
    Data::EntryPtr entry = new Data::Entry(coll);

    TQDomElement record = records.item(recIdx).toElement();

    TQDomNodeList nodes = GETELEMENTS("title");
    if(useNS == UnknownNS) {
      if(nodes.count() > 0) {
        // the namespaced lookup works, keep using it
        useNS = UseNS;
      } else {
        // mode still undecided: retry without a namespace
        nodes = record.elementsByTagName(TQString::tqfromLatin1("title"));
        if(nodes.count() > 0) {
          useNS = NoNS;
        }
      }
    }
    if(nodes.count() == 0) {
      // a record without a title is not imported
      myDebug() << "DCImporter::collection() - no title, skipping" << endl;
      continue;
    }

    TQString title = nodes.item(0).toElement().text();
    title.replace('\n', ' ');
    entry->setField(TQString::tqfromLatin1("title"), title.simplifyWhiteSpace());

    nodes = GETELEMENTS("creator");
    TQStringList creators;
    for(uint k = 0; k < nodes.count(); ++k) {
      TQString name = nodes.item(k).toElement().text();
      if(authorDateRX.search(name) < 0) {
        // no trailing date, take the name verbatim
        creators << name;
        continue;
      }
      // anything after the date (like [publisher]) means the value is not
      // a plain name, so it is dropped
      if(!authorDateRX.cap(2).stripWhiteSpace().isEmpty()) {
        myDebug() << "DCImporter::collection() - weird creator, skipping: " << name << endl;
        continue;
      }
      name.remove(authorDateRX);
      creators << name.simplifyWhiteSpace();
    }
    entry->setField(TQString::tqfromLatin1("author"), creators.join(separator));

    nodes = GETELEMENTS("publisher");
    TQStringList publishers;
    for(uint k = 0; k < nodes.count(); ++k) {
      publishers << nodes.item(k).toElement().text();
    }
    entry->setField(TQString::tqfromLatin1("publisher"), publishers.join(separator));

    nodes = GETELEMENTS("subject");
    TQStringList keywords;
    for(uint k = 0; k < nodes.count(); ++k) {
      keywords << nodes.item(k).toElement().text();
    }
    entry->setField(TQString::tqfromLatin1("keyword"), keywords.join(separator));

    nodes = GETELEMENTS("date");
    if(nodes.count() > 0) {
      const TQString dateText = nodes.item(0).toElement().text();
      if(dateRX.search(dateText) > -1) {
        // cap() with no argument is the whole match, i.e. the four digits
        entry->setField(TQString::tqfromLatin1("pub_year"), dateRX.cap());
      }
    }

    nodes = GETELEMENTS("description");
    if(nodes.count() > 0) { // only the first description is used
      entry->setField(TQString::tqfromLatin1("comments"), nodes.item(0).toElement().text());
    }

    coll->addEntries(entry);
  }
#undef GETELEMENTS

  return coll;
}