diff options
author | mio <[email protected]> | 2025-03-14 19:50:48 +1000 |
---|---|---|
committer | mio <[email protected]> | 2025-03-14 20:11:20 +1000 |
commit | 81d428dedb2fa9f14ddef3edfa4d68c0d58af528 (patch) | |
tree | 74d9bce5afcd161ab2caa825c96e9a574cb382bc | |
parent | b69050d6e8956d0f38c526b9fca93d76fccffeac (diff) | |
download | tdepim-improvement/akregator-librss-namespaces.tar.gz tdepim-improvement/akregator-librss-namespaces.zip |
akregator-librss: use namespaces (branch: improvement/akregator-librss-namespaces)
Check each element's namespace URI rather than assuming a fixed prefix,
since relying on the prefix can result in incorrect metadata being displayed.
Signed-off-by: mio <[email protected]>
-rw-r--r-- | akregator/src/librss/article.cpp | 32 | ||||
-rw-r--r-- | akregator/src/librss/document.cpp | 4 | ||||
-rw-r--r-- | akregator/src/librss/global.h | 15 | ||||
-rw-r--r-- | akregator/src/librss/loader.cpp | 2 | ||||
-rw-r--r-- | akregator/src/librss/tools_p.cpp | 63 | ||||
-rw-r--r-- | akregator/src/librss/tools_p.h | 2 |
6 files changed, 93 insertions, 25 deletions
diff --git a/akregator/src/librss/article.cpp b/akregator/src/librss/article.cpp index 18522fe3e..88d42a7dc 100644 --- a/akregator/src/librss/article.cpp +++ b/akregator/src/librss/article.cpp @@ -92,13 +92,16 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new d->link = elemText; } + // prefer content/content:encoded over summary/description for feeds that provide it + if (format == AtomFeed) + { + d->description = extractNode(node, TQString::fromLatin1("content"), false); + } + else + { + d->description = extractElementTextNS(node, ContentNamespace, TQString::fromLatin1("encoded"), false); + } - // prefer content/content:encoded over summary/description for feeds that provide it - TQString tagName=(format==AtomFeed)? TQString::fromLatin1("content"): TQString::fromLatin1("content:encoded"); - - if (!(elemText = extractNode(node, tagName, false)).isNull()) - d->description = elemText; - if (d->description.isEmpty()) { if (!(elemText = extractNode(node, TQString::fromLatin1("body"), false)).isNull()) @@ -130,7 +133,7 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new time = KRFCDate::parseDate(elemText); } - if (!(elemText = extractNode(node, TQString::fromLatin1("dc:date"))).isNull()) + if (!(elemText = extractElementTextNS(node, DublinCoreNamespace, TQString::fromLatin1("date"))).isNull()) { time = parseISO8601Date(elemText); } @@ -139,27 +142,22 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new if (time != 0) d->pubDate.setTime_t(time); - if (!(elemText = extractNode(node, TQString::fromLatin1("wfw:comment"))).isNull()) { - d->commentsLink = elemText; - } - - if (!(elemText = extractNode(node, TQString::fromLatin1("slash:comments"))).isNull()) { - d->numComments = elemText.toInt(); - } + d->commentsLink = extractElementTextNS(node, CommentAPINamespace, TQString::fromLatin1("comment")); + d->numComments = extractElementTextNS(node, SlashNamespace, 
TQString::fromLatin1("comments")).toInt(); TQDomElement element = TQDomNode(node).toElement(); // in RSS 1.0, we use <item about> attribute as ID // FIXME: pass format version instead of checking for attribute - if (!element.isNull() && element.hasAttribute(TQString::fromLatin1("rdf:about"))) + if (!element.isNull() && element.hasAttributeNS(RDFNamespace, TQString::fromLatin1("about"))) { - d->guid = element.attribute(TQString::fromLatin1("rdf:about")); // HACK: using ns properly did not work + d->guid = element.attributeNS(RDFNamespace, TQString::fromLatin1("about"), TQString::null); d->guidIsPermaLink = false; } else { - tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid"); + TQString tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid"); TQDomNode n = node.namedItem(tagName); if (!n.isNull()) { diff --git a/akregator/src/librss/document.cpp b/akregator/src/librss/document.cpp index 3bc64d000..7d94a252a 100644 --- a/akregator/src/librss/document.cpp +++ b/akregator/src/librss/document.cpp @@ -224,7 +224,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private) d->copyright = elemText; if (d->format == AtomFeed) - elemText = rootNode.toElement().attribute(TQString::fromLatin1("xml:lang"), TQString()); + elemText = rootNode.toElement().attributeNS(XMLNamespace, "lang", TQString::null); else elemText = extractNode(channelNode, TQString::fromLatin1("language")); @@ -441,7 +441,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private) d->pubDate.setTime_t(_time); } - if (!(elemText = extractNode(channelNode, TQString::fromLatin1("dc:date"))).isNull()) { + if (!(elemText = extractElementTextNS(channelNode, DublinCoreNamespace, "date")).isNull()) { time_t _time = parseISO8601Date(elemText); /* \bug This isn't really the right way since it will set the date to * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that diff --git a/akregator/src/librss/global.h 
b/akregator/src/librss/global.h index 16c08178c..8ff4e406d 100644 --- a/akregator/src/librss/global.h +++ b/akregator/src/librss/global.h @@ -18,6 +18,21 @@ class TQValueList; namespace RSS { + /// The Atom 1.0 XML namespace. + constexpr const char *AtomNamespace = "http://www.w3.org/2005/Atom"; + /// The CommentAPI XML namespace. + constexpr const char *CommentAPINamespace = "http://wellformedweb.org/CommentAPI/"; + /// The Content XML namespace. + constexpr const char *ContentNamespace = "http://purl.org/rss/1.0/modules/content/"; + /// The Dublin Core XML namespace. + constexpr const char *DublinCoreNamespace = "http://purl.org/dc/elements/1.1/"; + /// The RDF Concepts Vocabulary (RDF) namespace. + constexpr const char *RDFNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + /// The Slash XML namespace. + constexpr const char *SlashNamespace = "http://purl.org/rss/1.0/modules/slash/"; + /// The XML namespace. + constexpr const char *XMLNamespace = "http://www.w3.org/XML/1998/namespace"; + /** * Versions currently supported by this library. 
This enumeration is * subject to be extended in the future and used by Document::version() to diff --git a/akregator/src/librss/loader.cpp b/akregator/src/librss/loader.cpp index 75dd22fd8..8674dfb79 100644 --- a/akregator/src/librss/loader.cpp +++ b/akregator/src/librss/loader.cpp @@ -351,7 +351,7 @@ void Loader::slotRetrieverDone(const TQByteArray &data, bool success) TQByteArray tmpData; tmpData.setRawData(charData, len); - if (doc.setContent(tmpData)) + if (doc.setContent(tmpData, /* namespaceProcessing */ true)) { rssDoc = Document(doc); if (!rssDoc.isValid()) diff --git a/akregator/src/librss/tools_p.cpp b/akregator/src/librss/tools_p.cpp index 9303bdf50..04dc570b4 100644 --- a/akregator/src/librss/tools_p.cpp +++ b/akregator/src/librss/tools_p.cpp @@ -117,6 +117,59 @@ static TQString extractAtomContent(const TQDomElement& e) return TQString(); } +TQDomElement extractElementNS(const TQDomNode &parent, const TQString &nameSpace, const TQString &localName) +{ + TQDomElement element; + + if (parent.isNull()) + { + return element; + } + + TQDomNodeList children = parent.childNodes(); + for (size_t i = 0; i < children.count(); ++i) + { + TQDomNode node = children.item(i); + if (node.isElement() && node.namespaceURI() == nameSpace && node.localName() == localName) + { + element = node.toElement(); + break; + } + } + + return element; +} + +TQString extractElementTextNS(const TQDomNode &parent, const TQString &namespaceURI, const TQString &localName, bool isInlined) +{ + TQDomElement element = extractElementNS(parent, namespaceURI, localName); + + if (element.isNull()) + { + return TQString::null; + } + + TQString result = element.text().stripWhiteSpace(); + if (localName == "content") + { + // Atom content + result = extractAtomContent(element); + } + else + { + // Check for HTML; not necessary for atom:content + // Taken from extractNode below + bool hasPre = result.contains("<pre>", false) || result.contains("<pre ", false); + bool hasHtml = hasPre || 
result.contains("<"); + if (!isInlined && !hasHtml) + result = result = result.replace(TQChar('\n'), "<br />"); + if (!hasPre) + result = result.simplifyWhiteSpace(); + } + + return result.isEmpty() ? TQString::null : result; +} + TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined) { TQDomNode node = parent.namedItem(elemName); @@ -208,25 +261,25 @@ TQString parseItemAuthor(const TQDomElement& element, Format format, Version ver TQString name; TQString email; - TQDomElement dcCreator = element.namedItem("dc:creator").toElement(); - + TQDomElement dcCreator = extractElementNS(element, DublinCoreNamespace, "creator"); + if (!dcCreator.isNull()) authorFromString(dcCreator.text(), name, email); else if (format == AtomFeed) { TQDomElement atomAuthor = element.namedItem("author").toElement(); if (atomAuthor.isNull()) - atomAuthor = element.namedItem("atom:author").toElement(); + atomAuthor = extractElementNS(element, AtomNamespace, "author"); if (!atomAuthor.isNull()) { TQDomElement atomName = atomAuthor.namedItem("name").toElement(); if (atomName.isNull()) - atomName = atomAuthor.namedItem("atom:name").toElement(); + atomName = extractElementNS(atomAuthor, AtomNamespace, "name"); name = atomName.text().stripWhiteSpace(); TQDomElement atomEmail = atomAuthor.namedItem("email").toElement(); if (atomEmail.isNull()) - atomEmail = atomAuthor.namedItem("atom:email").toElement(); + atomEmail = extractElementNS(atomAuthor, AtomNamespace, "email"); email = atomEmail.text().stripWhiteSpace(); } } diff --git a/akregator/src/librss/tools_p.h b/akregator/src/librss/tools_p.h index 0ec9013f3..1b89fc85d 100644 --- a/akregator/src/librss/tools_p.h +++ b/akregator/src/librss/tools_p.h @@ -29,6 +29,8 @@ namespace RSS unsigned int count; }; + TQDomElement extractElementNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName); + TQString extractElementTextNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& 
localName, bool isInlined = true); TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined=true); TQString extractTitle(const TQDomNode &parent); TQString childNodesAsXML(const TQDomNode& parent); |