summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormio <[email protected]>2025-03-14 19:50:48 +1000
committermio <[email protected]>2025-03-14 20:11:20 +1000
commit81d428dedb2fa9f14ddef3edfa4d68c0d58af528 (patch)
tree74d9bce5afcd161ab2caa825c96e9a574cb382bc
parentb69050d6e8956d0f38c526b9fca93d76fccffeac (diff)
downloadtdepim-improvement/akregator-librss-namespaces.tar.gz
tdepim-improvement/akregator-librss-namespaces.zip
akregator-librss: use namespacesimprovement/akregator-librss-namespaces
Check each element's namespace rather than assuming a fixed prefix; relying on the prefix can result in incorrect metadata being displayed. Signed-off-by: mio <[email protected]>
-rw-r--r--akregator/src/librss/article.cpp32
-rw-r--r--akregator/src/librss/document.cpp4
-rw-r--r--akregator/src/librss/global.h15
-rw-r--r--akregator/src/librss/loader.cpp2
-rw-r--r--akregator/src/librss/tools_p.cpp63
-rw-r--r--akregator/src/librss/tools_p.h2
6 files changed, 93 insertions, 25 deletions
diff --git a/akregator/src/librss/article.cpp b/akregator/src/librss/article.cpp
index 18522fe3e..88d42a7dc 100644
--- a/akregator/src/librss/article.cpp
+++ b/akregator/src/librss/article.cpp
@@ -92,13 +92,16 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
d->link = elemText;
}
+ // prefer content/content:encoded over summary/description for feeds that provide it
+ if (format == AtomFeed)
+ {
+ d->description = extractNode(node, TQString::fromLatin1("content"), false);
+ }
+ else
+ {
+ d->description = extractElementTextNS(node, ContentNamespace, TQString::fromLatin1("encoded"), false);
+ }
- // prefer content/content:encoded over summary/description for feeds that provide it
- TQString tagName=(format==AtomFeed)? TQString::fromLatin1("content"): TQString::fromLatin1("content:encoded");
-
- if (!(elemText = extractNode(node, tagName, false)).isNull())
- d->description = elemText;
-
if (d->description.isEmpty())
{
if (!(elemText = extractNode(node, TQString::fromLatin1("body"), false)).isNull())
@@ -130,7 +133,7 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
time = KRFCDate::parseDate(elemText);
}
- if (!(elemText = extractNode(node, TQString::fromLatin1("dc:date"))).isNull())
+ if (!(elemText = extractElementTextNS(node, DublinCoreNamespace, TQString::fromLatin1("date"))).isNull())
{
time = parseISO8601Date(elemText);
}
@@ -139,27 +142,22 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
if (time != 0)
d->pubDate.setTime_t(time);
- if (!(elemText = extractNode(node, TQString::fromLatin1("wfw:comment"))).isNull()) {
- d->commentsLink = elemText;
- }
-
- if (!(elemText = extractNode(node, TQString::fromLatin1("slash:comments"))).isNull()) {
- d->numComments = elemText.toInt();
- }
+ d->commentsLink = extractElementTextNS(node, CommentAPINamespace, TQString::fromLatin1("comment"));
+ d->numComments = extractElementTextNS(node, SlashNamespace, TQString::fromLatin1("comments")).toInt();
TQDomElement element = TQDomNode(node).toElement();
// in RSS 1.0, we use <item about> attribute as ID
// FIXME: pass format version instead of checking for attribute
- if (!element.isNull() && element.hasAttribute(TQString::fromLatin1("rdf:about")))
+ if (!element.isNull() && element.hasAttributeNS(RDFNamespace, TQString::fromLatin1("about")))
{
- d->guid = element.attribute(TQString::fromLatin1("rdf:about")); // HACK: using ns properly did not work
+ d->guid = element.attributeNS(RDFNamespace, TQString::fromLatin1("about"), TQString::null);
d->guidIsPermaLink = false;
}
else
{
- tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid");
+ TQString tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid");
TQDomNode n = node.namedItem(tagName);
if (!n.isNull())
{
diff --git a/akregator/src/librss/document.cpp b/akregator/src/librss/document.cpp
index 3bc64d000..7d94a252a 100644
--- a/akregator/src/librss/document.cpp
+++ b/akregator/src/librss/document.cpp
@@ -224,7 +224,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private)
d->copyright = elemText;
if (d->format == AtomFeed)
- elemText = rootNode.toElement().attribute(TQString::fromLatin1("xml:lang"), TQString());
+ elemText = rootNode.toElement().attributeNS(XMLNamespace, "lang", TQString::null);
else
elemText = extractNode(channelNode, TQString::fromLatin1("language"));
@@ -441,7 +441,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private)
d->pubDate.setTime_t(_time);
}
- if (!(elemText = extractNode(channelNode, TQString::fromLatin1("dc:date"))).isNull()) {
+ if (!(elemText = extractElementTextNS(channelNode, DublinCoreNamespace, "date")).isNull()) {
time_t _time = parseISO8601Date(elemText);
/* \bug This isn't really the right way since it will set the date to
* Jan 1 1970, 1:00:00 if the passed date was invalid; this means that
diff --git a/akregator/src/librss/global.h b/akregator/src/librss/global.h
index 16c08178c..8ff4e406d 100644
--- a/akregator/src/librss/global.h
+++ b/akregator/src/librss/global.h
@@ -18,6 +18,21 @@ class TQValueList;
namespace RSS
{
+ /// Namespace URI of Atom 1.0 elements (commonly prefixed `atom:`).
+ constexpr const char *AtomNamespace = "http://www.w3.org/2005/Atom";
+ /// Namespace URI of the CommentAPI module (commonly prefixed `wfw:`, e.g. wfw:comment).
+ constexpr const char *CommentAPINamespace = "http://wellformedweb.org/CommentAPI/";
+ /// Namespace URI of the RSS 1.0 Content module (commonly prefixed `content:`, e.g. content:encoded).
+ constexpr const char *ContentNamespace = "http://purl.org/rss/1.0/modules/content/";
+ /// Namespace URI of the Dublin Core element set (commonly prefixed `dc:`, e.g. dc:date, dc:creator).
+ constexpr const char *DublinCoreNamespace = "http://purl.org/dc/elements/1.1/";
+ /// Namespace URI of the RDF syntax vocabulary (commonly prefixed `rdf:`, e.g. the rdf:about attribute).
+ constexpr const char *RDFNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+ /// Namespace URI of the RSS 1.0 Slash module (commonly prefixed `slash:`, e.g. slash:comments).
+ constexpr const char *SlashNamespace = "http://purl.org/rss/1.0/modules/slash/";
+ /// Namespace URI of the reserved `xml:` prefix (e.g. the xml:lang attribute).
+ constexpr const char *XMLNamespace = "http://www.w3.org/XML/1998/namespace";
+
/**
* Versions currently supported by this library. This enumeration is
* subject to be extended in the future and used by Document::version() to
diff --git a/akregator/src/librss/loader.cpp b/akregator/src/librss/loader.cpp
index 75dd22fd8..8674dfb79 100644
--- a/akregator/src/librss/loader.cpp
+++ b/akregator/src/librss/loader.cpp
@@ -351,7 +351,7 @@ void Loader::slotRetrieverDone(const TQByteArray &data, bool success)
TQByteArray tmpData;
tmpData.setRawData(charData, len);
- if (doc.setContent(tmpData))
+ if (doc.setContent(tmpData, /* namespaceProcessing */ true))
{
rssDoc = Document(doc);
if (!rssDoc.isValid())
diff --git a/akregator/src/librss/tools_p.cpp b/akregator/src/librss/tools_p.cpp
index 9303bdf50..04dc570b4 100644
--- a/akregator/src/librss/tools_p.cpp
+++ b/akregator/src/librss/tools_p.cpp
@@ -117,6 +117,59 @@ static TQString extractAtomContent(const TQDomElement& e)
return TQString();
}
+/**
+ * Returns the first direct child element of @p parent whose namespace URI
+ * and local name equal @p nameSpace and @p localName. Only direct children
+ * are examined. A null element is returned when @p parent is null or when
+ * no child matches.
+ */
+TQDomElement extractElementNS(const TQDomNode &parent, const TQString &nameSpace, const TQString &localName)
+{
+    if (parent.isNull())
+        return TQDomElement();
+
+    // Walk the sibling chain in document order; the first match wins.
+    for (TQDomNode child = parent.firstChild(); !child.isNull(); child = child.nextSibling())
+    {
+        if (!child.isElement())
+            continue;
+        if (child.namespaceURI() == nameSpace && child.localName() == localName)
+            return child.toElement();
+    }
+
+    return TQDomElement();
+}
+
+/**
+ * Returns the text of the first direct child of @p parent that matches
+ * @p namespaceURI and @p localName, or TQString::null when no such child exists
+ * or the resulting text is empty. For elements not named "content": unless
+ * @p isInlined is set, newlines in text containing no '<' become "<br />".
+ */
+TQString extractElementTextNS(const TQDomNode &parent, const TQString &namespaceURI, const TQString &localName, bool isInlined)
+{
+    TQDomElement element = extractElementNS(parent, namespaceURI, localName);
+    if (element.isNull())
+        return TQString::null;
+
+    TQString result;
+    // Atom content; NOTE(review): keyed on local name only, the namespace is not checked here.
+    if (localName == "content")
+        result = extractAtomContent(element);
+    else
+    {
+        // HTML heuristics mirroring extractNode(): whitespace is kept only when a <pre> tag is present.
+        result = element.text().stripWhiteSpace();
+        const bool hasPre = result.contains("<pre>", false) || result.contains("<pre ", false);
+        const bool hasHtml = hasPre || result.contains("<");
+        if (!isInlined && !hasHtml)
+            result.replace(TQChar('\n'), "<br />"); // replace() edits in place; no re-assignment needed
+        if (!hasPre)
+            result = result.simplifyWhiteSpace();
+    }
+    return result.isEmpty() ? TQString::null : result;
+}
+
TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined)
{
TQDomNode node = parent.namedItem(elemName);
@@ -208,25 +261,25 @@ TQString parseItemAuthor(const TQDomElement& element, Format format, Version ver
TQString name;
TQString email;
- TQDomElement dcCreator = element.namedItem("dc:creator").toElement();
-
+ TQDomElement dcCreator = extractElementNS(element, DublinCoreNamespace, "creator");
+
if (!dcCreator.isNull())
authorFromString(dcCreator.text(), name, email);
else if (format == AtomFeed)
{
TQDomElement atomAuthor = element.namedItem("author").toElement();
if (atomAuthor.isNull())
- atomAuthor = element.namedItem("atom:author").toElement();
+ atomAuthor = extractElementNS(element, AtomNamespace, "author");
if (!atomAuthor.isNull())
{
TQDomElement atomName = atomAuthor.namedItem("name").toElement();
if (atomName.isNull())
- atomName = atomAuthor.namedItem("atom:name").toElement();
+ atomName = extractElementNS(atomAuthor, AtomNamespace, "name");
name = atomName.text().stripWhiteSpace();
TQDomElement atomEmail = atomAuthor.namedItem("email").toElement();
if (atomEmail.isNull())
- atomEmail = atomAuthor.namedItem("atom:email").toElement();
+ atomEmail = extractElementNS(atomAuthor, AtomNamespace, "email");
email = atomEmail.text().stripWhiteSpace();
}
}
diff --git a/akregator/src/librss/tools_p.h b/akregator/src/librss/tools_p.h
index 0ec9013f3..1b89fc85d 100644
--- a/akregator/src/librss/tools_p.h
+++ b/akregator/src/librss/tools_p.h
@@ -29,6 +29,8 @@ namespace RSS
unsigned int count;
};
+ TQDomElement extractElementNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName); // first direct child element matching namespace URI and local name; null element if none
+ TQString extractElementTextNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName, bool isInlined = true); // processed text of that child element; TQString::null if it is absent or its text is empty
TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined=true);
TQString extractTitle(const TQDomNode &parent);
TQString childNodesAsXML(const TQDomNode& parent);