/* * tools_p.cpp * * Copyright (c) 2001, 2002, 2003 Frerich Raabe * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the * accompanying file 'COPYING'. */ #include "tools_p.h" #include #include #include #include namespace RSS { time_t parseISO8601Date(const TQString &s) { // do some sanity check: 26-12-2004T00:00+00:00 is parsed to epoch+1 in the KRFCDate, which is wrong. So let's check if the date begins with YYYY -fo if (s.stripWhiteSpace().left(4).toInt() < 1000) return 0; // error // FIXME: imho this is done in KRFCDate::parseDateISO8601() automatically, so we could omit it? -fo if (s.find('T') != -1) return KRFCDate::parseDateISO8601(s); else return KRFCDate::parseDateISO8601(s + "T12:00:00"); } TQString childNodesAsXML(const TQDomNode& tqparent) { TQDomNodeList list = tqparent.childNodes(); TQString str; TQTextStream ts( &str, IO_WriteOnly ); for (uint i = 0; i < list.count(); ++i) ts << list.item(i); return str.stripWhiteSpace(); } static TQString plainTextToHtml(const TQString& plainText) { TQString str(plainText); str.replace("&", "&"); str.replace("\"", """); str.replace("<", "<"); //str.replace(">", ">"); str.replace("\n", "
"); return str; } enum ContentFormat { Text, HTML, XML, Binary }; static ContentFormat mapTypeToFormat(const TQString& modep, const TQString& typep, const TQString& src) { TQString mode = modep.isNull() ? "escaped" : modep; TQString type = typep; //"If neither the type attribute nor the src attribute is provided, //Atom Processors MUST behave as though the type attribute were //present with a value of "text"" if (type.isNull() && src.isEmpty()) type = TQString::fromUtf8("text"); if (type == TQString::fromUtf8("html") || type == TQString::fromUtf8("text/html")) return HTML; if (type == TQString::fromUtf8("text") || (type.tqstartsWith(TQString::fromUtf8("text/"), false) && !type.tqstartsWith(TQString::fromUtf8("text/xml"), false)) ) return Text; TQStringList xmltypes; xmltypes.append(TQString::fromUtf8("xhtml")); // XML media types as defined in RFC3023: xmltypes.append(TQString::fromUtf8("text/xml")); xmltypes.append(TQString::fromUtf8("application/xml")); xmltypes.append(TQString::fromUtf8("text/xml-external-parsed-entity")); xmltypes.append(TQString::fromUtf8("application/xml-external-parsed-entity")); xmltypes.append(TQString::fromUtf8("application/xml-dtd")); if (xmltypes.contains(type) || type.tqendsWith(TQString::fromUtf8("+xml"), false) || type.tqendsWith(TQString::fromUtf8("/xml"), false)) return XML; return Binary; } static TQString extractAtomContent(const TQDomElement& e) { ContentFormat format = mapTypeToFormat(e.attribute("mode"), e.attribute("type"), e.attribute("src")); switch (format) { case HTML: { const bool hasPre = e.text().contains( "
", false ) || e.text().contains( "
", false) || result.contains("
 regexp
            if(!isInlined && !hasHtml)						// perform nl2br if not a inline elt and it has no html elts
                    result = result = result.replace(TQChar('\n'), "
"); if(!hasPre) // strip white spaces if no
                    result = result.simplifyWhiteSpace();
        }
        
        return result.isEmpty() ? TQString() : result;
}

/**
 * Extracts a plain-text title from the "title" child of @p tqparent.
 *
 * The element text has its entities resolved, anything that looks like a
 * markup tag and every backslash removed, entities resolved a second time
 * (for doubly-escaped content), and whitespace simplified.
 *
 * @param tqparent node whose "title" child is read
 * @return the cleaned title, or a null string if there is no "title" child
 *         or the cleaned text is empty
 */
TQString extractTitle(const TQDomNode & tqparent)
{
    TQDomNode titleNode = tqparent.namedItem(TQString::tqfromLatin1("title"));
    if (titleNode.isNull())
        return TQString();

    // first entity pass, then drop tags and backslashes
    TQString stripped = KCharsets::resolveEntities(titleNode.toElement().text());
    stripped.replace(TQRegExp("<[^>]*>"), "");
    stripped.remove("\\");

    // second entity pass and whitespace normalisation
    const TQString title = KCharsets::resolveEntities(stripped).simplifyWhiteSpace();

    // callers get a null string rather than an empty one
    return title.isEmpty() ? TQString() : title;
}

static void authorFromString(const TQString& strp, TQString& name, TQString& email)
{
    TQString str = strp.stripWhiteSpace();
    if (str.isEmpty())
        return;
    
    // look for something looking like a mail address ( "foo@bar.com", 
    // "") and extract it
    
    TQRegExp remail("\\s]+)>?"); // FIXME: user "proper" regexp,
       // search kmail source for it
    
    int pos = remail.search(str);
    if (pos != -1)
    {
        TQString all = remail.cap(0);
        email = remail.cap(1);
        str.replace(all, ""); // remove mail address
    }
    
    // simplify the rest and use it as name
    
    name = str.simplifyWhiteSpace();
    
    // after removing the email, str might have 
    // the format "(Foo M. Bar)". We cut off 
    // parentheses if there are any. However, if
    // str is of the format "Foo M. Bar (President)",
    // we should not cut anything.

    TQRegExp rename("^\\(([^\\)]*)\\)");
    
    pos = rename.search(name);
    
    if (pos != -1)
    {
        name = rename.cap(1);
    }
    
    name = name.isEmpty() ? TQString() : name;
    email = email.isEmpty() ? TQString() : email;
}

/**
 * Determines the author of a feed item and formats it for display.
 *
 * Lookup order:
 *  - a "dc:creator" child, parsed as free-form "name / e-mail" text;
 *  - otherwise, for AtomFeed, the "author" (or "atom:author") child with its
 *    "name"/"email" (or "atom:name"/"atom:email") sub-elements;
 *  - otherwise, for RSSFeed, the "author" child parsed as free-form text.
 *
 * @param element the item element to inspect
 * @param format  feed format, selects the fallback lookup
 * @param version feed version (not used by this function)
 * @return an HTML mailto link showing the name if an e-mail address is known,
 *         otherwise the plain name (possibly a null string)
 */
TQString parseItemAuthor(const TQDomElement& element, Format format, Version version)
{
    TQString name;
    TQString email;

    TQDomElement dcCreator = element.namedItem("dc:creator").toElement();
    
    if (!dcCreator.isNull())
         authorFromString(dcCreator.text(), name, email);
    else if (format == AtomFeed)
    {
        // accept both unprefixed and "atom:"-prefixed element names
        TQDomElement atomAuthor = element.namedItem("author").toElement();
        if (atomAuthor.isNull())
            atomAuthor = element.namedItem("atom:author").toElement();
        if (!atomAuthor.isNull())
        {
            TQDomElement atomName = atomAuthor.namedItem("name").toElement();
            if (atomName.isNull())
                atomName = atomAuthor.namedItem("atom:name").toElement();
            name = atomName.text().stripWhiteSpace();
            
            TQDomElement atomEmail = atomAuthor.namedItem("email").toElement();
            if (atomEmail.isNull())
                atomEmail = atomAuthor.namedItem("atom:email").toElement();
            email = atomEmail.text().stripWhiteSpace();
        }
    }
    else if (format == RSSFeed)
    {
        authorFromString(element.namedItem("author").toElement().text(), name, email);
    }
    
    // no name available: show the address itself as the link text
    if (name.isNull())
        name = email;
    
    // The template had been reduced to "%2", so the two arguments no longer
    // matched the place markers and no link was produced. %1 is the address,
    // %2 the visible text.
    // NOTE(review): name and email come from the feed and are inserted into
    // markup unescaped - confirm whether callers sanitise the result.
    if (!email.isNull())
        return TQString("<a href=\"mailto:%1\">%2</a>").tqarg(email).tqarg(name);
    else
        return name;
}

} // namespace RSS

// vim:noet:ts=4