/*
    kmime_parsers.cpp

    KMime, the KDE internet mail/usenet news message library.
    Copyright (c) 2001 the KMime authors.
    See file AUTHORS for details

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software Foundation,
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, US
*/
#include "kmime_parsers.h"

#include <tqregexp.h>

using namespace KMime::Parser;

namespace KMime {
namespace Parser {


/**
 * Stores the raw multipart body and the boundary string (without the
 * leading "--") that parse() will split on.
 */
MultiPart::MultiPart(const TQCString &src, const TQCString &boundary)
{
  s_rc=src;
  b_oundary=boundary;
}


/**
 * Splits the source at every valid occurrence of "--" + boundary.
 *
 * The first boundary is valid when it sits at offset 0 or directly after
 * a '\n'; every later boundary must directly follow a '\n'.
 * Text in front of the first boundary is stored as the preamble, each
 * section between two boundaries is appended to the part list, and
 * everything after the line holding the end-boundary ("--boundary--") is
 * stored as the epilogue. If no further boundary follows a part, the rest
 * of the source is taken as the last part.
 *
 * @return true if at least one part was found.
 */
bool MultiPart::parse()
{
  TQCString b="--"+b_oundary, part;
  int pos1=0, pos2=0, blen=b.length();

  p_arts.clear();

  //find the first valid boundary
  while(1) {
    if( (pos1=s_rc.find(b.data(), pos1))==-1 || pos1==0 || s_rc[pos1-1]=='\n' ) //valid boundary found or no boundary at all
      break;
    pos1+=blen; //boundary found but not valid => skip it
  }

  if(pos1>-1) {
    pos1+=blen; //pos1 points now to the first character after the boundary
    if(s_rc[pos1]=='-' && s_rc[pos1+1]=='-') // the only valid boundary is the end-boundary - this message is *really* broken
      pos1=-1; //we give up
    else if( (pos1-blen)>1 ) //preamble present
      p_reamble=s_rc.left(pos1-blen);
  }


  while(pos1>-1 && pos2>-1) {

    //skip the rest of the line for the first boundary - the message-part starts here
    if( (pos1=s_rc.find('\n', pos1))>-1 ) { //now search the next linebreak
      //now find the next valid boundary
      pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
      while(1) {
        if( (pos2=s_rc.find(b.data(), pos2))==-1 || s_rc[pos2-1]=='\n' ) //valid boundary or no more boundaries found
          break;
        pos2+=blen; //boundary is invalid => skip it
      }

      if(pos2==-1) { // no more boundaries found
        part=s_rc.mid(pos1, s_rc.length()-pos1); //take the rest of the string
        p_arts.append(part);
        pos1=-1;
        pos2=-1; //break
      }
      else {
        part=s_rc.mid(pos1, pos2-pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
        p_arts.append(part);
        pos2+=blen; //pos2 points now to the first character after the boundary
        if(s_rc[pos2]=='-' && s_rc[pos2+1]=='-') { //end-boundary
          pos1=pos2+2; //pos1 points now to the character directly after the end-boundary

          if( (pos1=s_rc.find('\n', pos1))>-1 ) //skip the rest of this line
            e_pilouge=s_rc.mid(pos1+1, s_rc.length()-pos1-1); //everything after the end-boundary is considered as the epilogue
          pos1=-1;
          pos2=-1; //break
        }
        else {
          pos1=pos2; //the search continues ...
        }
      }
    }
    // if no linebreak was found pos1 is -1 now and the loop terminates
  }

  return (!p_arts.isEmpty());
}


//============================================================================================


/**
 * Stores the raw article body; the part number and the total number of
 * parts start out as -1.
 */
NonMimeParser::NonMimeParser(const TQCString &src) :
  s_rc(src), p_artNr(-1), t_otalNr(-1)
{}


/**
 * try to guess the mimetype from the file-extension
 *
 * The text after the last '.' of @p fileName is compared
 * case-insensitively (it is upper-cased first) against a fixed list of
 * known extensions.
 *
 * @return the matching mimetype, or "application/octet-stream" if the
 *         name is empty, contains no '.', or has an unknown extension.
 */
TQCString NonMimeParser::guessMimeType(const TQCString& fileName)
{
  TQCString tmp, mimeType;
  int pos;

  if(!fileName.isEmpty()) {
    pos=fileName.findRev('.');
    if(pos++ != -1) { //pos points now to the first character of the extension
      tmp=fileName.mid(pos, fileName.length()-pos).upper();
      if(tmp=="JPG" || tmp=="JPEG")       mimeType="image/jpeg";
      else if(tmp=="GIF")                 mimeType="image/gif";
      else if(tmp=="PNG")                 mimeType="image/png";
      else if(tmp=="TIFF" || tmp=="TIF")  mimeType="image/tiff";
      else if(tmp=="XPM")                 mimeType="image/x-xpm";
      else if(tmp=="XBM")                 mimeType="image/x-xbm";
      else if(tmp=="BMP")                 mimeType="image/x-bmp";
      else if(tmp=="TXT" ||
              tmp=="ASC" ||
              tmp=="H" ||
              tmp=="C" ||
              tmp=="CC" ||
              tmp=="CPP")                 mimeType="text/plain";
      else if(tmp=="HTML" || tmp=="HTM")  mimeType="text/html";
      else                                mimeType="application/octet-stream";
    }
    else mimeType="application/octet-stream";
  }
  else mimeType="application/octet-stream";

  return mimeType;
}

//============================================================================================


/**
 * Stores the raw article body and the subject line; the subject is only
 * consulted by parse() to find "part/total" numbers of split postings.
 */
UUEncoded::UUEncoded(const TQCString &src, const TQCString &subject) :
  NonMimeParser(src), s_ubject(subject)
{}


/**
 * Extracts uuencoded sections from the source.
 *
 * A section starts at a line beginning with "begin NNN" (three digits) and
 * ends at the next "\nend". On the first iteration a section lacking the
 * "begin" and/or "end" marker is still accepted as a possible fragment of a
 * split posting, provided it passes the stricter line check and - when a
 * subject is set - the subject contains a "<number>/<number>" pattern, from
 * which the part number and total number of parts are taken.
 *
 * For every accepted section the file name, the encoded data and a guessed
 * mimetype are appended to the result lists; text in front of a section and
 * the text remaining after the last processed position are appended to the
 * text list.
 *
 * @return true if at least one encoded section was stored or isPartial()
 *         reports true.
 */
bool UUEncoded::parse()
{
  int currentPos=0;
  bool success=true, firstIteration=true;

  while (success) {
    int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
    bool containsBegin=false, containsEnd=false;
    TQCString tmp,fileName;

    if( (beginPos=s_rc.find(TQRegExp("begin [0-9][0-9][0-9]"),currentPos))>-1 && (beginPos==0 || s_rc.at(beginPos-1)=='\n') ) {
      containsBegin=true;
      uuStart=s_rc.find('\n', beginPos);
      if(uuStart==-1) { //no more line breaks found, we give up
        success = false;
        break;
      } else
        uuStart++; //points now at the beginning of the next line
    }
    else beginPos=currentPos;

    if ( (endPos=s_rc.find("\nend",(uuStart>0)? uuStart-1:0))==-1 )
      endPos=s_rc.length(); //no end found
    else
      containsEnd=true;

    if ((containsBegin && containsEnd) || firstIteration) {

      //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
      //all lines in a uuencoded text start with 'M'
      for(int idx=uuStart; idx<endPos; idx++)
        if(s_rc[idx]=='\n') {
          lineCount++;
          if(idx+1<endPos && s_rc[idx+1]=='M') {
            idx++;
            MCount++;
          }
        }

      //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
      if( MCount==0 || (lineCount-MCount)>10 ||
          ((!containsBegin || !containsEnd) && (MCount<15)) ) { // harder check for split articles
        success = false;
        break; //too many "non-M-Lines" found, we give up
      }

      if( (!containsBegin || !containsEnd) && !s_ubject.isNull()) {  // message may be split up => parse subject
        TQRegExp rx("[0-9]+/[0-9]+");
        pos=rx.search(TQString(s_ubject), 0);
        len=rx.matchedLength();
        if(pos!=-1) {
          tmp=s_ubject.mid(pos, len);
          pos=tmp.find('/');
          p_artNr=tmp.left(pos).toInt();
          t_otalNr=tmp.right(tmp.length()-pos-1).toInt();
        } else {
          success = false;
          break; //no "part-numbers" found in the subject, we give up
        }
      }

      //everything before "begin" is text
      if(beginPos>0)
        t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));

      if(containsBegin)
        fileName = s_rc.mid(beginPos+10, uuStart-beginPos-11); //everything between "begin ### " and the next LF is considered as the filename
      else
        fileName = "";
      f_ilenames.append(fileName);
      b_ins.append(s_rc.mid(uuStart, endPos-uuStart+1)); //everything between "begin" and "end" is uuencoded
      m_imeTypes.append(guessMimeType(fileName));
      firstIteration=false;

      int next = s_rc.find('\n', endPos+1);
      if(next==-1) { //no more line breaks found, we give up
        success = false;
        break;
      } else
        next++; //points now at the beginning of the next line
      currentPos = next;

    } else {
      success = false;
    }
  }

  // append trailing text part of the article
  t_ext.append(s_rc.right(s_rc.length()-currentPos));

  return ((b_ins.count()>0) || isPartial());
}


//============================================================================================


/**
 * Stores the raw article body that parse() will scan for yEnc sections.
 */
YENCEncoded::YENCEncoded(const TQCString &src) :
  NonMimeParser(src)
{}


/**
 * Reads the integer value of a "name=value" token from a yEnc control line.
 *
 * The value ends at the first space, tab, CR or LF found after the first
 * occurrence of "name=", or at the end of @p src if none of these follows.
 * It is accepted only if it is non-empty and its first character is a
 * decimal digit.
 *
 * @param src   text of the control line(s) to search.
 * @param name  keyword to look for, without the trailing '='.
 * @param value receives the parsed number; left untouched when the function
 *              returns false.
 * @return true if a numeric value was found and stored in @p value.
 */
bool YENCEncoded::yencMeta(TQCString& src, const TQCString& name, int* value)
{
  bool found = false;
  TQCString sought=name + "=";

  int iPos=src.find( sought.data() );
  if (iPos>-1) {
    // pos1 becomes the position of the nearest terminator behind the keyword
    int pos1=src.find(' ', iPos);
    int pos2=src.find('\r', iPos);
    int pos3=src.find('\t', iPos);
    int pos4=src.find('\n', iPos);
    if (pos2>=0 && (pos1<0 || pos1>pos2))
      pos1=pos2;
    if (pos3>=0 && (pos1<0 || pos1>pos3))
      pos1=pos3;
    if (pos4>=0 && (pos1<0 || pos1>pos4))
      pos1=pos4;
    // No terminator at all: the value is the last token and runs to the end
    // of the string (e.g. an "=yend size=N" line handed over without its
    // line break). Without this the value would never be recognised.
    if (pos1<0)
      pos1=src.length();
    iPos=src.findRev( '=', pos1)+1; //first character of the value
    if (iPos<pos1) {
      char c=src.at( iPos);
      if ( c>='0' && c<='9') {
        found=true;
        *value=src.mid( iPos, pos1-iPos).toInt();
      }
    }
  }
  return found;
}


/**
 * Extracts yEnc-encoded sections from the source.
 *
 * A section starts at a line beginning with "=ybegin ", optionally followed
 * directly by an "=ypart" line, and ends at a line starting with "=y" after
 * the data (expected to be the "=yend" line). The header must provide
 * "name=", "line=" and "size="; multi-part headers must additionally provide
 * "part=", "begin=" and "end=". The decoded byte count has to match both the
 * expected size and the "size=" value of the trailer line, otherwise parsing
 * stops.
 *
 * For every successfully decoded section the file name, a guessed mimetype
 * and the decoded bytes are appended to the result lists; text in front of a
 * section and the text remaining after the last processed position are
 * appended to the text list.
 *
 * @return true if at least one section was decoded.
 */
bool YENCEncoded::parse()
{
  int currentPos=0;
  bool success=true;

  while (success) {
    int beginPos=currentPos, yencStart=currentPos;
    bool containsPart=false;
    TQCString fileName;

    if ((beginPos=s_rc.find("=ybegin ", currentPos))>-1 && ( beginPos==0 || s_rc.at( beginPos-1)=='\n') ) {
      yencStart=s_rc.find( '\n', beginPos);
      if (yencStart==-1) { // no more line breaks found, give up
        success = false;
        break;
      } else {
        yencStart++;
        if (s_rc.find("=ypart", yencStart)==yencStart) {
          containsPart=true;
          yencStart=s_rc.find( '\n', yencStart);
          if ( yencStart== -1) {
            success=false;
            break;
          }
          yencStart++;
        }
      }
      // yencStart points now to the first line of encoded data

      // Try to identify yenc meta data

      // Filenames can contain any embedded chars until end of line
      TQCString meta=s_rc.mid(beginPos, yencStart-beginPos);
      int namePos=meta.find("name=");
      if (namePos== -1) {
        success=false;
        break;
      }
      int eolPos=meta.find('\r', namePos);
      if (eolPos== -1)
        eolPos=meta.find('\n', namePos);
      if (eolPos== -1) {
        success=false;
        break;
      }
      fileName=meta.mid(namePos+5, eolPos-(namePos+5));

      // Other metadata is integer
      int yencLine;
      if (!yencMeta(meta, "line", &yencLine)) {
        success=false;
        break;
      }
      int yencSize;
      if (!yencMeta( meta, "size", &yencSize)) {
        success=false;
        break;
      }

      int partBegin, partEnd;
      if (containsPart) {
        if (!yencMeta(meta, "part", &p_artNr)) {
          success=false;
          break;
        }
        if (!yencMeta(meta, "begin", &partBegin) ||
            !yencMeta(meta, "end", &partEnd)) {
          success=false;
          break;
        }
        if (!yencMeta(meta, "total", &t_otalNr))
          t_otalNr=p_artNr+1;
        if (yencSize==partEnd-partBegin+1)
          t_otalNr=1; // this part covers the whole file
        else
          yencSize=partEnd-partBegin+1; // from here on: size of this part only
      }

      // We have a valid yenc header; now we extract the binary data
      int totalSize=0;
      int pos=yencStart;
      int len=s_rc.length();
      bool lineStart=true;
      int lineLength=0;
      bool containsEnd=false;
      TQByteArray binary = TQByteArray(yencSize);
      while (pos<len) {
        int ch=s_rc.at(pos);
        if (ch<0)
          ch+=256;
        if (ch=='\r') {
          // a line may only be shorter than announced if it is the last one
          if (lineLength!=yencLine && totalSize!=yencSize)
            break;
          pos++;
        }
        else if (ch=='\n') {
          lineStart=true;
          lineLength=0;
          pos++;
        }
        else {
          if (ch=='=') {
            if (pos+1<len) {
              ch=s_rc.at( pos+1);
              if (lineStart && ch=='y') { // "=y" at the start of a line: control line, data is over
                containsEnd=true;
                break;
              }
              // escaped character: remove the escape offset and the yEnc offset
              pos+=2;
              ch-=64+42;
              if (ch<0)
                ch+=256;
              if (totalSize>=yencSize) // more data than announced
                break;
              binary.at(totalSize++)=ch;
              lineLength++;
            }
            else
              break;
          }
          else {
            ch-=42;
            if (ch<0)
              ch+=256;
            if (totalSize>=yencSize) // more data than announced
              break;
            binary.at(totalSize++)=ch;
            lineLength++;
            pos++;
          }
          lineStart=false;
        }
      }

      if (!containsEnd) {
        success=false;
        break;
      }
      if (totalSize!=yencSize) {
        success=false;
        break;
      }

      // pos now points to =yend; get end data
      eolPos=s_rc.find('\n', pos);
      if (eolPos== -1) {
        success=false;
        break;
      }
      meta=s_rc.mid(pos, eolPos-pos);
      if (!yencMeta(meta, "size", &totalSize)) {
        success=false;
        break;
      }
      if (totalSize!=yencSize) {
        success=false;
        break;
      }

      f_ilenames.append(fileName);
      m_imeTypes.append(guessMimeType( fileName));
      b_ins.append(binary);

      //everything before "begin" is text
      if(beginPos>0)
        t_ext.append(s_rc.mid(currentPos,beginPos-currentPos));
      currentPos = eolPos+1;

    } else {
      success = false;
    }
  }

  // append trailing text part of the article
  t_ext.append(s_rc.right(s_rc.length()-currentPos));

  return b_ins.count()>0;
}

} // namespace Parser

} // namespace KMime