summary refs log tree commit diff stats
path: root/src/rtf2html/rtf2html.cpp
diff options
context:
space:
mode:
author tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2010-03-01 19:17:32 +0000
committer tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2010-03-01 19:17:32 +0000
commit e38d2351b83fa65c66ccde443777647ef5cb6cff (patch)
tree 1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/rtf2html/rtf2html.cpp
download tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.tar.gz
tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.zip
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/rtf2html/rtf2html.cpp')
-rw-r--r--src/rtf2html/rtf2html.cpp531
1 files changed, 531 insertions, 0 deletions
diff --git a/src/rtf2html/rtf2html.cpp b/src/rtf2html/rtf2html.cpp
new file mode 100644
index 0000000..4f29fe7
--- /dev/null
+++ b/src/rtf2html/rtf2html.cpp
@@ -0,0 +1,531 @@
+/* This is an RTF to HTML converter, generally implemented as a text filter.
+ Copyright (C) 2003 Valentin Lavrinenko, [email protected]
+
+ available at http://rtf2html.sf.net
+
+ Original available under the terms of the GNU LGPL2, and according
+ to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "rtf2html.h"
+#include "rtf_table.h"
+#include "rtf_tools.h"
+#include "rtf_keyword.h"
+#include "fmt_opts.h"
+
+#include <cstdlib>
+#include <stdexcept>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+using Tellico::RTF2HTML;
+using namespace rtf;
+
+/* Keeps a copy of the RTF source text; the conversion itself is done
+   later, in toHTML(). */
+RTF2HTML::RTF2HTML(const QString& text)
+  : m_text(text)
+{
+}
+
+/* Converts the RTF text stored in m_text to HTML and returns it.
+
+   The input is scanned once, character by character:
+    - a backslash starts an rtf_keyword (control character or control word),
+    - '{' pushes the current formatting options, '}' restores them,
+    - CR and LF are skipped, '<' and '>' are written as entities,
+    - everything else is written to the current paragraph.
+
+   A finished paragraph is appended to the result on \par and \sect. While a
+   table is open, paragraphs flagged with \intbl are collected in the current
+   cell instead; the first paragraph that is not flagged closes the table and
+   the result of table::make() is appended in front of it.
+
+   The document title, the paper width and the left margin are parsed, but
+   nothing in this function writes them to the returned markup.
+
+   The input is treated as untrusted: truncated groups and escapes,
+   out-of-range colour indexes, an unbalanced '}' and \cellx without a
+   preceding \trowd are tolerated instead of reading or writing out of bounds.
+
+   NOTE(review): the rtf_keyword constructor advances buf_in itself; whether
+   it copes with a backslash at the very end of the input cannot be seen from
+   here - confirm in rtf_keyword.h.
+   NOTE(review): a table that is still open when the input ends is deleted
+   without its make() result being appended - confirm this is intended. */
+QString RTF2HTML::toHTML() const {
+  std::string str_in = m_text;
+
+  std::string::iterator buf_in=str_in.begin(), buf_in_end=str_in.end();
+  colorvect colortbl;
+  fontmap fonttbl;
+  std::string title;
+
+  bool bAsterisk=false;
+  fo_stack foStack;
+  formatting_options cur_options;
+  std::string html;
+  html_text par_html(cur_options);
+
+  /* CellDefs in rtf are really queer. We'll keep a list of them here
+     and will give an iterator into this list to a row */
+  table_cell_defs_list CellDefsList;
+  // end() marks "no \trowd seen yet"; checked before every use below
+  table_cell_defs_list::iterator CurCellDefs=CellDefsList.end();
+  table_cell_def* tcdCurCellDef=new table_cell_def;
+  table_cell* tcCurCell=new table_cell;
+  table_row* trCurRow=new table_row;
+  table* tblCurTable=new table;
+  int iLastRowLeft=0, iLastRowHeight=0;
+  std::string t_str;
+
+  bool bInTable=false;
+  int iDocWidth=12240;
+  int iMarginLeft=1800;
+  while(buf_in!=buf_in_end)
+  {
+    switch (*buf_in)
+    {
+    case '\\':
+    {
+      rtf_keyword kw(++buf_in);
+      if (kw.is_control_char())
+      {
+        switch (kw.control_char())
+        {
+        case '\\': case '{': case '}':
+          par_html.write(kw.control_char());
+          break;
+        case '\'':
+        {
+          // \'hh needs two hex digits; drop the escape if the input is cut off
+          if (buf_in_end-buf_in<2)
+          {
+            buf_in=buf_in_end;
+            break;
+          }
+          std::string stmp(1,*buf_in++);
+          stmp+=*buf_in++;
+          int code=std::strtol(stmp.c_str(), NULL, 16);
+          switch (code)
+          {
+          case 167:
+            par_html.write("&bull;");
+            break;
+          case 188:
+            par_html.write("&hellip;");
+            break;
+          default:
+            par_html.write(static_cast<char>(code));
+          }
+          break;
+        }
+        case '*':
+          // remembered so that the group of the next control word is skipped
+          bAsterisk=true;
+          break;
+        case '~':
+          par_html.write("&nbsp;");
+          break;
+        case '\n':
+          par_html.write("<br><br>");
+          break;
+        }
+      }
+      else if (bAsterisk)
+      {
+        bAsterisk=false;
+        skip_group(buf_in);
+      }
+      else
+      {
+        switch (kw.keyword())
+        {
+        case rtf_keyword::rkw_filetbl:
+        case rtf_keyword::rkw_stylesheet:
+        case rtf_keyword::rkw_header:
+        case rtf_keyword::rkw_footer: case rtf_keyword::rkw_headerf:
+        case rtf_keyword::rkw_footerf: case rtf_keyword::rkw_pict:
+        case rtf_keyword::rkw_object:
+          // we'll skip such groups
+          skip_group(buf_in);
+          break;
+        // document title
+        case rtf_keyword::rkw_info:
+        {
+          int depth=1;
+          bool in_title=false;
+          // stop at the end of the input too, the group may be truncated
+          while (depth>0 && buf_in!=buf_in_end)
+          {
+            switch (*buf_in)
+            {
+            case '\\':
+            {
+              rtf_keyword info_kw(++buf_in);
+              if (info_kw.keyword()==rtf_keyword::rkw_title)
+                in_title=true;
+              break;
+            }
+            case '{': ++depth; ++buf_in; break;
+            case '}': --depth; ++buf_in; in_title=false; break;
+            default: if (in_title) title+=*buf_in; ++buf_in; break;
+            }
+          }
+          break;
+        }
+        // color table
+        case rtf_keyword::rkw_colortbl:
+        {
+          color clr;
+          while (buf_in!=buf_in_end && *buf_in!='}')
+          {
+            switch (*buf_in)
+            {
+            case '\\':
+            {
+              rtf_keyword clr_kw(++buf_in);
+              switch (clr_kw.keyword())
+              {
+              case rtf_keyword::rkw_red:
+                clr.r=clr_kw.parameter();
+                break;
+              case rtf_keyword::rkw_green:
+                clr.g=clr_kw.parameter();
+                break;
+              case rtf_keyword::rkw_blue:
+                clr.b=clr_kw.parameter();
+                break;
+              default: break;
+              }
+              break;
+            }
+            case ';':
+              // every ';' terminates one colour entry
+              colortbl.push_back(clr);
+              ++buf_in;
+              break;
+            default:
+              ++buf_in;
+              break;
+            }
+          }
+          // step over the closing '}' unless the input ended first
+          if (buf_in!=buf_in_end)
+            ++buf_in;
+          break;
+        }
+        // font table
+        case rtf_keyword::rkw_fonttbl:
+        {
+          font fnt;
+          int font_num=0; // stays 0 for an entry that carries no \f
+          bool full_name=false;
+          bool in_font=false;
+          // runs until the '}' that closes the table itself (not a font entry)
+          while (buf_in!=buf_in_end && !(*buf_in=='}' && !in_font))
+          {
+            switch (*buf_in)
+            {
+            case '\\':
+            {
+              rtf_keyword fnt_kw(++buf_in);
+              if (fnt_kw.is_control_char() && fnt_kw.control_char()=='*')
+                skip_group(buf_in);
+              else
+                switch (fnt_kw.keyword())
+                {
+                case rtf_keyword::rkw_f:
+                  font_num=fnt_kw.parameter();
+                  break;
+                case rtf_keyword::rkw_fprq:
+                  fnt.pitch=fnt_kw.parameter();
+                  break;
+                case rtf_keyword::rkw_fcharset:
+                  fnt.charset=fnt_kw.parameter();
+                  break;
+                case rtf_keyword::rkw_fnil:
+                  fnt.family=font::ff_none;
+                  break;
+                case rtf_keyword::rkw_froman:
+                  fnt.family=font::ff_serif;
+                  break;
+                case rtf_keyword::rkw_fswiss:
+                  fnt.family=font::ff_sans_serif;
+                  break;
+                case rtf_keyword::rkw_fmodern:
+                  fnt.family=font::ff_monospace;
+                  break;
+                case rtf_keyword::rkw_fscript:
+                  fnt.family=font::ff_cursive;
+                  break;
+                case rtf_keyword::rkw_fdecor:
+                  fnt.family=font::ff_fantasy;
+                  break;
+                default: break;
+                }
+              break;
+            }
+            case '{':
+              in_font=true;
+              ++buf_in;
+              break;
+            case '}':
+              // end of one font entry: store it and start a fresh one
+              in_font=false;
+              fonttbl.insert(std::make_pair(font_num, fnt));
+              fnt=font();
+              full_name=false;
+              ++buf_in;
+              break;
+            case ';':
+              full_name=true;
+              ++buf_in;
+              break;
+            default:
+              if (!full_name && in_font)
+                fnt.name+=*buf_in;
+              ++buf_in;
+              break;
+            }
+          }
+          // step over the closing '}' unless the input ended first
+          if (buf_in!=buf_in_end)
+            ++buf_in;
+          break;
+        }
+        // special characters
+        case rtf_keyword::rkw_line: case rtf_keyword::rkw_softline:
+          par_html.write("<br>");
+          break;
+        case rtf_keyword::rkw_tab:
+          par_html.write("&nbsp;&nbsp;"); // maybe, this can be done better
+          break;
+        case rtf_keyword::rkw_enspace: case rtf_keyword::rkw_emspace:
+          par_html.write("&nbsp;");
+          break;
+        case rtf_keyword::rkw_qmspace:
+          par_html.write("&thinsp;");
+          break;
+        case rtf_keyword::rkw_endash:
+          par_html.write("&ndash;");
+          break;
+        case rtf_keyword::rkw_emdash:
+          par_html.write("&mdash;");
+          break;
+        case rtf_keyword::rkw_bullet:
+          par_html.write("&bull;");
+          break;
+        case rtf_keyword::rkw_lquote:
+          par_html.write("&lsquo;");
+          break;
+        case rtf_keyword::rkw_rquote:
+          par_html.write("&rsquo;");
+          break;
+        case rtf_keyword::rkw_ldblquote:
+          par_html.write("&ldquo;");
+          break;
+        case rtf_keyword::rkw_rdblquote:
+          par_html.write("&rdquo;");
+          break;
+        // paragraph formatting
+        case rtf_keyword::rkw_ql:
+          cur_options.papAlign=formatting_options::align_left;
+          break;
+        case rtf_keyword::rkw_qr:
+          cur_options.papAlign=formatting_options::align_right;
+          break;
+        case rtf_keyword::rkw_qc:
+          cur_options.papAlign=formatting_options::align_center;
+          break;
+        case rtf_keyword::rkw_qj:
+          cur_options.papAlign=formatting_options::align_justify;
+          break;
+        // the five measurements below are divided by 20 before being stored
+        case rtf_keyword::rkw_fi:
+          cur_options.papFirst=(int)rint(kw.parameter()/20);
+          break;
+        case rtf_keyword::rkw_li:
+          cur_options.papLeft=(int)rint(kw.parameter()/20);
+          break;
+        case rtf_keyword::rkw_ri:
+          cur_options.papRight=(int)rint(kw.parameter()/20);
+          break;
+        case rtf_keyword::rkw_sb:
+          cur_options.papBefore=(int)rint(kw.parameter()/20);
+          break;
+        case rtf_keyword::rkw_sa:
+          cur_options.papAfter=(int)rint(kw.parameter()/20);
+          break;
+        case rtf_keyword::rkw_pard:
+          cur_options.papBefore=cur_options.papAfter=0;
+          cur_options.papLeft=cur_options.papRight=0;
+          cur_options.papFirst=0;
+          cur_options.papAlign=formatting_options::align_left;
+          cur_options.papInTbl=false;
+          break;
+        case rtf_keyword::rkw_par:
+        case rtf_keyword::rkw_sect:
+          t_str=cur_options.get_par_str()+par_html.str()
+                +"&nbsp;"+par_html.close()+"</p>\n";
+          if (!bInTable)
+          {
+            html+=t_str;
+          }
+          else
+          {
+            if (cur_options.papInTbl)
+            {
+              tcCurCell->Text+=t_str;
+            }
+            else
+            {
+              // first paragraph outside the table: emit the table, then it
+              html+=tblCurTable->make()+t_str;
+              bInTable=false;
+              // release the finished table, it used to be leaked here
+              delete tblCurTable;
+              tblCurTable=new table;
+            }
+          }
+          par_html.clear();
+          break;
+        // character formatting
+        case rtf_keyword::rkw_super:
+          cur_options.chpVAlign=
+            kw.parameter()==0?formatting_options::va_normal
+                             :formatting_options::va_sup;
+          break;
+        case rtf_keyword::rkw_sub:
+          cur_options.chpVAlign=
+            kw.parameter()==0?formatting_options::va_normal
+                             :formatting_options::va_sub;
+          break;
+        case rtf_keyword::rkw_b:
+          cur_options.chpBold=!(kw.parameter()==0);
+          break;
+        case rtf_keyword::rkw_i:
+          cur_options.chpItalic=!(kw.parameter()==0);
+          break;
+        case rtf_keyword::rkw_ul:
+          cur_options.chpUnderline=!(kw.parameter()==0);
+          break;
+        case rtf_keyword::rkw_ulnone:
+          cur_options.chpUnderline=false;
+          break;
+        case rtf_keyword::rkw_fs:
+          cur_options.chpFontSize=kw.parameter();
+          break;
+        case rtf_keyword::rkw_cf:
+        {
+          // an index outside the colour table leaves the colour unchanged
+          const int fg_idx=kw.parameter();
+          if (fg_idx>=0 && static_cast<std::size_t>(fg_idx)<colortbl.size())
+            cur_options.chpFColor=colortbl[fg_idx];
+          break;
+        }
+        case rtf_keyword::rkw_cb:
+        {
+          // an index outside the colour table leaves the colour unchanged
+          const int bg_idx=kw.parameter();
+          if (bg_idx>=0 && static_cast<std::size_t>(bg_idx)<colortbl.size())
+            cur_options.chpBColor=colortbl[bg_idx];
+          break;
+        }
+        case rtf_keyword::rkw_highlight:
+          cur_options.chpHighlight=kw.parameter();
+          break;
+        case rtf_keyword::rkw_f:
+          cur_options.chpFont=fonttbl[kw.parameter()];
+          break;
+        case rtf_keyword::rkw_plain:
+          cur_options.chpBold=cur_options.chpItalic
+                             =cur_options.chpUnderline=false;
+          cur_options.chpVAlign=formatting_options::va_normal;
+          cur_options.chpFontSize=cur_options.chpHighlight=0;
+          cur_options.chpFColor=cur_options.chpBColor=color();
+          cur_options.chpFont=font();
+          break;
+        // table formatting
+        case rtf_keyword::rkw_intbl:
+          cur_options.papInTbl=true;
+          break;
+        case rtf_keyword::rkw_trowd:
+          CurCellDefs=CellDefsList.insert(CellDefsList.end(),
+                                          table_cell_defs());
+          // no break: continues into the \row handling below, as before
+        case rtf_keyword::rkw_row:
+          if (!trCurRow->Cells.empty())
+          {
+            // \row without any \trowd: give the row an empty definition set
+            if (CurCellDefs==CellDefsList.end())
+              CurCellDefs=CellDefsList.insert(CellDefsList.end(),
+                                              table_cell_defs());
+            trCurRow->CellDefs=CurCellDefs;
+            // -1000 is compared against here as "value not set for this row"
+            if (trCurRow->Left==-1000)
+              trCurRow->Left=iLastRowLeft;
+            if (trCurRow->Height==-1000)
+              trCurRow->Height=iLastRowHeight;
+            tblCurTable->push_back(trCurRow);
+            trCurRow=new table_row;
+          }
+          bInTable=true;
+          break;
+        case rtf_keyword::rkw_cell:
+          t_str=cur_options.get_par_str()+par_html.str()
+                +"&nbsp;"+par_html.close()+"</p>\n";
+          tcCurCell->Text+=t_str;
+          par_html.clear();
+          trCurRow->Cells.push_back(tcCurCell);
+          tcCurCell=new table_cell;
+          break;
+        case rtf_keyword::rkw_cellx:
+          tcdCurCellDef->Right=kw.parameter();
+          // \cellx without any \trowd: create the definition set on demand
+          if (CurCellDefs==CellDefsList.end())
+            CurCellDefs=CellDefsList.insert(CellDefsList.end(),
+                                            table_cell_defs());
+          CurCellDefs->push_back(tcdCurCellDef);
+          tcdCurCellDef=new table_cell_def;
+          break;
+        case rtf_keyword::rkw_trleft:
+          trCurRow->Left=kw.parameter();
+          iLastRowLeft=kw.parameter();
+          break;
+        case rtf_keyword::rkw_trrh:
+          trCurRow->Height=kw.parameter();
+          iLastRowHeight=kw.parameter();
+          break;
+        case rtf_keyword::rkw_clvmgf:
+          tcdCurCellDef->FirstMerged=true;
+          break;
+        case rtf_keyword::rkw_clvmrg:
+          tcdCurCellDef->Merged=true;
+          break;
+        // each border keyword also becomes the target of a later \brdrnone
+        case rtf_keyword::rkw_clbrdrb:
+          tcdCurCellDef->BorderBottom=true;
+          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderBottom);
+          break;
+        case rtf_keyword::rkw_clbrdrt:
+          tcdCurCellDef->BorderTop=true;
+          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderTop);
+          break;
+        case rtf_keyword::rkw_clbrdrl:
+          tcdCurCellDef->BorderLeft=true;
+          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderLeft);
+          break;
+        case rtf_keyword::rkw_clbrdrr:
+          tcdCurCellDef->BorderRight=true;
+          tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderRight);
+          break;
+        case rtf_keyword::rkw_brdrnone:
+          if (tcdCurCellDef->ActiveBorder!=NULL)
+          {
+            *(tcdCurCellDef->ActiveBorder)=false;
+          }
+          break;
+        case rtf_keyword::rkw_clvertalt:
+          tcdCurCellDef->VAlign=table_cell_def::valign_top;
+          break;
+        case rtf_keyword::rkw_clvertalc:
+          tcdCurCellDef->VAlign=table_cell_def::valign_center;
+          break;
+        case rtf_keyword::rkw_clvertalb:
+          tcdCurCellDef->VAlign=table_cell_def::valign_bottom;
+          break;
+        // page formatting
+        case rtf_keyword::rkw_paperw:
+          iDocWidth=kw.parameter();
+          break;
+        case rtf_keyword::rkw_margl:
+          iMarginLeft=kw.parameter();
+          break;
+        default: break;
+        }
+      }
+      break;
+    }
+    case '{':
+      // perform group opening actions here
+      foStack.push(cur_options);
+      ++buf_in;
+      break;
+    case '}':
+      // perform group closing actions here
+      // a '}' without a matching '{' keeps the current options
+      if (!foStack.empty())
+      {
+        cur_options=foStack.top();
+        foStack.pop();
+      }
+      ++buf_in;
+      break;
+    case 13:
+    case 10:
+      ++buf_in;
+      break;
+    case '<':
+      par_html.write("&lt;");
+      ++buf_in;
+      break;
+    case '>':
+      par_html.write("&gt;");
+      ++buf_in;
+      break;
+/*    case ' ':
+      par_html.write("&ensp;");
+      ++buf_in;
+      break;*/
+    default:
+      par_html.write(*buf_in++);
+    }
+  }
+
+  // whatever is left in the paragraph buffer becomes the last paragraph
+  t_str=cur_options.get_par_str()+par_html.str()
+        +"&nbsp;"+par_html.close()+"</p>\n";
+  html+=t_str;
+
+  delete tcCurCell;
+  delete trCurRow;
+  delete tblCurTable;
+  delete tcdCurCellDef;
+
+  return html;
+}
+