diff options
Diffstat (limited to 'src/rtf2html/rtf2html.cpp')
-rw-r--r-- | src/rtf2html/rtf2html.cpp | 531 |
1 files changed, 531 insertions, 0 deletions
diff --git a/src/rtf2html/rtf2html.cpp b/src/rtf2html/rtf2html.cpp new file mode 100644 index 0000000..4f29fe7 --- /dev/null +++ b/src/rtf2html/rtf2html.cpp @@ -0,0 +1,531 @@ +/* This is RTF to HTML converter, implemented as a text filter, generally. + Copyright (C) 2003 Valentin Lavrinenko, [email protected] + + available at http://rtf2html.sf.net + + Original available under the terms of the GNU LGPL2, and according + to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "rtf2html.h" +#include "rtf_table.h" +#include "rtf_tools.h" +#include "rtf_keyword.h" +#include "fmt_opts.h" + +#include <cstdlib> +#include <stdexcept> +#include <fstream> +#include <iostream> +#include <string> + +using Tellico::RTF2HTML; +using namespace rtf; + +RTF2HTML::RTF2HTML(const QString& text) : m_text(text) { +} + +QString RTF2HTML::toHTML() const { + std::string str_in = m_text; + + std::string::iterator buf_in=str_in.begin(), buf_in_end=str_in.end(); + colorvect colortbl; + fontmap fonttbl; + std::string title; + + bool bAsterisk=false; + fo_stack foStack; + formatting_options cur_options; + std::string html; + html_text par_html(cur_options); + + /* CellDefs in rtf are really queer. We'll keep a list of them in main() + and will give an iterator into this list to a row */ + table_cell_defs_list CellDefsList; + table_cell_defs_list::iterator CurCellDefs; + table_cell_def* tcdCurCellDef=new table_cell_def; + table_cell* tcCurCell=new table_cell; + table_row* trCurRow=new table_row; + table* tblCurTable=new table; + int iLastRowLeft=0, iLastRowHeight=0; + std::string t_str; + + bool bInTable=false; + int iDocWidth=12240; + int iMarginLeft=1800; + while(buf_in!=buf_in_end) + { + switch (*buf_in) + { + case '\\': + { + rtf_keyword kw(++buf_in); + if (kw.is_control_char()) + switch (kw.control_char()) + { + case '\\': case '{': case '}': + par_html.write(kw.control_char()); + break; + case '\'': + { + std::string stmp(1,*buf_in++); + stmp+=*buf_in++; + int code=std::strtol(stmp.c_str(), NULL, 16); + switch (code) + { + case 167: + par_html.write("•"); + break; + case 188: + par_html.write("…"); + break; + default: + par_html.write((char)code); + } + break; + } + case '*': + bAsterisk=true; + break; + case '~': + par_html.write(" "); + break; + case '\n': + par_html.write("<br><br>"); + break; + } + else //kw.is_control_char + if (bAsterisk) + { + bAsterisk=false; + skip_group(buf_in); + } + else + { + switch (kw.keyword()) + { + case rtf_keyword::rkw_filetbl: + case rtf_keyword::rkw_stylesheet: + case rtf_keyword::rkw_header: + case rtf_keyword::rkw_footer: case rtf_keyword::rkw_headerf: + case rtf_keyword::rkw_footerf: case rtf_keyword::rkw_pict: + case rtf_keyword::rkw_object: + // we'll skip such groups + skip_group(buf_in); + break; + // document title + case rtf_keyword::rkw_info: + { + int depth=1; + bool in_title=false; + while (depth>0) + { +// std::cout<<std::string(buf_in).substr(0,20)<<"\t"<<depth<<std::endl; + switch (*buf_in) + { + case '\\': + { + rtf_keyword kw(++buf_in); + if (kw.keyword()==rtf_keyword::rkw_title) + in_title=true; + break; + } + case '{': ++depth; ++buf_in; break; + case '}': --depth; ++buf_in; in_title=false; break; + default: if (in_title) title+=*buf_in; ++buf_in; break; + } + } + break; + } + // color table + case rtf_keyword::rkw_colortbl: + { + color clr; + while (*buf_in!='}') + { + switch (*buf_in) + { + case '\\': + { + rtf_keyword kw(++buf_in); + switch (kw.keyword()) + { + case rtf_keyword::rkw_red: + clr.r=kw.parameter(); + break; + case rtf_keyword::rkw_green: + clr.g=kw.parameter(); + break; + case rtf_keyword::rkw_blue: + clr.b=kw.parameter(); + break; + default: break; + } + break; + } + case ';': + colortbl.push_back(clr); + ++buf_in; + break; + default: + ++buf_in; + break; + } + } + ++buf_in; + break; + } + // font table + case rtf_keyword::rkw_fonttbl: + { + font fnt; + int font_num; + bool full_name=false; + bool in_font=false; + while (! (*buf_in=='}' && !in_font)) + { + switch (*buf_in) + { + case '\\': + { + rtf_keyword kw(++buf_in); + if (kw.is_control_char() && kw.control_char()=='*') + skip_group(buf_in); + else + switch (kw.keyword()) + { + case rtf_keyword::rkw_f: + font_num=kw.parameter(); + break; + case rtf_keyword::rkw_fprq: + fnt.pitch=kw.parameter(); + break; + case rtf_keyword::rkw_fcharset: + fnt.charset=kw.parameter(); + break; + case rtf_keyword::rkw_fnil: + fnt.family=font::ff_none; + break; + case rtf_keyword::rkw_froman: + fnt.family=font::ff_serif; + break; + case rtf_keyword::rkw_fswiss: + fnt.family=font::ff_sans_serif; + break; + case rtf_keyword::rkw_fmodern: + fnt.family=font::ff_monospace; + break; + case rtf_keyword::rkw_fscript: + fnt.family=font::ff_cursive; + break; + case rtf_keyword::rkw_fdecor: + fnt.family=font::ff_fantasy; + break; + default: break; + } + break; + } + case '{': + in_font=true; + ++buf_in; + break; + case '}': + in_font=false; + fonttbl.insert(std::make_pair(font_num, fnt)); + fnt=font(); + full_name=false; + ++buf_in; + break; + case ';': + full_name=true; + ++buf_in; + break; + default: + if (!full_name && in_font) + fnt.name+=*buf_in; + ++buf_in; + break; + } + } + ++buf_in; + break; + } + // special characters + case rtf_keyword::rkw_line: case rtf_keyword::rkw_softline: + par_html.write("<br>"); + break; + case rtf_keyword::rkw_tab: + par_html.write(" "); // maybe, this can be done better + break; + case rtf_keyword::rkw_enspace: case rtf_keyword::rkw_emspace: + par_html.write(" "); + break; + case rtf_keyword::rkw_qmspace: + par_html.write(" "); + break; + case rtf_keyword::rkw_endash: + par_html.write("–"); + break; + case rtf_keyword::rkw_emdash: + par_html.write("—"); + break; + case rtf_keyword::rkw_bullet: + par_html.write("•"); + break; + case rtf_keyword::rkw_lquote: + par_html.write("‘"); + break; + case rtf_keyword::rkw_rquote: + par_html.write("’"); + break; + case rtf_keyword::rkw_ldblquote: + par_html.write("“"); + break; + case rtf_keyword::rkw_rdblquote: + par_html.write("”"); + break; + // paragraph formatting + case rtf_keyword::rkw_ql: + cur_options.papAlign=formatting_options::align_left; + break; + case rtf_keyword::rkw_qr: + cur_options.papAlign=formatting_options::align_right; + break; + case rtf_keyword::rkw_qc: + cur_options.papAlign=formatting_options::align_center; + break; + case rtf_keyword::rkw_qj: + cur_options.papAlign=formatting_options::align_justify; + break; + case rtf_keyword::rkw_fi: + cur_options.papFirst=(int)rint(kw.parameter()/20); + break; + case rtf_keyword::rkw_li: + cur_options.papLeft=(int)rint(kw.parameter()/20); + break; + case rtf_keyword::rkw_ri: + cur_options.papRight=(int)rint(kw.parameter()/20); + break; + case rtf_keyword::rkw_sb: + cur_options.papBefore=(int)rint(kw.parameter()/20); + break; + case rtf_keyword::rkw_sa: + cur_options.papAfter=(int)rint(kw.parameter()/20); + break; + case rtf_keyword::rkw_pard: + cur_options.papBefore=cur_options.papAfter=0; + cur_options.papLeft=cur_options.papRight=0; + cur_options.papFirst=0; + cur_options.papAlign=formatting_options::align_left; + cur_options.papInTbl=false; + break; + case rtf_keyword::rkw_par: + case rtf_keyword::rkw_sect: + t_str=cur_options.get_par_str()+par_html.str() + +" "+par_html.close()+"</p>\n"; + if (!bInTable) + { + html+=t_str; + } + else + { + if (cur_options.papInTbl) + { + tcCurCell->Text+=t_str; + } + else + { + html+=tblCurTable->make()+t_str; + bInTable=false; + tblCurTable=new table; + } + } + par_html.clear(); + break; + // character formatting + case rtf_keyword::rkw_super: + cur_options.chpVAlign= + kw.parameter()==0?formatting_options::va_normal + :formatting_options::va_sup; + break; + case rtf_keyword::rkw_sub: + cur_options.chpVAlign= + kw.parameter()==0?formatting_options::va_normal + :formatting_options::va_sub; + break; + case rtf_keyword::rkw_b: + cur_options.chpBold=!(kw.parameter()==0); + break; + case rtf_keyword::rkw_i: + cur_options.chpItalic=!(kw.parameter()==0); + break; + case rtf_keyword::rkw_ul: + cur_options.chpUnderline=!(kw.parameter()==0); + break; + case rtf_keyword::rkw_ulnone: + cur_options.chpUnderline=false; + break; + case rtf_keyword::rkw_fs: + cur_options.chpFontSize=kw.parameter(); + break; + case rtf_keyword::rkw_cf: + cur_options.chpFColor=colortbl[kw.parameter()]; + break; + case rtf_keyword::rkw_cb: + cur_options.chpBColor=colortbl[kw.parameter()]; + break; + case rtf_keyword::rkw_highlight: + cur_options.chpHighlight=kw.parameter(); + break; + case rtf_keyword::rkw_f: + cur_options.chpFont=fonttbl[kw.parameter()]; + break; + case rtf_keyword::rkw_plain: + cur_options.chpBold=cur_options.chpItalic + =cur_options.chpUnderline=false; + cur_options.chpVAlign=formatting_options::va_normal; + cur_options.chpFontSize=cur_options.chpHighlight=0; + cur_options.chpFColor=cur_options.chpBColor=color(); + cur_options.chpFont=font(); + break; + // table formatting + case rtf_keyword::rkw_intbl: + cur_options.papInTbl=true; + break; + case rtf_keyword::rkw_trowd: + CurCellDefs=CellDefsList.insert(CellDefsList.end(), + table_cell_defs()); + case rtf_keyword::rkw_row: + if (!trCurRow->Cells.empty()) + { + trCurRow->CellDefs=CurCellDefs; + if (trCurRow->Left==-1000) + trCurRow->Left=iLastRowLeft; + if (trCurRow->Height==-1000) + trCurRow->Height=iLastRowHeight; + tblCurTable->push_back(trCurRow); + trCurRow=new table_row; + } + bInTable=true; + break; + case rtf_keyword::rkw_cell: + t_str=cur_options.get_par_str()+par_html.str() + +" "+par_html.close()+"</p>\n"; + tcCurCell->Text+=t_str; + par_html.clear(); + trCurRow->Cells.push_back(tcCurCell); + tcCurCell=new table_cell; + break; + case rtf_keyword::rkw_cellx: + tcdCurCellDef->Right=kw.parameter(); + CurCellDefs->push_back(tcdCurCellDef); + tcdCurCellDef=new table_cell_def; + break; + case rtf_keyword::rkw_trleft: + trCurRow->Left=kw.parameter(); + iLastRowLeft=kw.parameter(); + break; + case rtf_keyword::rkw_trrh: + trCurRow->Height=kw.parameter(); + iLastRowHeight=kw.parameter(); + break; + case rtf_keyword::rkw_clvmgf: + tcdCurCellDef->FirstMerged=true; + break; + case rtf_keyword::rkw_clvmrg: + tcdCurCellDef->Merged=true; + break; + case rtf_keyword::rkw_clbrdrb: + tcdCurCellDef->BorderBottom=true; + tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderBottom); + break; + case rtf_keyword::rkw_clbrdrt: + tcdCurCellDef->BorderTop=true; + tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderTop); + break; + case rtf_keyword::rkw_clbrdrl: + tcdCurCellDef->BorderLeft=true; + tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderLeft); + break; + case rtf_keyword::rkw_clbrdrr: + tcdCurCellDef->BorderRight=true; + tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderRight); + break; + case rtf_keyword::rkw_brdrnone: + if (tcdCurCellDef->ActiveBorder!=NULL) + { + *(tcdCurCellDef->ActiveBorder)=false; + } + break; + case rtf_keyword::rkw_clvertalt: + tcdCurCellDef->VAlign=table_cell_def::valign_top; + break; + case rtf_keyword::rkw_clvertalc: + tcdCurCellDef->VAlign=table_cell_def::valign_center; + break; + case rtf_keyword::rkw_clvertalb: + tcdCurCellDef->VAlign=table_cell_def::valign_bottom; + break; + // page formatting + case rtf_keyword::rkw_paperw: + iDocWidth=kw.parameter(); + break; + case rtf_keyword::rkw_margl: + iMarginLeft=kw.parameter(); + break; + default: break; + } + } + break; + } + case '{': + // perform group opening actions here + foStack.push(cur_options); + ++buf_in; + break; + case '}': + // perform group closing actions here + cur_options=foStack.top(); + foStack.pop(); + ++buf_in; + break; + case 13: + case 10: + ++buf_in; + break; + case '<': + par_html.write("<"); + ++buf_in; + break; + case '>': + par_html.write(">"); + ++buf_in; + break; +/* case ' ': + par_html.write(" "); + ++buf_in; + break;*/ + default: + par_html.write(*buf_in++); + } + } + + t_str=cur_options.get_par_str()+par_html.str() + +" "+par_html.close()+"</p>\n"; + html+=t_str; + + delete tcCurCell; + delete trCurRow; + delete tblCurTable; + delete tcdCurCellDef; + + return html; +} + |