diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/contrib/rtf2html')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING | 340 | ||||
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile | 11 | ||||
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README | 16 | ||||
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h | 257 | ||||
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h | 257 | ||||
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c | 910 |
6 files changed, 1791 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING new file mode 100644 index 00000000..d60c31a9 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. 
You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile new file mode 100644 index 00000000..5409f487 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile @@ -0,0 +1,11 @@ +CC= gcc +CFLAGS= -O2 -Wall + +rtf2html: rtf2html.c + $(CC) $(CFLAGS) -o rtf2html rtf2html.c + +install: rtf2html + cp rtf2html /usr/local/bin + +clean: + rm -f rtf2html diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README new file mode 100644 index 00000000..9f3084d4 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README @@ -0,0 +1,16 @@ +rtf2html - an RTF to HTML conversion program + +This version of rtf2html has been developed by +David Lippi <[email protected]> and Gabriele Bartolini +<[email protected]>, based on an earlier work +by Chuck Shotton <[email protected]> +(see http://www.w3.org/Tools/HTMLGeneration/rtf2html.html) +and Dmitry Potapov <[email protected]>. + +This version can handle character set recognition at run-time: +currently, the ANSI Windows 1252 code and the Macintosh's are +supported. 
/*
 * Residue of the flattened diff page that shares these source lines with the
 * table (end of the README, diff header of charset1252.h), kept verbatim:
 *
 * + +For copyright details, see the file COPYING in your distribution +or the GNU General Public License (GPL) version 2 or later +<http://www.gnu.org/copyleft/gpl.html> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h new file mode 100644 index 00000000..d2b40ba0 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h @@ -0,0 +1,257 @@
 */

/*
 * charset1252 - HTML output strings for Windows code page 1252 characters.
 *
 * The string for character code c (1..255) is stored at index c - 1: the
 * first initializer belongs to code 1, so only 255 of the 256 slots are
 * initialized and charset1252[255] is NULL.
 * NOTE(review): presumably callers look characters up as charset1252[c - 1];
 * confirm against the users of this table.
 *
 * Every string is plain ASCII.  Characters above 0x7f and the characters
 * that have a markup meaning in HTML ('&', '<', '>') are written as HTML
 * character references, so the generated output does not depend on the
 * encoding of this source file.
 *
 * Corrections relative to the previous table:
 *   0x7c  was the broken bar (the 0xa6 character); it is the vertical bar.
 *   0xb8  duplicated the c-cedilla of 0xe7; CP1252 0xb8 is the cedilla.
 *   0xd4  was "&Oring;", which is not an HTML entity; it is O-circumflex.
 *
 * Codes without a CP1252 assignment (0x81, 0x8d, 0x8f, 0x90, 0x9d), DEL
 * (0x7f) and the dashes 0x96/0x97 are rendered as a blank, as before.
 */

/* String literals are char arrays; the cast matches the element type. */
#define RTF_CS(s) ((unsigned char *)(s))

unsigned char *charset1252[256] = {
    RTF_CS(""), /* 1 - 1 */
    RTF_CS(""), /* 2 - 2 */
    RTF_CS(""), /* 3 - 3 */
    RTF_CS(""), /* 4 - 4 */
    RTF_CS(""), /* 5 - 5 */
    RTF_CS(""), /* 6 - 6 */
    RTF_CS(""), /* 7 - 7 */
    RTF_CS(""), /* 8 - 8 */
    RTF_CS("\t"), /* 9 - 9 */
    RTF_CS("\n"), /* 10 - a */
    RTF_CS(""), /* 11 - b */
    RTF_CS(""), /* 12 - c */
    RTF_CS("\r"), /* 13 - d */
    RTF_CS(""), /* 14 - e */
    RTF_CS(""), /* 15 - f */
    RTF_CS(""), /* 16 - 10 */
    RTF_CS(""), /* 17 - 11 */
    RTF_CS(""), /* 18 - 12 */
    RTF_CS(""), /* 19 - 13 */
    RTF_CS(""), /* 20 - 14 */
    RTF_CS(""), /* 21 - 15 */
    RTF_CS(""), /* 22 - 16 */
    RTF_CS(""), /* 23 - 17 */
    RTF_CS(""), /* 24 - 18 */
    RTF_CS(""), /* 25 - 19 */
    RTF_CS(""), /* 26 - 1a */
    RTF_CS(""), /* 27 - 1b */
    RTF_CS(""), /* 28 - 1c */
    RTF_CS(""), /* 29 - 1d */
    RTF_CS(""), /* 30 - 1e */
    RTF_CS(""), /* 31 - 1f */
    RTF_CS(" "), /* 32 - 20 */
    RTF_CS("!"), /* 33 - 21 */
    RTF_CS("\""), /* 34 - 22 */
    RTF_CS("#"), /* 35 - 23 */
    RTF_CS("$"), /* 36 - 24 */
    RTF_CS("%"), /* 37 - 25 */
    RTF_CS("&amp;"), /* 38 - 26 */
    RTF_CS("'"), /* 39 - 27 */
    RTF_CS("("), /* 40 - 28 */
    RTF_CS(")"), /* 41 - 29 */
    RTF_CS("*"), /* 42 - 2a */
    RTF_CS("+"), /* 43 - 2b */
    RTF_CS(","), /* 44 - 2c */
    RTF_CS("-"), /* 45 - 2d */
    RTF_CS("."), /* 46 - 2e */
    RTF_CS("/"), /* 47 - 2f */
    RTF_CS("0"), /* 48 - 30 */
    RTF_CS("1"), /* 49 - 31 */
    RTF_CS("2"), /* 50 - 32 */
    RTF_CS("3"), /* 51 - 33 */
    RTF_CS("4"), /* 52 - 34 */
    RTF_CS("5"), /* 53 - 35 */
    RTF_CS("6"), /* 54 - 36 */
    RTF_CS("7"), /* 55 - 37 */
    RTF_CS("8"), /* 56 - 38 */
    RTF_CS("9"), /* 57 - 39 */
    RTF_CS(":"), /* 58 - 3a */
    RTF_CS(";"), /* 59 - 3b */
    RTF_CS("&lt;"), /* 60 - 3c */
    RTF_CS("="), /* 61 - 3d */
    RTF_CS("&gt;"), /* 62 - 3e */
    RTF_CS("?"), /* 63 - 3f */
    RTF_CS("@"), /* 64 - 40 */
    RTF_CS("A"), /* 65 - 41 */
    RTF_CS("B"), /* 66 - 42 */
    RTF_CS("C"), /* 67 - 43 */
    RTF_CS("D"), /* 68 - 44 */
    RTF_CS("E"), /* 69 - 45 */
    RTF_CS("F"), /* 70 - 46 */
    RTF_CS("G"), /* 71 - 47 */
    RTF_CS("H"), /* 72 - 48 */
    RTF_CS("I"), /* 73 - 49 */
    RTF_CS("J"), /* 74 - 4a */
    RTF_CS("K"), /* 75 - 4b */
    RTF_CS("L"), /* 76 - 4c */
    RTF_CS("M"), /* 77 - 4d */
    RTF_CS("N"), /* 78 - 4e */
    RTF_CS("O"), /* 79 - 4f */
    RTF_CS("P"), /* 80 - 50 */
    RTF_CS("Q"), /* 81 - 51 */
    RTF_CS("R"), /* 82 - 52 */
    RTF_CS("S"), /* 83 - 53 */
    RTF_CS("T"), /* 84 - 54 */
    RTF_CS("U"), /* 85 - 55 */
    RTF_CS("V"), /* 86 - 56 */
    RTF_CS("W"), /* 87 - 57 */
    RTF_CS("X"), /* 88 - 58 */
    RTF_CS("Y"), /* 89 - 59 */
    RTF_CS("Z"), /* 90 - 5a */
    RTF_CS("["), /* 91 - 5b */
    RTF_CS("\\"), /* 92 - 5c */
    RTF_CS("]"), /* 93 - 5d */
    RTF_CS("^"), /* 94 - 5e */
    RTF_CS("_"), /* 95 - 5f */
    RTF_CS("`"), /* 96 - 60 */
    RTF_CS("a"), /* 97 - 61 */
    RTF_CS("b"), /* 98 - 62 */
    RTF_CS("c"), /* 99 - 63 */
    RTF_CS("d"), /* 100 - 64 */
    RTF_CS("e"), /* 101 - 65 */
    RTF_CS("f"), /* 102 - 66 */
    RTF_CS("g"), /* 103 - 67 */
    RTF_CS("h"), /* 104 - 68 */
    RTF_CS("i"), /* 105 - 69 */
    RTF_CS("j"), /* 106 - 6a */
    RTF_CS("k"), /* 107 - 6b */
    RTF_CS("l"), /* 108 - 6c */
    RTF_CS("m"), /* 109 - 6d */
    RTF_CS("n"), /* 110 - 6e */
    RTF_CS("o"), /* 111 - 6f */
    RTF_CS("p"), /* 112 - 70 */
    RTF_CS("q"), /* 113 - 71 */
    RTF_CS("r"), /* 114 - 72 */
    RTF_CS("s"), /* 115 - 73 */
    RTF_CS("t"), /* 116 - 74 */
    RTF_CS("u"), /* 117 - 75 */
    RTF_CS("v"), /* 118 - 76 */
    RTF_CS("w"), /* 119 - 77 */
    RTF_CS("x"), /* 120 - 78 */
    RTF_CS("y"), /* 121 - 79 */
    RTF_CS("z"), /* 122 - 7a */
    RTF_CS("{"), /* 123 - 7b */
    RTF_CS("|"), /* 124 - 7c */
    RTF_CS("}"), /* 125 - 7d */
    RTF_CS("~"), /* 126 - 7e */
    RTF_CS(" "), /* 127 - 7f */
    RTF_CS("&euro;"), /* 128 - 80 */
    RTF_CS(" "), /* 129 - 81 */
    RTF_CS("&sbquo;"), /* 130 - 82 */
    RTF_CS("&fnof;"), /* 131 - 83 */
    RTF_CS("&bdquo;"), /* 132 - 84 */
    RTF_CS("&hellip;"), /* 133 - 85 */
    RTF_CS("&dagger;"), /* 134 - 86 */
    RTF_CS("&Dagger;"), /* 135 - 87 */
    RTF_CS("&circ;"), /* 136 - 88 */
    RTF_CS("&permil;"), /* 137 - 89 */
    RTF_CS("&Scaron;"), /* 138 - 8a */
    RTF_CS("&lsaquo;"), /* 139 - 8b */
    RTF_CS("&OElig;"), /* 140 - 8c */
    RTF_CS(" "), /* 141 - 8d */
    RTF_CS("&#381;"), /* 142 - 8e */
    RTF_CS(" "), /* 143 - 8f */
    RTF_CS(" "), /* 144 - 90 */
    RTF_CS("&lsquo;"), /* 145 - 91 */
    RTF_CS("&rsquo;"), /* 146 - 92 */
    RTF_CS("&ldquo;"), /* 147 - 93 */
    RTF_CS("&rdquo;"), /* 148 - 94 */
    RTF_CS("&bull;"), /* 149 - 95 */
    RTF_CS(" "), /* 150 - 96 */
    RTF_CS(" "), /* 151 - 97 */
    RTF_CS("&tilde;"), /* 152 - 98 */
    RTF_CS("&trade;"), /* 153 - 99 */
    RTF_CS("&scaron;"), /* 154 - 9a */
    RTF_CS("&rsaquo;"), /* 155 - 9b */
    RTF_CS("&oelig;"), /* 156 - 9c */
    RTF_CS(" "), /* 157 - 9d */
    RTF_CS("&#382;"), /* 158 - 9e */
    RTF_CS("&Yuml;"), /* 159 - 9f */
    RTF_CS("&nbsp;"), /* 160 - a0 */
    RTF_CS("&iexcl;"), /* 161 - a1 */
    RTF_CS("&cent;"), /* 162 - a2 */
    RTF_CS("&pound;"), /* 163 - a3 */
    RTF_CS("&curren;"), /* 164 - a4 */
    RTF_CS("&yen;"), /* 165 - a5 */
    RTF_CS("&brvbar;"), /* 166 - a6 */
    RTF_CS("&sect;"), /* 167 - a7 */
    RTF_CS("&uml;"), /* 168 - a8 */
    RTF_CS("&copy;"), /* 169 - a9 */
    RTF_CS("&ordf;"), /* 170 - aa */
    RTF_CS("&laquo;"), /* 171 - ab */
    RTF_CS("&not;"), /* 172 - ac */
    RTF_CS("&shy;"), /* 173 - ad */
    RTF_CS("&reg;"), /* 174 - ae */
    RTF_CS("&macr;"), /* 175 - af */
    RTF_CS("&deg;"), /* 176 - b0 */
    RTF_CS("&plusmn;"), /* 177 - b1 */
    RTF_CS("&sup2;"), /* 178 - b2 */
    RTF_CS("&sup3;"), /* 179 - b3 */
    RTF_CS("&acute;"), /* 180 - b4 */
    RTF_CS("&micro;"), /* 181 - b5 */
    RTF_CS("&para;"), /* 182 - b6 */
    RTF_CS("&middot;"), /* 183 - b7 */
    RTF_CS("&cedil;"), /* 184 - b8 */
    RTF_CS("&sup1;"), /* 185 - b9 */
    RTF_CS("&ordm;"), /* 186 - ba */
    RTF_CS("&raquo;"), /* 187 - bb */
    RTF_CS("&frac14;"), /* 188 - bc */
    RTF_CS("&frac12;"), /* 189 - bd */
    RTF_CS("&frac34;"), /* 190 - be */
    RTF_CS("&iquest;"), /* 191 - bf */
    RTF_CS("&Agrave;"), /* 192 - c0 */
    RTF_CS("&Aacute;"), /* 193 - c1 */
    RTF_CS("&Acirc;"), /* 194 - c2 */
    RTF_CS("&Atilde;"), /* 195 - c3 */
    RTF_CS("&Auml;"), /* 196 - c4 */
    RTF_CS("&Aring;"), /* 197 - c5 */
    RTF_CS("&AElig;"), /* 198 - c6 */
    RTF_CS("&Ccedil;"), /* 199 - c7 */
    RTF_CS("&Egrave;"), /* 200 - c8 */
    RTF_CS("&Eacute;"), /* 201 - c9 */
    RTF_CS("&Ecirc;"), /* 202 - ca */
    RTF_CS("&Euml;"), /* 203 - cb */
    RTF_CS("&Igrave;"), /* 204 - cc */
    RTF_CS("&Iacute;"), /* 205 - cd */
    RTF_CS("&Icirc;"), /* 206 - ce */
    RTF_CS("&Iuml;"), /* 207 - cf */
    RTF_CS("&ETH;"), /* 208 - d0 */
    RTF_CS("&Ntilde;"), /* 209 - d1 */
    RTF_CS("&Ograve;"), /* 210 - d2 */
    RTF_CS("&Oacute;"), /* 211 - d3 */
    RTF_CS("&Ocirc;"), /* 212 - d4 */
    RTF_CS("&Otilde;"), /* 213 - d5 */
    RTF_CS("&Ouml;"), /* 214 - d6 */
    RTF_CS("&times;"), /* 215 - d7 */
    RTF_CS("&Oslash;"), /* 216 - d8 */
    RTF_CS("&Ugrave;"), /* 217 - d9 */
    RTF_CS("&Uacute;"), /* 218 - da */
    RTF_CS("&Ucirc;"), /* 219 - db */
    RTF_CS("&Uuml;"), /* 220 - dc */
    RTF_CS("&Yacute;"), /* 221 - dd */
    RTF_CS("&THORN;"), /* 222 - de */
    RTF_CS("&szlig;"), /* 223 - df */
    RTF_CS("&agrave;"), /* 224 - e0 */
    RTF_CS("&aacute;"), /* 225 - e1 */
    RTF_CS("&acirc;"), /* 226 - e2 */
    RTF_CS("&atilde;"), /* 227 - e3 */
    RTF_CS("&auml;"), /* 228 - e4 */
    RTF_CS("&aring;"), /* 229 - e5 */
    RTF_CS("&aelig;"), /* 230 - e6 */
    RTF_CS("&ccedil;"), /* 231 - e7 */
    RTF_CS("&egrave;"), /* 232 - e8 */
    RTF_CS("&eacute;"), /* 233 - e9 */
    RTF_CS("&ecirc;"), /* 234 - ea */
    RTF_CS("&euml;"), /* 235 - eb */
    RTF_CS("&igrave;"), /* 236 - ec */
    RTF_CS("&iacute;"), /* 237 - ed */
    RTF_CS("&icirc;"), /* 238 - ee */
    RTF_CS("&iuml;"), /* 239 - ef */
    RTF_CS("&eth;"), /* 240 - f0 */
    RTF_CS("&ntilde;"), /* 241 - f1 */
    RTF_CS("&ograve;"), /* 242 - f2 */
    RTF_CS("&oacute;"), /* 243 - f3 */
    RTF_CS("&ocirc;"), /* 244 - f4 */
    RTF_CS("&otilde;"), /* 245 - f5 */
    RTF_CS("&ouml;"), /* 246 - f6 */
    RTF_CS("&divide;"), /* 247 - f7 */
    RTF_CS("&oslash;"), /* 248 - f8 */
    RTF_CS("&ugrave;"), /* 249 - f9 */
    RTF_CS("&uacute;"), /* 250 - fa */
    RTF_CS("&ucirc;"), /* 251 - fb */
    RTF_CS("&uuml;"), /* 252 - fc */
    RTF_CS("&yacute;"), /* 253 - fd */
    RTF_CS("&thorn;"), /* 254 - fe */
    RTF_CS("&yuml;") /* 255 - ff */
};

#undef RTF_CS

/* Residue of the flattened diff page (start of the next diff header): diff --git */
/*
 * Residue of the flattened diff page that shares these source lines with the
 * table (diff header of charsetmac.h), kept verbatim:
 *
 * a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h new file mode 100644 index 00000000..8c4aeca0 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h @@ -0,0 +1,257 @@
 */

/*
 * mac - HTML output strings for Macintosh (Mac OS Roman) characters.
 *
 * The string for character code c (1..255) is stored at index c - 1: the
 * first initializer belongs to code 1, so only 255 of the 256 slots are
 * initialized and mac[255] is NULL.
 * NOTE(review): presumably callers look characters up as mac[c - 1];
 * confirm against the users of this table.
 *
 * Every string is plain ASCII.  Characters above 0x7f and the characters
 * that have a markup meaning in HTML ('&', '<', '>') are written as HTML
 * character references.
 *
 * The previous table was largely a copy of the Windows 1252 table with a
 * handful of overrides, so most codes above 0x7f produced the wrong
 * character (for example 0x88 gave a-acute instead of a-grave and 0x98 gave
 * a tilde instead of o-grave), 0xd4 held the invalid entity "&Oring;" and
 * 0x8f carried a stray trailing space.  The upper half now follows the
 * Mac OS Roman layout.
 *
 * Deliberate simplifications:
 *   0xd2/0xd3 (curly double quotes) stay the straight quote the previous
 *             table used; 0xd4/0xd5 (curly single quotes) follow suit.
 *   0xd0/0xd1 (dashes), 0xf0 (Apple logo, no standard character) and DEL
 *             (0x7f) are rendered as a blank.
 */

/* String literals are char arrays; the cast matches the element type. */
#define RTF_CS(s) ((unsigned char *)(s))

unsigned char *mac[256] = {
    RTF_CS(""), /* 1 - 1 */
    RTF_CS(""), /* 2 - 2 */
    RTF_CS(""), /* 3 - 3 */
    RTF_CS(""), /* 4 - 4 */
    RTF_CS(""), /* 5 - 5 */
    RTF_CS(""), /* 6 - 6 */
    RTF_CS(""), /* 7 - 7 */
    RTF_CS(""), /* 8 - 8 */
    RTF_CS("\t"), /* 9 - 9 */
    RTF_CS("\n"), /* 10 - a */
    RTF_CS(""), /* 11 - b */
    RTF_CS(""), /* 12 - c */
    RTF_CS("\r"), /* 13 - d */
    RTF_CS(""), /* 14 - e */
    RTF_CS(""), /* 15 - f */
    RTF_CS(""), /* 16 - 10 */
    RTF_CS(""), /* 17 - 11 */
    RTF_CS(""), /* 18 - 12 */
    RTF_CS(""), /* 19 - 13 */
    RTF_CS(""), /* 20 - 14 */
    RTF_CS(""), /* 21 - 15 */
    RTF_CS(""), /* 22 - 16 */
    RTF_CS(""), /* 23 - 17 */
    RTF_CS(""), /* 24 - 18 */
    RTF_CS(""), /* 25 - 19 */
    RTF_CS(""), /* 26 - 1a */
    RTF_CS(""), /* 27 - 1b */
    RTF_CS(""), /* 28 - 1c */
    RTF_CS(""), /* 29 - 1d */
    RTF_CS(""), /* 30 - 1e */
    RTF_CS(""), /* 31 - 1f */
    RTF_CS(" "), /* 32 - 20 */
    RTF_CS("!"), /* 33 - 21 */
    RTF_CS("\""), /* 34 - 22 */
    RTF_CS("#"), /* 35 - 23 */
    RTF_CS("$"), /* 36 - 24 */
    RTF_CS("%"), /* 37 - 25 */
    RTF_CS("&amp;"), /* 38 - 26 */
    RTF_CS("'"), /* 39 - 27 */
    RTF_CS("("), /* 40 - 28 */
    RTF_CS(")"), /* 41 - 29 */
    RTF_CS("*"), /* 42 - 2a */
    RTF_CS("+"), /* 43 - 2b */
    RTF_CS(","), /* 44 - 2c */
    RTF_CS("-"), /* 45 - 2d */
    RTF_CS("."), /* 46 - 2e */
    RTF_CS("/"), /* 47 - 2f */
    RTF_CS("0"), /* 48 - 30 */
    RTF_CS("1"), /* 49 - 31 */
    RTF_CS("2"), /* 50 - 32 */
    RTF_CS("3"), /* 51 - 33 */
    RTF_CS("4"), /* 52 - 34 */
    RTF_CS("5"), /* 53 - 35 */
    RTF_CS("6"), /* 54 - 36 */
    RTF_CS("7"), /* 55 - 37 */
    RTF_CS("8"), /* 56 - 38 */
    RTF_CS("9"), /* 57 - 39 */
    RTF_CS(":"), /* 58 - 3a */
    RTF_CS(";"), /* 59 - 3b */
    RTF_CS("&lt;"), /* 60 - 3c */
    RTF_CS("="), /* 61 - 3d */
    RTF_CS("&gt;"), /* 62 - 3e */
    RTF_CS("?"), /* 63 - 3f */
    RTF_CS("@"), /* 64 - 40 */
    RTF_CS("A"), /* 65 - 41 */
    RTF_CS("B"), /* 66 - 42 */
    RTF_CS("C"), /* 67 - 43 */
    RTF_CS("D"), /* 68 - 44 */
    RTF_CS("E"), /* 69 - 45 */
    RTF_CS("F"), /* 70 - 46 */
    RTF_CS("G"), /* 71 - 47 */
    RTF_CS("H"), /* 72 - 48 */
    RTF_CS("I"), /* 73 - 49 */
    RTF_CS("J"), /* 74 - 4a */
    RTF_CS("K"), /* 75 - 4b */
    RTF_CS("L"), /* 76 - 4c */
    RTF_CS("M"), /* 77 - 4d */
    RTF_CS("N"), /* 78 - 4e */
    RTF_CS("O"), /* 79 - 4f */
    RTF_CS("P"), /* 80 - 50 */
    RTF_CS("Q"), /* 81 - 51 */
    RTF_CS("R"), /* 82 - 52 */
    RTF_CS("S"), /* 83 - 53 */
    RTF_CS("T"), /* 84 - 54 */
    RTF_CS("U"), /* 85 - 55 */
    RTF_CS("V"), /* 86 - 56 */
    RTF_CS("W"), /* 87 - 57 */
    RTF_CS("X"), /* 88 - 58 */
    RTF_CS("Y"), /* 89 - 59 */
    RTF_CS("Z"), /* 90 - 5a */
    RTF_CS("["), /* 91 - 5b */
    RTF_CS("\\"), /* 92 - 5c */
    RTF_CS("]"), /* 93 - 5d */
    RTF_CS("^"), /* 94 - 5e */
    RTF_CS("_"), /* 95 - 5f */
    RTF_CS("`"), /* 96 - 60 */
    RTF_CS("a"), /* 97 - 61 */
    RTF_CS("b"), /* 98 - 62 */
    RTF_CS("c"), /* 99 - 63 */
    RTF_CS("d"), /* 100 - 64 */
    RTF_CS("e"), /* 101 - 65 */
    RTF_CS("f"), /* 102 - 66 */
    RTF_CS("g"), /* 103 - 67 */
    RTF_CS("h"), /* 104 - 68 */
    RTF_CS("i"), /* 105 - 69 */
    RTF_CS("j"), /* 106 - 6a */
    RTF_CS("k"), /* 107 - 6b */
    RTF_CS("l"), /* 108 - 6c */
    RTF_CS("m"), /* 109 - 6d */
    RTF_CS("n"), /* 110 - 6e */
    RTF_CS("o"), /* 111 - 6f */
    RTF_CS("p"), /* 112 - 70 */
    RTF_CS("q"), /* 113 - 71 */
    RTF_CS("r"), /* 114 - 72 */
    RTF_CS("s"), /* 115 - 73 */
    RTF_CS("t"), /* 116 - 74 */
    RTF_CS("u"), /* 117 - 75 */
    RTF_CS("v"), /* 118 - 76 */
    RTF_CS("w"), /* 119 - 77 */
    RTF_CS("x"), /* 120 - 78 */
    RTF_CS("y"), /* 121 - 79 */
    RTF_CS("z"), /* 122 - 7a */
    RTF_CS("{"), /* 123 - 7b */
    RTF_CS("|"), /* 124 - 7c */
    RTF_CS("}"), /* 125 - 7d */
    RTF_CS("~"), /* 126 - 7e */
    RTF_CS(" "), /* 127 - 7f */
    RTF_CS("&Auml;"), /* 128 - 80 */
    RTF_CS("&Aring;"), /* 129 - 81 */
    RTF_CS("&Ccedil;"), /* 130 - 82 */
    RTF_CS("&Eacute;"), /* 131 - 83 */
    RTF_CS("&Ntilde;"), /* 132 - 84 */
    RTF_CS("&Ouml;"), /* 133 - 85 */
    RTF_CS("&Uuml;"), /* 134 - 86 */
    RTF_CS("&aacute;"), /* 135 - 87 */
    RTF_CS("&agrave;"), /* 136 - 88 */
    RTF_CS("&acirc;"), /* 137 - 89 */
    RTF_CS("&auml;"), /* 138 - 8a */
    RTF_CS("&atilde;"), /* 139 - 8b */
    RTF_CS("&aring;"), /* 140 - 8c */
    RTF_CS("&ccedil;"), /* 141 - 8d */
    RTF_CS("&eacute;"), /* 142 - 8e */
    RTF_CS("&egrave;"), /* 143 - 8f */
    RTF_CS("&ecirc;"), /* 144 - 90 */
    RTF_CS("&euml;"), /* 145 - 91 */
    RTF_CS("&iacute;"), /* 146 - 92 */
    RTF_CS("&igrave;"), /* 147 - 93 */
    RTF_CS("&icirc;"), /* 148 - 94 */
    RTF_CS("&iuml;"), /* 149 - 95 */
    RTF_CS("&ntilde;"), /* 150 - 96 */
    RTF_CS("&oacute;"), /* 151 - 97 */
    RTF_CS("&ograve;"), /* 152 - 98 */
    RTF_CS("&ocirc;"), /* 153 - 99 */
    RTF_CS("&ouml;"), /* 154 - 9a */
    RTF_CS("&otilde;"), /* 155 - 9b */
    RTF_CS("&uacute;"), /* 156 - 9c */
    RTF_CS("&ugrave;"), /* 157 - 9d */
    RTF_CS("&ucirc;"), /* 158 - 9e */
    RTF_CS("&uuml;"), /* 159 - 9f */
    RTF_CS("&dagger;"), /* 160 - a0 */
    RTF_CS("&deg;"), /* 161 - a1 */
    RTF_CS("&cent;"), /* 162 - a2 */
    RTF_CS("&pound;"), /* 163 - a3 */
    RTF_CS("&sect;"), /* 164 - a4 */
    RTF_CS("&bull;"), /* 165 - a5 */
    RTF_CS("&para;"), /* 166 - a6 */
    RTF_CS("&szlig;"), /* 167 - a7 */
    RTF_CS("&reg;"), /* 168 - a8 */
    RTF_CS("&copy;"), /* 169 - a9 */
    RTF_CS("&trade;"), /* 170 - aa */
    RTF_CS("&acute;"), /* 171 - ab */
    RTF_CS("&uml;"), /* 172 - ac */
    RTF_CS("&ne;"), /* 173 - ad */
    RTF_CS("&AElig;"), /* 174 - ae */
    RTF_CS("&Oslash;"), /* 175 - af */
    RTF_CS("&infin;"), /* 176 - b0 */
    RTF_CS("&plusmn;"), /* 177 - b1 */
    RTF_CS("&le;"), /* 178 - b2 */
    RTF_CS("&ge;"), /* 179 - b3 */
    RTF_CS("&yen;"), /* 180 - b4 */
    RTF_CS("&micro;"), /* 181 - b5 */
    RTF_CS("&part;"), /* 182 - b6 */
    RTF_CS("&sum;"), /* 183 - b7 */
    RTF_CS("&prod;"), /* 184 - b8 */
    RTF_CS("&pi;"), /* 185 - b9 */
    RTF_CS("&int;"), /* 186 - ba */
    RTF_CS("&ordf;"), /* 187 - bb */
    RTF_CS("&ordm;"), /* 188 - bc */
    RTF_CS("&Omega;"), /* 189 - bd */
    RTF_CS("&aelig;"), /* 190 - be */
    RTF_CS("&oslash;"), /* 191 - bf */
    RTF_CS("&iquest;"), /* 192 - c0 */
    RTF_CS("&iexcl;"), /* 193 - c1 */
    RTF_CS("&not;"), /* 194 - c2 */
    RTF_CS("&radic;"), /* 195 - c3 */
    RTF_CS("&fnof;"), /* 196 - c4 */
    RTF_CS("&asymp;"), /* 197 - c5 */
    RTF_CS("&#8710;"), /* 198 - c6 */
    RTF_CS("&laquo;"), /* 199 - c7 */
    RTF_CS("&raquo;"), /* 200 - c8 */
    RTF_CS("&hellip;"), /* 201 - c9 */
    RTF_CS("&nbsp;"), /* 202 - ca */
    RTF_CS("&Agrave;"), /* 203 - cb */
    RTF_CS("&Atilde;"), /* 204 - cc */
    RTF_CS("&Otilde;"), /* 205 - cd */
    RTF_CS("&OElig;"), /* 206 - ce */
    RTF_CS("&oelig;"), /* 207 - cf */
    RTF_CS(" "), /* 208 - d0 */
    RTF_CS(" "), /* 209 - d1 */
    RTF_CS("\""), /* 210 - d2 */
    RTF_CS("\""), /* 211 - d3 */
    RTF_CS("'"), /* 212 - d4 */
    RTF_CS("'"), /* 213 - d5 */
    RTF_CS("&divide;"), /* 214 - d6 */
    RTF_CS("&loz;"), /* 215 - d7 */
    RTF_CS("&yuml;"), /* 216 - d8 */
    RTF_CS("&Yuml;"), /* 217 - d9 */
    RTF_CS("&frasl;"), /* 218 - da */
    RTF_CS("&euro;"), /* 219 - db */
    RTF_CS("&lsaquo;"), /* 220 - dc */
    RTF_CS("&rsaquo;"), /* 221 - dd */
    RTF_CS("&#64257;"), /* 222 - de */
    RTF_CS("&#64258;"), /* 223 - df */
    RTF_CS("&Dagger;"), /* 224 - e0 */
    RTF_CS("&middot;"), /* 225 - e1 */
    RTF_CS("&sbquo;"), /* 226 - e2 */
    RTF_CS("&bdquo;"), /* 227 - e3 */
    RTF_CS("&permil;"), /* 228 - e4 */
    RTF_CS("&Acirc;"), /* 229 - e5 */
    RTF_CS("&Ecirc;"), /* 230 - e6 */
    RTF_CS("&Aacute;"), /* 231 - e7 */
    RTF_CS("&Euml;"), /* 232 - e8 */
    RTF_CS("&Egrave;"), /* 233 - e9 */
    RTF_CS("&Iacute;"), /* 234 - ea */
    RTF_CS("&Icirc;"), /* 235 - eb */
    RTF_CS("&Iuml;"), /* 236 - ec */
    RTF_CS("&Igrave;"), /* 237 - ed */
    RTF_CS("&Oacute;"), /* 238 - ee */
    RTF_CS("&Ocirc;"), /* 239 - ef */
    RTF_CS(" "), /* 240 - f0 */
    RTF_CS("&Ograve;"), /* 241 - f1 */
    RTF_CS("&Uacute;"), /* 242 - f2 */
    RTF_CS("&Ucirc;"), /* 243 - f3 */
    RTF_CS("&Ugrave;"), /* 244 - f4 */
    RTF_CS("&#305;"), /* 245 - f5 */
    RTF_CS("&circ;"), /* 246 - f6 */
    RTF_CS("&tilde;"), /* 247 - f7 */
    RTF_CS("&macr;"), /* 248 - f8 */
    RTF_CS("&#728;"), /* 249 - f9 */
    RTF_CS("&#729;"), /* 250 - fa */
    RTF_CS("&#730;"), /* 251 - fb */
    RTF_CS("&cedil;"), /* 252 - fc */
    RTF_CS("&#733;"), /* 253 - fd */
    RTF_CS("&#731;"), /* 254 - fe */
    RTF_CS("&#711;") /* 255 - ff */
};

#undef RTF_CS

/*
 * Residue of the flattened diff page (start of the rtf2html.c diff header),
 * kept verbatim:
 *
 * diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c new file mode 100644 index 00000000..d49140d4 --- /dev/null +++
 */
b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c @@ -0,0 +1,910 @@ +/* RTF2HTML.c, Chuck Shotton - 6/21/93 */ +/************************************************************************ + * This program takes a stab at converting RTF (Rich Text Format) files + * into HTML. There are some limitations that keep RTF from being able to + * easily represent things like in-line images and anchors as styles. In + * particular, RTF styles apply to entire "paragraphs", so anchors or + * images in the middle of a text stream can't easily be represented by + * styles. The intent is to ultimately use something like embedded text + * color changes to represent these constructs. + * + * In the meantime, you can take existing Word documents, apply the + * correct style sheet, and convert them to HTML with this tool. + * + * AUTHOR: Chuck Shotton, UT-Houston Academic Computing, + * + * Dmitry Potapov, CapitalSoft + * + * David Lippi, Comune di Prato, Italy + * + * Gabriele Bartolini, Comune di Prato, Italy + * + * USAGE: rtf2html [rtf_filename] + * + * BEHAVIOR: + * rtf2html will open the specified RTF input file or read from + * standard input, writing converted HTML to standard output. + * + * NOTES: + * The RTF document must be formatted with a style sheet that has + * style numberings that conform to the style_mappings table + * defined in this source file. Characters are converted according + * to the ANSI Windows 1252 code or Macintosh. + * + * MODIFICATIONS: + * 6/21/93 : Chuck Shotton - created version 1.0. 
+ * 11/26/98 : Dmitry Potapov - version 1.1 beta + * 05/07/04 : David Lippi, Gabriele Bartolini - version 1.2 + * + * Copyright (C) 2004 Comune di Prato + * + * For copyright details, see the file COPYING in your distribution + * or the GNU General Public License (GPL) version 2 or later + * <http://www.gnu.org/copyleft/gpl.html> + * + ************************************************************************/ + +/* Note, the source is formated with 4 character tabs */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include "charset1252.h" +#include "charsetmac.h" + +#ifdef _MSC_VER +# define strcasecmp _stricmp +#endif + +#ifndef TRUE +#define TRUE -1 +#define FALSE 0 +#endif + +#define MAX_LEVELS 40 /*defines the # of nested in-line styles (pairs of {})*/ +#define MAX_RTF_TOKEN 40 + +#define MAX_INLINE_STYLES 5 /*defines # of in-line styles, bold, italic, etc.*/ + +typedef struct tag_StyleState +{ + unsigned char s: MAX_INLINE_STYLES; +} TStyleState; + +typedef enum { s_plain, s_bold, s_italic, s_underline, s_hidden, /*in-line styles*/ + s_para, s_br, /*pseudo style*/ + s_h0, s_h1, s_h2, s_h3, s_h4, s_h5, s_h6 /*heading styles*/ +} StyleState; + +char *styles[][2] = { /*HTML Start and end tags for styles*/ + {"", ""}, + {"<strong>", "</strong>"}, + {"<em>", "</em>"}, + {"", ""}, + {"<!-- ", " -->"}, + {"\n", "\n"}, /* {"\n<p>", "</p>\n"}, */ + {"<br />\n",""}, + {"", ""}, + {"<h1>", "</h1>"}, + {"<h2>", "</h2>"}, + {"<h3>", "</h3>"}, + {"<h4>", "</h4>"}, + {"<h5>", "</h5>"}, + {"<h6>", "</h6>"} +}; + +/* style_mappings maps the style numbers in a RTF style sheet into one of the*/ +/* (currently) six paragraph-oriented HTML styles (i.e. heading 1 through 6.)*/ +/* Additional styles for lists, etc. should be added here. 
Style info */ +/* ultimately should be read from some sort of config file into these tables.*/ + +#define MAX_NAME_LEN 40 +char style_name[MAX_NAME_LEN]; + +#define STYLE_NUMBER 7 +char *style_namings[STYLE_NUMBER] = { + "", "heading 1", "heading 2", "heading 3", "heading 4", "heading 5", + "heading 6" +}; +char style_mappings[STYLE_NUMBER][MAX_RTF_TOKEN]; +char style_number[MAX_RTF_TOKEN]; + +/* RTF tokens that mean something to the parser. All others are ignored. */ + +typedef enum { + t_start, + t_fonttbl, t_colortbl, t_stylesheet, t_info, t_s, t_b, t_ul, t_ulw, + t_uld, t_uldb, t_i, t_v, t_plain, t_par, t_pict, t_tab, t_bullet, + t_cell, t_row, t_line, t_endash, t_emdash, t_rquote, + t_end +} TokenIndex; + +char *tokens[] = { + "###", + "fonttbl", "colortbl", "stylesheet", "info", "s", "b", "ul", "ulw", + "uld", "uldb", "i", "v", "plain", "par", "pict", "tab", "bullet", + "cell", "row", "line", "endash", "emdash", "rquote", + "###" +}; + +TStyleState style_state[MAX_LEVELS], curr_style; +short curr_heading; + +void (*RTF_DoControl)(FILE*,char*,char*); +char isBody; +char* title; +//FILE* f; + +short level, /*current {} nesting level*/ + skip_to_level,/*{} level to which parsing should skip (used to skip */ + /* font tables, style sheets, color tables, etc.) 
*/ + gobble, /*Flag set to indicate all input should be discarded */ + ignore_styles;/*Set to ignore inline style expansions after style use*/ + +/* Charset */ +unsigned char** charset_table; + +#define CHARSET_DEFAULT 0 // Index of the default charset to use +#define CHARSET_NUMBER 2 // Number of charset used +#define CHARSET_MAX_LENGTH 20 // Max numbero of char in the charset +// metadata used in rtf standard for the charset definition +unsigned char *charset[CHARSET_NUMBER] = { + "ansi", + "mac" +}; +// variable with the charset definition +unsigned char **charset_variablename[CHARSET_NUMBER] = { + charset1252, + mac +}; + +/**************************************/ + +int openfile (char * filename, FILE ** f) +{ + int rv = 1; + + if (filename) + { + if (!(*f = fopen (filename, "r"))) + { + fprintf (stderr, "\nError: Input file %s not found.\n", filename); + rv = 0; + } + else + { + title = filename; + } + } + else + { + *f = stdin; + title="STDIN"; + } + return rv; +} + +/**************************************/ + +int closefile (FILE * f) +{ + return fclose (f); +} + +/**************************************/ + +char RTF_GetChar( FILE* f ) +{ + char ch; + do + { + ch = fgetc( f ); + } while ((ch=='\r')||(ch=='\n')); + return ch; +} + +/**************************************/ + +char RTF_UnGetChar(FILE* f, char ch) +{ + return ungetc(ch, f); +} + +/**************************************/ + +void RTF_PutStr(char* s) +{ + if (gobble) return; + fputs(s, stdout); +} + +/**************************************/ + +void RTF_PutHeader() +{ + RTF_PutStr("<head>\n<title>"); + RTF_PutStr(title); + RTF_PutStr("</title>\n"); + RTF_PutStr("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n"); + RTF_PutStr("</head>\n"); +} + +/**************************************/ + +void RTF_PutChar(char ch) +{ + if (gobble) return; + if (!isBody) + { + RTF_PutHeader(); + RTF_PutStr("<body>\n"); + isBody=TRUE; + } + switch (ch) { + case '<': + RTF_PutStr("<"); + 
break; + + case '>': + RTF_PutStr(">"); + break; + + case '&': + RTF_PutStr("&"); + break; + + default: + fputc(ch, stdout); + } +} + +/**************************************/ + +void RTF_PlainStyle (TStyleState* s) +{ + int i; + for(i=0;i<MAX_INLINE_STYLES;i++) + { + if(s->s & (1<<i)) + RTF_PutStr(styles[i][1]); + } + s->s=0; +} + +/**************************************/ + +void RTF_SetStyle(TStyleState* s, StyleState style) +{ + if( (!ignore_styles||(style==s_hidden)) && ((s->s&(1<<style))==0) ) + { + RTF_PutStr(styles[style][0]); + s->s|=(1<<style); + } +} + +/**************************************/ + +void RTF_PushState(short* level) +{ + if(*level>=MAX_LEVELS) + { + fprintf(stderr,"Exceed maximum level\n"); + exit(-1); + } + style_state[*level]=curr_style; + (*level)++; +} + +/**************************************/ + +void RTF_PopState(short* level) +{ + int j; + TStyleState new_style; + + if(*level<1) + { + fprintf(stderr,"RTF parse error: unexpected '}'\n"); + exit(-1); + } + new_style = style_state[*level-1]; + /*close off any in-line styles*/ + for (j=0;j<MAX_INLINE_STYLES;j++) + { + if ( ((curr_style.s & (1<<j))!=0) && ((new_style.s & (1<<j))==0) ) + { + curr_style.s &= ~(1<<j); + RTF_PutStr(styles[j][1]); + } + } + + for (j=0;j<MAX_INLINE_STYLES;j++) + { + if( ((curr_style.s & (1<<j))==0) && ((new_style.s & (1<<j))!=0) ) + RTF_PutStr(styles[j][0]); + } + (*level)--; + curr_style = new_style; + + if (*level == skip_to_level) { + skip_to_level = -1; + gobble = FALSE; + } +} + +/**************************************/ +/* Map a style number into a HTML heading */ + +short RTF_MapStyle(char* s) +{ + int i; + for (i=0;i<7;i++) + if (!strcmp(style_mappings[i], s)) + return (i); + return (0); +} + +/**************************************/ + +void RTF_AddStyleMap(char* name, char* number) +{ + int i, len; + len=strlen(name); + if( name[len-1]==';') name[--len]=0; + for(i=0;i<STYLE_NUMBER;i++) + { + if(!strcasecmp(name,style_namings[i])) + { + 
strcpy(style_mappings[i],number); + return; + } + } +} + +/**************************************/ + +void RTF_BuildName(char* token, char* ch, unsigned is_string) +{ + int len; + char *p; + len = strlen(token); + if(len>=MAX_NAME_LEN-1) + return; + if (is_string) + { + for (p = ch; p && *p; ++p) + { + token[len]=*p; + ++len; + } + } + else + { + token[len] = *ch; + ++len; + } + token[len]='\0'; +} + + +/**************************************/ + +void RTF_ClearName(char* token) +{ + token[0]=0; +} + +/**************************************/ + +TokenIndex GetTokenIndex(char* control) +{ + TokenIndex i; + + for (i=t_start; i<t_end; i++) + { + if(control[0]==tokens[i][0]) /* Added for fast compare */ + { + if (!strcmp(control, tokens[i])) + { + break; + } + } + } + return i; +} + +/**************************************/ + +void RTF_DoStyleControl (FILE* f, char* control, char* arg) +{ + if(GetTokenIndex(control)==t_s) + { + strcpy(style_number,arg); + } +} + +/**************************************/ + +int chartoi(char ch) +{ + if((ch>='0')&&(ch<='9')) + return ch-'0'; + if((ch>='A')&&(ch<='Z')) + return ch-'A'+10; + if((ch>='a')&&(ch<='z')) + return ch-'a'+10; + return -1; +} + +/**************************************/ + +void RTF_BuildArg (FILE * f, char ch, char* arg) +{ + int i=0; + + if(feof(f)) + { + arg[0]=0; + return; + } + if(ch=='-') + { + arg[i++]='-'; + ch = RTF_GetChar( f ); + if(feof(f)) + { + arg[0]=0; + return; + } + } + for(;isdigit(ch);i++) + { + arg[i]=ch; + if(i>=MAX_RTF_TOKEN-1) + { + arg[MAX_RTF_TOKEN-1]=0; + while(isdigit(ch)) { + ch = RTF_GetChar( f ); + if(feof(f)) + return; + } + break; + } + ch = RTF_GetChar( f ); + if(feof(f)) + { + arg[i+1]=0; + return; + } + } + arg[i]=0; + if(!isspace(ch)) + { + RTF_UnGetChar(f, ch); + } +} + +/**************************************/ + +void RTF_BuildToken (FILE* f, char ch) +{ + int i; + + for(i=1;;i++) + { + char token[MAX_RTF_TOKEN], arg[MAX_RTF_TOKEN]; + token[i-1]=ch; + if(i>=MAX_RTF_TOKEN-1) + { + 
do { + ch = RTF_GetChar( f ); + if(feof(f)) + return; + } while (isalpha(ch)); + RTF_BuildArg(f, ch,arg); + return; + } + ch = RTF_GetChar( f ); + if(feof(f)) + { + token[i]=0; + RTF_DoControl(f,token,""); + return; + } + if( !isalpha(ch) ) + { + token[i]=0; + RTF_BuildArg(f, ch,arg); + RTF_DoControl(f,token,arg); + return; + } + } +} + +/**************************************/ + +void RTF_backslash(FILE* f, char** pch, char* pf) +{ + int ch; + *pf=FALSE; + ch = RTF_GetChar( f ); + if(feof(f)) + { + fprintf(stderr,"Unexpected end of file\n"); + return; + } + switch (ch) + { + case '\\': + *pch=charset_table[92]; *pf=TRUE; + break; + case '{': + *pch=charset_table[123]; *pf=TRUE; + break; + case '}': + *pch=charset_table[125]; *pf=TRUE; + break; + case '*': + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + if(skip_to_level>level-1||skip_to_level==-1) + skip_to_level = level-1; + break; + case '\'': + { + char ch1, ch2; + ch1 = RTF_GetChar( f ); + ch2 = RTF_GetChar( f ); + if(!feof(f)) + { + if(isxdigit(ch1)&&isxdigit(ch2)) + { + ch = chartoi(ch1)*16+chartoi(ch2); + *pch = charset_table[ch-1]; *pf=TRUE; + } else { + fprintf(stderr,"RTF Error: unexpected '%c%c' after \\\'\n",ch1,ch2); + } + } + break; + } + default: + if (isalpha(ch)) + { + RTF_BuildToken(f, ch); + } else { + fprintf(stderr, "\nRTF Error: unexpected '%c' after \\.\n", ch); + } + break; + } +} + +/**************************************/ + +void RTF_ParseStyle(FILE * f) +{ + char ch, pf; + char *code; + int level0; + void (*PrevDoControl)(FILE*,char*,char*); + + level0=level; + PrevDoControl=RTF_DoControl; + RTF_DoControl=RTF_DoStyleControl; + + RTF_ClearName(style_name); + style_number[0]=0; + while (1) + { + ch = RTF_GetChar( f ); + if(feof(f)) + break; + switch (ch) + { + case '\\': + RTF_backslash(f, &code, &pf); + if(pf) + { + RTF_BuildName(style_name, code, 1); + } else { + RTF_ClearName(style_name); + } + break; + + case '{': + level++; + RTF_ClearName(style_name); + break; + 
+ case '}': + if(level0+1==level) + { + if(style_number[0]!=0) + { + RTF_AddStyleMap(style_name,style_number); + style_number[0]=0; + } + } else if(level0==level) { + RTF_DoControl=PrevDoControl; + RTF_UnGetChar(f, ch); + return; + } + level--; + RTF_ClearName(style_name); + break; + + default: + RTF_BuildName(style_name, &ch, 0); + break; + } + } /* while */ +} + +/**************************************/ +/* Perform actions for RTF control words */ + +void RTF_DoBodyControl (FILE * f, char* control,char* arg) +{ + short style; + + if (gobble) return; + + switch (GetTokenIndex(control)) + { + case t_stylesheet: + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + skip_to_level = level-1; + RTF_ParseStyle( f ); + break; + case t_fonttbl: /*skip all of these and their contents!*/ + case t_colortbl: + case t_info: + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + skip_to_level = level-1; + break; + case t_pict: + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + if(skip_to_level>=level || skip_to_level==-1) + skip_to_level = level-1; + break; + + + case t_s: /*Style*/ + if (!curr_heading) + { + style = RTF_MapStyle (arg); + if(style) + { + curr_heading = s_h0 + style; + RTF_PutStr(styles[curr_heading][0]); + ignore_styles = TRUE; + } + } + break; + + case t_b: /*Bold*/ + RTF_SetStyle(&curr_style,s_bold); + break; + + case t_ulw: + case t_uld: + case t_uldb: + case t_ul: /*Underline, maps to "emphasis" HTML style*/ + RTF_SetStyle(&curr_style,s_underline); + break; + + case t_i: /*Italic*/ + RTF_SetStyle(&curr_style,s_italic); + break; + + case t_v: /* Hidden*/ + RTF_SetStyle(&curr_style,s_hidden); + break; + + case t_par: /*Paragraph*/ + if (curr_heading!=s_plain) { + RTF_PutStr(styles[curr_heading][1]); + curr_heading = s_plain; + } else { + RTF_PutStr(styles[s_para][0]); + } + ignore_styles = FALSE; + break; + + case t_plain: /*reset inline styles*/ + RTF_PlainStyle(&curr_style); + break; + case t_cell: + case 
t_tab: + RTF_PutChar(' '); + break; + case t_endash: + case t_emdash: + RTF_PutChar('-'); + break; + case t_line: + case t_row: + RTF_PutStr(styles[s_br][0]); + break; + case t_bullet: + RTF_PutChar('\xb7'); + break; + case t_start: + case t_end: + break; + case t_rquote: + //RTF_PutStr("’"); + RTF_PutStr("'"); + break; + } + +} + +/**************************************/ +/* RTF_Parse is a crude, ugly state machine that understands enough of */ +/* the RTF syntax to be dangerous. */ + +void RTF_ParseBody( FILE* f ) +{ + char ch, pf; + char* code; + + RTF_DoControl=RTF_DoBodyControl; + level = 0; + skip_to_level = -1; + gobble = FALSE; + ignore_styles = FALSE; + + while (1) + { + ch = RTF_GetChar( f ); + if (feof(f)) + { + break; + } + switch (ch) + { + case '\\': + RTF_backslash(f, &code,&pf); + if(pf && code) + RTF_PutStr(code); + break; + + case '{': + RTF_PushState(&level); + break; + + case '}': + RTF_PopState(&level); + break; + + default: + RTF_PutChar(ch); + break; + } + }/*while*/ +} + +/**************************************/ + +int RTF_Parse (FILE* f) +{ + RTF_PutStr("<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n<html>\n"); + + isBody=FALSE; + + RTF_ParseBody(f); + + if (isBody) RTF_PutStr("</body>\n"); + + RTF_PutStr("</html>\n"); + + return 0; +} + +/**************************************/ + +void Initialize() +{ + int i; + + for (i=0;i<MAX_LEVELS;i++) + style_state[i].s=s_plain; + + curr_style.s=s_plain; + curr_heading = s_plain; + + // Set default styles maping + style_mappings[0][0]=0; + for(i=1;i<=6;i++) + sprintf(style_mappings[i],"%d",256-i); +} + +/**************************************/ + +int RTF_FindCharset(FILE * f) +{ + char ch; + char code[CHARSET_MAX_LENGTH]; + int metadata = 0; + int i = 0; + + while ( !feof(f) ) + { + ch = RTF_GetChar( f ); + if ( ch == '\\' ) + { + metadata++; + } + if ( metadata == 2 ) // the second metadata is the charset used + { + if ( ch != '\\' ) + { + code[i] = ch; + i++; + } + } + if ( metadata > 2 ) + { + 
code[i] = '\0'; + break; + } + } + + + for ( i = 0; i < CHARSET_NUMBER ; i++) + { + if ( strcmp( (const char *)charset[i], (const char *) code ) == 0 ) + { + charset_table = charset_variablename[i]; + break; + }; + } + if ( i == CHARSET_NUMBER ) + { + charset_table = charset_variablename[CHARSET_DEFAULT]; + } + + return 1; // always true! +} + +/**************************************/ + +int main(int argc,char** argv) +{ + int rv = 0; + FILE *f = NULL; + + Initialize(); + + if ( argc > 1) + { + if( strcmp(argv[1],"--help")==0 || strcmp(argv[1],"-H")==0 ) + { + printf("Use: %s [rtf_filename]\n",argv[0]); + rv = 0; + } else if ( strcmp(argv[1],"--version")==0 || strcmp(argv[1],"-V")==0 ) { + printf("rtf2html version 1.2\n"); + rv = 0; + } + else + { + rv = openfile(argv[1], &f); + if ( rv ) rv = RTF_FindCharset(f); + if ( rv ) + { + rewind(f); + rv = RTF_Parse(f); + } + if ( rv ) rv = closefile(f); + } + } + else + { + printf("Use: %s [rtf_filename]\n",argv[0]); + } + return rv; +} |