diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc b/debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc new file mode 100644 index 00000000..23518119 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htcommon/HtSGMLCodec.cc @@ -0,0 +1,124 @@ +// +// HtSGMLCodec.cc +// +// HtSGMLCodec: A Specialized HtWordCodec class to convert between SGML +// ISO 8859-1 entities and high-bit characters. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: HtSGMLCodec.cc,v 1.6 2004/06/01 18:25:01 angusgb Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "HtSGMLCodec.h" +#include "HtConfiguration.h" + +// Constructor: parses the appropriate parameters using the +// encapsulated HtWordCodec class. +// Only used in privacy. +HtSGMLCodec::HtSGMLCodec() +{ + HtConfiguration* config= HtConfiguration::config(); + int translate_latin1 = config->Boolean("translate_latin1", 1); + StringList *myTextFromList = new StringList(); // For &foo; + StringList *myNumFromList = new StringList(); // For &#nnn; + StringList *myToList = new StringList(); + String myTextFromString(770); // Full text list + + // Is this really the best way to do this? + if (!translate_latin1 ) + { + myTextFromString = " "; + } + else + { + // this set has been slightly modified in order to manage the € entity + // the resulting charset is therefore a ISO-8859-1 partially moved to ISO-8859-15 + myTextFromString = " |¡|¢|£|€|¥|¦|§|"; + myTextFromString << "¨|©|ª|«|¬|­|®|¯|°|"; + myTextFromString << "±|²|³|´|µ|¶|·|¸|"; + myTextFromString << "¹|º|»|¼|½|¾|¿|À|"; + myTextFromString << "Á|Â|Ã|Ä|Å|Æ|Ç|È|"; + myTextFromString << "É|Ê|Ë|Ì|Í|Î|Ï|Ð|"; + myTextFromString << "Ñ|Ò|Ó|Ô|Õ|Ö|×|Ø|"; + myTextFromString << "Ù|Ú|Û|Ü|Ý|Þ|ß|à|"; + myTextFromString << "á|â|ã|ä|å|æ|ç|è|"; + myTextFromString << "é|ê|ë|ì|í|î|ï|ð|"; + myTextFromString << "ñ|ò|ó|ô|õ|ö|÷|ø|"; + myTextFromString << "ù|ú|û|ü|ý|þ|ÿ"; + } + + myTextFromList->Create(myTextFromString, '|'); + + for (int i = 160; i <= 255; i++) + { + String temp = 0; + temp << (char) i; + myToList->Add(temp); + + temp = 0; + temp << "&#" << i << ";"; + myNumFromList->Add(temp); + if (!translate_latin1 ) + break; + } + + // Now let's take care of the low-bit characters with encodings. + myTextFromList->Add("""); + myToList->Add("\""); + myNumFromList->Add("""); + + myTextFromList->Add("&"); + myToList->Add("&"); + myNumFromList->Add("&"); + + myTextFromList->Add("<"); + myToList->Add("<"); + myNumFromList->Add("<"); + + myTextFromList->Add(">"); + myToList->Add(">"); + myNumFromList->Add(">"); + + myTextWordCodec = new HtWordCodec(myTextFromList, myToList, '|'); + myNumWordCodec = new HtWordCodec(myNumFromList, myToList, '|'); +} + + +HtSGMLCodec::~HtSGMLCodec() +{ + delete myTextWordCodec; + delete myNumWordCodec; +} + + +// Supposedly used as HtSGMLCodec::instance()->ErrMsg() +// to check if HtWordCodec liked what was fed. +String& HtSGMLCodec::ErrMsg() +{ + return myErrMsg; +} + + +// Canonical singleton interface. +HtSGMLCodec * +HtSGMLCodec::instance() +{ + static HtSGMLCodec *_instance = 0; + + if (_instance == 0) + { + _instance = new HtSGMLCodec(); + } + + return _instance; +} + +// End of HtSGMLCodec.cc |