diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/contrib')
59 files changed, 7957 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/README b/debian/htdig/htdig-3.2.0b6/contrib/README new file mode 100644 index 00000000..d7c57ea3 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/README @@ -0,0 +1,34 @@ +ht://Dig contributed scripts + +This directory tree contains perl and shell programs that attempt to +do things with the generated databases. Most of these were written +for a very specific purpose for the specific version of ht://Dig that +was current at that point. This means that some of these programs +will be severely broken! Do not expect them to work; use them only as +examples of the types of things you can do with the ht://Dig +databases. + +More contributed work is available on the ht://Dig website: +<http://www.htdig.org/contrib/> + +What's here: + +acroconv.pl An external converter script that uses acroread to parse PDFs +autorun An example of automating the database building +changehost A script to change hostnames of URLs in the databases +conv_doc.pl A sample script to use the conversion features of external_parsers +doclist List the information in the doc db (or after a certain date) +ewswrap Two sample htsearch wrappers to emulate Excite for Web + Servers (EWS) and to simplify queries +handler.pl A sample external_protocols script to handle HTTP/HTTPS using curl +htparsedoc A sample shell script to parse Word documents +multidig A set of scripts to simplify updating multiple databases +parse_doc.pl A general external parser script that handles MS Word documents + (among others) +run-robot.sh Another example of automating the database building +scriptname An example of using htsearch within dynamic SSI pages +status.pl Build a status page of last 5 runs and top 10 + servers (by # URLs) +urlindex Build an index of all the URLs in the database +whatsnew Build a "what's new" page with custom header and footer +wordfreq Build a list of words and frequency in the database diff --git a/debian/htdig/htdig-3.2.0b6/contrib/acroconv.pl 
b/debian/htdig/htdig-3.2.0b6/contrib/acroconv.pl new file mode 100755 index 00000000..ad7d4d79 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/acroconv.pl @@ -0,0 +1,93 @@ +#!/usr/local/bin/perl +# +# Sample external converter for htdig 3.1.4 or later, to convert PDFs +# using Adobe Acrobat 3's acroread -toPostScript option on UNIX systems. +# (Use it in place of conv_doc.pl if you have acroread but not pdftotext.) +# Written by Gilles Detillieux. +# +# Usage: (in htdig.conf) +# +# external_parsers: application/pdf->text/html /usr/local/bin/acroconv.pl +# +# This is a pretty quick and dirty implementation, but it does seem to +# give functionality equivalent to the now defunct htdig/PDF.cc parser. +# I'm not a Perl expert by any stretch of the imagination, so the code +# could probably use a lot of optimization to make it work better. +# + +$watch = 0; +$bigspace = 0; +$putspace = 0; +$putbody = 1; + +system("ln $ARGV[0] $ARGV[0].pdf; acroread -toPostScript $ARGV[0].pdf"); +open(INP, "< $ARGV[0].ps") || die "Can't open $ARGV[0].ps\n"; + +print "<HTML>\n<head>\n"; +while (<INP>) { + if (/^%%Title: / && $putbody) { + s/^%%Title: \((.*)\).*\n/$1/; + s/\\222/'/g; + s/\\267/*/g; + s/\\336/fi/g; + s/\\([0-7]{3})/pack(C, oct($1))/eig; + s/\\([0-7]{2})/pack(C, oct($1))/eig; + s/\\([0-7])/pack(C, oct($1))/eig; + s/\\[nrtbf]/ /g; + s/\\(.)/$1/g; + s/&/\&\;/g; + s/</\<\;/g; + s/>/\>\;/g; + print "<title>$_</title>\n"; + print "</head>\n<body>\n"; + $putbody = 0; + } elsif (/^BT/) { + $watch = 1; + } elsif (/^ET/) { + $watch = 0; + if ($putspace) { + print "\n"; + $putspace = 0; + } + } elsif ($watch) { + if (/T[Jj]$/) { + s/\)[^(]*\(//g; + s/^[^(]*\((.*)\).*\n/$1/; + s/\\222/'/g; + s/\\267/*/g; + s/\\336/fi/g; + s/\\([0-7]{3})/pack(C, oct($1))/eig; + s/\\([0-7]{2})/pack(C, oct($1))/eig; + s/\\([0-7])/pack(C, oct($1))/eig; + s/\\[nrtbf]/ /g; + s/\\(.)/$1/g; + if ($bigspace) { + s/(.)/$1 /g; + } + s/&/\&\;/g; + s/</\<\;/g; + s/>/\>\;/g; + if ($putbody) { + print 
"</head>\n<body>\n"; + $putbody = 0; + } + print "$_"; + $putspace = 1; + } elsif (/T[Ddm*]$/ && $putspace) { + print "\n"; + $putspace = 0; + } elsif (/Tc$/) { + $bigspace = 0; + if (/^([3-9]|[1-9][0-9]+)\..*Tc$/) { + $bigspace = 1; + } + } + } +} +if ($putbody) { + print "</head>\n<body>\n"; +} +print "</body>\n</HTML>\n"; + +close(INP); +system("rm -f $ARGV[0].pdf $ARGV[0].ps"); diff --git a/debian/htdig/htdig-3.2.0b6/contrib/autorun/README b/debian/htdig/htdig-3.2.0b6/contrib/autorun/README new file mode 100644 index 00000000..44686879 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/autorun/README @@ -0,0 +1,16 @@ +README for autorun. + +The autorun program is an attempt at automatic the steps +needed to build a complete search database. + +If the search domain is not too big, this can be run on a +daily (nightly) basis. + + +Usage: + autorun + +Configuration: + Edit the autorun script and change things to your + liking... + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/autorun/autorun b/debian/htdig/htdig-3.2.0b6/contrib/autorun/autorun new file mode 100755 index 00000000..6014073a --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/autorun/autorun @@ -0,0 +1,46 @@ +#!/bin/sh + +## +## Configurable variables +## + + +## +## Specify the location of the htdig and htmerge binaries +## +htbin=/opt/www/bin + +## +## Specify the configuration file to use for digging and merging +## +conffile=/opt/www/htdig/sdsu.conf + +## +## Specify the location where the temporary database is +## +source=/tmp + +## +## Specify the location of the target search database +## +target=/gopher/www/htdig + +## +## Specify the host of the target search database +## +search_host=athena + +## +## Specify how to copy the new database to the location +## where the search engine can get at it. 
+## +docopy() { + rcp $source/*.docdb $source/*.docs.index $source/*.words.gdbm ${search_host}:$target +} + + +$htbin/htdig -i -c $conffile +$htbin/htmerge -c $conffile +$htbin/htnotify -vv -c $conffile + +docopy diff --git a/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl b/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl new file mode 100755 index 00000000..3bd6c44d --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl @@ -0,0 +1,298 @@ +#!/usr/local/bin/perl + +## +## changehost.pl (C) 1995 Andrew Scherpbier +## +## This program will change hostnames of URLs in the document database and index. +## +## usage: +## changehost.pl database_base from to +## +## example: +## changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net +## +## Two new database will be created with a base of '/tmp/new'. +## These databases can then be used by htsearch. +## + +use GDBM_File; + +$base = $ARGV[0]; +$from = $ARGV[1]; +$to = $ARGV[2]; + +$dbfile = "$base.docdb"; +$newfile = "/tmp/new.docdb"; + +## +## Convert the document database first. 
+## +tie(%newdb, GDBM_File, $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'"; +tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "$dbfile: $!"; + + +while (($key, $value) = each %docdb) +{ + if ($key =~ /http:\/\/$from/i) + { + %record = parse_ref_record($value); + $key =~ s/http:\/\/$from/http:\/\/$to/i; + print "$key\n"; + $t = $record{"URL"}; + $t =~ s/http:\/\/$from/http:\/\/$to/i; + $record{"URL"} = $t; + + $value = create_ref_record(%record); + } + + $newdb{$key} = $value; +} + +untie %newdb; +untie %docdb; + +## +## Now create the document index +## +$newfile = "/tmp/new.docs.index"; +$dbfile = "$base.docs.index"; + +tie(%newdb, GDBM_File, $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'"; +tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "$dbfile: $!"; + +while (($key, $value) = each %docdb) +{ + if ($value =~ /http:\/\/$from/i) + { + $value =~ s/http:\/\/$from/http:\/\/$to/i; + } + $newdb{$key} = $value; +} + +untie %newdb; +untie %docdb; + +###################################################################### +sub create_ref_record +{ + local(%rec) = @_; + local($s); + + if (exists $rec{"ID"}) + { + $s .= pack("Ci", 0, $rec{"ID"}); + } + if (exists $rec{"TIME"}) + { + $s .= pack("Ci", 1, $rec{"TIME"}); + } + if (exists $rec{"ACCESSED"}) + { + $s .= pack("Ci", 2, $rec{"ACCESSED"}); + } + if (exists $rec{"STATE"}) + { + $s .= pack("Ci", 3, $rec{"STATE"}); + } + if (exists $rec{"SIZE"}) + { + $s .= pack("Ci", 4, $rec{"SIZE"}); + } + if (exists $rec{"LINKS"}) + { + $s .= pack("Ci", 5, $rec{"LINKS"}); + } + if (exists $rec{"IMAGESIZE"}) + { + $s .= pack("Ci", 6, $rec{"IMAGESIZE"}); + } + if (exists $rec{"HOPCOUNT"}) + { + $s .= pack("Ci", 7, $rec{"HOPCOUNT"}); + } + if (exists $rec{"URL"}) + { + $s .= pack("Ci", 8, length($rec{"URL"})); + $s .= $rec{"URL"}; + } + if (exists $rec{"HEAD"}) + { + $s .= pack("Ci", 9, length($rec{"HEAD"})); + $s .= $rec{"HEAD"}; + } + if (exists $rec{"TITLE"}) + { + $s .= pack("Ci", 10, length($rec{"TITLE"})); + $s 
.= $rec{"TITLE"}; + } + if (exists $rec{"DESCRIPTIONS"}) + { + @v = split('', $rec{"DESCRIPTIONS"}); + $s .= pack("Ci", 11, $#v - 1); + foreach (@v) + { + $s .= pack("i", length($_)); + $s .= $_; + } + } + if (exists $rec{"ANCHORS"}) + { + @v = split('', $rec{"ANCHORS"}); + $s .= pack("Ci", 12, $#v - 1); + foreach (@v) + { + $s .= pack("i", length($_)); + $s .= $_; + } + } + if (exists $rec{"EMAIL"}) + { + $s .= pack("Ci", 13, length($rec{"EMAIL"})); + $s .= $rec{"EMAIL"}; + } + if (exists $rec{"NOTIFICATION"}) + { + $s .= pack("Ci", 14, length($rec{"NOTIFICATION"})); + $s .= $rec{"NOTIFICATION"}; + } + if (exists $rec{"SUBJECT"}) + { + $s .= pack("Ci", 15, length($rec{"SUBJECT"})); + $s .= $rec{"SUBJECT"}; + } + + return $s; +} + +sub parse_ref_record +{ + local($value) = @_; + local(%rec, $length, $count, $result); + + while (length($value) > 0) + { + $what = unpack("C", $value); + $value = substr($value, 1); + if ($what == 0) + { + # ID + $rec{"ID"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 1) + { + # TIME + $rec{"TIME"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 2) + { + # ACCESSED + $rec{"ACCESSED"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 3) + { + # STATE + $rec{"STATE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 4) + { + # SIZE + $rec{"SIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 5) + { + # LINKS + $rec{"LINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 6) + { + # IMAGESIZE + $rec{"IMAGESIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 7) + { + # HOPCOUNT + $rec{"HOPCOUNT"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 8) + { + # URL + $length = unpack("i", $value); + $rec{"URL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 9) + { + # HEAD + $length = 
unpack("i", $value); + $rec{"HEAD"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 10) + { + # TITLE + $length = unpack("i", $value); + $rec{"TITLE"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 11) + { + # DESCRIPTIONS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"DESCRIPTIONS"} = $result; + } + elsif ($what == 12) + { + # ANCHORS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"ANCHORS"} = $result; + } + elsif ($what == 13) + { + # EMAIL + $length = unpack("i", $value); + $rec{"EMAIL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 14) + { + # NOTIFICATION + $length = unpack("i", $value); + $rec{"NOTIFICATION"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 15) + { + # SUBJECT + $length = unpack("i", $value); + $rec{"SUBJECT"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + } + return %rec; +} diff --git a/debian/htdig/htdig-3.2.0b6/contrib/conv_doc.pl b/debian/htdig/htdig-3.2.0b6/contrib/conv_doc.pl new file mode 100755 index 00000000..78d8a985 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/conv_doc.pl @@ -0,0 +1,214 @@ +#!/usr/local/bin/perl + +# +# Sample external converter for htdig 3.1.4 or later. 
+# Usage: (in htdig.conf) +# +# external_parsers: application/msword->text/html /usr/local/bin/conv_doc.pl \ +# application/postscript->text/html /usr/local/bin/conv_doc.pl \ +# application/pdf->text/html /usr/local/bin/conv_doc.pl +# +# Written by Gilles Detillieux <[email protected]>. +# Based in part on the parse_word_doc.pl script, written by +# Jesse op den Brouw <[email protected]> but heavily revised. +# +# 1998/12/11 +# Added: catdoc test (is catdoc runnable?) <[email protected]> +# 1999/02/09 +# Added: uses ps2ascii to handle PS files <[email protected]> +# 1999/02/15 +# Added: check for some file formats <[email protected]> +# 1999/02/25 +# Added: uses pdftotext to handle PDF files <[email protected]> +# 1999/03/01 +# Added: extra checks for file "wrappers" <[email protected]> +# & check for MS Word signature (no longer defaults to catdoc) +# 1999/03/05 +# Changed: rejoin hyphenated words across lines <[email protected]> +# (in PDFs) +# 1999/08/12 +# Changed: adapted for xpdf 0.90 release <[email protected]> +# Added: uses pdfinfo to handle PDF titles <[email protected]> +# Changed: change dashes to hyphens <[email protected]> +# 1999/09/09 +# Changed: fix to handle empty PDF title right <[email protected]> +# 1999/12/01 +# Changed: rewritten as external converter <[email protected]> +# stripped out all parser-related code +# Added: test to silently ignore wrapped EPS files < " > +# Added: test for null device on Win32 env. 
<[email protected]> +# 2000/01/12 +# Changed: "break" to "last" (no break in Perl) <[email protected]> +# 2001/07/12 +# Changed: fix "last" handling in dehyphenation <[email protected]> +# Added: handle %xx codes in title from URL <[email protected]> +######################################### +# +# set this to your MS Word to text converter +# get it from: http://www.fe.msk.ru/~vitus/catdoc/ +# +$CATDOC = "/usr/local/bin/catdoc"; +# +# set this to your WordPerfect to text converter, or /bin/true if none available +# this nabs WP documents with .doc suffix, so catdoc doesn't see them +# +$CATWP = "/bin/true"; +# +# set this to your RTF to text converter, or /bin/true if none available +# this nabs RTF documents with .doc suffix, so catdoc doesn't see them +# +$CATRTF = "/bin/true"; +# +# set this to your PostScript to text converter +# get it from the ghostscript 3.33 (or later) package +# +$CATPS = "/usr/bin/ps2ascii"; +# +# set this to your PDF to text converter, and pdfinfo tool +# get it from the xpdf 0.90 package at http://www.foolabs.com/xpdf/ +# +$CATPDF = "/usr/bin/pdftotext"; +$PDFINFO = "/usr/bin/pdfinfo"; +#$CATPDF = "/usr/local/bin/pdftotext"; +#$PDFINFO = "/usr/local/bin/pdfinfo"; + +######################################### +# +# need some var's +$dehyphenate = 0; # set if we must dehyphenate text output +$ishtml = 0; # set if converter produces HTML +$null = ""; +$magic = ""; +$type = ""; +$cvtr = ""; +$cvtcmd = ""; +$title = ""; +@parts = (); + +# make portable to win32 platform or unix +$null = "/dev/null"; +if ($^O eq "MSWin32") {$null = "nul";} + + +######################################### +# +# Read first bytes of file to check for file type (like file(1) does) +open(FILE, "< $ARGV[0]") || die "Can't open file $ARGV[0]: $!\n"; +read FILE,$magic,8; +close FILE; + +if ($magic =~ /^\0\n/) { # possible MacBinary header + open(FILE, "< $ARGV[0]") || die "Can't open file $ARGV[0]: $!\n"; + read FILE,$magic,136; # let's hope converters can handle them! 
+ close FILE; +} + +if ($magic =~ /%!|^\033%-12345/) { # it's PostScript (or HP print job) + $cvtr = $CATPS; # gs 3.33 leaves _temp_.??? files in . +# keep quiet even if PS gives errors... + $cvtcmd = "(cd /tmp; $cvtr; rm -f _temp_.???) < $ARGV[0] 2>$null"; +# allow PS interpreter to give error messages... +# $cvtcmd = "(cd /tmp; $cvtr; rm -f _temp_.???) < $ARGV[0]"; + $type = "PostScript"; + $dehyphenate = 0; # ps2ascii already does this + if ($magic =~ /^\033%-12345/) { # HP print job + open(FILE, "< $ARGV[0]") || die "Can't open file $ARGV[0]: $!\n"; + read FILE,$magic,256; + close FILE; + exit unless $magic =~ /^\033%-12345X\@PJL.*\n*.*\n*.*ENTER\s*LANGUAGE\s*=\s*POSTSCRIPT.*\n*.*\n*.*\n%!/ + } +} elsif ($magic =~ /\305\320\323\306\036/) { # it's a wrapped EPS - ignore + exit +} elsif ($magic =~ /%PDF-/) { # it's PDF (Acrobat) + $cvtr = $CATPDF; + $cvtcmd = "$cvtr -raw $ARGV[0] -"; +# to handle single-column, strangely laid out PDFs, use coalescing feature... +# $cvtcmd = "$cvtr $ARGV[0] -"; + $type = "PDF"; + $dehyphenate = 1; # PDFs often have hyphenated lines + if (open(INFO, "$PDFINFO $ARGV[0] 2>$null |")) { + while (<INFO>) { + if (/^Title:/) { + s/^Title:\s+//; + s/\s+$//; + s/\s+/ /g; + s/&/\&\;/g; + s/</\<\;/g; + s/>/\>\;/g; + $title = $_; + last; + } + } + close INFO; + } +# to use coalescing feature conditionally... 
+# if ($title =~ /...Title of Corel DRAW output.../) { +# $cvtcmd = "$cvtr $ARGV[0] -"; +# } +} elsif ($magic =~ /WPC/) { # it's WordPerfect + $cvtr = $CATWP; + $cvtcmd = "$cvtr $ARGV[0]"; + $type = "WordPerfect"; + $dehyphenate = 0; # WP documents not likely hyphenated +} elsif ($magic =~ /^{\\rtf/) { # it's Richtext + $cvtr = $CATRTF; + $cvtcmd = "$cvtr $ARGV[0]"; + $type = "RTF"; + $dehyphenate = 0; # RTF documents not likely hyphenated +} elsif ($magic =~ /\320\317\021\340/) { # it's MS Word + $cvtr = $CATDOC; + $cvtcmd = "$cvtr -a -w $ARGV[0]"; + $type = "Word"; + $dehyphenate = 0; # Word documents not likely hyphenated +} else { + die "Can't determine type of file $ARGV[0]; content-type: $ARGV[1]; URL: $ARGV[2]\n"; +} + +die "$cvtr is absent or unwilling to execute.\n" unless -x $cvtr; + +############################################# +# +# Start output. + +# if running as a converter for "user-defined" output type... +#print "Content-Type: text/html\n\n"; + +if ($ishtml) { + # converter will give its own HTML output + system("$cvtcmd") || die "$cvtr doesn't want to be run from shell.\n"; + exit; +} + +# Produce HTML output from converter's text output, so we can add title. +print "<HTML>\n<head>\n"; + +# print out the title, if it's set, and not just a file name, or make one up +if ($title eq "" || $title =~ /^[A-G]:[^\s]+\.[Pp][Dd][Ff]$/) { + @parts = split(/\//, $ARGV[2]); # get the file basename + $parts[-1] =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie; + $title = "$type Document $parts[-1]"; # use it in title +} +print "<title>$title</title>\n"; + +print "</head>\n<body>\n"; + +# Open file via selected converter, output its text. 
+open(CAT, "$cvtcmd |") || die "$cvtr doesn't want to be opened using pipe.\n"; +while (<CAT>) { + while (/[A-Za-z\300-\377]-\s*$/ && $dehyphenate) { + $_ .= <CAT>; + last if eof; + s/([A-Za-z\300-\377])-\s*\n\s*([A-Za-z\300-\377])/$1$2/s + } + s/[\255]/-/g; # replace dashes with hyphens + s/\f/\n/g; # replace form feed + s/&/\&\;/g; # HTMLify text + s/</\<\;/g; + s/>/\>\;/g; + print; +} + +print "</body>\n</HTML>\n"; + +close CAT; + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/DETAILS b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/DETAILS new file mode 100644 index 00000000..35300c03 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/DETAILS @@ -0,0 +1,399 @@ +INTRODUCTION +============ + +This DETAILS file accompanies doc2html version 3.0.1. + +Read this file for instructions on the installation and use of the +doc2html scripts. + +The set of files is: + + DETAILS - this file + doc2html.pl - the main Perl script + doc2html.cfg - configuration file for use with wp2html + doc2html.sty - style file for use with wp2html + pdf2html.pl - Perl script for converting PDF files to HTML + swf2html.pl - Perl script for extracting links from Shockwave flash files. + README - brief description + +doc2html.pl is a Perl5 script for use as an external converter with +htdig 3.1.4 or later. It takes as input the name of a file containing a +document in a number of possible formats and its MIME type. It uses +the appropriate conversion utility to convert it to HTML on standard +output. + +doc2html.pl was designed to be easily adapted to use whatever conversion +utilities are available, and although it has been written around the +"wp2html" utility, it does not require wp2html to function. + +NOTE: version 3.0.1 has only been tested on Unix. + +pdf2html.pl is a Perl script which uses a pair of utilities (pdfinfo and +pdf2text) to extract information and text from an Adobe PDF file and +write HTML output. 
It can be called directly from htdig, but you are +recommended to call it via doc2html.pl. + +swf2html.pl is a Perl script which calls a utility (swfparse) and +outputs HTML containing links to the URL's found in a Shockwave flash +file. It can be called directly from htdig, but you are recommended to +call it via doc2html.pl. + + +ABOUT DOC2HTML.PL +================= + +doc2html.pl is essentially a wrapper script, and is itself only capable +of reading plain text files. It requires the utility programs described +below to work properly. + +doc2html.pl was written by David Adams <[email protected]>, it is +based on conv_doc.pl written by Gilles Detillieux <[email protected]>. +This in turn was based on the parse_word_doc.pl script, written by +Jesse op den Brouw <[email protected]>. + +doc2html.pl makes up to three attempts to read a file. It first tries +utilities which convert directly into HTML. If one is not found, or no +output is produced, it then tries utilities which output plain text. If +none is found, and the file is not of a type known to be unconvertable, +then doc2html.pl attempts to read the file itself, stripping out any +control characters. + +doc2html.pl is written to be flexible and easy to adapt to whatever +conversion utilites are available. New conversion utilities may be +added simply by making additions to routine 'store_methods', with no +other changes being necessary. The existing lines in store_methods +should provide sufficient examples on how to add more converters. Note +that converters which produce HTML are entered differently to those that +produce plain text. + +htdig provides three arguments which are read by doc2html.pl: + +1) the name of a temporary file containing a copy of the + document to be converted. + +2) the MIME type of the document. + +3) the URL of the document (which is used in generating the + title in the output). 
+ +The test for document type uses both the MIME-type passed as second +argument and the "Magic number" of the file. + + +INSTALLATION +============ + +Installation requires that you acquire, compile and install the utilities +you need to do the conversions. Those already setup in the Perl scripts are +described below. + +If you don't have Perl module Sys::AlarmCall installed, then consider +installing it, see section "TIMEOUT" below. + +You may need to change the first line of each script to the location of +Perl on your system. + +Edit doc2html.pl to include the full pathname of each utility you have +installed. For example: + +my $WP2HTML = '/opt/local/wp2html-3.2/bin/wp2html'; + +If you don't have a particular utility then leave its location as a null +string. + +Then place doc2html.pl and the other scripts where htdig can access them. + +If you are going to convert PDF files then you will need to edit pdf2html.pl +and include its full path name in doc2html.pl. + +If you are going to extract links from Shockwave flash files then you will +need to edit swf2html.pl and include its full path name in doc2html.pl. 
+ +Edit the htdig.conf configuration file to use the script, as in this example: + +external_parsers: application/rtf->text/html /usr/local/scripts/doc2html.pl \ + text/rtf->text/html /usr/local/scripts/doc2html.pl \ + application/pdf->text/html /usr/local/scripts/doc2html.pl \ + application/postscript->text/html /usr/local/scripts/doc2html.pl \ + application/msword->text/html /usr/local/scripts/doc2html.pl \ + application/Wordperfect5.1->text/html /usr/local/scripts/doc2html.pl \ + application/msexcel->text/html /usr/local/scripts/doc2html.pl \ + application/vnd.ms-excel->text/html /usr/local/scripts/doc2html.pl \ + application/vnd.ms-powerpoint->text/html /usr/local/scripts/doc2html.pl \ + application/x-shockwave-flash->text/html /usr/local/scripts/doc2html.pl \ + application/x-shockwave-flash2-preview->text/html /usr/local/scripts/doc2html.pl + +If you are using wp2html then place the files doc2html.cfg and doc2html.sty in the +wp2html library directory. + + +UTILITY WP2HTML +=============== + +Obtain wp2html from http://www.res.bbsrc.ac.uk/wp2html/ + +Note that wp2html is not free; its author charges a small fee for +"registration". Various pre-compiled versions and the source code are +available, together with extensive documentation. Upgrades are +available at no further charge. + +wp2html converts WordPerfect documents (5.1 and later) to HTML. +Versions 3.2 and later will also convert Word7 and Word97 documents to +HTML. A feature of wp2html which doc2html.pl exploits is that the -q +option will result in either good HTML or no output at all. + +wp2html is very flexible in the output it creates. The two files, +doc2html.cfg and doc2html.sty, should be placed in the wp2html library +directory along with the .cfg and .sty files supplied with wp2html. + +Edit the line in doc2html.pl: + +my $WP2HTML = ''; + +to set $WP2HTML to the full pathname of wp2html. 
+ +wp2html will look for the title in a document, and if it is found then +output it in <TITLE>....</TITLE> markup. If a title is not found +then it defaults to the file name in square brackets. + +If wp2html is unable to convert a document, or is not installed, +then doc2html.pl can use the "catdoc" or "catwpd" utilities instead. + + +UTILITY CATDOC +============== + +Obtain catdoc from http://www.ice.ru/~vitus/catdoc/, it is available +under the terms of the Gnu Public License. + +Edit the line in doc2html.pl: + +my $CATDOC = ''; + +to set the variables to the full pathname of catdoc. You might want +to use a different version of catdoc for Word2 documents or for MAC Word +files. + +catdoc converts MS Word6, Word7, etc., documents to plain text. The +latest beta version is also able to convert Word2 documents. catdoc +also produces a certaint amount of "garbage" as well as the text of the +document. The -b option improves the likelihood that catdoc will +extract all the text from the document, but at the expense of increasing +the garbage as well. doc2html.pl removes some non-printing characters +to minimise the garbage. If a later version of catdoc than 0.91.4 is +obtained then the use of the -b option should be reviewed. + + +UTILITY CATWPD +============== + +Obtain catwpd from the contribs section of the Ht://Dig web site where +you obtained doc2html. It extracts words from some versions of WordPerfect +files. You won't need it if you buy the superior wp2html. + +If you do use it, then edit the line in doc2html.pl: + +my $CATWPD = ''; + +to set the variables to the full pathname of catwpd. + + +UTILITY PPTHTML +=============== + +obtain ppthtml from http://www.xlhtml.org, where it is bundled in with +xlhtml. + +In doc2html.pl, edit the line: + +my $PPT2HTML = ''; + +to set $PPT2HTML to the full pathname of ppthtml. + +ppthtml converts Microsoft Powerpoint files into HTML. It uses the input +filename as the title. 
doc2html.pl replaces this with the original +filename from the URL in square brackets. + + +UTILITY XLHTML +============== + +Obtain xlhtml from http://www.xlhtml.org + +In doc2html.pl, edit the line: + +my $XLS2HTML = ''; + +to set $XLS2HTML to the full pathname of xlhtml. + +xlhtml converts Microsoft Excel spreadsheets into HTML. It uses the input +filename as the title. doc2html.pl replaces this with the original +filename from the URL in square brackets. + +The present version of xlHtml (0.4) writes HTML output, but does not +mark up hyperlinks in .xls files as links in its output. + +An alternative to xlHtml is xls2csv, see below. + + +UTILITY RTF2HTML +================ + +Obtain rtf2html from http://www.ice.ru/~vitus/catdoc/ + +In doc2html.pl, edit the line: + +my $RTF2HTML = ''; + +to set $RTF2HTML to the full pathname of rtf2html. + +rtf2html converts Rich Text Font documents into HTML. It uses the input +filename as the title, doc2html.pl replaces this with the original +filename from the URL within square brackets. + + +UTILITY PS2ASCII +================ + +Ps2ascii is a PostScript to text converter. + +In doc2html.pl, edit the line: + +my $CATPS = ''; + +to the correct full pathname of ps2ascii. + +ps2ascii comes with ghostscript 3.33 (or later) package, which is +pre-installed on many Unix systems. Commonly, it is a Bourne-shell +script which invokes "gs", the Ghostscript binary. doc2html.pl has +provision for adding the location of gs to the search path. + + +UTILITY PDFTOTEXT +================= + +pdftotext converts Adobe PDF files to text. pdfinfo is a tool which +displays information about the document, and is used to obtain its +title, etc. Get them from the xpdf package at +http://www.foolabs.com/xpdf/ + +In script pdf2html.pl, change the lines: + +my $PDFTOTEXT = "/... .../pdftotext"; +my $PDFINFO = "/... .../pdfinfo"; + +to the correct full pathnames. + +Edit doc2html.pl to include the full pathname of the pdf2html.pl script. 
+ +pdf2text may fail to convert PDF documents which have been truncated +because htdig has max_doc_size set to smaller than the documents full +size. Some PDF documents do not allow text to be extracted. + + +UTILITY CATXLS +============== + +The Excel to .csv converter, xls2csv, is included with recent versions of +catdoc. This is an alternative to xlhtml (see above). + +Edit the line: + +my $CATXLS = ''; + +to the full pathname of xls2csv. + +Xls2csv translates Excel spread sheets into comma-separated data. + + +UTILITY SWFPARSE +================ + +swfparse (aka swfdump) extracts information from Shockwave flash files, +and can be obtained from the contribs section of the Ht://Dig web site, +where you obtained doc2html. + +Perl script swf2html.pl calls swfparse and writes HTML output containing +links to the URLs found in the Shockwave file. It does NOT extract text +from the file. + +In script swf2html.pl, change the line: + +my $SWFPARSE = "/... .../swfdump"; + +to the full pathname. + +Edit doc2html.pl to include the full pathname of the swf2html.pl script. + + +LOGGING +======= + +Output of logging information and error messages is controlled by the +environmental variable DOC2HTML_LOG, which may be set in the rundig +script. If it is not set then only error messages output by doc2html.pl +and by the conversion utilities it calls are returned to htdig and will +appear in its STDOUT. If DOC2HTML_LOG is set to a filename, then +doc2html.pl appends logging information and any error messages to the +file. If DOC2HTML_LOG is set but blank, or the file cannot be opened +for writing, logging information and error messages are passed back to +htdig and will appear its STDOUT. + +In doc2html.pl, the variables $Emark and $EEmark, set in subroutine init, +are used to highlight error messages. + +The number of lines of STDERR output from a utility which is logged or +passed back to htdig is controlled by the variable $Maxerr set in +routine "init" of doc2html.pl. 
This is provided in order to curb the +large number of error messages which some utilities can produce from +processing a single file. + + +TIMEOUT +======= + +If possible, install Perl module Sys::AlarmCall, obtainable from CPAN if +you don't already have it. This module is used by doc2html.pl to +terminate a utility if it takes too long to finish. The line in +doc2html.pl: + + $Time = 60; # allow 60 seconds for external utility to complete + +may be altered to suit. + + +LIMITING INPUT AND OUTPUT +========================= + +The environmental variable DOC2HTML_IP_LIMIT may be set in the rundig +script to limit the size of the file which doc2html.pl will attempt to +convert. The default value is 20000000. Doc2html.pl will return no +output to htdig if the file size is equal to or greater than this size. + +You are recommended to set DOC2HTML_IP_LIMIT to the same as the +"max_doc_size" parameter in the htdig configuration file. Then no +attempt will be made to extract text from files which have been truncated +by htdig. It is not possible to extract any text from .PDF files, for +example, if they have been truncated. + +The environmental variable DOC2HTML_OP_LIMIT may be set in the rundig +script to limit the output sent back to htdig by a single call to +doc2html.pl. The default value is 10000000. Doc2html.pl will stop +returning output to htdig once the DOC2HTML_OP_LIMIT has been reached. +This is a precaution against the unlikely event of a conversion utility +returning disproportionately large amounts of data.
+ + +CONTACT +======= + +Any queries regarding doc2html are best sent to the mailing list + +The author can be emailed at [email protected] + +David Adams +Information Systems Services +University of Southampton + +27-November-2002 diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/README b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/README new file mode 100644 index 00000000..427eb8ce --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/README @@ -0,0 +1,25 @@ +Readme for doc2html + +External converter scripts for ht://Dig (version 3.1.4 and later), that +convert Microsoft Word, Excel and Powerpoint files, and PDF, +PostScript, RTF, and WordPerfect files to text (in HTML form) so they +can be indexed. Uses a variety of conversion programs: + + wp2html - to convert Wordperfect and Word7 & 97 documents to HTML + catdoc - to extract text from Word documents + catwpd - to extract text from WordPerfect documents [alternative to wp2html] + rtf2html - to convert RTF documents to HTML + pdftotext - to extract text from Adobe PDFs + ps2ascii - to extract text from PostScript + pptHtml - to convert Powerpoint files to HTML + xlHtml - to convert Excel spreadsheets to HTML + xls2csv - to extract data from Excel spreadsheets [alternative to xlHtml] + swfparse - to extract links from Shockwave flash files. + +The main script, doc2html.pl, is easily edited to include the available +utilities, and new utilities are easily incorporated. + +Written by David Adams (University of Southampton), and based on the +conv_doc.pl script by Gilles Detillieux. + +For more information see the DETAILS file. diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.cfg b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.cfg new file mode 100644 index 00000000..0bff981a --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.cfg @@ -0,0 +1,413 @@ +# Configuration file for use with doc2html.pl, which is used +# to index Word, WordPerfect , etc.
files using Ht://dig. +# +# Based on wp2html.cfg file supplied with wp2html version 3.0 + +# The special token "typeout" simply outputs the given text +# and can be used to inform users of versions, configuration changes etc. +typeout="" + +#------------------- Single character translations --------------- +# Protect HTML magic symbols. +'<'="<" +'>'=">" +'&'="&" +'"'=""" + +#------------------- WP code translations --------------- +# File header. BEGIN is called before any text is output +# BEGIN is passed three strings being the +# Input Directory, Input file name and Input file type. +# Do what you like with them! + +BEGIN="<HTML> +<HEAD> +<Title>%X<XDocSubject></Title> +<META name=\"keywords\" content=\"%X<XDocKeywords>\"> +<META name=\"description\" content=\"%X<XDocAbstract>\"> +</HEAD> +<BODY> +<p> +%xH +" +# Beginning of a subpart. This is called for each file. +begin="<html> +<head> +<title>%X<XDocSubject - %O</title> +</head> +<body>\n" + +# File end. END is called at the end of the document +# You may wish to insert signatures etc. +END="<p> +%xf +%xF +%X<XDocAuthor> +</BODY> +</HTML>\n" + +# End of a subpart. This is called for each sub part of a file except last. +end="\n%xf\n</body>\n</html>\n" + +# End of the last subpart. This is only for the final sub-part +# which may wish to have a different ending to the others (like +# perhaps not refering to the NEXT chapter?) +End="\n%xf\n</body>\n</html>\n" + +# Message output by wp2html into output file but not to be displayed +# or for "hidden" WP text +Message="<!-- " +message="-->" + +# PageNo="\\folio{}" # insert page number +# RomanPage="%\n%% Set roman page number %1\n" # set roman numerals +# ArabicPage="%\n%% Set arabic page number %1\n" # set arabic numerals + +HSpace=" " # Unbreakable (Hard) space + +# Tabs in Netscape (before HTML3) cannot be done properly +# We fudge them here with non breaking space until Netscape 1.2? 
+Tab=" " + +# Hard page looks quite good as a line +HPg="\n" +# Soft page is usually ignored +SPg=" " + +CondEOP=" " + +HRt="<br>\n" # Single Hard return just a break +HRt2="<p>\n" # Two or more terminates paragraph +SRt="\n" # Soft return is a newline +DSRt="\n" # Deletable return at end of line +DSPg="\n" # Deletable return at end of page + +softHyphen="" # Hyphens are ignored, since wrapping is up the clients. +softHyphenEOL="" # same for hyphens at end of a line +hardHyphen="-" # Nonbreaking hyphen, must put in +autoHyphen="" # auto Hyphens will be ignored too +NoHyphWord="" # Inhibit hyphenation of this word + +# Margins are left as comments until HTML3 arrives +Marg=" " +TopMarg=" " +PageLength=" " + +# Line spacing changes are ignored + +SS="" +1.5S="" +DS="" +TS="" +LS="" +LPI="" + +# Font changes mapped to Netscape font size defn +ExtraLarge="<H1>" +extralarge="</h1>" +VeryLarge="<h2>" +verylarge="</h2>" +Large="<h3>" +large="</h3>" +Small="<h5>" +small="</h5>" +Fine="</h6>" +fine="<h6>" + +FontColour="\n<font color=\"#%s\">" +fontColour="\n</font>" + +Font=" " +font=" " + +Bold="<b>" # Boldface +bold="</b>" +Und="<u>" +und="</u>" +Red="" # Redlining +red="" +Strike="" # Strikeout XXX +strike="" +Italics="<i>" +italics="</i>" +Rev="" # Reverse video XXX +rev="" +Over="" # overprinting not supported XXX +over="" +# Netscape 2 and after can use +Sup="<sup>" +sup="</sup>" +Sub="<sub>" +sub="</sub>" + +# UpHalfLine, DownHalfLine, AdvanceToHalf -- undefined + +# Indent mapped to unordered lists, good for blocked indents +#Indent="\n<ul>\n" +#indent="\n</ul>\n" +#DIndent="\n<ul>\n" +#dindent="\n</ul>\n" + +# Indents as space, use if indents are like TABS for you +Indent=" " +indent="" +DIndent=" " +dindent="" + +# Margin release is passed one parameter, the number of characters. 
+MarginRelease=" " + +Center="<Div align=\"center\">" # centering, Netscape way +center="</Div>\n" + +Left="<Div align=\"left\">" # Netscape left justify +left="</Div>" + +Right="<Div align=\"right\">" # Netscape right justify +right="</Div>" + +Full="<Div align=\"full\">" # Netscape full justify +full="</Div>" + +# Can use also +# Left +# left +# Full +# full + + +# Math, math, MathCalc, MathCalcColumn, SubTtl, IsSubTtl, Ttl, IsTtl, GrandTtl +# -- undefined +Column="<MULTICOL COLS=%1>" +column="</MULTICOL>" + +Header="\n" +header="\n" +Footer="\n" +footer="\n" + +Footnote=" <Font size=2> +<a href=\"#Footnote%1\" name=\"Footref%1\"> +</Font> " +footnote="</a>" +FootnoteFormat="<a href=\"#Footref%1\" name=\"Footnote%1\">%1</i></a><i>" +footnoteFormat="</i>" + +# Displays for various automatic numbers +# uncomment these to "eat" the autonumbers inserted by WP6 +FootnoteDisplay="%e" +footnoteDisplay="%f" + +#EndnoteDisplay="%e" +#endnoteDisplay="%f" +#ParanumDisplay="%e" +#paranumDisplay="%f" +#LinenumDisplay="%e" +#linenumDisplay="%f" + +BeginTabs="" +SetTab="" +SetTabCenter="" +SetTabRight="" +SetTabDecimal="" +EndTabs="" + +Hyph="" # Allow hyphenation +hyph="" # Disable hyphenation +Wid="" # Widow protection +wid="" # Allow widows + +# HZone, DAlign -- undefined + +Supp=" " +CtrPg=" " +SetFont=" " +SetBin=" " + +# True table definitions, these are Netscape style (HTML3) +# Start of a table +Table="\n<Table>" +# end of a table +table="\n</Table>\n" + +# New row +Row="\n<Tr>" +# End row +row="\n</Tr>" + +# New cell. Is passed the col and row spans integers and align flags +#Cell="\n<Td ColSpan=%1 RowSpan=%2 Align=%u Valign=%v>\n" +Cell="\n<Td>\n" +# End cell +cell="\n</Td>" + +# Table header cells. 
+HeadCell="\n<Th ColSpan=%1 RowSpan=%2 Align=%u Valign=%v>\n" +HeadCell="\n<Th>\n" +# End header cell +headCell="\n</Th>" + +# Ordinary WP comment anywhere in the document, passed comment text +Comment="\n<!-- WP Comment " +comment=" -->\n" + +# default Style operation for styles user has not names below +defaultStyleOn=" " +defaultStyleOff=" " +defaultStyleOpen=" " +defaultStyleEnd=" " + +# Set defaults for TOC markers here +ToC=" " +toC=" " +ToC1=" " +toC1=" " +ToC2=" " +toC2=" " +ToC3=" " +toC3=" " +ToC4=" " +toC4=" " +ToC5=" " +toC5=" " + +# Detect start and end of index in document +Index=" " +index=" " + +# Set defaults for List markers here +List=" " +list=" " +List1=" " +list1=" " +List2=" " +list2=" " +List3=" " +list3=" " +List4=" " +list4=" " +List5=" " +list5=" " + +ToA=" " +toA=" " +ToAMark=" " + +XrefMark=" " +xrefMark=" " +XrefTarget=" " + +# Figure inside WP. +# Right now we have a confusing 3 options, that is we have +# A WPG image inside Wordperfect, +# 1. With no GIF/JPEG conversion available +# 2. Of unknown size but a GIF/JPEG conversion exists +# 3. 
With known (set) size and with GIF/JPEG conversion +# Depending on which of the above we find we call one of the next +# three tags, Figure, Image, ImageSized +# NO GIF/JPEG +Figure=" " + +# GIF/JPEG available, but sizes and alignment unknown +# Image="<a href=\"%s.%t\"><img src=\"%s.%u\"></a> +# Click thumbnail picture to see full size version" +# You can also do thumbnails too, like this +#Image="<a href=\"%s.%t\" target=\"Graphics\"><img src=\"%s.%u\"></a> +#Click thumbnail picture to see full size in separate window" +Image="<img src=\"%s.%t\"></a>" + + +# GIF/JPEG available, and sizes and alignment known +# If this is NOT given, Image will be used instead +#ImageSized="<img src=\"%s.%t\" height=%3 width=%4 align=%w>" + +# Boxes, Table, Text and User +TableBox="\n" +tableBox="\n" +TextBox="\n" +textBox="\n" +UserBox="\n" +userBox="\n" + +# Equations are rendered as rough text right now, wait for HTML3 +# and we can switch this on properly +Equation=" " +equation=" " + +# Captions for all boxes +Caption="<br><i>[" +caption="]</i><br>\n" + +HLine="<hr width=%1 size=%2 align=%u>" + +ParaNum1="%s " +ParaNum2="%s " +ParaNum3="%s " +ParaNum4="%s " +ParaNum5="%s " +ParaNum6="%s " +ParaNum7="%s " +ParaNum8="%s " +ParaNumEnd=" " + +PN0=" " +PN1=" " +PN2=" " +PN3=" " +PN4=" " +PN5=" " +PN6=" " +PN7=" " +PN8=" " + +#********************************************************************* +# End of required parameters -- start of optional entries + +# Here is the complete list of fields which can optionally be +# extracted from the extended document summary area of the file +# If they are not defined (to be anything) they will not be extracted +#!XDocName="Filename" +#!XDocType="Type" +XDocAuthor="" +XDocAbstract="" +#!XDocAccount="Account" +XDocKeywords="" +XDocSubject="%X<Title>" +#!XDocTypist="Typist" + +# These tags allow you to convert WP6 hypertext refs into HTML hypertext +# But you should be careful how you use them as documents NOT designed +# to be HTML (perhaps 
linking to other documents in unreachable +# directories) would generate confusing links. +#!HyperBegin="<a href=\"%s.htm#%t\">%e" +#!hyperBegin="%f" +#!HyperEnd="</a>%e" +#!hyperEnd="%f" + +# Or you can use WP hypertext refs as direct hypertext +# but make sure you insert the URL in the BOOKMARK field +HyperBegin="<a href=\"%t\">%e" +hyperBegin="%f" +HyperEnd="</a>%e" +hyperEnd="%f" + +# These tags allow you to convert WP6 bookmarks into appropriate +# HTML anchors which are needed if you use the Hyper tags above. +BookMark="<a name=\"%s\">" +bookMark="</a>" + +# Hypertext references in Word Documents (real URL) +Href="<a href=\"%s\">" +href="</a>" + +# These tags allow you to find the file name of included files +#SubDoc="<hr>Start Included file %s<hr>\n" +#subDoc="<hr>End Included file %s<hr>\n" +SubDoc="\n" +subDoc="\n" + +# These tags are trigger by WP Small Caps attributes (On/Off) +#!SmallCaps="" +#!smallCaps="" + +# End of main configuration file diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.pl b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.pl new file mode 100755 index 00000000..c69f00cc --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.pl @@ -0,0 +1,676 @@ +#!/usr/bin/perl +use strict; +# +# Version 3.0.1 19-September-2002 +# +# External converter for htdig 3.1.4 or later (Perl5 or later) +# Usage: (in htdig.conf) +# +#external_parsers: application/rtf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# text/rtf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/pdf->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/postscript->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/msword->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/wordperfect5.1->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/wordperfect6.0->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# 
application/msexcel->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/vnd.ms-excel->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/vnd.ms-powerpoint->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl +# application/x-shockwave-flash->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl \ +# application/x-shockwave-flash2-preview->text/html /opt/local/htdig-3.1.6/scripts/doc2html.pl +# +# Uses wp2html to convert Word and WordPerfect documents into HTML, and +# falls back to using Catdoc for Word and Catwpd for WordPerfect if +# Wp2html is unavailable or unable to convert. +# +# Uses range of other converters as available. +# +# If all else fails, attempts to read file without conversion. +# +######################################################################################## +# Written by David Adams <[email protected]>. +# Based on conv_doc.pl written by Gilles Detillieux <[email protected]>, +# which in turn was based on the parse_word_doc.pl script, written by +# Jesse op den Brouw <[email protected]>. 
+######################################################################################## + +# Install Sys::AlarmCall if you can +eval "use Sys::AlarmCall"; + +######## Full paths of conversion utilities ########## +######## YOU MUST SET THESE ########## +######## (leave null those you don't have) ########## + +# Wp2html converts Word & Wordperfect to HTML +# (get it from: http://www.res.bbsrc.ac.uk/wp2html/): +my $WP2HTML = ''; + +#Catwpd for WordPerfect to text conversion +# (you don't need this if you have wp2html) +# (get it from htdig site) +my $CATWPD = ''; + +# rtf2html converts Rich Text Font documents to HTML +# (get it from http://www.ice.ru/~vitus/catdoc/): +my $RTF2HTML = ''; + +# Catdoc converts Word (MicroSoft) to plain text +# (get it from: http://www.ice.ru/~vitus/catdoc/): + +#version of catdoc for Word6, Word7 & Word97 files: +my $CATDOC = ''; + +#version of catdoc for Word2 files: +my $CATDOC2 = $CATDOC; + +#version of catdoc for Word 5.1 for MAC: +my $CATDOCM = $CATDOC; + +# PostScript to text converter +# (get it from the ghostscript 3.33 (or later) package): +my $CATPS = ''; + +# add to search path the directory which contains gs: +#$ENV{PATH} .= ":/usr/freeware/bin"; + +# PDF to HTML conversion script: +my $PDF2HTML = ''; # full pathname of pdf2html/pl script + +# Excel (MicroSoft) to HTML converter +# (get it from www.xlhtml.org) +my $XLS2HTML = ''; + +# Excel (MicroSoft) to .CSV converter +# (you don't need this if you have xlhtml) +# (if you do want it, you can get it with catdoc) +my $CATXLS = ''; + +# Powerpoint (MicroSoft) to HTML converter +# (get it from www.xlhtml.org) +my $PPT2HTML = ''; + +# Shockwave Flash +# (extracts links from file) +my $SWF2HTML = ''; # full pathname of swf2html.pl script + +# OpenOffice.org files +#my $OpenOffice2XML = '/usr/bin/unzip'; +my $OpenOffice2XML = ''; +# (remove multi-byte unicode from XML in OOo documents) +#my $strip_unicode = '| /usr/bin/iconv -c -s -f UTF-8 -t ISO-8859-1'; +my $strip_unicode = 
''; + + +######################################################################## + +# Other Global Variables +my ($Success, $LOG, $Verbose, $CORE_MESS, $TMP, $RM, $ED, $Magic, $Time, + $Count, $Prog, $Input, $MIME_type, $URL, $Name, $Efile, $Maxerr, + $Redir, $Emark, $EEmark, $Method, $OP_Limit, $IP_Limit); +my (%HTML_Method, %TEXT_Method, %BAD_type); + + +&init; # initialise +my $size = -s $Input; +&quit("Input file size of $size at or above $IP_Limit limit" ) if $size >= $IP_Limit; +&store_methods; # +&read_magic; # Magic reveals type +&error_setup; # re-route standard error o/p from utilities + +# see if a document -> HTML converter will work: +&run('&try_html'); +if ($Success) { &quit(0) } + +# try a document -> text converter: +&run('&try_text'); +if ($Success) { &quit(0) } + +# see if a known problem +my $fail = &cannot_do; +if ($fail) { &quit($fail) } + +# last-ditch attempt, try copying document +&try_plain; +if ($Success) {&quit(0)} + +&quit("UNABLE to convert"); + +#------------------------------------------------------------------------------ + +sub init { + + # Doc2html log file + $LOG = $ENV{'DOC2HTML_LOG'} || ''; + # + if ($LOG) { + open(STDERR,">>$LOG"); # ignore possible failure to open + } # else O/P really does go to STDERR + + # Set to 1 for O/P to STDERR or Log file + $Verbose = exists($ENV{'DOC2HTML_LOG'}) ? 1 : 0; + + # Limiting size of file doc2html.pl will try to process (default 20Mbyte) + $IP_Limit = $ENV{'DOC2HTML_IP_LIMIT'} || 20000000; + + # Limit for O/P returned to htdig (default 10Mbyte) + $OP_Limit = $ENV{'DOC2HTML_OP_LIMIT'} || 10000000; + + # Mark error message produced within doc2html script + $Emark = "!\t"; + # Mark error message produced by conversion utility + $EEmark = "!!\t"; + + # Message to STDERR if core dump detected + $CORE_MESS = "CORE DUMPED"; + + # Directory for temporary files + $TMP = "/tmp/htdig"; + if (! 
-d $TMP) { + mkdir($TMP,0700) or die "Unable to create directory \"$TMP\": $!"; + } + # Current directory during run of script: + chdir $TMP or warn "Cannot change directory to $TMP\n"; + + # File for error output from utility + $Efile = 'doc_err.' . $$; + + # Max. number of lines of error output from utility copied + $Maxerr = 10; + + # System command to delete a file + $RM = "/bin/rm -f"; + + # Line editor to do substitution + $ED = "/bin/sed -e"; + if ($^O eq "MSWin32") {$ED = "$^X -pe"} + + $Time = 60; # allow 60 seconds for external utility to complete + + $Success = 0; + $Count = 0; + $Method = ''; + $Prog = $0; + $Prog =~ s#^.*/##; + $Prog =~ s/\..*?$//; + + $Input = $ARGV[0] or die "No filename given\n"; + $MIME_type = $ARGV[1] or die "No MIME-type given"; + $URL = $ARGV[2] || '?'; + $Name = $URL; + $Name =~ s#^.*/##; + $Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie; + + if ($Verbose and not $LOG) { print STDERR "\n$Prog: [$MIME_type] " } + if ($LOG) { print STDERR "$URL [$MIME_type] " } + +} + +#------------------------------------------------------------------------------ + +sub store_methods { +# The method of dealing with each file type is set up here. 
+# Edit as necessary + + my ($mime_type,$magic,$cmd,$cmdl,$type,$description); + + my $name = quotemeta($Name); + + ####Document -> HTML converters#### + + # WordPerfect documents + if ($WP2HTML) { + $mime_type = "application/wordperfect|application/msword"; + $cmd = $WP2HTML; + $cmdl = "($cmd -q -DTitle=\"[$name]\" -c doc2html.cfg -s doc2html.sty -i $Input -O; $RM CmdLine.ovr)"; + $magic = '\377WPC'; + &store_html_method('WordPerfect (wp2html)',$cmd,$cmdl,$mime_type,$magic); + } + + # Word documents + if ($WP2HTML) { + $mime_type = "application/msword"; + $cmd = $WP2HTML; + $cmdl = "($cmd -q -DTitle=\"[$name]\" -c doc2html.cfg -s doc2html.sty -i $Input -O; $RM CmdLine.ovr)"; + $magic = '^\320\317\021\340'; + &store_html_method('Word (wp2html)',$cmd,$cmdl,$mime_type,$magic); + } + + # RTF documents + if ($RTF2HTML) { + $mime_type = "application/msword|application/rtf|text/rtf"; + $cmd = $RTF2HTML; + # Rtf2html uses filename as title, change this: + $cmdl = "$cmd $Input | $ED \"s#^<TITLE>$Input</TITLE>#<TITLE>[$name]</TITLE>#\""; + $magic = '^{\134rtf'; + &store_html_method('RTF (rtf2html)',$cmd,$cmdl,$mime_type,$magic); + } + + # Microsoft Excel spreadsheet + if ($XLS2HTML) { + $mime_type = "application/msexcel|application/vnd.ms-excel"; + $cmd = $XLS2HTML; + # xlHtml uses filename as title, change this: + $cmdl = "$cmd -fw $Input | $ED \"s#<TITLE>$Input</TITLE>#<TITLE>[$name]</TITLE>#\""; + $magic = '^\320\317\021\340'; + &store_html_method('Excel (xlHtml)',$cmd,$cmdl,$mime_type,$magic); + } + + # Microsoft Powerpoint Presentation + if ($PPT2HTML) { + $mime_type = "application/vnd.ms-powerpoint"; + $cmd = $PPT2HTML; + # xlHtml uses filename as title, change this: + $cmdl = "$cmd $Input | $ED \"s#<TITLE>$Input</TITLE>#<TITLE>[$name]</TITLE>#\""; + $magic = '^\320\317\021\340'; + &store_html_method('Powerpoint (pptHtml)',$cmd,$cmdl,$mime_type,$magic); + } + + # Adobe PDF file using Perl script + if ($PDF2HTML) { + $mime_type = "application/pdf"; + $cmd = $PDF2HTML; 
+ # Replace default title (if used) with filename: + $cmdl = "$cmd $Input $mime_type $name"; + $magic = '%PDF-|\0PDF CARO\001\000\377'; + &store_html_method('PDF (pdf2html)',$cmd,$cmdl,$mime_type,$magic); + } + + # Shockwave Flash file using Perl script + if ($SWF2HTML) { + $mime_type = "application/x-shockwave-flash"; + $cmd = $SWF2HTML; + $cmdl = "$cmd $Input"; + $magic = '^FWS[\001-\010]'; # versions 1 to 5, perhaps some later versions + &store_html_method('Shockwave-Flash (swf2html)',$cmd,$cmdl,$mime_type,$magic); + } + + # OpenOffice Documents + if ($OpenOffice2XML) { + $mime_type = "application/vnd.sun.xml.writer|application/vnd.sun.xml.impress|application/vnd.sun.xml.calc|application/vnd.sun.xml.draw|application/vnd.sun.xml.math"; + $cmd = $OpenOffice2XML; + $cmdl = "$cmd -p -qq $Input content.xml | /bin/sed -r 's/<[^>]*>/ /gi' $strip_unicode"; + $magic = 'PK'; + &store_html_method('OpenOffice XML (oo2xml)',$cmd,$cmdl,$mime_type,$magic); + } + + ####Document -> Text converters#### + + # Word6, Word7 & Word97 documents + if ($CATDOC) { + $mime_type = "application/msword"; + $cmd = $CATDOC; + # -b option increases chance of success: + $cmdl = "$cmd -a -b -w $Input"; + $magic = '^\320\317\021\340'; + &store_text_method('Word (catdoc)',$cmd,$cmdl,$mime_type,$magic); + } + + # Word2 documents + if ($CATDOC2) { + $mime_type = "application/msword"; + $cmd = $CATDOC2; + $cmdl = "$cmd -a -b -w $Input"; + $magic = '^\333\245-\000'; + &store_text_method('Word2 (catdoc)',$cmd,$cmdl,$mime_type,$magic); + } + + # Word 5.1 for MAC documents + if ($CATDOCM) { + $mime_type = "application/msword"; + $cmd = $CATDOCM; + $cmdl = "$cmd -a -b -w $Input"; + $magic = '^\3767\000#\000\000\000\000'; + &store_text_method('MACWord (catdoc)',$cmd,$cmdl,$mime_type,$magic); + } + + # PostScript files + if ($CATPS) { + $mime_type = "application/postscript"; + $cmd = $CATPS; + # allow PS interpreter to give error messages + $cmdl = "($cmd; $RM _temp_.???) 
< $Input"; + $magic = '^.{0,20}?%!|^\033%-12345.*\n%!'; + &store_text_method('PostScript (ps2ascii)',$cmd,$cmdl,$mime_type,$magic); + } + + # Microsoft Excel file + if ($CATXLS) { + $mime_type = "application/vnd.ms-excel"; + $cmd = $CATXLS; + $cmdl = "$cmd $Input"; + $magic = '^\320\317\021\340'; + &store_text_method('MS Excel (xls2csv)',$cmd,$cmdl,$mime_type,$magic); + } + + # WordPerfect document + if ($CATWPD) { + $mime_type = "application/wordperfect|application/msword"; + $cmd = $CATWPD; + $cmdl = "$cmd $Input"; + $magic = '\377WPC'; + &store_text_method('WordPerfect (catwpd)',$cmd,$cmdl,$mime_type,$magic); + } + + + ####Documents that cannot be converted#### + + # wrapped encapsulated Postscript + $type = "EPS"; + $magic = '^\305\320\323\306 \0'; + $description = 'wrapped Encapsulated Postscript'; + &store_cannot_do($type,$magic,$description); + + # Shockwave Flash version 6 + $type = "SWF6"; + $description = 'Shockwave-Flash Version 6'; + $magic = '^CWS\006'; + &store_cannot_do($type,$magic,$description); + +#### Binary (data or whatever) +###$type = "BIN"; +###$magic = '[\000-\007\016-\037\177]'; # rather crude test! 
+###$description = 'apparently binary'; +###&store_cannot_do($type,$magic,$description); + + return; +} + +#------------------------------------------------------------------------------ + +sub read_magic { + + # Read first bytes of file to check for file type + open(FILE, "< $Input") || die "Can't open file $Input\n"; + read FILE,$Magic,256; + close FILE; + + return; +} + +#------------------------------------------------------------------------------ + +sub error_setup { + + if ($Efile) { + open SAVERR, ">&STDERR"; + if (open STDERR, "> $Efile") { + print SAVERR " Overwriting $Efile\n" if (-s $Efile); + $Redir = 1; + } else { close SAVERR } + } + +} + +#------------------------------------------------------------------------------ + +sub run { + + my $routine = shift; + my $return; + + if (defined &alarm_call) { + $return = alarm_call($Time, $routine); + } else { + eval $routine; + $return = $@ if $@; + } + + if ($return) { &quit($return) } + +} + +#------------------------------------------------------------------------------ + +sub try_html { + + my($set,$cmnd,$type); + + $Success = 0; + foreach $type (keys %HTML_Method) { + $set = $HTML_Method{$type}; + if (($MIME_type =~ m/$set->{'mime'}/i) and + ($Magic =~ m/$set->{'magic'}/s)) { # found the method to use + $Method = $type; + my $cmnd = $set->{'cmnd'}; + if (! 
-x $cmnd) { + warn "Unable to execute $cmnd for $type document\n"; + return; + } + if (not open(CAT, "$set->{'command'} |")) { + warn "$cmnd doesn't want to be opened using pipe\n"; + return; + } + while (<CAT>) { + # getting something, so it is working + $Success = 1; + if ($_ !~ m/^<!--/) { # skip comment lines inserted by converter + print; + $Count += length; + if ($Count > $OP_Limit) { last } + } + } + close CAT; + last; + } + } + return; +} + +#------------------------------------------------------------------------------ + +sub try_text { + + my($set,$cmnd,$type); + + $Success = 0; + foreach $type (keys %TEXT_Method) { + $set = $TEXT_Method{$type}; + if (($MIME_type =~ m/$set->{'mime'}/i) and + ($Magic =~ m/$set->{'magic'}/s)) { # found the method to use + $Method = $type; + my $cmnd = $set->{'cmnd'}; + if (! -x $cmnd) { die "Unable to execute $cmnd for $type document\n" } + + # Open file via selected converter, output head, then its text: + open(CAT, "$set->{'command'} |") or + die "$cmnd doesn't want to be opened using pipe\n"; + &head; + print "<BODY>\n<PRE>\n"; + $Success = 1; + while (<CAT>) { + s/\255/-/g; # replace dashes with hyphens + # replace bell, backspace, tab. etc. 
with single space: + s/[\000-\040]+/ /g; + if (length > 1) { # if not just a single character, eg space + print &HTML($_), "\n"; + $Count += length; + if ($Count > $OP_Limit) { last } + } + } + close CAT; + + print "</PRE>\n</BODY>\n</HTML>\n"; + last; + } + + } + + return; +} + +#------------------------------------------------------------------------------ + +sub cannot_do { + + my ($type,$set); + + # see if known, unconvertable type + $Method = ''; + foreach $type (keys %BAD_type) { + $set = $BAD_type{$type}; + if ($Magic =~ m/$set->{'magic'}/s) { # known problem + return "CANNOT DO $set->{'desc'} "; + } + } + + return 0; +} + +#------------------------------------------------------------------------------ + +sub try_plain { + + $Success = 0; + ####### if ($Magic !~ m/^[\000-\007\016-\037\177]) { + if (-T $Input) { # Looks like text, so go for it: + $Method = 'Plain Text'; + open(FILE, "<$Input") || die "Error reading $Input\n"; + $Success = 1; + $Method = 'Plain Text'; + &head; + print "<BODY>\n<PRE>\n"; + + while (<FILE>) { + # replace bell, backspace, tab. etc. with single space: + s/[\000-\040\177]+/ /g; + if (length > 1) { + print &HTML($_), "\n"; + $Count += length; + if ($Count > $OP_Limit) { last } + } + } + close FILE; + print "</PRE>\n</BODY>\n</HTML>\n"; + + } else { $Method = '' } + + return; +} + +#------------------------------------------------------------------------------ + +sub HTML { + + my $text = shift; + + $text =~ s/\f/\n/gs; # replace form feed + $text =~ s/\s+/ /g; # replace multiple spaces, etc. 
with a single space + $text =~ s/\s+$//gm; # remove trailing spaces + $text =~ s/&/&/g; + $text =~ s/</</g; + $text =~ s/>/>/g; + + return $text; +} + +#------------------------------------------------------------------------------ + +sub store_html_method { + + my $type = shift; + my $cmnd = shift; + my $cline = shift; + my $mime = shift; + my $magic = shift; + + $HTML_Method{$type} = { + 'mime' => $mime, + 'magic' => $magic, + 'cmnd' => $cmnd, + 'command' => $cline, + }; + + return; +} + +#------------------------------------------------------------------------------ + +sub store_text_method { + + my $type = shift; + my $cmnd = shift; + my $cline = shift; + my $mime = shift; + my $magic = shift; + + $TEXT_Method{$type} = { + 'mime' => $mime, + 'magic' => $magic, + 'cmnd' => $cmnd, + 'command' => $cline, + }; + + return; +} + +#------------------------------------------------------------------------------ + +sub store_cannot_do { + + my $type = shift; + my $magic = shift; + my $desc = shift; + + $BAD_type{$type} = { + 'magic' => $magic, + 'desc' => $desc, + }; + + return; + +} + +#------------------------------------------------------------------------------ + +sub head { + + print "<HTML>\n<HEAD>\n"; + print "<TITLE>[" . $Name . 
"]</TITLE>\n"; + print "</HEAD>\n"; + +} + +#------------------------------------------------------------------------------ + +sub quit { + + if ($Redir) { # end redirection of STDERR to temporary file + close STDERR; + open STDERR, ">&SAVERR"; + } + + if ($Verbose) { + print STDERR "$Method $Count" if ($Success); + print STDERR "\n"; + } + + if ($Count > $OP_Limit) { + print STDERR $Emark, "Output truncated after limit $OP_Limit reached\n"; + } + + my $return = shift; + if ($return) { + print STDERR $Emark, $return, "\n"; + $return = 1; + } + + chdir $TMP; + if ($Efile && -s $Efile) { + open EFILE, "<$Efile"; + my $c = 0; + while (<EFILE>) { + $c++; + if ($c <= $Maxerr) { + print STDERR $EEmark, $_; + } + } + close EFILE; + print STDERR $Emark, " ... (total of $c lines of error messages)\n" if ($c > $Maxerr); + } + unlink $Efile if ($Efile && -e $Efile); + + if (-e "core" && (-M "core" < 0)) { + print STDERR $Emark, "$CORE_MESS\n"; + } + exit $return; +} diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.sty b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.sty new file mode 100644 index 00000000..fccfb8ee --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/doc2html.sty @@ -0,0 +1,40 @@ +Any Font 8 On ="<H6>" +Any Font 8 Off ="</H6>" +Any Font 9 On =" " +Any Font 9 Off =" " +Any Font 10 On =" " +Any Font 10 Off =" " +Any Font 11 On =" " +Any Font 11 Off =" " +Any Font 12 On ="<H4>" +Any Font 12 Off ="</H4>" +Any Font 14 On ="<H3>" +Any Font 14 Off ="</H3>" +Any Font 18 On ="<H3>" +Any Font 18 Off ="</H3>" +Any Font 24 On ="<H2>" +Any Font 24 Off ="</H2>" +Any Font 28 On ="<H2>" +Any Font 28 Off ="</H2>" +Any Font 32 On ="<H1>" +Any Font 32 Off ="</H1>" +Any Font 36 On ="<H1>" +Any Font 36 Off ="</H1>" + +# Now the really specific stuff for WWW Urls +# This one decodes the special Url macro which puts the URL reference +# inside a WP Comment (so it is hidden but editable) and makes the +# link text blue and underline +# If we find a 
comment inside an Url style pair defined by the user +# we can be pretty sure it was deliberate ( done by MACRO), so we +# have this special translation just for Comments inside Url Styles +# Course, if not defined (UrlComment) it will default to standard +# +UrlOn="%e" # Eat style codes +UrlOnEnd="%f" # Style end for UrlOn, restart output +# the comment text, passed as parameter 2 text, is the URL +UrlCommentOn="<a href=\"" +UrlCommentOff="\">" # URL link +UrlOff="</a>%e" # Anchor Off and eat style codes +UrlOffEnd="%f" # Style end for UrlOff, restart output + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/pdf2html.pl b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/pdf2html.pl new file mode 100755 index 00000000..fee93282 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/pdf2html.pl @@ -0,0 +1,161 @@ +#!/usr/bin/perl -w +use strict; +# +# Version 1.0.1 12-Feb-2002 +# Written by David Adams <[email protected]> +# +# Uses pdftotext & pdfinfo utilities from the xpdf package +# to read an Adobe Acrobat file and produce HTML output. +# +# Can be called directly from htdig as an external converter, +# or may be called by doc2html.pl converter script. +# + +####--- Configuration ---#### +# Full paths of pdtotext and pdfinfo +# (get them from the xpdf package at http://www.foolabs.com/xpdf/): + +#### YOU MUST SET THESE #### + +my $PDFTOTEXT = "/... .../pdftotext"; +my $PDFINFO = "/... .../pdfinfo"; +# +# De-hyphenation option (only affects end-of-line hyphens): +my $Dehyphenate = 1; +# +# Set title to be used when none is found: +my $Default_title = "Adobe Acrobat Document"; +# +# make portable to win32 platform or unix: +my $null = "/dev/null"; +if ($^O eq "MSWin32") {$null = "nul";} +####--- End of configuration ---### + +if (! 
-x $PDFTOTEXT) { die "Unable to execute pdftotext" } + +my $Input = $ARGV[0] || die "Usage: pdf2html.pl filename [mime-type] [URL]"; +my $MIME_type = $ARGV[1] || ''; +if ($MIME_type and ($MIME_type !~ m#^application/pdf#i)) { + die "MIME/type $MIME_type wrong"; +} + +my $Name = $ARGV[2] || ''; +$Name =~ s#^(.*/)##; +# decode if 2nd argument was a URL +$Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie if $1; + +&pdf_head; +&pdf_body; +exit; + +#------------------------------------------------------------------------------ + +sub pdf_head { +# +# Contributed by Greg Holmes and Michael Fuller +# (any errors by David Adams) +# + my $title = ''; + my $subject = ''; + my $keywords = ''; + if (open(INFO, "$PDFINFO '$Input' 2>$null |")) { + while (<INFO>) { + if (m/^title:/i) { + s/^title:\s+//i; + $title = &clean_pdf($_); + } elsif (m/^subject:/i) { + s/^subject:\s+//i; + $subject = &clean_pdf($_); + } elsif (m/^keywords:/i) { + s/^keywords:\s+//i; + $keywords = &clean_pdf($_); + } + + } + close INFO; + } else { warn "cannot execute pdfinfo" } + if (not length $title) { + if ($Name) { + $title = '[' . $Name . ']'; + } else { + $title = $Default_title; + } + } + + print "<HTML>\n<HEAD>\n"; + print "<TITLE>$title</TITLE>\n"; + if (length $subject) { + print '<META NAME="DESCRIPTION" CONTENT="' . $subject. "\">\n"; + } + if (length $keywords) { + print '<META NAME="KEYWORDS" CONTENT="' . $keywords . 
"\">\n"; + } + print "</HEAD>\n"; + +###print STDERR "\n$Name:\n"; +###print STDERR "\tTitle:\t$title\n"; +###print STDERR "\tDescription:\t$subject\n"; +###print STDERR "\tKeywords:\t$keywords\n"; + +} + +#------------------------------------------------------------------------------ + +sub pdf_body { + + my $bline = ''; + open(CAT, "$PDFTOTEXT -raw '$Input' - |") || + die "$PDFTOTEXT doesn't want to be opened using pipe\n"; + print "<BODY>\n"; + while (<CAT>) { + while ( m/[A-Za-z\300-\377]-\s*$/ && $Dehyphenate) { + $_ .= <CAT>; + last if eof; + s/([A-Za-z\300-\377])-\s*\n\s*([A-Za-z\300-\377])/$1$2/s; + } + s/\255/-/g; # replace dashes with hyphens + # replace bell, backspace, tab. etc. with single space: + s/[\000-\040]+/ /g; + $_ = &HTML($_); + if (length) { + print $bline, $_, "\n"; + $bline = "<br>\n"; + } else { + $bline = "<p>\n"; + } + } + close CAT; + + print "</BODY>\n</HTML>\n"; + return; +} + +#------------------------------------------------------------------------------ + +sub HTML { + + my $text = shift; + + $text =~ s/\f/\n/gs; # replace form feed + $text =~ s/\s+/ /g; # replace multiple spaces, etc. 
with a single space + $text =~ s/\s+$//gm; # remove trailing space + $text =~ s/&/&/g; + $text =~ s/</</g; + $text =~ s/>/>/g; + chomp $text; + + return $text; +} + +#------------------------------------------------------------------------------ + +sub clean_pdf { +# removes odd pair of characters that may be in pdfinfo output +# Any double quotes are replaced with single + + my $text = shift; + chomp $text; + $text =~ s/\376\377//g; + $text =~ s/\"/\'/g; + return $text; +} diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doc2html/swf2html.pl b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/swf2html.pl new file mode 100755 index 00000000..5f0cdb07 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doc2html/swf2html.pl @@ -0,0 +1,67 @@ +#!/usr/bin/perl -w +use strict; +# +# Version 1.1 17-May-2002 +# Written by David Adams <[email protected]> +# +# Uses swfparse utlity to extract URL's from Shockwave flash files +# +# Can be called directly from htdig as an external converter, +# or may be called by doc2html.pl converter script. +# + +####--- Configuration ---#### +# Full path of swfparse +# (get it from http:/www.htdig.org/files/contrib/contrib/parsers/) + +##### YOU MUST SET THIS #### + +my $SWFPARSE = "/.. .../swfdump"; + +####--- End of configuration ---### + +if (! 
-x $SWFPARSE) { die "Unable to execute swfparse" } + +my $Input = $ARGV[0] || die "Usage: swf2html.pl filename [mime-type] [URL]"; +my $MIME_type = $ARGV[1] || ''; +if ($MIME_type and ($MIME_type !~ m#^application/x-shockwave-flash#i)) { + die "MIME/type $MIME_type wrong"; +} + +my $Name = $ARGV[2] || ''; +$Name =~ s#^(.*/)##; +# decode if 2nd argument was a URL +$Name =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie if $1; + +print <<"HEAD"; +<HTML> +<HEAD> +<TITLE>SWF $Name</TITLE> +<META NAME="robots" CONTENT="follow, noindex"> +</HEAD> +HEAD + +open(CAT, "$SWFPARSE -t '$Input'|") || + die "$SWFPARSE doesn't want to be opened using pipe\n"; + +print "<BODY>\n"; +my $c = 0; +while (<CAT>) { +### if ($_ !~ m/\s+getUrl\s+(.*?)\s+.*$/) { next } + if ($_ !~ m/\s+getUrl\s+(.*)$/) { next } + my $link = $1 . ' '; + if ($link =~ m/^FSCommand:/) { next } + if ($link =~ m/\s+target\s+/) { + $link =~ s/^(.*)\s+target\s+.*$/$1/; + } else { + $link =~ s/^(.*?)\s+.*$/$1/; + } + print '<A href="', $link, '"> </a>', "\n"; + $c++; +} +close CAT; + +print "</BODY>\n</HTML>\n"; +print STDERR "No links extracted\n" if ($c == 0); + +exit; diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doclist/doclist.pl b/debian/htdig/htdig-3.2.0b6/contrib/doclist/doclist.pl new file mode 100755 index 00000000..ef933de8 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doclist/doclist.pl @@ -0,0 +1,183 @@ +#!/usr/local/bin/perl + +## +## doclist.pl (C) 1995 Andrew Scherpbier +## +## This program will list the information in the documentdb generated by htdig. 
+## + +use GDBM_File; + +$dbfile = $ARGV[0]; + +tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "Unable to open $dbfile: $!"; + + +while (($key, $value) = each %docdb) +{ + next if $key =~ /^nextDocID/; + %record = parse_ref_record($value); + print "Title: $record{'TITLE'}\n"; + print "Descriptions: $record{'DESCRIPTIONS'}\n"; + print "URL: $record{'URL'}\n"; + print "\n"; +} + +sub parse_ref_record +{ + local($value) = @_; + local(%rec, $length, $count, $result); + + while (length($value) > 0) + { + $what = unpack("C", $value); + $value = substr($value, 1); + if ($what == 0) + { + # ID + $rec{"ID"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 1) + { + # TIME + $rec{"TIME"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 2) + { + # ACCESSED + $rec{"ACCESSED"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 3) + { + # STATE + $rec{"STATE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 4) + { + # SIZE + $rec{"SIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 5) + { + # LINKS + $rec{"LINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 6) + { + # IMAGESIZE + $rec{"IMAGESIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 7) + { + # HOPCOUNT + $rec{"HOPCOUNT"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 8) + { + # URL + $length = unpack("i", $value); + $rec{"URL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 9) + { + # HEAD + $length = unpack("i", $value); + $rec{"HEAD"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 10) + { + # TITLE + $length = unpack("i", $value); + $rec{"TITLE"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 11) + { + # DESCRIPTIONS + $count = unpack("i", $value); + 
$value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"DESCRIPTIONS"} = $result; + } + elsif ($what == 12) + { + # ANCHORS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"ANCHORS"} = $result; + } + elsif ($what == 13) + { + # EMAIL + $length = unpack("i", $value); + $rec{"EMAIL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 14) + { + # NOTIFICATION + $length = unpack("i", $value); + $rec{"NOTIFICATION"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 15) + { + # SUBJECT + $length = unpack("i", $value); + $rec{"SUBJECT"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 16) + { + # STRING (ignore, but unpack) + $length = unpack("i", $value); + $rec{"STRING"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 17) + { + # METADSC + $length = unpack("i", $value); + $rec{"METADSC"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 18) + { + # BACKLINKS + $rec{"BACKLINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 19) + { + # SIGNATURE + $rec{"SIG"} = unpack("i", $value); + $value = substr($value, 4); + } + } + return %rec; +} diff --git a/debian/htdig/htdig-3.2.0b6/contrib/doclist/listafter.pl b/debian/htdig/htdig-3.2.0b6/contrib/doclist/listafter.pl new file mode 100755 index 00000000..976cf333 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/doclist/listafter.pl @@ -0,0 +1,201 @@ +#!/usr/local/bin/perl + +## +## 
listafter.pl (C) 1996 Andrew Scherpbier +## +## This program will list all URLs which were modified after a specified date. +## For each URL, the following fields are displayed: +## Title +## Descriptions +## URL +## Last modification date (in ctime format) +## +## The date is specified as mm/dd/yyyy +## +## Example usage: +## listafter.pl 1/1/1996 /opt/www/htdig/sdsu.docdb +## + +use GDBM_File; +require('timelocal.pl'); + +$t = $ARGV[0]; +$t =~ m,([0-9]+)/([0-9]+)/([0-9]+),; +$when = timelocal(0, 0, 0, $2, $1 - 1, $3 - 1900); +$dbfile = $ARGV[1]; + +tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "Unable to open $dbfile: $!"; + +while (($key, $value) = each %docdb) +{ + next if $key =~ /^nextDocID/; + %record = parse_ref_record($value); + if ($record{'TIME'} >= $when) + { + print "Title: $record{'TITLE'}\n"; + print "Descriptions: $record{'DESCRIPTIONS'}\n"; + print "URL: $record{'URL'}\n"; + $w = localtime($record{'TIME'} * 1); + print "Modified: $w\n"; + print "\n"; + } +} + +sub parse_ref_record +{ + local($value) = @_; + local(%rec, $length, $count, $result); + + while (length($value) > 0) + { + $what = unpack("C", $value); + $value = substr($value, 1); + if ($what == 0) + { + # ID + $rec{"ID"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 1) + { + # TIME + $rec{"TIME"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 2) + { + # ACCESSED + $rec{"ACCESSED"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 3) + { + # STATE + $rec{"STATE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 4) + { + # SIZE + $rec{"SIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 5) + { + # LINKS + $rec{"LINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 6) + { + # IMAGESIZE + $rec{"IMAGESIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 7) + { + # HOPCOUNT + $rec{"HOPCOUNT"} = 
unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 8) + { + # URL + $length = unpack("i", $value); + $rec{"URL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 9) + { + # HEAD + $length = unpack("i", $value); + $rec{"HEAD"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 10) + { + # TITLE + $length = unpack("i", $value); + $rec{"TITLE"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 11) + { + # DESCRIPTIONS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"DESCRIPTIONS"} = $result; + } + elsif ($what == 12) + { + # ANCHORS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . 
""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"ANCHORS"} = $result; + } + elsif ($what == 13) + { + # EMAIL + $length = unpack("i", $value); + $rec{"EMAIL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 14) + { + # NOTIFICATION + $length = unpack("i", $value); + $rec{"NOTIFICATION"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 15) + { + # SUBJECT + $length = unpack("i", $value); + $rec{"SUBJECT"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 16) + { + # STRING (ignore, but unpack) + $length = unpack("i", $value); + $rec{"STRING"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 17) + { + # METADSC + $length = unpack("i", $value); + $rec{"METADSC"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 18) + { + # BACKLINKS + $rec{"BACKLINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 19) + { + # SIGNATURE + $rec{"SIG"} = unpack("i", $value); + $value = substr($value, 4); + } + } + return %rec; +} diff --git a/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/README b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/README new file mode 100644 index 00000000..0889e245 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/README @@ -0,0 +1,3 @@ +ewswrap.cgi = Excite for Web Servers (EWS) to htsearch wrapper +htwrap.cgi = htsearch wrapper to do some basic + sanity checking on the query diff --git a/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi new file mode 100755 index 00000000..f3f9419e --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi @@ -0,0 +1,118 @@ +#!/usr/bin/perl -w + +# ewswrap.cgi +# +# by John Grohol ([email protected]) +# Freeware +# v1.00 - 5 Oct 1998 +# +# Simple wrapper script for 
htsearch to parse old +# Excite for Web Servers (EWS) forms as-is +# This only makes sense if your want to upgrade +# your search engine but can't upgrade every form +# which points to it (e.g., external sites are +# pointing to your EWS CGI. +# +# As an added bonus, given the differences from how +# EWS handles queries to how htsearch handles them, +# it does some basic sanity checking on the query +# and tries to re-form it into a valid htsearch query. +# +# This script must be called using the POST method! +# +#_______________________________________________________ +# Set some defaults here +# These can be overridden in the calling form + +$config = "htdig"; # htDig config file +$exclude = ""; # exclude this url +$restrict = ""; # restrict to this url +$format = "builtin-long"; # results format +$method = "and"; # default method +$dir = "/usr/httpd/cgi-bin"; # Set cgi-bin dir + +#_______________________________________________________ +# Rest of program + + $| = 1; + +# Get the form variables from POST form + + read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); + @pairs = split(/&/, $buffer); + + foreach $pair (@pairs) { + ($name, $value) = split(/=/, $pair); + $value =~ tr/+/ /; + $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; + $value =~ s/<!--(.|\n)*-->//g; + $value =~ s/<([^>]|\n)*>//g; + $tags{$name} = $value; + } + +$squery = $tags{'search'}; # Set search query +$page = $tags{'page'}; +if (not($page)) { $page=1; } + + $squery =~ s/\+//g; + $squery =~ s/\-//g; + $squery =~ s/the//g; + $squery =~ s/not//g; + $squery =~ s/what//g; + +# If someone puts "and" or "or" in the query, +# then it should be a boolean query + + if (($squery =~ " and ") || ($squery =~ " or ")) { + $method = "boolean"; } + +# Count the number of words in the query + + @words = split(/ /,$squery); + foreach $word (@words) { $xwd++; } + +# If there are quotes in the query, we have to +# turn them into parantheses and make it boolean + +if (($squery =~ "\"")) { + $oo = 
(index($squery,"\""))+1; + $od = (index($squery,"\"",$oo))-1; + $op = $od - $oo +1; + $yty = substr($squery,$oo,$op); + @wrds = split(/ /,$yty); + foreach $wrd (@wrds) { $xww++; } + + if ($xww eq 2) { # Right now, can only handle 2-word phrases + $oi = (index($yty," ")); + if ($oi > -1) { + $ytt = substr($yty,0,$oi); + $john = $od - $oi +1; + $yte = substr($yty,$oi+1,$john); + $james = substr($squery,$od+2); + $james =~ s/ and//g; + $james =~ s/ / and /g; + $squery = "($ytt and $yte) $james"; # We turn it into a + $method = "boolean"; # boolean query + } + +# More than 2 words in quotes (phrase), just +# turn it into one big string of words and set method to "and" + + } else { + $squery =~ s/\"//g; + $squery =~ s/ and//g; + $method = "and"; + $yty = ""; + } +} + +# Set the environmental variables + +$ENV{'REQUEST_METHOD'} = 'GET'; +$ENV{'QUERY_STRING'} = "config=$config&restrict=$restrict&exclude=$exclude&words=$squery&method=$method&format=$format&page=$page"; + +# Run htsearch + +system("$dir/htsearch"); + +1; diff --git a/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/htwrap.cgi b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/htwrap.cgi new file mode 100755 index 00000000..1e7ea66b --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/htwrap.cgi @@ -0,0 +1,125 @@ +#!/usr/bin/perl -w + +# htwrap.cgi +# +# by John Grohol ([email protected]) +# Freeware +# v1.00 - 5 Oct 1998 +# +# Simple wrapper script for htsearch to +# do some basic sanity checking on the query +# and tries to re-form it into a valid htsearch query. +# +# This script must be called using the GET method! 
+# +#_______________________________________________________ +# Set some defaults here +# These can be overridden in the calling form + +$config = "htdig"; # htDig config file +$exclude = ""; # exclude this url +$restrict = ""; # restrict to this url +$format = "builtin-long"; # results format +$method = "and"; # default method +$dir = "/usr/httpd/cgi-bin"; # Set cgi-bin dir + +#_______________________________________________________ +# Rest of program + + $| = 1; + +# Get the form variables + +&ParseTags($ENV{'PATH_INFO'}); +&ParseTags($ENV{'QUERY_STRING'}); + +$squery = $tags{'words'}; +$restrict = $tags{'restrict'}; +$method = $tags{'method'}; +$format = $tags{'format'}; +$page = $tags{'page'}; + +if (not($page)) { $page=1; } + + $squery =~ s/\+//g; + $squery =~ s/\-//g; + $squery =~ s/the//g; + $squery =~ s/not//g; + $squery =~ s/what//g; + +# If someone puts "and" or "or" in the query, +# then it should be a boolean query + + if (($squery =~ " and ") || ($squery =~ " or ")) { + $method = "boolean"; } + +# How many words are there in the query? 
+ @words = split(/ /,$squery); + foreach $word (@words) { $xwd++; } + +# If there are quotes in the query, we have to +# turn them into parantheses and make it boolean + +if (($squery =~ "\"")) { + $oo = (index($squery,"\""))+1; + $od = (index($squery,"\"",$oo))-1; + $op = $od - $oo +1; + $yty = substr($squery,$oo,$op); + @wrds = split(/ /,$yty); + foreach $wrd (@wrds) { $xww++; } + + + if ($xww eq 2) { # Right now, can only handle 2-word phrases + $oi = (index($yty," ")); + if ($oi > -1) { + $ytt = substr($yty,0,$oi); + $john = $od - $oi +1; + $yte = substr($yty,$oi+1,$john); + $james = substr($squery,$od+2); + $james =~ s/ and//g; + $james =~ s/ / and /g; + $squery = "($ytt and $yte) $james"; # We turn it into a + $method = "boolean"; # boolean query + } + +# More than 2 words in quotes (phrase), just +# turn it into one big string of words and set method to "and" + + } else { + $squery =~ s/\"//g; # Dump quotes + $squery =~ s/ and//g; # Dump and's + $squery =~ s/ or//g; # Dump or's + $method = "and"; + $yty = ""; + } +} + +# Set the environmental variables + +$ENV{'REQUEST_METHOD'} = 'GET'; +$ENV{'QUERY_STRING'} = "config=$config&restrict=$restrict&exclude=$exclude&words=$squery&method=$method&format=$format&page=$page" +; + +# Run htsearch + +system("$dir/htsearch"); + +exit; + +sub ParseTags { + local($_) = @_; + local(@terms, $tag, $val); + s|^/||; + @terms = split('&'); + foreach $term (@terms) { + ($tag,$val) = split('=',$term,2); + $val =~ tr/+/ /; + $val =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; + $val =~ s/<!--(.|\n)*-->//g; + $val =~ s/<([^>]|\n)*>//g; + # may override previous value + $tags{$tag} = $val; + } +} + +1; diff --git a/debian/htdig/htdig-3.2.0b6/contrib/examples/badwords b/debian/htdig/htdig-3.2.0b6/contrib/examples/badwords new file mode 100644 index 00000000..9912e646 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/examples/badwords @@ -0,0 +1,349 @@ +a +above +about +according +across +actually +adj +after +afterwards 
+again +against +all +almost +alone +along +already +also +although +always +among +amongst +an +and +another +any +anyhow +anyone +anything +anywhere +are +aren +arent +around +as +at +be +became +because +become +becomes +becoming +been +before +beforehand +begin +beginning +behind +being +below +beside +besides +between +beyond +billion +both +but +by +can +cant +cannot +caption +co +could +couldnt +did +didnt +do +does +doesnt +dont +down +during +each +eg +eight +eighty +either +else +elsewhere +end +ending +enough +etc +even +ever +every +everyone +everything +everywhere +except +few +fifty +first +five +for +former +formerly +forty +found +four +from +further +had +has +hasnt +have +havent +he +hence +her +here +hereafter +hereby +herein +heres +hereupon +hers +herself +hes +him +himself +his +how +however +hundred +ie +if +in +inc +indeed +instead +into +is +isnt +it +its +itself +last +later +latter +latterly +least +less +let +like +likely +ltd +made +make +makes +many +may +maybe +me +meantime +meanwhile +might +million +miss +more +moreover +most +mostly +mr +mrs +much +must +my +myself +namely +neither +never +nevertheless +next +nine +ninety +no +nobody +none +nonetheless +noone +nor +not +nothing +now +nowhere +of +off +often +on +once +one +only +onto +or +others +otherwise +our +ours +ourselves +out +over +overall +own +page +per +perhaps +rather +re +recent +recently +same +seem +seemed +seeming +seems +seven +seventy +several +she +shes +should +shouldnt +since +six +sixty +so +some +somehow +someone +something +sometime +sometimes +somewhere +still +stop +such +taking +ten +than +that +the +their +them +themselves +then +thence +there +thereafter +thereby +therefore +therein +thereupon +these +they +thirty +this +those +though +thousand +three +through +throughout +thru +thus +tips +to +together +too +toward +towards +trillion +twenty +two +under +unless +unlike +unlikely +until +up +update +updated +updates +upon +us +used +using +ve +very +via 
+want +wanted +wants +was +wasnt +way +ways +we +wed +well +were +werent +what +whats +whatever +when +whence +whenever +where +whereafter +whereas +whereby +wherein +whereupon +wherever +wheres +whether +which +while +whither +who +whoever +whole +whom +whomever +whose +why +will +with +within +without +wont +work +worked +works +working +would +wouldnt +yes +yet +you +youd +youll +your +youre +yours +yourself +yourselves +youve diff --git a/debian/htdig/htdig-3.2.0b6/contrib/examples/rundig.sh b/debian/htdig/htdig-3.2.0b6/contrib/examples/rundig.sh new file mode 100644 index 00000000..7a78955d --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/examples/rundig.sh @@ -0,0 +1,96 @@ +#! /bin/sh + +# rundig.sh +# a script to drive ht://Dig updates +# Copyright (c) 1998 Colin Viebrock <[email protected]> +# Copyright (c) 1998-1999 Geoff Hutchison <[email protected]> +# Updated for ht://Dig 3.2.0b3 Feb 2001, Copyright (c) 2001 Geoff Hutchison +# Distributed under the GNU GPL version 2 or later + +if [ "$1" = "-v" ]; then + verbose="-v" +fi + +# This is the directory where htdig lives +BASEDIR=/export/htdig + +# This is the db dir +DBDIR=$BASEDIR/db/ + +# This is the name of a temporary report file +REPORT=/tmp/htdig.report + +# This is who gets the report +REPORT_DEST="[email protected]" +export REPORT_DEST + +# This is the subject line of the report +SUBJECT="cron: htdig report for domain" + +# This is the name of the conf file to use +CONF=htdig.conf + +# This is the directory htdig will use for temporary sort files +TMPDIR=$DBDIR +export TMPDIR + +# This is the PATH used by this script. Change it if you have problems +# with not finding wc or grep. 
+PATH=/usr/local/bin:/usr/bin:/bin + +##### Dig phase +STARTTIME=`date` +echo Start time: $STARTTIME +echo rundig: Start time: $STARTTIME > $REPORT +$BASEDIR/bin/htdig $verbose -s -a -c $BASEDIR/conf/$CONF >> $REPORT +TIME=`date` +echo Done Digging: $TIME +echo rundig: Done Digging: $TIME >> $REPORT + +##### Purge Phase +# (clean out broken links, etc.) +$BASEDIR/bin/htpurge $verbose -a -c $BASEDIR/conf/$CONF >> $REPORT +TIME=`date` +echo Done Purging: $TIME +echo rundig: Done Purging: $TIME >> $REPORT + +##### Cleanup Phase +# To enable htnotify or the soundex search, uncomment the following lines +# $BASEDIR/bin/htnotify $verbose >>$REPORT +# $BASEDIR/bin/htfuzzy $verbose soundex +# To get additional statistics, uncomment the following line +# $BASEDIR/bin/htstat $verbose >>$REPORT + +# Move 'em into place. Since these are only used by htdig for update digs +# and we always use -a, we just leave them as .work +# mv $DBDIR/db.docs.index.work $DBDIR/db.docs.index +# (this is just a mapping from a URL to a DocID) +# We need the .work for next time as an update dig, plus the copy for searching +cp $DBDIR/db.docdb.work $DBDIR/db.docdb +cp $DBDIR/db.excerpts.work $DBDIR/db.excerpts +cp $DBDIR/db.words.db.work $DBDIR/db.words.db +test -f $DBDIR/db.words.db.work_weakcmpr && + cp $DBDIR/db.words.db.work_weakcmpr $DBDIR/db.words.db_weakcmpr + +END=`date` +echo End time: $END +echo rundig: End time: $END >> $REPORT +echo + +# Grab the important statistics from the report file +# All lines begin with htdig: or htmerge: +fgrep "htdig:" $REPORT +echo +fgrep "htmerge:" $REPORT +echo +fgrep "rundig:" $REPORT +echo + +WC=`wc -l $REPORT` +echo Total lines in $REPORT: $WC + +# Send out the report ... +mail -s "$SUBJECT - $STARTTIME" $REPORT_DEST < $REPORT + +# ... 
and clean up +rm $REPORT diff --git a/debian/htdig/htdig-3.2.0b6/contrib/examples/updatedig b/debian/htdig/htdig-3.2.0b6/contrib/examples/updatedig new file mode 100755 index 00000000..1bcc3e08 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/examples/updatedig @@ -0,0 +1,53 @@ +#! /bin/sh + +# +# updatedig +# +# This is a script to update the search database for ht://Dig. +# Copyright (c) 1998 David Robley [email protected] +# +if [ "$1" = "-v" ]; then + verbose=-v +fi + +# -a: run using alternate work files so search can still be done during index run +# -t: create an ASCII version of document database in doc_list as specified +# in the config file +# -s: print stats after completion +/web/webdocs/htdig/bin/htdig -a -t $verbose -s +/web/webdocs/htdig/bin/htmerge -a $verbose -s +/web/webdocs/htdig/bin/htnotify $verbose + +# Because the -a switch creates alternate work files, but doesn't seem to move +# them into the correct place, we will do it here. +mv /web/webdocs/htdig/db/db.docdb /web/webdocs/htdig/db/db.docdb.old +mv /web/webdocs/htdig/db/db.docdb.work /web/webdocs/htdig/db/db.docdb + +mv /web/webdocs/htdig/db/db.docs.index /web/webdocs/htdig/db/db.docs.index.old +mv /web/webdocs/htdig/db/db.docs.index.work /web/webdocs/htdig/db/db.docs.index + +mv /web/webdocs/htdig/db/db.wordlist /web/webdocs/htdig/db/db.wordlist.old +mv /web/webdocs/htdig/db/db.wordlist.work /web/webdocs/htdig/db/db.wordlist + +mv /web/webdocs/htdig/db/db.words.gdbm /web/webdocs/htdig/db/db.words.gdbm.old +mv /web/webdocs/htdig/db/db.words.gdbm.work /web/webdocs/htdig/db/db.words.gdbm + +# +# Only create the endings database if it doesn't already exist. +# This database is static, so even if pages change, this database will not +# need to be rebuilt. +# +if [ ! 
-f /web/webdocs/htdig/common/word2root.gdbm ] +then + /web/webdocs/htdig/bin/htfuzzy $verbose endings +fi + +# This next needs to be run if synonyms are added/modified/removed +# Guess the best way would be to delete synonyms.gdbm before +# running this script?? + +if [ ! -f /web/webdocs/htdig/common/synonyms.gdbm ] +then + /web/webdocs/htdig/bin/htfuzzy $verbose synonyms +fi +# end updatedig diff --git a/debian/htdig/htdig-3.2.0b6/contrib/handler.pl b/debian/htdig/htdig-3.2.0b6/contrib/handler.pl new file mode 100755 index 00000000..53ec7f34 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/handler.pl @@ -0,0 +1,45 @@ +#!/usr/bin/perl +# +# handler.pl +# Sample ExternalTransport handler for HTTP and HTTPS using curl +# for the ht://Dig package 3.2.x and higher +# by Geoffrey Hutchison <[email protected]> +# Copyright (c) 1999 under the terms of the GNU Public License vesion 2 (GPL) +# +# handler.pl protocol url config_file +# +# Really a simplistic example--this should probably use Perl's LWP for HTTP/HTTPS/FTP +# Right now it uses the program 'curl' to do HTTP or HTTPS transactions. 
+# + +my $curl_path="/usr/local/bin/curl"; +my $protocol=$ARGV[0]; +my $url=$ARGV[1]; +my $config_file=$ARGV[2]; + +open (DOC, "$curl_path -i $url |") || die "s:\t404\nr:\tCan't open curl!\n"; +while ( my $line = <DOC> ) { + if ( $line =~ /^HTTP.?\/\d.\d\s(\d\d\d)\s(.*)/io ) { + print "s:\t$1\n"; + print "r:\t$2\n"; + } elsif ( $line =~ /^last-modified: (.*)$/io ) { + print "m:\t$1\n"; + } elsif ( $line =~ /^content-type: (.*)$/io ) { + print "t:\t$1\n"; + } elsif ( $line =~ /^content-length: (.*)$/io ) { + print "l:\t$1\n"; + } elsif ( $line =~ /^location: (.*)$/io ) { + print "u:\t$1\n"; + } + + last if ( $line =~ /^\s*$/ ) +} + +local($/) = undef; +my $text = <DOC>; +close(DOC); + +print "\n$text"; + + + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htdig-3.2.0.spec b/debian/htdig/htdig-3.2.0b6/contrib/htdig-3.2.0.spec new file mode 100644 index 00000000..1631164f --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/htdig-3.2.0.spec @@ -0,0 +1,184 @@ +# Last definitions below override, so change the order to redefine. You can't +# comment them out because %defines are parsed inside comments. +# For Red Hat [456].x... +%define contentdir /home/httpd +%define commondir /var/lib/htdig/common +%define databasedir /var/lib/htdig/db +%define searchdir %{contentdir}/html +%define configdir /etc/htdig +%define bindir /usr/sbin +%define mandir /usr/man +%define docdir /usr/doc +# For Red Hat [789].x, FCx... 
+%define contentdir /var/www +%define commondir %{_prefix}/share/htdig +%define databasedir /var/lib/htdig +%define searchdir %{contentdir}/html/htdig +%define configdir %{_sysconfdir}/htdig +%define bindir %{_bindir} +%define mandir %{_mandir} +%define docdir %{_docdir} +Summary: A web indexing and searching system for a small domain or intranet +Name: htdig +Version: 3.2.0b6 +Release: 8 +Copyright: GPL +Group: Networking/Utilities +BuildRoot: /var/tmp/htdig-root +Source0: http://www.htdig.org/files/htdig-%{PACKAGE_VERSION}.tar.gz +URL: http://www.htdig.org/ +Packager: Gilles Detillieux <[email protected]> + +%description +The ht://Dig system is a complete world wide web indexing and searching +system for a small domain or intranet. This system is not meant to replace +the need for powerful internet-wide search systems like Lycos, Infoseek, +Webcrawler and AltaVista. Instead it is meant to cover the search needs for +a single company, campus, or even a particular sub section of a web site. + +As opposed to some WAIS-based or web-server based search engines, ht://Dig +can span several web servers at a site. The type of these different web +servers doesn't matter as long as they understand the HTTP 1.0 protocol. 
+%prep +%setup -q -n htdig-%{PACKAGE_VERSION} +#%patch0 -p0 -b .noparse + +%build +CFLAGS="$RPM_OPT_FLAGS" ./configure --prefix=/usr --mandir=%{mandir} \ + --bindir=%{bindir} --libexec=/usr/lib --libdir=/usr/lib \ + --with-image-dir=%{contentdir}/html/htdig \ + --with-cgi-bin-dir=%{contentdir}/cgi-bin \ + --with-search-dir=%{searchdir} \ + --with-config-dir=%{configdir} \ + --with-common-dir=%{commondir} \ + --with-database-dir=%{databasedir} +#rm -f htlib/langinfo.h # conflicts with libc5 headers +#echo '#include "/usr/include/langinfo.h"' > htlib/langinfo.h # to keep htlib/Makefile happy +make + +%install + +rm -rf $RPM_BUILD_ROOT + +make DESTDIR=$RPM_BUILD_ROOT install-strip +mkdir -p $RPM_BUILD_ROOT/etc/cron.daily +ln -s ../..%{bindir}/rundig $RPM_BUILD_ROOT/etc/cron.daily/htdig-dbgen +ln -s ../../../..%{docdir}/htdig-%{PACKAGE_VERSION} \ + $RPM_BUILD_ROOT%{contentdir}/html/htdig/htdoc + +%clean +rm -rf $RPM_BUILD_ROOT + +%post +# Only run this if installing for the first time +if [ "$1" = 1 ]; then + SERVERNAME="`grep '^ServerName' /etc/httpd/conf/httpd.conf | awk 'NR == 1 {print $2}'`" + [ -z "$SERVERNAME" ] && SERVERNAME="`hostname -f`" + [ -z "$SERVERNAME" ] && SERVERNAME="localhost" + TMPFILE=$(mktemp /tmp/ht.XXXXXX) || exit 1 + sed 's/^start_url:.*/#&\ +# (See end of file for this parameter.)/' %{configdir}/htdig.conf > $TMPFILE + cat $TMPFILE > %{configdir}/htdig.conf + rm $TMPFILE + cat >> %{configdir}/htdig.conf <<! + +# Automatically set up by htdig RPM, from your current Apache httpd.conf... +# Verify and configure these, and set maintainer above, before running +# %{bindir}/rundig. +# See %{docdir}/htdig*/attrs.html for descriptions of attributes. + +# The URL(s) where htdig will start. See also limit_urls_to above. +start_url: http://$SERVERNAME/ + +# These attributes allow indexing server via local filesystem rather than HTTP. +local_urls: http://$SERVERNAME/=%{contentdir}/html/ +local_user_urls: http://$SERVERNAME/=/home/,/public_html/ +! 
+ +fi + +%files +%defattr(-,root,root) +%config %{configdir}/htdig.conf +%config %{configdir}/mime.types +%config %{configdir}/HtFileType-magic.mime +%config %{configdir}/cookies.txt +%config %{bindir}/rundig +%config %{searchdir}/search.html +%config %{commondir}/[a-rt-z]*.html +%config %{commondir}/s[a-df-z]*.html +%config %{commondir}/english* +%config %{commondir}/synonyms +%config %{commondir}/bad_words +%config(missingok) /etc/cron.daily/htdig-dbgen +%{bindir}/[Hh]t* +/usr/lib/* +/usr/include/* +%dir %{databasedir} +%{contentdir}/cgi-bin/htsearch +%{contentdir}/cgi-bin/qtest +%{contentdir}/html/htdig/*.gif +%{contentdir}/html/htdig/*.png +%{contentdir}/html/htdig/htdoc +%{mandir}/man* + +%doc README htdoc/* + +%changelog +* Thu Jun 10 2004 Gilles Detillieux <[email protected]> + - built with 3.2.0b6, adding man pages & include files + - updated pathnames for current systems (/usr/share/htdig for common dir) + - used variable for configdir, mandir & docdir + - used mktemp to create safe temp file in post script + +* Wed Jul 4 2001 Gilles Detillieux <[email protected]> + - used variables for many pathnames, to allow easy switchover to 7.x + (using Powertools-like pathnames for Red Hat 7) + +* Thu Jun 7 2001 Gilles Detillieux <[email protected]> + - updated to 3.2.0b4 + +* Fri Dec 1 2000 Gilles Detillieux <[email protected]> + - updated to 3.2.0b3 + +* Mon Feb 21 2000 Gilles Detillieux <[email protected]> + - fixed post script to add more descriptive entries in htdig.conf + - made cron script a config file + - updated to 3.2.0b2 + +* Thu Feb 3 2000 Gilles Detillieux <[email protected]> + - added mime.types as a config file + +* Mon Jan 17 2000 Gilles Detillieux <[email protected]> + - updated to 3.2.0b1 + +* Fri Aug 13 1999 Gilles Detillieux <[email protected]> + - changed configure & install options and got rid of conf.patch file + to work with latest 3.2 code + +* Mon Jun 7 1999 Gilles Detillieux <[email protected]> + - fixed post script to use only first 
ServerName directive in httpd.conf + +* Tue Mar 23 1999 Gilles Detillieux <[email protected]> + - updated to 3.2.0dev, for testing + +* Thu Feb 4 1999 Gilles Detillieux <[email protected]> + - put web stuff back in /home/httpd/html & /home/httpd/cgi-bin, so it can + go over a standard Apache installation on Red Hat + - cleaned up install to make use of new features + +* Thu Feb 4 1999 Ric Klaren <[email protected]> + - changed buildroot stuff + - minor spec file fixes + - install web stuff in /home/httpd/htdig + - made rundig config file + +* Tue Sep 22 1998 Gilles Detillieux <[email protected]> + - Added local_urls stuff to generated htdig.conf file + +* Fri Sep 18 1998 Gilles Detillieux <[email protected]> + - Built the rpm from latest htdig source (3.1.0b1), using earlier + versions of rpms by Mihai Ibanescu <[email protected]> and Elliot Lee + <[email protected]> as a model, incorporating ideas from both. I've + made the install locations as FSSTND compliant as I can think of. + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/README b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/README new file mode 100644 index 00000000..4ec0f6ab --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/README @@ -0,0 +1,38 @@ + +> Subject: htdig: HTDIG: Searching Word files +> To: [email protected] +> From: Richard Jones <[email protected]> +> Date: Tue, 15 Jul 1997 12:44:03 +0100 +> +> I'm currently trying to hack together a script to search +> Word files. I have a little program called `catdoc' (attached) +> which takes Word files and turns them into passable text files. +> What I did was write a shell script around this called +> `htparsedoc' (also attached) and add it as an external +> parser: +> +> --- /usr/local/lib/htdig/conf/htdig.conf --- +> +> # External parser for Word documents. 
+> external_parsers: "applications/msword" +> "/usr/local/lib/htdig/bin/htparsedoc" +> +> This script produces output like this: +> +> t Word document http://annexia.imcl.com/test/comm.doc +> w INmEDIA 1 - +> w Investment 2 - +> w Ltd 3 - +> w Applications 4 - +> w Subproject 5 - +> w Terms 6 - +> w of 7 - +> [...] +> w Needed 994 - +> w Tbd 995 - +> w Resources 996 - +> w Needed 997 - +> w Tbd 998 - +> w i 1000 - +> + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/catdoc.c b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/catdoc.c new file mode 100644 index 00000000..93bf02f8 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/catdoc.c @@ -0,0 +1,197 @@ + +From [email protected] Fri Jul 3 09:52:34 1998 +Date: Fri, 3 Jul 1998 17:20:50 +0200 (MET DST) +From: Valerio Di Giampietro <[email protected]> +To: [email protected] +Subject: htdig: Searching Word files +/* catdoc.c version 0.3 */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define TEXT_WIDTH 72 +/* #define LATIN1 */ +/* enable this define, if you don't want cyrillic code page translations */ + +unsigned char specs[]={7, /* tab columns separator - handled specially*/ + '\n',/* hook to handle end of line in tables */ + 0x1E,/* unbreakable defis */ + 0x1F,/* soft hyphen */ + 0x85,/* dots */ + 0x91,/* opening single quote */ + 0x92,/* closing single quote */ + 0x93,/* opening double quote */ + 0x94,/* closing double quote */ + 0x96,/* em-dash (or em-space)*/ + 0x97,/* en-dash */ + 0x99,/* Trade Mark sign */ + 0xA0,/* unbreakable space */ + 0xA9,/* Copyright sign */ + 0xAE,/* Reserved sign */ + 0xAB,/* opening << quote*/ + 0xBB,/* closing >> quote*/ + /* The rest is translated into itself unless TeX mode is selected */ + '%','$','_','{','}','\\', + }; + +char *ascii_specs[]={"\t","\n","-","","...","`","'","``","''","-","-","tm", + " ","(c)","(R)","\"","\"","%","$","_","{","}","\\"}; +char *TeX_specs[]={"\t&","\\\\\n","-","\\-","\\dots{}","`","'","``","''","---","--", 
+"${}^{\\scriptscriptstyle\\mathrm{TM}}$",/* this is my idea about tm sign*/ +"~", +"{\\copyright}", +"(R)",/* to be replaced with correct command */ +"<",">","\\%","\\$","$\\{$","$\\}$","$\\backslash$",}; +#ifndef LATIN1 +#ifdef unix +unsigned char table[256]={ +/* Windows cyrillic code page to KOI-8 */ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0D,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x2D,0x20, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x80,0x81,0x82,0xAA,0x8F,0x90,0xA9,0x93,0x84,0x92,0x91,0x94,0x83,0x95,0x99,0x8B, +0x98,0x60,0x27,0x22,0x22,0x9A,0x2D,0x2D,0x9E,0xA6,0x87,0xB0,0x8D,0x97,0x86,0xA2, +0x20,0xA7,0xA5,0x88,0xA4,0x8E,0x96,0x85,0xB3,0xA1,0x9F,0x22,0xAB,0xAC,0xAD,0xAE, +0xAF,0xB2,0xB1,'i',0xB5,0xB6,0xB7,0xB8,0xA3,0xB9,0xBA,0x22,0xBC,0xBD,0xBE,0x9B, +0xE1,0xE2,0xF7,0xE7,0xE4,0xE5,0xF6,0xFA,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xF0, +0xF2,0xF3,0xF4,0xF5,0xE6,0xE8,0xE3,0xFE,0xFB,0xFD,0xFF,0xF9,0xF8,0xFC,0xE0,0xF1, +0xC1,0xC2,0xD7,0xC7,0xC4,0xC5,0xD6,0xDA,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xD0, +0xD2,0xD3,0xD4,0xD5,0xC6,0xC8,0xC3,0xDE,0xDB,0xDD,0xDF,0xD9,0xD8,0xDC,0xC0,0xD1}; +#else +unsigned char table[256]={ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0D,0x0c,0x0d,0x0e,0x0f, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x2D,0x20, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, 
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, +0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, +0x90,0x60,0x27,0x22,0x22,0x95,0x2D,0x2D,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, +0x20,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0x22,0xac,0xad,0xae,0xaf, +0xb0,0xb1,0xb2,0xb3,'i',0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0x22,0xbc,0xbd,0xbe,0xbf, +0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, +0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, +0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, +0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef}; +#endif +#define recode_char(x) table[x] +#else +#define recode_char(x) x +#endif +char *map_char(char **map,int c) + +{unsigned char *ptr; + static char buffer[2]="a"; + if ((ptr=strchr(specs,c))) + return map[ptr-specs]; + else + { buffer[0]=recode_char(c); return buffer; } +} +void format(char *buf,char **map) +{ unsigned char outstring[128]=""; + unsigned char *sp=buf,*dp;int table=0; + while (*sp) + { if (*sp==7&&table) + { printf("%s%s",outstring,map_char(map,'\n')); + outstring[0]=0; + table=0;sp++; + } + else + { if (strlen(strcat(outstring,map_char(map,*sp)))>TEXT_WIDTH) + { dp=strrchr(outstring,' '); + if (dp) + { *(dp++)=0; + printf("%s\n",outstring); + strcpy(outstring,dp); + } + else + { int i; + for(i=0;i<72;i++) putc(outstring[i],stdout); + putc('\n',stdout); + strcpy(outstring,outstring+72); + } + } + table=*(sp++)==7; + } + } +if (outstring[0]==0) putc('\n',stdout); + else printf("%s\n\n",outstring); + +} +void help(void) +{ printf("catdoc - exctract text from MS-Word files and 
catenate it to stdout\n" + "Copyright (c) by Victor B. Wagner, 1996\n" + "Usage catdoc [-ast] files ...\n" + "\t-a - converts non-standard printable chars into readable form (default)\n" + "\t-t - converts them into TeX control sequences\n" + "\t-s - exits with code 1 if MSWordDoc signature not found before\n" + "\t\tfirst printable paragraph\n\n" + "All options affects only files, specified AFTER them\n"); + exit(2); +} + +char buf[8192]; +void do_file(FILE *f,char **map,int search_sign) +{ int ok=!search_sign; + int bufptr,c; + while(!feof(f)) + {bufptr=-1; + do { + c=getc(f); + /* Special printable symbols 7- table separator \r - paragraph end + 0x1E - short defis */ + if ((c<=255&&c>=32)||c==7||c=='\t'||c=='\r'||c==0x1E) + buf[++bufptr]=c; + else + if (c==0x0b) buf[++bufptr]='\r'; + else + { if (!c) {buf[++bufptr]=0; + if(!strcmp(buf,"MSWordDoc")) + { ok=1; } + } + if (c!=2) bufptr=-1;/* \002 is Word's footnote mark */ + } + } while (c!='\r'&&c!=EOF); + if (bufptr>0&&buf[bufptr]=='\r') + { if (!ok) exit( 1); + buf[bufptr]=0; format(buf,map); + } + } +} + +int main(int argc,char **argv) +{ int search_sign =0; /* Must program exit with exit code 1 if MSWordDoc + signature is not found? 
*/ + char **sequences=ascii_specs;/* pointer to array of character sequences + to represent special characters of Word */ + int i=1,stdin_processed=0; + if (argc<2) help(); + for(;i<argc;i++) + { if (!strcmp(argv[i],"-s")) search_sign=1; + else + if (!strcmp(argv[i],"-t")) sequences=TeX_specs; + else + if (!strcmp(argv[i],"-a")) sequences=ascii_specs; + else + if (!strcmp(argv[i],"-")) + if (!stdin_processed) {do_file(stdin,sequences,search_sign); + stdin_processed=1;} + else { fprintf(stderr,"Cannot process standard input twice a row\n"); + exit (2);} + else + if (argv[i][0]=='-') {fprintf(stderr,"Invalid option %s\n",argv[i]); + help();} + else + { FILE *f=fopen(argv[i],"r"); + if(!f) {fprintf(stderr,"Cannot open file %s\n",argv[i]);exit(2);} + do_file(f,sequences,search_sign); + } + } + return 0; +} diff --git a/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/htparsedoc b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/htparsedoc new file mode 100755 index 00000000..9d47e85d --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/htparsedoc/htparsedoc @@ -0,0 +1,72 @@ +#!/bin/sh - + +#-- +# External parser for HTDIG that parses Word files so they can +# be indexed. +#-- +# Written by Richard W.M. Jones <[email protected]>. Distributed freely +# under the terms of the GNU General Public License (GPL). +# Modified by Andrew M. Bishop <[email protected]> +#-- + +#---------------------------------------------------------------------- +# Configurable stuff here: + +# The program that converts Word files into text. I use ``catdoc'' +# by Victor Wagner <[email protected]>. You may wish to just use +# ``strings''. +CATDOC=/usr/local/bin/catdoc +#CATDOC=strings + +# End of configurable stuff. 
+#---------------------------------------------------------------------- + +# Arguments are: +# $1 = input file +# $2 = content type (ignored) +# $3 = base URL +# $4 = HTDIG config file (ignored) +# HTDIG expects us to print out: +# w WORD LOCATION HEADING Word at location 0-1000 under heading +# u URL DESCRIPTION URL with description +# t TITLE Title of document +# h HEAD Heading +# a ANCHOR Anchor (ie. like <a name="">) +# i IMAGE_URL Image pointer + +#---------------------------------------------------------------------- + +# Format input to word per line. + +wordPerLine () { + tr '[ \010]' '\012' | awk 'NF==1 {print;}' +} + +# Change non-alphabetical/numeric characters in space. + +removeNonAlNum () { + tr -c '[a-zA-Z0-9\015]' ' ' +} + +#---------------------------------------------------------------------- + +# Parse input file to linear list of words. +$CATDOC $1 | removeNonAlNum | wordPerLine > /tmp/htparsedoc.$$ + +# Compute length of list. +filelen=`wc -l < /tmp/htparsedoc.$$` + +# We can't find the title from the document, so make one up. +echo "t Binary Document $3" + +# We can't make an excerpt so we make one up. +echo "h No excerpt available" + +# Pass words to htdig. +if [ $filelen -gt 0 ]; then + awk "{printf (\"w\t%s\t%d\t-\t\n\", \$1, 1000*NR/$filelen);}" \ + < /tmp/htparsedoc.$$ +fi + +# Remove temporary file. +rm /tmp/htparsedoc.$$ diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/Makefile b/debian/htdig/htdig-3.2.0b6/contrib/multidig/Makefile new file mode 100644 index 00000000..c2dc4857 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/Makefile @@ -0,0 +1,58 @@ +# +# Makefile for the multidig system +# +# Copyright (c) 1998-2000 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. 
+# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# + + +# +# You probably want to change some or all of these. +# BASH = location of bash or other Bourne-like shell with 'source' builtin +# BASEDIR = directory of ht://Dig installation +# These should probably be OK. +# BINDIR = directory of ht://Dig binaries. Also destination for these scripts. +# CONFIG_DIR = directory of ht://Dig config files. +# DB_BASE = base directory for ht://Dig / multidig databases +BASH= /bin/bash +BASEDIR= /opt/htdig +BINDIR= $(BASEDIR)/bin +CONFIG_DIR= $(BASEDIR)/conf +DB_BASE= $(BASEDIR)/db + + +# +# You shouldn't need to change any of this... +# +SCRIPTS= add-collect add-urls multidig \ + new-collect new-db gen-collect +CONF= db.conf multidig.conf + +all: + +clean: + rm -f *~ + +install: + @echo "Installing scripts..." + @for i in $(SCRIPTS); do \ + sed -e s%@BASH@%$(BASH)% \ + -e s%@CONFIG_DIR@%$(CONFIG_DIR)% $$i >$(BINDIR)/$$i; \ + chmod a+x $(BINDIR)/$$i; \ + echo $(BINDIR)/$$i; \ + done && test -z "$$fail" + @echo + @echo "Installing config files..." + @echo + @for i in $(CONF); do \ + sed -e s%@BASH@%$(BASH)% -e s%@BASEDIR@%$(BASEDIR)% \ + -e s%@BINDIR@%$(BINDIR)% -e s%@CONFIG_DIR@%$(CONFIG_DIR)% \ + -e s%@DB_BASE@%$(DB_BASE)% $$i >$(CONFIG_DIR)/$$i; \ + echo $(CONFIG_DIR)/$$i; \ + done && test -z "$$fail" + @echo + @echo "Done with installation." + @echo diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/README b/debian/htdig/htdig-3.2.0b6/contrib/multidig/README new file mode 100644 index 00000000..f394e5e5 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/README @@ -0,0 +1,133 @@ +README for multidig 1.1 + by Geoff Hutchison <[email protected]> + + Copyright (c) 1998-1999 The ht://Dig Group <http://www.htdig.org/> + Distributed under the terms of the GNU General Public License (GPL) + version 2 or later. 
+--------------------------------
+
+This document is part of the "multidig script system" a system of
+shell scripts and some modified conf files that makes dealing with
+multiple databases easier for ht://Dig. It assumes that you know what
+ht://Dig is. If you don't know, see the website at
+<http://www.htdig.org/>
+
+This README is a bit rough around the edges. I don't know what people
+really want or need to know about the scripts. I expect a lot of
+questions. Hey, maybe I'm wrong. I'm always open to suggestions,
+criticisms, corrections, etc. E-mail me at <[email protected]>
+
+--------------------------------
+
+INTRODUCTION:
+
+* Why write multidig?
+
+ There are many reasons I started the multidig system. The biggest
+were the complaints that ht://Dig didn't have much of an
+administration interface. If you're looking for one, multidig isn't
+it. Yet. The next biggest is that people wanted me to make dealing
+with multiple databases easier. If you're looking for this, you're in
+the right place.
+
+* Why should I bother with multidig?
+
+ If you already have a multiple-database setup and it's working
+smoothly, you probably don't want to bother. It was written the way
+*I* would organize a multiple-database setup. Not surprisingly, it
+might be more pain to convert to multidig than it's worth.
+ If you're planning a multiple-database setup or you have one and
+it's not working well, this will help. It hides most of the pain and
+suffering behind some shell scripts and generally automates life. :-)
+
+--------------------------------
+
+SETTING UP:
+
+* How do I install it?
+
+ It's pretty easy to install. It requires bash, or at least a
+Bourne-shell that supports the "source" builtin. Obviously, it also
+requires ht://Dig. :-)
+ Change any paths in the Makefile. Do a "make install" to install the
+scripts in the right place and the config files in the right
+place. The Makefile edits the scripts for you so the paths are consistent.
+
+* Now that it's in, how does it work?
+
+ The multidig script will replace the rundig script that comes with
+ht://Dig. Use it through a cron job or some other means of automating
+updates. It will run through all the db that multidig knows about, run
+htdig, htmerge, move the databases around, etc. As written it tries to
+index with the least disk space in the least time. Thus it keeps only
+the minimum files and does "update" digs.
+ After indexing all the db, it merges all the collections, trying to
+do the same thing, fastest speed, smallest disk and RAM
+requirements. It spits out a short status to STDOUT and a more
+complete report to the file referenced with the $REPORT option in
+multidig.conf. Adding a "-v" to the command-line makes everything more
+verbose.
+
+* Can I convert my previous multiple-db setup?
+
+ Yes. I'm assuming you have a config file for each database you've
+set up. In that case, put the databases into a directory with the same
+name as the .conf file and tack the name onto the db.list file in your
+config directory. This is multidig's list of all databases, so adding
+a line here will ensure it's indexed using multidig.
+
+* How do I add new URLs to databases or add new databases?
+
+ 1) New URLs: Run 'add-urls <db>' and either paste in URLs or
+ redirect a file or program.
+ 2) New DB: Run 'new-db <db>' to set up everything for that database.
+
+--------------------------------
+
+COLLECTIONS:
+
+* What's a collection?
+
+ Version 3.1.0 of ht://Dig added support for merging multiple
+databases together. Technically, you merge one database into
+another. Multidig makes this a bit easier. You set up a "collection"
+of other databases and the multidig script will merge them all
+together.
+
+* Fantastic! How do I define a collection?
+
+./new-collect <name>
+./add-collect <name>
+<insert dbs here>
+
+ The add-collect script will go through the list of dbs and make sure
+the multidig system actually knows about them.
If not, it complains. + +* Can I just generate the collections from my databases? + + Yup, run gen-collect. This is what the main multidig script runs. + +-------------------------------- + +DIRECTORY LAYOUT: + +Here are the locations of files used by multidig: + + $BASEDIR/bin + add-collect script for adding db to a collection + add-urls script for adding URLs to a db + gen-collect script for generating all collections + from their db (called by multidig) + multidig script for generating all db and collections + new-collect script for making a new collection + new-db script for making a new db + $BASEDIR/conf + db.conf template database config + used by new-collect and new-db + foo.conf database config for db foo + multidig.conf config for multidig paths and options + db.list list of all db, one per line + collect.list list of all collections, one per line + $BASEDIR/db + foo/foo.urls URLs used by foo db + foo/db.* actual foo databases diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-collect b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-collect new file mode 100644 index 00000000..d169ed84 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-collect @@ -0,0 +1,49 @@ +#!@BASH@ + +# +# add-collect 1.1 +# +# Copyright (c) 1998-1999 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. +# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# +# syntax: +# add-collect <collection> +# +# Reads new DB in from the standard input (either redirect or paste) +# Ensures the DB actually exist before adding them to the collection +# + +# You may need to set the following: +MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf +source $MULTIDIG_CONF + +# Catch people who don't supply an argument +if [ "$1" = "" ]; then + echo Syntax: add-colect \<collection\> + exit +fi + +# Do we actually have a collection named as specified? 
+TEST=`grep $1 $COLLECT_LIST` +if [ "$TEST" = "" ]; then + # This may become annoying. If so, comment it out! + echo The collection $1 does not exist. Sorry. + echo The existing collections are: + cat $COLLECT_LIST +else + # OK, now we have to make sure these are legal db + for db in `cat /dev/stdin`; do + DBTEST=`grep $db $DB_LIST` + if [ "$DBTEST" != "" ]; then + echo $db >>$DB_BASE/$1/$1.collect + else + # This may become annoying. If so, comment it out! + echo The database $db does not exist. Sorry. + echo The existing databases are: + cat $DB_LIST + fi + done +fi diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-urls b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-urls new file mode 100644 index 00000000..15866e23 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/add-urls @@ -0,0 +1,37 @@ +#!@BASH@ + +# +# add-urls 1.1 +# +# Copyright (c) 1998-1999 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. +# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# +# syntax: +# add-urls <db> +# +# Reads new URLs in from the standard input (either redirect or paste) +# + +# You may need to set the following: +MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf +source $MULTIDIG_CONF + +# Catch people who don't supply an argument +if [ "$1" = "" ]; then + echo Syntax: add-urls \<db\> + exit +fi + +# Do we actually have a database named as specified? +TEST=`grep $1 $DB_LIST` +if [ "$TEST" = "" ]; then + # This may become annoying. If so, comment it out! + echo The database $1 does not exist. Sorry. 
+ echo The existing databases are: + cat $DB_LIST +else + cat /dev/stdin >>$DB_BASE/$1/$1.urls +fi diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/db.conf b/debian/htdig/htdig-3.2.0b6/contrib/multidig/db.conf new file mode 100644 index 00000000..edacd723 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/db.conf @@ -0,0 +1,26 @@ +# +# db.conf file for the multidig system +# (copied for each database used) +# +# Copyright (c) 1998-1999 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. +# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# + +# Change this if you use a different global config file +# Put most of your configuration options in this file +# the db.conf files only define the URL list used and the directory for +# storing the databases +include: ${config_dir}/htdig.conf + +# Changed for each database. Places the databases in separate directories +# for convenience and organization +database_dir: @DB_BASE@/@DATABASE@ + +# Each database has a separate list of starting URLs +# This makes it easier to index a variety of categories +start_url: `${database_dir}/@[email protected]` + +# Any database-specific config options should go here... diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/gen-collect b/debian/htdig/htdig-3.2.0b6/contrib/multidig/gen-collect new file mode 100644 index 00000000..f75e08ad --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/gen-collect @@ -0,0 +1,99 @@ +#!@BASH@ + +# +# gen-collect 1.1 +# +# Copyright (c) 1998-1999 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. 
+# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# +# Part of the "multidig script system" +# a system of shell scripts and some modified conf files +# that makes dealing with multiple databases easier for ht://Dig +# +# Syntax: +# gen-collect [-v] +# +# Merges multiple databases into ``collected'' db +# (This is done by multidig too, but this script lets you *just* +# generate the collections.) +# + +# This is useful for debugging info +if [ "$1" = "-v" ]; then + verbose=-v +fi + +# You may need to set the following: +MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf +source $MULTIDIG_CONF + +# We may be called inside multidig, so we don't want to mess with the report. +for collect in `cat $COLLECT_LIST`; do + # What's the conf file for this database? + CONF=$CONFIG_DIR/$collect.conf + echo Generating $collect at: `date` + + # We want to replace the old .work files with the first database + # This ensures that we *only* get documents from the merged db + # and not old ones left around in our previous collected db + firstdb=`head -n 1 $DB_BASE/$collect/$collect.collect` + cp $DB_BASE/$firstdb/db.docdb $DB_BASE/$collect/db.docdb.work + cp $DB_BASE/$firstdb/db.docs.index $DB_BASE/$collect/db.docs.index.work + cp $DB_BASE/$firstdb/db.wordlist.work $DB_BASE/$collect/db.wordlist.work + cp $DB_BASE/$firstdb/db.words.db $DB_BASE/$collect/db.words.db.work + # Now we need to work out the number of remaining db in the collection + LENGTH=`wc -l $DB_BASE/$collect/$collect.collect | awk '{print $1;}'` + let NUM=LENGTH-1 + + for db in `tail -n $NUM $DB_BASE/$collect/$collect.collect`; do + if [ "$1" = "-v" ]; then + echo Merging db $db of collect $collect + fi + MERGE_CONF=$CONFIG_DIR/$db.conf + # There's a slight bug in the merge function. + # It's looking for db.wordlist, not .work. 
So lets copy it temporarily + cp $DB_BASE/$db/db.wordlist.work $DB_BASE/$db/db.wordlist + # Do the merging, using -d and -w to prevent normal merging + # (it would be a waste of time, we'd repeat it multiple times) + $BINDIR/htmerge $verbose -s -d -w -m $MERGE_CONF -a -c $CONF >>$REPORT + # And now remove the copy + rm $DB_BASE/$db/db.wordlist + done + + # Now after merging in all of those databases + # we need to do the usual htmerge run + $BINDIR/htmerge -a $verbose -s -c $CONF >>$REPORT + + if [ "$1" = "-v" ]; then + echo Moving files $collect at: `date` + fi + # If you don't have the space for backups, this step can be omitted + if [ $BACKUPS = "true" ]; then + cp $DB_BASE/$collect/db.docdb $DB_BASE/$collect/db.docdb.bak + cp $DB_BASE/$collect/db.docs.index $DB_BASE/$collect/db.docs.index.bak + # cp $DB_BASE/$collect/db.wordlist $DB_BASE/$collect/db.wordlist.bak + cp $DB_BASE/$collect/db.words.db $DB_BASE/$collect/db.words.db.bak + fi + + # Move them because we don't want .work files around + # (Remember, we're generating using merging, + # so we want to make sure we don't have old stuff to gum up the works... + mv $DB_BASE/$collect/db.docdb.work $DB_BASE/$collect/db.docdb + mv $DB_BASE/$collect/db.docs.index.work $DB_BASE/$collect/db.docs.index + # mv $DB_BASE/$collect/db.wordlist.work $DB_BASE/$collect/db.wordlist + mv $DB_BASE/$collect/db.words.db.work $DB_BASE/$collect/db.words.db + + # Make them world readable! + chmod 644 $DB_BASE/$collect/db.docdb + chmod 644 $DB_BASE/$collect/db.docs.index + # chmod 644 $DB_BASE/$collect/db.wordlist + chmod 644 $DB_BASE/$collect/db.words.db + if [ "$1" = "-v" ]; then + echo Done with $collect at: `date` + fi +done + +# That's it! 
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig new file mode 100644 index 00000000..0b59136a --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig @@ -0,0 +1,93 @@ +#!@BASH@ + +# +# multidig 1.1 +# +# Copyright (c) 1998-1999 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. +# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# +# Part of the "multidig script system" +# a system of shell scripts and some modified conf files +# that makes dealing with multiple databases easier for ht://Dig +# +# Syntax: +# multidig [-v] +# +# Performs all the digging, merging and so on needed +# for indexing and updating multiple db +# Merges multiple databases into ``collected'' db +# + +# This is useful for debugging info +if [ "$1" = "-v" ]; then + verbose=-v +fi + +# You may need to set the following: +MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf +source $MULTIDIG_CONF + +# Start indexing. +rm $REPORT +for db in `cat $DB_LIST`; do + echo Digging $db at: `date` + # What's the conf file for this database? 
+ CONF=$CONFIG_DIR/$db.conf + if [ "$1" = "-v" ]; then + echo " Indexing $db at: `date`" + fi + $BINDIR/htdig -a $verbose -s -c $CONF >>$REPORT + if [ "$1" = "-v" ]; then + echo " Merging $db at: `date`" + fi + $BINDIR/htmerge -a $verbose -s -c $CONF >>$REPORT + + if [ "$1" = "-v" ]; then + echo " Moving files $db at: `date`" + fi + # If you don't have the space for backups, this step can be omitted + if [ $BACKUPS = "true" ]; then + cp $DB_BASE/$db/db.docdb $DB_BASE/$db/db.docdb.bak + cp $DB_BASE/$db/db.docs.index $DB_BASE/$db/db.docs.index.bak + # cp $DB_BASE/$db/db.wordlist $DB_BASE/$db/db.wordlist.bak + cp $DB_BASE/$db/db.words.db $DB_BASE/$db/db.words.db.bak + fi + + # Copy the db.docdb file, the .work file is needed for update digs + cp $DB_BASE/$db/db.docdb.work $DB_BASE/$db/db.docdb + # We don't do anything with the db.wordlist file because the + # .work file is needed for update digs and the non-work file isn't needed + # cp $DB_BASE/$db/db.wordlist.work $DB_BASE/$db/db.wordlist + # These .work files are never used, so let's just keep the active copy + mv $DB_BASE/$db/db.docs.index.work $DB_BASE/$db/db.docs.index + mv $DB_BASE/$db/db.words.db.work $DB_BASE/$db/db.words.db + + # Make them world readable! + chmod 644 $DB_BASE/$db/db.docdb + chmod 644 $DB_BASE/$db/db.docdb.work + chmod 644 $DB_BASE/$db/db.docs.index + # chmod 644 $DB_BASE/$db/db.wordlist + chmod 644 $DB_BASE/$db/db.words.db + if [ "$1" = "-v" ]; then + echo " Done with $db at: `date`" + fi +done +# Now generate the collections by merging their component databases +# We do this in our gen-collect script, so we won't do that here. +$BINDIR/gen-collect $1 + +if [ "$1" = "-v" ]; then + echo + fgrep "htdig:" $REPORT + echo + fgrep "htmerge:" $REPORT + echo + echo Total lines in $REPORT: `wc -l $REPORT` +fi + +# You probably don't need to do this since the script will remove it next +# time it's run. 
But you can do it anyway +# rm $REPORT diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig.conf b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig.conf new file mode 100644 index 00000000..32164977 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/multidig.conf @@ -0,0 +1,32 @@ +#!@BASH@ +# +# multidig config 1.1 +# +# Copyright (c) 1998-1999 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. +# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# + +# You may wish to set some of these: +# BASEDIR = base directory for ht://Dig installation +# BINDIR = directory with ht://Dig binaries (i.e. htdig, htmerge) +# DB_BASE = base directory for ht://Dig DB +# (i.e. each DB gets its own directory off of this) +# CONFIG_DIR = directory with ht://Dig config files +# DB_LIST = file with list of databases +# COLLECT_LIST = file with list of "collections" databases merged from others +# DB_CONF = file copied by new-db and new-collect for .conf files +# REPORT = temporary file used to generate a report for the dig +# TMPDIR = a directory with lots of temporary space for the merging +export BASEDIR=@BASEDIR@ +export BINDIR=@BINDIR@ +export DB_BASE=@DB_BASE@ +export CONFIG_DIR=@CONFIG_DIR@ +export DB_LIST=$CONFIG_DIR/db.list +export COLLECT_LIST=$CONFIG_DIR/collect.list +export DB_CONF=$CONFIG_DIR/db.conf +export REPORT=$BASEDIR/multidig.report +export TMPDIR=$DB_BASE +export BACKUPS=true diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-collect b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-collect new file mode 100644 index 00000000..6647d447 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-collect @@ -0,0 +1,39 @@ +#!@BASH@ + +# +# new-collect 1.1 +# +# Copyright (c) 1998-2000 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or 
later. +# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# +# syntax: +# new-collect <collection> +# +# Creates a new database directory and conf file with given name +# Updates the global collect.list file +# + +# You may need to set the following: +MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf +source $MULTIDIG_CONF + +# Catch people who don't supply an argument +if [ "$1" = "" ]; then + echo Syntax: new-collect \<collection\> + exit +fi + +# Add the new collection to the collect.list file +echo ${1:?You need to specify a collection} >>$COLLECT_LIST + +# Now make the appropriate database directory +mkdir $DB_BASE/$1 + +# And make a copy of the default (db.conf) conf file for the DB +# Use sed to replace @DATABASE@ with the name of the database +sed -e s%@DATABASE@%$1% $DB_CONF >$CONFIG_DIR/$1.conf +# And make a blank file for the ${start_urls} directive +touch $DB_BASE/$1/$1.collect diff --git a/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-db b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-db new file mode 100644 index 00000000..1c4948f7 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/multidig/new-db @@ -0,0 +1,39 @@ +#!@BASH@ + +# +# new-db 1.1 +# +# Copyright (c) 1998-1999 The ht://Dig Group +# Distributed under the terms of the GNU General Public License (GPL) +# version 2 or later. 
+# for the ht://Dig search system http://www.htdig.org/ +# and the multidig script system http://www.htdig.org/contrib/scripts/ +# +# syntax: +# new-db <db> +# +# Creates a new database directory and conf file with given name +# Updates the global db.list file +# + +# You may need to set the following: +MULTIDIG_CONF=@CONFIG_DIR@/multidig.conf +source $MULTIDIG_CONF + +# Catch people who don't supply an argument +if [ "$1" = "" ]; then + echo Syntax: new-db \<db\> + exit +fi + +# Add the new database to the db.list file +echo ${1:?You need to specify a database} >>$DB_LIST + +# Now make the appropriate database directory +mkdir $DB_BASE/$1 + +# And make a copy of the default (db.conf) conf file for the DB +# Use sed to replace @DATABASE@ with the name of the database +sed -e s%@DATABASE@%$1% $DB_CONF >$CONFIG_DIR/$1.conf +# And make a blank file for the ${start_urls} directive +touch $DB_BASE/$1/$1.urls diff --git a/debian/htdig/htdig-3.2.0b6/contrib/parse_doc.pl b/debian/htdig/htdig-3.2.0b6/contrib/parse_doc.pl new file mode 100755 index 00000000..63b775db --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/parse_doc.pl @@ -0,0 +1,238 @@ +#!/usr/local/bin/perl + +# 1998/12/10 +# Added: push @allwords, $fields[$x]; <[email protected]> +# Replaced: matching patterns. they match words starting or ending with ()[]'`;:?.,! now, not when in between! +# Gone: the variable $line is gone (using $_ now) +# +# 1998/12/11 +# Added: catdoc test (is catdoc runnable?) <[email protected]> +# Changed: push line semi-colomn wrong. 
<[email protected]> +# Changed: matching works for end of lines now <[email protected]> +# Added: option to rigorously delete all punctuation <[email protected]> +# +# 1999/02/09 +# Added: option to delete all hyphens <[email protected]> +# Added: uses ps2ascii to handle PS files <[email protected]> +# 1999/02/15 +# Added: check for some file formats <[email protected]> +# 1999/02/25 +# Added: uses pdftotext to handle PDF files <[email protected]> +# Changed: generates a head record with punct. <[email protected]> +# 1999/03/01 +# Added: extra checks for file "wrappers" <[email protected]> +# & check for MS Word signature (no longer defaults to catdoc) +# 1999/03/05 +# Changed: rejoin hyphenated words across lines <[email protected]> +# (in PDFs) & remove multiple punct. chars. between words (all) +# 1999/03/10 +# Changed: fix handling of minimum word length <[email protected]> +# 1999/08/12 +# Changed: adapted for xpdf 0.90 release <[email protected]> +# Added: uses pdfinfo to handle PDF titles <[email protected]> +# Changed: keep hyphens by default, as htdig <[email protected]> +# does, but change dashes to hyphens +# 1999/09/09 +# Changed: fix to handle empty PDF title right <[email protected]> +# 2000/01/12 +# Changed: "break" to "last" (no break in Perl) <[email protected]> +# Changed: code for parsing a line into a list of +# words, to use "split", other streamlining. 
+# 2001/07/12 +# Changed: fix "last" handling in dehyphenation <[email protected]> +# Added: handle %xx codes in title from URL <[email protected]> +# 2003/06/07 +# Changed: allow file names with spaces <[email protected]> +######################################### +# +# set this to your MS Word to text converter +# get it from: http://www.fe.msk.ru/~vitus/catdoc/ +# +$CATDOC = "/usr/local/bin/catdoc"; +# +# set this to your WordPerfect to text converter, or /bin/true if none available +# this nabs WP documents with .doc suffix, so catdoc doesn't see them +# +$CATWP = "/bin/true"; +# +# set this to your RTF to text converter, or /bin/true if none available +# this nabs RTF documents with .doc suffix, so catdoc doesn't see them +# +$CATRTF = "/bin/true"; +# +# set this to your PostScript to text converter +# get it from the ghostscript 3.33 (or later) package +# +$CATPS = "/usr/bin/ps2ascii"; +# +# set this to your PDF to text converter, and pdfinfo tool +# get it from the xpdf 0.90 package at http://www.foolabs.com/xpdf/ +# +$CATPDF = "/usr/bin/pdftotext"; +$PDFINFO = "/usr/bin/pdfinfo"; +#$CATPDF = "/usr/local/bin/pdftotext"; +#$PDFINFO = "/usr/local/bin/pdfinfo"; + +# need some var's +$minimum_word_length = 3; +$head = ""; +@allwords = (); +@temp = (); +$x = 0; +#@fields = (); +$calc = 0; +$dehyphenate = 0; +$title = ""; +# +# okay. my programming style isn't that nice, but it works... + +#for ($x=0; $x<@ARGV; $x++) { # print out the args +# print STDERR "$ARGV[$x]\n"; +#} + +# Read first bytes of file to check for file type (like file(1) does) +open(FILE, "< $ARGV[0]") || die "Oops. Can't open file $ARGV[0]: $!\n"; +read FILE,$magic,8; +close FILE; + +if ($magic =~ /^\0\n/) { # possible MacBinary header + open(FILE, "< $ARGV[0]") || die "Oops. Can't open file $ARGV[0]: $!\n"; + read FILE,$magic,136; # let's hope parsers can handle them! 
+ close FILE; +} + +if ($magic =~ /%!|^\033%-12345/) { # it's PostScript (or HP print job) + $parser = $CATPS; # gs 3.33 leaves _temp_.??? files in . + $parsecmd = "(cd /tmp; $parser; rm -f _temp_.???) < \"$ARGV[0]\" |"; +# keep quiet even if PS gives errors... +# $parsecmd = "(cd /tmp; $parser; rm -f _temp_.???) < \"$ARGV[0]\" 2>/dev/null |"; + $type = "PostScript"; + $dehyphenate = 0; # ps2ascii already does this + if ($magic =~ /^\033%-12345/) { # HP print job + open(FILE, "< $ARGV[0]") || die "Oops. Can't open file $ARGV[0]: $!\n"; + read FILE,$magic,256; + close FILE; + exit unless $magic =~ /^\033%-12345X\@PJL.*\n*.*\n*.*ENTER\s*LANGUAGE\s*=\s*POSTSCRIPT.*\n*.*\n*.*\n%!/ + } +} elsif ($magic =~ /%PDF-/) { # it's PDF (Acrobat) + $parser = $CATPDF; + $parsecmd = "$parser -raw \"$ARGV[0]\" - |"; +# to handle single-column, strangely laid out PDFs, use coalescing feature... +# $parsecmd = "$parser \"$ARGV[0]\" - |"; + $type = "PDF"; + $dehyphenate = 1; # PDFs often have hyphenated lines + if (open(INFO, "$PDFINFO \"$ARGV[0]\" 2>/dev/null |")) { + while (<INFO>) { + if (/^Title:/) { + $title = $_; + $title =~ s/^Title:\s+//; + $title =~ s/\s+$//; + $title =~ s/\s+/ /g; + $title =~ s/&/\&\;/g; + $title =~ s/</\<\;/g; + $title =~ s/>/\>\;/g; + last; + } + } + close INFO; + } +} elsif ($magic =~ /WPC/) { # it's WordPerfect + $parser = $CATWP; + $parsecmd = "$parser \"$ARGV[0]\" |"; + $type = "WordPerfect"; + $dehyphenate = 0; # WP documents not likely hyphenated +} elsif ($magic =~ /^{\\rtf/) { # it's Richtext + $parser = $CATRTF; + $parsecmd = "$parser \"$ARGV[0]\" |"; + $type = "RTF"; + $dehyphenate = 0; # RTF documents not likely hyphenated +} elsif ($magic =~ /\320\317\021\340/) { # it's MS Word + $parser = $CATDOC; + $parsecmd = "$parser -a -w \"$ARGV[0]\" |"; + $type = "Word"; + $dehyphenate = 0; # Word documents not likely hyphenated +} else { + die "Can't determine type of file $ARGV[0]; content-type: $ARGV[1]; URL: $ARGV[2]\n"; +} +# print STDERR "$ARGV[0]: 
$type $parsecmd\n"; +die "Hmm. $parser is absent or unwilling to execute.\n" unless -x $parser; + + +# open it +open(CAT, "$parsecmd") || die "Hmmm. $parser doesn't want to be opened using pipe.\n"; +while (<CAT>) { + while (/[A-Za-z\300-\377]-\s*$/ && $dehyphenate) { + $_ .= <CAT>; + last if eof; + s/([A-Za-z\300-\377])-\s*\n\s*([A-Za-z\300-\377])/$1$2/s + } + $head .= " " . $_; +# s/\s+[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+\s+|^[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]+$/ /g; # replace reading-chars with space (only at end or begin of word, but allow multiple characters) +## s/\s[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]\s|^[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]|[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]$/ /g; # replace reading-chars with space (only at end or begin of word) +## s/[\(\)\[\]\\\/\^\;\:\"\'\`\.\,\?!\*]/ /g; # rigorously replace all by <[email protected]> +## s/[\-\255]/ /g; # replace hyphens with space +# s/[\255]/-/g; # replace dashes with hyphens +# @fields = split; # split up line +# next if (@fields == 0); # skip if no fields (does it speed up?) +# for ($x=0; $x<@fields; $x++) { # check each field if string length >= 3 +# if (length($fields[$x]) >= $minimum_word_length) { +# push @allwords, $fields[$x]; # add to list +# } +# } + + # Delete valid punctuation. These are the default values + # for valid_punctuation, and should be changed other values + # are specified in the config file. 
+ tr{-\255._/!#$%^&'}{}d; + push @allwords, grep { length >= $minimum_word_length } split /\W+/; +} + +close CAT; + +exit unless @allwords > 0; # nothing to output + +############################################# +# print out the title, if it's set, and not just a file name +if ($title !~ /^$/ && $title !~ /^[A-G]:[^\s]+\.[Pp][Dd][Ff]$/) { + print "t\t$title\n"; +} else { # otherwise generate a title + @temp = split(/\//, $ARGV[2]); # get the filename, get rid of basename + $temp[-1] =~ s/%([A-F0-9][A-F0-9])/pack("C", hex($1))/gie; + print "t\t$type Document $temp[-1]\n"; # print it +} + + +############################################# +# print out the head +$head =~ s/^\s+//; # remove leading and trailing space +$head =~ s/\s+$//; +$head =~ s/\s+/ /g; +$head =~ s/&/\&\;/g; +$head =~ s/</\<\;/g; +$head =~ s/>/\>\;/g; +print "h\t$head\n"; +#$calc = @allwords; +#print "h\t"; +##if ($calc >100) { # but not more than 100 words +## $calc = 100; +##} +#for ($x=0; $x<$calc; $x++) { # print out the words for the exerpt +# print "$allwords[$x] "; +#} +#print "\n"; + + +############################################# +# now the words +#for ($x=0; $x<@allwords; $x++) { +# $calc=int(1000*$x/@allwords); # calculate rel. position (0-1000) +# print "w\t$allwords[$x]\t$calc\t0\n"; # print out word, rel. pos. and text type (0) +#} +$x = 0; +for ( @allwords ) { + # print out word, rel. pos. and text type (0) + printf "w\t%s\t%d\t0\n", $_, 1000*$x++/@allwords; +} + +$calc=@allwords; +# print STDERR "# of words indexed: $calc\n"; diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING new file mode 100644 index 00000000..d60c31a9 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile new file mode 100644 index 00000000..5409f487 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/Makefile @@ -0,0 +1,11 @@ +CC= gcc +CFLAGS= -O2 -Wall + +rtf2html: rtf2html.c + $(CC) $(CFLAGS) -o rtf2html rtf2html.c + +install: rtf2html + cp rtf2html /usr/local/bin + +clean: + rm -f rtf2html diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README new file mode 100644 index 00000000..9f3084d4 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/README @@ -0,0 +1,16 @@ +rtf2html - a RTF to HTML conversion program + +This version of rtf2html has been developed by +David Lippi <[email protected]> and Gabriele Bartolini +<[email protected]>, based on an earlier work +by Chuck Shotton <[email protected]> +(see http://www.w3.org/Tools/HTMLGeneration/rtf2html.html) +and Dmitry Potapov <[email protected]>. 
+ +This version can handle character set recognition at run-time: +currently, the ANSI Windows 1252 code and the Macintosh's are +supported. + +For copyright details, see the file COPYING in your distribution +or the GNU General Public License (GPL) version 2 or later +<http://www.gnu.org/copyleft/gpl.html> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h new file mode 100644 index 00000000..d2b40ba0 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charset1252.h @@ -0,0 +1,257 @@ +unsigned char* charset1252[256] = { + "", /* 1 - 1 */ + "", /* 2 - 2 */ + "", /* 3 - 3 */ + "", /* 4 - 4 */ + "", /* 5 - 5 */ + "", /* 6 - 6 */ + "", /* 7 - 7 */ + "", /* 8 - 8 */ + "\t", /* 9 - 9 */ + "\n", /* 10 - a */ + "", /* 11 - b */ + "", /* 12 - c */ + "\r", /* 13 - d */ + "", /* 14 - e */ + "", /* 15 - f */ + "", /* 16 - 10 */ + "", /* 17 - 11 */ + "", /* 18 - 12 */ + "", /* 19 - 13 */ + "", /* 20 - 14 */ + "", /* 21 - 15 */ + "", /* 22 - 16 */ + "", /* 23 - 17 */ + "", /* 24 - 18 */ + "", /* 25 - 19 */ + "", /* 26 - 1a */ + "", /* 27 - 1b */ + "", /* 28 - 1c */ + "", /* 29 - 1d */ + "", /* 30 - 1e */ + "", /* 31 - 1f */ + " ", /* 32 - 20 */ + "!", /* 33 - 21 */ + "\"", /* 34 - 22 */ + "#", /* 35 - 23 */ + "$", /* 36 - 24 */ + "%", /* 37 - 25 */ + "&", /* 38 - 26 */ + "'", /* 39 - 27 */ + "(", /* 40 - 28 */ + ")", /* 41 - 29 */ + "*", /* 42 - 2a */ + "+", /* 43 - 2b */ + ",", /* 44 - 2c */ + "-", /* 45 - 2d */ + ".", /* 46 - 2e */ + "/", /* 47 - 2f */ + "0", /* 48 - 30 */ + "1", /* 49 - 31 */ + "2", /* 50 - 32 */ + "3", /* 51 - 33 */ + "4", /* 52 - 34 */ + "5", /* 53 - 35 */ + "6", /* 54 - 36 */ + "7", /* 55 - 37 */ + "8", /* 56 - 38 */ + "9", /* 57 - 39 */ + ":", /* 58 - 3a */ + ";", /* 59 - 3b */ + "<", /* 60 - 3c */ + "=", /* 61 - 3d */ + ">", /* 62 - 3e */ + "?", /* 63 - 3f */ + "@", /* 64 - 40 */ + "A", /* 65 - 41 */ + "B", /* 66 - 42 */ + "C", /* 67 - 43 */ + "D", /* 68 - 44 */ 
+ "E", /* 69 - 45 */ + "F", /* 70 - 46 */ + "G", /* 71 - 47 */ + "H", /* 72 - 48 */ + "I", /* 73 - 49 */ + "J", /* 74 - 4a */ + "K", /* 75 - 4b */ + "L", /* 76 - 4c */ + "M", /* 77 - 4d */ + "N", /* 78 - 4e */ + "O", /* 79 - 4f */ + "P", /* 80 - 50 */ + "Q", /* 81 - 51 */ + "R", /* 82 - 52 */ + "S", /* 83 - 53 */ + "T", /* 84 - 54 */ + "U", /* 85 - 55 */ + "V", /* 86 - 56 */ + "W", /* 87 - 57 */ + "X", /* 88 - 58 */ + "Y", /* 89 - 59 */ + "Z", /* 90 - 5a */ + "[", /* 91 - 5b */ + "\\", /* 92 - 5c */ + "]", /* 93 - 5d */ + "^", /* 94 - 5e */ + "_", /* 95 - 5f */ + "`", /* 96 - 60 */ + "a", /* 97 - 61 */ + "b", /* 98 - 62 */ + "c", /* 99 - 63 */ + "d", /* 100 - 64 */ + "e", /* 101 - 65 */ + "f", /* 102 - 66 */ + "g", /* 103 - 67 */ + "h", /* 104 - 68 */ + "i", /* 105 - 69 */ + "j", /* 106 - 6a */ + "k", /* 107 - 6b */ + "l", /* 108 - 6c */ + "m", /* 109 - 6d */ + "n", /* 110 - 6e */ + "o", /* 111 - 6f */ + "p", /* 112 - 70 */ + "q", /* 113 - 71 */ + "r", /* 114 - 72 */ + "s", /* 115 - 73 */ + "t", /* 116 - 74 */ + "u", /* 117 - 75 */ + "v", /* 118 - 76 */ + "w", /* 119 - 77 */ + "x", /* 120 - 78 */ + "y", /* 121 - 79 */ + "z", /* 122 - 7a */ + "{", /* 123 - 7b */ + "¦", /* 124 - 7c */ + "}", /* 125 - 7d */ + "~", /* 126 - 7e */ + " ", /* 127 - 7f */ + "€", /* 128 - 80 */ + " ", /* 129 - 81 */ + "‚", /* 130 - 82 */ + "ƒ", /* 131 - 83 */ + "„", /* 132 - 84 */ + "…", /* 133 - 85 */ + "†", /* 134 - 86 */ + "‡", /* 135 - 87 */ + "ˆ", /* 136 - 88 */ + "‰", /* 137 - 89 */ + "Š", /* 138 - 8a */ + "‹", /* 139 - 8b */ + "Œ", /* 140 - 8c */ + " ", /* 141 - 8d */ + "Ž", /* 142 - 8e */ + " ", /* 143 - 8f */ + " ", /* 144 - 90 */ + "‘", /* 145 - 91 */ + "’", /* 146 - 92 */ + "“", /* 147 - 93 */ + "”", /* 148 - 94 */ + "•", /* 149 - 95 */ + " ", /* 150 - 96 */ + " ", /* 151 - 97 */ + "˜", /* 152 - 98 */ + "™", /* 153 - 99 */ + "š", /* 154 - 9a */ + "›", /* 155 - 9b */ + "œ", /* 156 - 9c */ + " ", /* 157 - 9d */ + "ž", /* 158 - 9e */ + "Ÿ", /* 159 - 9f */ + " ", /* 160 - a0 */ + 
"¡", /* 161 - a1 */ + "¢", /* 162 - a2 */ + "£", /* 163 - a3 */ + "¤", /* 164 - a4 */ + "¥", /* 165 - a5 */ + "¦", /* 166 - a6 */ + "§", /* 167 - a7 */ + "¨", /* 168 - a8 */ + "©", /* 169 - a9 */ + "ª", /* 170 - aa */ + "«", /* 171 - ab */ + "¬", /* 172 - ac */ + "­", /* 173 - ad */ + "®", /* 174 - ae */ + "¯", /* 175 - af */ + "°", /* 176 - b0 */ + "±", /* 177 - b1 */ + "²", /* 178 - b2 */ + "³", /* 179 - b3 */ + "´", /* 180 - b4 */ + "µ", /* 181 - b5 */ + "¶", /* 182 - b6 */ + "·", /* 183 - b7 */ + "ç", /* 184 - b8 */ + "¹", /* 185 - b9 */ + "º", /* 186 - ba */ + "»", /* 187 - bb */ + "¼", /* 188 - bc */ + "½", /* 189 - bd */ + "¾", /* 190 - be */ + "¿", /* 191 - bf */ + "À", /* 192 - c0 */ + "Á", /* 193 - c1 */ + "Â", /* 194 - c2 */ + "Ã", /* 195 - c3 */ + "Ä", /* 196 - c4 */ + "Å", /* 197 - c5 */ + "Æ", /* 198 - c6 */ + "Ç", /* 199 - c7 */ + "È", /* 200 - c8 */ + "É", /* 201 - c9 */ + "Ê", /* 202 - ca */ + "Ë", /* 203 - cb */ + "Ì", /* 204 - cc */ + "Í", /* 205 - cd */ + "Î", /* 206 - ce */ + "Ï", /* 207 - cf */ + "Ð", /* 208 - d0 */ + "Ñ", /* 209 - d1 */ + "Ò", /* 210 - d2 */ + "Ó", /* 211 - d3 */ + "&Oring;", /* 212 - d4 */ + "Õ", /* 213 - d5 */ + "Ö", /* 214 - d6 */ + "×", /* 215 - d7 */ + "Ø", /* 216 - d8 */ + "Ù", /* 217 - d9 */ + "Ú", /* 218 - da */ + "Û", /* 219 - db */ + "Ü", /* 220 - dc */ + "Ý", /* 221 - dd */ + "Þ", /* 222 - de */ + "ß", /* 223 - df */ + "à", /* 224 - e0 */ + "á", /* 225 - e1 */ + "â", /* 226 - e2 */ + "ã", /* 227 - e3 */ + "ä", /* 228 - e4 */ + "å", /* 229 - e5 */ + "æ", /* 230 - e6 */ + "ç", /* 231 - e7 */ + "è", /* 232 - e8 */ + "é", /* 233 - e9 */ + "ê", /* 234 - ea */ + "ë", /* 235 - eb */ + "ì", /* 236 - ec */ + "í", /* 237 - ed */ + "î", /* 238 - ee */ + "ï", /* 239 - ef */ + "ð", /* 240 - f0 */ + "ñ", /* 241 - f1 */ + "ò", /* 242 - f2 */ + "ó", /* 243 - f3 */ + "ô", /* 244 - f4 */ + "õ", /* 245 - f5 */ + "ö", /* 246 - f6 */ + "÷", /* 247 - f7 */ + "ø", /* 248 - f8 */ + "ù", /* 249 - f9 */ + "ú", /* 250 - fa */ + "û", /* 251 - 
fb */ + "ü", /* 252 - fc */ + "ý", /* 253 - fd */ + "þ", /* 254 - fe */ + "ÿ" /* 255 - ff */ +}; diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h new file mode 100644 index 00000000..8c4aeca0 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/charsetmac.h @@ -0,0 +1,257 @@ +unsigned char* mac[256] = { + "", /* 1 - 1 */ + "", /* 2 - 2 */ + "", /* 3 - 3 */ + "", /* 4 - 4 */ + "", /* 5 - 5 */ + "", /* 6 - 6 */ + "", /* 7 - 7 */ + "", /* 8 - 8 */ + "\t", /* 9 - 9 */ + "\n", /* 10 - a */ + "", /* 11 - b */ + "", /* 12 - c */ + "\r", /* 13 - d */ + "", /* 14 - e */ + "", /* 15 - f */ + "", /* 16 - 10 */ + "", /* 17 - 11 */ + "", /* 18 - 12 */ + "", /* 19 - 13 */ + "", /* 20 - 14 */ + "", /* 21 - 15 */ + "", /* 22 - 16 */ + "", /* 23 - 17 */ + "", /* 24 - 18 */ + "", /* 25 - 19 */ + "", /* 26 - 1a */ + "", /* 27 - 1b */ + "", /* 28 - 1c */ + "", /* 29 - 1d */ + "", /* 30 - 1e */ + "", /* 31 - 1f */ + " ", /* 32 - 20 */ + "!", /* 33 - 21 */ + "\"", /* 34 - 22 */ + "#", /* 35 - 23 */ + "$", /* 36 - 24 */ + "%", /* 37 - 25 */ + "&", /* 38 - 26 */ + "'", /* 39 - 27 */ + "(", /* 40 - 28 */ + ")", /* 41 - 29 */ + "*", /* 42 - 2a */ + "+", /* 43 - 2b */ + ",", /* 44 - 2c */ + "-", /* 45 - 2d */ + ".", /* 46 - 2e */ + "/", /* 47 - 2f */ + "0", /* 48 - 30 */ + "1", /* 49 - 31 */ + "2", /* 50 - 32 */ + "3", /* 51 - 33 */ + "4", /* 52 - 34 */ + "5", /* 53 - 35 */ + "6", /* 54 - 36 */ + "7", /* 55 - 37 */ + "8", /* 56 - 38 */ + "9", /* 57 - 39 */ + ":", /* 58 - 3a */ + ";", /* 59 - 3b */ + "<", /* 60 - 3c */ + "=", /* 61 - 3d */ + ">", /* 62 - 3e */ + "?", /* 63 - 3f */ + "@", /* 64 - 40 */ + "A", /* 65 - 41 */ + "B", /* 66 - 42 */ + "C", /* 67 - 43 */ + "D", /* 68 - 44 */ + "E", /* 69 - 45 */ + "F", /* 70 - 46 */ + "G", /* 71 - 47 */ + "H", /* 72 - 48 */ + "I", /* 73 - 49 */ + "J", /* 74 - 4a */ + "K", /* 75 - 4b */ + "L", /* 76 - 4c */ + "M", /* 77 - 4d */ + "N", /* 78 - 4e */ + "O", /* 79 
- 4f */ + "P", /* 80 - 50 */ + "Q", /* 81 - 51 */ + "R", /* 82 - 52 */ + "S", /* 83 - 53 */ + "T", /* 84 - 54 */ + "U", /* 85 - 55 */ + "V", /* 86 - 56 */ + "W", /* 87 - 57 */ + "X", /* 88 - 58 */ + "Y", /* 89 - 59 */ + "Z", /* 90 - 5a */ + "[", /* 91 - 5b */ + "\\", /* 92 - 5c */ + "]", /* 93 - 5d */ + "^", /* 94 - 5e */ + "_", /* 95 - 5f */ + "`", /* 96 - 60 */ + "a", /* 97 - 61 */ + "b", /* 98 - 62 */ + "c", /* 99 - 63 */ + "d", /* 100 - 64 */ + "e", /* 101 - 65 */ + "f", /* 102 - 66 */ + "g", /* 103 - 67 */ + "h", /* 104 - 68 */ + "i", /* 105 - 69 */ + "j", /* 106 - 6a */ + "k", /* 107 - 6b */ + "l", /* 108 - 6c */ + "m", /* 109 - 6d */ + "n", /* 110 - 6e */ + "o", /* 111 - 6f */ + "p", /* 112 - 70 */ + "q", /* 113 - 71 */ + "r", /* 114 - 72 */ + "s", /* 115 - 73 */ + "t", /* 116 - 74 */ + "u", /* 117 - 75 */ + "v", /* 118 - 76 */ + "w", /* 119 - 77 */ + "x", /* 120 - 78 */ + "y", /* 121 - 79 */ + "z", /* 122 - 7a */ + "{", /* 123 - 7b */ + "¦", /* 124 - 7c */ + "}", /* 125 - 7d */ + "~", /* 126 - 7e */ + " ", /* 127 - 7f */ + "€", /* 128 - 80 */ + "Å", /* 129 - 81 */ + "‚", /* 130 - 82 */ + "ƒ", /* 131 - 83 */ + "„", /* 132 - 84 */ + "…", /* 133 - 85 */ + "†", /* 134 - 86 */ + "‡", /* 135 - 87 */ + "á", /* 136 - 88 */ + "‰", /* 137 - 89 */ + "Š", /* 138 - 8a */ + "‹", /* 139 - 8b */ + "Œ", /* 140 - 8c */ + "ç", /* 141 - 8d */ + "é", /* 142 - 8e */ + "è ", /* 143 - 8f */ + "ê", /* 144 - 90 */ + "‘", /* 145 - 91 */ + "’", /* 146 - 92 */ + "ì", /* 147 - 93 */ + "\"", /* 148 - 94 */ + "•", /* 149 - 95 */ + " ", /* 150 - 96 */ + " ", /* 151 - 97 */ + "˜", /* 152 - 98 */ + "™", /* 153 - 99 */ + "š", /* 154 - 9a */ + "›", /* 155 - 9b */ + "œ", /* 156 - 9c */ + "ù", /* 157 - 9d */ + "ž", /* 158 - 9e */ + "Ÿ", /* 159 - 9f */ + " ", /* 160 - a0 */ + "º", /* 161 - a1 */ + "¢", /* 162 - a2 */ + "£", /* 163 - a3 */ + "§", /* 164 - a4 */ + "¥", /* 165 - a5 */ + "¦", /* 166 - a6 */ + "§", /* 167 - a7 */ + "¨", /* 168 - a8 */ + "©", /* 169 - a9 */ + "ª", /* 170 - aa */ + "«", 
/* 171 - ab */ + "¬", /* 172 - ac */ + "­", /* 173 - ad */ + "®", /* 174 - ae */ + "¯", /* 175 - af */ + "°", /* 176 - b0 */ + "±", /* 177 - b1 */ + "²", /* 178 - b2 */ + "³", /* 179 - b3 */ + "´", /* 180 - b4 */ + "µ", /* 181 - b5 */ + "¶", /* 182 - b6 */ + "·", /* 183 - b7 */ + "ç", /* 184 - b8 */ + "¹", /* 185 - b9 */ + "º", /* 186 - ba */ + "»", /* 187 - bb */ + "¼", /* 188 - bc */ + "½", /* 189 - bd */ + "¾", /* 190 - be */ + "¿", /* 191 - bf */ + "À", /* 192 - c0 */ + "Á", /* 193 - c1 */ + "Â", /* 194 - c2 */ + "Ã", /* 195 - c3 */ + "Ä", /* 196 - c4 */ + "Å", /* 197 - c5 */ + "Æ", /* 198 - c6 */ + "Ç", /* 199 - c7 */ + "È", /* 200 - c8 */ + "É", /* 201 - c9 */ + "Ê", /* 202 - ca */ + "À", /* 203 - cb */ + "Ì", /* 204 - cc */ + "Í", /* 205 - cd */ + "Î", /* 206 - ce */ + "Ï", /* 207 - cf */ + "Ð", /* 208 - d0 */ + "Ñ", /* 209 - d1 */ + "\"", /* 210 - d2 */ + "\"", /* 211 - d3 */ + "&Oring;", /* 212 - d4 */ + "Õ", /* 213 - d5 */ + "Ö", /* 214 - d6 */ + "×", /* 215 - d7 */ + "Ø", /* 216 - d8 */ + "Ù", /* 217 - d9 */ + "Ú", /* 218 - da */ + "Û", /* 219 - db */ + "Ü", /* 220 - dc */ + "Ý", /* 221 - dd */ + "Þ", /* 222 - de */ + "ß", /* 223 - df */ + "à", /* 224 - e0 */ + "á", /* 225 - e1 */ + "â", /* 226 - e2 */ + "ã", /* 227 - e3 */ + "ä", /* 228 - e4 */ + "å", /* 229 - e5 */ + "æ", /* 230 - e6 */ + "ç", /* 231 - e7 */ + "è", /* 232 - e8 */ + "é", /* 233 - e9 */ + "ê", /* 234 - ea */ + "ë", /* 235 - eb */ + "ì", /* 236 - ec */ + "í", /* 237 - ed */ + "î", /* 238 - ee */ + "ï", /* 239 - ef */ + "ð", /* 240 - f0 */ + "ñ", /* 241 - f1 */ + "ò", /* 242 - f2 */ + "ó", /* 243 - f3 */ + "ô", /* 244 - f4 */ + "õ", /* 245 - f5 */ + "ö", /* 246 - f6 */ + "÷", /* 247 - f7 */ + "ø", /* 248 - f8 */ + "ù", /* 249 - f9 */ + "ú", /* 250 - fa */ + "û", /* 251 - fb */ + "ü", /* 252 - fc */ + "ý", /* 253 - fd */ + "þ", /* 254 - fe */ + "ÿ" /* 255 - ff */ +}; diff --git a/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c 
new file mode 100644 index 00000000..d49140d4 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/rtf2html/rtf2html.c @@ -0,0 +1,910 @@ +/* RTF2HTML.c, Chuck Shotton - 6/21/93 */ +/************************************************************************ + * This program takes a stab at converting RTF (Rich Text Format) files + * into HTML. There are some limitations that keep RTF from being able to + * easily represent things like in-line images and anchors as styles. In + * particular, RTF styles apply to entire "paragraphs", so anchors or + * images in the middle of a text stream can't easily be represented by + * styles. The intent is to ultimately use something like embedded text + * color changes to represent these constructs. + * + * In the meantime, you can take existing Word documents, apply the + * correct style sheet, and convert them to HTML with this tool. + * + * AUTHOR: Chuck Shotton, UT-Houston Academic Computing, + * + * Dmitry Potapov, CapitalSoft + * + * David Lippi, Comune di Prato, Italy + * + * Gabriele Bartolini, Comune di Prato, Italy + * + * USAGE: rtf2html [rtf_filename] + * + * BEHAVIOR: + * rtf2html will open the specified RTF input file or read from + * standard input, writing converted HTML to standard output. + * + * NOTES: + * The RTF document must be formatted with a style sheet that has + * style numberings that conform to the style_mappings table + * defined in this source file. Characters are converted according + * to the ANSI Windows 1252 code or Macintosh. + * + * MODIFICATIONS: + * 6/21/93 : Chuck Shotton - created version 1.0. 
+ * 11/26/98 : Dmitry Potapov - version 1.1 beta + * 05/07/04 : David Lippi, Gabriele Bartolini - version 1.2 + * + * Copyright (C) 2004 Comune di Prato + * + * For copyright details, see the file COPYING in your distribution + * or the GNU General Public License (GPL) version 2 or later + * <http://www.gnu.org/copyleft/gpl.html> + * + ************************************************************************/ + +/* Note, the source is formatted with 4 character tabs */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include "charset1252.h" +#include "charsetmac.h" + +#ifdef _MSC_VER +# define strcasecmp _stricmp +#endif + +#ifndef TRUE +#define TRUE -1 +#define FALSE 0 +#endif + +#define MAX_LEVELS 40 /*defines the # of nested in-line styles (pairs of {})*/ +#define MAX_RTF_TOKEN 40 + +#define MAX_INLINE_STYLES 5 /*defines # of in-line styles, bold, italic, etc.*/ + +typedef struct tag_StyleState +{ + unsigned char s: MAX_INLINE_STYLES; +} TStyleState; + +typedef enum { s_plain, s_bold, s_italic, s_underline, s_hidden, /*in-line styles*/ + s_para, s_br, /*pseudo style*/ + s_h0, s_h1, s_h2, s_h3, s_h4, s_h5, s_h6 /*heading styles*/ +} StyleState; + +char *styles[][2] = { /*HTML Start and end tags for styles*/ + {"", ""}, + {"<strong>", "</strong>"}, + {"<em>", "</em>"}, + {"", ""}, + {"<!-- ", " -->"}, + {"\n", "\n"}, /* {"\n<p>", "</p>\n"}, */ + {"<br />\n",""}, + {"", ""}, + {"<h1>", "</h1>"}, + {"<h2>", "</h2>"}, + {"<h3>", "</h3>"}, + {"<h4>", "</h4>"}, + {"<h5>", "</h5>"}, + {"<h6>", "</h6>"} +}; + +/* style_mappings maps the style numbers in a RTF style sheet into one of the*/ +/* (currently) six paragraph-oriented HTML styles (i.e. heading 1 through 6.)*/ +/* Additional styles for lists, etc. should be added here. 
Style info */ +/* ultimately should be read from some sort of config file into these tables.*/ + +#define MAX_NAME_LEN 40 +char style_name[MAX_NAME_LEN]; + +#define STYLE_NUMBER 7 +char *style_namings[STYLE_NUMBER] = { + "", "heading 1", "heading 2", "heading 3", "heading 4", "heading 5", + "heading 6" +}; +char style_mappings[STYLE_NUMBER][MAX_RTF_TOKEN]; +char style_number[MAX_RTF_TOKEN]; + +/* RTF tokens that mean something to the parser. All others are ignored. */ + +typedef enum { + t_start, + t_fonttbl, t_colortbl, t_stylesheet, t_info, t_s, t_b, t_ul, t_ulw, + t_uld, t_uldb, t_i, t_v, t_plain, t_par, t_pict, t_tab, t_bullet, + t_cell, t_row, t_line, t_endash, t_emdash, t_rquote, + t_end +} TokenIndex; + +char *tokens[] = { + "###", + "fonttbl", "colortbl", "stylesheet", "info", "s", "b", "ul", "ulw", + "uld", "uldb", "i", "v", "plain", "par", "pict", "tab", "bullet", + "cell", "row", "line", "endash", "emdash", "rquote", + "###" +}; + +TStyleState style_state[MAX_LEVELS], curr_style; +short curr_heading; + +void (*RTF_DoControl)(FILE*,char*,char*); +char isBody; +char* title; +//FILE* f; + +short level, /*current {} nesting level*/ + skip_to_level,/*{} level to which parsing should skip (used to skip */ + /* font tables, style sheets, color tables, etc.) 
*/ + gobble, /*Flag set to indicate all input should be discarded */ + ignore_styles;/*Set to ignore inline style expansions after style use*/ + +/* Charset */ +unsigned char** charset_table; + +#define CHARSET_DEFAULT 0 // Index of the default charset to use +#define CHARSET_NUMBER 2 // Number of charset used +#define CHARSET_MAX_LENGTH 20 // Max numbero of char in the charset +// metadata used in rtf standard for the charset definition +unsigned char *charset[CHARSET_NUMBER] = { + "ansi", + "mac" +}; +// variable with the charset definition +unsigned char **charset_variablename[CHARSET_NUMBER] = { + charset1252, + mac +}; + +/**************************************/ + +int openfile (char * filename, FILE ** f) +{ + int rv = 1; + + if (filename) + { + if (!(*f = fopen (filename, "r"))) + { + fprintf (stderr, "\nError: Input file %s not found.\n", filename); + rv = 0; + } + else + { + title = filename; + } + } + else + { + *f = stdin; + title="STDIN"; + } + return rv; +} + +/**************************************/ + +int closefile (FILE * f) +{ + return fclose (f); +} + +/**************************************/ + +char RTF_GetChar( FILE* f ) +{ + char ch; + do + { + ch = fgetc( f ); + } while ((ch=='\r')||(ch=='\n')); + return ch; +} + +/**************************************/ + +char RTF_UnGetChar(FILE* f, char ch) +{ + return ungetc(ch, f); +} + +/**************************************/ + +void RTF_PutStr(char* s) +{ + if (gobble) return; + fputs(s, stdout); +} + +/**************************************/ + +void RTF_PutHeader() +{ + RTF_PutStr("<head>\n<title>"); + RTF_PutStr(title); + RTF_PutStr("</title>\n"); + RTF_PutStr("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n"); + RTF_PutStr("</head>\n"); +} + +/**************************************/ + +void RTF_PutChar(char ch) +{ + if (gobble) return; + if (!isBody) + { + RTF_PutHeader(); + RTF_PutStr("<body>\n"); + isBody=TRUE; + } + switch (ch) { + case '<': + RTF_PutStr("<"); + 
break; + + case '>': + RTF_PutStr(">"); + break; + + case '&': + RTF_PutStr("&"); + break; + + default: + fputc(ch, stdout); + } +} + +/**************************************/ + +void RTF_PlainStyle (TStyleState* s) +{ + int i; + for(i=0;i<MAX_INLINE_STYLES;i++) + { + if(s->s & (1<<i)) + RTF_PutStr(styles[i][1]); + } + s->s=0; +} + +/**************************************/ + +void RTF_SetStyle(TStyleState* s, StyleState style) +{ + if( (!ignore_styles||(style==s_hidden)) && ((s->s&(1<<style))==0) ) + { + RTF_PutStr(styles[style][0]); + s->s|=(1<<style); + } +} + +/**************************************/ + +void RTF_PushState(short* level) +{ + if(*level>=MAX_LEVELS) + { + fprintf(stderr,"Exceed maximum level\n"); + exit(-1); + } + style_state[*level]=curr_style; + (*level)++; +} + +/**************************************/ + +void RTF_PopState(short* level) +{ + int j; + TStyleState new_style; + + if(*level<1) + { + fprintf(stderr,"RTF parse error: unexpected '}'\n"); + exit(-1); + } + new_style = style_state[*level-1]; + /*close off any in-line styles*/ + for (j=0;j<MAX_INLINE_STYLES;j++) + { + if ( ((curr_style.s & (1<<j))!=0) && ((new_style.s & (1<<j))==0) ) + { + curr_style.s &= ~(1<<j); + RTF_PutStr(styles[j][1]); + } + } + + for (j=0;j<MAX_INLINE_STYLES;j++) + { + if( ((curr_style.s & (1<<j))==0) && ((new_style.s & (1<<j))!=0) ) + RTF_PutStr(styles[j][0]); + } + (*level)--; + curr_style = new_style; + + if (*level == skip_to_level) { + skip_to_level = -1; + gobble = FALSE; + } +} + +/**************************************/ +/* Map a style number into a HTML heading */ + +short RTF_MapStyle(char* s) +{ + int i; + for (i=0;i<7;i++) + if (!strcmp(style_mappings[i], s)) + return (i); + return (0); +} + +/**************************************/ + +void RTF_AddStyleMap(char* name, char* number) +{ + int i, len; + len=strlen(name); + if( name[len-1]==';') name[--len]=0; + for(i=0;i<STYLE_NUMBER;i++) + { + if(!strcasecmp(name,style_namings[i])) + { + 
strcpy(style_mappings[i],number); + return; + } + } +} + +/**************************************/ + +void RTF_BuildName(char* token, char* ch, unsigned is_string) +{ + int len; + char *p; + len = strlen(token); + if(len>=MAX_NAME_LEN-1) + return; + if (is_string) + { + for (p = ch; p && *p; ++p) + { + token[len]=*p; + ++len; + } + } + else + { + token[len] = *ch; + ++len; + } + token[len]='\0'; +} + + +/**************************************/ + +void RTF_ClearName(char* token) +{ + token[0]=0; +} + +/**************************************/ + +TokenIndex GetTokenIndex(char* control) +{ + TokenIndex i; + + for (i=t_start; i<t_end; i++) + { + if(control[0]==tokens[i][0]) /* Added for fast compare */ + { + if (!strcmp(control, tokens[i])) + { + break; + } + } + } + return i; +} + +/**************************************/ + +void RTF_DoStyleControl (FILE* f, char* control, char* arg) +{ + if(GetTokenIndex(control)==t_s) + { + strcpy(style_number,arg); + } +} + +/**************************************/ + +int chartoi(char ch) +{ + if((ch>='0')&&(ch<='9')) + return ch-'0'; + if((ch>='A')&&(ch<='Z')) + return ch-'A'+10; + if((ch>='a')&&(ch<='z')) + return ch-'a'+10; + return -1; +} + +/**************************************/ + +void RTF_BuildArg (FILE * f, char ch, char* arg) +{ + int i=0; + + if(feof(f)) + { + arg[0]=0; + return; + } + if(ch=='-') + { + arg[i++]='-'; + ch = RTF_GetChar( f ); + if(feof(f)) + { + arg[0]=0; + return; + } + } + for(;isdigit(ch);i++) + { + arg[i]=ch; + if(i>=MAX_RTF_TOKEN-1) + { + arg[MAX_RTF_TOKEN-1]=0; + while(isdigit(ch)) { + ch = RTF_GetChar( f ); + if(feof(f)) + return; + } + break; + } + ch = RTF_GetChar( f ); + if(feof(f)) + { + arg[i+1]=0; + return; + } + } + arg[i]=0; + if(!isspace(ch)) + { + RTF_UnGetChar(f, ch); + } +} + +/**************************************/ + +void RTF_BuildToken (FILE* f, char ch) +{ + int i; + + for(i=1;;i++) + { + char token[MAX_RTF_TOKEN], arg[MAX_RTF_TOKEN]; + token[i-1]=ch; + if(i>=MAX_RTF_TOKEN-1) + { + 
do { + ch = RTF_GetChar( f ); + if(feof(f)) + return; + } while (isalpha(ch)); + RTF_BuildArg(f, ch,arg); + return; + } + ch = RTF_GetChar( f ); + if(feof(f)) + { + token[i]=0; + RTF_DoControl(f,token,""); + return; + } + if( !isalpha(ch) ) + { + token[i]=0; + RTF_BuildArg(f, ch,arg); + RTF_DoControl(f,token,arg); + return; + } + } +} + +/**************************************/ + +void RTF_backslash(FILE* f, char** pch, char* pf) +{ + int ch; + *pf=FALSE; + ch = RTF_GetChar( f ); + if(feof(f)) + { + fprintf(stderr,"Unexpected end of file\n"); + return; + } + switch (ch) + { + case '\\': + *pch=charset_table[92]; *pf=TRUE; + break; + case '{': + *pch=charset_table[123]; *pf=TRUE; + break; + case '}': + *pch=charset_table[125]; *pf=TRUE; + break; + case '*': + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + if(skip_to_level>level-1||skip_to_level==-1) + skip_to_level = level-1; + break; + case '\'': + { + char ch1, ch2; + ch1 = RTF_GetChar( f ); + ch2 = RTF_GetChar( f ); + if(!feof(f)) + { + if(isxdigit(ch1)&&isxdigit(ch2)) + { + ch = chartoi(ch1)*16+chartoi(ch2); + *pch = charset_table[ch-1]; *pf=TRUE; + } else { + fprintf(stderr,"RTF Error: unexpected '%c%c' after \\\'\n",ch1,ch2); + } + } + break; + } + default: + if (isalpha(ch)) + { + RTF_BuildToken(f, ch); + } else { + fprintf(stderr, "\nRTF Error: unexpected '%c' after \\.\n", ch); + } + break; + } +} + +/**************************************/ + +void RTF_ParseStyle(FILE * f) +{ + char ch, pf; + char *code; + int level0; + void (*PrevDoControl)(FILE*,char*,char*); + + level0=level; + PrevDoControl=RTF_DoControl; + RTF_DoControl=RTF_DoStyleControl; + + RTF_ClearName(style_name); + style_number[0]=0; + while (1) + { + ch = RTF_GetChar( f ); + if(feof(f)) + break; + switch (ch) + { + case '\\': + RTF_backslash(f, &code, &pf); + if(pf) + { + RTF_BuildName(style_name, code, 1); + } else { + RTF_ClearName(style_name); + } + break; + + case '{': + level++; + RTF_ClearName(style_name); + break; + 
+ case '}': + if(level0+1==level) + { + if(style_number[0]!=0) + { + RTF_AddStyleMap(style_name,style_number); + style_number[0]=0; + } + } else if(level0==level) { + RTF_DoControl=PrevDoControl; + RTF_UnGetChar(f, ch); + return; + } + level--; + RTF_ClearName(style_name); + break; + + default: + RTF_BuildName(style_name, &ch, 0); + break; + } + } /* while */ +} + +/**************************************/ +/* Perform actions for RTF control words */ + +void RTF_DoBodyControl (FILE * f, char* control,char* arg) +{ + short style; + + if (gobble) return; + + switch (GetTokenIndex(control)) + { + case t_stylesheet: + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + skip_to_level = level-1; + RTF_ParseStyle( f ); + break; + case t_fonttbl: /*skip all of these and their contents!*/ + case t_colortbl: + case t_info: + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + skip_to_level = level-1; + break; + case t_pict: + gobble = TRUE; /*perform no output, ignore commands 'til level-1*/ + if(skip_to_level>=level || skip_to_level==-1) + skip_to_level = level-1; + break; + + + case t_s: /*Style*/ + if (!curr_heading) + { + style = RTF_MapStyle (arg); + if(style) + { + curr_heading = s_h0 + style; + RTF_PutStr(styles[curr_heading][0]); + ignore_styles = TRUE; + } + } + break; + + case t_b: /*Bold*/ + RTF_SetStyle(&curr_style,s_bold); + break; + + case t_ulw: + case t_uld: + case t_uldb: + case t_ul: /*Underline, maps to "emphasis" HTML style*/ + RTF_SetStyle(&curr_style,s_underline); + break; + + case t_i: /*Italic*/ + RTF_SetStyle(&curr_style,s_italic); + break; + + case t_v: /* Hidden*/ + RTF_SetStyle(&curr_style,s_hidden); + break; + + case t_par: /*Paragraph*/ + if (curr_heading!=s_plain) { + RTF_PutStr(styles[curr_heading][1]); + curr_heading = s_plain; + } else { + RTF_PutStr(styles[s_para][0]); + } + ignore_styles = FALSE; + break; + + case t_plain: /*reset inline styles*/ + RTF_PlainStyle(&curr_style); + break; + case t_cell: + case 
t_tab: + RTF_PutChar(' '); + break; + case t_endash: + case t_emdash: + RTF_PutChar('-'); + break; + case t_line: + case t_row: + RTF_PutStr(styles[s_br][0]); + break; + case t_bullet: + RTF_PutChar('\xb7'); + break; + case t_start: + case t_end: + break; + case t_rquote: + //RTF_PutStr("’"); + RTF_PutStr("'"); + break; + } + +} + +/**************************************/ +/* RTF_Parse is a crude, ugly state machine that understands enough of */ +/* the RTF syntax to be dangerous. */ + +void RTF_ParseBody( FILE* f ) +{ + char ch, pf; + char* code; + + RTF_DoControl=RTF_DoBodyControl; + level = 0; + skip_to_level = -1; + gobble = FALSE; + ignore_styles = FALSE; + + while (1) + { + ch = RTF_GetChar( f ); + if (feof(f)) + { + break; + } + switch (ch) + { + case '\\': + RTF_backslash(f, &code,&pf); + if(pf && code) + RTF_PutStr(code); + break; + + case '{': + RTF_PushState(&level); + break; + + case '}': + RTF_PopState(&level); + break; + + default: + RTF_PutChar(ch); + break; + } + }/*while*/ +} + +/**************************************/ + +int RTF_Parse (FILE* f) +{ + RTF_PutStr("<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n<html>\n"); + + isBody=FALSE; + + RTF_ParseBody(f); + + if (isBody) RTF_PutStr("</body>\n"); + + RTF_PutStr("</html>\n"); + + return 0; +} + +/**************************************/ + +void Initialize() +{ + int i; + + for (i=0;i<MAX_LEVELS;i++) + style_state[i].s=s_plain; + + curr_style.s=s_plain; + curr_heading = s_plain; + + // Set default styles maping + style_mappings[0][0]=0; + for(i=1;i<=6;i++) + sprintf(style_mappings[i],"%d",256-i); +} + +/**************************************/ + +int RTF_FindCharset(FILE * f) +{ + char ch; + char code[CHARSET_MAX_LENGTH]; + int metadata = 0; + int i = 0; + + while ( !feof(f) ) + { + ch = RTF_GetChar( f ); + if ( ch == '\\' ) + { + metadata++; + } + if ( metadata == 2 ) // the second metadata is the charset used + { + if ( ch != '\\' ) + { + code[i] = ch; + i++; + } + } + if ( metadata > 2 ) + { + 
code[i] = '\0'; + break; + } + } + + + for ( i = 0; i < CHARSET_NUMBER ; i++) + { + if ( strcmp( (const char *)charset[i], (const char *) code ) == 0 ) + { + charset_table = charset_variablename[i]; + break; + }; + } + if ( i == CHARSET_NUMBER ) + { + charset_table = charset_variablename[CHARSET_DEFAULT]; + } + + return 1; // always true! +} + +/**************************************/ + +int main(int argc,char** argv) +{ + int rv = 0; + FILE *f = NULL; + + Initialize(); + + if ( argc > 1) + { + if( strcmp(argv[1],"--help")==0 || strcmp(argv[1],"-H")==0 ) + { + printf("Use: %s [rtf_filename]\n",argv[0]); + rv = 0; + } else if ( strcmp(argv[1],"--version")==0 || strcmp(argv[1],"-V")==0 ) { + printf("rtf2html version 1.2\n"); + rv = 0; + } + else + { + rv = openfile(argv[1], &f); + if ( rv ) rv = RTF_FindCharset(f); + if ( rv ) + { + rewind(f); + rv = RTF_Parse(f); + } + if ( rv ) rv = closefile(f); + } + } + else + { + printf("Use: %s [rtf_filename]\n",argv[0]); + } + return rv; +} diff --git a/debian/htdig/htdig-3.2.0b6/contrib/run-robot.sh b/debian/htdig/htdig-3.2.0b6/contrib/run-robot.sh new file mode 100644 index 00000000..a5884f2c --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/run-robot.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +CfgFile=/www/search.sbs.de/test/conf/htfig.conf +BinDir=/www/search.sbs.de/test/bin +CgiBinDir=/www/search.sbs.de/test/cgi-bin +DataDir=/www/search.sbs.de/data/robot +Date=`date +%y%m%d` + +date > $DataDir/$Date-runtime +$BinDir/htdig -v -t -s -c $CfgFile >> $DataDir/$Date-robot +$BinDir/htmerge -v -c $CfgFile >> $DataDir/$Date-robot +date >> $DataDir/$Date-runtime + +$BinDir/whatsnew.pl -v > $DataDir/$Date-whatsnew +sort $BinDir/urls | uniq > $DataDir/$Date-urls + +rm -f $DataDir/current-* +ln -s $DataDir/$Date-runtime $DataDir/current-runtime +ln -s $DataDir/$Date-robot $DataDir/current-robot +ln -s $DataDir/$Date-urls $DataDir/current-urls + +$BinDir/status.pl -v > $DataDir/$Date-status + diff --git 
a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/README b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/README new file mode 100644 index 00000000..9b94ec5d --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/README @@ -0,0 +1,16 @@ +This is a small example to demonstrate the script_name attribute. + + +Assuming that these files are located within your server's "search" +directory, just add the following line to your htdig configuration +file: + +script_name: /search/results.shtml + +You may also have to override the standard template files, using the +search_results_header, search_results_footer, syntax_error_file and +nothing_found_file attributes. + + +(c) 1999, Hanno Mueller, http://www.hanno.de + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/results.shtml b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/results.shtml new file mode 100644 index 00000000..86e09563 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/results.shtml @@ -0,0 +1,17 @@ +<!-- + -- script_name example using SSI + -- This is the results page. + -- Note the server side include directive calling /cgi-bin/htsearch. + -- The page's parameters will be handed over to htsearch. 
+ --> + +<html><head><title>Search results (SHTML)</title></head> +<body bgcolor="#eef7ff"> +<h2><img src="/htdig/htdig.gif"> + +<!--#exec cgi="/cgi-bin/htsearch" --> + +<hr noshade size=4> +<a href="http://www.htdig.org"> +<img src="/htdig/htdig.gif" border=0>ht://Dig</a> +</body></html> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/search.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/search.html new file mode 100644 index 00000000..b6f80e97 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/search.html @@ -0,0 +1,53 @@ +<!-- + -- script_name example using SSI + -- This is the standard search page (no dynamic stuff), + -- with two minor differences: The form uses the "get" + -- method and the "action" sends the form input to the + -- the dynamic results page. + --> + +<html> +<head> +<title>ht://Dig WWW Search</title> +</head> +<body bgcolor="#eef7ff"> +<h1> +<a href="http://www.htdig.org"><IMG SRC="/htdig/htdig.gif" align=bottom alt="ht://Dig" border=0></a> +WWW Site Search</H1> +<hr noshade size=4> +This search will allow you to search the contents of +all the publicly available WWW documents at this site. 
+<br> +<p> +<form method="get" action="/search/results.shtml"> +<font size=-1> +Match: <select name=method> +<option value=and>All +<option value=or>Any +<option value=boolean>Boolean +</select> +Format: <select name=format> +<option value=builtin-long>Long +<option value=builtin-short>Short +</select> +Sort by: <select name=sort> +<option value=score>Score +<option value=time>Time +<option value=title>Title +<option value=revscore>Reverse Score +<option value=revtime>Reverse Time +<option value=revtitle>Reverse Title +</select> +</font> +<input type=hidden name=config value="htdig-ssi"> +<input type=hidden name=restrict value=""> +<input type=hidden name=exclude value=""> +<br> +Search: +<input type="text" size="30" name="words" value=""> +<input type="submit" value="Search"> +</form> +<hr noshade size=4> +</body> +</html> + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/NOTE b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/NOTE new file mode 100644 index 00000000..b6a82833 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/NOTE @@ -0,0 +1,2 @@ +These are the standard template files, minus the standard start and +ending of the HTML that is already in the dynamic results page. 
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/footer.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/footer.html new file mode 100644 index 00000000..67938f89 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/footer.html @@ -0,0 +1,2 @@ +$(PAGEHEADER) +$(PREVPAGE) $(PAGELIST) $(NEXTPAGE) diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/header.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/header.html new file mode 100644 index 00000000..41503364 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/header.html @@ -0,0 +1,22 @@ +Search results for '$(LOGICAL_WORDS)'</h2> +<hr noshade size=4> +<form method="get" action="$(CGI)"> +<font size=-1> +<input type=hidden name=config value=$(CONFIG)> +<input type=hidden name=restrict value="$(RESTRICT)"> +<input type=hidden name=exclude value="$(EXCLUDE)"> +Match: $(METHOD) +Format: $(FORMAT) +Sort by: $(SORT) +<br> +Refine search: +<input type="text" size="30" name="words" value="$(WORDS)"> +<input type="submit" value="Search"> +</select> +</font> +</form> +<hr noshade size=1> +<b>Documents $(FIRSTDISPLAYED) - $(LASTDISPLAYED) of $(MATCHES) matches. +More <img src="/htdig/star.gif" alt="*">'s indicate a better match. 
+</b> +<hr noshade size=1> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/long.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/long.html new file mode 100644 index 00000000..57ea8dcc --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/long.html @@ -0,0 +1,6 @@ +<dl><dt><strong><a href="$(URL)">$(TITLE)</a></strong>$(STARSLEFT) +</dt><dd>$(EXCERPT)<br> +<i><a href="$(URL)">$(URL)</a></i> +<font size=-1>$(MODIFIED), $(SIZE) bytes</font> +</dd></dl> + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/nomatch.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/nomatch.html new file mode 100644 index 00000000..840e4098 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/nomatch.html @@ -0,0 +1,30 @@ +Search results</h1> +<hr noshade size=4> +<h2>No matches were found for '$(LOGICAL_WORDS)'</h2> +<p> +Check the spelling of the search word(s) you used. +If the spelling is correct and you only used one word, +try using one or more similar search words with "<b>Any</b>." 
+</p><p> +If the spelling is correct and you used more than one +word with "<b>Any</b>," try using one or more similar search +words with "<b>Any</b>."</p><p> +If the spelling is correct and you used more than one +word with "<b>All</b>," try using one or more of the same words +with "<b>Any</b>."</p> +<hr noshade size=4> +<form method="get" action="$(CGI)"> +<font size=-1> +<input type=hidden name=config value=$(CONFIG)> +<input type=hidden name=restrict value="$(RESTRICT)"> +<input type=hidden name=exclude value="$(EXCLUDE)"> +Match: $(METHOD) +Format: $(FORMAT) +Sort by: $(SORT) +<br> +Refine search: +<input type="text" size="30" name="words" value="$(WORDS)"> +<input type="submit" value="Search"> +</select> +</font> +</form> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/short.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/short.html new file mode 100644 index 00000000..b5044b31 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/short.html @@ -0,0 +1 @@ +$(STARSRIGHT) <strong><a href="$(URL)">$(TITLE)</a></strong><br> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/syntax.html b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/syntax.html new file mode 100644 index 00000000..feddac71 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/scriptname/templates/syntax.html @@ -0,0 +1,27 @@ +Error in Boolean search for '$(LOGICAL_WORDS)'</h1> +<hr noshade size=4> +Boolean expressions need to be 'correct' in order for the search +system to use them. +The expression you entered has errors in it.<p> +Examples of correct expressions are: <b>cat and dog</b>, <b>cat +not dog</b>, <b>cat or (dog not nose)</b>.<br>Note that +the operator <b>not</b> has the meaning of 'without'. 
+<blockquote><b> +$(SYNTAXERROR) +</b></blockquote> +<hr noshade size=4> +<form method="get" action="$(CGI)"> +<font size=-1> +<input type=hidden name=config value=$(CONFIG)> +<input type=hidden name=restrict value="$(RESTRICT)"> +<input type=hidden name=exclude value="$(EXCLUDE)"> +Match: $(METHOD) +Format: $(FORMAT) +Sort: $(SORT) +<br> +Refine search: +<input type="text" size="30" name="words" value="$(WORDS)"> +<input type="submit" value="Search"> +</select> +</font> +</form> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/status.pl b/debian/htdig/htdig-3.2.0b6/contrib/status.pl new file mode 100755 index 00000000..25ddeda8 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/status.pl @@ -0,0 +1,258 @@ +#!/usr/local/bin/perl +# +# status.pl v1.0 960413 Iain Lea ([email protected]) +# +# ChangeLog +# 960413 IL +# +# Produces a HTML 'Search Engine Status' page with last 5 runs +# and 'Top 10' servers by #URLS indexed. +# +# Usage: status.pl [options] +# -h help +# -F file HTML footer +# -H file HTML header +# -o file HTML generated file +# -v verbose +# +# TODO + +require 'timelocal.pl'; +require 'getopts.pl'; +require '/www/search.sbs.de/bin/sbs.pl'; + +$DataDir = '/www/search.sbs.de/data/robot'; +$RunTimeFile = "$DataDir/current-runtime"; +$RobotFile = "$DataDir/current-robot"; +$IndexFile = '/www/search.sbs.de/test/db/db.wordlist'; + +$DefOutputFile = '/www/search.sbs.de/test/pub/status.html'; +$TmpFile = "/tmp/status.$$"; +$DefFooter = ''; +$DefHeader = ''; +$Verbose = 0; +$Top10Servers = 10; + +&ParseCmdLine; + +print "Generating status.html...\n" if $Verbose; + +&ReadDataFiles ($RunTimeFile, $RobotFile, $IndexFile); +&WriteStatus ($DataDir, $DefOutputFile, $DefHeader, $DefFooter); + +exit 1; + +############################################################################# +# Subroutines +# + +sub ParseCmdLine +{ + &Getopts ('F:hH:o:v'); + + if ($opt_h ne "") { + print <<EndOfHelp +Produce an HTML 'Status' page of last 5 runs and Top 10 servers by #URLS. 
+ +Usage: $0 [options] + -h help + -F file HTML footer + -H file HTML header + -o file HTML generated file + -v verbose + +EndOfHelp +; + exit 0; + } + $DefFooter = $opt_F if ($opt_H ne ""); + $DefHeader = $opt_H if ($opt_H ne ""); + $DefOutputFile = $opt_o if ($opt_o ne ""); + $Verbose = 1 if ($opt_v ne ""); +} + +sub ReadDataFiles +{ + my ($RunTimeFile, $RobotFile, $IndexFile) = @_; + my ($IndexSize, $NumWords, $NumURLS, $NumServers); + my ($BegTime, $EndTime, $RunDate, $RunTime, $Key); + my (%Months) = ( + 'Jan', '0', 'Feb', '1', 'Mar', '2', 'Apr', '3', 'May', '4', 'Jun', '5', + 'Jul', '6', 'Aug', '7', 'Sep', '8', 'Oct', '9', 'Nov', '10', 'Dec', '11' ); + + # RunDate : RunTime + + open (TIME, "$RunTimeFile") || die "Error: $RunTimeFile - $!\n"; + while (<TIME>) { + chop; + if (! $EndTime && $BegTime) { + # Sat Apr 13 12:57:52 MET DST 1996 + /^...\ (...)\ ([0-9][0-9])\ (..):(..):(..)\ ... ... ([0-9]{4}$)/; + $EndTime = timelocal ($5, $4, $3, $2, $Months{$1}, $6 - 1900); + $RunTime = $EndTime - $BegTime; + $RunTime = sprintf ("%02d%02d", $RunTime/3600, ($RunTime%3600)/60); + print "END=[$_] [$EndTime] [$RunTime]\n" if $Verbose; + } + if (! $BegTime) { + # Sat Apr 13 12:57:52 MET DST 1996 + /^...\ (...)\ ([0-9][0-9])\ (..):(..):(..)\ ... ... 
([0-9]{4}$)/; + $Mon = $Months{$1}; + $Year = $6 - 1900; + $BegTime = timelocal ($5, $4, $3, $2, $Mon, $Year); + $RunDate = sprintf ("%02d%02d%02d", $Year, $Mon+1, $2); + print "BEG=[$_] [$BegTime] [$RunDate]\n" if $Verbose; + } + } + close (TIME); + + # IndexSize : NumWords : NumURLS : NumServers + + @StatData = stat ($IndexFile); + $IndexSize = $StatData[7]; + print "SIZE=[$IndexSize]\n" if $Verbose; + + # NumWords : NumURLS : NumServers + + $NumWords = $NumURLS = $NumServers = 0; + + open (ROBOT, "$RobotFile") || die "Error: $RobotFile - $!\n"; + while (<ROBOT>) { + if (/^htdig:\s+(.*)\s+([0-9]*)\s+documents$/) { + $NumURLS += $2; + $NumServers++; + if ($2 > 0) { + $Key = sprintf ("%07d|%s", $2, $1); + $Top10ByName{$Key} = $2; + } + print "SERVER=[$1] DOCS=[$2]\n" if $Verbose; + } elsif (/^Read\s+([0-9]*)\s+words$/) { + $NumWords = $1; + print "WORDS=[$NumWords]\n" if $Verbose; + } + } + close (ROBOT); + + # Write data to YYMMDD-info file + + $InfoFile = "$DataDir/$RunDate-info"; + $CurrFile = "$DataDir/current-info"; + + open (INFO, ">$InfoFile") || die "Error: $InfoFile - $!\n"; + print "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n" if $Verbose; + print INFO "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n"; + close (INFO); + unlink ($CurrFile); + symlink ($InfoFile, $CurrFile); +} + +sub WriteStatus +{ + my ($DataDir, $OutFile, $Header, $Footer) = @_; + + $RobotInfo = &ReadRobotInfo ("$DataDir/current-info"); + + open (HTML, ">$OutFile") || die "Error: $OutFile - $!\n"; + + &PrintBoilerPlate ($Header, 1); + + print HTML <<EOT +<p> +<strong>$RobotInfo</strong> +<p> +<table border=2 width=400> +<caption>Table of last 5 robot runs.</caption> +<th>Run Date<th>Run Time<th># Servers<th># URL's<th># Words<th>Index (MB) +<tr> +EOT +; + # read YYMMDD-info files + opendir (DIR, $DataDir) || die "Error: $DataDir - $!\n"; + @InfoFiles = grep (/^[0-9]{6}-info$/, readdir (DIR)); + closedir (DIR); + @InfoFiles = reverse (sort (@InfoFiles)); + 
+ @InfoFiles = @InfoFiles[0,1,2,3,4]; + foreach $File (@InfoFiles) { + $File = "$DataDir/$File"; + open (INFO, "$File") || die "Error: $File - $!\n"; + chop (($_ = <INFO>)); + ($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers) = split (':'); + $IndexSize = sprintf ("%.1f", $IndexSize / (1024*1024)); + $RunTime =~ /(..)(..)/; + $RunTime = "$1:$2"; + print HTML <<EOT +<td align="center">$RunDate</td> +<td align="center">$RunTime</td> +<td align="right">$NumServers</td> +<td align="right">$NumURLS</td> +<td align="right">$NumWords</td> +<td align="right">$IndexSize</td> +<tr> +EOT +; + close (INFO); + } + + print HTML <<EOT +</table> +<p> +<p> +<table border=2 width=400> +<caption>Table of Top 10 servers listed by number of indexed documents.</caption> +<th>Top 10 Servers<th># URL's +<tr> +EOT +; + $NumServers = 0; + foreach $Key (reverse (sort (keys (%Top10ByName)))) { + if ($NumServers < $Top10Servers) { + $NumServers++; + $NumURLS = $Top10ByName{$Key}; + $Key =~ /^[0-9]*\|(.*)$/; + $Server = $1; + $Server =~ s/:80$//; + print HTML <<EOT +<td width="80%" align="left"><a href="http://$Server/">$Server</a></td> +<td width="20%" align="right">$NumURLS</td> +<tr> +EOT +; + } + } + + print HTML "</table>\n"; + + &PrintBoilerPlate ($Footer, 0); + + close (HTML); +} + +sub PrintBoilerPlate +{ + my ($File, $IsHeader) = @_; + + if ($File ne "" && -e $File) { + open (FILE, $File) || die "Error: $File - $!\n"; + while (<FILE>) { + print HTML; + } + close (FILE); + } else { + if ($IsHeader) { + print HTML <<EOT +<html> +<head> +<title>Search Engine Status</title> +</head> +<body> +<h2>Search Engine Status</h2> +<hr> +<p> +EOT +; + } else { + &PrintFooterHTML; + } + } +} + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/urlindex/urlindex.pl b/debian/htdig/htdig-3.2.0b6/contrib/urlindex/urlindex.pl new file mode 100755 index 00000000..436c5eef --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/urlindex/urlindex.pl @@ -0,0 +1,285 @@ +#!/usr/local/bin/perl + +## 
+## urlindex.pl (C) 1995 Andrew Scherpbier +## +## This program will build an index of all the URLs in the +## htdig document database. +## + +use GDBM_File; +require('SDSU_www.pl'); + +$dbfile = "/gopher/www/htdig/sdsu3d.docdb"; +$dbfile = "/tmp/db.docdb"; +$exclude = "rohan.sdsu.edu\\/home\\/"; + +tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "Unable to open $dbfile: $!"; + +print "Reading...\n"; + +## +## Read in all the relevant data. +## +while (($key, $value) = each %docdb) +{ + next if $key =~ /^nextDocID/; + %record = parse_ref_record($value); + next if $record{"STATE"} eq 1; + next if $key =~ /$exclude/; + + $title = $record{"TITLE"}; + + ## + ## Get rid of starting and trailing whitespace junk + ## + $title =~ s/^[ \t\n\r]*//; + $title =~ s/[ \t\n\r]*$//; + + ## + ## If the title starts with 'the', it will be taken out and added + ## to the end of the title. This means that a title like "The + ## Homepage of X" will become "Homepage of X, The" + ## + if ($title =~ /^the /i) + { + $title = substr($title, 4) . ", " . substr($title, 0, 3); + } + if ($title =~ /^SDSU /) + { + $title = substr($title, 5) . ", " . substr($title, 0, 4); + } + if ($title =~ /^San Diego State University /i) + { + $title = substr($title, 27) . ", " . 
substr($title, 0, 26); + } + $value = $title; + $value =~ tr/A-Z/a-z/; + $titles{$value} = "$title\001$key"; + push(@unsorted, $value); +} + +$current = " "; +open(M, ">index.html"); +print M "<html><head><title>Index of all documents at SDSU</title></head>\n"; +print M "<body>\n"; +print M &www_logo_2("Index of all documents at SDSU"); +print M "<p>This is a list of WWW documents that were found while indexing all\n"; +print M "the publicly available WWW servers at San Diego State University.\n"; +print M "The documents are indexed by their titles.\n"; +print M "</p><h2>\n"; + +$previous = ""; + +print "Writing...\n"; + +foreach $value (sort @unsorted) +{ + next if $value eq $previous; + $previous = $value; + next if !($value =~ /^[a-zA-Z]/); + + ($title, $url) = split('\001', $titles{$value}, 2); + + $first = substr($title, 0, 1); + if ($current =~ /$first/i) + { + print F "<li><a href=\"$url\">$title</a></li>\n"; + } + else + { + ## + ## New letter. Open a new file for it + ## + $current = $first; + $current =~ tr/a-z/A-Z/; + print F "</li></body></html>\n"; + close(F); + open(F, ">index$current.html"); + print F "<html><head><title>Index for $current</title></head>\n"; + print F "<body>\n"; + print F &www_logo_2("Index for $current"); + print F "<ul>\n"; + print F "<li><a href=\"$url\">$title</a></li>\n"; + + ## + ## Add a reference to the main index for this letter + ## + print M " <a href=\"index$current.html\">$current</a>\n"; + + print "Index of $current\n"; + } +} + +close(F); + +print M "</h2></body></html>\n"; +close(M); + + +sub parse_ref_record +{ + local($value) = @_; + local(%rec, $length, $count, $result); + + while (length($value) > 0) + { + $what = unpack("C", $value); + $value = substr($value, 1); + if ($what == 0) + { + # ID + $rec{"ID"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 1) + { + # TIME + $rec{"TIME"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 2) + { + # ACCESSED + 
$rec{"ACCESSED"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 3) + { + # STATE + $rec{"STATE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 4) + { + # SIZE + $rec{"SIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 5) + { + # LINKS + $rec{"LINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 6) + { + # IMAGESIZE + $rec{"IMAGESIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 7) + { + # HOPCOUNT + $rec{"HOPCOUNT"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 8) + { + # URL + $length = unpack("i", $value); + $rec{"URL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 9) + { + # HEAD + $length = unpack("i", $value); + $rec{"HEAD"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 10) + { + # TITLE + $length = unpack("i", $value); + $rec{"TITLE"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 11) + { + # DESCRIPTIONS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"DESCRIPTIONS"} = $result; + } + elsif ($what == 12) + { + # ANCHORS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . 
""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"ANCHORS"} = $result; + } + elsif ($what == 13) + { + # EMAIL + $length = unpack("i", $value); + $rec{"EMAIL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 14) + { + # NOTIFICATION + $length = unpack("i", $value); + $rec{"NOTIFICATION"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 15) + { + # SUBJECT + $length = unpack("i", $value); + $rec{"SUBJECT"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 16) + { + # STRING (ignore, but unpack) + $length = unpack("i", $value); + $rec{"STRING"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 17) + { + # METADSC + $length = unpack("i", $value); + $rec{"METADSC"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 18) + { + # BACKLINKS + $rec{"BACKLINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 19) + { + # SIGNATURE + $rec{"SIG"} = unpack("i", $value); + $value = substr($value, 4); + } + } + print "title = $rec{'TITLE'}\n"; + return %rec; +} + + + + + + + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/whatsnew/whatsnew.pl b/debian/htdig/htdig-3.2.0b6/contrib/whatsnew/whatsnew.pl new file mode 100755 index 00000000..e27e744c --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/whatsnew/whatsnew.pl @@ -0,0 +1,365 @@ +#!/usr/local/bin/perl +# +# whatsnew.pl v1.1 (C) 1996 Iain Lea +# modified 26 Oct 1998 (c) 1998 Jacques Reynes +# +# ChangeLog +# 960321 IL Reversed sorting to show newest documents first +# 981026 JR Modified to work with Berkeley DB2. +# 980204 GRH Modified to work with changes in ht://Dig db format +# +# Produces a HTML 'Whats New' page with custom header and footer. 
+# +# Title +# Descriptions +# URL +# Last modification date (in ctime format) +# +# The date is specified as yyyymmdd +# +# Usage: whatsnew.pl [options] +# -h help +# -d date base date [default: $DefDate] +# -n days list documents newer than days old [default: $DefDays] +# -f file database index [default: $DefIndex] +# -F file HTML footer +# -H file HTML header +# -o file HTML generated file +# -v verbose + +use BerkeleyDB; +require 'timelocal.pl'; +require 'getopts.pl'; + +$DefIndex = ' your data base .docdb'; +$DefOutputFile = ' your result file URL created in your web server whatsnew.html'; +$TmpFile = "/tmp/whatsnew.$$"; +$DefFooter = ''; +$DefHeader = ''; +$Verbose = 0; +$NewNum = 0; +$DefDays = 3; +chop (($DefDate = '19'.`date +%y%m%d`)); + +&ParseCmdLine; + +$DefDate =~ /([0-9]{4})([0-9]{2})([0-9]{2})/; +$When = timelocal (0, 0, 0, $3, $2 - 1, $1 - 1900)- ($DefDays * 86400); +$NewDate = localtime ($When); +$dbfile = $DefIndex; + +print "Generating 'Whats New' for documents newer than '$NewDate'...\n" if $Verbose; + +&ReadDatabase ($DefIndex, $TmpFile); +&WriteWhatsNew ($TmpFile, $DefOutputFile, $DefHeader, $DefFooter); + +exit 1; + +############################################################################# +# Subroutines +# + +sub ParseCmdLine +{ + &Getopts ('d:f:F:hH:n:o:v'); + + if ($opt_h ne "") { + print <<EndOfHelp +Produce an HTML 'Whats New' page with custom header & footer for database. 
+ +Usage: $0 [options] + -h help + -d date base date [default: $DefDate] + -n days list documents newer than days old [default: $DefDays] + -f file database index [default: $DefIndex] + -F file HTML footer + -H file HTML header + -o file HTML generated file + -v verbose + +EndOfHelp +; + exit 0; + } + $DefDate = $opt_d if ($opt_d ne ""); + $DefDays = $opt_n if ($opt_n ne ""); + $DefIndex = $opt_f if ($opt_f ne ""); + $DefFooter = $opt_F if ($opt_H ne ""); + $DefHeader = $opt_H if ($opt_H ne ""); + $DefOutputFile = $opt_o if ($opt_o ne ""); + $Verbose = 1 if ($opt_v ne ""); +} + +sub ReadDatabase +{ + my ($Index, $TmpFile) = @_; + + tie %docdb, 'BerkeleyDB::Btree', -Filename => $Index, -Flags => DB_RDONLY || die "Error: $Index - $!"; + + open (TMP, ">$TmpFile") || die "Error: $TmpFile - $!\n"; + + while (($key, $value) = each %docdb) + { + next if $key =~ /^nextDocID/; + %rec = parse_ref_record ($value); + if ($rec{'TIME'} >= $When) + { + $Line = "$rec{'TIME'}|$rec{'URL'}|$rec{'TITLE'}|$rec{'DESCRIPTIONS'}\n"; + print $Line if $Verbose; + print TMP $Line; + $NewNum++; + } + } + + close (TMP); +} + +sub WriteWhatsNew +{ + my ($InFile, $OutFile, $Header, $Footer) = @_; + + open (URLS, "sort -r $InFile |") || die "Error: $InFile - $!\n"; + open (HTML, ">$OutFile") || die "Error: $OutFile - $!\n"; + + &PrintBoilerPlate ($Header, 1); + + while (<URLS>) { + chop; + ($Time, $URL, $Title, $Description) = split ('\|'); + $Ctime = localtime ($Time); + if ($Verbose) { + print <<EOT +Title: $Title +Description: $Description +URL: $URL +Modified: $Ctime + +EOT +; + } + print HTML <<EOT +<strong>Title:</strong> <a href="$URL">$Title</a> +<strong>Description:</strong> $Description +<strong>URL:</strong> $URL +<strong>Modified:</strong> $Ctime + +EOT +; + } + + &PrintBoilerPlate ($Footer, 0); + + close (HTML); + close (URLS); + + unlink ($InFile); +} + +sub PrintBoilerPlate +{ + my ($File, $IsHeader) = @_; + + if ($File ne "" && -e $File) { + open (FILE, $File) || die "Error: $File 
- $!\n"; + while (<FILE>) { + print HTML; + } + close (FILE); + } else { + if ($IsHeader) { + print HTML <<EOT +<html> +<head> +<title>Whats New!</title> +</head> +<body> +<h2>Whats New!</h2> +<center> +<a href="/whatsnew.html"><img src="/new.gif"></a> +<a href="/"><img src="/home.gif"></a> +<a href="/intranet.html"><img src="/search.gif"></a> +<a href="mailto:Iain.Lea\@sbs.de"><img src="/contact.gif"></a> +</center> +<hr> +<strong>Found $NewNum documents newer than '$NewDate'</strong> +<pre> +EOT +; + } else { + print HTML <<EOT +</pre> +<hr> +<center> +<a href="/whatsnew.html"><img src="/new.gif"></a> +<a href="/"><img src="/home.gif"></a> +<a href="/intranet.html"><img src="/search.gif"></a> +<a href="mailto:Iain.Lea\@sbs.de"><img src="/contact.gif"></a> +</center> +</body> +</html> +EOT +; + } + } +} + + +sub parse_ref_record +{ + local($value) = @_; + local(%rec, $length, $count, $result); + + while (length($value) > 0) + { + $what = unpack("C", $value); + $value = substr($value, 1); + if ($what == 0) + { + # ID + $rec{"ID"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 1) + { + # TIME + $rec{"TIME"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 2) + { + # ACCESSED + $rec{"ACCESSED"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 3) + { + # STATE + $rec{"STATE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 4) + { + # SIZE + $rec{"SIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 5) + { + # LINKS + $rec{"LINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 6) + { + # IMAGESIZE + $rec{"IMAGESIZE"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 7) + { + # HOPCOUNT + $rec{"HOPCOUNT"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif ($what == 8) + { + # URL + $length = unpack("i", $value); + $rec{"URL"} = unpack("x4 A$length", $value); + $value = 
substr($value, 4 + $length); + } + elsif ($what == 9) + { + # HEAD + $length = unpack("i", $value); + $rec{"HEAD"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 10) + { + # TITLE + $length = unpack("i", $value); + $rec{"TITLE"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 11) + { + # DESCRIPTIONS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"DESCRIPTIONS"} = $result; + } + elsif ($what == 12) + { + # ANCHORS + $count = unpack("i", $value); + $value = substr($value, 4); + $result = ""; + foreach (1 .. $count) + { + $length = unpack("i", $value); + $result = $result . unpack("x4 A$length", $value) . ""; + $value = substr($value, 4 + $length); + } + chop $result; + $rec{"ANCHORS"} = $result; + } + elsif ($what == 13) + { + # EMAIL + $length = unpack("i", $value); + $rec{"EMAIL"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 14) + { + # NOTIFICATION + $length = unpack("i", $value); + $rec{"NOTIFICATION"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 15) + { + # SUBJECT + $length = unpack("i", $value); + $rec{"SUBJECT"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 16) + { + # STRING (ignore, but unpack) + $length = unpack("i", $value); + $rec{"STRING"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 17) + { + # METADSC + $length = unpack("i", $value); + $rec{"METADSC"} = unpack("x4 A$length", $value); + $value = substr($value, 4 + $length); + } + elsif ($what == 18) + { + # BACKLINKS + $rec{"BACKLINKS"} = unpack("i", $value); + $value = substr($value, 4); + } + elsif 
($what == 19) + { + # SIGNATURE + $rec{"SIG"} = unpack("i", $value); + $value = substr($value, 4); + } + } + return %rec; +} + diff --git a/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.html b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.html new file mode 100644 index 00000000..164b8e5f --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.html @@ -0,0 +1,16 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> +<html> <head> +<title></title> +</head> + +<body> +<h1></h1> + + + +<hr> +<address><a href="http://www.sdsu.edu/~turtle/">Andrew Scherpbier <[email protected]></a></address> +<!-- hhmts start --> +Last modified: Wed Jul 5 10:26:36 PDT 1995 +<!-- hhmts end --> +</body> </html> diff --git a/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl new file mode 100755 index 00000000..31402a23 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/contrib/wordfreq/wordfreq.pl @@ -0,0 +1,54 @@ +#!/usr/local/bin/perl + +use GDBM_File; +use BerkeleyDB; + +## +## wordfreq.pl +## (C) 1995 Andrew Scherpbier <[email protected]> +## +## Will generate a list of words and how frequently they are used +## +## updated to deal with Berkeley db files 1998 Iosif Fettich <[email protected]> +## + + +$filetype = 'DB'; + +if (not defined $ARGV[0] or defined ($ARGV[1]) and $ARGV[1] !~ /g/i) { + print "\n\nThis program is used in conjunction with ht://Dig \n"; + print "to determine the frequency of words in a database containing word references.\n\n"; + print "Usage: $0 filename (to use a Berkeley db2 wordlist)\n"; + print " $0 filename g[dbm] (to use a GDBM wordlist)\n\n\n"; + exit; +} + +$filename = $ARGV[0]; + +if ($filename =~ /gdbm$/i or $ARGV[1] =~ /g/i) { + $filetype = 'GDBM'; +} + +if ($filetype eq 'GDBM') { + tie %worddb, 'GDBM_File', $ARGV[0], GDBM_READER, 0 + or die "Unable to open $ARGV[0] $!"; +} else { + tie %worddb, 'BerkeleyDB::Btree', + -Filename => $filename, + -Flags => DB_RDONLY + 
or die "Cannot open file $filename: $! $BerkeleyDB::Error\n" ; +} + +while (($key, $value) = each %worddb) +{ + $length = length($value) / 20; + $total = 0; + foreach $i (0 .. $length - 1) + { + ($count, $id, $weight, $anchor, $location) = + unpack("i i i i i", substr($value, $i * 20, 20)); + $total += $count; + } + print "$total\t$key\n"; +} + |