summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl')
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl298
1 files changed, 298 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl b/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl
new file mode 100755
index 00000000..3bd6c44d
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl
@@ -0,0 +1,298 @@
+#!/usr/local/bin/perl
+
+##
+## changehost.pl (C) 1995 Andrew Scherpbier
+##
+## This program will change hostnames of URLs in the document database and index.
+##
+## usage:
+## changehost.pl database_base from to
+##
+## example:
+## changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net
+##
+## Two new database will be created with a base of '/tmp/new'.
+## These databases can then be used by htsearch.
+##
+
+use GDBM_File;
+
+$base = $ARGV[0];
+$from = $ARGV[1];
+$to = $ARGV[2];
+
+$dbfile = "$base.docdb";
+$newfile = "/tmp/new.docdb";
+
+##
+## Convert the document database first.
+##
+tie(%newdb, GDBM_File, $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'";
+tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "$dbfile: $!";
+
+
+while (($key, $value) = each %docdb)
+{
+ if ($key =~ /http:\/\/$from/i)
+ {
+ %record = parse_ref_record($value);
+ $key =~ s/http:\/\/$from/http:\/\/$to/i;
+ print "$key\n";
+ $t = $record{"URL"};
+ $t =~ s/http:\/\/$from/http:\/\/$to/i;
+ $record{"URL"} = $t;
+
+ $value = create_ref_record(%record);
+ }
+
+ $newdb{$key} = $value;
+}
+
+untie %newdb;
+untie %docdb;
+
+##
+## Now create the document index
+##
+$newfile = "/tmp/new.docs.index";
+$dbfile = "$base.docs.index";
+
+tie(%newdb, GDBM_File, $newfile, GDBM_NEWDB, 0644) || die "$newfile: '$!'";
+tie(%docdb, GDBM_File, $dbfile, GDBM_READER, 0) || die "$dbfile: $!";
+
+while (($key, $value) = each %docdb)
+{
+ if ($value =~ /http:\/\/$from/i)
+ {
+ $value =~ s/http:\/\/$from/http:\/\/$to/i;
+ }
+ $newdb{$key} = $value;
+}
+
+untie %newdb;
+untie %docdb;
+
+######################################################################
+sub create_ref_record
+{
+ local(%rec) = @_;
+ local($s);
+
+ if (exists $rec{"ID"})
+ {
+ $s .= pack("Ci", 0, $rec{"ID"});
+ }
+ if (exists $rec{"TIME"})
+ {
+ $s .= pack("Ci", 1, $rec{"TIME"});
+ }
+ if (exists $rec{"ACCESSED"})
+ {
+ $s .= pack("Ci", 2, $rec{"ACCESSED"});
+ }
+ if (exists $rec{"STATE"})
+ {
+ $s .= pack("Ci", 3, $rec{"STATE"});
+ }
+ if (exists $rec{"SIZE"})
+ {
+ $s .= pack("Ci", 4, $rec{"SIZE"});
+ }
+ if (exists $rec{"LINKS"})
+ {
+ $s .= pack("Ci", 5, $rec{"LINKS"});
+ }
+ if (exists $rec{"IMAGESIZE"})
+ {
+ $s .= pack("Ci", 6, $rec{"IMAGESIZE"});
+ }
+ if (exists $rec{"HOPCOUNT"})
+ {
+ $s .= pack("Ci", 7, $rec{"HOPCOUNT"});
+ }
+ if (exists $rec{"URL"})
+ {
+ $s .= pack("Ci", 8, length($rec{"URL"}));
+ $s .= $rec{"URL"};
+ }
+ if (exists $rec{"HEAD"})
+ {
+ $s .= pack("Ci", 9, length($rec{"HEAD"}));
+ $s .= $rec{"HEAD"};
+ }
+ if (exists $rec{"TITLE"})
+ {
+ $s .= pack("Ci", 10, length($rec{"TITLE"}));
+ $s .= $rec{"TITLE"};
+ }
+ if (exists $rec{"DESCRIPTIONS"})
+ {
+ @v = split('', $rec{"DESCRIPTIONS"});
+ $s .= pack("Ci", 11, $#v - 1);
+ foreach (@v)
+ {
+ $s .= pack("i", length($_));
+ $s .= $_;
+ }
+ }
+ if (exists $rec{"ANCHORS"})
+ {
+ @v = split('', $rec{"ANCHORS"});
+ $s .= pack("Ci", 12, $#v - 1);
+ foreach (@v)
+ {
+ $s .= pack("i", length($_));
+ $s .= $_;
+ }
+ }
+ if (exists $rec{"EMAIL"})
+ {
+ $s .= pack("Ci", 13, length($rec{"EMAIL"}));
+ $s .= $rec{"EMAIL"};
+ }
+ if (exists $rec{"NOTIFICATION"})
+ {
+ $s .= pack("Ci", 14, length($rec{"NOTIFICATION"}));
+ $s .= $rec{"NOTIFICATION"};
+ }
+ if (exists $rec{"SUBJECT"})
+ {
+ $s .= pack("Ci", 15, length($rec{"SUBJECT"}));
+ $s .= $rec{"SUBJECT"};
+ }
+
+ return $s;
+}
+
+sub parse_ref_record
+{
+ local($value) = @_;
+ local(%rec, $length, $count, $result);
+
+ while (length($value) > 0)
+ {
+ $what = unpack("C", $value);
+ $value = substr($value, 1);
+ if ($what == 0)
+ {
+ # ID
+ $rec{"ID"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 1)
+ {
+ # TIME
+ $rec{"TIME"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 2)
+ {
+ # ACCESSED
+ $rec{"ACCESSED"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 3)
+ {
+ # STATE
+ $rec{"STATE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 4)
+ {
+ # SIZE
+ $rec{"SIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 5)
+ {
+ # LINKS
+ $rec{"LINKS"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 6)
+ {
+ # IMAGESIZE
+ $rec{"IMAGESIZE"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 7)
+ {
+ # HOPCOUNT
+ $rec{"HOPCOUNT"} = unpack("i", $value);
+ $value = substr($value, 4);
+ }
+ elsif ($what == 8)
+ {
+ # URL
+ $length = unpack("i", $value);
+ $rec{"URL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 9)
+ {
+ # HEAD
+ $length = unpack("i", $value);
+ $rec{"HEAD"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 10)
+ {
+ # TITLE
+ $length = unpack("i", $value);
+ $rec{"TITLE"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 11)
+ {
+ # DESCRIPTIONS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"DESCRIPTIONS"} = $result;
+ }
+ elsif ($what == 12)
+ {
+ # ANCHORS
+ $count = unpack("i", $value);
+ $value = substr($value, 4);
+ $result = "";
+ foreach (1 .. $count)
+ {
+ $length = unpack("i", $value);
+ $result = $result . unpack("x4 A$length", $value) . "";
+ $value = substr($value, 4 + $length);
+ }
+ chop $result;
+ $rec{"ANCHORS"} = $result;
+ }
+ elsif ($what == 13)
+ {
+ # EMAIL
+ $length = unpack("i", $value);
+ $rec{"EMAIL"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 14)
+ {
+ # NOTIFICATION
+ $length = unpack("i", $value);
+ $rec{"NOTIFICATION"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ elsif ($what == 15)
+ {
+ # SUBJECT
+ $length = unpack("i", $value);
+ $rec{"SUBJECT"} = unpack("x4 A$length", $value);
+ $value = substr($value, 4 + $length);
+ }
+ }
+ return %rec;
+}