diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl')
-rwxr-xr-x | debian/htdig/htdig-3.2.0b6/contrib/changehost/changehost.pl | 298 |
1 files changed, 298 insertions, 0 deletions
#!/usr/local/bin/perl

##
## changehost.pl  (C) 1995 Andrew Scherpbier
##
## This program will change hostnames of URLs in the document database and index.
##
## usage:
##	changehost.pl database_base from to [output_base]
##
## example:
##	changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net
##
## Two new databases will be created with a base of '/tmp/new' (or
## output_base when it is given).  These databases can then be used by
## htsearch.
##

use strict;
use warnings;

use GDBM_File;

##
## Size in bytes of a native int as produced by pack("i").  Every numeric
## value and every length prefix in a record has this size.
##
my $INT_SIZE = length(pack("i", 0));

##
## Separator used between the items of a list field (DESCRIPTIONS, ANCHORS)
## while the list is held as a single string in the record hash.
## NOTE(review): the published copy of this script shows an empty separator,
## apparently a control character that was lost; \001 is assumed here --
## TODO confirm against upstream.  Both subs below use this one value, so a
## parse/create round trip is consistent either way.
##
my $LIST_SEP = "\001";

##
## Record layout.  The array index is the one-byte tag stored in front of
## each field; the second element says how the payload is encoded:
##   int  - one native int
##   str  - native int length followed by that many bytes
##   list - native int item count followed by that many 'str' payloads
##
my @FIELDS = (
    [ "ID",           "int"  ],    #  0
    [ "TIME",         "int"  ],    #  1
    [ "ACCESSED",     "int"  ],    #  2
    [ "STATE",        "int"  ],    #  3
    [ "SIZE",         "int"  ],    #  4
    [ "LINKS",        "int"  ],    #  5
    [ "IMAGESIZE",    "int"  ],    #  6
    [ "HOPCOUNT",     "int"  ],    #  7
    [ "URL",          "str"  ],    #  8
    [ "HEAD",         "str"  ],    #  9
    [ "TITLE",        "str"  ],    # 10
    [ "DESCRIPTIONS", "list" ],    # 11
    [ "ANCHORS",      "list" ],    # 12
    [ "EMAIL",        "str"  ],    # 13
    [ "NOTIFICATION", "str"  ],    # 14
    [ "SUBJECT",      "str"  ],    # 15
);

######################################################################
##
## main(base, from, to [, output_base])
##
## Converts "<base>.docdb" and "<base>.docs.index" into
## "<output_base>.docdb" and "<output_base>.docs.index", replacing the
## first "http://<from>" (case-insensitive) with "http://<to>".
## Dies with a usage message when one of the three required arguments is
## missing, and with the system error when a database cannot be opened.
##
sub main
{
    my ($base, $from, $to, $out) = @_;

    die "usage: $0 database_base from to [output_base]\n"
	unless defined $base && defined $from && defined $to;
    $out = "/tmp/new" unless defined $out;

    # \Q...\E: the host name is literal text, its dots must not act as
    # regex wildcards.
    my $from_re = qr{http://\Q$from\E}i;

    ##
    ## Convert the document database first.
    ##
    convert_database("$base.docdb", "$out.docdb", sub {
	my ($key, $value) = @_;

	if ($key =~ $from_re)
	{
	    my %record = parse_ref_record($value);
	    $key =~ s{$from_re}{http://$to};
	    print "$key\n";
	    # Only touch the URL when the record actually carries one, so
	    # that no empty URL field is invented.
	    $record{"URL"} =~ s{$from_re}{http://$to} if exists $record{"URL"};
	    $value = create_ref_record(%record);
	}
	return ($key, $value);
    });

    ##
    ## Now create the document index
    ##
    convert_database("$base.docs.index", "$out.docs.index", sub {
	my ($key, $value) = @_;

	$value =~ s{$from_re}{http://$to};
	return ($key, $value);
    });

    return;
}

######################################################################
##
## convert_database(dbfile, newfile, rewrite)
##
## Copies every entry of the GDBM file 'dbfile' into a freshly created GDBM
## file 'newfile'.  'rewrite' is a code reference that receives (key, value)
## and returns the (key, value) pair to store.
##
sub convert_database
{
    my ($dbfile, $newfile, $rewrite) = @_;
    my (%docdb, %newdb);

    # Open the source first so that a bad input path does not leave an
    # empty output database behind.
    tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER(), 0) or die "$dbfile: $!";
    tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB(), 0644) or die "$newfile: '$!'";

    while (my ($key, $value) = each %docdb)
    {
	($key, $value) = $rewrite->($key, $value);
	$newdb{$key} = $value;
    }

    untie %newdb;
    untie %docdb;
    return;
}

######################################################################
##
## create_ref_record(%rec)
##
## Serializes a record hash (as returned by parse_ref_record) into its
## packed form.  Fields are written in tag order and only when the
## corresponding key exists in the hash.  Returns the packed string, which
## is empty when no known key is present.
##
sub create_ref_record
{
    my (%rec) = @_;
    my $s = "";

    foreach my $tag (0 .. $#FIELDS)
    {
	my ($name, $kind) = @{ $FIELDS[$tag] };
	next unless exists $rec{$name};

	my $data = defined $rec{$name} ? $rec{$name} : "";

	if ($kind eq "int")
	{
	    $s .= pack("Ci", $tag, $data || 0);
	}
	elsif ($kind eq "str")
	{
	    $s .= pack("Ci", $tag, length($data)) . $data;
	}
	else
	{
	    # Limit -1 keeps trailing empty items; the count written is the
	    # real number of items, which is what parse_ref_record reads.
	    my @items = split(/\Q$LIST_SEP\E/, $data, -1);
	    $s .= pack("Ci", $tag, scalar(@items));
	    foreach my $item (@items)
	    {
		$s .= pack("i", length($item)) . $item;
	    }
	}
    }

    return $s;
}

######################################################################
##
## parse_ref_record(value)
##
## Decodes a packed record into a hash keyed by field name.  Numeric fields
## become numbers, string fields are returned byte-for-byte, and list
## fields are returned as one string with the items joined by the list
## separator.  An unknown tag byte is skipped; parsing stops quietly at the
## first field that is cut short, keeping what was decoded so far.
##
sub parse_ref_record
{
    my ($value) = @_;
    my %rec;
    my $pos = 0;
    my $end = length($value);

  FIELD:
    while ($pos < $end)
    {
	my $what = unpack("C", substr($value, $pos, 1));
	$pos++;

	my $field = $FIELDS[$what];
	next FIELD unless defined $field;

	my ($name, $kind) = @$field;

	if ($kind eq "int")
	{
	    my $number = _read_int(\$value, \$pos);
	    last FIELD unless defined $number;
	    $rec{$name} = $number;
	}
	elsif ($kind eq "str")
	{
	    my $string = _read_string(\$value, \$pos);
	    last FIELD unless defined $string;
	    $rec{$name} = $string;
	}
	else
	{
	    my $count = _read_int(\$value, \$pos);
	    last FIELD unless defined $count;

	    my @items;
	    my $complete = 1;
	    foreach (1 .. $count)
	    {
		my $item = _read_string(\$value, \$pos);
		if (!defined $item)
		{
		    $complete = 0;
		    last;
		}
		push @items, $item;
	    }
	    $rec{$name} = join($LIST_SEP, @items);
	    last FIELD unless $complete;
	}
    }
    return %rec;
}

##
## _read_int(bufref, posref)
##
## Reads one native int at offset $$posref of $$bufref and advances the
## offset.  Returns nothing when fewer than $INT_SIZE bytes remain.
##
sub _read_int
{
    my ($buf, $pos) = @_;

    return if length($$buf) - $$pos < $INT_SIZE;

    my $number = unpack("i", substr($$buf, $$pos, $INT_SIZE));
    $$pos += $INT_SIZE;
    return $number;
}

##
## _read_string(bufref, posref)
##
## Reads a length-prefixed string at offset $$posref of $$bufref and
## advances the offset.  The bytes are taken with substr, not the "A"
## unpack template, because "A" strips trailing whitespace and NULs.
## Returns nothing when the length prefix is missing or negative.
##
sub _read_string
{
    my ($buf, $pos) = @_;

    my $length = _read_int($buf, $pos);
    return unless defined $length && $length >= 0;

    my $string = substr($$buf, $$pos, $length);
    $$pos += $length;
    return $string;
}

# Run the conversion only when executed as a program, so that the record
# subs above can be loaded on their own.
main(@ARGV) unless caller;

1;