summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/contrib/ewswrap/ewswrap.cgi
blob: f3f9419ec13192748a21dd7d37a00f60395629a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/perl -w

# ewswrap.cgi
#
# by John Grohol ([email protected])
# Freeware
# v1.00 - 5 Oct 1998
#
# Simple wrapper script for htsearch to parse old
# Excite for Web Servers (EWS) forms as-is.
# This only makes sense if you want to upgrade
# your search engine but can't upgrade every form
# which points to it (e.g., external sites are
# pointing to your EWS CGI).
#
# As an added bonus, given the differences from how
# EWS handles queries to how htsearch handles them,
# it does some basic sanity checking on the query
# and tries to re-form it into a valid htsearch query.
#
# This script must be called using the POST method!
#
#_______________________________________________________
# Set some defaults here.
# NOTE: this script only reads the 'search' and 'page' form fields,
# so the calling form cannot override these values; edit them here.

# Search parameters forwarded to htsearch with every request.
$config   = 'htdig';          # htDig config file
$format   = 'builtin-long';   # results format
$method   = 'and';            # default match method (may be changed below)
$restrict = '';               # restrict to this url
$exclude  = '';               # exclude this url

# Directory containing the htsearch program.
$dir = '/usr/httpd/cgi-bin';

#_______________________________________________________
# Rest of program

 $| = 1;    # autoflush the currently selected output handle

# Get the form variables from the POST body.

# parse_form_data($body)
#
# Decode an application/x-www-form-urlencoded string.
#   $body   - raw "name=value&name=value" text (may be undef or empty)
# Returns a flat (name => value) list suitable for assigning to a hash.
# Names and values are URL-decoded; HTML comments and tags are removed
# from values.  A pair without '=' yields an empty-string value, and
# pairs with an empty name are skipped.  A repeated name keeps its
# last value.
sub parse_form_data {
    my ($body) = @_;
    my %fields;

    return %fields unless defined $body && length $body;

    foreach my $pair (split(/&/, $body)) {
        next unless length $pair;

        # Limit of 2: any further '=' belongs to the value.
        my ($name, $value) = split(/=/, $pair, 2);
        next unless defined $name && length $name;
        $value = '' unless defined $value;

        # '+' means space and must be translated before %XX decoding,
        # otherwise an encoded "%2B" would be turned into a space too.
        foreach my $part ($name, $value) {
            $part =~ tr/+/ /;
            $part =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        }

        # Non-greedy, so text between two separate comments survives.
        $value =~ s/<!--.*?-->//gs;
        $value =~ s/<[^>]*>//g;

        $fields{$name} = $value;
    }

    return %fields;
}

# Only read the body when the server supplied a usable length.
$buffer = '';
if (defined $ENV{'CONTENT_LENGTH'} && $ENV{'CONTENT_LENGTH'} =~ /^\d+$/) {
    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
    $buffer = '' unless defined $buffer;
}

%tags = parse_form_data($buffer);

# clean_ews_query($query)
#
# Remove the pieces of a query that this wrapper does not forward:
# the '+' and '-' characters, and the whole words "the", "not" and
# "what" (together with the whitespace that follows them).
# Only complete words are removed, so "theory", "nothing" and
# "somewhat" are left intact.
#   $query - raw query text (may be undef)
# Returns the cleaned query; undef becomes the empty string.
sub clean_ews_query {
    my ($query) = @_;
    return '' unless defined $query;

    $query =~ tr/+\-//d;
    $query =~ s/\b(?:the|not|what)\b\s*//g;

    return $query;
}

# normalize_page($raw)
#
# Returns $raw when it is a positive decimal integer, otherwise 1.
sub normalize_page {
    my ($raw) = @_;
    return 1 unless defined $raw && $raw =~ /^[1-9][0-9]*$/;
    return $raw;
}

$squery = clean_ews_query($tags{'search'});    # Set search query
$page   = normalize_page($tags{'page'});       # Results page, default 1

# A literal " and " or " or " inside the query means the user wrote
# a boolean expression, so switch the match method accordingly.

$method = "boolean" if $squery =~ / and | or /;

# Tally the space-separated words of the query.

@words = split(/ /, $squery);
$xwd++ for @words;

# If there are quotes in the query, we have to turn the quoted
# phrase into a parenthesized boolean expression.

# rewrite_quoted_phrase($query, $method)
#
# Rewrite a query that contains double quotes.
#   $query  - the search query (may be undef)
#   $method - the match method chosen so far
# Returns ($query, $method):
#   * no quote in $query: both values are returned unchanged;
#   * the first balanced "..." holds exactly two words (neither of
#     them "and"/"or"): returns a boolean expression in which the
#     phrase becomes "(w1 and w2)" and every other term, whether it
#     came before or after the phrase, is joined to it.  An explicit
#     "and"/"or" between terms is kept as the connector; otherwise
#     "and" is used.  Any further quote characters are dropped.
#     The method becomes "boolean";
#   * anything else (other phrase lengths, unbalanced quote): the
#     quotes and any " and" words are removed and the method becomes
#     "and".
sub rewrite_quoted_phrase {
    my ($query, $mode) = @_;

    return ($query, $mode) unless defined $query && $query =~ /"/;

    # Text before the first quote, the phrase itself, text after it.
    if ($query =~ /^([^"]*)"([^"]*)"(.*)\z/s) {
        my ($before, $phrase, $after) = ($1, $2, $3);
        my @phrase_words = split(' ', $phrase);

        # Right now, can only handle 2-word phrases
        if (@phrase_words == 2 && !grep { /^(?:and|or)$/ } @phrase_words) {
            $after =~ tr/"//d;

            my @items = (
                split(' ', $before),
                "($phrase_words[0] and $phrase_words[1])",
                split(' ', $after),
            );

            my $expr      = '';
            my $connector = 'and';
            foreach my $item (@items) {
                if ($item eq 'and' || $item eq 'or') {
                    $connector = $item;    # applies to the next term
                    next;
                }
                $expr .= " $connector " if length $expr;
                $expr .= $item;
                $connector = 'and';
            }

            return ($expr, 'boolean');
        }
    }

    # Not a two-word phrase: just turn it into one big string of
    # words.  The \b keeps words such as "android" intact.
    $query =~ tr/"//d;
    $query =~ s/ and\b//g;

    return ($query, 'and');
}

($squery, $method) = rewrite_quoted_phrase($squery, $method);

# Set the environment variables that describe the search to htsearch
# as a GET request.

# url_encode($text)
#
# Percent-encode every byte except letters, digits, '-', '.', '_'
# and '~'.  undef is treated as the empty string.
sub url_encode {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/([^A-Za-z0-9_.~\-])/sprintf("%%%02X", ord($1))/eg;
    return $text;
}

# build_query_string(name1, value1, name2, value2, ...)
#
# Returns "name1=value1&name2=value2..." with every value URL-encoded,
# keeping the pairs in the order given.
sub build_query_string {
    my @fields = @_;
    my @encoded;
    while (@fields) {
        my ($name, $value) = splice(@fields, 0, 2);
        push @encoded, $name . '=' . url_encode($value);
    }
    return join('&', @encoded);
}

# Values are encoded so that a '&', '=' or '%' typed into the search
# box stays part of the query instead of adding or overriding
# parameters.  htsearch is expected to URL-decode them like any CGI.
$ENV{'REQUEST_METHOD'} = 'GET';
$ENV{'QUERY_STRING'}   = build_query_string(
    'config'   => $config,
    'restrict' => $restrict,
    'exclude'  => $exclude,
    'words'    => $squery,
    'method'   => $method,
    'format'   => $format,
    'page'     => $page,
);

# Run htsearch.  It inherits the environment set up above and writes
# its response to our STDOUT.

my $htsearch = "$dir/htsearch";

# The block form always executes the program directly, never through
# a shell, whatever characters the path contains.
my $status = system { $htsearch } $htsearch;

if ($status == -1) {
    # htsearch never started, so nothing has been sent yet: log the
    # reason and return a minimal error response.
    warn "ewswrap: cannot run $htsearch: $!\n";
    print "Status: 500 Internal Server Error\n";
    print "Content-type: text/plain\n\n";
    print "The search engine is currently unavailable.\n";
}
elsif ($status != 0) {
    # htsearch ran and may already have produced output; only log.
    warn "ewswrap: $htsearch returned wait status $status\n";
}

1;