summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/test/t_htsearch
blob: 9041da75befc66540d1b2cdebc8d94184ea0c4b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#
# Part of the ht://Dig package   <http://www.htdig.org/>
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
# <http://www.gnu.org/copyleft/lgpl.html>
#
# $Id: t_htsearch,v 1.14 2004/05/28 13:15:30 lha Exp $
#

# Set-up for the htsearch tests: source the shared helpers, choose the
# configuration and scratch file, run htdig and htpurge, then source the
# helpers again with the stop action.
#
# NOTE(review): $testdir, $htdig, $htpurge, $htsearch and fail are not defined
# in this file; presumably ./test_functions provides them -- confirm.  What
# test_functions does with $test_functions_action is likewise not visible here
# (the values suggest it starts/stops a test Apache instance).
test_functions_action=--start-apache
. ./test_functions

config=$testdir/conf/htdig.conf

# Scratch file that receives htsearch output.  Ask mktemp for an unpredictable
# name so that another local user cannot pre-create or symlink the path; fall
# back to the historical PID-based name where mktemp is unavailable or fails.
tmp=$(mktemp "${TMPDIR:-/tmp}/t_htsearch.XXXXXX" 2>/dev/null) || tmp=/tmp/t_htsearch$$

# Run htdig and htpurge against $config before any search is attempted.  Any
# arguments given to this script are forwarded to htdig ahead of -t -i.
# $htdig and $htpurge are left unquoted on purpose: they are defined outside
# this file and may carry more than one word.
$htdig "$@" -t -i -c "$config"
$htpurge -c "$config"

test_functions_action=--stop-apache
. ./test_functions

# try COMMENT QUERY PATTERN...
#
# Runs $htsearch with configuration file $config on the QUERY string, stores
# its standard output in $tmp, and checks that every PATTERN matches at least
# one line of that output.  PATTERN is handed to grep unchanged, so it is
# interpreted as a regular expression and may match anywhere in a line.
#
# For each PATTERN that does not match: htsearch is run again with -v (its
# standard output discarded), the unmatched pattern is echoed, and fail is
# called with the command line and COMMENT.
#
# Globals read:    htsearch, config, tmp
# Globals written: comment, query, pattern (plain assignments, as before)
# NOTE(review): fail is not defined in this file; whether it returns or exits
# is decided wherever it is defined -- presumably ./test_functions.
try() {
    comment="$1"
    shift
    query="$1"
    shift
    # $htsearch stays unquoted: it is defined outside this file and may hold
    # more than one word.  $config and $tmp are single paths, so quote them.
    $htsearch -c "$config" "$query" > "$tmp"
    for pattern
    do
	# "--" keeps a pattern that starts with "-" from being parsed as a
	# grep option.
	if grep -- "$pattern" "$tmp" > /dev/null
	then
	    continue
	fi
	$htsearch -v -c "$config" "$query" > /dev/null
	echo "Output doesn't match \"$pattern\""
	fail "$htsearch -c $config '$query' >> $tmp --
		  $comment"
    done
}

# Word searches against the default htdig.conf: a single word, implicit
# (method=and / method=or) and explicit (method=boolean) combinations,
# grouping with parentheses, and the '*' wildcard.
#
# Each call passes a description, the query string, and then the patterns that
# must all appear in the htsearch output: the match-count text followed by the
# names of the expected results.  try hands the patterns to grep, so they are
# regular expressions matched anywhere in a line.
try "Simple search for 'also'" \
    "words=also" \
    '4 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html'

try "Implicit and search with two words 'also movies'" \
    "method=and&words=also+movies" \
    '1 match' 'site2.html'

try "Explicit and search with two words 'also movies'" \
    "method=boolean&words=also+and+movies" \
    '1 match' 'site2.html'

try "Implicit or search for 'also distribution'" \
    "method=or&words=also+distribution" \
    '5 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' 'site%201.html'

try "Explicit or search for 'also distribution'" \
    "method=boolean&words=also+or+distribution" \
    '5 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' 'site%201.html'

# Without parentheses the expected results are the same five as for the plain
# "or" search above; with parentheses (next test) only two are expected.
try "Boolean and/or mixed 'also or distribution and ltd'" \
    "method=boolean&words=also+or+distribution+and+ltd" \
    '5 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' 'site%201.html'

try "Boolean explicit priority '(also or distribution) and ltd'" \
    "method=boolean&words=(also+or+distribution)+and+ltd" \
    '2 matches' 'script.html' 'site%201.html'

try "Wildcard search for '*'" \
    "method=and&words=*" \
    '10 matches' 'set1/bad_local.htm' 'set1/title.html' 'set1/site2.html' 'set1/script.html' 'set1/"' 'set1/site3.html' 'set1/site4.html' 'set1/site%201.html' 'set1/sub%2520dir/' 'set1/sub%2520dir/empty%20file.html'

# Phrase searches (quoted phrases, with %22 as the URL-encoded double quote in
# the query string).  Several cases place stop words at the start, end or
# middle of the phrase and also require the whole phrase to appear inside
# <strong>...</strong> in the output.
try "Phrase search for '\"who offer\" and \"loans to graduate\"'" \
    "method=boolean&words=%22who+offer%22+and+%22loans+to+graduate%22" \
    '1 match' 'site4.html'

try "Phrase search for 'may be deferred' -- stop words at start" \
    "method=boolean&words=%22may+be+deferred%22" \
    '1 match' 'site4.html' '<strong>may be deferred</strong>'

try "Phrase search for 'Repayment may be' -- stop words at end" \
    "method=boolean&words=%22Repayment+may+be%22" \
    '1 match' 'site4.html' '<strong>Repayment may be</strong>'

# Disabled: the following test (a phrase consisting only of stop words) is
# known to fail, so it is kept commented out.
#
#try "Phrase search for 'may be' -- all stop words" \
#    "method=boolean&words=%22may+be%22" \
#    '1 match' 'site4.html'

try "Phrase search for 'Repayment may be deferred' -- stop words in middle" \
    "method=boolean&words=%22Repayment+may+be+deferred%22" \
    '1 match' 'site4.html' '<strong>Repayment may be deferred</strong>'

# The two words are separated by stop words in the document, so the phrase
# itself must not be reported as a match.
try "Phrase search for 'Repayment deferred' -- phrase interrupted by stop words" \
    "method=boolean&words=%22Repayment+deferred%22" \
    'No match'

# Malformed boolean expressions.  For each one the htsearch output must
# contain the quoted error-message text given as the pattern.
try "Syntax error: finishing with 'and'" \
    "method=boolean&words=also+and" \
    'Expected a search word, a quoted phrase or a boolean expression between () at the end'

try "Syntax error: excess open brackets" \
    "method=boolean&words=(also+or+distribution" \
    "Expected ')' at the end"

try "Syntax error: excess close brackets" \
    "method=boolean&words=also+or+distribution)" \
    "Expected end of expression instead of ')'"

try "Syntax error: missing end quote" \
    "method=boolean&words=also+or+%22distribution" \
    'Expected quotes at the end'

# Field-restricted searches using the "prefix:word" form.  The unrestricted
# search for 'group' comes first as the baseline; the following calls restrict
# it with author:, text: and descr: prefixes and probe how unusual prefix
# spellings ('text: group', 'text::group', 'unknown:group') are parsed.
try "Unrestricted search for 'group'" \
    "method=and&words=group" \
    '4 matches' 'script.html' 'bad_local.htm' 'site3.html' 'site4.html'

try "Field-restricted search for 'author:group'" \
    "method=and&words=author:group" \
    '1 match' 'script.html'

try "Field-restricted search for 'text:group'" \
    "method=and&words=text:group" \
    '3 matches' 'bad_local.htm' 'site3.html' 'site4.html'

# %20 is a URL-encoded space between the prefix and the word.
try "Checking prefix parsing using 'text: group'" \
    "method=and&words=text:%20group" \
    '1 match' 'script.html'

try "Checking prefix parsing using 'text::group'" \
    "method=and&words=text::group" \
    '1 match' 'script.html'

# Note that this call uses method=any, unlike the method=and calls around it.
try "Checking prefix parsing using 'unknown:group'" \
    "method=any&words=unknown:group" \
    '5 matches' 'script.html' 'bad_local.htm' 'site3.html' 'site4.html' 'set1/"'

try "Field-restricted search for 'descr:cost'" \
    "method=and&words=descr:cost" \
    '1 match' 'script.html'

# Switch to the alternate configuration file.  try expands $config each time
# it runs, so every search from here on uses htdig.conf3.
config=$testdir/conf/htdig.conf3

# With this configuration the query uses 'ou'/'et' as boolean operators and
# the expected result names contain 'place' where the earlier tests had 'site'
# (presumably the effect of boolean_keywords and search_rewrite_urls in
# htdig.conf3 -- the file itself is not visible here).
try "Testing boolean_keywords and search_rewrite_urls" \
    "method=boolean&words=also+ou+distribution+et+ltd" \
    '5 matches' 'bad_local.htm' 'place2.html' 'script.html' 'place4.html' 'place%201.html'

# The syntax-error message is expected in its customised (French) wording.
try "Testing boolean_syntax_errors" \
    "method=boolean&words=ou+distribution" \
    "Attendait un mot au lieu de '|' ou 'ou'"

# Checks on what htdig indexed around noindex_start/noindex_end markers:
# 'considered' must be found once, 'neglected' must not be found at all.
try "Testing htdig's noindex_start/end overlapping" \
    "words=considered" \
    '1 match' 'script.html'

try "Testing htdig's noindex_start/end nested" \
    "words=neglected" \
    'No match'

# Remove the scratch file that held the htsearch output and report success.
# The path is quoted and preceded by "--" so that a name containing whitespace
# or starting with "-" is still treated as a single file operand.
rm -f -- "$tmp"

exit 0