diff options
author | Slávek Banko <[email protected]> | 2021-11-05 13:28:23 +0100 |
---|---|---|
committer | Slávek Banko <[email protected]> | 2021-11-05 13:28:23 +0100 |
commit | 8c787c3591c1c885b91a54128835b400858c5cca (patch) | |
tree | eca1b776912a305c4d45b3964038278a2fae1ead /debian/htdig/htdig-3.2.0b6/test | |
parent | fe188b907cdf30dfdfe0eba9412e7f8749fec158 (diff) | |
download | extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.tar.gz extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.zip |
DEB htdig: Added to repository.
Signed-off-by: Slávek Banko <[email protected]>
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test')
78 files changed, 23455 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/.cvsignore b/debian/htdig/htdig-3.2.0b6/test/.cvsignore new file mode 100644 index 00000000..635aa471 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/.cvsignore @@ -0,0 +1,22 @@ +Makefile +*.lo +*.la +.pure +.purify +.deps +.libs +test_functions +dbbench +testnet +document +word +logs +var +url +txt2mifluz +search +test +test_weakcmpr +t_htdb.d1 +t_htdb.d2 +__db.*
\ No newline at end of file diff --git a/debian/htdig/htdig-3.2.0b6/test/Makefile.am b/debian/htdig/htdig-3.2.0b6/test/Makefile.am new file mode 100644 index 00000000..08aeb1b6 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/Makefile.am @@ -0,0 +1,139 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +include $(top_srcdir)/Makefile.config + +# +# All test programs use the index description from the +# mifluz.conf file (MIFLUZ_CONFIG set in test_functions.in). +# Some have hard wired additions but these +# additions are never redundant with mifluz.conf content. Some +# programs have options that allow command line overriding of +# index parameters (page size, compression and such). +# +# To run individual tests use (for instance): +# make TESTS=t_wordkey check +# +# To turn on verbosity use (for instance): +# VERBOSE=-vv make TESTS=t_wordkey check +# +# Run individual test without using make +# MAKE=make srcdir=. VERBOSE=-vv t_wordkey +# +# To purify objects use: +# ( cd ../test ; rm -f word ; make CXXLD='purify g++' word ) +# +# To generate benchmarks in benchmark/... 
make sure +# you have rrdtool installed and use +# make MONITOR=' -m' dobench +# + +TESTS = t_wordkey t_wordlist t_wordskip t_wordbitstream \ + t_search t_htdb t_rdonly t_trunc t_url \ + t_htdig t_htsearch t_htmerge t_htnet t_htdig_local \ + t_factors t_fuzzy t_parsing t_templates t_validwords + +TESTS_ENVIRONMENT = $(top_srcdir)/test/test_prepare +AM_MAKEFLAGS = MAKE="$(MAKE)" + +EXTRA_DIST = test_functions.in test_prepare $(TESTS) \ + skiptest_db.txt search.txt mifluz.conf mifluz-search.conf \ + benchmark-report benchmark t_htdb.dump \ + htdocs conf url.parents url.children url.output + +LOCAL_DEFINES = -I$(top_builddir)/db -I$(top_srcdir)/db $(PROFILING) + +check_PROGRAMS = word dbbench txt2mifluz search testnet document url + +dbbench_SOURCES = dbbench.cc +dbbench_DEPENDENCIES = $(HTLIBS) +dbbench_LDFLAGS = $(PROFILING) ${extra_ldflags} +dbbench_LDADD = $(HTLIBS) + +word_SOURCES = word.cc +word_DEPENDENCIES = $(HTLIBS) +word_LDFLAGS = $(PROFILING) ${extra_ldflags} +word_LDADD = $(HTLIBS) + +txt2mifluz_SOURCES = txt2mifluz.cc +txt2mifluz_DEPENDENCIES = $(HTLIBS) +txt2mifluz_LDFLAGS = $(PROFILING) ${extra_ldflags} +txt2mifluz_LDADD = $(HTLIBS) + +search_SOURCES = search.cc +search_DEPENDENCIES = $(HTLIBS) +search_LDFLAGS = $(PROFILING) ${extra_ldflags} +search_LDADD = $(HTLIBS) + +testnet_SOURCES = testnet.cc +testnet_DEPENDENCIES = $(HTLIBS) +testnet_LDFLAGS = $(PROFILING) ${extra_ldflags} +testnet_LDADD = $(HTLIBS) + +document_SOURCES = document.cc +document_DEPENDENCIES = $(HTLIBS) +document_LDFLAGS = $(PROFILING) ${extra_ldflags} +document_LDADD = $(HTLIBS) + +url_SOURCES = url.cc +url_DEPENDENCIES = $(HTLIBS) +url_LDFLAGS = $(PROFILING) ${extra_ldflags} +url_LDADD = $(HTLIBS) + +clean-local: + rm -fr gmon.out test test_weakcmpr __db* + rm -f tmpfile t_htdb.d? 
monitor.out + cd conf; $(MAKE) clean + +distclean-local: + rm -fr words.all words.uniq + +# +# The benchmark directory contains the result of some benchmarks in HTML +# form generated by benchmark-report, if MONITOR='-m' is specified. +# +dobench: dbbench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='-z' REPORT='Nz' MONITOR="$(MONITOR)" bench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='' REPORT='N' MONITOR="$(MONITOR)" bench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='-W -z' REPORT='Wz' MONITOR="$(MONITOR)" bench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='-W' REPORT='W' MONITOR="$(MONITOR)" bench + +BASE = test +CACHESIZE = -C `expr 64 \* 1024 \* 1024` +PAGESIZE = -S 8192 +CMPR = -z +WORDS = -w words.all +LOOP = -l 3 +NWORDS = +#MONITOR = -m +MONITOR = + +bench: + rm -f $(BASE) $(BASE)_weakcmpr __db* monitor.out bench.out + ( \ + MIFLUZ_CONFIG=$(top_srcdir)/test/mifluz.conf $(TIMEV) $(top_builddir)/test/dbbench $(CACHESIZE) $(PAGESIZE) $(CMPR) $(WORDS) $(LOOP) -B $(BASE) $(NWORDS) $(MONITOR) ; \ + ls -l $(BASE) ; \ + if [ -f $(BASE)_weakcmpr ] ; then MIFLUZ_CONFIG=$(top_srcdir)/test/mifluz.conf $(top_builddir)/htdb/htdump -p $(BASE)_weakcmpr ; fi ; \ + MIFLUZ_CONFIG=$(top_srcdir)/test/mifluz.conf $(top_builddir)/htdb/htstat $(CMPR) -d $(BASE) ; \ + ) 2>&1 | tee bench.out + if [ "$(RRDTOOL)" -a "X$(MONITOR)" != "X" ] ; then \ + if [ "$(REPORT)" ] ; \ + then \ + output="--output $(REPORT)" ; \ + fi ; \ + $(PERL) benchmark-report $$output --comment="`cat bench.out`" ; \ + rm monitor.out ; \ + fi + +# +# Generate list of words from info and man files. 
+# +words: + if [ -d /usr/info ] ; then root=/usr ; else root=/usr/share ; fi ; \ + find $$root/info -name '*.gz' -print | xargs zcat | perl -n -e 'print join("\n", map { lc } grep(length() > 2 && length() < 32, m/[a-z]+/ig)) . "\n"' | grep -v '^$$' > words.all ; \ + find $$root/man -type f -name '*.gz' -print | xargs zcat | perl -n -e 'print join("\n", map { lc } grep(length() > 2 && length() < 32, m/[a-z]+/ig)) . "\n"' | grep -v '^$$' >> words.all + sort -u < words.all > words.uniq diff --git a/debian/htdig/htdig-3.2.0b6/test/Makefile.in b/debian/htdig/htdig-3.2.0b6/test/Makefile.in new file mode 100644 index 00000000..adf7268b --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/Makefile.in @@ -0,0 +1,693 @@ +# Makefile.in generated by automake 1.7.9 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# To compile with profiling do the following: +# +# make CFLAGS=-g CXXFLAGS=-g PROFILING=-p all +# + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. 
+ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_triplet = @host@ +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +APACHE = @APACHE@ +APACHE_MODULES = @APACHE_MODULES@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CGIBIN_DIR = @CGIBIN_DIR@ +COMMON_DIR = @COMMON_DIR@ +CONFIG_DIR = @CONFIG_DIR@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DATABASE_DIR = @DATABASE_DIR@ +DEFAULT_CONFIG_FILE = @DEFAULT_CONFIG_FILE@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FIND = @FIND@ +GUNZIP = @GUNZIP@ +HAVE_SSL = @HAVE_SSL@ +HTDIG_MAJOR_VERSION = @HTDIG_MAJOR_VERSION@ +HTDIG_MICRO_VERSION = @HTDIG_MICRO_VERSION@ +HTDIG_MINOR_VERSION = @HTDIG_MINOR_VERSION@ +IMAGE_DIR = @IMAGE_DIR@ +IMAGE_URL_PREFIX = @IMAGE_URL_PREFIX@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@ +MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@ +MAKEINFO = @MAKEINFO@ +MV = @MV@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ 
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +RANLIB = @RANLIB@ +RRDTOOL = @RRDTOOL@ +SEARCH_DIR = @SEARCH_DIR@ +SEARCH_FORM = @SEARCH_FORM@ +SED = @SED@ +SENDMAIL = @SENDMAIL@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TAR = @TAR@ +TESTS_FALSE = @TESTS_FALSE@ +TESTS_TRUE = @TESTS_TRUE@ +TIME = @TIME@ +TIMEV = @TIMEV@ +USER = @USER@ +VERSION = @VERSION@ +YACC = @YACC@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_RANLIB = @ac_ct_RANLIB@ +ac_ct_STRIP = @ac_ct_STRIP@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +exec_prefix = @exec_prefix@ +extra_ldflags = @extra_ldflags@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +oldincludedir = @oldincludedir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +subdirs = @subdirs@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ + +AUTOMAKE_OPTIONS = foreign no-dependencies + +INCLUDES = -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" \ + -I$(top_srcdir)/include -I$(top_srcdir)/htlib \ + -I$(top_srcdir)/htnet -I$(top_srcdir)/htcommon \ + -I$(top_srcdir)/htword \ + 
-I$(top_srcdir)/db -I$(top_builddir)/db \ + $(LOCAL_DEFINES) $(PROFILING) + + +HTLIBS = $(top_builddir)/htnet/libhtnet.la \ + $(top_builddir)/htcommon/libcommon.la \ + $(top_builddir)/htword/libhtword.la \ + $(top_builddir)/htlib/libht.la \ + $(top_builddir)/htcommon/libcommon.la \ + $(top_builddir)/htword/libhtword.la \ + $(top_builddir)/db/libhtdb.la \ + $(top_builddir)/htlib/libht.la + + + +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# + +# +# All test programs use the index description from the +# mifluz.conf file (MIFLUZ_CONFIG set in test_functions.in). +# Some have hard wired additions but these +# additions are never redundant with mifluz.conf content. Some +# programs have options that allow command line overriding of +# index parameters (page size, compression and such). +# +# To run individual tests use (for instance): +# make TESTS=t_wordkey check +# +# To turn on verbosity use (for instance): +# VERBOSE=-vv make TESTS=t_wordkey check +# +# Run individual test without using make +# MAKE=make srcdir=. VERBOSE=-vv t_wordkey +# +# To purify objects use: +# ( cd ../test ; rm -f word ; make CXXLD='purify g++' word ) +# +# To generate benchmarks in benchmark/... 
make sure +# you have rrdtool installed and use +# make MONITOR=' -m' dobench +# +TESTS = t_wordkey t_wordlist t_wordskip t_wordbitstream \ + t_search t_htdb t_rdonly t_trunc t_url \ + t_htdig t_htsearch t_htmerge t_htnet t_htdig_local \ + t_factors t_fuzzy t_parsing t_templates t_validwords + + +TESTS_ENVIRONMENT = $(top_srcdir)/test/test_prepare +AM_MAKEFLAGS = MAKE="$(MAKE)" + +EXTRA_DIST = test_functions.in test_prepare $(TESTS) \ + skiptest_db.txt search.txt mifluz.conf mifluz-search.conf \ + benchmark-report benchmark t_htdb.dump \ + htdocs conf url.parents url.children url.output + + +LOCAL_DEFINES = -I$(top_builddir)/db -I$(top_srcdir)/db $(PROFILING) + +check_PROGRAMS = word dbbench txt2mifluz search testnet document url + +dbbench_SOURCES = dbbench.cc +dbbench_DEPENDENCIES = $(HTLIBS) +dbbench_LDFLAGS = $(PROFILING) ${extra_ldflags} +dbbench_LDADD = $(HTLIBS) + +word_SOURCES = word.cc +word_DEPENDENCIES = $(HTLIBS) +word_LDFLAGS = $(PROFILING) ${extra_ldflags} +word_LDADD = $(HTLIBS) + +txt2mifluz_SOURCES = txt2mifluz.cc +txt2mifluz_DEPENDENCIES = $(HTLIBS) +txt2mifluz_LDFLAGS = $(PROFILING) ${extra_ldflags} +txt2mifluz_LDADD = $(HTLIBS) + +search_SOURCES = search.cc +search_DEPENDENCIES = $(HTLIBS) +search_LDFLAGS = $(PROFILING) ${extra_ldflags} +search_LDADD = $(HTLIBS) + +testnet_SOURCES = testnet.cc +testnet_DEPENDENCIES = $(HTLIBS) +testnet_LDFLAGS = $(PROFILING) ${extra_ldflags} +testnet_LDADD = $(HTLIBS) + +document_SOURCES = document.cc +document_DEPENDENCIES = $(HTLIBS) +document_LDFLAGS = $(PROFILING) ${extra_ldflags} +document_LDADD = $(HTLIBS) + +url_SOURCES = url.cc +url_DEPENDENCIES = $(HTLIBS) +url_LDFLAGS = $(PROFILING) ${extra_ldflags} +url_LDADD = $(HTLIBS) + +BASE = test +CACHESIZE = -C `expr 64 \* 1024 \* 1024` +PAGESIZE = -S 8192 +CMPR = -z +WORDS = -w words.all +LOOP = -l 3 +NWORDS = +#MONITOR = -m +MONITOR = +subdir = test +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = 
$(top_builddir)/include/config.h +CONFIG_CLEAN_FILES = test_functions +check_PROGRAMS = word$(EXEEXT) dbbench$(EXEEXT) txt2mifluz$(EXEEXT) \ + search$(EXEEXT) testnet$(EXEEXT) document$(EXEEXT) url$(EXEEXT) +am_dbbench_OBJECTS = dbbench.$(OBJEXT) +dbbench_OBJECTS = $(am_dbbench_OBJECTS) +am_document_OBJECTS = document.$(OBJEXT) +document_OBJECTS = $(am_document_OBJECTS) +am_search_OBJECTS = search.$(OBJEXT) +search_OBJECTS = $(am_search_OBJECTS) +am_testnet_OBJECTS = testnet.$(OBJEXT) +testnet_OBJECTS = $(am_testnet_OBJECTS) +am_txt2mifluz_OBJECTS = txt2mifluz.$(OBJEXT) +txt2mifluz_OBJECTS = $(am_txt2mifluz_OBJECTS) +am_url_OBJECTS = url.$(OBJEXT) +url_OBJECTS = $(am_url_OBJECTS) +am_word_OBJECTS = word.$(OBJEXT) +word_OBJECTS = $(am_word_OBJECTS) + +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)/include +depcomp = +am__depfiles_maybe = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +DIST_SOURCES = $(dbbench_SOURCES) $(document_SOURCES) $(search_SOURCES) \ + $(testnet_SOURCES) $(txt2mifluz_SOURCES) $(url_SOURCES) \ + $(word_SOURCES) +DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/Makefile.config \ + Makefile.am test_functions.in +SOURCES = $(dbbench_SOURCES) $(document_SOURCES) $(search_SOURCES) $(testnet_SOURCES) $(txt2mifluz_SOURCES) $(url_SOURCES) $(word_SOURCES) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/Makefile.config $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && 
$(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) +test_functions: $(top_builddir)/config.status test_functions.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +dbbench$(EXEEXT): $(dbbench_OBJECTS) $(dbbench_DEPENDENCIES) + @rm -f dbbench$(EXEEXT) + $(CXXLINK) $(dbbench_LDFLAGS) $(dbbench_OBJECTS) $(dbbench_LDADD) $(LIBS) +document$(EXEEXT): $(document_OBJECTS) $(document_DEPENDENCIES) + @rm -f document$(EXEEXT) + $(CXXLINK) $(document_LDFLAGS) $(document_OBJECTS) $(document_LDADD) $(LIBS) +search$(EXEEXT): $(search_OBJECTS) $(search_DEPENDENCIES) + @rm -f search$(EXEEXT) + $(CXXLINK) $(search_LDFLAGS) $(search_OBJECTS) $(search_LDADD) $(LIBS) +testnet$(EXEEXT): $(testnet_OBJECTS) $(testnet_DEPENDENCIES) + @rm -f testnet$(EXEEXT) + $(CXXLINK) $(testnet_LDFLAGS) $(testnet_OBJECTS) $(testnet_LDADD) $(LIBS) +txt2mifluz$(EXEEXT): $(txt2mifluz_OBJECTS) $(txt2mifluz_DEPENDENCIES) + @rm -f txt2mifluz$(EXEEXT) + $(CXXLINK) $(txt2mifluz_LDFLAGS) $(txt2mifluz_OBJECTS) $(txt2mifluz_LDADD) $(LIBS) +url$(EXEEXT): $(url_OBJECTS) $(url_DEPENDENCIES) + @rm -f url$(EXEEXT) + $(CXXLINK) $(url_LDFLAGS) $(url_OBJECTS) $(url_LDADD) $(LIBS) +word$(EXEEXT): $(word_OBJECTS) $(word_DEPENDENCIES) + @rm -f word$(EXEEXT) + $(CXXLINK) $(word_LDFLAGS) $(word_OBJECTS) $(word_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +.cc.o: + $(CXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< + +.cc.obj: + $(CXXCOMPILE) -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` + +.cc.lo: + $(LTCXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ETAGS = etags 
+ETAGSFLAGS = + +CTAGS = ctags +CTAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +check-TESTS: $(TESTS) + @failed=0; all=0; xfail=0; xpass=0; skip=0; \ + srcdir=$(srcdir); export srcdir; \ + list='$(TESTS)'; \ + if test -n "$$list"; then \ + for tst in $$list; do \ + if test -f ./$$tst; then dir=./; \ + elif test -f $$tst; then dir=; \ + else dir="$(srcdir)/"; fi; \ + if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xpass=`expr $$xpass + 1`; \ + failed=`expr $$failed + 1`; \ + echo "XPASS: $$tst"; \ + ;; \ 
+ *) \ + echo "PASS: $$tst"; \ + ;; \ + esac; \ + elif test $$? -ne 77; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *" $$tst "*) \ + xfail=`expr $$xfail + 1`; \ + echo "XFAIL: $$tst"; \ + ;; \ + *) \ + failed=`expr $$failed + 1`; \ + echo "FAIL: $$tst"; \ + ;; \ + esac; \ + else \ + skip=`expr $$skip + 1`; \ + echo "SKIP: $$tst"; \ + fi; \ + done; \ + if test "$$failed" -eq 0; then \ + if test "$$xfail" -eq 0; then \ + banner="All $$all tests passed"; \ + else \ + banner="All $$all tests behaved as expected ($$xfail expected failures)"; \ + fi; \ + else \ + if test "$$xpass" -eq 0; then \ + banner="$$failed of $$all tests failed"; \ + else \ + banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \ + fi; \ + fi; \ + dashes="$$banner"; \ + skipped=""; \ + if test "$$skip" -ne 0; then \ + skipped="($$skip tests were not run)"; \ + test `echo "$$skipped" | wc -c` -gt `echo "$$banner" | wc -c` && \ + dashes="$$skipped"; \ + fi; \ + report=""; \ + if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \ + report="Please report to $(PACKAGE_BUGREPORT)"; \ + test `echo "$$report" | wc -c` -gt `echo "$$banner" | wc -c` && \ + dashes="$$report"; \ + fi; \ + dashes=`echo "$$dashes" | sed s/./=/g`; \ + echo "$$dashes"; \ + echo "$$banner"; \ + test -n "$$skipped" && echo "$$skipped"; \ + test -n "$$report" && echo "$$report"; \ + echo "$$dashes"; \ + test "$$failed" -eq 0; \ + else :; fi +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. +distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + $(mkinstalldirs) $(distdir)/.. 
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile + +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-checkPROGRAMS clean-generic clean-libtool clean-local \ + mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-libtool distclean-local distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-info-am + +.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \ + clean-checkPROGRAMS clean-generic clean-libtool clean-local \ + ctags distclean distclean-compile distclean-generic \ + distclean-libtool distclean-local distclean-tags distdir dvi \ + dvi-am info info-am install install-am install-data \ + install-data-am install-exec install-exec-am install-info \ + install-info-am install-man install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-info-am + + +clean-local: + rm -fr gmon.out test test_weakcmpr __db* + rm -f tmpfile t_htdb.d? monitor.out + cd conf; $(MAKE) clean + +distclean-local: + rm -fr words.all words.uniq + +# +# The benchmark directory contains the result of some benchmarks in HTML +# form generated by benchmark-report, if MONITOR='-m' is specified. 
+# +dobench: dbbench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='-z' REPORT='Nz' MONITOR="$(MONITOR)" bench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='' REPORT='N' MONITOR="$(MONITOR)" bench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='-W -z' REPORT='Wz' MONITOR="$(MONITOR)" bench + $(MAKE) BASE="$(BASE)" CACHESIZE="$(CACHESIZE)" PAGESIZE="$(PAGESIZE)" LOOP="$(LOOP)" NWORDS="$(NWORDS)" CMPR='-W' REPORT='W' MONITOR="$(MONITOR)" bench + +bench: + rm -f $(BASE) $(BASE)_weakcmpr __db* monitor.out bench.out + ( \ + MIFLUZ_CONFIG=$(top_srcdir)/test/mifluz.conf $(TIMEV) $(top_builddir)/test/dbbench $(CACHESIZE) $(PAGESIZE) $(CMPR) $(WORDS) $(LOOP) -B $(BASE) $(NWORDS) $(MONITOR) ; \ + ls -l $(BASE) ; \ + if [ -f $(BASE)_weakcmpr ] ; then MIFLUZ_CONFIG=$(top_srcdir)/test/mifluz.conf $(top_builddir)/htdb/htdump -p $(BASE)_weakcmpr ; fi ; \ + MIFLUZ_CONFIG=$(top_srcdir)/test/mifluz.conf $(top_builddir)/htdb/htstat $(CMPR) -d $(BASE) ; \ + ) 2>&1 | tee bench.out + if [ "$(RRDTOOL)" -a "X$(MONITOR)" != "X" ] ; then \ + if [ "$(REPORT)" ] ; \ + then \ + output="--output $(REPORT)" ; \ + fi ; \ + $(PERL) benchmark-report $$output --comment="`cat bench.out`" ; \ + rm monitor.out ; \ + fi + +# +# Generate list of words from info and man files. +# +words: + if [ -d /usr/info ] ; then root=/usr ; else root=/usr/share ; fi ; \ + find $$root/info -name '*.gz' -print | xargs zcat | perl -n -e 'print join("\n", map { lc } grep(length() > 2 && length() < 32, m/[a-z]+/ig)) . "\n"' | grep -v '^$$' > words.all ; \ + find $$root/man -type f -name '*.gz' -print | xargs zcat | perl -n -e 'print join("\n", map { lc } grep(length() > 2 && length() < 32, m/[a-z]+/ig)) . 
"\n"' | grep -v '^$$' >> words.all + sort -u < words.all > words.uniq +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/debian/htdig/htdig-3.2.0b6/test/bad_word_list b/debian/htdig/htdig-3.2.0b6/test/bad_word_list new file mode 100644 index 00000000..cbbb047c --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/bad_word_list @@ -0,0 +1 @@ +technical diff --git a/debian/htdig/htdig-3.2.0b6/test/benchmark-report b/debian/htdig/htdig-3.2.0b6/test/benchmark-report new file mode 100644 index 00000000..20de00d5 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/benchmark-report @@ -0,0 +1,290 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# +# Use the result from WordMonitor (--input=file) and generate a report in +# benchmark/<--output=name>-YYYY-MM-DD/index.html. +# If --comment='bla bla' is provided it is copied in the comment.txt file. 
#
use strict;
use warnings;
use Getopt::Long;
use File::Copy;
use File::Path;
use POSIX qw(strftime);

#
# Turn a WordMonitor sample file (monitor.out) into a benchmark report:
# the samples are loaded in a temporary RRD database with rrdtool, one
# graph is generated per monitored value and a few HTML pages tie the
# graphs together under benchmark/<output>-<YYYY-MM-DD>/.
#
# Options: --input file, --output name, --comment text, --verbose level
#

my($report) = "unknown";	# --output : name used to build the report directory
my($verbose);			# --verbose : echo commands and samples on STDERR
my($comment) = "";		# --comment : text stored in comment.html
my($dir);			# report directory, set by main()
my($cmds_fh);			# handle on $dir/README, every command run is logged there

#
# Quote a word so that the shell passes it unchanged to the command.
#
sub shell_quote {
    my($word) = @_;

    $word =~ s/'/'\\''/g;
    return "'$word'";
}

#
# Create (or truncate) $path and fill it with $content.
#
sub write_file {
    my($path, $content) = @_;

    open(my $fh, '>', $path) or die "cannot open $path for writing : $!";
    print {$fh} $content;
    close($fh) or die "cannot close $path : $!";
}

#
# Parse the command line, create the report directory, build the
# RRD database and the report, then remove the RRD database.
#
sub main {
    my($file) = "monitor.out";

    GetOptions("input=s" => \$file,
	       "output=s" => \$report,
	       "comment=s" => \$comment,
	       "verbose=i" => \$verbose,
	       ) or die "usage: $0 [--input file] [--output name] [--comment text] [--verbose level]\n";

    # Fail before creating anything if there is nothing to report on.
    die "cannot read $file\n" unless(-r $file);

    my($timestamp) = strftime("%Y-%m-%d", localtime);
    $dir = "benchmark/$report-$timestamp";
    if(-d $dir) {
	print STDERR "directory $dir exists, will not override: abort\n";
	exit(1);
    }
    mkpath($dir, 0, 0777) or die "cannot mkdirp $dir : $!";
    my($cmds) = "$dir/README";
    open($cmds_fh, '>', $cmds) or die "cannot open $cmds for writing : $!";
    report(build($file));
    close($cmds_fh) or die "cannot close $cmds : $!";
    undef($cmds_fh);
    # The RRD database is only needed to draw the graphs.
    my($rrd) = "$dir/monitor.rrd";
    unlink($rrd) or die "cannot unlink $rrd : $!";
}

#
# Generate the graphs and HTML pages of the report.
#   $started, $finished : first and last sample time (seconds)
#   @fields : data source names, as returned by build()
#
sub report {
    my($started, $finished, @fields) = @_;

    #
    # Machine description : the commands append to the file
    #
    my($file) = "$dir/config.html";
    write_file($file, "<body bgcolor=#ffffff>\n<pre>");
    my($target) = shell_quote($file);
    run("uname -a >> $target");
    run("cat /proc/cpuinfo >> $target") if(-f "/proc/cpuinfo");
    run("cat /proc/meminfo >> $target") if(-f "/proc/meminfo");

    write_file("$dir/comment.html",
	       "<body bgcolor=#ffffff>\n<pre>" . ($comment ? $comment : "No comment\n"));

    #
    # Generate a graph for each value
    #
    my(%vlabel) = (
		   'Write' => 'Pages',
		   'Read' => 'Pages',
		   'P_IBTREE' => 'Pages',
		   'P_LBTREE' => 'Pages',
		   'P_UNKNOWN' => 'Pages',
		   'Put' => 'Put',
		   'Get__0_' => 'Get',
		   'Get__NEXT_' => 'Get',
		   'Get__SET_RANGE_' => 'Get',
		   'Get__Other_' => 'Get',
		   'LEVEL' => 'Level',
		   'PGNO' => 'Pages',
		   'CMP' => 'Compare',
		   );
    my(%hlabel) = (
		   'Write' => 'Write/second',
		   'Read' => 'Read/second',
		   'P_IBTREE' => 'Internal B-Tree nodes read + write / second',
		   'P_LBTREE' => 'Leaf B-Tree nodes read + write / second',
		   'P_UNKNOWN' => 'Unknown pages read + write / second',
		   'Put' => 'Put / second',
		   'Get__0_' => 'Get(0) / second',
		   'Get__NEXT_' => 'Get(DB_NEXT) / second',
		   'Get__SET_RANGE_' => 'Get(DB_SET_RANGE) / second',
		   'Get__Other_' => 'Get(???) / second',
		   'LEVEL' => 'Height of the B-Tree',
		   'PGNO' => 'Size of the B-Tree in pages',
		   'CMP' => 'Key compare / second',
		   );

    foreach my $field (@fields) {
	my($image) = shell_quote("$dir/$field.gif");
	my($hlabel) = exists($hlabel{$field}) ? "--title '$hlabel{$field}'" : "";
	my($vlabel) = exists($vlabel{$field}) ? "--vertical-label '$vlabel{$field}'" : "";
	my($def) = shell_quote("DEF:in=$dir/monitor.rrd:$field:AVERAGE");
	run("rrdtool graph $image --start $started --end $finished $hlabel $vlabel $def 'LINE2:in#FF0000' >/dev/null");
    }

    #
    # Generate a cumulated graph for compression rates
    #
    my(%rate) = (
		 'Compress_1_1' => 1,
		 'Compress_1_2' => 2,
		 'Compress_1_3' => 3,
		 'Compress_1_4' => 4,
		 'Compress_1_5' => 5,
		 'Compress_1_6' => 6,
		 'Compress_1_7' => 7,
		 'Compress_1_8' => 8,
		 'Compress_1_9' => 9,
		 'Compress_1_10' => 10,
		 'Compress_1__10' => 11,
		 );
    my(%color) = (
		  'Compress_1_1' => "#ff0000",
		  'Compress_1_2' => "#ee1100",
		  'Compress_1_3' => "#dd2200",
		  'Compress_1_4' => "#cc3300",
		  'Compress_1_5' => "#bb4400",
		  'Compress_1_6' => "#996600",
		  'Compress_1_7' => "#778800",
		  'Compress_1_8' => "#55aa00",
		  'Compress_1_9' => "#33cc00",
		  'Compress_1_10' => "#11ee00",
		  'Compress_1__10' => "#00ff00",
		  );
    my($last_total);
    my(@lines);
    foreach my $field (@fields) {
	next if($field !~ /^Compress/);
	if(!exists($rate{$field})) {
	    # No rate / color is known for this field : it cannot be stacked
	    print STDERR "No compression rate known for field $field, skipped\n";
	    next;
	}
	my($cmpr) = "cmpr$rate{$field}";
	my($total) = $cmpr;
	my($cdef) = "";
	if($last_total) {
	    # Stack this rate on top of the sum of the previous ones
	    $total = "total$rate{$field}";
	    $cdef = "CDEF:$total=$last_total,$cmpr,+";
	}
	$last_total = $total;
	push(@lines, shell_quote("DEF:$cmpr=$dir/monitor.rrd:$field:AVERAGE") . " $cdef 'LINE1:$total$color{$field}:1/$rate{$field}'");
    }
    if(@lines) {
	# rrdtool graph needs at least one data definition
	my($image) = shell_quote("$dir/compress.gif");
	run("rrdtool graph $image --start $started --end $finished --title 'Compression rate comparison / second' --vertical-label 'Pages' @lines >/dev/null");
    }

    #
    # Build home page
    #
    $file = "$dir/index.html";
    open(my $index, '>', $file) or die "cannot open $file for writing : $!";
    print {$index} <<EOF;
<body bgcolor=#ffffff>
<center>
[<a href=config.html>Configuration</a> | <a href=comment.html>Comment</a> | <a href=monitor.out>Samples</a> | <a href=compress.html>Compression</a>]
<p>
EOF
    print {$index} <<EOF;
<br>
<img src="compress.gif">
<br>
In the graph above, the area under each line is the number of pages
compressed in the corresponding proportions. The bottom line is always 1/1
compression. For instance the area between the 1/11 line and the 1/10 line
shows how many pages were compressed in a proportion equal or better than 1/11.
In <a href=compress.html>the compression report</a> an individual graph is
shown for each line.
<br>
EOF

    foreach my $field (@fields) {
	next if($field =~ /^Compress/);
	print {$index} <<EOF;
<br>
<img src="$field.gif">
<br>
EOF
    }
    print {$index} "</center>\n";
    close($index) or die "cannot close $file : $!";

    #
    # Build compression details page
    #
    $file = "$dir/compress.html";
    open(my $details, '>', $file) or die "cannot open $file for writing : $!";
    print {$details} <<EOF;
<body bgcolor=#ffffff>
<center>
[<a href=index.html>Home page</a>]
<p>
EOF
    print {$details} <<EOF;
<br>
<img src="compress.gif">
<br>
EOF

    foreach my $field (@fields) {
	next if($field !~ /^Compress/);
	print {$details} <<EOF;
$field
<br>
<img src="$field.gif">
<br>
EOF
    }
    print {$details} "</center>\n";
    close($details) or die "cannot close $file : $!";
}

#
# Run a shell command. The command is echoed on STDERR when verbose
# and logged in the README of the report. A failure is reported on
# STDERR but is not fatal : the report is built on a best effort basis.
# Return true if the command succeeded.
#
sub run {
    my($cmd) = @_;

    print STDERR "$cmd\n" if($verbose);
    print {$cmds_fh} "$cmd\n" if($cmds_fh);
    system($cmd);
    if($? == -1) {
	print STDERR "cannot execute '$cmd' : $!\n";
    } elsif($? != 0) {
	print STDERR "command '$cmd' failed (wait status $?)\n";
    }
    return $? == 0;
}

#
# Read the next line of $fh, which must look like "<name>:<number>",
# and return the number.
#
sub header_value {
    my($fh, $name) = @_;

    my($line) = scalar(<$fh>);
    if(!defined($line) || $line !~ /^\Q$name\E:(\d+)/) {
	die "missing $name: line in WordMonitor header\n";
    }
    return $1;
}

#
# Load the samples found in $file in $dir/monitor.rrd.
# Return ($started, $finished, @fields) where $started and $finished
# are the time of the first and last sample and @fields the names of
# the data sources created in the RRD database.
#
sub build {
    my($file) = @_;

    # The home page links to monitor.out, whatever the name of the input is.
    copy($file, "$dir/monitor.out") or die "cannot copy $file to $dir/monitor.out : $!";
    my(@fields);
    my($started);
    my($step);
    my($heartbeat);
    my($finished);
    my(@updates);
    my($rrd) = shell_quote("$dir/monitor.rrd");
    open(my $in, '<', $file) or die "cannot open $file for reading : $!";
    while(<$in>) {
	if(/WordMonitor starting/) {
	    #
	    # Header : start time, sampling period and field names
	    #
	    $started = header_value($in, "Started");
	    $step = header_value($in, "Period");
	    $heartbeat = $step * 2;
	    my($names) = scalar(<$in>);
	    die "missing field names in WordMonitor header\n" if(!defined($names));
	    my(@ds);
	    @fields = split(':', $names);
	    shift(@fields); # get rid of Time field
	    pop(@fields); # get rid of last empty field
	    # $field is an alias : the names in @fields are rewritten in place
	    foreach my $field (@fields) {
		my($type) = $field =~ /^(.)\./;
		$type = "" if(!defined($type));
		$field =~ s/^..//;
		$field =~ s/[^a-z0-9_]/_/gi;
		if($type eq 'C') {
		    push(@ds, "DS:$field:COUNTER:$heartbeat:U:U");
		} elsif($type eq 'G') {
		    push(@ds, "DS:$field:GAUGE:$heartbeat:0:U");
		} else {
		    print STDERR "Unknown type $type for field $field\n";
		    exit(1);
		}
	    }
	    my($ds) = join(' ', @ds);
	    my($rra) = "RRA:AVERAGE:0.5:1:2000";
	    my($cmd) = "rrdtool create $rrd --step $step --start $started $ds $rra";
	    run("rm -f $rrd ; $cmd");
	    next;
	}
	next if(/-------------/ || /^\s*$/);
	chomp; # remove new line
	s/:$//; # remove last :
	if(!/^(\d+):/) {
	    # Not a sample : rrdtool update would reject it
	    print STDERR "ignored: $_\n" if($verbose);
	    next;
	}
	$finished = $1;
	print STDERR "$_\n" if($verbose);
	push(@updates, shell_quote($_));
	# Feed rrdtool by batches to keep the command line short
	if(@updates > 50) {
	    run("rrdtool update $rrd " . join(' ', @updates));
	    @updates = ();
	}
    }
    if(@updates) {
	run("rrdtool update $rrd " . join(' ', @updates));
    }
    close($in);

    die "no WordMonitor header found in $file\n" if(!defined($started));
    # No sample at all : graph an empty period instead of an undefined one
    $finished = $started if(!defined($finished));

    print STDERR "started = $started, finished = $finished\n" if($verbose);
    return ($started, $finished, @fields);
}

main();

__END__
The text below is not part of the script : it is the remainder of the diff listing, kept unchanged.

diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/.cvsignore b/debian/htdig/htdig-3.2.0b6/test/conf/.cvsignore new file mode 100644 index 00000000..625be200 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/.cvsignore @@ -0,0 +1,6 @@ +httpd.conf +srm.conf +access.conf +mime.types +htdig.conf +htdig.conf2 diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/Makefile b/debian/htdig/htdig-3.2.0b6/test/conf/Makefile new file mode 100644 index 00000000..59078c3e --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/Makefile @@ -0,0 +1,17 @@ +.SUFFIXES: .in + +FILES = httpd.conf srm.conf access.conf mime.types \ + htdig.conf htdig.conf2 htdig.conf3 + +all: $(FILES) + +clean: + rm -fr $(FILES) + +.in: + sed -e "s/_USER_/$(user)/g" \ + -e "s|_TESTDIR_|$(testdir)|g" \ + -e "s|_SRCDIR_|$(srcdir)|g" \ + -e "s|_MODULES_|$(modules)|g" \ + -e "s|_PORT_|7400|g" \ + < $< > $@ diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/access.conf.in b/debian/htdig/htdig-3.2.0b6/test/conf/access.conf.in new file mode 100644 index 00000000..b8187b6c --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/access.conf.in @@ -0,0 +1,63 @@ +# access.conf: Global access configuration +# Online docs at http://www.apache.org/ + +# This file defines server settings which affect which types of services +# are allowed, and in what circumstances. + +# Each directory to which Apache has access, can be configured with respect +# to which services and features are allowed and/or disabled in that +# directory (and its subdirectories). + +# Originally by Rob McCool + +# /usr/local/etc/httpd/ should be changed to whatever you set ServerRoot to. 
+<Directory _TESTDIR_/cgi-bin> +Options Indexes FollowSymLinks ExecCGI +</Directory> + +#<Directory _TESTDIR_/htdocs/cookies> +#Options Indexes FollowSymLinks ExecCGI +#CookieTracking on +#</Directory> + +# This should be changed to whatever you set DocumentRoot to. + +<Directory _TESTDIR_/htdocs> + +# This may also be "None", "All", or any combination of "Indexes", +# "Includes", "FollowSymLinks", "ExecCGI", or "MultiViews". + +# Note that "MultiViews" must be named *explicitly* --- "Options All" +# doesn't give it to you (or at least, not yet). + +Options Indexes FollowSymLinks ExecCGI + +# This option allows you to turn on the XBitHack behavior, which allows you +# to make text/html server-parsed by activating the owner x bit with chmod. +# This directive may be used wherever Options may, and has three +# possible arguments: Off, On or Full. If set to full, Apache will also +# add a Last-Modified header to the document if the group x bit is set. + +# Unless the server has been compiled with -DXBITHACK, this function is +# off by default. To use, uncomment the following line: + +#XBitHack Full + +# This controls which options the .htaccess files in directories can +# override. Can also be "None", or any combination of "Options", "FileInfo", +# "AuthConfig", and "Limit" + +AllowOverride All + +# Controls who can get stuff from this server. + +<Limit GET> +order allow,deny +allow from all +</Limit> + +</Directory> + +# You may place any other directories you wish to have access +# information for after this one. 
+ diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/entry-template b/debian/htdig/htdig-3.2.0b6/test/conf/entry-template new file mode 100644 index 00000000..3166ac2e --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/entry-template @@ -0,0 +1,24 @@ +<dl><dt><strong><a href="$&(URL)">$&(TITLE)</a></strong>$(STARSLEFT) +</dt><dd>$(EXCERPT)<br> +<em><a href="$&(URL)">$&(URL)</a></em> +<font size="-1">$(MODIFIED), $(SIZE) bytes</font> +</dd></dl> + +ANCHOR=$(ANCHOR)<br> +BACKLINKS=$(BACKLINKS)<br> +CURRENT=$(CURRENT)<br> +DESCRIPTION=$(DESCRIPTION)<br> +DESCRIPTIONS=$(DESCRIPTIONS)<br> +DOCID=$(DOCID)<br> +EXCERPT=$(EXCERPT)<br> +HOPCOUNT=$(HOPCOUNT)<br> +METADESCRIPTION=$(METADESCRIPTION)<br> +MODIFIED=$(MODIFIED)<br> +PERCENT=$(PERCENT)<br> +SCORE=$(SCORE)<br> +SIZE=$(SIZE)<br> +SIZEK=$(SIZEK)<br> +STARSLEFT=$(STARSLEFT)<br> +STARSRIGHT=$(STARSRIGHT)<br> +TITLE=$(TITLE)<br> +URL=$(URL)<br> diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf.in b/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf.in new file mode 100644 index 00000000..5fe2be9d --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf.in @@ -0,0 +1,181 @@ +# +# Example config file for ht://Dig. +# +# This configuration file is used by all the programs that make up ht://Dig. +# Please refer to the attribute reference manual for more details on what +# can be put into this file. (http://www.htdig.org/confindex.html) +# Note that most attributes have very reasonable default values so you +# really only have to add attributes here if you want to change the defaults. +# +# What follows are some of the common attributes you might want to change. +# + +# Specifies the directory for files that will or can be +# shared among different search databases. The default +# value for this attribute is defined at compile time. +common_dir: _SRCDIR_/../installdir + +# +# Specify where the database files need to go. Make sure that there is +# plenty of free disk space available for the databases. 
They can get +# pretty big. +# +database_dir: _TESTDIR_/var/htdig + +# +# This specifies the URL where the robot (htdig) will start. You can specify +# multiple URLs here. Just separate them by some whitespace. +# The example here will cause the ht://Dig homepage and related pages to be +# indexed. +# You could also index all the URLs in a file like so: +# start_url: `${common_dir}/start.url` +# +start_url: http://localhost:_PORT_/set1/ + +# +# This attribute limits the scope of the indexing process. The default is to +# set it to the same as the start_url above. This way only pages that are on +# the sites specified in the start_url attribute will be indexed and it will +# reject any URLs that go outside of those sites. +# +# Keep in mind that the value for this attribute is just a list of string +# patterns. As long as URLs contain at least one of the patterns it will be +# seen as part of the scope of the index. +# +limit_urls_to: ${start_url} + +# +# If there are particular pages that you definitely do NOT want to index, you +# can use the exclude_urls attribute. The value is a list of string patterns. +# If a URL matches any of the patterns, it will NOT be indexed. This is +# useful to exclude things like virtual web trees or database accesses. By +# default, all CGI URLs will be excluded. (Note that the /cgi-bin/ convention +# may not work on your web server. Check the path prefix used on your web +# server.) +# +exclude_urls: /cgi-bin/ .cgi + +# +# The string htdig will send in every request to identify the robot. Change +# this to your email address. +# +maintainer: _USER_ + +# +# The excerpts that are displayed in long results rely on stored information +# in the index databases. The compiled default only stores 512 characters of +# text from each document (this excludes any HTML markup...) If you plan on +# using the excerpts you probably want to make this larger. 
The only concern +# here is that more disk space is going to be needed to store the additional +# information. Since disk space is cheap (! :-)) you might want to set this +# to a value so that a large percentage of the documents that you are going +# to be indexing are stored completely in the database. At SDSU we found +# that by setting this value to about 50k the index would get 97% of all +# documents completely and only 3% was cut off at 50k. You probably want to +# experiment with this value. +# Note that if you want to set this value low, you probably want to set the +# excerpt_show_top attribute to false so that the top excerpt_length characters +# of the document are always shown. +# +max_head_length: 100000 + +# +# To limit network connections, ht://Dig will only pull up to a certain limit +# of bytes. This prevents the indexing from dying because the server keeps +# sending information. However, several FAQs happen because people have files +# bigger than the default limit of 100KB. This sets the default a bit higher. +# (see <http://www.htdig.org/FAQ.html> for more) +# +max_doc_size: 200000 + +# This sets the maximum length of words that will be +# indexed. Words longer than this value will be silently +# truncated when put into the index, or searched in the +# index. +maximum_word_length: 50 + +# +# Most people expect some sort of excerpt in results. By default, if the +# search words aren't found in context in the stored excerpt, htsearch shows +# the text defined in the no_excerpt_text attribute: +# (None of the search words were found in the top of this document.) +# This attribute instead will show the top of the excerpt. +# +no_excerpt_show_top: true + +# +# Depending on your needs, you might want to enable some of the fuzzy search +# algorithms. There are several to choose from and you can use them in any +# combination you feel comfortable with. 
Each algorithm will get a weight +# assigned to it so that in combinations of algorithms, certain algorithms get +# preference over others. Note that the weights only affect the ranking of +# the results, not the actual searching. +# The available algorithms are: +# exact +# endings +# metaphone +# prefix +# regex +# soundex +# synonyms +# By default only the "exact" algorithm is used with weight 1. +# Note that if you are going to use the endings, metaphone, soundex, +# or synonyms algorithms, you will need to run htfuzzy to generate +# the databases they use. +# +search_algorithm: exact:1 + +# +# The following are the templates used in the builtin search results +# The default is to use compiled versions of these files, which produces +# slightly faster results. However, uncommenting these lines makes it +# very easy to change the format of search results. +# See <http://www.htdig.org/hts_templates.html> for more details. +# +# template_map: Long long ${common_dir}/long.html \ +# Short short ${common_dir}/short.html +# template_name: long + +# +# Enable extended features of WordList +# +wordlist_extend: true + +# +# The following are used to change the text for the page index. +# The defaults are just boring text numbers. These images spice +# up the result pages quite a bit. 
(Feel free to do whatever, though) +# +next_page_text: <img src=/htdig/buttonr.gif border=0 align=middle width=30 height=30 alt=next> +no_next_page_text: +prev_page_text: <img src=/htdig/buttonl.gif border=0 align=middle width=30 height=30 alt=prev> +no_prev_page_text: +page_number_text: "<img src=/htdig/button1.gif border=0 align=middle width=30 height=30 alt=1>" \ + "<img src=/htdig/button2.gif border=0 align=middle width=30 height=30 alt=2>" \ + "<img src=/htdig/button3.gif border=0 align=middle width=30 height=30 alt=3>" \ + "<img src=/htdig/button4.gif border=0 align=middle width=30 height=30 alt=4>" \ + "<img src=/htdig/button5.gif border=0 align=middle width=30 height=30 alt=5>" \ + "<img src=/htdig/button6.gif border=0 align=middle width=30 height=30 alt=6>" \ + "<img src=/htdig/button7.gif border=0 align=middle width=30 height=30 alt=7>" \ + "<img src=/htdig/button8.gif border=0 align=middle width=30 height=30 alt=8>" \ + "<img src=/htdig/button9.gif border=0 align=middle width=30 height=30 alt=9>" \ + "<img src=/htdig/button10.gif border=0 align=middle width=30 height=30 alt=10>" +# +# To make the current page stand out, we will put a border around the +# image for that page. 
+# +no_page_number_text: "<img src=/htdig/button1.gif border=2 align=middle width=30 height=30 alt=1>" \ + "<img src=/htdig/button2.gif border=2 align=middle width=30 height=30 alt=2>" \ + "<img src=/htdig/button3.gif border=2 align=middle width=30 height=30 alt=3>" \ + "<img src=/htdig/button4.gif border=2 align=middle width=30 height=30 alt=4>" \ + "<img src=/htdig/button5.gif border=2 align=middle width=30 height=30 alt=5>" \ + "<img src=/htdig/button6.gif border=2 align=middle width=30 height=30 alt=6>" \ + "<img src=/htdig/button7.gif border=2 align=middle width=30 height=30 alt=7>" \ + "<img src=/htdig/button8.gif border=2 align=middle width=30 height=30 alt=8>" \ + "<img src=/htdig/button9.gif border=2 align=middle width=30 height=30 alt=9>" \ + "<img src=/htdig/button10.gif border=2 align=middle width=30 height=30 alt=10>" + +# local variables: +# mode: text +# eval: (if (eq window-system 'x) (progn (setq font-lock-keywords (list '("^#.*" . font-lock-keyword-face) '("^[a-zA-Z][^ :]+" . font-lock-function-name-face) '("[+$]*:" . font-lock-comment-face) )) (font-lock-mode))) +# end: diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf2.in b/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf2.in new file mode 100644 index 00000000..370508e9 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf2.in @@ -0,0 +1,189 @@ +# +# Example config file for ht://Dig. +# +# This configuration file is used by all the programs that make up ht://Dig. +# Please refer to the attribute reference manual for more details on what +# can be put into this file. (http://www.htdig.org/confindex.html) +# Note that most attributes have very reasonable default values so you +# really only have to add attributes here if you want to change the defaults. +# +# What follows are some of the common attributes you might want to change. +# + +# Specifies the directory for files that will or can be +# shared among different search databases. 
The default +# value for this attribute is defined at compile time. +common_dir: _SRCDIR_/../installdir + +# +# Specify where the database files need to go. Make sure that there is +# plenty of free disk space available for the databases. They can get +# pretty big. +# +database_dir: _TESTDIR_/var/htdig2 + +# +# This specifies the URL where the robot (htdig) will start. You can specify +# multiple URLs here. Just separate them by some whitespace. +# The example here will cause the ht://Dig homepage and related pages to be +# indexed. +# You could also index all the URLs in a file like so: +# start_url: `${common_dir}/start.url` +# +start_url: http://localhost:_PORT_/set1/ +local_urls: http://localhost:_PORT_/=_TESTDIR_/htdocs/ +bad_local_extensions: .php .shtml .htm + +# +# This attribute limits the scope of the indexing process. The default is to +# set it to the same as the start_url above. This way only pages that are on +# the sites specified in the start_url attribute will be indexed and it will +# reject any URLs that go outside of those sites. +# +# Keep in mind that the value for this attribute is just a list of string +# patterns. As long as URLs contain at least one of the patterns it will be +# seen as part of the scope of the index. +# +limit_urls_to: ${start_url} + +# +# If there are particular pages that you definitely do NOT want to index, you +# can use the exclude_urls attribute. The value is a list of string patterns. +# If a URL matches any of the patterns, it will NOT be indexed. This is +# useful to exclude things like virtual web trees or database accesses. By +# default, all CGI URLs will be excluded. (Note that the /cgi-bin/ convention +# may not work on your web server. Check the path prefix used on your web +# server.) +# +exclude_urls: /cgi-bin/ .cgi /CVS + +# +# The string htdig will send in every request to identify the robot. Change +# this to your email address. 
+# +maintainer: _USER_ + +# +# The excerpts that are displayed in long results rely on stored information +# in the index databases. The compiled default only stores 512 characters of +# text from each document (this excludes any HTML markup...) If you plan on +# using the excerpts you probably want to make this larger. The only concern +# here is that more disk space is going to be needed to store the additional +# information. Since disk space is cheap (! :-)) you might want to set this +# to a value so that a large percentage of the documents that you are going +# to be indexing are stored completely in the database. At SDSU we found +# that by setting this value to about 50k the index would get 97% of all +# documents completely and only 3% was cut off at 50k. You probably want to +# experiment with this value. +# Note that if you want to set this value low, you probably want to set the +# excerpt_show_top attribute to false so that the top excerpt_length characters +# of the document are always shown. +# +max_head_length: 10000 + +# +# To limit network connections, ht://Dig will only pull up to a certain limit +# of bytes. This prevents the indexing from dying because the server keeps +# sending information. However, several FAQs happen because people have files +# bigger than the default limit of 100KB. This sets the default a bit higher. +# (see <http://www.htdig.org/FAQ.html> for more) +# +max_doc_size: 200000 + +# This sets the maximum length of words that will be +# indexed. Words longer than this value will be silently +# truncated when put into the index, or searched in the +# index. +maximum_word_length: 50 + +# +# Most people expect some sort of excerpt in results. By default, if the +# search words aren't found in context in the stored excerpt, htsearch shows +# the text defined in the no_excerpt_text attribute: +# (None of the search words were found in the top of this document.) +# This attribute instead will show the top of the excerpt. 
+# +no_excerpt_show_top: true + +# +# Depending on your needs, you might want to enable some of the fuzzy search +# algorithms. There are several to choose from and you can use them in any +# combination you feel comfortable with. Each algorithm will get a weight +# assigned to it so that in combinations of algorithms, certain algorithms get +# preference over others. Note that the weights only affect the ranking of +# the results, not the actual searching. +# The available algorithms are: +# exact +# endings +# metaphone +# prefix +# regex +# soundex +# synonyms +# By default only the "exact" algorithm is used with weight 1. +# Note that if you are going to use the endings, metaphone, soundex, +# or synonyms algorithms, you will need to run htfuzzy to generate +# the databases they use. +# +search_algorithm: exact:1 + +# +# The following are the templates used in the builtin search results +# The default is to use compiled versions of these files, which produces +# slightly faster results. However, uncommenting these lines makes it +# very easy to change the format of search results. +# See <http://www.htdig.org/hts_templates.html> for more details. +# +# template_map: Long long ${common_dir}/long.html \ +# Short short ${common_dir}/short.html +# template_name: long + +# +# Enable extended features of WordList +# +wordlist_extend: true + +# +# The following are used to change the text for the page index. +# The defaults are just boring text numbers. These images spice +# up the result pages quite a bit. 
(Feel free to do whatever, though) +# +next_page_text: <img src=/htdig/buttonr.gif border=0 align=middle width=30 height=30 alt=next> +no_next_page_text: +prev_page_text: <img src=/htdig/buttonl.gif border=0 align=middle width=30 height=30 alt=prev> +no_prev_page_text: +page_number_text: "<img src=/htdig/button1.gif border=0 align=middle width=30 height=30 alt=1>" \ + "<img src=/htdig/button2.gif border=0 align=middle width=30 height=30 alt=2>" \ + "<img src=/htdig/button3.gif border=0 align=middle width=30 height=30 alt=3>" \ + "<img src=/htdig/button4.gif border=0 align=middle width=30 height=30 alt=4>" \ + "<img src=/htdig/button5.gif border=0 align=middle width=30 height=30 alt=5>" \ + "<img src=/htdig/button6.gif border=0 align=middle width=30 height=30 alt=6>" \ + "<img src=/htdig/button7.gif border=0 align=middle width=30 height=30 alt=7>" \ + "<img src=/htdig/button8.gif border=0 align=middle width=30 height=30 alt=8>" \ + "<img src=/htdig/button9.gif border=0 align=middle width=30 height=30 alt=9>" \ + "<img src=/htdig/button10.gif border=0 align=middle width=30 height=30 alt=10>" +# +# To make the current page stand out, we will put a border around the +# image for that page. 
+# +no_page_number_text: "<img src=/htdig/button1.gif border=2 align=middle width=30 height=30 alt=1>" \ + "<img src=/htdig/button2.gif border=2 align=middle width=30 height=30 alt=2>" \ + "<img src=/htdig/button3.gif border=2 align=middle width=30 height=30 alt=3>" \ + "<img src=/htdig/button4.gif border=2 align=middle width=30 height=30 alt=4>" \ + "<img src=/htdig/button5.gif border=2 align=middle width=30 height=30 alt=5>" \ + "<img src=/htdig/button6.gif border=2 align=middle width=30 height=30 alt=6>" \ + "<img src=/htdig/button7.gif border=2 align=middle width=30 height=30 alt=7>" \ + "<img src=/htdig/button8.gif border=2 align=middle width=30 height=30 alt=8>" \ + "<img src=/htdig/button9.gif border=2 align=middle width=30 height=30 alt=9>" \ + "<img src=/htdig/button10.gif border=2 align=middle width=30 height=30 alt=10>" + +# local variables: +# mode: text +# eval: (if (eq window-system 'x) (progn (setq font-lock-keywords (list '("^#.*" . font-lock-keyword-face) '("^[a-zA-Z][^ :]+" . font-lock-function-name-face) '("[+$]*:" . font-lock-comment-face) )) (font-lock-mode))) +# end: + +# +# If file's extension isn't known, *don't* classify by contents +# (Ideally, should install HtFileType{,-magic} in .../test/ ) +# +content_classifier: diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf3.in b/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf3.in new file mode 100644 index 00000000..75151b8d --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/htdig.conf3.in @@ -0,0 +1,193 @@ +# +# Example config file for ht://Dig. +# +# This configuration file is used by all the programs that make up ht://Dig. +# Please refer to the attribute reference manual for more details on what +# can be put into this file. (http://www.htdig.org/confindex.html) +# Note that most attributes have very reasonable default values so you +# really only have to add attributes here if you want to change the defaults. 
+# +# What follows are some of the common attributes you might want to change. +# + +# Specifies the directory for files that will or can be +# shared among different search databases. The default +# value for this attribute is defined at compile time. +common_dir: _SRCDIR_/../installdir + +# +# Specify where the database files need to go. Make sure that there is +# plenty of free disk space available for the databases. They can get +# pretty big. +# +database_dir: _TESTDIR_/var/htdig + +# +# This specifies the URL where the robot (htdig) will start. You can specify +# multiple URLs here. Just separate them by some whitespace. +# The example here will cause the ht://Dig homepage and related pages to be +# indexed. +# You could also index all the URLs in a file like so: +# start_url: `${common_dir}/start.url` +# +start_url: http://localhost:_PORT_/set1/ +local_urls: http://localhost:_PORT_/=_TESTDIR_/htdocs/ + +# +# This attribute limits the scope of the indexing process. The default is to +# set it to the same as the start_url above. This way only pages that are on +# the sites specified in the start_url attribute will be indexed and it will +# reject any URLs that go outside of those sites. +# +# Keep in mind that the value for this attribute is just a list of string +# patterns. As long as URLs contain at least one of the patterns it will be +# seen as part of the scope of the index. +# +limit_urls_to: ${start_url} + +# +# If there are particular pages that you definitely do NOT want to index, you +# can use the exclude_urls attribute. The value is a list of string patterns. +# If a URL matches any of the patterns, it will NOT be indexed. This is +# useful to exclude things like virtual web trees or database accesses. By +# default, all CGI URLs will be excluded. (Note that the /cgi-bin/ convention +# may not work on your web server. Check the path prefix used on your web +# server.) 
+# +exclude_urls: /cgi-bin/ .cgi + +# +# The string htdig will send in every request to identify the robot. Change +# this to your email address. +# +maintainer: _USER_ + +# +# The excerpts that are displayed in long results rely on stored information +# in the index databases. The compiled default only stores 512 characters of +# text from each document (this excludes any HTML markup...) If you plan on +# using the excerpts you probably want to make this larger. The only concern +# here is that more disk space is going to be needed to store the additional +# information. Since disk space is cheap (! :-)) you might want to set this +# to a value so that a large percentage of the documents that you are going +# to be indexing are stored completely in the database. At SDSU we found +# that by setting this value to about 50k the index would get 97% of all +# documents completely and only 3% was cut off at 50k. You probably want to +# experiment with this value. +# Note that if you want to set this value low, you probably want to set the +# excerpt_show_top attribute to false so that the top excerpt_length characters +# of the document are always shown. +# +max_head_length: 10000 + +# +# To limit network connections, ht://Dig will only pull up to a certain limit +# of bytes. This prevents the indexing from dying because the server keeps +# sending information. However, several FAQs happen because people have files +# bigger than the default limit of 100KB. This sets the default a bit higher. +# (see <http://www.htdig.org/FAQ.html> for more) +# +max_doc_size: 200000 + +# This sets the maximum length of words that will be +# indexed. Words longer than this value will be silently +# truncated when put into the index, or searched in the +# index. +maximum_word_length: 50 + +# +# Most people expect some sort of excerpt in results. 
By default, if the +# search words aren't found in context in the stored excerpt, htsearch shows +# the text defined in the no_excerpt_text attribute: +# (None of the search words were found in the top of this document.) +# This attribute instead will show the top of the excerpt. +# +no_excerpt_show_top: true + +# +# Depending on your needs, you might want to enable some of the fuzzy search +# algorithms. There are several to choose from and you can use them in any +# combination you feel comfortable with. Each algorithm will get a weight +# assigned to it so that in combinations of algorithms, certain algorithms get +# preference over others. Note that the weights only affect the ranking of +# the results, not the actual searching. +# The available algorithms are: +# exact +# endings +# metaphone +# prefix +# regex +# soundex +# synonyms +# By default only the "exact" algorithm is used with weight 1. +# Note that if you are going to use the endings, metaphone, soundex, +# or synonyms algorithms, you will need to run htfuzzy to generate +# the databases they use. +# +search_algorithm: exact:1 + +# +# The following are the templates used in the builtin search results +# The default is to use compiled versions of these files, which produces +# slightly faster results. However, uncommenting these lines makes it +# very easy to change the format of search results. +# See <http://www.htdig.org/hts_templates.html> for more details. +# +# template_map: Long long ${common_dir}/long.html \ +# Short short ${common_dir}/short.html +# template_name: long + +# +# Enable extended features of WordList +# +wordlist_extend: true + +# +# The following are used to change the text for the page index. +# The defaults are just boring text numbers. These images spice +# up the result pages quite a bit. 
(Feel free to do whatever, though) +# +next_page_text: <img src=/htdig/buttonr.gif border=0 align=middle width=30 height=30 alt=next> +no_next_page_text: +prev_page_text: <img src=/htdig/buttonl.gif border=0 align=middle width=30 height=30 alt=prev> +no_prev_page_text: +page_number_text: "<img src=/htdig/button1.gif border=0 align=middle width=30 height=30 alt=1>" \ + "<img src=/htdig/button2.gif border=0 align=middle width=30 height=30 alt=2>" \ + "<img src=/htdig/button3.gif border=0 align=middle width=30 height=30 alt=3>" \ + "<img src=/htdig/button4.gif border=0 align=middle width=30 height=30 alt=4>" \ + "<img src=/htdig/button5.gif border=0 align=middle width=30 height=30 alt=5>" \ + "<img src=/htdig/button6.gif border=0 align=middle width=30 height=30 alt=6>" \ + "<img src=/htdig/button7.gif border=0 align=middle width=30 height=30 alt=7>" \ + "<img src=/htdig/button8.gif border=0 align=middle width=30 height=30 alt=8>" \ + "<img src=/htdig/button9.gif border=0 align=middle width=30 height=30 alt=9>" \ + "<img src=/htdig/button10.gif border=0 align=middle width=30 height=30 alt=10>" +# +# To make the current page stand out, we will put a border around the +# image for that page. 
+# +no_page_number_text: "<img src=/htdig/button1.gif border=2 align=middle width=30 height=30 alt=1>" \ + "<img src=/htdig/button2.gif border=2 align=middle width=30 height=30 alt=2>" \ + "<img src=/htdig/button3.gif border=2 align=middle width=30 height=30 alt=3>" \ + "<img src=/htdig/button4.gif border=2 align=middle width=30 height=30 alt=4>" \ + "<img src=/htdig/button5.gif border=2 align=middle width=30 height=30 alt=5>" \ + "<img src=/htdig/button6.gif border=2 align=middle width=30 height=30 alt=6>" \ + "<img src=/htdig/button7.gif border=2 align=middle width=30 height=30 alt=7>" \ + "<img src=/htdig/button8.gif border=2 align=middle width=30 height=30 alt=8>" \ + "<img src=/htdig/button9.gif border=2 align=middle width=30 height=30 alt=9>" \ + "<img src=/htdig/button10.gif border=2 align=middle width=30 height=30 alt=10>" + +# local variables: +# mode: text +# eval: (if (eq window-system 'x) (progn (setq font-lock-keywords (list '("^#.*" . font-lock-keyword-face) '("^[a-zA-Z][^ :]+" . font-lock-function-name-face) '("[+$]*:" . font-lock-comment-face) )) (font-lock-mode))) +# end: + +boolean_keywords: et ou non + +boolean_syntax_errors: Attendait "un mot" "à la fin" \ + "au lieu de" "fin d'expression" "points de quotation" + +search_rewrite_rules: (.*)site(.*) \\1place\\2 + +multimatch_factor: 2 + + diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/httpd.conf.in b/debian/htdig/htdig-3.2.0b6/test/conf/httpd.conf.in new file mode 100644 index 00000000..744af823 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/httpd.conf.in @@ -0,0 +1,215 @@ +# This is the main server configuration file. See URL http://www.apache.org/ +# for instructions. + +# Do NOT simply read the instructions in here without understanding +# what they do, if you are unsure consult the online docs. You have been +# warned. 
+ +# Originally by Rob McCool + +<IfModule mod_so.c> +#LoadModule mmap_static_module _MODULES_/mod_mmap_static.so +LoadModule env_module _MODULES_/mod_env.so +LoadModule config_log_module _MODULES_/mod_log_config.so +#LoadModule agent_log_module _MODULES_/mod_log_agent.so +#LoadModule referer_log_module _MODULES_/mod_log_referer.so +#LoadModule mime_magic_module _MODULES_/mod_mime_magic.so +LoadModule mime_module _MODULES_/mod_mime.so +LoadModule negotiation_module _MODULES_/mod_negotiation.so +LoadModule status_module _MODULES_/mod_status.so +LoadModule info_module _MODULES_/mod_info.so +LoadModule includes_module _MODULES_/mod_include.so +LoadModule autoindex_module _MODULES_/mod_autoindex.so +LoadModule dir_module _MODULES_/mod_dir.so +LoadModule cgi_module _MODULES_/mod_cgi.so +LoadModule asis_module _MODULES_/mod_asis.so +LoadModule imap_module _MODULES_/mod_imap.so +LoadModule action_module _MODULES_/mod_actions.so +#LoadModule speling_module _MODULES_/mod_speling.so +LoadModule userdir_module _MODULES_/mod_userdir.so +LoadModule proxy_module _MODULES_/libproxy.so +LoadModule alias_module _MODULES_/mod_alias.so +#LoadModule rewrite_module _MODULES_/mod_rewrite.so +LoadModule access_module _MODULES_/mod_access.so +LoadModule auth_module _MODULES_/mod_auth.so +LoadModule anon_auth_module _MODULES_/mod_auth_anon.so +#LoadModule dbm_auth_module _MODULES_/mod_auth_dbm.so +#LoadModule db_auth_module _MODULES_/mod_auth_db.so +LoadModule digest_module _MODULES_/mod_digest.so +#LoadModule cern_meta_module _MODULES_/mod_cern_meta.so +LoadModule expires_module _MODULES_/mod_expires.so +LoadModule headers_module _MODULES_/mod_headers.so +LoadModule usertrack_module _MODULES_/mod_usertrack.so +#LoadModule example_module _MODULES_/mod_example.so +#LoadModule unique_id_module _MODULES_/mod_unique_id.so +LoadModule setenvif_module _MODULES_/mod_setenvif.so + +# Extra Modules +#LoadModule php_module _MODULES_/mod_php.so +#LoadModule php3_module _MODULES_/libphp3.so 
+#LoadModule perl_module _MODULES_/libperl.so + +# Reconstruction of the complete module list from all available modules +# (static and shared ones) to achieve correct module execution order. +# [WHENEVER YOU CHANGE THE LOADMODULE SECTION ABOVE UPDATE THIS, TOO] +ClearModuleList +#AddModule mod_mmap_static.c +AddModule mod_env.c +AddModule mod_log_config.c +#AddModule mod_log_agent.c +#AddModule mod_log_referer.c +#AddModule mod_mime_magic.c +AddModule mod_mime.c +AddModule mod_negotiation.c +AddModule mod_status.c +AddModule mod_info.c +AddModule mod_include.c +AddModule mod_autoindex.c +AddModule mod_dir.c +AddModule mod_cgi.c +AddModule mod_asis.c +AddModule mod_imap.c +AddModule mod_actions.c +#AddModule mod_speling.c +AddModule mod_userdir.c +AddModule mod_proxy.c +AddModule mod_alias.c +#AddModule mod_rewrite.c +AddModule mod_access.c +AddModule mod_auth.c +AddModule mod_auth_anon.c +#AddModule mod_auth_dbm.c +#AddModule mod_auth_db.c +AddModule mod_digest.c +#AddModule mod_cern_meta.c +AddModule mod_expires.c +AddModule mod_headers.c +AddModule mod_usertrack.c +#AddModule mod_example.c +#AddModule mod_unique_id.c +AddModule mod_so.c +AddModule mod_setenvif.c +</IfModule> + +# ServerType is either inetd, or standalone. + +ServerType standalone + +# If you are running from inetd, go to "ServerAdmin". + +# Port: The port the standalone listens to. For ports < 1023, you will +# need httpd to be run as root initially. + +Port _PORT_ + +# If you wish httpd to run as a different user or group, you must run +# httpd as root initially and it will switch. + +# User/Group: The name (or #number) of the user/group to run httpd as. + +User _USER_ + +# ServerAdmin: Your address, where problems with the server should be +# e-mailed. + +ServerAdmin _USER_ + +ResourceConfig _TESTDIR_/conf/srm.conf +AccessConfig _TESTDIR_/conf/access.conf +LockFile _TESTDIR_/logs/accept.lock + +# BindAddress: You can support virtual hosts with this option. 
This option +# is used to tell the server which IP address to listen to. It can either +# contain "*", an IP address, or a fully qualified Internet domain name. +# See also the VirtualHost directive. + +BindAddress * + +# ErrorLog: The location of the error log file. If this does not start +# with /, ServerRoot is prepended to it. + +ErrorLog _TESTDIR_/logs/error_log + +# TransferLog: The location of the transfer log file. If this does not +# start with /, ServerRoot is prepended to it. + +TransferLog _TESTDIR_/logs/access_log + +# PidFile: The file the server should log its pid to +PidFile _TESTDIR_/logs/httpd.pid + +# ServerName allows you to set a host name which is sent back to clients for +# your server if it's different than the one the program would get (i.e. use +# "www" instead of the host's real name). +# +# Note: You cannot just invent host names and hope they work. The name you +# define here must be a valid DNS name for your host. If you don't understand +# this, ask your network administrator. + +ServerName localhost + +# CacheNegotiatedDocs: By default, Apache sends Pragma: no-cache with each +# document that was negotiated on the basis of content. This asks proxy +# servers not to cache the document. Uncommenting the following line disables +# this behavior, and proxies will be allowed to cache the documents. + +# CacheNegotiatedDocs + +# Timeout: The number of seconds before receives and sends time out +# n.b. the compiled default is 1200 (20 minutes !) + +Timeout 400 + +# Server-pool size regulation. Rather than making you guess how many +# server processes you need, Apache dynamically adapts to the load it +# sees --- that is, it tries to maintain enough server processes to +# handle the current load, plus a few spare servers to handle transient +# load spikes (e.g., multiple simultaneous requests from a single +# Netscape browser). + +# It does this by periodically checking how many servers are waiting +# for a request. 
If there are fewer than MinSpareServers, it creates +# a new spare. If there are more than MaxSpareServers, some of the +# spares die off. These values are probably OK for most sites --- + +MinSpareServers 2 +MaxSpareServers 4 + +# Number of servers to start --- should be a reasonable ballpark figure. + +StartServers 2 + +# Limit on total number of servers running, i.e., limit on the number +# of clients who can simultaneously connect --- if this limit is ever +# reached, clients will be LOCKED OUT, so it should NOT BE SET TOO LOW. +# It is intended mainly as a brake to keep a runaway server from taking +# Unix with it as it spirals down... + +MaxClients 30 + +# MaxRequestsPerChild: the number of requests each child process is +# allowed to process before the child dies. +# The child will exit so as to avoid problems after prolonged use when +# Apache (and maybe the libraries it uses) leak. On most systems, this +# isn't really needed, but a few (such as Solaris) do have notable leaks +# in the libraries. + +MaxRequestsPerChild 30 + +#CookieExpires 10 + +# VirtualHost: Allows the daemon to respond to requests for more than one +# server address, if your server machine is configured to accept IP packets +# for multiple addresses. This can be accomplished with the ifconfig +# alias flag, or through kernel patches like VIF. + +# Any httpd.conf or srm.conf directive may go into a VirtualHost command. +# See also the BindAddress entry. 
+ +#<VirtualHost host.foo.com> +#ServerAdmin [email protected] +#DocumentRoot /www/docs/host.foo.com +#ServerName host.foo.com +#ErrorLog logs/host.foo.com-error_log +#TransferLog logs/host.foo.com-access_log +#</VirtualHost> diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/main-template b/debian/htdig/htdig-3.2.0b6/test/conf/main-template new file mode 100644 index 00000000..2820ad49 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/main-template @@ -0,0 +1,37 @@ +<html> +CGI =${CGI}- <br> +CONFIG =${CONFIG}- <br> +EXCLUDE =${EXCLUDE}- <br> +FIRSTDISPLAYED =${FIRSTDISPLAYED}- <br> +FORMAT =${FORMAT}- <br> +KEYWORDS =${KEYWORDS}- <br> +LASTDISPLAYED =${LASTDISPLAYED}- <br> +LOGICAL_WORDS =${LOGICAL_WORDS}- <br> +MATCHES =${MATCHES}- <br> +MATCHES_PER_PAGE=${MATCHES_PER_PAGE}- <br> +MATCH_MESSAGE =${MATCH_MESSAGE}- <br> +MAX_STARS =${MAX_STARS}- <br> +METADESCRIPTION =${METADESCRIPTION}- <br> +METHOD =${METHOD}- <br> +MODIFIED =${MODIFIED}- <br> +NEXTPAGE =${NEXTPAGE}- <br> +NSTARS =${NSTARS}- <br> +PAGE =${PAGE}- <br> +PAGEHEADER =${PAGEHEADER}- <br> +PAGELIST =${PAGELIST}- <br> +PAGES =${PAGES}- <br> +PLURAL_MATCHES =${PLURAL_MATCHES}- <br> +PREVPAGE =${PREVPAGE}- <br> +RESTRICT =${RESTRICT}- <br> +SELECTED_FORMAT =${SELECTED_FORMAT}- <br> +SELECTED_METHOD =${SELECTED_METHOD}- <br> +SELECTED_SORT =${SELECTED_SORT}- <br> +SORT =${SORT}- <br> +starting date =${STARTYEAR}/${STARTMONTH}/${STARTDAY}-<br> +ending date =${ENDYEAR}/${ENDMONTH}/${ENDDAY}-<br> +VERSION =${VERSION}- <br> +WORDS =${WORDS}- <br> +MATCH_LIST =${MATCH_LIST} <br> + +$(HTSEARCH_RESULTS) +</html> diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/mime.types.in b/debian/htdig/htdig-3.2.0b6/test/conf/mime.types.in new file mode 100644 index 00000000..8d317869 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/mime.types.in @@ -0,0 +1,83 @@ +# This is a comment. I love comments. 
+ + +application/activemessage +application/andrew-inset +application/applefile +application/atomicmail +application/dca-rft +application/dec-dx +application/mac-binhex40 +application/macwriteii +application/msword +application/news-message-id +application/news-transmission +application/octet-stream bin +application/oda oda +application/pdf pdf +application/msword doc +application/postscript ai eps ps +application/remote-printing +application/rtf rtf +application/slate +application/x-mif mif +application/wita +application/wordperfect5.1 +application/x-csh csh +application/x-dvi dvi +application/x-hdf hdf +application/x-latex latex +application/x-netcdf nc cdf +application/x-sh sh +application/x-tcl tcl +application/x-tex tex +application/x-texinfo texinfo texi +application/x-troff t tr roff +application/x-troff-man man +application/x-troff-me me +application/x-troff-ms ms +application/x-wais-source src +application/zip zip +application/x-bcpio bcpio +application/x-cpio cpio +application/x-gtar gtar +application/x-shar shar +application/x-sv4cpio sv4cpio +application/x-sv4crc sv4crc +application/x-tar tar +application/x-ustar ustar +audio/basic au snd +audio/x-aiff aif aiff aifc +audio/x-wav wav +image/gif gif +image/ief ief +image/jpeg jpeg jpg jpe +image/tiff tiff tif +image/x-cmu-raster ras +image/x-portable-anymap pnm +image/x-portable-bitmap pbm +image/x-portable-graymap pgm +image/x-portable-pixmap ppm +image/x-rgb rgb +image/x-xbitmap xbm +image/x-xpixmap xpm +image/x-xwindowdump xwd +message/external-body +message/news +message/partial +message/rfc822 +multipart/alternative +multipart/appledouble +multipart/digest +multipart/mixed +multipart/parallel +text/html html htm +text/plain txt +text/richtext rtx +text/tab-separated-values tsv +text/x-setext etx +video/mpeg mpeg mpg mpe +video/quicktime qt mov +video/x-msvideo avi +video/x-sgi-movie movie + diff --git a/debian/htdig/htdig-3.2.0b6/test/conf/srm.conf.in 
b/debian/htdig/htdig-3.2.0b6/test/conf/srm.conf.in new file mode 100644 index 00000000..2e92788c --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/conf/srm.conf.in @@ -0,0 +1,191 @@ +# With this document, you define the name space that users see of your http +# server. This file also defines server settings which affect how requests are +# serviced, and how results should be formatted. + +# See the tutorials at http://www.apache.org/ for +# more information. + +# Originally by Rob McCool; Adapted for Apache + + +# DocumentRoot: The directory out of which you will serve your +# documents. By default, all requests are taken from this directory, but +# symbolic links and aliases may be used to point to other locations. + +DocumentRoot _TESTDIR_/htdocs + +# UserDir: The name of the directory which is appended onto a user's home +# directory if a ~user request is received. + +UserDir public_html + +# DirectoryIndex: Name of the file or files to use as a pre-written HTML +# directory index. Separate multiple entries with spaces. + +DirectoryIndex index.html index.htm + +# FancyIndexing is whether you want fancy directory indexing or standard + +FancyIndexing off + +# AddIcon tells the server which icon to show for different files or filename +# extensions + +AddIconByEncoding (CMP,/icons/compressed.gif) x-compress x-gzip +AddIconByType (TXT,/icons/text.gif) text/* +AddIconByType (IMG,/icons/image2.gif) image/* +AddIconByType (SND,/icons/sound2.gif) audio/* +AddIconByType (VID,/icons/movie.gif) video/* +AddIcon /icons/text.gif .ps .shtml +AddIcon /icons/movie.gif .mpg .qt +AddIcon /icons/binary.gif .bin +AddIcon /icons/burst.gif .wrl +AddIcon /icons/binhex.gif .hqx .sit +AddIcon /icons/uu.gif .uu +AddIcon /icons/tar.gif .tar .tar +AddIcon /icons/back.gif .. +AddIcon /icons/dir.gif ^^DIRECTORY^^ +AddIcon /icons/blank.gif ^^BLANKICON^^ + +# DefaultIcon is which icon to show for files which do not have an icon +# explicitly set. 
+ +DefaultIcon /icons/unknown.gif + +# AddDescription allows you to place a short description after a file in +# server-generated indexes. +# Format: AddDescription "description" filename + +# ReadmeName is the name of the README file the server will look for by +# default. Format: ReadmeName name +# +# The server will first look for name.html, include it if found, and it will +# then look for name and include it as plaintext if found. +# +# HeaderName is the name of a file which should be prepended to +# directory indexes. + +ReadmeName README +HeaderName HEADER + +# IndexIgnore is a set of filenames which directory indexing should ignore +# Format: IndexIgnore name1 name2... + +IndexIgnore */.??* *~ *# */HEADER* */README* */RCS */CVS + +# AccessFileName: The name of the file to look for in each directory +# for access control information. + +AccessFileName .htaccess + +# TypesConfig describes where the mime.types file (or equivalent) is +# to be found. + +TypesConfig _TESTDIR_/conf/mime.types + +# DefaultType is the default MIME type for documents which the server +# cannot find the type of from filename extensions. + +DefaultType text/plain + +# AddEncoding allows you to have certain browsers (Mosaic/X 2.1+) uncompress +# information on the fly. Note: Not all browsers support this. + +AddEncoding x-compress Z +AddEncoding x-gzip gz + +# AddLanguage allows you to specify the language of a document. You can +# then use content negotiation to give a browser a file in a language +# it can understand. Note that the suffix does not have to be the same +# as the language keyword --- those with documents in Polish (whose +# net-standard language code is pl) may wish to use "AddLanguage pl .po" +# to avoid the ambiguity with the common suffix for perl scripts. 
+ +AddLanguage en .en +AddLanguage fr .fr +AddLanguage de .de +AddLanguage da .da +AddLanguage el .el +AddLanguage it .it + +# LanguagePriority allows you to give precedence to some languages +# in case of a tie during content negotiation. +# Just list the languages in decreasing order of preference. + +LanguagePriority en fr de + +# Redirect allows you to tell clients about documents which used to exist in +# your server's namespace, but do not anymore. This allows you to tell the +# clients where to look for the relocated document. +# Format: Redirect fakename url +Redirect /a http://localhost:7400/b +Redirect /b http://localhost:7400/c +Redirect /c http://localhost:7400/d +Redirect /d http://localhost:7400/e +Redirect /e http://localhost:7400/f +Redirect /f http://localhost:7400/g +Redirect /g http://localhost:7400/h +Redirect /h http://localhost:7400/i +Redirect /i http://localhost:7400/j +Redirect /j http://localhost:7400/k +Redirect /k http://localhost:7400/l +Redirect /l http://localhost:7400/m + +Redirect /outside http://somewhere/ + +# Aliases: Add here as many aliases as you need (with no limit). The format is +# Alias fakename realname + +Alias /icons/ _TESTDIR_/htdocs/icons/ + +# ScriptAlias: This controls which directories contain server scripts. +# Format: ScriptAlias fakename realname + +ScriptAlias /cgi-bin/ _TESTDIR_/cgi-bin/ + +# If you want to use server side includes, or CGI outside +# ScriptAliased directories, uncomment the following lines. + +# AddType allows you to tweak mime.types without actually editing it, or to +# make certain files to be certain types. 
+# Format: AddType type/subtype ext1 + +AddType foo/bar .foo +AddType text/x-server-parsed-html .shtml +#AddType application/x-httpd-cgi .cgi +#AddType application/x-httpd-cgi .pl +AddHandler cgi-script cgi + +# For server-side includes which will be treated as HTML3 +# for purposes of content negotiation, use + +#AddType text/x-server-parsed-html3 .shtml3 + +# Uncomment the following line to enable Apache's send-asis HTTP file +# feature + +#AddType httpd/send-as-is asis + +# To enable type maps, you might want to use + +#AddType application/x-type-map var + +# If you wish to use server-parsed imagemap files, use + +#AddType application/x-httpd-imap map + +# Customizable error response (Apache style) +# these come in three flavors +# +# 1) plain text +#ErrorDocument 500 "The server made a boo boo. +# n.b. the (") marks it as text, it does not get output +# +# 2) local redirects +#ErrorDocument 404 /missing.html +# to redirect to local url /missing.html +#ErrorDocument 404 /cgi-bin/missing_handler.pl +# n.b. can redirect to a script or a document using server-side-includes. +# +# 3) external redirects +#ErrorDocument 402 http://other.server.com/subscription_info.html diff --git a/debian/htdig/htdig-3.2.0b6/test/dbbench.cc b/debian/htdig/htdig-3.2.0b6/test/dbbench.cc new file mode 100644 index 00000000..ded4ee2a --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/dbbench.cc @@ -0,0 +1,830 @@ +// +// dbbench.cc +// +// dbbench: stress test the Berkeley DB database and WordList interface. 
+// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: dbbench.cc,v 1.13 2004/05/28 13:15:29 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include <htconfig.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ +//#include <fcntl.h> // included later, as non __STDC__ may #define open +#include <errno.h> + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif /* HAVE_GETOPT_H */ +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ +#include <stdlib.h> + +/* AIX requires this to be the first thing in the file. */ +//#ifndef __GNUC__ // Why not if g++? Needed by g++ on Solaris 2.8 +# if HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX +#pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +//#endif + +#include <htString.h> +#include <WordList.h> +#include <WordContext.h> +#include <db.h> + +#include"HtTime.h" +#include"WordMonitor.h" + +#define RAND() ((unsigned int) (1000.0*rand()/(RAND_MAX+1.0))) +/* + * Store all options from the command line + */ +class params_t +{ +public: + char* wordsfile; + char* dbfile; + char* find; + int nwords; + int loop; + DBTYPE type; + int page_size; + int cache_size; + int multiply_keys; + int wordlist; + int compress; + int pool; + int compress_test; + int npage; + int uncompress; + int remove; + int count; + int monitor; + int random; + void show() + { + printf("wordsfile:: %s\n", wordsfile); + printf("dbfile:: %s\n", dbfile); + printf("find:: %s\n", find); + printf("nwords:: %d\n", nwords); + printf("loop:: %d\n", loop); + printf("page_size:: %d\n", page_size); + printf("cache_size:: %d\n", cache_size); + 
printf("multiply_keys:: %d\n", multiply_keys); + printf("wordlist:: %d\n", wordlist); + printf("compress:: %d\n", compress); + printf("pool:: %d\n", pool); + printf("compress_test:: %d\n", compress_test); + printf("npage:: %d\n", npage); + printf("uncompress:: %d\n", uncompress); + printf("remove:: %d\n", remove); + printf("count:: %d\n", count); + printf("monitor:: %d\n", monitor); + } +}; + +/* + * Explain options + */ +static void usage(); +/* + * Verbosity level set with -v (++) + */ +static int verbose = 0; + +// ***************************************************************** +// Test framework +// +class Dbase { +public: + Dbase(params_t* nparams) { params = nparams; } + virtual ~Dbase() {} + + virtual void dbinit() = 0; + void dobench(); + virtual void dbfinish() = 0; + + void fill(); + virtual void fill_one(String& line, int count) = 0; + virtual void find() = 0; + virtual void remove() = 0; + +protected: + params_t* params; +}; + +/* + * Run function according to user specfied options. + */ +void Dbase::dobench() +{ + dbinit(); + + if(params->find) { + find(); + } else if(params->remove) { + remove(); + } else { + fill(); + } + + dbfinish(); +} + +/* + * Generate a list of words from a file. + * Call the fill_one function for each generated word. + */ +void Dbase::fill() { +#define FILL_BUFFER_SIZE (50*1024) + char buffer[FILL_BUFFER_SIZE + 1]; + int count = params->count; + int words_count; + int i; + + fprintf(stderr, "Reading from %s ... 
", params->wordsfile); + + for(i = 0; i < params->loop; i++) { + + FILE* in = fopen(params->wordsfile, "r"); + if(!in) { + fprintf(stderr, "cannot open %s for reading : ", params->wordsfile); + perror(""); + exit(1); + } + + words_count = 0; + + while(fgets(buffer, FILL_BUFFER_SIZE, in)) { + String line(buffer); + line.chop("\r\n"); + + for(int j = 0; j < params->multiply_keys; j++) { + fill_one(line, count); + count++; + } + words_count++; + if(params->nwords > 0 && params->nwords <= words_count) break; + } + + fclose(in); + } + + fprintf(stderr, "pushed %d words\n", count); +} + +// ***************************************************************** +// Test Berkeley DB alone +// +class Dsimple : public Dbase { +public: + Dsimple(params_t* nparams) : Dbase(nparams) { pad = 0; } + virtual ~Dsimple() { if(pad) free(pad); } + + virtual void dbinit(); + void dbinit_env(); + void dbopen(); + virtual void dbfinish(); + + virtual void fill_one(String& line, int count); + virtual void find(); + virtual void remove(); + + void dbput(const String& key, const String& data); + +protected: + DB_ENV* dbenv; + DB* db; + char* pad; +}; + +/* + * Comparison routine for the <int>string keys. + */ +static int +int_cmp(const DBT *a, const DBT *b) +{ + // First compare word + size_t len = (a->size > b->size ? 
b->size : a->size) - (sizeof(unsigned short) + sizeof(int)); + u_int8_t *p1, *p2; + + for (p1 = (u_int8_t*)a->data + sizeof(unsigned short) + sizeof(int), p2 = (u_int8_t*)b->data + sizeof(unsigned short) + sizeof(int); len--; ++p1, ++p2) + if (*p1 != *p2) + return ((long)*p1 - (long)*p2); + + // + // If words compare equal, compare numbers + // + if(a->size == b->size) { + int ai, bi; + memcpy((char*)&ai, ((char*)a->data + sizeof(unsigned short)), sizeof(int)); + memcpy((char*)&bi, ((char*)b->data + sizeof(unsigned short)), sizeof(int)); + + if(ai - bi) + return ai - bi; + + unsigned short as, bs; + memcpy((char*)&as, ((char*)a->data), sizeof(unsigned short)); + memcpy((char*)&bs, ((char*)b->data), sizeof(unsigned short)); + + return as - bs; + } + + return ((long)a->size - (long)b->size); +} + +/* + * Init and Open the database + */ +void Dsimple::dbinit() +{ + dbinit_env(); + dbopen(); +} + +/* + * Prepare the ground for testing. + */ +void Dsimple::dbinit_env() +{ + char *progname = "dbbench problem..."; + + Configuration* config = WordContext::Initialize(); + config->Add("wordlist_env_skip", "true"); + if(params->monitor) + config->Add("wordlist_monitor", "true"); + WordContext::Initialize(*config); + // + // Make sure the size of a record used with raw Berkeley DB is equal to the + // size of a record used with Word classes. 
+ // + { + pad = strdup("0123456789012345678900123456789012345678901234567890"); + // + // Dsimple uses an int (unique count) and short (docid) in addition to the word + // + int pad_length = WordKeyInfo::Instance()->num_length - sizeof(unsigned short) - sizeof(int); + if(pad_length > 0) { + if(pad_length > (int)(strlen(pad) - 1)) { + fprintf(stderr, "Not enough padding\n"); + exit(1); + } + } else { + fprintf(stderr, "WordKey is always bigger than simulated key\n"); + exit(1); + } + pad[pad_length] = '\0'; + } + + int error; + if((error = CDB_db_env_create(&dbenv, 0)) != 0) { + fprintf(stderr, "%s: %s\n", progname, CDB_db_strerror(error)); + exit (1); + } + dbenv->set_errfile(dbenv, stderr); + dbenv->set_errpfx(dbenv, progname); + if(params->cache_size > 500 * 1024) + dbenv->set_cachesize(dbenv, 0, params->cache_size, 0); + int flags = DB_CREATE | DB_INIT_MPOOL | DB_INIT_LOCK | DB_NOMMAP; + if(!params->pool) + flags |= DB_PRIVATE; + + dbenv->open(dbenv, NULL, NULL, flags, 0666); +} + +/* + * Open of database after dbinit_env + */ +void Dsimple::dbopen() +{ + if(CDB_db_create(&db, dbenv, 0) != 0) + exit(1); + + // Note that prefix is disabled because bt_compare is set and + // bt_prefix is not. + if(params->type == DB_BTREE) db->set_bt_compare(db, int_cmp); + + if(params->page_size) db->set_pagesize(db, params->page_size); + + int flags = DB_CREATE | DB_NOMMAP; + if(params->compress) + flags |= DB_COMPRESS; + if(params->find) + flags |= DB_RDONLY; + + if(db->open(db, params->dbfile, NULL, params->type, flags, 0666) != 0) + exit(1); +} + +/* + * Close the database and free objects + */ +void Dsimple::dbfinish() +{ + (void)db->close(db, 0); + (void)dbenv->close(dbenv, 0); + WordContext::Finish(); +} + +/* + * Create a key from the word in <line> and the unique count in <count> + */ +void Dsimple::fill_one(String& line, int count) +{ + unsigned short docid = params->random ? 
RAND() : ((count >> 16) & 0xff); + String key((char*)&docid, sizeof(unsigned short)); + key.append((char*)&count, sizeof(int)); + key.append(line.get(), line.length()); + key.append(pad); + dbput(key, ""); +} + +/* + * Search for words. + */ +void Dsimple::find() +{ + int seqrc; + DBC* cursor; + DBT key; + DBT data; + + if((seqrc = db->cursor(db, NULL, &cursor, 0)) != 0) + abort(); + + memset(&key, '\0', sizeof(DBT)); + memset(&data, '\0', sizeof(DBT)); + + String word("\0\0\0\0", sizeof(int)); + int next; + + if(strlen(params->find) > 0) { + word.append(params->find, strlen(params->find)); + key.data = word.get(); + key.size = word.length(); + cursor->c_get(cursor, &key, &data, DB_SET_RANGE); + next = DB_NEXT_DUP; + } else { + cursor->c_get(cursor, &key, &data, DB_FIRST); + next = DB_NEXT; + } + + do { + if(verbose == 1) { + int docid; + memcpy(&docid, key.data, sizeof(int)); + String word(((char*)key.data) + sizeof(int), key.size - sizeof(int)); + + fprintf(stderr, "key: docid = %d word = %s\n", docid, (char*)word); + } + // + // Straight dump of the entry + // + if(verbose > 1) { + String k((const char*)key.data, (int)key.size); + String d((const char*)data.data, (int)data.size); + + fprintf(stderr, "key: %s data: %s\n", (char*)k, (char*)d); + } + + key.flags = 0; + } while(cursor->c_get(cursor, &key, &data, next) == 0); + + cursor->c_close(cursor); +} + +/* + * Delete keys + */ +void Dsimple::remove() +{ + int seqrc; + DBC* cursor; + DBT key; + DBT data; + int removed = 0; + + if((seqrc = db->cursor(db, NULL, &cursor, 0)) != 0) + abort(); + + memset(&key, '\0', sizeof(DBT)); + memset(&data, '\0', sizeof(DBT)); + + String word("\0\0\0\0", sizeof(int)); + + cursor->c_get(cursor, &key, &data, DB_FIRST); + + do { + if(verbose) { + int docid; + memcpy(&docid, key.data, sizeof(int)); + String word(((char*)key.data) + sizeof(int), key.size - sizeof(int)); + + fprintf(stderr, "key: docid = %d word = %s\n", docid, (char*)word); + } + + cursor->c_del(cursor, 0); + 
removed++; + if(params->remove < removed) break; + } while(cursor->c_get(cursor, &key, &data, DB_NEXT) == 0); + + cursor->c_close(cursor); +} + +/* + * Wrap a key + data insertion from String to DBT + */ +void Dsimple::dbput(const String& key, const String& data) +{ + DBT k, d; + + memset(&k, 0, sizeof(DBT)); + memset(&d, 0, sizeof(DBT)); + + char* key_string = (char*)alloca(key.length()); + memcpy(key_string, key.get(),key.length()); + k.data = key_string; + k.size = key.length(); + + char* data_string = (char*)alloca(data.length()); + memcpy(data_string, data.get(),data.length()); + d.data = data_string; + d.size = data.length(); + + if((db->put)(db, NULL, &k, &d, 0) != 0) + abort(); +} + +// ***************************************************************** +// Test WordList +// +class Dwordlist : public Dbase { +public: + Dwordlist(params_t* nparams) : Dbase(nparams) {} + + virtual void dbinit(); + virtual void dbfinish(); + + virtual void fill_one(String& line, int count); + virtual void find(); + virtual void remove(); + + void dbput(const String& key, const String& data); + +protected: + WordList* words; +}; + +static Configuration* config = 0; + +/* + * Init and Open the database + */ +void Dwordlist::dbinit() +{ + if(verbose) { + fprintf(stderr, "Dwordlist::dbinit\n"); + params->show(); + } + + config = WordContext::Initialize(); + if(params->cache_size > 500 * 1024) { + String str; + str << params->cache_size; + config->Add("wordlist_cache_size", str); + if(verbose) + fprintf(stderr, "setting cache size to: %s\n", (char*)str); + } + if(params->page_size) { + String str; + str << params->page_size; + config->Add("wordlist_page_size", str); + if(verbose) + fprintf(stderr, "setting page size to: %s\n", (char*)str); + } + if(params->compress) + config->Add("wordlist_compress", "true"); + if(params->monitor) + config->Add("wordlist_monitor", "true"); + + WordContext::Initialize(*config); + + words = new WordList(*config); + + if(verbose) 
WordKeyInfo::Instance()->Show(); + + if(words->Open(params->dbfile, (params->find ? O_RDONLY : O_RDWR)) != OK) + exit(1); +} + +/* + * Close the database and free objects + */ +void Dwordlist::dbfinish() +{ + delete words; + WordContext::Finish(); +} + +/* + * Create a key from the word in <line> and the unique count in <count> + */ +void Dwordlist::fill_one(String& line, int count) +{ + WordReference wordRef; + WordKey& key = wordRef.Key(); + + if(params->random) count = RAND(); + key.SetWord(line); + key.Set(WORD_FIRSTFIELD, count >> 16); + key.Set(WORD_FIRSTFIELD + 1, 0); + key.Set(WORD_FIRSTFIELD + 2, count & 0xffff); + + words->Override(wordRef); +} + +static int +wordlist_walk_callback_file_out(WordList *, WordDBCursor&, const WordReference *word, Object &) +{ + printf("%s\n", (char*)word->Get()); + return OK; +} + +/* + * Search for words. + */ +void Dwordlist::find() +{ + if(strlen(params->find) > 0) { + Object data; + WordKey key; + key.SetWord(params->find); + WordCursor *cursor = words->Cursor(key, + wordlist_walk_callback_file_out, + &data); + cursor->Walk(); + delete cursor; + } else { + words->Write(stdout); + } +} + +/* + * Delete keys + */ +void Dwordlist::remove() +{ +} + +#ifdef HAVE_LIBZ +static void docompress(params_t* params); +#endif /* HAVE_LIBZ */ + +// ***************************************************************************** +// Entry point +// +int main(int ac, char **av) +{ + int c; + extern char *optarg; + params_t params; + + params.wordsfile = strdup("words.uniq"); + params.dbfile = strdup("test"); + params.nwords = -1; + params.loop = 1; + params.type = DB_BTREE; + params.page_size = 4096; + params.cache_size = 0; + params.multiply_keys = 1; + params.compress = 0; + params.wordlist = 0; + params.compress_test = 0; + params.pool = 0; + params.find = 0; + params.npage = 0; + params.remove = 0; + params.count = 0; + params.monitor = 0; + params.random = 0; + + while ((c = getopt(ac, av, "vB:T:C:S:MZf:l:w:k:n:zWp:ur:c:mR")) != -1) + 
{ + switch (c) + { + case 'v': + verbose++; + break; + case 'B': + free(params.dbfile); + params.dbfile = strdup(optarg); + break; + case 'T': + if(!strcmp(optarg, "hash")) { + params.type = DB_HASH; + } else { + params.type = DB_BTREE; + } + break; + case 'C': + params.cache_size = atoi(optarg); + break; + case 'S': + params.page_size = atoi(optarg); + break; + case 'M': + params.pool = 1; + break; + case 'W': + params.wordlist = 1; + break; + case 'z': + params.compress = 1; + break; + case 'f': + params.find = strdup(optarg); + break; + case 'l': + params.loop = atoi(optarg); + break; + case 'w': + free(params.wordsfile); + params.wordsfile = strdup(optarg); + break; + case 'k': + params.multiply_keys = atoi(optarg); + break; + case 'n': + params.nwords = atoi(optarg); + break; + break; + case 'Z': + params.compress_test = 1; + break; + case 'p': + params.npage = atoi(optarg); + break; + case 'u': + params.uncompress = 1; + break; + case 'r': + params.remove = atoi(optarg); + break; + case 'c': + params.count = atoi(optarg); + break; + case 'm': + params.monitor = 1; + break; + case 'R': + params.random = 1; + break; + case '?': + usage(); + break; + } + } + + if(params.compress_test) { +#ifdef HAVE_LIBZ + docompress(¶ms); +#else /* HAVE_LIBZ */ + fprintf(stderr, "compiled without zlib, compression test not available\n"); + exit(1); +#endif /* HAVE_LIBZ */ + } else { + if(params.wordlist) { + Dwordlist bench(¶ms); + bench.dobench(); + } else { + Dsimple bench(¶ms); + bench.dobench(); + } + } + + free(params.wordsfile); + free(params.dbfile); + if(params.find) free(params.find); + + return 0; +} + +// ***************************************************************************** +// void usage() +// Display program usage information +// +static void usage() +{ + printf("usage: dbbench [options]\n"); + printf("Options:\n"); + printf("\t-v\t\tIncreases the verbosity\n"); + printf("\t-B dbfile\tuse <dbfile> as a db file name (default test).\n"); + printf("\t-T 
{hash|btree}\tfile structure (default btree).\n"); + printf("\t-C <size>\tset cache size to <size>.\n"); + printf("\t-S <size>\tset page size to <size>.\n"); + printf("\t-M\t\tuse shared memory pool (default do not use).\n"); + printf("\t-z\t\tSet DB_COMPRESS flag\n"); + printf("\t-R\t\tUse random number for numerical values\n"); + + printf("\n"); + printf("\t-W\t\tuse WordList instead of raw Berkeley DB\n"); + printf("\n"); + printf("\t-f word\t\tfind word and display entries. If empty string show all.\n"); + printf("\t-m\t\tMonitor Word classes activity\n"); + printf("\n"); + printf("\t-r n\t\tRemove <n> first entries.\n"); + printf("\n"); + printf("\t-Z\t\tcompress blocks of existing dbfile.\n"); + printf("\t-p n\t\ttest compress on first <n> pages (default all pages).\n"); + printf("\t-u\t\tuncompress each page & check with original (default don't uncompress).\n"); + printf("\n"); + printf("\t-l loop\t\tread the word file loop times (default 1).\n"); + printf("\t-w file\t\tRead words list from file (default words.uniq).\n"); + printf("\t-k n\t\tcreate <n> entries for each word (default 1).\n"); + printf("\t-n limit\tRead at most <limit> words (default read all).\n"); + printf("\t-c count\tStart serial count at <count> (default 0).\n"); + exit(0); +} + +#ifdef HAVE_LIBZ +/* + * Compress file one block after the other. Intended for mesuring the + * compression overhead. 
+ */ + +extern "C" +{ + extern int CDB___memp_cmpr_inflate(const u_int8_t *, int, u_int8_t * , int , void *); + extern int CDB___memp_cmpr_deflate(const u_int8_t *, int, u_int8_t **, int *, void *); +} + +int compressone(params_t* params, unsigned char* buffin, int buffin_length) { + u_int8_t *buffout = 0; + int buffout_length = 0; + + if(CDB___memp_cmpr_deflate(buffin, buffin_length, &buffout, &buffout_length,NULL) != 0) { + printf("compressone: deflate failed\n"); + abort(); + } + + if(verbose) fprintf(stderr, "compressone: %d\n", buffout_length); + + if(params->uncompress) { + u_int8_t *bufftmp = (u_int8_t*)malloc(buffin_length); + int bufftmp_length = buffin_length; + + if(CDB___memp_cmpr_inflate(buffout, buffout_length, bufftmp, bufftmp_length,NULL) != 0) { + fprintf(stderr, "compressone: inflate failed\n"); + abort(); + } + if(bufftmp_length != buffin_length) abort(); + if(memcmp(bufftmp, buffin, bufftmp_length)) abort(); + + free(bufftmp); + } + + free(buffout); + + return buffout_length > (params->page_size / 2) ? 
1 : 0; +} + +#include <fcntl.h> // if included at top, db->open may have caused problems + +static void docompress(params_t* params) +{ + if(params->page_size == 0) params->page_size = 4096; + int in = open(params->dbfile, O_RDONLY); + unsigned char* buffin = (unsigned char*)malloc(params->page_size); + int read_count; + int overflow = 0; + int count = 0; + + while((read_count = read(in, buffin, params->page_size)) == params->page_size) { + overflow += compressone(params, buffin, params->page_size); + if(params->npage > 1 && params->npage <= count) break; + count++; + } + printf("overflow: %d out of %d\n", overflow, count); + + close(in); +} +#endif /* HAVE_LIBZ */ diff --git a/debian/htdig/htdig-3.2.0b6/test/document.cc b/debian/htdig/htdig-3.2.0b6/test/document.cc new file mode 100644 index 00000000..caa48b2a --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/document.cc @@ -0,0 +1,135 @@ +// +// document.cc +// +// document: Query the document database +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: document.cc,v 1.5 2004/05/28 13:15:29 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> + +#ifdef HAVE_STD +#include <iostream> +#ifdef HAVE_NAMESPACES +using namespace std; +#endif +#else +#include <iostream.h> +#endif /* HAVE_STD */ + +// If we have this, we probably want it. 
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+
+#include "defaults.h"
+#include "DocumentDB.h"
+
+typedef struct {
+  char* config;   // -c: configuration file to read
+  int urls;       // -u: print document URLs
+  int docids;     // -d: print document ids
+} params_t;
+
+static void usage();
+static void dodoc(params_t* params);
+
+static int verbose = 0;
+
+//*****************************************************************************
+// int main(int ac, char **av)
+//   Parse -v/-u/-d/-c, then list the document database via dodoc()
+int main(int ac, char **av)
+{
+  int c;
+  extern char *optarg;
+  params_t params;
+
+  params.config = strdup("???");
+  params.urls = 0;
+  params.docids = 0;
+
+  while ((c = getopt(ac, av, "vudc:")) != -1)
+    {
+      switch (c)
+        {
+        case 'v':
+          verbose++;
+          break;
+        case 'u':
+          params.urls = 1;
+          break;
+        case 'd':
+          params.docids = 1;
+          break;
+        case 'c':
+          free(params.config);
+          params.config = strdup(optarg);
+          break;
+        case '?':
+          usage();
+          break;
+        }
+    }
+
+  dodoc(&params);
+
+  free(params.config);
+
+  return 0;
+}
+
+static void dodoc(params_t* params)  // print DocID and/or URL of every document
+{
+  HtConfiguration* const config= HtConfiguration::config();
+  config->Defaults(&defaults[0]);
+  config->Read(params->config);
+
+  DocumentDB docs;
+  if(docs.Read(config->Find("doc_db"), config->Find("doc_index"), config->Find("doc_excerpt")) < 0) {
+    cerr << "dodoc: cannot open\n";
+    exit(1);
+  }
+
+  List* docids = docs.DocIDs();
+  IntObject* docid = 0;
+  for(docids->Start_Get(); (docid = (IntObject*)docids->Get_Next()); ) {
+    if(params->docids) cout << docid->Value();
+    if(params->urls) {
+      if(params->docids) cout << " ";
+      DocumentRef* docref = docs[docid->Value()];
+      if(docref) cout << docref->DocURL();  // guard: assumes docs[] yields 0 for a missing id -- TODO confirm
+      cout << "\n";
+      delete docref;
+    }
+  }
+  delete docids;
+}
+
+//*****************************************************************************
+// void usage()
+//   Display program usage information, then exit(0)
+//
+static void usage()
+{
+  cout << "usage: document [options]\n";
+  cout << "Options:\n";
+  cout << "\t-v\t\tIncreases the verbosity\n";
+  cout << "\t-u\t\tShow URLs\n";
+  cout << "\t-d\t\tShow DocIDs\n";
+  cout << "\t-c file\tspecify the config file to load\n";
+  exit(0);
+}
+
diff --git a/debian/htdig/htdig-3.2.0b6/test/dummy.affixes b/debian/htdig/htdig-3.2.0b6/test/dummy.affixes
new file mode 100644
index 00000000..0af6fcce
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/dummy.affixes
@@ -0,0 +1,107 @@
+#
+# $Id: dummy.affixes,v 1.1 2003/12/14 01:53:13 lha Exp $
+#
+# Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All modifications to the source code must be clearly marked as
+# such. Binary redistributions based on modified source code
+# must be clearly marked as modified versions in the documentation
+# and/or other materials provided with the distribution.
+# 4. All advertising materials mentioning features or use of this software
+# must display the following acknowledgment:
+# This product includes software developed by Geoff Kuenning and
+# other unpaid contributors.
+# 5. The name of Geoff Kuenning may not be used to endorse or promote
+# products derived from this software without specific prior
+# written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.
IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# Affix table for English +# +# $Log: dummy.affixes,v $ +# Revision 1.1 2003/12/14 01:53:13 lha +# Add t_fuzzy t_templates t_validwords and t_factors to test suite +# +# Revision 1.1.1.1 1997/02/03 17:11:11 turtle +# Initial CVS +# +# Revision 1.16 1995/01/08 23:23:59 geoff +# Add a NeXT to the defstringtype statement so that nextispell can +# select it. +# +# Revision 1.15 1994/01/25 07:12:40 geoff +# Get rid of all old RCS log lines in preparation for the 3.1 release. +# +# + +nroffchars ().\\* +texchars ()\[]{}<\>\\$*.% + +# First we declare the character set. Since it's English, it's easy. +# The only special character is the apostrophe, so that possessives can +# be handled. We declare it as a boundary character, so that quoting with +# single quotes doesn't confuse things. The apostrophe is the only +# character that gets such treatment. +# +# We declare the apostrophe first so that "Jon's" collates before "Jonas". +# (This is the way ASCII does it). +# + +defstringtype "nroff" "nroff" ".mm" ".ms" ".me" ".man" ".NeXT" + +boundarychars ' +wordchars [a-z] [A-Z] + +altstringtype "tex" "tex" ".tex" ".bib" + +# Here's a record of flags used, in case you want to add new ones. +# Right now, we fit within the minimal MASKBITS definition. +# +# ABCDEFGHIJKLMNOPQRSTUVWXYZ +# Used: * * **** ** * ***** *** +# A D GHIJ MN P RSTUV XYZ +# Available: -- -- -- - - - +# BC EF KL O Q W + +# Now the prefix table. 
There are only three prefixes that are truly +# frequent in English, and none of them seem to need conditional variations. +# +prefixes + +flag *A: + . > RE # As in enter > reenter + +flag *I: + . > IN # As in disposed > indisposed + +flag *U: + . > UN # As in natural > unnatural + +# Finally, the suffixes. These are exactly the suffixes that came out +# with the original "ispell"; I haven't tried to improve them. The only +# thing I did besides translate them was to add selected cross-product flags. +# +suffixes +flag *R: + . > RIGHT # As in copy > copyright diff --git a/debian/htdig/htdig-3.2.0b6/test/dummy.stems b/debian/htdig/htdig-3.2.0b6/test/dummy.stems new file mode 100644 index 00000000..c02c0aaf --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/dummy.stems @@ -0,0 +1 @@ +copy/R diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/robots.txt b/debian/htdig/htdig-3.2.0b6/test/htdocs/robots.txt new file mode 100644 index 00000000..1017ea35 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/robots.txt @@ -0,0 +1,5 @@ +User-agent: * +Disallow: /set1/site + +User-agent: htdig +Disallow: diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/bad_local.htm b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/bad_local.htm new file mode 100644 index 00000000..432c8459 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/bad_local.htm @@ -0,0 +1,57 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> +<html> + <head> + <title> + ht://Dig: Where to get it + </title> + </head> + <body bgcolor="#eef7ff"> + <h1> + Where to obtain it + </h1> + <p> + ht://Dig Copyright © 1995-2001 <a href="THANKS.html">The ht://Dig Group</a><br> + Please see the file <a href="COPYING">COPYING</a> for + license information. + </p> + <hr size="4" noshade> + <p> + The current release is + <a href="http://www.htdig.org/files/htdig-3.1.5.tar.gz"> + htdig-3.1.5.tar.gz</a>. Note that this is a gzipped tar file. 
+ You will need the GNU gunzip program, part of the gzip package, + to extract the files in the archive. You can get the latest + version of any of the GNU tools from <a + href="ftp://ftp.gnu.org/pub/gnu/">ftp://ftp.gnu.org/pub/gnu/</a> + The latest beta version of 3.2 is <a + href="http://www.htdig.org/files/htdig-3.2.0b3.tar.gz">htdig-3.2.0b3.tar.gz</a>. + </p> + <p> + All releases of ht://Dig are also now available through <a + href="http://sourceforge.net/">SourceForge</a>. Access is + available through <a + href="http://download.sourceforge.net/htdig/">HTTP</a> and <a + href="ftp://ftp.htdig.org/pub/htdig/">FTP</a>. + <p> + There are also previous releases and context diffs available + through <a + href="http://www.htdig.org/files/">http://www.htdig.org/files/</a> + along with contributed binary releases available through <a + href="http://www.htdig.org/files/binaries/"> + http://www.htdig.org/files/binaries/</a> + Additionally, access is available for ftp through <a + href="ftp://ftp.htdig.org/pub/htdig/">ftp.htdig.org</a>. 
+ </p> + <p> + The latest documentation of ht://Dig is always available at + <a href="http://www.htdig.org/" target="_top">http://www.htdig.org/</a> + </p> + <hr size="4" noshade> + + Last modified: $Date: 2004/01/18 12:54:15 $ +<br> + <a href="http://sourceforge.net/"> + <img src="http://sourceforge.net/sflogo.php?group_id=4593&type=1" width="88" height="31" border="0" alt="SourceForge Logo"></a> + + </body> +</html> diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/index.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/index.html new file mode 100644 index 00000000..45e684bf --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/index.html @@ -0,0 +1,24 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> +<html><head> +<title>ht://Dig Test Pages</title> +</head> + +<body> + +<a href="script.html">JavaScript test</a> +<a href="site%201.html">Site 1</a> +<a href="site2.html">Site2</a> +<a href="site3.html">Site3</a> +<a href="site4.html">Site4</a> +<a href="bad_local.htm">Extension not to be fetched locally</a> +<a href="nph-location.cgi">bad_extension skipped before RetrieveLocal()</a> +<a href="nph-location.foo">To check bad_local_extensions, distinguish from unknown extensions</a> +<a href="title.html">Title Tag test</a> +<a href="sub%2520dir/">URL-encoding test</a> + +<hr> +<address></address> +<!-- hhmts start --> +Last modified: Fri Jan 3 01:16:15 AEST 2003 +<!-- hhmts end --> +</body> </html> diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/nph-location.cgi b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/nph-location.cgi new file mode 100755 index 00000000..a7ad45f5 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/nph-location.cgi @@ -0,0 +1,11 @@ +#!/bin/sh + +echo 'HTTP/1.1 200 OK' +echo 'Connection: close' +echo 'Content-Type: text/html' +echo 'Location: /set3/nph-location.cgi' +echo +cat <<! +This is the content of the +document +! 
diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/nph-location.foo b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/nph-location.foo new file mode 100755 index 00000000..a7ad45f5 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/nph-location.foo @@ -0,0 +1,11 @@ +#!/bin/sh + +echo 'HTTP/1.1 200 OK' +echo 'Connection: close' +echo 'Content-Type: text/html' +echo 'Location: /set3/nph-location.cgi' +echo +cat <<! +This is the content of the +document +! diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/script.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/script.html new file mode 100644 index 00000000..c35c713e --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/script.html @@ -0,0 +1,182 @@ +<html>
+<head>
+<title>Intelligent Network Solutions - Service Node</title>
+<META NAME="title" content="Intelligent Network Solutions - Service Node">
+<META name="description" content=" The Open Telecommunications Service Node, Open SN, is a cost effective means of introducing IN services into non-IN networks. It has the features of a full IN implementation in a single platform and connects to the network using normal call handling signalling.">
+<META name="keywords" content="Open Technology, Open Telecommunications,Intelligent Network,Intelligent Networks,IN,Service Control PointSCP,Open SCP,Service Switching Point,SSP,Intelligent Peripheral,IP,Open IP,Service Node,SN,Open SN,Signaling Interface Processor,Signalling Interface Processor,SIP,Open SIP,Service Switching Function,SSF,Service Control Function,SCF,Specialized Resource Function,Specialised Resource Function,SRF,ITU-T,Capability Set 1,CS1,CS-1,Capability Set 2,CS2,CS-2,Freephone,Calling Card,Prepaid,Mobile,UPT,Universal Personal Telecommunications,Personal Number,Virtual Private Network,VPN,Account Card Calling,Televoting,Mass calling,Abbreviated dial,Universal Messaging,Loop route,TCAP,ISUP,ISDN,SS7,CCS,Common Channel Signalling,Common Channel Signaling,INAP,IN Application Part,Voice mail">
+<META NAME="author" content="Interfuse Media Group http://www.interfuse.com.au">
+
+<SCRIPT LANGUAGE = "JavaScript">
+
+browserName = navigator.appName;
+browserVer = parseInt(navigator.appVersion);
+
+if (browserVer >= 3) version = "3";
+else version = "0";
+
+if (version == "3") {
+
+ homeon = new Image();
+ homeon.src = "../_interface/home2.gif";
+ homeoff = new Image();
+ homeoff.src = "../_interface/home1.gif";
+
+ searchon = new Image();
+ searchon.src = "../_interface/search2.gif";
+ searchoff = new Image();
+ searchoff.src = "../_interface/search1.gif";
+
+ sitemapon = new Image();
+ sitemapon.src = "../_interface/sitemap2.gif";
+ sitemapoff = new Image();
+ sitemapoff.src = "../_interface/sitemap1.gif";
+
+ feedbackon = new Image();
+ feedbackon.src = "../_interface/feedback2.gif";
+ feedbackoff = new Image();
+ feedbackoff.src = "../_interface/feedback1.gif";
+
+ mailon = new Image();
+ mailon.src = "../_interface/mail2.gif";
+ mailoff = new Image();
+ mailoff.src = "../_interface/mail1.gif";
+
+
+ temp1 = new Image(144,62);
+
+ temp2 = new Image(144,62);
+
+
+}
+
+function img_act(imgName) {
+ if (version == "3") {
+ imgOn = eval(imgName + "on.src");
+ document [imgName].src = imgOn;
+ }
+}
+
+function img_inact(imgName) {
+ if (version == "3") {
+ imgOff = eval(imgName + "off.src");
+ document [imgName].src = imgOff;
+ }
+}
+
+function toggle2(an_image, on_off)
+ {
+ if (version == "3")
+ {
+ if (on_off == 'on')
+ document [an_image].src = eval(an_image + "on.src");
+ else if (on_off == 'off')
+ document [an_image].src=eval(an_image + "off.src");
+ }
+ }
+<!--htdig_noindex-->
+</SCRIPT></head>
+
+This should be considered, as htdig_noindex flag within SCRIPT.
+<!--/htdig_noindex-->
+
+<!--htdig_noindex-->
+<SCRIPT> </SCRIPT>
+This should be neglected, as inside its own htdig_noindex, despite
+</SCRIPT> being listed in noindex_end.
+<!--/htdig_noindex-->
+
+
+<body bgcolor="#000000" text="#6C6C6C" link="blue" vlink="blue">
+
+<table border="0" cellpadding="0" cellspacing="0" bgcolor="#ffffff" width="630">
+<tr>
+<td><table border="0" cellpadding="0" cellspacing="0">
+<tr>
+<td><a name="top"><img src="../_interface/topleft.jpg" width="159" height="35" border="0"></a></td>
+<td><a href="../opentel.html" onMouseOver="toggle2('home', 'on');window.status='home';return true" onMouseOut="; toggle2('home', 'off');window.status='';return true;"><img src="../_interface/home1.gif" width="86" height="35" name="home" border="0" alt="home"></a></td>
+<td><a href="../search.html" onMouseOver="toggle2('search', 'on');window.status='search';return true" onMouseOut="; toggle2('search', 'off');window.status='';return true;"><img src="../_interface/search1.gif" width="99" height="35" name="search" border="0" alt="search"></a></td>
+<td><a href="../sitemap.html" onMouseOver="toggle2('sitemap', 'on');window.status='sitemap';return true" onMouseOut="; toggle2('sitemap', 'off');window.status='';return true;"><img src="../_interface/sitemap1.gif" width="125" height="35" name="sitemap" border="0" alt="sitemap"></a></td>
+<td><a href="../feedback.html" onMouseOver="toggle2('feedback', 'on');window.status='feedback';return true" onMouseOut="; toggle2('feedback', 'off');window.status='';return true;"><img src="../_interface/feedback1.gif" width="161" height="35" name="feedback" border="0" alt="feedback"></a></td>
+</tr>
+</table>
+</td>
+</tr>
+<tr>
+<td><table border="0" cellpadding="0" cellspacing="0">
+<tr>
+<td><img src="../_interface/topbar.jpg" width="543" hieght="37"></td>
+<td><a href="mailto:[email protected]" onMouseOver="toggle2('mail', 'on');window.status='e-mail';return true" onMouseOut="; toggle2('mail', 'off');window.status='';return true;"><img src="../_interface/mail1.gif" width="87" height="37" name="mail" border="0" alt="e-mail: [email protected]"></a></td>
+</tr>
+</table>
+</td>
+</tr>
+<tr>
+<td><table border="0" cellpadding="0" cellspacing="0" width="630">
+<tr>
+<td><table border="0" cellpadding="0" cellspacing="0">
+<tr>
+<td><img src="../_interface/topmid_INS_2.jpg" width="552" height="72"></td>
+</tr>
+<tr>
+<td valign="top" align="right"><table border="0" cellpadding="0" cellspacing="0" bgcolor="#ffffff" width="100%"><tr><td valign="top" align="left">
+<font face="arial" size="2">
+<hr><a href="ins_1.html"><img src="_images/1.gif" width="140" height="26" border="0" alt="Intelligent Peripheral"></a><br>
+<a href="ins_2.html"><img src="_images/2.gif" width="140" height="26" border="0" alt="Signalling Interface Processor (SIP)"></a><br>
+<a href="ins_3.html"><img src="_images/3.gif" width="140" height="26" border="0" alt="Service Control Point"></a><br>
+<a href="ins_4.html"><img src="_images/4d.gif" width="140" height="26" border="0" alt="Service Node"></a><br>
+<a href="ins_6.html"><img src="_images/6.gif" width="140" height="26" border="0" alt="Software Development Services"></a><br>
+<a href="ins_7.html"><img src="_images/7.gif" width="140" height="26" border="0" alt="Project Case Study"></a>
+<br><hr><br>
+</td><td><img src="../_interface/space.gif" width="10" height="26"></td><td valign="top">
+<font face="arial" size="3" color="#0080FF">
+<b>Service Node </b></font><br><br>
+<font face="arial" size="2">
+
+The Open Telecommunications Service Node, Open SN, is a cost effective means of introducing IN services into non-IN networks. It has the features of a full IN implementation in a single platform and connects to the network using normal call handling signalling.
+<br><br>The Open SN is different to most service nodes. It has been designed from the ground-up as an IN component. The Open SN functions are easily migrated to a full IN architecture - there is no need to re-implement services to provide higher capacity and a more distributed architecture.
+<br><br><b><i>All IN functions in a single platform</i></b><br><br>
+The Open SN incorporates all the functions of the Open SCP and the Open IP, and the call handling functions normally done by a Service Switching Point (SSP). The latter include:
+<blockquote><li>IN call models for originating and terminating calls
+<li>Trigger detection and processing
+<li>Call gapping and service filtering
+<li>Production of call detail records (e.g. for billing)</blockquote>
+<B><I>Full IN call handling</B></I><BR><BR>
+The Open SN handles calls using the same powerful service logic and service data functions as the <a href="ins_3.html">Open SCP</a> enabling future migration to a full IN architecture.
+<br><br><B><I>Easy network connection</b></i><BR><BR>
+The Open SN connects to the network using the normal call handling protocol, the ISDN User Part (ISUP) available in the SS7 network. There is no need, then, to upgrade network switches to introduce IN; the existing protocols will suffice.
+<br><br>Moreover, by using &ldquo;loop routes&rdquo; it is possible for a single Open SN platform to serve many network switches. This reduces the implementation cost, both in terms of service node hardware and in transmission costs. Loop routes are also utilised by the <a href="ins_2.html">Signalling Interface Processor </a>, where they are explained more fully.
+<br><br><img src="_images/sn-loop.gif" width="400" height="310" alt="SN loop route connection"><br><br>
+
+<B><I>Reliable performance</b></i><br><br>
+All the functions of the Open SN are duplicated for reliability as in other OT IN products.
+<br><br>The Open SN has been designed for a moderate call load of 40 calls/s, which can be increased by installing multiple service node sites. For significantly higher loads, we expect that use of the Open SCP will be more efficient.
+<br><br><b><i>With a full range of OAM features</i></b><br><br>
+As with all OT products, the Open SCP is built for real telecom environments, with a full set of operation, administration and management features, including:<blockquote>
+<li>online loading of new services and new service versions
+<li>online management and backup of data, including voice prompts
+<li>all OAM functions accessible via a text command / response interface or via CORBA objects
+<li>extensive statistics
+<li>comprehensive alarm indications
+<li>diagnostic trace functions for call events</blockquote>
+For further information on the Open SN or any other OT products please contact us at [<a href="mailto:[email protected]">[email protected]</a>].<br><br>
+<a href="#top"><img src="../_interface/top.gif" width="30" height="30" alt="top" border="0"></a>
+</td></tr></table></td>
+</tr>
+</table>
+</td>
+<td valign="top"><img src="../_interface/rightside.gif" width="78" height="282"></td>
+</tr>
+</table>
+</td>
+</tr>
+<tr><td>
+<table border="0"><tr><td>
+
+</tr></td>
+<tr><td><hr>
+<center><img src="../_interface/textanim.gif" width="630" height="40">
+<font face="arial" size="1"><b>Open Telecommunications Pty Ltd</b><br>Level 2/ 53 Walker St North Sydney<br>Phone: +61 02 9964-9633 Fax: +61 02 9957-534<br>E-mail: <a href="[email protected]">[email protected]</a><br>Online: <a href="http://www.ot.com.au" target="top">www.ot.com.au</a><br><br></font>
+</td></tr></table></td></tr></table>
+
+</body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site 1.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site 1.html new file mode 100644 index 00000000..6483f136 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site 1.html @@ -0,0 +1,86 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+<title>Kenfil Distribution (FE) Ltd.</title>
+</head>
+
+<body background="kenbg30.jpg" bgcolor="#FFFFFF" vlink="#FF00FF" leftmargin="0">
+
+<table border="0" width="80%">
+ <tr>
+ <td align="center" valign="top" width="14%"><!--webbot bot="ImageMap" rectangle="(7,0) (139,28) HotFrames.htm" src="Wwwhot30.jpg" align="top" border="0" startspan --><MAP NAME="FrontPageMap"><AREA SHAPE="RECT" COORDS="7, 0, 139, 28" HREF="HotFrames.htm"></MAP><a href="_vti_bin/shtml.dll/index.htm/map"><img align="top" ismap usemap="#FrontPageMap" border="0" height="29" src="Wwwhot30.jpg" width="144"></a><!--webbot bot="ImageMap" endspan i-checksum="3742" --><p><!--webbot bot="ImageMap" default="corporat2.htm" rectangle="(0,3) (128,49) corporat2.htm" src="Wwwclic19in.jpg" align="top" border="0" startspan --><MAP NAME="FrontPageMap1"><AREA SHAPE="RECT" COORDS="0, 3, 128, 49" HREF="corporat2.htm"><AREA COORDS="0,0,10000,10000" HREF="corporat2.htm"></MAP><a href="_vti_bin/shtml.dll/index.htm/map1"><img align="top" ismap usemap="#FrontPageMap1" border="0" height="50" src="Wwwclic19in.jpg" width="130"></a><!--webbot bot="ImageMap" endspan i-checksum="7633" --></p>
+ <p><!--webbot bot="ImageMap" rectangle="(5,112) (179,128) Training/msatecOct.htm" rectangle="(8,1) (152,17) Overview98.htm" rectangle="(6,271) (113,284) orderform.htm" rectangle="(7,136) (185,153) lpp.htm" rectangle="(9,245) (131,258) publisher.htm" rectangle="(5,219) (144,231) technicalform.htm" rectangle="(2,192) (175,205) softwarepulse.htm" rectangle="(3,164) (146,176) ftp://ftp.kenfil.com" rectangle="(4,86) (145,100) Training/trainsch.htm" rectangle="(1,55) (147,73) HotFrames.htm" rectangle="(10,29) (151,47) product_list.htm" src="edgelisting26c.jpg" align="top" border="0" width="187" height="281" startspan --><MAP NAME="FrontPageMap2"><AREA SHAPE="RECT" COORDS="5, 112, 179, 128" HREF="Training/msatecOct.htm"><AREA SHAPE="RECT" COORDS="8, 1, 152, 17" HREF="Overview98.htm"><AREA SHAPE="RECT" COORDS="6, 271, 113, 284" HREF="orderform.htm"><AREA SHAPE="RECT" COORDS="7, 136, 185, 153" HREF="lpp.htm"><AREA SHAPE="RECT" COORDS="9, 245, 131, 258" HREF="publisher.htm"><AREA SHAPE="RECT" COORDS="5, 219, 144, 231" HREF="technicalform.htm"><AREA SHAPE="RECT" COORDS="2, 192, 175, 205" HREF="softwarepulse.htm"><AREA SHAPE="RECT" COORDS="3, 164, 146, 176" HREF="ftp://ftp.kenfil.com"><AREA SHAPE="RECT" COORDS="4, 86, 145, 100" HREF="Training/trainsch.htm"><AREA SHAPE="RECT" COORDS="1, 55, 147, 73" HREF="HotFrames.htm"><AREA SHAPE="RECT" COORDS="10, 29, 151, 47" HREF="product_list.htm"></MAP><a href="_vti_bin/shtml.dll/index.htm/map2"><img align="top" ismap usemap="#FrontPageMap2" border="0" height="281" src="edgelisting26c.jpg" width="187"></a><!--webbot bot="ImageMap" endspan i-checksum="35056" --><br>
+ </p>
+ <p align="left"><!--webbot bot="ImageMap" rectangle="(1,0) (49,32) contact.htm#Hong+Kong+Office:" src="Hkflag_s.gif" align="left" hspace="3" border="0" startspan --><MAP NAME="FrontPageMap3"><AREA SHAPE="RECT" COORDS="1, 0, 49, 32" HREF="contact.htm#Hong Kong Office:"></MAP><a href="_vti_bin/shtml.dll/index.htm/map3"><img align="left" ismap usemap="#FrontPageMap3" border="0" height="33" hspace="3" src="Hkflag_s.gif" width="50"></a><!--webbot bot="ImageMap" endspan i-checksum="20174" --><a href="contact.htm#Kenfil Distribution (Far East) Ltd."><br>
+ </a><a href="contact.htm"><font size="2" color="#FFFF00"><strong>Hong Kong Office</strong></font></a></p>
+ <p align="left"><strong><!--webbot bot="ImageMap" rectangle="(3,0) (49,32) contact.htm#KENFIL+DISTRIBUTION+(M)+SDN.+BHD." src="Malay_s.jpg" align="left" hspace="6" border="0" startspan --><MAP NAME="FrontPageMap4"><AREA SHAPE="RECT" COORDS="3, 0, 49, 32" HREF="contact.htm#KENFIL DISTRIBUTION (M) SDN. BHD."></MAP><a href="_vti_bin/shtml.dll/index.htm/map4"><img align="left" ismap usemap="#FrontPageMap4" border="0" height="33" hspace="6" src="Malay_s.jpg" width="50"></a><!--webbot bot="ImageMap" endspan i-checksum="53177" --><br>
+ <a href="contact.htm#KENFIL DISTRIBUTION (M) SDN. BHD."><font color="#FFFF00"><small>Malaysia
+ Office</small></font></a></strong></p>
+ <p align="left"><font color="#FFFF00"><!--webbot bot="ImageMap" rectangle="(1,5) (50,32) contact.htm#US+Office:" src="image2/mini_US.JPG" alt="mini_US.JPG (6954 bytes)" align="left" hspace="6" border="0" startspan --><MAP NAME="FrontPageMap5"><AREA SHAPE="RECT" COORDS="1, 5, 50, 32" HREF="contact.htm#US Office:"></MAP><a href="_vti_bin/shtml.dll/index.htm/map5"><img align="left" ismap usemap="#FrontPageMap5" border="0" height="33" hspace="6" alt="mini_US.JPG (6954 bytes)" src="image2/mini_US.JPG" width="51"></a><!--webbot bot="ImageMap" endspan i-checksum="32958" --></font><br>
+ <a href="contact.htm#US Office:"><font size="2" color="#FFFF00"><strong>US Office</strong></font></a></td>
+ <td align="center"></td>
+ <td></td>
+ <td align="center" valign="top" width="80%"><blockquote>
+ <p align="center"><font color="#FAE105" size="2" face="Cooper Lt BT"><!--webbot bot="ImageMap" rectangle="(302,43) (392,72) contact.htm" rectangle="(301,6) (395,39) technical.htm" rectangle="(5,42) (95,72) Training/training.htm" rectangle="(1,2) (96,37) software.htm" src="head62.jpg" align="left" border="0" hspace="5" startspan --><MAP NAME="FrontPageMap6"><AREA SHAPE="RECT" COORDS="302, 43, 392, 72" HREF="contact.htm"><AREA SHAPE="RECT" COORDS="301, 6, 395, 39" HREF="technical.htm"><AREA SHAPE="RECT" COORDS="5, 42, 95, 72" HREF="Training/training.htm"><AREA SHAPE="RECT" COORDS="1, 2, 96, 37" HREF="software.htm"></MAP><a href="_vti_bin/shtml.dll/index.htm/map6"><img align="left" ismap usemap="#FrontPageMap6" border="0" height="78" hspace="5" src="head62.jpg" width="396"></a><!--webbot bot="ImageMap" endspan i-checksum="12062" --><br>
+ </font></p>
+ <p align="center"> </p>
+ <p align="left"> </p>
+ </blockquote>
+ <div align="center"><center><table border="0" cellpadding="0" cellspacing="1">
+ <tr>
+ <td rowspan="2"></td>
+ <td align="right" valign="bottom" rowspan="2"><!--webbot bot="ImageMap" rectangle="(2,3) (31,59) index81.htm" src="Trophyt.gif" align="absbottom" border="0" hspace="5" width="32" height="60" startspan --><MAP NAME="FrontPageMap7"><AREA SHAPE="RECT" COORDS="2, 3, 31, 59" HREF="index81.htm"></MAP><a href="_vti_bin/shtml.dll/index.htm/map7"><img align="absbottom" ismap usemap="#FrontPageMap7" border="0" height="60" hspace="5" src="Trophyt.gif" width="32"></a><!--webbot bot="ImageMap" endspan i-checksum="41877" --></td>
+ <td width="85%"></td>
+ </tr>
+ <tr>
+ <td width="85%"><font color="#460000" face="Arial Rounded MT Bold"><em><marquee align="middle" bgcolor="#F1D303" height="20" scrollamount="5" scrolldelay="75" border="0">Kenfil received "Best Solution Provider" Award from Microsoft.... </marquee></em></font></td>
+ </tr>
+ </table>
+ </center></div><blockquote>
+ <p align="left"><font color="#FAE105" size="2" face="Cooper Lt BT"><img src="photo9c.jpg" align="left" hspace="5" WIDTH="382" HEIGHT="301"></font></p>
+ <p align="left"> </p>
+ <p align="left"> </p>
+ <p align="left"> </p>
+ <p align="left"> </p>
+ <p align="left"> </p>
+ <p align="left"> </p>
+ <p align="left"> </p>
+ <p align="left"> </p>
+ <p><font color="#0000FF" size="2"><strong>[<a href="software.htm">Software Distribution</a>][<a href="Training/training.htm">Training&Consultation</a>][<a href="technical.htm">Technical
+ Service</a>] <br>
+ <a href="contact.htm">[Contact Information]</a></strong></font></p>
+ </blockquote>
+ </td>
+ </tr>
+ <tr>
+ <td align="center" valign="top" width="14%"><font color="#FFFF00" size="2" face="Cooper Lt BT"><strong><br>
+ <!--webbot bot="ImageMap" rectangle="(5,4) (102,34) job.htm" src="jobopp8.gif" border="0" startspan --><MAP NAME="FrontPageMap8"><AREA SHAPE="RECT" COORDS="5, 4, 102, 34" HREF="job.htm"></MAP><a href="_vti_bin/shtml.dll/index.htm/map8"><img ismap usemap="#FrontPageMap8" border="0" height="42" src="jobopp8.gif" width="104"></a><!--webbot bot="ImageMap" endspan i-checksum="31031" --></strong></font></td>
+ <td align="center" rowspan="2"></td>
+ <td align="center" rowspan="2"></td>
+ <td align="center" rowspan="2" width="100%"><p align="center"><font color="#000080"><strong><br>
+ You are visitor <!--webbot bot="HitCounter" i-image="4" i-digits="8" b-reset="FALSE" preview="<strong>Hit Counter</strong>" u-custom i-resetvalue="1323850" startspan -->
+<img src="/cgi-bin/fpcount.exe/?Page=index.htm|Image=4|Digits=8"><!--webbot bot="HitCounter" endspan i-checksum="56822" --> since Oct 18, 1996</strong></font></p>
+ <p align="center"><font color="#00007D" size="2"><strong>Best Viewed With<br>
+ </strong></font><img src="IE.gif" WIDTH="82" HEIGHT="31"> <img src="msbo_iis.gif" WIDTH="100" HEIGHT="31"><br>
+ <font color="#800080" size="2"><em><strong>URL: http://www.kenfil.com<br>
+ Created on October 18, 1996 <br>
+ Last updated on <!--webbot bot="Timestamp" s-type="REGENERATED" s-format="%B %d, %Y" startspan -->November 27, 1998<!--webbot bot="Timestamp" endspan i-checksum="41300" --><br>
+ </strong></em></font><br>
+ <font color="#800080" size="2"><em><strong>Please mail your comments & suggestions to<br>
+ </strong></em></font><img src="earth15.gif" WIDTH="32" HEIGHT="32"> <a href="mailto:[email protected]"><font size="3"><em><strong>[email protected]</strong></em></font></a><br>
+ </td>
+ </tr>
+ <tr>
+ <td align="center" valign="top" width="14%"> </td>
+ </tr>
+</table>
+
+<p align="right"><font color="#808080" size="1" face="Arial Black">© 1997 Kenfil
+Distribution (FE) Ltd</font></p>
+
+<p align="center"> </p>
+</body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site2.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site2.html new file mode 100644 index 00000000..4f57c1c2 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site2.html @@ -0,0 +1,26 @@ +<HTML><HEAD> +<SCRIPT LANGUAGE="JavaScript"> +<!-- prevent another homepge from loading this page in its frame. They can't make this homepage appear as if it is thier homepage. + if(self!=top){ + top.location.href=self.location.href; + } +// --> +</SCRIPT> +<META HTTP-EQUIV="Refresh"> +<TITLE>Hong Kong Kung Fu Movies</TITLE> +<META NAME="Keywords" CONTENT="Yuki, Hong Kong, Movies, Kung Fu, Martial Arts, Jackie Chan, Jet Li, Sam Hung, Bruce Lee, Michelle Yeoh, Yuen Biao, Maggie Chung"> +<META NAME="Description" CONTENT="This homepage has a lot of links to Hong Kong movie stars such as Jackie Chan, Jet Li, Sam Hung, Bruce Lee, Michelle Yeoh, Yuen Biao, and Maggie Chung. It also has Yuki's favorite movies and actors/actresses and more!"> +</HEAD> +<FRAMESET ROWS="80,*" BORDERCOLOR="#000000"> + <FRAME SRC="http://www.besthongkongfilms.com/yukih-cgi/hk_bannar.cgi" SCROLLING=NO NAME="HK-BARNER"> + <FRAMESET COLS="25%,*" BORDERCOLOR="#000000"> + <FRAMESET ROWS="*,30" BORDERCOLOR="#000000"> + <FRAME SRC="http://www.besthongkongfilms.com/yukih-cgi/hk_directory.cgi" NAME="HK-DIR"> + <FRAME SRC="hk_clock.shtml" MARGINWIDTH=1 MARGINHEIGHT=1 SCROLLING="NO" NAME="CLOCK-WINDOW"> + </FRAMESET> + <FRAME SRC="http://www.besthongkongfilms.com/yukih-cgi/hk_home.cgi" NAME="HK-MAIN"> + </FRAMESET> +</FRAMESET> +a#bcd +</HTML> + diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site3.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site3.html new file mode 100644 index 00000000..ee480b75 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site3.html @@ -0,0 +1,234 @@ +<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" Arial
+charset="gb2312"">
+<meta http-equiv="refresh">
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+<title>China Business Net</title>
+<style type="text/css">
+
+<!--
+
+.zeng { font-family: "Verdana", "Arial", "Helvetica", "sans-serif"; text-decoration: none; background-color: #FFFFFF}
+
+-->
+
+</style>
+</head>
+<script language="JavaScript">
+
+
+
+<!--
+
+ function MakeArray() // Constructor: copies the arguments of this call into a 1-based array-like object.
+
+ {
+
+ this.length = MakeArray.arguments.length // number of arguments passed to this call
+
+ for (var i = 0; i < this.length; i++)
+
+ this[i+1] = MakeArray.arguments[i] // argument i is stored at index i+1, so index 0 is never assigned
+
+ }
+
+
+
+ var url = new MakeArray("Europe.", // page table read by jumpPage(); MakeArray stores entries 1-based, so this first entry is url[1]
+
+ "html/advertising.html",
+
+ "html/transportation.html",
+
+ "html/financial.html",
+
+ "html/chemistry.html",
+
+ "html/comprehensive.html",
+
+ "html/computer.html",
+
+ "html/decoration.html",
+
+ "html/electronic.html",
+
+ "html/textile.html",
+
+ "html/Watches.html",
+
+ "html/manufacturing.html",
+
+ "html/medical.html",
+
+ "html/real.html",
+
+ "html/trade.html");
+
+ function jumpPage(form) // Navigates the window to the page for the option currently selected in form.SelectMenu.
+
+ {
+
+ i = form.SelectMenu.selectedIndex; // assigned without `var`, so i is not local to this function
+
+ if (i == 0) return; // first option selected: do nothing
+
+ window.location.href = url[i+1]; // url is filled 1-based by MakeArray, so option i maps to the (i+1)-th MakeArray argument
+
+ }
+
+// -->
+
+
+
+</script>
+
+
+<body topmargin="0" leftmargin="0" marginwidth="0" marginheight="0"
+background="images/mainpages/ground2.gif" bgcolor="#FFFFFF">
+
+<table border="0" width="620" height="1" cellspacing="0" cellpadding="0">
+ <tr>
+ <td width="620" height="1" bgcolor="#000000"><img src="images/mainpages/001.gif" width="1"
+ height="11" alt="001.gif (36 bytes)"></td>
+ </tr>
+ <tr>
+ <td width="100%" height="16"><table border="0" width="1%" cellspacing="0" cellpadding="0"
+ height="247">
+ <tr>
+ <td width="3%" valign="top" height="149"><img src="images/mainpages/002.gif" width="41"
+ height="320" alt="Welcome to China Business Net"></td>
+ <td width="18%" valign="top" height="149"><p align="center"><img
+ src="images/mainpages/0031.gif" width="132" height="125" alt=" picture"><br>
+ <a href="html/company.html"><img src="images/mainpages/0032.gif" width="132" height="47"
+ alt="Company Profile" border="0"></a><br>
+ <a href="html/china.html"><img src="images/mainpages/0033.gif" width="132" height="26"
+ alt="China Yellow Page" border="0"></a><br>
+ <a href="html/what.html"><img src="images/mainpages/0034.gif" width="132" height="26"
+ alt="What`s New" border="0"></a><br>
+ <img src="images/mainpages/0035.gif" width="132" height="25" alt="Trade Bulletin"
+ border="0"><br>
+ <a href="html/serves.html"><img src="images/mainpages/0036.gif" width="132" height="26"
+ alt="Services" border="0"></a><br>
+ <a href="html/reference.html"><img src="images/mainpages/0040.gif" width="132" height="26"
+ alt="Reference Sites" border="0"></a><br>
+ <a href="html/support.html"><img src="images/mainpages/0038.gif" width="132" height="27"
+ alt="Support" border="0"></a><br>
+ <a href="mailto:[email protected]"><img src="images/mainpages/0039.gif" width="132"
+ height="41" alt="Contact Us" border="0"></a></p>
+ <table border="0" width="100%" cellspacing="0" cellpadding="0">
+ <tr>
+ <td width="100%"><p align="center"><font color="#FF0000" face="Arial"><small><strong>Download
+ It</strong></small></font></td>
+ </tr>
+ </table>
+ <p align="center"> <a href="http://www.microsoft.com/msdownload/"><img
+ src="images/mainpages/ie.gif" width="88" height="31" vspace="5" hspace="10" border="0"></a><br>
+ <a href="http://home.netscape.com/download/index.html"><img src="images/mainpages/ns.gif"
+ width="90" height="30" vspace="5" hspace="10" border="0"></a></p>
+ <p align="left"><br>
+ </td>
+ <td width="79%" valign="top" height="149"><img src="images/mainpages/0041.gif" width="72"
+ height="60" alt=" Earth"><img src="images/mainpages/0042.gif" width="111" height="60"
+ alt="China"><img src="images/mainpages/0046.gif" width="144" height="60" alt="Business"><img
+ src="images/mainpages/0048.gif" width="102" height="60" alt="Net"><table border="0"
+ width="449" cellspacing="0" cellpadding="0" height="194">
+ <tr>
+ <td width="292" height="152"><img src="images/mainpages/013.gif" width="280" height="64"
+ alt="We give you the tools to reach the world"><br>
+ <a href="html/china.html"><img src="images/mainpages/0241.gif" width="268" height="28"
+ alt="China Yellow Page" border="0"></a><font face="Arial"><table border="0" width="97%"
+ cellspacing="0" cellpadding="0">
+ <tr>
+ </font><td width="280"><font face="Arial"><strong><font color="#8080FF"><big>A</big></font></strong><small>
+ collection for Chinese enterprises, where you can find business opportunities and march
+ towards the world.</small></font><form NAME="info">
+ <p><font face="Arial" size="2"><select name="SelectMenu" " size="1"
+ onchange="jumpPage(this.form)">
+ <option> Please Select Category </option>
+ <option> Advertising </option>
+ <option> Automobile & Transportation</option>
+ <option> Banking & Finance </option>
+ <option> Chemistry Industry </option>
+ <option> Comprehensive Group </option>
+ <option> Computer </option>
+ <option> Decoration & Building Materials</option>
+ <option> Electronic Communication </option>
+ <option> Fashion & Textile </option>
+ <option> Jewel & Watches </option>
+ <option> Manufacturing </option>
+ <option> Medical Treatment & Health Care </option>
+ <option> Real Estate </option>
+ <option> Trade </option>
+ </select> </p>
+ </form>
+ <p><img src="images/mainpages/024.gif" width="268" height="28" alt="Trade Bulletin"
+ vspace="4" border="0"><br>
+ </font><font color="#8080FF" face="Arial" size="3"><strong><big>A</big></strong></font><font
+ face="Arial" size="2"> free bulletin board service, where you can provide your
+ products and services or you can tell others what you just want. It is a bridge between
+ supplier and purchaser.<br>
+ <br>
+ <a href="html/serves.html"><img src="images/mainpages/022.gif" width="268" height="28"
+ alt="Services" border="0" vspace="5"></a><br>
+ </font><font color="#8080FF" face="Arial" size="3"><strong><big>W</big></strong></font><font
+ face="Arial" size="2">e provide you professional services, which include creative web page
+ design, web site hosting, updating, monitoring and management, web site promotion, domain
+ name registration, etc..</font><font size="3" face="Arial"></p>
+ <hr>
+ <p></font><font color="#8080FF" face="Arial" size="2"><strong>Search by AltaVista:</strong></font><font
+ size="3" face="Arial"> </font></td>
+ </tr>
+ </table>
+ <table border="0" width="279" cellspacing="0" cellpadding="0" height="31">
+ <tr>
+ <td width="282" height="31" background="images/mainpages/back.gif"><form name="mfrm"
+ method="GET" action="http://www.altavista.com/cgi-bin/query">
+ <input type="hidden" name="pg" value="q"><div align="center"><center><p><input NAME="q"
+ size="19" maxlength="800" VALUE> <input type="submit" name="search" value="Search"></p>
+ </center></div><table border="0" width="100%" cellspacing="0" cellpadding="0">
+ <tr>
+ <td width="50%"><div align="right"><p><font face="Arial">You are Visitor:</font></td>
+ <td width="50%"><img src="/cgi-bin/hitcount.dll?counter:chinabusinessnet?font:2"></td>
+ </tr>
+ </table>
+ <div align="center"><center><p><font face="Arial"><small>Powered by <a
+ href="http://www.chinabusiness.net">Welcon Info-tech Ltd.</a><br>
+ Copyright(c) 1997, 1998. All Rights Reserved.</small></font><br>
+ <br>
+ <br>
+ <br>
+ </p>
+ </center></div>
+ </form>
+ </td>
+ </tr>
+ </table>
+ </td>
+ <td width="158" valign="top" height="152"><img src="images/mainpages/010.gif" width="136"
+ height="25" alt="Cool Sites" border="0"><br>
+ <a href="http://www.goodmex.com"><img src="images/mainpages/goodmexmark.gif" width="120"
+ height="40" alt="goodmex" hspace="8" vspace="2" border="0"></a><br>
+ <a href="http://www.gutex.com.cn"><img src="images/mainpages/g4.gif" width="120"
+ height="40" alt="Gutex" hspace="8" vspace="2" border="0"></a><br>
+ <a href="http://www.jxt.com.cn"><img src="images/mainpages/j3.gif" width="120" height="40"
+ alt="jxt" hspace="8" vspace="2" border="0"></a><br>
+ <a href="http://www.chinabusiness.net/citicdev"><img src="images/mainpages/c.gif"
+ width="120" height="40" alt="citicdev" hspace="8" vspace="2" border="0"></a><br>
+ <a href="http://www.chinabusiness.net/richland"><img src="images/mainpages/r2.gif"
+ width="120" height="40" alt="Richland" hspace="8" vspace="2" border="0"></a><br>
+ <a href="indexc.html"><img src="images/mainpages/china.gif" width="88" height="28"
+ alt="GB" border="0" hspace="20"></a><br>
+ <a href="indexb.html"><img src="images/mainpages/china1.gif" width="88" height="28"
+ alt="Big5" hspace="20" border="0"></a></td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+</table>
+</body>
+</html>
diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site4.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site4.html new file mode 100644 index 00000000..a32e4f98 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/site4.html @@ -0,0 +1,808 @@ +<HTML> +<HEAD> + <TITLE>FinAid | Loans | Private Loan Lenders (Graduate)</TITLE> +<? +include ("script-loans.html"); +?> +<meta name="DATE" content="1972-01-01"> +</HEAD> + +<BODY BGCOLOR="#FFFFFF" TEXT="#666666" LINK="#006699" VLINK="#6699CC" ALINK="#FFFFFF" TOPMARGIN="8" LEFTMARGIN="8" MARGINHEIGHT="8" MARGINWIDTH="8"> + +<!-- PHP code to include header file. File referenced may vary by section --> +<? +include ("header-loans.html"); +?> +<CENTER> +<TABLE BORDER="0" CELLPADDING="0" CELLSPACING="0" WIDTH="600"> + <TR> + <!-- 10 pixel spacer row between left edge and nav --> + <TD WIDTH="18"> </TD> + + <!-- PHP code for navigation. File referenced may vary by section--> + <TD WIDTH="141" VALIGN="TOP"> + <? + include ("nav-loans.html"); + ?> + </TD> + + <!-- 10 pixel spacer row between nav and content--> + <TD WIDTH="10"> </TD> + + <!-- content goes in here --> + <TD WIDTH="421" VALIGN="TOP"><BR><FONT FACE="ARIAL, HELVETICA" SIZE="3" CLASS=three><B>Additional Lenders Who Offer Private Loans to Graduate Students</B></FONT><P> + <FONT FACE="ARIAL, HELVETICA" SIZE="2" CLASS=two><TABLE BORDER="0" CELLPADDING="0" CELLSPACING="0"> + <TR> + <TD><IMG SRC="/commonimages/corner_topleft.gif" WIDTH=11 HEIGHT=11 ALT="" BORDER="0"></TD> + <TD BGCOLOR="#CCFFFF"><IMG SRC="/commonimages/corner_bottomright.gif" WIDTH=11 HEIGHT=11 ALT="" BORDER="0"></TD> + <TD><IMG SRC="/commonimages/corner_topright.gif" WIDTH=11 HEIGHT=11 ALT="" BORDER="0"></TD> + </TR> + <TR> + <TD BGCOLOR="#CCFFFF"> </TD> + <TD BGCOLOR="#CCFFFF"><FONT FACE="ARIAL, HELVETICA" SIZE="3" CLASS=three><B>NATE: Remove this cell</B></FONT><BR> + <FONT FACE="ARIAL, HELVETICA" SIZE="2" CLASS=two>Access Group<BR> + + </FONT></TD> + <TD BGCOLOR="#CCFFFF"> </TD> + 
</TR> + <TR> + <TD><IMG SRC="/commonimages/corner_bottomleft.gif" WIDTH=11 HEIGHT=11 ALT="" BORDER="0"></TD> + <TD BGCOLOR="#CCFFFF"><IMG SRC="/commonimages/spacer.gif" WIDTH=1 HEIGHT=1 ALT="" BORDER="0"></TD> + <TD><IMG SRC="/commonimages/corner_bottomright.gif" WIDTH=11 HEIGHT=11 ALT="" BORDER="0"></TD> + </TR> + </TABLE> + + +& " < > (See the separate list of private loans for <a +href="privateunder.phtml">undergraduate students</a>)<P> + +<A HREF="http://www.accessgroup.org"><B>Access Group</B></A><BR> + +fran�ais +Québec + +The Access Group is a nonprofit organization dedicated to providing +access to education through affordable financing and related +services. The Access Group provides federal and private loans to +graduate and professional students. The Access Group offers the +following loan programs of interest to graduate and professional +students: + +<UL> +<LI>Federal Stafford Loan (Subsidized and Unsubsidized) +<LI>Law Access Loan and Bar Examination Loan +<LI>Business Access Loan +<LI>Medical Access Loan and Medical Residency Loan +<LI>Dental Access Loan and Dental Residency/Board Exam Loan +<LI>Graduate Access Loan +</UL> + +<EM>Key Eligibility Requirements:</EM> Among other requirements, +borrowers must be a US citizen, national or eligible noncitizen, must +not be in default on any education loan or owe a refund on an +education grant (Federal loan only), and must be attending an +approved, accredited school. <P> + +<EM>Loan Limits:</EM> All Access Group loans have a minimum loan +amount of $500. Loan maximums vary by program, and are generally +limited to the cost of attendance less other aid received, though +there may be other limits depending on enrollment status and +school. Maximums include all outstanding undergraduate and graduate +educational debt. 
The aggregate limits are $120,000 for Graduate +Access and Business Access, $130,000 for Law Access (including Bar +Examination Loan), $195,000 for Medical Access (including the Medical +Residency Loan) and Dental Access (including the Dental +Residency/Board Examination Loan). <P> + +<EM>Interest Rates:</EM> Interest may be deferred while the borrower +is in school. Deferred interest is capitalized once, at +repayment. Interest rates vary quarterly, and are based on the 91-day +T-bill rate plus a spread. The spreads are 2.5% for Medical Access, +2.75% for Dental Access, 2.9% for Law Access, 3.0% for Business +Access, and 3.4% for Graduate Access. <P> + +<EM>Loan Fees:</EM> The guarantee fee for Access Group private loans +is 6.0% at disbursement for all borrowers. A supplemental guarantee +fee is added to the principal balance immediately prior to +repayment. This fee is significantly determined by the borrower's past +credit behavior and ranges from 1.5% to 6.9%. (For any loan for which +a cosigner is required, the supplemental guarantee fee is 6.9%.) <P> + +<EM>Repayment:</EM> Repayment begins 9 months after graduation or when +enrollment status drops below half-time. Borrowers may take up to 20 +years to repay the loan. The minimum monthly payment is $50. There is +no penalty for prepayment. <P> + +To apply for a loan, or for more information on any Access Group +product or service, call the Access Loan Line at 1-800-282-1550 +weekdays 9-8 ET. Use the new online loan application or place an order +for a paper application using the materials order form. You can also +fax 1-302-477-4080, write to the Access Group, 1411 Foulk Road, Box +7430, Wilmington, DE 19803-0430, or send email to <A +HREF="mailto:[email protected]">[email protected]</A>. <p> + + +<A HREF="http://www.nhelp.net"><B>College Funding Company</B></A><BR> + +College Funding Company is a coalition of four Nebraska nonprofit +organizations involved in educational financing. 
CFC's "Flexible +College Funding Loan" is available to parents of undergraduate and +graduate students. The term of the loan is up to 15 years, and it +offers an in-school interest-only repayment plan in addition to a +standard repayment plan in which interest and principal payments begin +upon disbursement. The interest rate is prime plus 1.5% (prime plus +1.75% during any interest-only payment periods). Parents may borrow up +to the full cost of education less any financial aid received, with a +cumulative limit of $100,000 plus fees. The minimum loan amount is +$1,000. <P> + +For more information, call 1-800-745-6646, send email to <a +href="mailto:[email protected]">[email protected]</a>, or use the +contact form on their website. <P> + +<A HREF="http://www.citibank.com/student/CSLC.html"><B>Citibank Student +Loans</B></A><BR> + +Citibank offers a variety of private graduate student loans, including: + +<UL> +<LI>Citibank GradAssist Loan +<LI>Citibank MBAAssist Loan +<LI>Citibank MedicalAssist Loan (allopathic or osteopathic medicine) +<LI>Citibank EngAssist Loan +<LI>Citibank Nursing Loan +</UL> + +The eligibility requirements require borrowers to be US citizens or +permanent residents. International students can apply if they have a +US citizen or permanent resident cosign the loan. Borrowers must also +meet certain credit criteria and income standards. These loans are +only available at accredited schools with a federal default rate of +less than 15%. <P> + +All of the loans have a minimum loan amount of $500. Annual maximums +are $15,000 for the GradAssist and MBAAssist loans, $20,000 for the +MedicalAssist Loan, and $8,500 for the EngAssist and Nursing +loans. Cumulative maximum limits include the Citibank loans and are +$100,000 for the GradAssist and MBAAssist loans, $135,000 for the +MedicalAssist Loan, and $80,000 for the EngAssist and Nursing +loans. <P> + +Interest may be deferred while the borrower is in school. 
Deferred +interest is capitalized once, at repayment. Interest rates are based +on 91-day T-bill rates plus a spread. The rates for the GradAssist +Loan are T-bill plus 4.00% before repayment and T-bill plus 4.50% +during repayment. The rates for the MBAAssist loan are T-bill plus +3.40% and T-bill plus 4.25%. The rates for the MedicalAssist loans are +T-bill plus 2.60% and T-bill plus 2.85%. The rates for the EngAssist +loan are T-bill plus 3.25% and T-bill plus 3.50%. The rates for the +Nursing loan are T-bill plus 3.50% and T-bill plus 4.00%. <P> + +The guarantee fee is 6.00% at disbursement if the loan is borrowed +with a cosigner, 9.00% if there is no cosigner. The guarantee fee for +MBAAssist and MedicalAssist Loans is 8.50%. <P> + +Repayment for all loans begins 6 months after graduation or when +enrollment status drops below half-time. You have up to 15 years to +repay the loan, 20 years for MedicalAssist loans. MedicalAssist +borrowers may request a second deferment for residency and internship, +and have the option of paying three years interest-only upon enterring +repayment. The minimum monthly payment is $50. There is no penalty for +prepayment. <P> + +Fluctuations in interest rate are reflected in the length of the +repayment term, not in the monthly payment amount, unless an increase +in monthly amount is needed to keep the payment period within the +program maximum. Citibank offers three repayment options: + +<UL> +<LI>Pay interest-only during the in-school and 6 month grace period. +<LI>Pay 15 years of principal and interest, capitalizing the interest during the in-school period. +<LI>Pay two years of interest-only plus 13 years of principal and interest, capitalizing the interest during the in-school period. +</UL> + +For more information, call 1-800-692-8200 x144 (1-800-846-1298 TDD), +fax 1-716-248-7007, write to Citibank (NYS) Student Loans, PO Box +22948, Rochester, NY 14692. Graduate students may call +1-800-967-8677. 
<P> + + +<A HREF="http://www.educap.org"><B>Educap Inc.</B></A><BR> + +Educap Inc., (formerly known as University Support Services, Inc.), is +a non-profit corporation founded in 1986. They offer several loan +programs:<P> + +<A HREF="http://www.educap.org/plato.htm">The P.L.A.T.O. student loan +program</A>, which lets you borrow from $1,500 up to $25,000 a year +with a $100,000 cumulative borrowing limit. The loan can be used to +pay educational expenses incurred during the previous year in addition +to expenses from the current year. Up to $5,000 can be borrowed for +purchasing a personal computer. The loan has a repayment term of up to +15 years with no prepayment penalty. If the student does not earn +$15,000 per year, a co-signer will be required. Students have the +option of deferring payments of principal for up to five years or +graduation, whichever comes first. The interest rate is the Prime rate +plus a 1.90% spread during the in-school period and the Prime rate +plus a 2.90% spread during repayment. There is an 8% origination +fee. US citizenship or permanent residency is required. A good credit +history is also required. They also offer an introductory rate of 7.9% +with a 7% origination fee. <P> + +<A HREF="http://www.educap.org/merlin.htm">The Merlin loan +consolidation program</A>, which lowers monthly loan payments by as +much as 45% by using a 20 year repayment period. You can consolidate +between $5,000 and $100,000 in loans, and you can borrow up to an +additional $25,000 in student loans. There is no prepayment +penalty. US citizenship or permanent residency required. The Merlin +Consolidation Loan program can be used to consolidate all +education-related debt to include both private and US federal loans +and credit card debt. There is a 5% origination fee and the interest +rate is the Commercial Paper Rate plus 6.0%. <P> + +<A HREF="http://www.educap.org/pj.htm">The P.L.A.T.O. 
Junior Education +Loan</A>, which is similar in structure and requirements to the +P.L.A.T.O. loan, but is aimed at families with students in private +schools grades K-12. The main difference is an interest rate of prime +plus 3.7% and a 6% origination fee. <P> + +For more information, call 1-800-230-4080, write to Complete Source +for Financing Education, 205 Van Buren Street, Suite 200, Herndon, VA +22070, or send email to <<A +HREF="mailto:[email protected]">[email protected]</A>. <P> + + +<B>Education Funding Services, Inc. (EFS)</B><BR> + +In conjunction with <A HREF="http://www.teri.org">TERI</A>, EFS offers +the Chiroloan for chiropractic study (Canadian Chiroloan for Canadian +citizens attending ACC accredited chiropractic institutions in US), +the OpLoan for optometry, and the VLoan for veterinary school. <P> + +The Chiroloan program lets the student borrow from $1,000 to $8,000 +annually. The OpLoan and VLoan programs let the student borrow from +$1,500 to $20,000 annually. The Canadian Chiroloan programs let the +student borrow from US$1,000 to US$10,000 annually. <P> + +The Chiroloan, OpLoan, and VLoan programs have a variable interest +rate based on the 91-day T-Bill rate plus 2.5% during the in-school +period and the 91-day T-Bill rate plus 2.9% during repayment. The +Canadian Chiroloan program has a variable interest rate of Prime plus +1.5% during in-school and deferment periods, and Prime plus 2% during +repayment. <P> + +The term of all four loan programs is up to 20 years, with a $50 +minimum monthly payment. Except for the Chiroloan program, which has +an in-school deferment period of 5 consecutive years, all of the +programs have an in-school deferment period of 4 consecutive years, +with interest capitalized at graduation. Repayment begins 12 months +after graduation or withdrawal from school. All loans have +interest-only and interest and principal payment options for the +in-school period. 
<P> + +The Chiroloan has a $10 application fee and a 10% guarantee fee (6% +with cosigner). The Canadian Chiroloan program has a $10 application +fee and an 11% guarantee fee. The VLoan and OpLoan have no application +fees, but a guarantee fee of 6% at disbursement and 1% at +repayment. For schools with high default rates, the guarantee fees are +7% and 2%, respectively. <P> + +For more information about these programs and a list of participating +schools, contact EFS at 1-800-252-2041. <P> + +<B>International Health Education Loan Program (IHELP) </B><BR> + +The IHELP loan program provides loans for US graduate health +profession students who are studying outside the US for an MD or +DVM. US citizenship or permanent residency is required. Non-citizens +must submit valid INS 151 or INS 551 forms with their +applications. <P> + +The IHELP program lets students borrow from $1,500 to $15,000 per +year, with an aggregate limit of $75,000. The interest rate is the +91-day T-Bill rate plus 3.5% during the in-school and repayment +periods. There is a 10.5% guarantee fee. <P> + +Repayment begins 12 months after graduation. There is a 4-year +deferment period for medical students and a 1-year deferment period +for veterinary students during residency. The term of the loan is up +to 20 years, with a $50 minimum monthly payment. <P> + +This loan program is administered by International Education Finance +Company (IEFC), division of Education Funding Services, +Inc. (EFS). IHELP federal loans are guaranteed by USA Funds and IHELP +alternative loans by TERI. <P> + +For more information, call 1-800-255-TERI (1-800-255-8374), write to +The Education Resources Institute (TERI), 330 Stuart Street, Suite +500, Boston, MA 02116-5237, or send email to <A +HREF="mailto:[email protected]">[email protected]</A>. Customer +service is x4210, school relations is x4262, lender relations is +x4218, claims and collections is x4270, and loan origination is x4287 +(fax 1-617-695-3637). 
Financial aid administrators should call +1-800-TERI-FAO (1-800-837-4326) or 1-617-422-8800. <P> + + +<A HREF="http://www.keybank.com/educate/key_other.html"><B>Key +Education Resources</A></B><BR> + +Key Education Resources offers several private loan programs for +graduate and professional students:<P> + +<A HREF="http://www.keybank.com/educate/dental.html">Alternative +DEAL</A>(DEAL = Dental Education Assistance Loan), a private loan for +students pursuing dental or post-doctoral dental degrees. Repayment +may be deferred during school and a 24-month grace period by +capitalizing the interest, and the borrower may choose an optional +interest-only repayment for the first three years. <P> + +<A HREF="http://www.keybank.com/educate/med_ach.html">MedAchiever</A>, +a private loan for full-time students pursuing allopathic or +osteopathic medical degrees. Repayment may be deferred during school +and a 48 month grace residency/intership period by capitalizing the +interest, and the borrower may choose an optional interest-only +repayment for the first three years. <P> + +<A +HREF="http://www.keybank.com/educate/grad_ach.html">GradAchiever<</A>, +a private loan for full-time graduate students in most fields of +study. Repayment may be deferred during school and for a 9 month grace +period after graduation by capitalizing the interest. <P> + +<A HREF="http://www.keybank.com/educate/mba_ach.html">MBAchiever</A>, +a private loan program for full time graduate business +students. Repayment may be deferred during school and for a 9 month +grace period after graduation by capitalizing the interest. <P> + +LawAchiever, a private loan program for any student attending an +ABA-approved law school at least half-time pursuing a J.D., J.S.D., +LL.M., or joint degree program. <P> + +There is an aggregate loan limit of $130,000. 
The interest rate is the +91-day T-Bill rate plus 3.25% during the in-school and grace period, +and the 91-day T-Bill rate plus 2.50% to 3.25% during repayment +depending on the borrower's choice of payment plans. There are loan +fees of 6% at disbursement and up to 3% at repayment. All loan +payments are deferred during the in-school period and for 9 months +after graduation. There is no penalty for prepayment. Each month the +borrower may choose a payment tied to a 10, 15, or 25 year repayment +schedule, with shorter repayment periods having a lower interest +rate. (The 10, 15, and 25 year interest rates are the 91 day T-Bill +rate plus 2.50%, 2.90%, and 3.25%, respectively.) The Select/2 +Repayment Option allows students to make interest-only payments for +the first two years, followed by principal and interest payments for +the balance of the loan term. Students who have their monthly payment +automatically debited from their checking or savings account are +eligible for an additional 0.25% reduction in the interest rate. Key +Education Resources is the largest educational lender to law schools. +The annual loan limit for these loans is the cost of education less +other financial aid received ($7,500 for LawAchiever BarLoan). For +more information, call 1-800-KEY-LEND or send email to Key Education +Resources at <A +HREF="mailto:[email protected]">[email protected]</A>. <P> + + + +<A HREF="http://www.iefc.com/islp.html"><B>International Student Loan +Program (ISLP)</B></A><BR> + +ISLP is an alternative loan program for US students studying abroad or +enrolled at foreign colleges or universities. It is also available to +international students for study in the US with a US citizen or +permanent resident as co-signer. 
The program for US students combines +FFELP (Stafford and PLUS) loans with supplemental loan financing into a +single package.<P> + +Graduate students may borrow up to the full cost of education, +including tuition, fees, and room and board, subject to a $15,000 +maximum. The minimum loan amount is $1,000 per academic year for US +students, $2,000 per academic year for international +students. Students may take up to 25 years to repay the loan (20 years +for Canadian students). There is a $50 minimum payment and some +deferment provisions.<P> + +The interest rate is variable, and is pegged to the prime lending +rate. There are different interest rates and fees depending on whether +the student is engaged in a study abroad program or is actually +enrolled at a foreign institution. For study abroad programs the +interest rate is the prime rate and there is a 9% guarantee fee. For +graduate students enrolled in a foreign school the interest rate is +the prime rate plus 0.25%, and there is a 10% guarantee fee and a 0.5% +origination fee. For Canadian students the interest rate is the prime +rate plus 1.5%, and there is a 7% guarantee fee with co-signer or a 9% +guarantee fee at disbursement and a 2% guarantee fee at repayment +without a co-signer. For international students the interest rate is +the prime rate and there is a 6% guarantee fee.<P> + +For more information, call 1-617-696-7840, fax 1-617-698-3001, write +to Joe Cronin, VP/IEFC, 424 Adams Street, Milton, MA 02186, or send +email to <A HREF="mailto:[email protected]">[email protected]</A>. <P> + + + +<A HREF="http://www.mefa.org"><B>Massachusetts Educational Financing +Authority (MEFA)</B></A><BR> + +MEFA is a not-for-profit state authority that provides college +financing for students attending Massachusetts colleges and +universities.
They offer some of the lowest cost college loans +available, including GEL, a fixed rate graduate education loan program +available at many participating Massachusetts colleges and +universities (7.65% for the 1996-97 academic year), and PRISM, a fixed +rate international student loan program for students from Canada or +Argentina attending one of 65 participating schools in +Massachusetts. <P> + +MEFA is able to offer one of the lowest interest rates on their loans +because they are non-profit and exempt from federal and state +tax. Students can borrow up to the full cost of education. These loans +have a term of up to 15 years and there is no penalty for +prepayment. Payments may be deferred while the student is in school +for a maximum of three years. The interest can be made tax-deductible +through their Home Equity Option. <P> + +For more information about MassPlan, GEL, or U.Plan, call +1-800-449-MEFA (6332) or 1-617-261-9760. For more information about +PRISM, call 1-800-842-1531 or 1-617-261-9760, or fax +1-617-261-9765. You can also write to MEFA, 125 Summer Street, Boston, +MA 02110. <P> + +<A HREF="http://www.medfunds.com"><B>MedFunds</B></A><BR> + +MedFunds is a not-for-profit lender of Stafford, HEAL, and alternative +loans for students in various medical curriculums. They are a division +of the Ohio College of Podiatric Medicine. <P> + +The MedFunds Alternative Graduate Loan is offered for graduate students +in Allopathic, Dentistry, Pharmacy, Podiatry, Physical Therapy, Public +Health, Occupational Therapy, Optometry, Osteopathy, Veterinary, +Nursing, Nutrition, and Physician's Assistant disciplines. The +eligibility requirements include a credit check and the debt-to-income +ratio for all applicants as a group must not exceed 40% after +including the loan. Debt includes a percentage of credit lines on +credit cards, regardless of the current balance.
The loans are +restricted to US citizens, nationals, and permanent residents, and the +student must also have applied for a MedFunds Stafford Loan. Students +may borrow from $1,000 up to the cost of education or $20,000, +whichever is less, with a cumulative maximum of $80,000. There is an +aggregate education debt limit of $152,500 with the Medfunds +Option. (The MedFunds Option allows students to borrow an additional +$10,000 during the final year of graduate studies to cover internship +and residency interviewing and relocation costs.) Repayment begins 9 +months after graduation or when the student's enrollment status drops +below half-time. An additional deferment of up to four years is +available for residency and internship. Interest is capitalized once, +at repayment. The interest rate is based on the 91 day T-bill rate +plus a spread. The spread is 2.4% before repayment and 2.7% during +repayment. The guarantee fee is 6.00% at origination, plus an +additional 3% at repayment if there is no cosigner. The term of the +loan is up to 20 years, and there is no penalty for prepayment. <P> + +For more information, call 1-800-665-1016, fax 1-216-231-0453, write +to MedFunds, 10515 Carnegie Avenue, Cleveland, OH 44160, or send email +to <A HREF="mailto:[email protected]">[email protected]</A>. Servicing +questions should be directed to SLSC at 1-800-233-0557. <P> + + +<A HREF="http://www.mohela.com"><B>Mohela</B></A><BR> + +Mohela is a secondary market for student loans in Missouri and +neighboring states. Established in 1981, they have $1 billion in +assets and serve more than 125,000 student and parent borrowers and 83 +lenders. More than three-quarters of all student loans in Missouri are +purchased by Mohela. <P> + +They offer the CASH LOAN private loan program as a supplement to FFELP +and FDSLP loans for graduate students. These loans include the +MedCash, LawCash, BarCash, MBACash, and GradCash loan programs. US +citizenship is required. 
<P> + +Under the MedCash program, medical school students can borrow up to +$20,000 per year, with an aggregate maximum of $80,000 and a total +educational debt limit of $128,500 (including MedCash loans). The +interest rate is the 91-day T-Bill rate plus a spread of 2.50% while +the student is still in school and 2.85% during repayment. There is a +5% guarantee fee, and an additional 2% guarantee fee is added when the +student enters repayment. Repayment begins 36 months after graduation +or 9 months after the student withdraws to less than half-time +enrollment without graduating. No payments are required while the +student is in school, and the term of the loan is up to 20 years. <P> + +Under the LawCash program, law students can borrow up to $15,000 per +year, with an aggregate maximum of $45,000 and a total educational +debt limit of $90,000 (including LawCash loans). The interest rate is +the 91-day T-Bill rate plus a spread of 3.25% while the student is +still in school and 3.40% during repayment. There is an 8% guarantee +fee at disbursement, and an additional 2% guarantee fee when the +student enters repayment. Repayment begins 9 months after the student +either graduates or withdraws. No payments are required while the +student is in school, and the term of the loan is up to 15 years. <P> + +For more information call 1-800-6-MOHELA (1-800-666-4352) or +1-314-469-0600, TDD/TT 1-314-469-6390, fax 1-314-469-4561, or write to +Mohela, 14528 South Outer Forty Road, Suite 300, Chesterfield, MO +63017. <P> + + +<A HREF="http://www.nelliemae.org"><B>Nellie Mae LOAN LINK</B></A><BR> + + +Nellie Mae, established in 1982, is the largest nonprofit provider of +student and parent education loan funds in the US. Nellie Mae offers +the EXCEL Education Loan Program for parents and spouses of +undergraduate and graduate students and the GradEXCEL, LawEXCEL, +MBA-EXCEL, and MedDent-EXCEL Education Loan Programs for graduate and +professional students. 
<P> + +The Nellie Mae EXCEL Education Loan Program lets parents and spouses +borrow from $2,000 up to the cost of attendance less other financial +aid received each year. Repayment options include paying interest only +while the student is in school or making monthly payments of principal +and interest while the student is in school. You can take up to 20 +years to repay the loan depending on the amount borrowed (15 years for +a $10,000 loan). <P> + +The GradEXCEL, LawEXCEL, MBA-EXCEL, and MedDent-EXCEL Education Loan +Programs let graduate and professional students borrow from $2,000 to +$10,000 a year (up to $12,000 for law students) on their own, or up to +the cost of attendance with a cosigner. Medical and Dental students +have the option to borrow an additional $5,000 in the final year of +medical or dental school for use during residency interviews or +relocation expenses. Repayment options include deferring principal and +interest until six months after leaving school (MedDent-EXCEL +borrowers can continue to defer principal during residency or +internship, but must pay interest), making monthly payments of +interest only while the student is in school, or making monthly +payments of principal and interest while the student is in school. You +can take up to 20 years to repay the loan depending on the amount +borrowed. <P> + +Interest rates are the same for all loans and are pegged to the Prime +rate. You can choose either a monthly variable rate of Prime plus 0.5% +during the first year and Prime plus 1.0% during subsequent years, or +a one-year renewable rate of Prime plus 2-3%. <P> + +There are no origination fees. EXCEL Loans have a 7% guarantee +fee. With a cosigner, the GradEXCEL, LawEXCEL, MBA-EXCEL, and +MedDent-EXCEL loans have a 7% guarantee fee. Without a cosigner, the +guarantee fees are 9% for MBA and medical students and 10% for +graduate, law, and dental students. <P> + + +You can request an application through their online +form.
For more information, call 1-800-9-TUITION (1-800-988-4846), +1-800-634-9308, or 1-617-849-3447, fax 1-617-849-6006, write to Nellie +Mae, 50 Braintree Hill Park, Suite 300, Braintree, MA 02184, or send +email to <A +HREF="mailto:[email protected]">[email protected]</A>. <P> + + +<A HREF="http://www.salliemae.com"><B>Sallie Mae</B></A><BR> + +Sallie Mae is the nation's largest secondary market and holds +approximately one third of all educational loans. Sallie Mae buys +student loans from lenders and administers them from origination +through repayment. Sallie Mae is affiliated with several lenders +(currently Norwest Bank and Household Bank fsb) who offer private loan +programs. <P> + +The private loan programs include: + +<UL> +<LI>LAWLOANS: Law Student Loans and Bar Study Loans +<LI>MEDLOANS: Alternative Loan Program (for allopathic medical students) and MEDEX (for residency interviews and relocation) +<LI>MBALOANS: Tuition Loan Program (for students attending approved business schools) and Executive MBA Loan (for students enrolled in non-traditional MBA programs) +</UL> + +Borrowers under these loan programs may be required to apply for +federal student aid first. Eligibility is restricted to US citizens, +nationals, and permanent residents. The LAWLOANS and MBALOANS require +permanent residents to have a US citizen as cosigner. The MBALOANS +requires international students to have creditworthy cosigners and to +make interest payments during the in-school period. <P> + +All loans let you borrow a minimum of $1,000 ($500 for continuing +borrowers). Each program has a different maximum loan amount. The +LAWLOANS maximum is $60,000 without a cosigner, $100,000 with a +cosigner. The Bar Study Loan lets you borrow up to $7,500. The +MEDLOANS lets you borrow up to $20,000 per year. The MEDEX loan lets +you borrow up to $7,000. In addition, the LAWLOANS has an aggregate +maximum for all educational loans of $102,500 without a cosigner, +$150,000 with a cosigner.
For MEDLOANS the aggregate maximum is +$142,500. <P> + +The interest rates are based on the 91-day T-bill rates, plus a +spread, and vary quarterly. For LAWLOANS the interest rate for FFELP +borrowers is T-bill plus 3.25% during the in-school period and for +FDSLP it is T-bill plus 3.50%. During repayment the rate for both is +T-bill plus 3.50%. For MEDLOANS it is T-bill plus 2.5% during the +in-school period, and T-bill plus 2.85% during repayment. For MBALOANS +the rate for FFELP borrowers is T-bill plus 3.25% during the in-school +period, T-bill plus 3.40% during repayment, and T-bill plus 3.50% for +FDSLP borrowers. Interest is capitalized at the beginning of +repayment, except for MEDLOANS where the interest is capitalized upon +leaving medical school and annually until repayment begins. <P> + +Loan fees for LAWLOANS are 7.5% at disbursement, plus an additional +4.25% at repayment if there is no cosigner. Loan fees for MEDLOANS are +7% at disbursement, plus 2% of principal and accrued interest at the +beginning of repayment. The MEDEX loan has an additional origination +fee of 1.5%. Loan fees for MBALOANS are 7.5% at disbursement, plus an +additional 2.5% at repayment if there is no cosigner. <P> + +The minimum payment for all loans is $50. The loan term for LAWLOANS +is up to 15 years and begins following a 9 month grace period after +leaving school or 4 years after disbursement, whichever comes +first. The loan term for MEDLOANS is up to 20 years, and begins 3-4 +years after graduation (depending on length of the residency program) +or 9 months after withdrawal. MEDLOANS offers a repayment option of 3 +years of interest-only payments and 17 years of interest and principal +payments. The loan term for MBALOANS is 12-15 years, depending on the +loan balance, and begins 6 months after leaving school or 3 years +after disbursement, whichever comes first. The MBALOANS offers level +or graduated repayment options. 
<P> + +Borrowers of these private loans get access to Sallie Mae discounts, +including a 0.25% interest rate reduction for authorizing an automatic +monthly deduction of the education loan payments from your check or +savings account (Direct Repay) and a 0.5% interest rate reduction for +Law Student or Bar Study loan borrowers who make their first 48 +scheduled monthly payments on time (Law Rewards). <P> + +Call 1-800-239-4211 to request copies of Sallie Mae brochures or to +talk to a College Answer[sm] representative. Customer service +telephone numbers are 1-800-366-5626 for the LAWLOANS, 1-800-858-5050 +for MEDLOANS, and 1-800-366-6227 for MBALOANS. <p> + + + +<A HREF="http://www.teri.org/alt.htm"><B>TERI Alternative Loan +Program</B></A><BR> + +The TERI Alternative Loan Program provides loans for graduate +students who are enrolled at least half-time in a degree-granting +program at a TERI-approved school. Approval is based on +creditworthiness, not financial need. This program lets you borrow +from $2,000 up to the cost of education, less any financial aid +received. You can borrow to cover past due balances owed to the school +from a prior loan period. <P> + +Repayment begins 45 days after disbursement. There is an alternate +deferred repayment option in which payments are interest-only while +the student is in school (for up to four consecutive years), with +repayment of interest and principal beginning 45 days after graduation +or withdrawal from school. There is a $50 minimum payment. The term of +the loan is for up to 25 years, depending on the amount borrowed, and +there is no penalty for prepayment. <P> + +There is a 5% guarantee fee. The interest rate depends on the lender, +and is variable, ranging from a low of Prime plus 0% (Bank of Boston, +Household Bank, and PNC Bank) to a high of Prime plus 2% +(Citibank). First Union National Bank offers the loan at Prime + +1.25%.
On the variable interest rate loans, any fluctuation in the +interest rate will be reflected in the length of repayment, not in the +size of the monthly payment, unless an increase in the monthly amount +is needed to keep the repayment period within the 25 year maximum. <P> + +The Maine Education Loan Authority offers the MELA loan for Maine +students and Maine residents. There is a $50 application fee, and the +loan is offered with either a fixed interest rate or a variable +interest rate based on the 91-day T-Bill rate plus a spread determined +by the lender. <P> + +For more information, call 1-800-255-TERI (1-800-255-8374), write to +The Education Resources Institute (TERI), 330 Stuart Street, Suite +500, Boston, MA 02116-5237, or send email to <A +HREF="mailto:[email protected]">[email protected]</A>. Customer +service is x4210, school relations is x4262, lender relations is +x4218, claims and collections is x4270, and loan origination is x4287 +(fax 1-617-695-3637). Financial aid administrators should call +1-800-TERI-FAO (1-800-837-4326) or 1-617-422-8800. <P> + + +<A HREF="http://www.teri.org"><B>TERI Professional Education Program +(PEP)</B></A><BR> + +The TERI PEP program provides loans for graduate and professional +study. It lets graduate and professional students borrow up to $7,500 +($12,000 for law students) annually on their own credit. If the +borrower is creditworthy or has a creditworthy cosigner, the TERI PEP +program lets the student borrow annually up to the cost of education +less any financial aid received or $20,000, whichever is less, with a +cumulative limit of $80,000. <P> + +There are aggregate borrowing limits depending on the +profession. These aggregate limits include all education debt, +undergraduate and graduate. Graduate students who are creditworthy or +who apply with a creditworthy cosigner are not subject to these +limits. 
The aggregate limits are: + +<UL> + +<LI> $90,000 -- Medicine (MD degree) +<LI> $90,000 -- Osteopathic Medicine +<LI> $77,000 -- Law +<LI> $70,000 -- Pharmacy (PhD degree) +<LI> $45,000 -- Business +<LI> $44,000 -- Dentistry +<LI> $44,000 -- Engineering +<LI> $44,000 -- Physical Sciences +<LI> $33,000 -- All other graduate/professional programs. + +</UL> + +The student makes no payments for up to 4.5 consecutive years while +still in school. Unpaid interest is capitalized at repayment. Medical +school students may request a second deferment on the new principal +balance after the interest has been capitalized, for up to 4 years +while completing a medical internship or residency (with +capitalization at repayment). <P> + +There is a $50 minimum monthly payment and the term of the loan is for +up to 20 years. Fluctuations in the interest rate will be reflected in +the length of repayment, not in the size of the monthly payment, +unless an increase in the monthly amount is needed to keep the +repayment period within the 20 year maximum. <P> + +A guarantee fee is deducted at origination. The guarantee fee is 6% +with a cosigner, 9% without. The interest rate is variable, and +depends on the lender. The Bank of Boston, Household Bank, and PNC +Bank offer the TERI PEP loans at an interest rate of Prime + 0%. The +Bank of Boston charges an origination fee of 0.5%, with a $50 +cap. First Union National Bank offers TERI PEP loans at Prime + +1.25%. Citibank offers the Citibank PEP loan at Prime + 2%. <P> + +For more information, call 1-800-255-TERI (1-800-255-8374), write to +The Education Resources Institute (TERI), 330 Stuart Street, Suite +500, Boston, MA 02116-5237, or send email to <A +HREF="mailto:[email protected]">[email protected]</A>. Customer +service is x4210, school relations is x4262, lender relations is +x4218, claims and collections is x4270, and loan origination is x4287 +(fax 1-617-695-3637).
Financial aid administrators should call +1-800-TERI-FAO (1-800-837-4326) or 1-617-422-8800. <P> + + + +<A HREF="http://www.teri.org/cel.htm"><B>TERI Continuing Education +Loan</B></A><BR> + +Funded by the Bank of Boston, the TERI Continuing Education Loan +program provides loans for continuing education students for +undergraduate, graduate, and certificate program study. Approval is +based on creditworthiness, not financial need. There are no enrollment +status requirements - you do not need to be enrolled at least half +time, as with other private loans. The minimum loan amount is $500 and +the maximum is $5,000 per year. You can borrow for past due balances +owed to the school from a prior loan period. No school certification +is required. The interest rate is Prime plus 1.5% and there is a 5% +guarantee fee and no application fee. Repayment of interest and +principal begins six months after disbursement. There is a $25 minimum +monthly payment, and you have up to 10 years to repay the loan. <P> + +For more information, call 1-800-255-TERI (1-800-255-8374), write to +The Education Resources Institute (TERI), 330 Stuart Street, Suite +500, Boston, MA 02116-5237, or send email to <A +HREF="mailto:[email protected]">[email protected]</A>. Customer +service is x4210, school relations is x4262, lender relations is +x4218, claims and collections is x4270, and loan origination is x4287 +(fax 1-617-695-3637). Financial aid administrators should call +1-800-TERI-FAO (1-800-837-4326) or 1-617-422-8800. <P> + + + + </FONT></TD> + <!-- 10 pixel spacer row between left edge and nav --> + <TD WIDTH="10"> </TD> + </TR> +</TABLE> +</CENTER> +<BR> + +<!-- PHP code to include footer file. File referenced may vary by section --> +<? 
+include ("footer-loans.html"); +?> + +<!-- TRACKING STUFF TO BE PUT IN --> +</BODY> +</HTML> diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/sub%20dir/empty file.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/sub%20dir/empty file.html new file mode 100644 index 00000000..90531a4b --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/sub%20dir/empty file.html @@ -0,0 +1,2 @@ +<html> +</html> diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/title.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/title.html new file mode 100644 index 00000000..252915f7 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set1/title.html @@ -0,0 +1,10 @@ +<html> +<head> +<title>FinAid | Loans | Private Loan Lenders (Graduate)</title> +<meta name="keywords" content="newWord"> +</head> +<body> +This is a test. I'd like to see the weird characters appear. & " < > +<a href="site%201.html">firstCrossRef crossRef</a>. +</body> +</html> diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set2/normal.html b/debian/htdig/htdig-3.2.0b6/test/htdocs/set2/normal.html new file mode 100644 index 00000000..decddd64 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set2/normal.html @@ -0,0 +1 @@ +This is a very normal file. What a relief :-) diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-content_type.cgi b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-content_type.cgi new file mode 100755 index 00000000..f839a6d4 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-content_type.cgi @@ -0,0 +1,10 @@ +#!/bin/sh + +echo 'HTTP/1.1 200 OK' +echo 'Connection: close' +echo 'Content-Type: text/html ; ISO-8859-1' +echo +cat <<! +This is the content of the +document +! 
diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-hang.cgi b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-hang.cgi new file mode 100755 index 00000000..142bfcf6 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-hang.cgi @@ -0,0 +1,31 @@ +#!/bin/sh + +if [ ! -z "$QUERY_STRING" ] +then + echo 'HTTP/1.1 200 OK' + echo 'Connection: close' + echo 'Content-Type: text/html' + echo + cat <<! +root::0:root +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: + +! +fi + +sleep 200 + diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-location.cgi b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-location.cgi new file mode 100755 index 00000000..a7ad45f5 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-location.cgi @@ -0,0 +1,11 @@ +#!/bin/sh + +echo 'HTTP/1.1 200 OK' +echo 'Connection: close' +echo 'Content-Type: text/html' +echo 'Location: /set3/nph-location.cgi' +echo +cat <<! +This is the content of the +document +! diff --git a/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-slow.cgi b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-slow.cgi new file mode 100755 index 00000000..7dd4ce2c --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/htdocs/set3/nph-slow.cgi @@ -0,0 +1,1069 @@ +#!/bin/sh + +echo 'HTTP/1.1 200 OK' +echo 'Connection: close' +echo 'Content-Type: text/html' +echo +echo 'a few bytes 1' +sleep 2 +echo 'a few bytes 2' +sleep 2 + +cat <<! +a few bytes 3 +end extract ... 
+other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm 
+adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: 
+daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon 
+sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm 
+nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon 
+uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon 
+nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm 
+adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: 
+daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root 
+tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: +other::1: +bin::2:root,bin,daemon +sys::3:root,bin,sys,adm +adm::4:root,adm,daemon +uucp::5:root,uucp +mail::6:root +tty::7:root,tty,adm +lp::8:root,lp,adm +nuucp::9:root,nuucp +staff::10: +daemon::12:root,daemon +nobody::60001: +noaccess::60002: +users::100: +basis::200: + +! 
+ +sleep 2 +echo 'last bytes' diff --git a/debian/htdig/htdig-3.2.0b6/test/mifluz-search.conf b/debian/htdig/htdig-3.2.0b6/test/mifluz-search.conf new file mode 100644 index 00000000..e20a0825 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/mifluz-search.conf @@ -0,0 +1,17 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +wordlist_extend: true +minimum_word_length: 1 +maximum_word_length: 25 +wordlist_cache_size: 10485760 +wordlist_page_size: 32768 +wordlist_compress: 0 +wordlist_wordrecord_description: NONE +wordlist_wordkey_description: Word/Tag 8/Server 8/URL 8/Location 8 +wordlist_compress_debug: 0 +wordlist_monitor: false diff --git a/debian/htdig/htdig-3.2.0b6/test/mifluz.conf b/debian/htdig/htdig-3.2.0b6/test/mifluz.conf new file mode 100644 index 00000000..5d9b6951 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/mifluz.conf @@ -0,0 +1,18 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +wordlist_extend: true +minimum_word_length: 1 +maximum_word_length: 25 +wordlist_cache_size: 10485760 +wordlist_page_size: 32768 +wordlist_compress: 0 +wordlist_wordrecord_description: NONE +wordlist_wordkey_description: Word/DocID 32/Flags 8/Location 16 +wordlist_compress_debug: 0 +wordlist_monitor_period: 10 +wordlist_monitor_output: monitor.out,rrd diff --git a/debian/htdig/htdig-3.2.0b6/test/search.cc b/debian/htdig/htdig-3.2.0b6/test/search.cc new file mode 100644 index 00000000..4f5e9690 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/search.cc @@ -0,0 +1,3543 @@ +// +// 
search.cc +// +// search: Sample implementation of search algorithms using +// a mifluz inverted index. +// +// Each class is documented in the class definition. Before +// each method declaration a comment explains the semantic of +// the method. In the method definition comments in the code +// may contain additional information. +// +// Each virtual function is documented in the base class, not +// in the derived classes except for semantic differences. +// +// The class tree is: +// +// WordKeySemantic +// +// WordExclude +// WordExcludeMask +// WordPermute +// +// WordSearch +// +// WordMatch +// +// WordTree +// WordTreeOperand +// WordTreeOptional +// WordTreeOr +// WordTreeAnd +// WordTreeNear +// WordTreeMandatory +// WordTreeNot +// WordTreeLiteral +// +// WordParser +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: search.cc,v 1.9 2004/05/28 13:15:29 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include <htconfig.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ + +// If we have this, we probably want it. 
+#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif /* HAVE_GETOPT_H */ +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ +#include <stdlib.h> + +#include <htString.h> +#include <WordList.h> +#include <WordContext.h> +#include <WordCursor.h> + +// +// Verbosity level set with -v (++) +// +static int verbose = 0; + +// ************************* Document definition implementation *********** + +#define TAG 1 +#define SERVER 2 +#define URL 3 +#define LOCATION 4 + +// *********************** WordKeySemantic implementation ******************** +// +// NAME +// +// encapsulate WordKey semantic for document and location +// +// SYNOPSIS +// +// #include <WordKeySemantic.h> +// +// #define SERVER 1 +// #define URL 2 +// #define LOCATION 3 +// +// static int document[] = { +// SERVER, +// URL +// }; +// +// WordKeySemantic semantic; +// semantic.Initialize(document, sizeof(document)/sizeof(int), LOCATION); +// +// DESCRIPTION +// +// Encapsulate the semantic of a WordKey object fields. It defines +// what a document and a location are. It implements the set of +// operation that a search needs to perform given the fact that it +// implements a search whose purpose is to retrieve a document and +// wants to implement proximity search based on a word location. +// +// +// END +// +// A document is a set of fields in a given order. +// A location is a field. +// The actual fields used to implement WordKeySemantic methods are +// set with the Initialize method. +// +class WordKeySemantic { +public: + WordKeySemantic(); + ~WordKeySemantic(); + + //- + // Set the actual field numbers that define what a document is and + // what a location is. The <b>document_arg<b> is a list of WordKey field + // positions of length <b>document_length_arg</b> that must be adjacent. + // The <b>location_arg</b> is the WordKey field position of the word + // location within a document. + // Return OK on success, NOTOK on failure. 
+ // + int Initialize(int* document_arg, int document_length_arg, int location_arg); + + // + // These functions and only these know what a document is. + // This should really be a class containing function pointers and be + // given as argument to the search algorithm. + // + //- + // Copy the document in <b>from</b> into <b>to.</b> + // + void DocumentSet(const WordKey& from, WordKey& to); + //- + // Increment the document in <b>key</b> using the <i>SetToFollowing</i> + // method of WordKey. <b>uniq</b> is the WordKey position at which the + // increment starts. + // + void DocumentNext(WordKey& key, int uniq); + //- + // Compare the document fields defined in both <b>a</b> and <b>b</b> + // and return the difference a - b, as in strcmp. If all document + // fields in <b>a</b> or <b>b</b> are undefined return 1. + // + int DocumentCompare(const WordKey& a, const WordKey& b); + //- + // Set all document fields to 0. + // + int DocumentClear(WordKey& key); + + // + // These functions and only these know what a location is. + // This should really be a class containing function pointers and be + // given as argument to the search algorithm. + // + //- + // Copy the document and location in <b>from</b> into <b>to.</b> + // + void LocationSet(const WordKey& from, WordKey& to); + //- + // Increment the document and location in <b>key</b> + // using the <i>SetToFollowing</i> + // method of WordKey. + // + void LocationNext(WordKey& key); + //- + // Compare <b>expected</b> location to <b>actual</b> location. Compares equal + // as long as expected location is at a maximum distance of <b>proximity</b> + // of actual. If <b>actual</b> only has undefined field, return > 0. + // <b>expected</b> must always be the lowest possible bound. + // <b>actual</b> is tolerated if it is greater than <b>actual</b> but not + // greater than <b>proximity</b> if <b>proximity</b> > 0 or abs(<b>proximity</b>) * 2 if + // <b>proximity</b> < 0. + // Return the difference expected - actual. 
+ // + int LocationCompare(const WordKey& expected, const WordKey& actual, int proximity = 0); + //- + // <b>key</b> is the expected location of a searched key. + // LocationNearLowest modifies <b>key</b> to add tolerance accroding to + // <b>proximity</b>. + // + // The idea is that <b>key</b> will be the lowest possible match for + // for the <b>proximity</b> range. If <proxmity> is positive, <b>key</b> + // is already the lowest possible match since we accept [0 proximity]. + // If <b>proximity</b> is negative, substract it since we accept + // [-proximity proximity]. + // + // For better understanding see the functions in which it is used. + // + void LocationNearLowest(WordKey& key, int proximity); + + //- + // Undefined the location field in <b>key.</b>. + // + void Location2Document(WordKey& key); + +protected: + int* document; + int document_length; + int location; +}; + +WordKeySemantic::WordKeySemantic() +{ + int nfields = WordKey::NFields(); + document = new int[nfields]; + document_length = 0; + location = -1; +} + +WordKeySemantic::~WordKeySemantic() +{ + if(document) delete [] document; +} + +int WordKeySemantic::Initialize(int* document_arg, int document_length_arg, int location_arg) +{ + memcpy((char*)document, (char*)document_arg, document_length_arg * sizeof(int)); + document_length = document_length_arg; + location = location_arg; + return OK; +} + +void WordKeySemantic::DocumentSet(const WordKey& from, WordKey& to) +{ + to.Clear(); + for(int i = 0; i < document_length; i++) + to.Set(document[i], from.Get(document[i])); +} + +int WordKeySemantic::DocumentCompare(const WordKey& a, const WordKey& b) +{ + int ret = 1; + for(int i = 0; i < document_length; i++) { + int idx = document[i]; + if((a.IsDefined(idx) && b.IsDefined(idx)) && + (ret = a.Get(idx) - b.Get(idx)) != 0) return ret; + } + return ret; +} + +int WordKeySemantic::DocumentClear(WordKey& key) +{ + for(int i = 0; i < document_length; i++) + key.Set(document[i], 0); + return 0; +} + +void 
WordKeySemantic::DocumentNext(WordKey& key, int uniq) +{ + if(uniq) + key.SetToFollowing(uniq); + else + key.SetToFollowing(document[document_length-1]); +} + + +void WordKeySemantic::LocationSet(const WordKey& from, WordKey& to) +{ + DocumentSet(from, to); + to.Set(location, from.Get(location)); +} + +int WordKeySemantic::LocationCompare(const WordKey& expected, const WordKey& actual, int proximity) +{ + int ret = 1; + if((ret = DocumentCompare(expected, actual)) != 0) return ret; + // + // Only compare location if defined. + // + if((expected.IsDefined(location) && actual.IsDefined(location)) && + (ret = expected.Get(location) - actual.Get(location))) { + if(proximity < 0) { + // + // -N means ok if in range [-N +N] + // + proximity *= 2; + if(ret < 0 && ret >= proximity) + ret = 0; + } else { + // + // N means ok if in range [0 +N] + // + if(ret < 0 && ret >= -proximity) + ret = 0; + } + } + return ret; +} + +void WordKeySemantic::LocationNext(WordKey& key) +{ + key.SetToFollowing(location); +} + +void WordKeySemantic::LocationNearLowest(WordKey& key, int proximity) +{ + if(proximity < 0) { + if(key.Underflow(location, proximity)) + key.Get(location) = 0; + else + key.Get(location) += proximity; + } +} + +void WordKeySemantic::Location2Document(WordKey& key) +{ + key.Undefined(location); +} + +// ************************* WordExclude implementation ******************** +// +// NAME +// +// permute bits in bit field +// +// SYNOPSIS +// +// #include <WordExclude.h> +// +// #define BITS 5 +// +// WordExclude permute; +// permute.Initialize(BITS); +// while(permute.Next() == WORD_EXCLUDE_OK) +// ... +// +// DESCRIPTION +// +// Count from 1 to the specified maximum. A variable++ loop does the same. +// The <b>WordExclude</b> class counts in a specific order. +// It first step thru all the permutations containing only 1 bit set, in +// increasing order. Then thru all the permutations containing 2 bits set, +// in increasing order. 
As so forth until the maximum number is reached. +// See the <b>Permute</b> method for more information. +// +// +// END + +// +// Helper that displays an unsigned int in binary/hexa/decimal +// +static inline void show_bits(unsigned int result) +{ + int i; + for(i = 0; i < 10; i++) { + fprintf(stderr, "%c", (result & (1 << i)) ? '1' : '0'); + } + fprintf(stderr, " (0x%08x - %15d)\n", result, result); +} + +// +// WordExclude methods return values +// +#define WORD_EXCLUDE_OK 1 +#define WORD_EXCLUDE_END 2 + +// +// Maximum number of bits +// +#define WORD_EXCLUDE_MAX (sizeof(unsigned int) * 8) + +// +// Convert a position <p> in a <l> bits mask into a bit offset (from 0) +// +#define WORD_EXCLUDE_POSITION2BIT(l,p) ((l) - (p) - 1) + +class WordExclude { +public: + //- + // Reset the generator and prepare it for <b>length</b> bits generation. + // The <b>length</b> cannot be greater than <i>WORD_EXCLUDE_MAX.</i> + // Returns OK if no error occurs, NOTOK otherwise. + // + virtual int Initialize(unsigned int length); + //- + // Move to next exclude mask. Returns WORD_EXCLUDE_OK if successfull, + // WORD_EXCLUDE_END if at the end of the permutations. It starts by + // calling <i>Permute</i> with one bit set, then two and up to + // <i>Maxi()</i> included. The last permutation only generates one + // possibility since all the bits are set. + // + virtual int Next(); + //- + // Exclude bit for <b>position</b> starts at most significant bit. That is + // position 0 exclude bit is most significant bit of the current mask. + // Returns true if position is excluded, false otherwise. + // + virtual inline unsigned int Excluded(int position) { return mask & (1 << WORD_EXCLUDE_POSITION2BIT(maxi, position)); } + //- + // Returns how many bits are not excluded with current mask. + // + virtual inline int NotExcludedCount() const { return maxi - bits; } + //- + // Returns how many bits are excluded with current mask. 
+ // + virtual inline int ExcludedCount() const { return bits; } + // + // Save and restore in string + // + //- + // Write an ascii representation of the WordExclude object in <b>buffer.</b> + // Each bit is represented by the character 0 or 1. The most significant + // bit is the last character in the string. For instance + // 1000 is the string representation of a WordExclude object initialized + // with length = 4 after the first <i>Next</i> operation. + // + virtual void Get(String& buffer) const; + //- + // Initialize the object from the string representation in <b>buffer.</b> + // Returns OK on success, NOTOK on failure. + // + virtual int Set(const String& buffer); + + //- + // Generate all the permutations + // containing <i>n</i> bits in a <b>bits</b> bit word in increasing order. + // The <b>mask</b> argument is originally filled by the caller + // with the <i>n</i> least significant bits set. A call to Permute + // generates the next permutation immediately greater (numerically) + // than the one contained in <b>mask</b>. + // + // Permute returns the next permutation or 0 if it reached the + // maximum. + // + // To understand the algorithm, imagine 1 is a ball and 0 a space. + // + // When playing the game you start with a rack of <b>bits</b> slots filled + // with <i>n</i> balls all on the left side. You end the game when all + // the balls are on the right side. + // + // Sarting from the left, search for the first ball that has an empty + // space to the right. While searching remove all the balls you find. + // Place a ball in the empty space you found, at the right of the last + // ball removed. Sarting from the left, fill all empty spaces with + // the removed balls. Repeat until all balls are to the right. 
+ // + // Here is a sample generated by repeated calls to WordExclude::Permute: + // (left most bit is least significant) + // <pre> + // mask = 1111100000 + // while(mask = WordExclude::Permute(mask, 7)) + // show_bits(mask) + // + // 1111100000 (0x0000001f - 31) + // 1111010000 (0x0000002f - 47) + // 1110110000 (0x00000037 - 55) + // 1101110000 (0x0000003b - 59) + // 1011110000 (0x0000003d - 61) + // 0111110000 (0x0000003e - 62) + // 1111001000 (0x0000004f - 79) + // 1110101000 (0x00000057 - 87) + // 1101101000 (0x0000005b - 91) + // 1011101000 (0x0000005d - 93) + // 0111101000 (0x0000005e - 94) + // 1110011000 (0x00000067 - 103) + // 1101011000 (0x0000006b - 107) + // 1011011000 (0x0000006d - 109) + // 0111011000 (0x0000006e - 110) + // 1100111000 (0x00000073 - 115) + // 1010111000 (0x00000075 - 117) + // 0110111000 (0x00000076 - 118) + // 1001111000 (0x00000079 - 121) + // 0101111000 (0x0000007a - 122) + // 0011111000 (0x0000007c - 124) + // </pre> + // A recursive implementation would be: + // <pre> + // /* Recursive */ + // void permute(unsigned int result, int bits_count, int bits_toset) + // { + // if(bits_toset <= 0 || bits_count <= 0) { + // if(bits_toset <= 0) + // do_something(result); + // } else { + // permute(result, bits_count - 1, bits_toset); + // permute(result | (1 << (bits_count - 1)), bits_count - 1, bits_toset - 1); + // } + // } + // </pre> + // Which is more elegant but not practical at all in our case. + // + inline unsigned int Permute(unsigned int mask, unsigned int bits); + + //- + // Return the current bit field value. 
+ // + virtual inline unsigned int& Mask() { return mask; } + virtual inline unsigned int Mask() const { return mask; } + + virtual inline unsigned int& Maxi() { return maxi; } + virtual inline unsigned int Maxi() const { return maxi; } + + virtual inline unsigned int& Bits() { return bits; } + virtual inline unsigned int Bits() const { return bits; } + +private: + unsigned int mask; + unsigned int maxi; + unsigned int bits; +}; + +int WordExclude::Initialize(unsigned int length) +{ + if(length > WORD_EXCLUDE_MAX) { + fprintf(stderr, "WordExclude::Initialize: length must be < %d\n", (int)WORD_EXCLUDE_MAX); + return NOTOK; + } + + mask = 0; + bits = 0; + maxi = length; + + return OK; +} + +inline unsigned int WordExclude::Permute(unsigned int mask, unsigned int bits) +{ + unsigned int bits_cleared = 0; + unsigned int j; + for(j = 0; j < bits; j++) { + if(mask & (1 << j)) { + bits_cleared++; + mask &= ~(1 << j); + } else { + if(bits_cleared) { + bits_cleared--; + mask |= (1 << j); + break; + } + } + } + + if(j >= bits) + return 0; + + for(j = 0; j < bits_cleared; j++) + mask |= (1 << j); + + return mask; +} + +int WordExclude::Next() +{ + mask = Permute(mask, maxi); + + int ret = WORD_EXCLUDE_OK; + + if(mask == 0) { + bits++; + if(bits > maxi) + ret = WORD_EXCLUDE_END; + else { + unsigned int i; + for(i = 0; i < bits; i++) + mask |= (1 << i); + ret = WORD_EXCLUDE_OK; + } + } + + if(verbose > 2) show_bits(mask); + + return ret; +} + +void WordExclude::Get(String& buffer) const +{ + buffer.trunc(); + unsigned int i; + for(i = 0; i < maxi; i++) { + buffer << ((mask & (1 << i)) ? 
'1' : '0'); + } +} + +int WordExclude::Set(const String& buffer) +{ + if(Initialize(buffer.length()) == NOTOK) + return NOTOK; + unsigned int i; + for(i = 0; i < maxi; i++) { + if(buffer[i] == '1') { + mask |= (1 << i); + bits++; + } + } + return OK; +} + +// ************************* WordExcludeMask implementation ******************* +// +// NAME +// +// WordExclude specialization that ignore some bits +// +// SYNOPSIS +// +// #include <WordExcludeMask.h> +// +// #define BITS 9 +// #define IGNORE 0x0f0 +// #define IGNORE_MASK 0x050 +// +// WordExcludeMask permute; +// permute.Initialize(BITS, IGNORE, IGNORE_MASK); +// while(permute.Next() == WORD_EXCLUDE_OK) +// ... +// +// DESCRIPTION +// +// Only perform WordExclude operations on the bits that are not set in +// <i>ignore.</i> The bits of <i>ignore_mask</i> that are set in +// <i>ignore</i> are untouched. In the synopsis section, for instance, +// bits 1,2,3,4 and 9 will be permuted and the bits 5,6,7,8 will be +// left untouched. +// +// +// END +// +#define WORD_EXCLUDE_IGNORED (-1) + +class WordExcludeMask : public WordExclude { +public: + //- + // <b>ignore</b> gives the mask of bits to ignore. The actual WordExclude + // operations are made on a number of bits that is <b>length</b> - (the number + // of bits set in <b>ignore).</b> + // The <b>ignore_mask_arg</b> contains the actual values of the bits ignored by + // the <b>ignore</b> argument. 
+ // + virtual inline int Initialize(unsigned int length, unsigned int ignore, unsigned int ignore_mask_arg) { + ignore_mask = ignore_mask_arg; + ignore_maxi = length; + unsigned int maxi = 0; + unsigned int i; + for(i = 0, ignore_bits = 0; i < length; i++) { + if(ignore & (1 << i)) { + bit2bit[i] = WORD_EXCLUDE_IGNORED; + if(ignore_mask & (1 << i)) ignore_bits++; + } else { + bit2bit[i] = maxi++; + } + } + + return WordExclude::Initialize(maxi); + } + + virtual inline unsigned int Excluded(int position) { + position = WORD_EXCLUDE_POSITION2BIT(ignore_maxi, position); + if(bit2bit[position] == WORD_EXCLUDE_IGNORED) + return ignore_mask & (1 << position); + else + return WordExclude::Mask() & (1 << bit2bit[position]); + } + + virtual inline int NotExcludedCount() const { + return ignore_maxi - ignore_bits - WordExclude::Bits(); + } + + virtual inline int ExcludedCount() const { + return ignore_bits - WordExclude::Bits(); + } + + //- + // The semantic is the same as the Get method of Wordexclude + // except that ignored bits are assigned 3 and 2 instead of 1 and 0 + // respectively. + // + virtual void Get(String& buffer) const; + //- + // The semantic is the same as the Get method of Wordexclude + // except that ignored bits are assigned 3 and 2 instead of 1 and 0 + // respectively. 
+ // + virtual int Set(const String& buffer); + + virtual inline unsigned int Mask() const { + unsigned int ret = ignore_mask; + unsigned int i; + for(i = 0; i < ignore_maxi; i++) { + if(bit2bit[i] != WORD_EXCLUDE_IGNORED) { + if(WordExclude::Mask() & (1 << bit2bit[i])) + ret |= (1 << i); + } + } + return ret; + } + + virtual inline unsigned int Maxi() const { return ignore_maxi; } + + virtual inline unsigned int Bits() const { return ignore_bits + WordExclude::Bits(); } + +private: + unsigned int ignore_mask; + unsigned int ignore_maxi; + unsigned int ignore_bits; + int bit2bit[WORD_EXCLUDE_MAX]; +}; + +void WordExcludeMask::Get(String& buffer) const +{ + buffer.trunc(); + unsigned int i; + for(i = 0; i < ignore_maxi; i++) { + if(bit2bit[i] == WORD_EXCLUDE_IGNORED) + buffer << ((ignore_mask & (1 << i)) ? '3' : '2'); + else + buffer << ((WordExclude::Mask() & (1 << bit2bit[i])) ? '1' : '0'); + } +} + +int WordExcludeMask::Set(const String& buffer) +{ + WordExclude::Initialize(0); + + unsigned int& maxi = WordExclude::Maxi(); + unsigned int& mask = WordExclude::Mask(); + unsigned int& bits = WordExclude::Bits(); + ignore_mask = 0; + ignore_bits = 0; + ignore_maxi = buffer.length(); + + unsigned int i; + for(i = 0; i < ignore_maxi; i++) { + if(buffer[i] == '1' || buffer[i] == '0') { + if(buffer[i] == '1') { + mask |= (1 << maxi); + bits++; + } + bit2bit[i] = maxi; + maxi++; + } else if(buffer[i] == '3' || buffer[i] == '2') { + if(buffer[i] == '3') { + ignore_mask |= (1 << i); + ignore_bits++; + } + bit2bit[i] = WORD_EXCLUDE_IGNORED; + } + } + + return OK; +} + +// ************************* WordPermute implementation ******************** +// +// NAME +// +// WordExclude specialization with proximity toggle +// +// SYNOPSIS +// +// #include <WordPermute.h> +// +// #define BITS 5 +// +// WordPermute permute; +// permute.Initialize(BITS); +// while(permute.Next() == WORD_EXCLUDE_OK) +// if(permute.UseProximity()) ... 
+// +// DESCRIPTION +// +// Each WordExclude permutation is used twice by Next. Once with +// the proximity flag set and once with the proximity flag cleared. +// If the length of the bit field (length argument of Initialize) is +// lower or equal to 1, then the proximity flag is always false. +// +// +// END +// +// WordPermute methods return values +// +#define WORD_PERMUTE_OK WORD_EXCLUDE_OK +#define WORD_PERMUTE_END WORD_EXCLUDE_END + +// +// Use or don't use proximity flag +// +#define WORD_PERMUTE_PROXIMITY_NO 0 +#define WORD_PERMUTE_PROXIMITY_TOGGLE 1 +#define WORD_PERMUTE_PROXIMITY_ONLY 2 + +// +// Deals with word exclusion and proximity permutations for +// the implementation of the Optional retrieval model. +// +class WordPermute : public WordExcludeMask { +public: + //- + // The <b>nuse_proximity</b> may be set to the following: + // + // WORD_PERMUTE_PROXIMITY_NO so that the object behaves as + // WordExcludeMask and Proximity() always return false. + // + // WORD_PERMUTE_PROXIMITY_TOGGLE so that each permutation is issued twice: + // once with the proximity flag set (Proximity() method) and once with + // the proximity flag cleared. + // + // WORD_PERMUTE_PROXIMITY_ONLY so that the object behaves as + // WordExcludeMask and Proximity() always return true. + // + virtual inline int Initialize(unsigned int length, unsigned int ignore, unsigned int ignore_mask_arg, int nuse_proximity) { + use_proximity = nuse_proximity; + switch(use_proximity) { + case WORD_PERMUTE_PROXIMITY_NO: + proximity = 0; + break; + case WORD_PERMUTE_PROXIMITY_TOGGLE: + // + // Don't bother to try proximity search if only one word + // is involved. 
+ // + proximity = length > 1; + break; + case WORD_PERMUTE_PROXIMITY_ONLY: + proximity = 1; + break; + default: + fprintf(stderr, "WordPermute::Initialize: unexpected use_proximity = %d\n", use_proximity); + return 0; + } + return WordExcludeMask::Initialize(length, ignore, ignore_mask_arg); + } + + //- + // Return true if the proximity flag is set, false if it is + // cleared. + // + inline int Proximity() { + switch(use_proximity) { + case WORD_PERMUTE_PROXIMITY_NO: + return 0; + break; + case WORD_PERMUTE_PROXIMITY_TOGGLE: + return proximity; + break; + case WORD_PERMUTE_PROXIMITY_ONLY: + return 1; + break; + default: + fprintf(stderr, "WordPermute::Proximity: unexpected use_proximity = %d\n", use_proximity); + return 0; + break; + } + } + + //- + // Return WORD_PERMUTE_PROXIMITY_NO, WORD_PERMUTE_PROXIMITY_TOGGLE or + // WORD_PERMUTE_PROXIMITY_ONLY. + // + inline int UseProximity() { return use_proximity; } + + //- + // Find the next permutation. If <b>WORD_PERMUTE_PROXIMITY_TOGGLE<b> was + // specified in Initialize each permutation is issued twice (see + // Proximity() to differentiate them), except when the mask + // only contains one non exluded bit (NotExcludeCount() <= 1). + // In both case the last permutation with all bits excluded + // (i.e. when NotExcludedCount() <= 0) is never returned because + // it is useless. + // + virtual int Next() { + if(Maxi() <= 0) + return WORD_PERMUTE_END; + + int ret = WORD_PERMUTE_OK; + int check_useless = 0; + if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE) { + // + // Move to next permutation as follows: + // exclude mask 1 + use proximity + // exclude mask 1 + don't use proximity + // exclude mask 2 + use proximity + // exclude mask 2 + don't use proximity + // and so on. + // If only one word is involved never use proximity. 
+ // + if(proximity) { + proximity = 0; + } else { + proximity = 1; + if((ret = WordExcludeMask::Next()) == WORD_PERMUTE_OK) { + // + // Do not toggle proximity for only one non excluded word + // + if(NotExcludedCount() <= 1) + proximity = 0; + check_useless = 1; + } else if(ret == WORD_PERMUTE_END) + proximity = 0; + } + } else { + ret = WordExcludeMask::Next(); + check_useless = 1; + } + + if(check_useless && ret == WORD_PERMUTE_OK) { + // + // If no bits are ignored or all ignore_mask bits are set to + // one, the last permutation has all exclude bits set, which + // is useless. Just skip it and expect to be at the end of + // all permutations. + // + if(NotExcludedCount() <= 0) { + ret = WordExcludeMask::Next(); + if(ret != WORD_PERMUTE_END) { + fprintf(stderr, "WordPermute::Next: expected WORD_PERMUTE_END\n"); + ret = NOTOK; + } + } + } + + return ret; + } + + //- + // The semantic is the same as the Get method of Wordexclude + // but a letter T is appended to the string if the proximity + // flag is set, or F is appended to the string if the proximity + // is clear. + // + virtual inline void Get(String& buffer) const { + WordExcludeMask::Get(buffer); + if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE) + buffer << (proximity ? 'T' : 'F'); + } + + //- + // The semantic is the same as the Get method of Wordexclude + // but if the string end with a T the proximity flag is set + // and if the string end with a F the proximity flag is cleared. 
+ // + virtual inline int Set(const String& buffer) { + if(buffer.length() < 1) { + fprintf(stderr, "WordPermute::Set: buffer length < 1\n"); + return NOTOK; + } + int ret = OK; + if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE) { + if((ret = WordExcludeMask::Set(buffer.sub(0, buffer.length() - 1))) == OK) + proximity = buffer.last() == 'T'; + } else { + ret = WordExcludeMask::Set(buffer); + } + + return ret; + } + +protected: + int use_proximity; + int proximity; +}; + +// ************************* WordTree implementation ******************** +// +// NAME +// +// Base class for query resolution nodes +// +// SYNOPSIS +// +// #include <WordTree.h> +// +// class WordTreeMethod : public WordTree { +// ... +// }; +// +// DESCRIPTION +// +// The WordTree class is derived from the WordCursor class and implement +// the basic operations and data structures needed for query resolution. +// It is the common base class of all the classes that actually implement +// a query resolution. The derived classes must be implemented to follow +// the WordCursor semantic for Walk* operations. 
+// +// +// END +// + +#define WORD_WALK_REDO 0x1000 +#define WORD_WALK_RESTART 0x2000 +#define WORD_WALK_NEXT 0x4000 + +// +// Return values of CursorsObeyProximity method +// +#define WORD_SEARCH_NOPROXIMITY 1 + +// +// operand values +// +#define WORD_TREE_OR 1 +#define WORD_TREE_AND 2 +#define WORD_TREE_NEAR 3 +#define WORD_TREE_OPTIONAL 4 +#define WORD_TREE_LITERAL 5 +#define WORD_TREE_MANDATORY 6 +#define WORD_TREE_NOT 7 + +#define WORD_TREE_OP_SIZE 20 + +// +// Default proximity is to search for adjacent words in order +// +#ifndef WORD_SEARCH_DEFAULT_PROXIMITY +#define WORD_SEARCH_DEFAULT_PROXIMITY 1 +#endif /* WORD_SEARCH_DEFAULT_PROXIMITY */ + +static char* operator_name[WORD_TREE_OP_SIZE] = { + "", + "or", + "and", + "near", + "optional", + "literal", + "mandatory", + "not", + 0 +}; + +class WordTree : public WordCursor { +public: + WordTree() { + proximity = 0; + uniq = 0; + } + + virtual int ContextSaveList(StringList& list) const { + return OK; + } + + virtual int ContextRestoreList(StringList& list) { + return OK; + } + + //- + // Initialize the object. <b>words</b> is used to initialize the + // WordCursor base class, <b>document, document_length</b> and + // <b>location</b> are used to initialize the WordKeySemantic data + // member. The <b>nuniq</b> is the WordKey field position used by + // the WordKeySemantic::DocumentNext function. The <b>nproximity</b> + // is the proximity factor used by the WordKeySemantic::LocationCompare + // method. + // Return OK on success, NOTOK on failure. 
+ // + virtual int Prepare(WordList *words, int nuniq, int nproximity, int *document, int document_length, int location) { + int ret; + proximity = nproximity; + uniq = nuniq; + if((ret = key_semantic.Initialize(document, document_length, location)) != OK) + return ret; + WordKey key; + if(!scope.empty()) { + if(key.Set(scope) != OK) { + fprintf(stderr, "WordTree::Prepare: setting scope %s failed\n", (char*)scope); + return NOTOK; + } + } + key.SetWord(search); + return WordCursor::Initialize(words, key, 0, 0, HTDIG_WORDLIST_WALKER); + } + + //- + // Return a copy of the last document found. + // + WordKey GetDocument() { + WordKey found; + key_semantic.DocumentSet(GetFound().Key(), found); + return found; + } + + //- + // Store in the <i>info</i> data member textual information about + // the latest match found. + // + virtual void SetInfo() { info = GetFound().Key().GetWord(); } + + //- + // Return a copy of the <i>info</i> data member. Should be + // called after SetInfo(). + // + String GetInfo() { return info; } + + //- + // Sort WordTree data members (if any) in ascending frequency order. + // Return OK on success, NOTOK on failure. + // + virtual int AscendingFrequency() { return OK; } + + //- + // Delete WordTree data members (if any) that have a zero frequency. + // The number of data members deleted is returned in <b>stripped</b>. + // Return OK on success, NOTOK on failure. + // + virtual int StripNonExistent(unsigned int& stripped) { + stripped = 0; + return OK; + } + + // + // Input + // + //- + // Proximity factor. See WordKeySemantic::LocationCompare. + // + int proximity; + //- + // Uniq WordKey field position. See WordKeySemantic::DocumentNext. + // + int uniq; + //- + // Semantic of the WordKey object. + // + WordKeySemantic key_semantic; + //- + // Textual representation of the search scope. + // + String scope; + //- + // Original search criterion that may be different from the + // WordCursor::searchKey data member. 
+ // + String search; + + // + // Internal state + // + //- + // Textual information about the latest match. + // + String info; +}; + +// ************************* WordTreeLiteral implementation **************** + +class WordTreeLiteral : public WordTree { +public: + //- + // Constructor. The search criterion is <b>string</b> and the + // scope is <b>nscope.</b>. + // + WordTreeLiteral(const char* string, const char* nscope = "") { + search.set((char*)string); + scope.set((char*)nscope); + } + + //- + // Returns WORD_TREE_LITERAL. + // + int IsA() const { return WORD_TREE_LITERAL; } + + virtual int WalkRewind(); + //- + // Only return a match for each distinct document. + // + virtual int WalkNext(); + virtual int Seek(const WordKey& patch); + + //- + // If scope is set the <b>bufferout</b> is filled with + // <pre> + // ( word "scope" ) + // </pre> + // otherwise the <b>bufferout</b> only contains the word. + // + virtual int Get(String& bufferout) const { + if(scope.empty()) + bufferout << search; + else + bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" " << search << " )"; + return OK; + } + +protected: + WordKey current_document; +}; + +int WordTreeLiteral::WalkRewind() +{ + current_document.Clear(); + return WordCursor::WalkRewind(); +} + +int WordTreeLiteral::WalkNext() +{ + int ret; + do { + ret = WordCursor::WalkNext(); + if(verbose > 3) fprintf(stderr, "WordTreeLiteral::WalkNext: reached %s\n", (char*)GetDocument().Get()); + } while(ret == OK && + key_semantic.DocumentCompare(current_document, GetDocument()) == 0); + + if(ret == OK) + current_document = GetDocument(); + else + current_document.Clear(); + + return ret; +} + +int WordTreeLiteral::Seek(const WordKey& position) +{ + current_document.Clear(); + return WordCursor::Seek(position); +} + +// ************************* WordTreeOperand implementation **************** +// +// NAME +// +// Base class for boolean query resolution nodes +// +// SYNOPSIS +// +// #include <WordTree.h> 
+// +// class WordTreeMethod : public WordTreeOperand { +// ... +// }; +// +// DESCRIPTION +// +// The WordTreeOperand class is derived from WordTree and implemet +// the basic operations and data structures needed for query resultion +// of boolean operators. It contains a list of WordTree objects (the +// operands or cursors) and redefine the basic WordCursor methods +// to operate on all of them according to the logic defined by the +// derived class. +// +// +// END +// + +// +// Helper for debugging that returns the string representation +// of the return codes. +// +static char* ret2str(int ret) +{ + if(ret == WORD_WALK_REDO) + return "REDO"; + + if(ret == WORD_WALK_RESTART) + return "RESTART"; + + if(ret == WORD_WALK_NEXT) + return "NEXT"; + + if(ret == OK) + return "OK"; + + if(ret == NOTOK) + return "NOTOK"; + + if(ret == WORD_WALK_ATEND) + return "ATEND"; + + return "???"; +} + +class WordTreeOperand : public WordTree +{ +public: + //- + // Constructor. The scope is <b>nscope</b>. + // + WordTreeOperand(const char* nscope) { + scope.set((char*)nscope); + } + //- + // Free the objects pointed by <i>cursors</i> with delete as well + // as the <i>cursors</i> array itself with delete []. + // + virtual ~WordTreeOperand(); + + virtual void Clear() { + cursors = 0; + cursors_length = 0; + WordCursor::Clear(); + } + + //- + // Recursively call Optimize on each <i>cursors</i>. + // + virtual int Optimize(); + + //- + // Change the <i>permutation</i> data member ignore mask according + // to WORD_TREE_MANDATORY and WORD_TREE_NOT nodes found in + // <i>cursors</i>. MANDATORY and NOT nodes are reduced (replaced + // by their first child cursor. For each MANDATORY and NOT nodes + // the bit (see WordExcludeMask for information) + // corresponding to their position is ignored (set in the <b>ignore</b> + // argument of the WordExcludeMask::Initialize function. 
For NOT + // nodes, the bit corresponding to their position is set in + // the <b>ignore_mask</b> of the WordExcludeMask::Initialize function + // (i.e. implementing a <i>not</i> operation). + // The <b>proximity</b> argument may be WORD_PERMUTE_PROXIMITY_TOGGLE or + // WORD_PERMUTE_PROXIMITY_NO. + // Returns OK on success, NOTOK on failure. + // + int OptimizeOr(int proximity); + + virtual int ContextSave(String& buffer) const { + StringList list; + int ret; + if((ret = ContextSaveList(list)) != OK) + return ret; + + buffer.trunc(); + String* element; + list.Start_Get(); + while((element = (String*)list.Get_Next())) { + buffer << (*element) << ';'; + } + // + // Trim last ; + // + buffer.chop(1); + + return OK; + } + + virtual int ContextSaveList(StringList& list) const { + // + // Apply to each cursor + // + unsigned int i; + for(i = 0; i < cursors_length; i++) + if(cursors[i]->ContextSaveList(list) == NOTOK) + return NOTOK; + return OK; + } + + virtual int ContextRestore(const String& buffer) { + if(!buffer.empty()) { + StringList list(buffer, ";"); + return ContextRestoreList(list); + } else { + return OK; + } + } + + virtual int ContextRestoreList(StringList& list) { + // + // Apply to each cursor + // + unsigned int i; + for(i = 0; i < cursors_length; i++) + if(cursors[i]->ContextRestoreList(list) == NOTOK) + return NOTOK; + return OK; + } + + //- + // Recursively call WalkInit on each <i>cursors</i>. + // + virtual int WalkInit(); + //- + // Recursively call WalkRewind on each <i>cursors</i>. + // Reset the <i>pos</i> data member with WordKeySemantic::DocumentClear. + // + virtual int WalkRewind(); + //- + // Recursively call WalkFinish on each <i>cursors</i>. + // + virtual int WalkFinish(); + //- + // Recursively call Seek on each <i>cursors</i>. + // Save the <b>patch</b> argument in the <i>pos</i> data + // member. 
+ // + virtual int Seek(const WordKey& patch); + + //- + // The number of occurrence of a WordTreeOperand is the sum of the + // number of occurrence of each term. + // + virtual int Noccurrence(unsigned int& noccurrence) const { + noccurrence = 0; + unsigned int i; + for(i = 0; i < cursors_length; i++) { + unsigned int frequency; + if(cursors[i]->Noccurrence(frequency) != OK) + return NOTOK; + noccurrence += frequency; + } + return OK; + } + + //- + // The <b>bufferout</b> argument is filled with a lisp like representation + // of the tree starting at this node. + // + virtual int Get(String& bufferout) const { + bufferout << "( " << operator_name[IsA()] << " \"" << scope << "\" "; + unsigned int i; + for(i = 0; i < cursors_length; i++) + bufferout << cursors[i]->Get() << " "; + bufferout << " )"; + return OK; + } + + //- + // Call Prepare on each <i>cursors</i>. Set the <i>search</i> member + // with an textual representation of the tree starting at this node. + // + virtual int Prepare(WordList *words, int nuniq, int nproximity, int *document, int document_length, int location) { + int ret; + if((ret = WordTree::Prepare(words, nuniq, nproximity, document, document_length, location)) != OK) + return ret; + unsigned int i; + for(i = 0; i < cursors_length; i++) { + if((ret = cursors[i]->Prepare(words, nuniq, nproximity, document, document_length, location)) != OK) + return ret; + } + return Get(GetSearch().GetWord()); + } + + //- + // The current cursor offset (set by Seek for instance). It + // duplicates the function of the WordCursor <i>key</i> data member + // because the data type is different (WordKey instead of String). + // + WordKey pos; + //- + // Sub nodes array. + // + WordTree** cursors; + //- + // Number of valid entries in the <i>cursors</i> member. 
+ // + unsigned int cursors_length; + //- + // Permutation generator with proximity toggle + // + WordPermute permutation; +}; + +WordTreeOperand::~WordTreeOperand() +{ + if(cursors) { + unsigned int i; + for(i = 0; i < cursors_length; i++) + delete cursors[i]; + free(cursors); + } +} + +int +WordTreeOperand::Optimize() +{ + // + // Apply to each cursor + // + unsigned int i; + for(i = 0; i < cursors_length; i++) + if(cursors[i]->Optimize() == NOTOK) + return NOTOK; + return OK; +} + +int WordTreeOperand::OptimizeOr(int proximity) +{ + unsigned int ignore = 0; + unsigned int ignore_mask = 0; + unsigned int i; + for(i = 0; i < cursors_length; i++) { + int reduce; + // + // Set ignore & ignore_mask if cursor is NOT or MANDATORY + // + switch(cursors[i]->IsA()) { + case WORD_TREE_MANDATORY: + ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i)); + reduce = 1; + break; + case WORD_TREE_NOT: + ignore |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i)); + ignore_mask |= (1 << WORD_EXCLUDE_POSITION2BIT(cursors_length, i)); + reduce = 1; + break; + default: + reduce = 0; + break; + } + // + // Replace the NOT or MANDATORY node by its only child + // + if(reduce) { + WordTreeOperand* old = (WordTreeOperand*)cursors[i]; + cursors[i] = old->cursors[0]; + old->cursors[0] = 0; + old->cursors_length--; + if(old->cursors_length > 0) { + fprintf(stderr, "WordTreeOptional::OptimizeOr: too many cursors\n"); + return NOTOK; + } + delete old; + } + } + return permutation.Initialize(cursors_length, ignore, ignore_mask, proximity); +} + +int +WordTreeOperand::WalkInit() +{ + unsigned int i; + int ret = WORD_WALK_ATEND; + for(i = 0; i < cursors_length; i++) + if((ret = cursors[i]->WalkInit()) != OK) + return ret; + return (status = ret); +} + +int +WordTreeOperand::WalkRewind() +{ + unsigned int i; + int ret = OK; + for(i = 0; i < cursors_length; i++) + if((ret = cursors[i]->WalkRewind()) != OK) + return ret; + status = OK; + key_semantic.DocumentClear(pos); + 
cursor_get_flags = DB_SET_RANGE; + found.Clear(); + return ret; +} + +int +WordTreeOperand::WalkFinish() +{ + unsigned int i; + int ret = OK; + for(i = 0; i < cursors_length; i++) + if((ret = cursors[i]->WalkFinish()) != OK) + return ret; + return ret; +} + +int +WordTreeOperand::Seek(const WordKey& patch) +{ + pos.CopyFrom(patch); + cursor_get_flags = DB_SET_RANGE; + + unsigned int i; + int ret = OK; + for(i = 0; i < cursors_length; i++) + if((ret = cursors[i]->Seek(patch)) != OK && + ret != WORD_WALK_ATEND) + return ret; + status = OK; + return OK; +} + +// ************************* WordTreeOptional implementation **************** + +class WordTreeOptional : public WordTreeOperand { + public: + WordTreeOptional(const char* nscope) : WordTreeOperand(nscope) { } + + //- + // Return WORD_TREE_OPTIONAL + // + virtual int IsA() const { return WORD_TREE_OPTIONAL; } + + virtual int Optimize(); + + virtual int ContextSaveList(StringList& list) const; + + virtual int ContextRestoreList(StringList& list); + + //- + // Multipass walk of the occurrences according to the <i>permutation</i> + // data member specifications. First search for documents containing + // all occurrences near to each other. Then documents that + // contain all occurrences far appart. Then ignore the most frequent + // search criterion and search for documents that contain all the others + // near to each other. The logic goes on until there only remains the + // most frequent word. + // + virtual int WalkNext(); + //- + // Only seek the first non excluded cursor. The implementation + // of WalkNext makes it useless to seek the others. 
+ // + virtual int Seek(const WordKey& position); + + virtual int Prepare(WordList *words, int nuniq, int nproximity, int *document, int document_length, int location) { + int ret; + if((ret = permutation.Initialize(cursors_length, 0, 0, WORD_PERMUTE_PROXIMITY_TOGGLE)) != OK) + return ret; + return WordTreeOperand::Prepare(words, nuniq, nproximity, document, document_length, location); + } + + virtual void SetInfo(); + + virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_TOGGLE; } + + virtual int UsePermutation() const { return 1; } + + //- + // Returns true if all cursors must have a frequency > 0, false otherwise. + // + virtual int AllOrNothing() const { return 0; } + + //- + // Comparison between <b>cursor</b> and <b>constraint</b> is made + // with WordKeySemantic::LocationCompare using the <b>proximity</b> + // argument. If <b>master</b> is NULL it is set to point to <b> + // <b>cursor</b>. + // + // Return WORD_WALK_NEXT if <b>cursor</b> is at <b>constraint</b> and + // set <b>constraint</b> if <b>cursor</b> is <b>master</b>. + // + // Return WORD_WALK_REDO if <b>cursor</b> is above <b>constraint</b> and + // call cursor.WalkNext(). + // + // Return WORD_WALK_RESTART if <b>cursor</b> is below <b>constraint</b> and + // set <b>constraint</b> from <b>cursor</b> using + // WordKeySemantic::DocumentSet if <b>cursor</b> is not <b>master</b> + // otherwise also set location of <b>constraint</b> using + // WordKeySemantic::LocationSet and call WordKeySemantic::LocationNext + // on <b>constraint.</b> + // + // Return WORD_WALK_ATEND if no more match possible. + // + // Return NOTOK on failure. + // + int SearchCursorNear(WordTree& cursor, WordTree*& master, WordKey& constraint, int proximity); + //- + // Comparison between <b>cursor</b> and <b>document</b> is made + // with WordKeySemantic::DocumentCompare. 
+ // + // Return WORD_WALK_NEXT if <b>cursor</b> is above <b>document.</b> + // + // Return WORD_WALK_REDO if <b>cursor</b> is below <b>document</b> + // and call cursor.WalkNext(). + // + // Return WORD_WALK_RESTART if <b>cursor</b> is at <b>document</b> + // and call WordKeySemantic::DocumentNext method on <b>document.</b> + // + // Return WORD_WALK_ATEND if no more match possible. + // + // Return NOTOK on failure. + // + int SearchCursorNot(WordTree& cursor, WordKey& document); + //- + // Comparison between <b>cursor</b> and <b>document</b> is made + // with WordKeySemantic::DocumentCompare. + // + // Return WORD_WALK_NEXT if <b>cursor</b> is at <b>document.</b>. + // + // Return WORD_WALK_REDO if <b>cursor</b> is below <b>document</b> + // + // Return WORD_WALK_RESTART if <b>cursor</b> is above <b>document</b> + // and call WordKeySemantic::DocumentNext method on <b>document.</b> + // + // Return WORD_WALK_ATEND if no more match possible. + // + // Return NOTOK on failure. + // + // + int SearchCursorAnd(WordTree& cursor, WordKey& document, WordExclude& permutation); + // + // We know that : + // 1) document does not contain any excluded words. + // 2) contains at least one occurrence of each non excluded word. + // The logic, although very similar to WordSearchNear::SearchOne + // is therefore simpler. We ignore all excluded cursors and + // return WORD_SEARCH_NOPROXIMITY as soon as a cursor move outside + // <document>. + // + //- + // If <b>document</b> contains words that match proximity + // requirement, return OK. Return WORD_SEARCH_NOPROXIMITY if proximity + // requirement cannot be matched for <document>. + // + int CursorsObeyProximity(WordKey& document); + + //- + // Sort the <i>cursors</i> in ascending frequency order using the + // Noccurrence method on each cursor. + // Return OK on success, NOTOK on failure. + // + virtual int AscendingFrequency(); + //- + // Delete all elements of the <i>cursors</i> array that have a + // zero frequency. 
The <i>cursors</i> array is shrinked and the + // <i>cursors_length</i> set accordingly. Returns the number of + // deletions in the <b>stripped</i> argument. + // Return OK on success, NOTOK on failure. + // + virtual int StripNonExistent(unsigned int& stripped); +}; + +int WordTreeOptional::Optimize() +{ + int ret; + if((ret = WordTreeOperand::Optimize()) != OK) + return ret; + + if(UseProximity() != WORD_PERMUTE_PROXIMITY_ONLY) { + if((ret = AscendingFrequency()) != OK) + return ret; + } + + unsigned int stripped; + if((ret = StripNonExistent(stripped)) != OK) + return ret; + + if(AllOrNothing() && stripped) { + // + // One word is missing and everything is lost, + // Just kill the remaining cursors. + // + unsigned int i; + for(i = 0; i < cursors_length; i++) + delete cursors[i]; + cursors_length = 0; + + return OK; + } else { + return OptimizeOr(UseProximity()); + } +} + +int WordTreeOptional::ContextSaveList(StringList& list) const +{ + int ret; + if((ret = WordTreeOperand::ContextSaveList(list)) != OK) + return ret; + + if(UsePermutation()) { + String* buffer = new String(); + permutation.Get(*buffer); + + list.Add(buffer); + } + + { + String* buffer = new String(); + if((ret = WordCursor::ContextSave(*buffer)) != OK) + return ret; + + list.Add(buffer); + } + + return OK; +} + +int WordTreeOptional::ContextRestoreList(StringList& list) +{ + int ret; + if((ret = WordTreeOperand::ContextRestoreList(list)) != OK) + return ret; + + if(UsePermutation()) { + char* buffer = list[0]; + if((ret = permutation.Set(buffer)) != OK) + return ret; + list.Remove(0); + } + + { + char* buffer = list[0]; + if(!buffer) return NOTOK; + WordKey key(buffer); + if((ret = Seek(key)) != OK) + return ret; + cursor_get_flags = DB_NEXT; + + list.Remove(0); + } + + return OK; +} + +int WordTreeOptional::WalkNext() +{ + WordKey& constraint = pos; + // + // Set constraint with all 0 + // + if(constraint.Empty()) + key_semantic.DocumentClear(constraint); + + // + // Advance cursors to next 
constraint, if not at the + // beginning of the search. + // + int ret = OK; + int match_ok = 0; + do { + // + // Advance cursors so that next call fetches another constraint + // + if(cursor_get_flags == DB_NEXT) + key_semantic.DocumentNext(constraint, uniq); + + if((ret = Seek(constraint)) != OK) + return ret; + + int near = permutation.Proximity(); + WordTree* first = 0; + for(unsigned int i = 0; i < cursors_length;) { + WordTree& cursor = *(cursors[i]); + near = permutation.Proximity(); + int excluded = permutation.Excluded(i); + if(verbose) fprintf(stderr, "WordTreeOptional::WalkNext: %s excluded = %s, proximity = %s\n", (char*)cursor.GetSearch().GetWord(), (excluded ? "yes" : "no"), (near ? "yes" : "no" )); + + int ret; + if(excluded) { + ret = SearchCursorNot(cursor, constraint); + if(verbose > 2) fprintf(stderr, "WordTreeOptional::WalkNext: Not -> %s\n", ret2str(ret)); + } else { + if(near) { + ret = SearchCursorNear(cursor, first, constraint, proximity); + if(verbose > 2) fprintf(stderr, "WordTreeOptional::WalkNext: Near -> %s\n", ret2str(ret)); + } else { + ret = SearchCursorAnd(cursor, constraint, permutation); + if(verbose > 2) fprintf(stderr, "WordTreeOptional::WalkNext: And -> %s\n", ret2str(ret)); + } + } + + switch(ret) { + case WORD_WALK_ATEND: + if(UsePermutation()) { + // + // The search is over with this permutation, try another one. 
+ // + switch(permutation.Next()) { + // + // No permutations left, the end + // + case WORD_PERMUTE_END: + return (status = WORD_WALK_ATEND); + break; + + // + // Sart over with this permutation + // + case WORD_PERMUTE_OK: + if(WalkRewind() != OK) + return NOTOK; + break; + } + first = 0; + i = 0; + } else { + return (status = WORD_WALK_ATEND); + } + break; + case WORD_WALK_REDO: + break; + case WORD_WALK_RESTART: + first = 0; + i = 0; + break; + case WORD_WALK_NEXT: + i++; + break; + case NOTOK: + default: + return ret; + break; + } + } + + cursor_get_flags = DB_NEXT; + + SetInfo(); + + // + // Save possible result, i.e. first non excluded cursor + // + for(unsigned int i = 0; i < cursors_length; i++) { + WordTree& cursor = *(cursors[i]); + if(!permutation.Excluded(i)) { + found.Key().CopyFrom(cursor.GetFound().Key()); + break; + } + } + + match_ok = 1; + // + // Only bother if near and non near search are involved + // + if(UseProximity() == WORD_PERMUTE_PROXIMITY_TOGGLE) { + // + // If we reach this point in the function and + // either proximity search is active or there is + // only one word involved, the match is valid. + // Otherwise it may be excluded, see below. + // + if(!near && permutation.NotExcludedCount() > 1) { + // + // If not using proximity, a match that fits the proximity + // requirements must be skipped because it was matched by + // the previous permutation (see WordPermute). 
//
        switch(CursorsObeyProximity(constraint)) {
        case OK:
          // Cursors satisfy proximity: reject, the do/while tries again.
          match_ok = 0;
          break;
        case WORD_SEARCH_NOPROXIMITY:
          match_ok = 1;
          break;
        default:
        case NOTOK:
          return NOTOK;
          break;
        }
      }
    }
  } while(!match_ok && ret == OK);

  return ret;
}

//
// Position this operand at <position>.
//
// Copies <position> into pos, sets cursor_get_flags to DB_SET_RANGE and
// status to OK, then forwards the seek to the first cursor that the
// current permutation does not exclude and returns that cursor's result.
// Only that one cursor is moved here. If every cursor is excluded an
// error is printed on stderr and NOTOK is returned.
//
int WordTreeOptional::Seek(const WordKey& position)
{
  pos.CopyFrom(position);
  cursor_get_flags = DB_SET_RANGE;
  status = OK;

  unsigned int i;
  for(i = 0; i < cursors_length; i++) {
    if(!permutation.Excluded(i)) {
      WordTree& cursor = *(cursors[i]);
      return cursor.Seek(position);
    }
  }

  fprintf(stderr, "WordTreeOptional::Seek: failed\n");
  return NOTOK;
}


//
// Rebuild the info string of this operand.
//
// Refreshes the info of every cursor, then concatenates (space
// separated) the info of each cursor not excluded by the current
// permutation, and appends "proximity" when the permutation is a
// proximity one.
//
void WordTreeOptional::SetInfo()
{
  unsigned int i;
  for(i = 0; i < cursors_length; i++)
    cursors[i]->SetInfo();

  info.trunc();

  for(i = 0; i < cursors_length; i++) {
    WordTree& cursor = *(cursors[i]);

    if(!permutation.Excluded(i))
      info << cursor.info << " ";
  }

  info << (permutation.Proximity() ? "proximity" : "");
}

//
// One step of the proximity (near) search for <cursor>.
//
// <master> is the cursor whose location anchors the others; when it is
// null on entry, <cursor> becomes the master. <constraint> is the
// current document/location being tested and is updated in place.
//
// Returns WORD_WALK_NEXT when <cursor> fits the constraint,
// WORD_WALK_REDO when <cursor> was moved and must be examined again,
// WORD_WALK_RESTART when <constraint> changed and all cursors must be
// examined again, the status of cursor.WalkNext() when it is not OK,
// or NOTOK.
//
int WordTreeOptional::SearchCursorNear(WordTree& cursor, WordTree*& master, WordKey& constraint, int proximity)
{
  int is_master = master == 0 || master == &cursor;
  if(master == 0) master = &cursor;
  const WordKey& masterKey = master->GetFound().Key();

  int direction = key_semantic.LocationCompare(constraint, cursor.GetFound().Key(), proximity);
  if(verbose > 2) fprintf(stderr, "WordTreeOptional::SearchCursorNear: LocationCompare(\n\t%s,\n\t%s)\n\t = %d\n", (char*)(constraint.Get()), (char*)(cursor.GetFound().Key().Get()), direction);

  //
  // If the cursor is in the authorized locations, consider
  // next cursor
  //
  if(direction == 0) {
    //
    // master cursor makes the rules for location : its location
    // is the base to calculate other words mandatory locations.
    //
    if(is_master)
      key_semantic.LocationSet(cursor.GetFound().Key(), constraint);
    //
    // Fix location constraint to accommodate proximity tolerance.
    //
    key_semantic.LocationNearLowest(constraint, proximity);
    return WORD_WALK_NEXT;

    //
    // If current location is above cursor location
    //
  } else if(direction > 0) {
    //
    // Move the cursor up to the location.
    // (The return value of Seek is not checked; WalkNext reports.)
    //
    cursor.Seek(constraint);
    if(verbose > 1) fprintf(stderr, "WordTreeOptional::SearchCursorNear: leap to %s\n", (char*)constraint.Get());
    int ret;
    if((ret = cursor.WalkNext()) == OK) {
      //
      // Remove the location constraint for the master word
      // so that it matches and then enforce location for other
      // keys.
      //
      if(is_master)
        key_semantic.Location2Document(constraint);
      //
      // Reconsider the situation for this cursor
      //
      return WORD_WALK_REDO;
    } else {
      return ret;
    }

    //
    // If current location is lower than cursor location,
    // meaning that the cursor found no match for the current
    // location.
    //
  } else if(direction < 0) {
    //
    // The cursor document becomes the current document.
    // The master cursor is forced to catch up.
    //
    key_semantic.DocumentSet(cursor.GetDocument(), constraint);
    //
    // It is possible that this cursor document is the same
    // as the master cursor document (if this cursor hit in the
    // same document but a higher location). In this case we must
    // increase the location of the master cursor otherwise it will
    // match without moving and loop forever.
    //
    if(!is_master && key_semantic.DocumentCompare(masterKey, constraint) == 0) {
      key_semantic.LocationSet(masterKey, constraint);
      key_semantic.LocationNext(constraint);
    }
    //
    // Since the current location changed, start over.
    //
    return WORD_WALK_RESTART;
  } else {
    fprintf(stderr, "WordTreeOptional::WordCursorNear: reached unreachable statement\n");
    return NOTOK;
  }
  return NOTOK;
}

//
// One step of the search for an excluded (not) <cursor>.
//
// <document> is the current candidate document and is advanced in
// place when <cursor> points to it.
//
// Returns WORD_WALK_NEXT when <cursor> does not forbid <document>,
// WORD_WALK_REDO when <cursor> was moved and must be examined again,
// WORD_WALK_RESTART when <document> changed, or NOTOK.
//
int WordTreeOptional::SearchCursorNot(WordTree& cursor, WordKey& document)
{
  int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
  if(verbose > 2) fprintf(stderr, "WordTreeOptional::SearchCursorNot: DocumentCompare(\n\t%s,\n\t%s)\n\t = %d\n", (char*)(document.Get()), (char*)(cursor.GetFound().Key().Get()), direction);

  //
  // If the cursor is above the current document
  // (being at the end of walk is being above all documents).
  //
  // Means that the cursor is positioned in an acceptable document
  // and proceed to the next cursor.
  //
  if(direction < 0 || cursor.IsAtEnd()) {
    return WORD_WALK_NEXT;

    //
    // If the cursor is below current document
    //
  } else if(direction > 0) {
    //
    // Move the cursor up to the document
    //
    cursor.Seek(document);
    if(verbose > 1) fprintf(stderr, "WordTreeOptional::SearchCursorNot: leap to %s\n", (char*)document.Get());
    int ret;
    // Reaching the end of the walk is acceptable for an excluded cursor.
    if((ret = cursor.WalkNext()) != OK && ret != WORD_WALK_ATEND)
      return NOTOK;
    //
    // It is expected in this case that the cursor has moved above
    // the current document and another visit in the loop will
    // tell us.
    //
    return WORD_WALK_REDO;

    //
    // If the cursor matches the current document.
    //
    // Means that the current document is not a possible match
    // since it is pointed by this cursor.
    //
  } else if(direction == 0) {
    //
    // The cursor does not give any hint on a possible
    // next document, just go to the next possible one.
    //
    key_semantic.DocumentNext(document, uniq);
    //
    // Since the current document changed, start over.
    //
    return WORD_WALK_RESTART;
  } else {
    fprintf(stderr, "WordTreeOptional::WordCursorNot: reached unreachable statement\n");
    return NOTOK;
  }
  return NOTOK;
}

//
// One step of the search for a mandatory (and) <cursor>.
//
// <document> is the current candidate document and is replaced in
// place by the cursor document when the cursor is already beyond it.
// The <permutation> argument is not used by the body.
//
// Returns WORD_WALK_NEXT when <cursor> is in <document>,
// WORD_WALK_REDO when <cursor> was moved and must be examined again,
// WORD_WALK_RESTART when <document> changed, the status of
// cursor.WalkNext() when it is not OK, or NOTOK.
//
int WordTreeOptional::SearchCursorAnd(WordTree& cursor, WordKey& document, WordExclude& permutation)
{
  int direction = key_semantic.DocumentCompare(document, cursor.GetFound().Key());
  if(verbose > 2) fprintf(stderr, "WordTreeOptional::SearchCursorAnd: DocumentCompare(\n\t%s,\n\t%s)\n\t = %d\n", (char*)(document.Get()), (char*)(cursor.GetFound().Key().Get()), direction);

  //
  // If the cursor is in the current document.
  //
  // Means that the cursor is positioned in an acceptable document
  // and proceed to the next cursor.
  //
  if(direction == 0) {
    return WORD_WALK_NEXT;

    //
    // If the cursor is below current document
    //
  } else if(direction > 0) {
    //
    // Move the cursor up to the document
    //
    cursor.Seek(document);
    if(verbose > 1) fprintf(stderr, "WordTreeOptional::SearchCursorAnd: leap to %s\n", (char*)document.Get());
    int ret;
    if((ret = cursor.WalkNext()) == OK)
      return WORD_WALK_REDO;
    else
      return ret;

    //
    // If the cursor is above current document.
    //
    // Means that the current document is not a possible match
    // since it will never reach it because it's already
    // above it.
    //
  } else if(direction < 0) {
    //
    // The cursor document becomes the current document.
    //
    key_semantic.DocumentSet(cursor.GetDocument(), document);

    //
    // Since the current document changed, start over.
    //
    return WORD_WALK_RESTART;
  } else {
    fprintf(stderr, "WordTreeOptional::WordCursorAnd: reached unreachable statement\n");
    return NOTOK;
  }
  return NOTOK;
}

//
// Tell whether the non excluded cursors fit the proximity requirement
// inside <document>.
//
int WordTreeOptional::CursorsObeyProximity(WordKey& document)
{
  //
  // Run if more than one word is involved, proximity
  // is always true if there is only one word.
+ // + if(permutation.NotExcludedCount() <= 1) return OK; + + WordKey location; + + // + // The first non excluded cursor contains anchor location. + // + unsigned int master_index = 0; + for(unsigned int i = 0; i < cursors_length; i++) { + if(!permutation.Excluded(i)) { + master_index = i; + break; + } + } + const WordKey& masterKey = cursors[master_index]->GetFound().Key(); + key_semantic.DocumentSet(masterKey, location); + + for(unsigned int i = 0; i < cursors_length;) { + if(permutation.Excluded(i)) { + i++; + continue; + } + + WordTree& cursor = *(cursors[i]); + if(cursor.IsAtEnd()) return WORD_SEARCH_NOPROXIMITY; + // if(cursor.status & WORD_WALK_FAILED) return NOTOK; + + // + // If the cursor moved outside of the tested document, + // no proximity match is possible. + // + if(key_semantic.DocumentCompare(cursor.GetFound().Key(), document) != 0) + return WORD_SEARCH_NOPROXIMITY; + + int direction = key_semantic.LocationCompare(location, cursor.GetFound().Key(), proximity); + + // + // If the cursor is in the authorized locations, consider + // next cursor + // + if(direction == 0) { + // + // master cursor makes the rules for location : its location + // is the base to calculate other words mandatory loacations. + // + if(i == master_index) + key_semantic.LocationSet(cursor.GetFound().Key(), location); + // + // Fix location constraint to accomodate proximity tolerance. + // + key_semantic.LocationNearLowest(location, proximity); + i++; + + // + // If current location is greater than cursor location + // + } else if(direction > 0) { + // + // Move the cursor up to the location. 
+ // + cursor.Seek(location); + if(verbose > 1) fprintf(stderr, "WordTreeOptional::CursorsObeyProximity: leap to %s\n", (char*)location.Get()); + int ret; + if((ret = cursor.WalkNext()) != OK) { + if(ret == WORD_WALK_ATEND) { + return WORD_SEARCH_NOPROXIMITY; + } else { + return NOTOK; + } + } + // + // Remove the location constraint for the master word + // so that it matches and then enforce location for other + // keys. + // + if(i == master_index) + key_semantic.Location2Document(location); + // + // Reconsider the situation for this cursor + // + + // + // If current location is lower than cursor location, + // meaning that the cursor found no match in the current + // document. + // + } else if(direction < 0) { + // + // Move to next master key, if possible. + // + if(i != master_index) { + key_semantic.LocationSet(masterKey, location); + key_semantic.LocationNext(location); + } + // + // Since the current location changed, start over. + // + i = 0; + } + } + + return OK; +} + +// +// Helper class for AscendingFrequency method +// +class WordSort { +public: + unsigned int frequency; + WordTree *cursor; +}; + +// +// Helper function for AscendingFrequency method +// +static int ascending_frequency(const void *a, const void *b) +{ + const WordSort& a_cursor = *(WordSort*)a; + const WordSort& b_cursor = *(WordSort*)b; + + return a_cursor.frequency - b_cursor.frequency; +} + +int WordTreeOptional::AscendingFrequency() +{ + // + // Reorder cursors + // + WordSort *tmp = new WordSort[cursors_length]; + + memset((char*)tmp, '\0', cursors_length * sizeof(WordSort)); + + unsigned int i; + for(i = 0; i < cursors_length; i++) { + unsigned int frequency; + if(cursors[i]->Noccurrence(frequency) != OK) { + delete [] tmp; + return NOTOK; + } + if(verbose > 2) fprintf(stderr, "WordTreeOptional::AscendingFrequency: %s occurs %d times\n", (char*)cursors[i]->GetSearch().Get(), frequency); + tmp[i].frequency = frequency; + tmp[i].cursor = cursors[i]; + } + + 
memset((char*)cursors, '\0', cursors_length * sizeof(WordTree*)); + + qsort((void *)tmp, cursors_length, sizeof(WordSort), &ascending_frequency); + + for(i = 0; i < cursors_length; i++) + cursors[i] = tmp[i].cursor; + + delete [] tmp; + return OK; +} + +int WordTreeOptional::StripNonExistent(unsigned int& stripped) +{ + stripped = 0; + + WordTree** tmp = new WordTree*[cursors_length]; + memset((char*)tmp, '\0', cursors_length * sizeof(WordTree*)); + + unsigned int from; + unsigned int to; + + for(to = from = 0; from < cursors_length; from++) { + unsigned int frequency; + if(cursors[from]->Noccurrence(frequency) != OK) { + delete [] tmp; + return NOTOK; + } + + if(verbose > 2) fprintf(stderr, "WordTreeOptional::StripNonExistent: %s occurs %d times\n", (char*)cursors[from]->GetSearch().Get(), frequency); + if(frequency > 0) { + tmp[to++] = cursors[from]; + } else { + delete cursors[from]; + stripped++; + } + } + + memset((char*)cursors, '\0', cursors_length * sizeof(WordTree*)); + + cursors_length = to; + unsigned int i; + for(i = 0; i < cursors_length; i++) + cursors[i] = tmp[i]; + + delete [] tmp; + + return OK; +} + +// ************************* WordTreeOr implementation ******************** + +class WordTreeOr : public WordTreeOperand { + public: + WordTreeOr(const char* nscope) : WordTreeOperand(nscope) { } + + //- + // Return WORD_TREE_OR + // + virtual int IsA() const { return WORD_TREE_OR; } + + virtual int Optimize(); + + virtual int ContextSaveList(StringList& list) const; + + virtual int ContextRestoreList(StringList& list); + + virtual void SetInfo(); + + virtual int WalkNext(); + + virtual int UsePermutation() const { return 0; } + + virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_NO; } +}; + +int WordTreeOr::Optimize() +{ + int ret; + if((ret = WordTreeOperand::Optimize()) != OK) + return ret; + + if((ret = AscendingFrequency()) != OK) + return ret; + + unsigned int stripped; + if((ret = StripNonExistent(stripped)) != OK) + return ret; 
+ + return OptimizeOr(WORD_PERMUTE_PROXIMITY_NO); +} + +int WordTreeOr::ContextSaveList(StringList& list) const +{ + int ret; + if((ret = WordTreeOperand::ContextSaveList(list)) != OK) + return ret; + + { + String* buffer = new String(); + permutation.Get(*buffer); + + list.Add(buffer); + } + + { + String* buffer = new String(); + if((ret = WordCursor::ContextSave(*buffer)) != OK) + return ret; + + list.Add(buffer); + } + + return OK; +} + +int WordTreeOr::ContextRestoreList(StringList& list) +{ + int ret; + if((ret = WordTreeOperand::ContextRestoreList(list)) != OK) + return ret; + + { + char* buffer = list[0]; + if((ret = permutation.Set(buffer)) != OK) + return ret; + list.Remove(0); + } + + { + char* buffer = list[0]; + if(!buffer) return NOTOK; + WordKey key(buffer); + if((ret = Seek(key)) != OK) + return ret; + cursor_get_flags = DB_NEXT; + + list.Remove(0); + } + + return OK; +} + +void WordTreeOr::SetInfo() +{ + unsigned int i; + for(i = 0; i < cursors_length; i++) + cursors[i]->SetInfo(); + + info.trunc(); + + for(i = 0; i < cursors_length; i++) { + WordTree& cursor = *(cursors[i]); + + if(!permutation.Excluded(i) && + !cursor.IsAtEnd() && + key_semantic.DocumentCompare(cursor.GetFound().Key(), GetFound().Key()) == 0) { + info << cursor.info << " "; + } + } +} + +int WordTreeOr::WalkNext() +{ + WordKey& constraint = pos; + // + // Set constraint with all 0 + // + if(constraint.Empty()) + key_semantic.DocumentClear(constraint); + + WordKey candidate; + int match_ok; + do { + int ret; + unsigned int i; + candidate.Clear(); + // + // Advance cursors so that next call fetches another constraint + // + if(cursor_get_flags == DB_NEXT) + key_semantic.DocumentNext(constraint, uniq); + + if((ret = Seek(constraint)) != OK) + return ret; + + match_ok = 1; + // + // All non excluded cursors are about to move + // at or beyond constraint. Search for the one (candidate) that + // is located at the lowest location beyond the constraint. 
//
    for(i = 0; i < cursors_length; i++) {
      if(permutation.Excluded(i))
        continue;
      WordTree& cursor = *(cursors[i]);

      switch((ret = cursor.WalkNext())) {
      case WORD_WALK_ATEND:
        //
        // Constraint is above all matches for this cursor
        //
        break;
      case OK:
        //
        // If candidate is not set or current cursor is below
        // the current candidate, the current cursor document becomes
        // the candidate.
        //
        if(candidate.Empty() ||
           key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) > 0) {
          key_semantic.DocumentSet(cursor.GetDocument(), candidate);
        }
        break;
      default:
        return ret;
        break;
      }
    }

    //
    // No candidate ? It's the end of the match list.
    //
    if(candidate.Empty())
      return WORD_WALK_ATEND;

    found.Key().CopyFrom(candidate);

    SetInfo();

    if(permutation.ExcludedCount() > 0) {
      if((ret = Seek(candidate)) != OK)
        return ret;

      //
      // Restart loop if candidate matches an excluded cursor.
      // (The loop stops at the first excluded cursor that matches.)
      //
      for(i = 0; i < cursors_length && match_ok; i++) {
        if(!permutation.Excluded(i))
          continue;
        WordTree& cursor = *(cursors[i]);

        switch((ret = cursor.WalkNext())) {
        case WORD_WALK_ATEND:
          //
          // This excluded cursor can't match the candidate, fine.
          //
          break;
        case OK:
          //
          // This excluded cursor matches candidate therefore it's
          // not a valid candidate. Restart search with this candidate
          // as the constraint.
          //
          if(key_semantic.DocumentCompare(candidate, cursor.GetFound().Key()) == 0) {
            constraint = candidate;
            match_ok = 0;
          }
          break;
        default:
          return ret;
          break;
        }

      }
    }

    //
    // From now on each iteration (and each call) moves past constraint.
    //
    cursor_get_flags = DB_NEXT;

  } while(!match_ok);

  //
  // Remember the match so that the next call resumes after it.
  //
  constraint = candidate;

  return OK;
}

// ************************* WordTreeAnd implementation ********************

//
// WordTreeOptional specialization that uses neither permutations nor
// proximity and declares itself all-or-nothing.
//
class WordTreeAnd : public WordTreeOptional {
 public:
  WordTreeAnd(const char* nscope) : WordTreeOptional(nscope) { }

  //-
  // Return WORD_TREE_AND
  //
  virtual int IsA() const { return WORD_TREE_AND; }

  virtual int UsePermutation() const { return 0; }

  virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_NO; }

  virtual int AllOrNothing() const { return 1; }
};

// ************************* WordTreeNear implementation ********************

//
// WordTreeOptional specialization that uses no permutations, only
// proximity matches, and declares itself all-or-nothing.
//
class WordTreeNear : public WordTreeOptional {
 public:
  WordTreeNear(const char* nscope) : WordTreeOptional(nscope) { }

  //-
  // Return WORD_TREE_NEAR
  //
  virtual int IsA() const { return WORD_TREE_NEAR; }

  virtual int UsePermutation() const { return 0; }

  virtual int UseProximity() const { return WORD_PERMUTE_PROXIMITY_ONLY; }

  virtual int AllOrNothing() const { return 1; }
};

// ************************* WordTreeMandatory implementation ***************

//
// Operand node whose only addition to WordTreeOperand is its type tag.
//
class WordTreeMandatory : public WordTreeOperand {
 public:
  WordTreeMandatory(const char* nscope) : WordTreeOperand(nscope) { }

  //-
  // Return WORD_TREE_MANDATORY
  //
  virtual int IsA() const { return WORD_TREE_MANDATORY; }
};

// ************************* WordTreeNot implementation ***************

//
// Operand node whose only addition to WordTreeOperand is its type tag.
//
class WordTreeNot : public WordTreeOperand {
 public:
  WordTreeNot(const char* nscope) : WordTreeOperand(nscope) { }

  //-
  // Return WORD_TREE_NOT
  //
  virtual int IsA() const { return WORD_TREE_NOT; }
};

// ************************* WordMatch implementation ********************

//
// Return value of the Search method, tells us
which document +// matched and why. +// +class WordMatch { +public: + + //- + // Return a textual representation of the object. + // + String Get() const; + + //- + // The document that matched + // + WordKey match; + //- + // An ascii description of why it matched. + // + String info; +}; + +String WordMatch::Get() const +{ + String tmp; + match.Get(tmp); + if(!info.empty()) + tmp << "(" << info << ")"; + return tmp; +} + +// ************************* WordSearch implementation ******************** +// +// NAME +// +// Solve a query from a WordTree syntax tree +// +// SYNOPSIS +// +// #include <WordSearch.h> +// +// WordTree* expr = get_query(); +// WordSearch search; +// search.limit_count = NUMBER_OF_RESULTS; +// WordMatch* search.Search(expr); +// ... +// +// DESCRIPTION +// +// The WordSearch class is a wrapper to query an inverted index +// using a WordTree syntax tree. +// +// END +// +class WordSearch { +public: + WordSearch(); + + //- + // Perform a search from the <b>expr</b> specifications. + // Restore the context from <i>context_in</i> on <b>expr</b>. + // Then skip (using WalkNext) <i>limit_bottom</i> entries. + // Then collect in a WordMatch array of size <i>limit_count</i> + // each match returned by WalkNext. When finished store + // the context (ContextSave) in <i>context_out</i>. + // It is the responsibility of the caller to free the WordMatch + // array. If no match are found a null pointer is returned. + // + WordMatch *Search(); + + // + // Search backend, only run the WalkNext loop but does not + // allocate/deallocate data. + // + int SearchLoop(WordTree *expr); + + // + // Return a context description string to resume the + // search at a given point. 
+ // + const String& Context() const { return context_out; } + + // + // Input + // + unsigned int limit_bottom; + unsigned int limit_count; + String context_in; + WordTree* expr; + + // + // Output + // + WordMatch* matches; + unsigned int matches_size; + unsigned int matches_length; + String context_out; +}; + +WordSearch::WordSearch() +{ + // + // Input + // + limit_bottom = 0; + limit_count = 0; + context_in.trunc(); + expr = 0; + + // + // Output + // + matches = 0; + matches_size = 0; + matches_length = 0; + context_out.trunc(); +} + +WordMatch *WordSearch::Search() +{ + int ret = 0; + + if(verbose) fprintf(stderr, "WordSearch::Search: non optimized expression %s\n", (char*)expr->Get()); + if(expr->Optimize() != OK) + return 0; + if(verbose) fprintf(stderr, "WordSearch::Search: optimized expression %s\n", (char*)expr->Get()); + + + // + // Build space for results + // + matches_size = limit_count + 1; + matches = new WordMatch[matches_size]; + matches_length = 0; + + // + // Move to first possible position. 
+ // + if(expr->WalkInit() != OK) + goto end; + + if(expr->ContextRestore(context_in) == NOTOK) + goto end; + ret = SearchLoop(expr); + // + // Don't bother saving the context if at end of + // search (WORD_WALK_ATEND) or error (NOTOK) + // + if(ret == OK && expr->ContextSave(context_out) == NOTOK) + goto end; + +end: + expr->WalkFinish(); + + if(ret == NOTOK || matches_length <= 0) { + delete [] matches; + matches = 0; + } + + return matches; +} + +int WordSearch::SearchLoop(WordTree *expr) +{ + int ret = OK; + unsigned int i; + // + // Skip the first <limit_bottom> documents + // + { + for(i = 0; i < limit_bottom; i++) { + if((ret = expr->WalkNext()) != OK) + return ret; + } + } + // + // Get documents up to <limit_count> or exhaustion + // + for(matches_length = 0; matches_length < limit_count; matches_length++) { + if((ret = expr->WalkNext()) != OK) { + break; + } else { + matches[matches_length].match = expr->GetDocument(); + if(expr->IsA() != WORD_TREE_LITERAL) + matches[matches_length].info = ((WordTreeOperand*)expr)->GetInfo(); + if(verbose) fprintf(stderr, "WordSearch::Search: match %s\n", (char*)matches[matches_length].match.Get()); + } + } + + if(ret == WORD_WALK_ATEND) + matches[matches_length].match.Clear(); + + return ret; +} + +// ************************* WordParser implementation ******************** +// +// NAME +// +// Textual query parser for test purpose +// +// SYNOPSIS +// +// #include <WordParser.h> +// +// WordParser parser; +// WordTree* expr = parser.Parse("( or \"scope1\" a query )"); +// ... +// delete expr; +// +// DESCRIPTION +// +// The WordParser class implement a lisp-like parser for queries +// implemented by the WordTree derived classes. The syntax is rigid +// and should not be used for human input. The generic syntax of an +// expression is +// <pre> +// ( operator "scope" operand [operand ...] ) +// </pre> +// The parenthesis must <b>always</b> be surrounded by white space otherwise +// the parser will be lost. 
The separator is white space and newline. +// Tabulation may be used in scope to separate key fields. +// +// As a special case a single word is strictly equivalent +// to +// <pre> +// ( literal "" word ) +// </pre> +// +// Operators can be lower case or upper case. There is almost no syntax +// checking and it's the responsibility of the caller to associate meaningfull +// operands. For instance ( near ( not foo ) bar ) is meaningless. +// +// OPERATORS +// +// <dl> +// +// <dt> optional +// <dd> WordTreeOptional +// +// <dt> or +// <dd> WordTreeOr +// +// <dt> and +// <dd> WordTreeAnd +// +// <dt> near +// <dd> WordTreeNear +// +// <dt> not,forbiden +// <dd> WordTreeNot +// +// <dt> mandatory +// <dd> WordTreeMandatory +// +// <dt> literal +// <dd> WordTreeLiteral +// +// </dl> +// +// +// END + +// +// Possible values of the info argument of ParseOperands +// +#define WORD_TREE_MANY 0x01 +#define WORD_TREE_ONE 0x02 +#define WORD_TREE_TWO 0x04 + +class WordParser { +public: + WordTree *Parse(const String& expr); + WordTree *ParseList(StringList& terms); + + WordTree *ParseExpr(StringList& terms); + WordTree *ParseUnary(StringList& terms); + WordTree *ParseConj(StringList& terms); + void ParseOperands(StringList& terms, int info, WordTreeOperand* expr); + WordTree *ParseLiteral(StringList& terms); + char *ParseScope(StringList& terms); + + void Shift(StringList& terms); + char *Term(StringList& terms); +}; + +WordTree *WordParser::Parse(const String& expr) +{ + StringList terms(expr, " \n"); + return ParseList(terms); +} + +WordTree *WordParser::ParseList(StringList& terms) +{ + WordTree *expr = ParseExpr(terms); + return expr; +} + +WordTree *WordParser::ParseExpr(StringList& terms) +{ + WordTree *expr = 0; + char* term = strdup(Term(terms)); + if(!strcmp(term, "(")) { + Shift(terms); + expr = ParseExpr(terms); + } else if(!strcmp(term, ")")) { + // + // At end of expression, return null + // + } else if(!mystrcasecmp(term, "optional") || + !mystrcasecmp(term, 
"or") || + !mystrcasecmp(term, "and") || + !mystrcasecmp(term, "near")) { + expr = ParseConj(terms); + } else if(!mystrcasecmp(term, "not") || + !mystrcasecmp(term, "mandatory") || + !mystrcasecmp(term, "forbiden")) { + expr = ParseUnary(terms); + } else { + expr = ParseLiteral(terms); + } + free(term); + return expr; +} + +WordTree *WordParser::ParseUnary(StringList& terms) +{ + int op = 0; + if(!mystrcasecmp(Term(terms), "mandatory")) + op = WORD_TREE_MANDATORY; + else if(!mystrcasecmp(Term(terms), "forbiden") || + !mystrcasecmp(Term(terms), "not")) + op = WORD_TREE_NOT; + + Shift(terms); + char* scope = ParseScope(terms); + WordTreeOperand *expr = 0; + switch(op) { + case WORD_TREE_MANDATORY: + expr = new WordTreeMandatory(scope); + break; + case WORD_TREE_NOT: + expr = new WordTreeNot(scope); + break; + default: + fprintf(stderr, "WordParser::ParseUnary: unexpected operator %d\n", op); + exit(1); + break; + } + free(scope); + ParseOperands(terms, WORD_TREE_ONE, expr); + return expr; +} + +WordTree *WordParser::ParseConj(StringList& terms) +{ + int op = 0; + if(!mystrcasecmp(Term(terms), "optional")) + op = WORD_TREE_OPTIONAL; + else if(!mystrcasecmp(Term(terms), "or")) + op = WORD_TREE_OR; + else if(!mystrcasecmp(Term(terms), "and")) + op = WORD_TREE_AND; + else if(!mystrcasecmp(Term(terms), "near")) + op = WORD_TREE_NEAR; + + Shift(terms); + char* scope = ParseScope(terms); + WordTreeOperand *expr = 0; + switch(op) { + case WORD_TREE_OR: + expr = new WordTreeOr(scope); + break; + case WORD_TREE_OPTIONAL: + expr = new WordTreeOptional(scope); + break; + case WORD_TREE_AND: + expr = new WordTreeAnd(scope); + break; + case WORD_TREE_NEAR: + expr = new WordTreeNear(scope); + break; + default: + fprintf(stderr, "WordParser::ParseOrAnd: unexpected operator %d\n", op); + exit(1); + break; + } + free(scope); + ParseOperands(terms, WORD_TREE_MANY, expr); + return expr; +} + +void WordParser::ParseOperands(StringList& terms, int info, WordTreeOperand* expr) +{ + 
unsigned int operands_length = 0; + unsigned int operands_size = 1; + WordTree **operands = (WordTree**)malloc(operands_size * sizeof(WordTree*)); + WordTree *subexpr = 0; + while((subexpr = ParseExpr(terms))) { + operands_length++; + if((info & WORD_TREE_ONE) && operands_length > 1) { + fprintf(stderr, "WordParser::ParseOperands: expected only one operands\n"); + exit(1); + } else if((info & WORD_TREE_TWO) && operands_length > 2) { + fprintf(stderr, "WordParser::ParseOperands: expected only two operands\n"); + exit(1); + } + if(operands_length > operands_size) { + operands_size = operands_length * 2; + operands = (WordTree**)realloc(operands, operands_size * sizeof(WordTree*)); + } + operands[operands_length - 1] = subexpr; + } + // + // Discard close parenthesis + // + if(strcmp(Term(terms), ")")) { + fprintf(stderr, "WordParser::ParseOperands: expected close parenthesis\n"); + exit(1); + } + Shift(terms); + + expr->cursors = operands; + expr->cursors_length = operands_length; +} + +WordTree *WordParser::ParseLiteral(StringList& terms) +{ + char* term = strdup(Term(terms)); + char* scope = 0; + if(!mystrcasecmp(term, "literal")) { + Shift(terms); + scope = ParseScope(terms); + free(term); + term = strdup(Term(terms)); + Shift(terms); + } else { + scope = strdup(""); + } + WordTreeLiteral *expr = new WordTreeLiteral(term, scope); + Shift(terms); + free(scope); + free(term); + return expr; +} + +char *WordParser::ParseScope(StringList& terms) +{ + char *scope = Term(terms); + int scope_length = strlen(scope); + + // + // Remove surrounding quotes, if any + // + if(scope_length > 0) { + if(scope[scope_length - 1] == '"') + scope[--scope_length] = '\0'; + if(scope[0] == '"') + scope++; + } + + scope = strdup(scope); + + Shift(terms); + + return scope; +} + +char *WordParser::Term(StringList& terms) +{ + char *term = terms[0]; + if(!term) { + fprintf(stderr, "WordParser::Term: unexpected end of expression\n"); + exit(1); + } + return term; +} + +void 
WordParser::Shift(StringList& terms) +{ + terms.Shift(LIST_REMOVE_DESTROY); +} + +// ************************* main loop implementation ******************** + +// +// Store all options from the command line +// +class params_t +{ +public: + char* dbfile; + char* find; + unsigned int bottom; + unsigned int count; + char* context; + int uniq_server; + int proximity; + int nop; + int exclude; +}; + +// +// Explain options +// +static void usage(); +// +// Torture WordExclude* classes +// +static void exclude_test(); + +int main(int ac, char **av) +{ + int c; + extern char *optarg; + params_t params; + + params.dbfile = strdup("test"); + params.find = 0; + params.bottom = 0; + params.count = 10; + params.context = 0; + params.uniq_server = 0; + params.proximity = WORD_SEARCH_DEFAULT_PROXIMITY; + params.nop = 0; + params.exclude = 0; + + while ((c = getopt(ac, av, "vB:f:b:c:C:SP:ne")) != -1) + { + switch (c) + { + case 'v': + verbose++; + break; + case 'B': + free(params.dbfile); + params.dbfile = strdup(optarg); + break; + case 'f': + params.find = strdup(optarg); + break; + case 'b': + params.bottom = (unsigned int)atoi(optarg); + break; + case 'c': + params.count = (unsigned int)atoi(optarg); + break; + case 'C': + params.context = strdup(optarg); + break; + case 'P': + params.proximity = atoi(optarg); + break; + case 'S': + params.uniq_server = SERVER; + break; + case 'n': + params.nop = 1; + break; + case 'e': + params.exclude = 1; + break; + case '?': + usage(); + break; + } + } + + if(params.exclude) { + exclude_test(); + exit(0); + } + + if(!params.find) + usage(); + + Configuration* config = WordContext::Initialize(); + if(!config) { + fprintf(stderr, "search: no config file found\n"); + exit(1); + } + + // + // Forward command line verbosity to htword library. + // + if(verbose > 1) { + String tmp; + tmp << (verbose - 1); + config->Add("wordlist_verbose", tmp); + } + + // + // Prepare the index (-B). 
+ // + WordList words(*config); + words.Open(params.dbfile, O_RDONLY); + + // + // Try the query parser alone + // + if(params.nop) { + WordTree* expr = WordParser().Parse(params.find); + printf("%s\n", (char*)expr->Get()); + exit(0); + } + + // + // Build a syntax tree from the expression provided by user + // + WordTree* expr = WordParser().Parse(params.find); + + // + // Define the semantic of the key + // + { +#define DOCUMENT_LENGTH 3 + static int document[DOCUMENT_LENGTH] = { + TAG, + SERVER, + URL + }; + int document_length = DOCUMENT_LENGTH; + int location = LOCATION; + if(expr->Prepare(&words, params.uniq_server, params.proximity, document, document_length, location) != OK) + exit(1); + } + + WordSearch* search = new WordSearch(); + + // + // Forward query options to WordSearch object + // + search->limit_bottom = params.bottom; // -b + search->limit_count = params.count; // -c + if(params.context) // -C + search->context_in.set(params.context, strlen(params.context)); + + // + // Perform the search (-f) + // + search->expr = expr; + WordMatch* matches = search->Search(); + + // + // Display results, if any. 
+ // + if(matches) { + int i; + for(i = 0; !matches[i].match.Empty(); i++) + printf("match: %s\n", (char*)matches[i].Get()); + const String& context = search->Context(); + if(!context.empty()) + printf("context: %s\n", (const char*)context); + delete [] matches; + } else { + printf("match: none\n"); + } + + // + // Cleanup + // + delete search; + if(params.context) free(params.context); + if(params.find) free(params.find); + if(params.dbfile) free(params.dbfile); + delete expr; + + words.Close(); + delete config; +} + +static void exclude_test() +{ + static unsigned int expected[] = { + 0x00000001, + 0x00000002, + 0x00000004, + 0x00000008, + 0x00000010, + 0x00000003, + 0x00000005, + 0x00000006, + 0x00000009, + 0x0000000a, + 0x0000000c, + 0x00000011, + 0x00000012, + 0x00000014, + 0x00000018, + 0x00000007, + 0x0000000b, + 0x0000000d, + 0x0000000e, + 0x00000013, + 0x00000015, + 0x00000016, + 0x00000019, + 0x0000001a, + 0x0000001c, + 0x0000000f, + 0x00000017, + 0x0000001b, + 0x0000001d, + 0x0000001e, + 0x0000001f + }; + + // + // WordExclude + // + if(verbose) fprintf(stderr, "exclude_test: testing WordExclude\n"); + { + WordExclude exclude; + exclude.Initialize(5); + int count = 0; + while(exclude.Next() == WORD_EXCLUDE_OK) { + if(expected[count] != exclude.Mask()) { + fprintf(stderr, "exclude_test: WordExclude iteration %d expected 0x%08x but got 0x%08x\n", count, expected[count], exclude.Mask()); + exit(1); + } + count++; + } + if(count != sizeof(expected)/sizeof(unsigned int)) { + fprintf(stderr, "exclude_test: WordExclude expected %d iterations but got %d\n", (int)(sizeof(expected)/sizeof(unsigned int)), count); + exit(1); + } + } + + // + // WordExcludeMask without ignore bits behaves exactly the same + // as WordExclude. 
+ // + if(verbose) fprintf(stderr, "exclude_test: testing WordExcludeMask behaving like WordExclude\n"); + { + WordExcludeMask exclude; + exclude.Initialize(5, 0, 0); + int count = 0; + while(exclude.Next() == WORD_EXCLUDE_OK) { + if(expected[count] != exclude.Mask()) { + fprintf(stderr, "exclude_test: WordExcludeMask 1 iteration %d expected 0x%08x but got 0x%08x\n", count, expected[count], exclude.Mask()); + exit(1); + } + count++; + } + if(count != sizeof(expected)/sizeof(unsigned int)) { + fprintf(stderr, "exclude_test: WordExcludeMask 1 expected %d iterations but got %d\n", (int)(sizeof(expected)/sizeof(unsigned int)), count); + exit(1); + } + } + + // + // WordExcludeMask + // + if(verbose) fprintf(stderr, "exclude_test: testing WordExcludeMask\n"); + { + static unsigned int expected[] = { + 0x00000102, + 0x00000108, + 0x00000120, + 0x00000180, + 0x0000010a, + 0x00000122, + 0x00000128, + 0x00000182, + 0x00000188, + 0x000001a0, + 0x0000012a, + 0x0000018a, + 0x000001a2, + 0x000001a8, + 0x000001aa + }; + static unsigned int excluded[] = { + 1, + 0, + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 1 + }; + + WordExcludeMask exclude; + unsigned int ignore = 0x155; + unsigned int ignore_mask = 0x100; + exclude.Initialize(9, ignore, ignore_mask); + if(verbose) { + fprintf(stderr, "exclude_test: ignore\n"); + show_bits(ignore); + fprintf(stderr, "exclude_test: ignore_mask\n"); + show_bits(ignore_mask); + } + if(exclude.NotExcludedCount() != 8) { + fprintf(stderr, "exclude_test: WordExcludeMask 2 expected NoExcludedCount = 8 but got %d\n", exclude.NotExcludedCount()); + exit(1); + } + int count = 0; + while(exclude.Next() == WORD_EXCLUDE_OK) { + if(expected[count] != exclude.Mask()) { + fprintf(stderr, "exclude_test: WordExcludeMask 2 iteration %d expected 0x%08x but got 0x%08x\n", count, expected[count], exclude.Mask()); + exit(1); + } + // + // Test Excluded() method on ignored bit + // Is bit 5 set ? 
(9 - 4) = 5 (counting from 1) + // + if(exclude.Excluded(4)) { + fprintf(stderr, "exclude_test: WordExcludeMask 2 iteration %d bit 5 was set 0x%08x\n", count, exclude.Mask()); + exit(1); + } + // + // Test Excluded() method on variable bit + // Is bit 2 set ? (9 - 2) = 7 (counting from 1) + // + if((exclude.Excluded(7) && !excluded[count]) || + (!exclude.Excluded(7) && excluded[count])) { + fprintf(stderr, "exclude_test: WordExcludeMask 2 iteration %d expected bit 2 %s but was %s in 0x%08x\n", count, (excluded[count] ? "set" : "not set"), (excluded[count] ? "not set" : "set"), expected[count]); + exit(1); + } + count++; + } + if(count != sizeof(expected)/sizeof(unsigned int)) { + fprintf(stderr, "exclude_test: WordExcludeMask 2 expected %d iterations but got %d\n", (int)(sizeof(expected)/sizeof(unsigned int)), count); + exit(1); + } + } + + { + WordExclude exclude; + String ascii("110101"); + String tmp; + exclude.Set(ascii); + exclude.Get(tmp); + if(tmp != ascii) { + fprintf(stderr, "exclude_test: WordExclude::Get/Set expected %s but got %s\n", (char*)ascii, (char*)tmp); + exit(1); + } + if(exclude.Mask() != 0x2b) { + fprintf(stderr, "exclude_test: WordExclude::Mask expected 0x2b but got 0x%02x\n", exclude.Mask()); + exit(1); + } + } + { + WordExcludeMask exclude; + String ascii("12031"); + String tmp; + exclude.Set(ascii); + exclude.Get(tmp); + if(tmp != ascii) { + fprintf(stderr, "exclude_test: WordExcludeMask::Get/Set expected %s but got %s\n", (char*)ascii, (char*)tmp); + exit(1); + } + if(exclude.Mask() != 0x19) { + fprintf(stderr, "exclude_test: WordExcludeMask::Mask expected 0x19 but got 0x%02x\n", exclude.Mask()); + exit(1); + } + } +} + +// ***************************************************************************** +// void usage() +// Display program usage information +// +static void usage() +{ + printf("usage:\tsearch -f words [options]\n"); + printf("\tsearch -e\n"); + printf("Options:\n"); + printf("\t-v\t\tIncreases the verbosity.\n"); + 
printf("\t-B dbfile\tUse <dbfile> as a db file name (default test).\n"); + printf("\t-f expr\t\tLisp like search expression.\n"); + printf("\t\t\tSee WordParser comments in source for more information.\n"); + printf("\t-b number\tSkip number documents before retrieving.\n"); + printf("\t-c number\tRetrieve number documents at most.\n"); + printf("\t-n\t\tOnly parse the search expression and print it.\n"); + printf("\t-P proximity\tUse with near/optional, proximity tolerance is <proximity>\n"); + printf("\t\t\tif negative order of terms is not meaningful\n"); + printf("\t\t\t(default 1).\n"); + printf("\t-C context\tResume search at <context>.\n"); + printf("\t-S\t\tReturn at most one match per server.\n"); + printf("\n"); + printf("\t-e\t\tRun tests on WordExclude and WordExcludeMask.\n"); + exit(1); +} diff --git a/debian/htdig/htdig-3.2.0b6/test/search.txt b/debian/htdig/htdig-3.2.0b6/test/search.txt new file mode 100644 index 00000000..be24754a --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/search.txt @@ -0,0 +1,35 @@ +to <DEF> 0 0 1 1 +the <DEF> 0 0 3 1 +the <DEF> 0 0 5 1 +world <DEF> 0 0 5 2 +the <DEF> 0 0 5 3 +world <DEF> 0 0 5 4 +comes <DEF> 0 0 6 1 +world <DEF> 0 0 11 1 +the <DEF> 0 0 20 1 +an <DEF> 0 0 20 2 +end <DEF> 0 0 20 3 +the <DEF> 0 0 20 4 +world <DEF> 0 0 20 5 +the <DEF> 0 0 21 1 +world <DEF> 0 0 21 2 +the <DEF> 0 0 51 1 +world <DEF> 0 0 51 2 +the <DEF> 0 0 71 1 +world <DEF> 0 0 71 2 +the <DEF> 0 0 81 1 +world <DEF> 0 0 81 2 +lazy <DEF> 0 1 11 2 +dog <DEF> 0 1 11 3 +lazy <DEF> 0 1 11 4 +lazy <DEF> 0 1 21 2 +dog <DEF> 0 1 21 3 +lazy <DEF> 0 5 9 1 +lazy <DEF> 0 5 21 2 +dog <DEF> 0 5 21 3 +lazy <DEF> 0 5 53 2 +dog <DEF> 0 5 53 4 +lazy <DEF> 0 5 56 7 +dog <DEF> 0 5 56 8 +dog <DEF> 0 6 1 5 +lazy <DEF> 0 6 1 8 diff --git a/debian/htdig/htdig-3.2.0b6/test/set_attr b/debian/htdig/htdig-3.2.0b6/test/set_attr new file mode 100644 index 00000000..d283a8c9 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/set_attr @@ -0,0 +1,9 @@ +#!/bin/sh + 
+conf=conf/htdig.conf.tmp +if grep $1 $conf > /dev/null ; then + mv $conf tmp_conf + sed "s@$1.*@$1: $2@" < tmp_conf > $conf +else + echo "$1: $2" >> $conf +fi diff --git a/debian/htdig/htdig-3.2.0b6/test/skiptest_db.txt b/debian/htdig/htdig-3.2.0b6/test/skiptest_db.txt new file mode 100644 index 00000000..0730ec21 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/skiptest_db.txt @@ -0,0 +1,25 @@ +az <DEF> 5 1 0 +bz <DEF> 5 0 2 +cz <DEF> 6 0 3 +et <DEF> 1 0 50 +et <DEF> 20 0 10 +et <DEF> 20 0 20 +et <DEF> 20 0 40 +et <DEF> 20 0 50 +et <DEF> 20 0 60 +et <DEF> 21 0 10 +et <DEF> 21 0 20 +et <DEF> 21 0 30 +et <DEF> 23 0 56 +et <DEF> 28 0 10 +eta <DEF> 1 0 5 +eta <DEF> 9 0 4 +eta <DEF> 10 0 3 +eu <DEF> 2 0 10 +serhysdfh <DEF> 1 0 11 +serhysdfh <DEF> 1 0 50 +tata <DEF> 4 0 6 +tata <DEF> 8 0 6 +tito <DEF> 1 0 3 +tz <DEF> 7 0 4 +uu <DEF> 6 0 5 diff --git a/debian/htdig/htdig-3.2.0b6/test/synonym_dict b/debian/htdig/htdig-3.2.0b6/test/synonym_dict new file mode 100644 index 00000000..ced1921b --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/synonym_dict @@ -0,0 +1,3 @@ +xxyyzz promotion +updating zzyyxx +fred xyzxyz monitoring diff --git a/debian/htdig/htdig-3.2.0b6/test/t_factors b/debian/htdig/htdig-3.2.0b6/test/t_factors new file mode 100755 index 00000000..c1127077 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_factors @@ -0,0 +1,235 @@ +#!/bin/sh +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_factors,v 1.7 2004/06/05 06:26:22 lha Exp $ +# + +# Tests (or should eventually test) the following config attributes: +# author_factor +# backlink_factor +# caps_factor +# date_factor (TODO) +# description_factor +# heading_factor +# keywords_factor +# meta_description_factor +# multimatch_factor +# search_results_order +# 
text_factor +# title_factor +# url_seed_score +# url_text_factor + +# try_order comment query pattern1 pattern2 ... +# comment - description of test, displayed if error occurs +# query - search string passed to htsearch +# pattern - strings expected to occur *in order* in the output +try_order() { + comment="$1" + shift + query="$1" + shift + $htsearch -c $config "$query" > $tmp 2> /dev/null + array="" + for pattern + do + array="$array; array[i++] = "\"$pattern\" + done + miss=`$awk "BEGIN {$array; line = 0; } \ + "'$0'" ~ \".*\"array[line] { line++ } \ + END { print array[line] } " < $tmp ` + if [ "$miss" != "" ] + then + $htsearch -vv -c $config "$query" > /dev/null + echo "String \"$miss\" was not found where expected" + fail "$htsearch -c $config '$query' >> $tmp -- + $comment" + fi +} + + + + +test_functions_action=--start-apache +. ./test_functions + +config=$testdir/conf/htdig.conf.tmp +tmp=/tmp/t_htsearch$$ + +# set up config file with chosen non-default values +cp $testdir/conf/htdig.conf $config + +$htdig "$@" -t -i -c $config || fail "Couldn't dig" +$htpurge -c $config || fail "Couldn't purge" + +try_order "Search for 'also'" \ + "words=also" \ + '4 matches' 'site2.html' 'site4.html' 'bad_local.htm' 'script.html' + +set_attr url_seed_score "site4 *1000+1000" +try_order "Seed score 1000 for site4.html" \ + "words=also" \ + '4 matches' 'site4.html' 'site2.html' 'bad_local.htm' 'script.html' + +set_attr url_seed_score "site4 *1000+1000 script *1000+1000" +try_order "Seed score 1000 for site4.html and script.html" \ + "words=also" \ + '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm' + +set_attr url_seed_score "site4|script *1000+1000" +try_order "Seed score 1000 for site4|script" \ + "words=also" \ + '4 matches' 'site4.html' 'script.html' 'site2.html' 'bad_local.htm' + +set_attr search_results_order "bad_local" +try_order "Search_results_order bad_local" \ + "words=also" \ + '4 matches' 'bad_local.htm' 'site4.html' 'script.html' 
'site2.html' + +set_attr search_results_order "script * e2|e4" +try_order "Search_results_order * script e2|e4" \ + "words=also" \ + '4 matches' 'script.html' 'bad_local.htm' 'site4.html' 'site2.html' + +set_attr url_seed_score "" +set_attr search_results_order "" +set_attr author_factor 0 +set_attr backlink_factor 0 +set_attr caps_factor 0 # not implemented +set_attr date_factor 0 # TODO +set_attr description_factor 0 +set_attr heading_factor 0 +set_attr keywords_factor 0 +set_attr meta_description_factor 0 +set_attr multimatch_factor 0 +set_attr text_factor 0 +set_attr title_factor 0 +set_attr url_text_factor 0 # not implemented + +try_order "Search with factors 0" \ + "words=also" \ + 'No matches' + +try_order "Search for 'service' with title_factor 0" \ + "words=service" \ + 'No matches' +set_attr title_factor 1 +try_order "Search for 'service' with title_factor 1" \ + "words=service" \ + '1 matches' 'script.html' +set_attr text_factor 0.3 +try_order "Greater weight to title factor" \ + "words=service" \ + '4 matches' 'script.html' 'site4.html' 'site%201.html' 'site3.html' +set_attr title_factor -3.2 +try_order "Checking negative title factor" \ + "words=service" \ + '4 matches' 'site4.html' 'site%201.html' 'site3.html' 'script.html' +set_attr title_factor 0 +set_attr text_factor 0 + +# test with all factors 0 except the one which matches + +set_attr description_factor 1 +try_order "Search for 'crossRef' with description_factor 1" \ + "words=crossRef" \ + '1 matches' 'site%201.html' +set_attr description_factor 0 + +set_attr author_factor 1 +try_order "Search for 'media' with author_factor 1" \ + "words=media" \ + '1 matches' 'script.html' +set_attr author_factor 0 + +set_attr meta_description_factor 1 +try_order "Search for 'stars' with meta_description_factor 1" \ + "words=stars" \ + '1 matches' 'site2.html' +set_attr meta_description_factor 0 + +set_attr heading_factor 1 +try_order "Search for 'obtain' with heading_factor 1" \ + "words=obtain" \ + '1 
matches' 'bad_local.htm' +set_attr heading_factor 0 + +set_attr keywords_factor 1 +try_order "Search for 'newWord' with keywords_factor 1" \ + "words=newWord" \ + '1 matches' 'title.html' +set_attr keywords_factor 0 + + +# test with all document-based factors non-zero except the one which matches +set_attr author_factor 1 +#set_attr backlink_factor 1 # not document based +set_attr caps_factor 1 +#set_attr date_factor 1 # not document based +set_attr description_factor 1 +set_attr heading_factor 1 +set_attr keywords_factor 1 +set_attr meta_description_factor 1 +set_attr multimatch_factor 1 +set_attr text_factor 1 +set_attr title_factor 1 +set_attr url_text_factor 1 +set_attr description_factor 1 + +set_attr description_factor 0 +try_order "Search for 'crossRef' with description_factor 0" \ + "words=crossRef" \ + '1 matches' 'title.html' +set_attr description_factor 1 + +set_attr author_factor 0 +try_order "Search for 'media' with author_factor 0" \ + "words=media" \ + 'No matches' +set_attr author_factor 1 + +set_attr meta_description_factor 0 +try_order "Search for 'stars' with meta_description_factor 0" \ + "words=stars" \ + 'No matches' +set_attr meta_description_factor 1 + +set_attr heading_factor 0 +try_order "Search for 'obtain' with heading_factor 0" \ + "words=obtain" \ + 'No matches' +set_attr heading_factor 1 + +set_attr keywords_factor 0 +try_order "Search for 'newWord' with keywords_factor 0" \ + "words=newWord" \ + 'No matches' +set_attr keywords_factor 1 + +# multimatch_factor gives a "boost" to searches matching multiple terms +set_attr title_factor 10 # "get" in title of bad_local +set_attr multimatch_factor 10000 +try_order "Search for 'get or interest or repay' with multimatch_factor 10000" \ + "words=get+interest+repay;method=or" \ + '2 matches' 'site4.html' 'bad_local.htm' +set_attr multimatch_factor 0 +try_order "Search for 'get or interest or repay' with multimatch_factor 0" \ + "words=get+interest+repay;method=or" \ + '2 matches' 
'bad_local.htm' 'site4.html' + +# backlink counts the number of references (of any type) to this document +set_attr backlink_factor 0 +try_order "site4.html has repay+interest, site 1.html only has suggestions" \ + "words=suggestions+repay+interest;method=or" \ + '2 matches' 'site4.html' 'site%201.html' +set_attr backlink_factor 100 +try_order "site 1.html has a higher ratio of backlinks to outgoing links" \ + "words=suggestions+repay+interest;method=or" \ + '2 matches' 'site%201.html' 'site4.html' + +test_functions_action=--stop-apache +. ./test_functions diff --git a/debian/htdig/htdig-3.2.0b6/test/t_fuzzy b/debian/htdig/htdig-3.2.0b6/test/t_fuzzy new file mode 100755 index 00000000..6c28ceef --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_fuzzy @@ -0,0 +1,290 @@ +#!/bin/sh +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_fuzzy,v 1.2 2004/05/28 13:15:30 lha Exp $ +# + +try() { + comment="$1" + shift + query="$1" + shift + $htsearch -c $config "$query" > $tmp + for pattern + do + if grep "$pattern" $tmp > /dev/null + then : + else + $htsearch -vv -c $config "$query" > /dev/null + echo "Output doesn't match \"$pattern\"" + fail "$htsearch -c $config '$query' >> $tmp -- + $comment" + fi + done +} + + + + +test_functions_action=--start-apache +. 
./test_functions + +config=$testdir/conf/htdig.conf.tmp +tmp=/tmp/t_htsearch$$ + +databases="accents metaphone soundex synonym endings_root2word endings_word2root" + +# set up config file with chosen non-default values +cp $testdir/conf/htdig.conf $config +for database in $databases; do + set_attr ${database}_db "$testdir/var/htdig/test_${database}.db" + rm -f $testdir/var/htdig/test_${database}.db +done +set_attr synonym_dictionary "$testdir/synonym_dict" +set_attr endings_affix_file "$testdir/dummy.affixes" +set_attr endings_dictionary "$testdir/dummy.stems" + +# db.words.db needed by htfuzzy, so dig before generating fuzzy databases +#set_attr locale fr +set_attr extra_word_characters �� +$htdig "$@" -t -i -c $config +$htpurge -c $config + +# Check that databases can be generated +$htfuzzy -c $config accents soundex metaphone endings synonyms || fail "Error generating fuzzy database" + +# Make sure databases put in correct locations +for database in $databases; do + if [ ! -f $testdir/var/htdig/test_${database}.db ] ; then + fail "htfuzzy didn't recognise ${database}_db attribute" + fi +done + +try "Search for 'álso' without search_algorithm=accents" \ + "words=�lso;search_algorithm=exact" \ + 'No matches' + +try "Search for 'álso' with search_algorithm not in allow_in_form" \ + "words=�lso;search_algorithm=accents" \ + 'No matches' + +set_attr allow_in_form search_algorithm + +try "Search for 'álso' with search_algorithm=accents" \ + "words=�lso;search_algorithm=accents:0.1" \ + '4 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' '<strong>also</strong>' + +try "Search for 'francais' without search_algorithm=accents" \ + "words=francais;search_algorithm=exact:0.1" \ + 'No matches' + +#try "Search for '\"fran ais\"' with search_algorithm=accents" \ +# "words=%22fran+ais%22;search_algorithm=accents:1" \ +# '1 matches' 'site4.html' + +try "Search for 'francais' with search_algorithm=accents" \ + "words=francais;search_algorithm=accents:0.1" \ + '1 
matches' 'site4.html' + +try "Search for 'quebec' without search_algorithm=accents" \ + "words=quebec;search_algorithm=exact:0.1" \ + 'No matches' + +try "Search for 'quebec' with search_algorithm=accents" \ + "words=quebec;search_algorithm=accents:0.1" \ + '1 matches' 'site4.html' + + + + +try "Search for 'accownt' without search_algorithm=soundex" \ + "words=accownt;search_algorithm=exact:1" \ + 'No matches' + +try "Search for 'accownt' with search_algorithm=soundex" \ + "words=accownt;search_algorithm=soundex:1" \ + '2 matches' 'script.html' 'site4.html' '<strong>account</strong>' + +try "Search for 'accownt' with search_algorithm=metaphone" \ + "words=accownt;search_algorithm=metaphone:1" \ + '2 matches' 'script.html' 'site4.html' '<strong>account</strong>' + + + + +try "Search for 'zzyyxx' without search_algorithm=synonyms" \ + "words=zzyyxx;search_algorithm=exact:1" \ + 'No matches' + +try "Checking synonyms of the form word-in-doc word-in-query" \ + "words=zzyyxx;search_algorithm=synonyms:1" \ + '1 matches' 'site3.html' + +try "Search for 'xxyyzz' without search_algorithm=synonyms" \ + "words=xxyyzz;search_algorithm=exact:1" \ + 'No matches' + +try "Checking synonyms of the form word-in-query word-in-doc" \ + "words=xxyyzz;search_algorithm=synonyms:1" \ + '1 matches' 'site3.html' + +try "Search for 'xyzxyz' without search_algorithm=synonyms" \ + "words=xyzxyz;search_algorithm=exact:1" \ + 'No matches' + +try "Checking synonyms of the form common-word word-in-query word-in-doc" \ + "words=xyzxyz;search_algorithm=synonyms:1" \ + '1 matches' 'site3.html' + +try "Checking exact is used as a default if fuzzy rules yield no word" \ + "words=road;search_algorithm=synonyms:1" \ + '1 matches' 'site4.html' '<strong>Road</strong>' + + +try "Searching for 'copy' without 'endings'" \ + "words=copy;search_algorithm=exact:1" \ + 'No matches' + +try "Searching for 'copy' with 'endings'" \ + "words=copy;search_algorithm=endings:1" \ + '2 matches' 'bad_local.htm' 'site3.html' 
'<strong>Copyright</strong>' + + + +try "Searching for 'univers*' without 'prefix'" \ + "words=univers*;search_algorithm=exact:1" \ + 'No matches' + +try "Searching for 'univers*' with 'prefix'" \ + "words=univers*;search_algorithm=prefix:1" \ + '2 matches' 'script.html' 'site4.html' + +set_attr allow_in_form "search_algorithm max_prefix_matches min_prefix_length" +set_attr prefix_match_character "?" + +try "Searching for 'res?' with 'prefix'" \ + "words=res?;search_algorithm=prefix:1;max_prefix_matches=100" \ + '3 matches' 'script.html' 'site3.html' 'site4.html' 'reserved' 'residency' 'residents' 'resources' 'respectively' 'response' 'restricted' + +try "Searching for 'res?' with 'prefix', at most 2 prefixes" \ + "words=res?;search_algorithm=prefix:1;max_prefix_matches=2" \ + '2 matches' 'site3.html' 'site4.html' 'reserved' 'residency)' + +set_attr prefix_match_character "etc" + +try "Searching for 'resetc' with 'prefix', prefix_match_character=etc" \ + "words=resetc;search_algorithm=prefix:1;max_prefix_matches=2" \ + '2 matches' 'site3.html' 'site4.html' 'reserved' 'residency)' + +set_attr minimum_prefix_length 4 + +# Check it doesn't count the prefix_match "character" as part of the +# "minimum_prefix_length" +try "Searching for 'resetc' with 'prefix', minimum prefix length 4" \ + "words=resetc;search_algorithm=prefix:1;max_prefix_matches=4" \ + 'No matches' + +set_attr prefix_match_character "?" + +try "Searching for 'res?' 
with 'prefix', minimum prefix length 4" \ + "words=res?;search_algorithm=prefix:1;max_prefix_matches=100" \ + 'No matches' + + + + + + +try "Searching for 'ili' without 'substring'" \ + "words=ili;search_algorithm=exact:1" \ + 'No matches' + +try "Searching for 'ili' with 'substring'" \ + "words=ili;search_algorithm=substring:1" \ + '2 matches' 'script.html' 'site4.html' '(affiliated' 'utilised)' + +set_attr allow_in_form "search_algorithm substring_max_words" + +try "Searching for 'ili' with 'substring', substring_max_words=3" \ + "words=ili;search_algorithm=substring:1;substring_max_words=3" \ + '2 matches' 'script.html' 'site4.html' 'eligibility)' + + + +try "Searching for 'acccount' without 'speling'" \ + "words=acccount;search_algorithm=exact:1" \ + 'No matches' + +try "Searching for 'acccount' with 'speling'" \ + "words=acccount;search_algorithm=speling:1" \ + '2 matches' 'script.html' 'site4.html' '<strong>account</strong>' + +try "Searching for 'accountx' with 'speling'" \ + "words=accountx;search_algorithm=speling:1" \ + '2 matches' 'script.html' 'site4.html' '<strong>account</strong>' + + +set_attr allow_in_form "search_algorithm minimum_speling_length" + +try "Searching for 'accountx' with minimum_speling_length=9" \ + "words=accountx;search_algorithm=speling:1;minimum_speling_length=9" \ + 'No matches' + +try "Searching for 'accountx' with minimum_speling_length=8" \ + "words=accountx;search_algorithm=speling:1;minimum_speling_length=8" \ + '2 matches' 'script.html' 'site4.html' '<strong>account</strong>' + + + + + +try "Searching for '.*vers[^a].*' without 'regex'" \ + "words=.*vers[^a].*;search_algorithm=exact:1" \ + 'No matches' + +set_attr extra_word_characters ".*[^]\\$" + +try "Searching for '.*vers[^a].*' with 'regex'" \ + "words=.*vers[^a].*;search_algorithm=regex:1" \ + '3 matches' '(universities' 'versions)' + +set_attr allow_in_form "search_algorithm regex_max_words" + +try "Searching for '.*vers[^a].*' with 'regex', regex_max_words=3" \ + 
"words=.*vers[^a].*;search_algorithm=regex:1;regex_max_words=3" \ + '2 matches' '(universities' 'version)' + + + + +try "Searching for 'versi' without 'substring'" \ + "words=versi;search_algorithm=exact:1" \ + 'No matches' + +try "Searching for 'versi' with 'substring'" \ + "words=versi;search_algorithm=substring:1" \ + '3 matches' '(universities' 'versions)' + +set_attr allow_in_form "search_algorithm substring_max_words" + +try "Searching for 'versi' with 'substring', substring_max_words=3" \ + "words=versi;search_algorithm=substring:1;substring_max_words=3" \ + '2 matches' '(universities' 'version)' + + + + + +for database in $databases; do + rm -f $testdir/var/htdig/test_${database}.db + try "Searching width $database database missing" \ + "words=account" \ + '2 matches' +done + +test_functions_action=--stop-apache +. ./test_functions diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htdb b/debian/htdig/htdig-3.2.0b6/test/t_htdb new file mode 100755 index 00000000..05b83e86 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_htdb @@ -0,0 +1,33 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_htdb,v 1.4 2004/05/28 13:15:30 lha Exp $ +# + +. 
./test_functions + +for compress in "" "-z" +do + rm -f test + ../htdb/htdb_load -f $srcdir/t_htdb.dump $compress test || exit 1 + ../htdb/htdb_dump -p $compress test > t_htdb.d1 || exit 1 + cmp $srcdir/t_htdb.dump t_htdb.d1 || exit 1 + ../htdb/htdb_stat $compress -d test | grep '11 Number of keys in the tree' > /dev/null || exit 1 +done + +export MIFLUZ_CONFIG ; MIFLUZ_CONFIG=${srcdir}/mifluz-search.conf + +for compress in "" "-z" +do + rm -f test test_weakcmpr + ./txt2mifluz $compress $VERBOSE < $srcdir/search.txt + ../htdb/htdb_dump -W -p $compress test > t_htdb.d1 || exit 1 + rm -f test test_weakcmpr + ../htdb/htdb_load -W -f t_htdb.d1 $compress test || exit 1 + ../htdb/htdb_dump -W -p $compress test > t_htdb.d2 || exit 1 + cmp t_htdb.d1 t_htdb.d2 || exit 1 +done diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htdb.dump b/debian/htdig/htdig-3.2.0b6/test/t_htdb.dump new file mode 100755 index 00000000..e1f899c7 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_htdb.dump @@ -0,0 +1,28 @@ +VERSION=2 +format=print +type=btree +db_pagesize=8192 +HEADER=END + \00\00\00\00aaa0123456789012345678900123456789012345678901234567890 + 0123456789 + \01\00\00\00aaaa0123456789012345678900123456789012345678901234567890 + 0123456789 + \02\00\00\00aaaaaaaaaa0123456789012345678900123456789012345678901234567890 + 0123456789 + \03\00\00\00aaaabcd0123456789012345678900123456789012345678901234567890 + 0123456789 + \15u\00\00aaa0123456789012345678900123456789012345678901234567890 + 0123456789 + \16u\00\00aaaa0123456789012345678900123456789012345678901234567890 + 0123456789 + \17u\00\00aaaaaaaaaa0123456789012345678900123456789012345678901234567890 + 0123456789 + \18u\00\00aaaabcd0123456789012345678900123456789012345678901234567890 + 0123456789 + *\ea\00\00aaa0123456789012345678900123456789012345678901234567890 + 0123456789 + +\ea\00\00aaaa0123456789012345678900123456789012345678901234567890 + 0123456789 + ,\ea\00\00aaaaaaaaaa0123456789012345678900123456789012345678901234567890 + 
0123456789
+DATA=END
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htdig b/debian/htdig/htdig-3.2.0b6/test/t_htdig
new file mode 100755
index 00000000..d549bf4e
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_htdig
@@ -0,0 +1,154 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_htdig,v 1.16 2004/06/05 06:26:22 lha Exp $
+#
+
+# Tests the following config attributes:
+#	case_sensitive
+#	common_url_parts
+#	limit_urls_to
+#	robotstxt_name
+#	url_part_aliases
+#	url_rewrite_rules
+
+flags="$@"
+
+# try CONFIG EXPECTED
+#   Digs and purges with CONFIG, then fails unless the sorted URL list
+#   printed by ./document is exactly EXPECTED.
+try() {
+    config=$1
+    expected=$2
+
+#    $htdig "$flags" -t -i -c $config	# crashes on Solaris, HP-UX -- lha
+    $htdig $flags -t -i -c $config
+    # One test gives empty database -- suppress this warning.
+    $htpurge -c $config > /dev/null 2>&1
+
+    # only used when url_part_aliases was set before the call...
+    set_attr url_part_aliases "bar foo"
+
+    got=`./document -c $config -u | sort`
+
+    if [ "$expected" != "$got" ]
+    then
+	fail "running htdig: expected
+$expected
+but got
+$got"
+
+    fi
+}
+
+test_functions_action=--start-apache
+. 
./test_functions + +conf=$testdir/conf/htdig.conf.tmp +cp $testdir/conf/htdig.conf $conf + +# complete dig of set 1 + +try $conf \ +'http://localhost:7400/set1/ +http://localhost:7400/set1/bad_local.htm +http://localhost:7400/set1/script.html +http://localhost:7400/set1/site%201.html +http://localhost:7400/set1/site2.html +http://localhost:7400/set1/site3.html +http://localhost:7400/set1/site4.html +http://localhost:7400/set1/sub%2520dir/ +http://localhost:7400/set1/sub%2520dir/empty%20file.html +http://localhost:7400/set1/title.html' + + +# Check common_url_parts being encoded properly +set_attr common_url_parts "dummy1 dummy2 dummy3 dummy4 dummy5 dummy6 dummy7 dummy8 dummy9 dummy10 dummy11 dummy12" +got=`./document -c $config -u | sort` +expected='dummy1localhost:7400/set1/ +dummy1localhost:7400/set1/bad_localdummy7 +dummy1localhost:7400/set1/scriptdummy6 +dummy1localhost:7400/set1/site%201dummy6 +dummy1localhost:7400/set1/site2dummy6 +dummy1localhost:7400/set1/site3dummy6 +dummy1localhost:7400/set1/site4dummy6 +dummy1localhost:7400/set1/sub%2520dir/ +dummy1localhost:7400/set1/sub%2520dir/empty%20filedummy6 +dummy1localhost:7400/set1/titledummy6' +if [ "$expected" != "$got" ] +then + fail "running htdig: expected +$expected +but got +$got" +fi + + +# Pretend we are another user; robots.txt bans us from seeing 'site*' + +set_attr robotstxt_name other +# (Reverse mapping from 'foo' to 'bar' implemented in try.) +set_attr url_part_aliases "http://localhost:7400/set1 foo" + +try $conf \ +'bar/ +bar/bad_local.htm +bar/script.html +bar/sub%2520dir/ +bar/sub%2520dir/empty%20file.html +bar/title.html' + +# back to default. 
+set_attr url_part_aliases + +# check limit_urls_to obeys case sensitive +set_attr start_url HTTP://LocalHost:7400/Set1/ +try $conf "" + +set_attr case_sensitive false +set_attr robotstxt_name htdig +# common_url_parts is case sensitive, despite case_sensitive=false +set_attr common_url_parts "http:// http://local HTTP://LocalHost 7400/set1" +# Replace site4.html by a file:/// URL. Must explicitly add leading chars +set_attr url_rewrite_rules '(.*)si[a-z]*[4-9]*\.([a-z]*)tml file:///'$PWD'/htdocs/set1/site4.\\2tml' +set_attr limit_urls_to '${start_url} site4.html' + +try $conf \ +'file://'$PWD'/htdocs/set1/site4.html +http://localhost:7400/set1/ +http://localhost:7400/set1/bad_local.htm +http://localhost:7400/set1/script.html +http://localhost:7400/set1/site%201.html +http://localhost:7400/set1/site2.html +http://localhost:7400/set1/site3.html +http://localhost:7400/set1/sub%2520dir/ +http://localhost:7400/set1/sub%2520dir/empty%20file.html +http://localhost:7400/set1/title.html' + +# Check common_url_parts being encoded properly +set_attr common_url_parts "dummy1 dummy2 dummy3 dummy4" +got=`./document -c $config -u | sort` +expected='dummy2host:dummy4/ +dummy2host:dummy4/bad_local.htm +dummy2host:dummy4/script.html +dummy2host:dummy4/site%201.html +dummy2host:dummy4/site2.html +dummy2host:dummy4/site3.html +dummy2host:dummy4/sub%2520dir/ +dummy2host:dummy4/sub%2520dir/empty%20file.html +dummy2host:dummy4/title.html +file://'$PWD'/htdocs/set1/site4.html' +if [ "$expected" != "$got" ] +then + fail "running htdig: expected +$expected +but got +$got" +fi + +test_functions_action=--stop-apache +. 
./test_functions diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htdig_local b/debian/htdig/htdig-3.2.0b6/test/t_htdig_local new file mode 100755 index 00000000..8405ee07 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_htdig_local @@ -0,0 +1,359 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_htdig_local,v 1.10 2004/05/28 13:15:30 lha Exp $ +# + +# Tests the following config attributes: +# bad_local_extensions +# check_unique_md5 +# content_classifier +# database_dir +# exclude_urls +# limit_normalized +# limit_urls_to +# local_extensions +# local_urls +# local_urls_only +# local_user_urls +# max_hop_count +# md5_db +# mime_types +# remove_default_doc +# server_aliases +# start_url + +test_functions_action=--start-apache +. ./test_functions + +# set up config file with chosen non-default values +config=$testdir/conf/htdig.conf.tmp +cp $testdir/conf/htdig.conf2 $config + +################################################################################ +#test for local-file-system access to <http://...> URLs + +/bin/rm -f var/htdig2/* +set_attr start_url "http://localhost:7400/set1/ http://localhost:7400/set1/title.html?site3.html http://localhost:7400/set1/title.html?site4.html" +# ban ite3.htm from query, but not from main URL. 
+# Allow site3.html, but not title.html?site3.html
+set_attr bad_querystr ite3.htm
+expected='bad_local.htm'
+got=`$htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--"`
+if [ "$expected" != "$got" ]
+then
+	fail "first htdig: expected
+$expected
+but got
+$got"
+fi
+
+expected='db.docdb
+db.docs
+db.docs.index
+db.excerpts
+db.worddump
+db.words.db
+db.words.db_weakcmpr'
+got=`/bin/ls var/htdig2`
+if [ "$expected" != "$got" ]
+then
+	fail "created files: expected
+$expected
+but got
+$got"
+fi
+
+$htpurge -c $config
+
+# should http://localhost:7400/set1/sub%2520dir be purged?
+expected='http://localhost:7400/set1/
+http://localhost:7400/set1/bad_local.htm
+http://localhost:7400/set1/script.html
+http://localhost:7400/set1/site%201.html
+http://localhost:7400/set1/site2.html
+http://localhost:7400/set1/site3.html
+http://localhost:7400/set1/site4.html
+http://localhost:7400/set1/sub%2520dir/
+http://localhost:7400/set1/sub%2520dir/empty%20file.html
+http://localhost:7400/set1/title.html
+http://localhost:7400/set1/title.html?site4.html'
+
+got=`./document -c $config -u | sort`
+
+if [ "$expected" != "$got" ]
+then
+	fail "first document: expected
+$expected
+but got
+$got"
+fi
+
+set_attr bad_querystr	# back to default so the ite3.htm filter does not leak into later digs
+
+
+################################################################################
+# limit_urls_to applies before server alias expansion
+set_attr start_url http://myhost/set1/index.html
+set_attr limit_urls_to "http://myhost/set1/"
+set_attr server_aliases myhost=localhost:7400
+$htdig "$@" -t -i -c $config	|| fail "couldn't dig second time"
+$htpurge -c $config		|| fail "couldn't purge second time"
+# only start_url uses alias, so only it passes the limit_urls_to test
+expected='http://localhost:7400/set1/'
+
+got=`./document -c $config -u | sort`
+
+if [ "$expected" != "$got" ]
+then
+	fail "second document: expected
+$expected
+but got
+$got"
+fi
+
+
+
+################################################################################
+# Check remote URLs not retrieved if local_urls_only specified +set_attr local_urls_only true +set_attr remove_default_doc site2.html +# Note: local_urls_only doesn't handle directories without a default doc +set_attr local_default_doc "site2.html empty%20file.html" +set_attr start_url http://myhost/set1/index.html +# don't care what the aliased URL is; only check the normalized one +set_attr limit_urls_to +set_attr limit_normalized "http://localhost:7400/set1/" +set_attr server_aliases myhost=localhost:7400 +$htdig "$@" -t -i -c $config || fail "couldn't dig third time" +$htpurge -c $config || fail "couldn't purge third time" +expected='http://localhost:7400/set1/ +http://localhost:7400/set1/index.html +http://localhost:7400/set1/script.html +http://localhost:7400/set1/site%201.html +http://localhost:7400/set1/site3.html +http://localhost:7400/set1/site4.html +http://localhost:7400/set1/sub%2520dir/ +http://localhost:7400/set1/title.html' + +got=`./document -c $config -u | sort` + +if [ "$expected" != "$got" ] +then + fail "third document: expected +$expected +but got +$got" +fi +set_attr remove_default_doc index.html +set_attr local_urls_only false +set_attr limit_normalized + + +################################################################################ +#test for <file:///...> URLs + +expected='' # no "bad local" extensions for file:/// +# Check only one "title.html" found... 
+set_attr check_unique_md5 true +set_attr start_url "http://localhost:7400/set1/title.html file://$PWD/htdocs/set1/" +set_attr limit_urls_to '${start_url}' +got=`$htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--"` +if [ "$expected" != "$got" ] +then + fail "fourth htdig: expected +$expected +but got +$got" +fi + +expected='db.docdb +db.docs +db.docs.index +db.excerpts +db.md5hash.db +db.worddump +db.words.db +db.words.db_weakcmpr' +got=`/bin/ls var/htdig2` +if [ "$expected" != "$got" ] +then + fail "fourth created files: expected +$expected +but got +$got" +fi + +$htpurge -c $config || fail "couldn't purge fourth time" + +expected='file:///set1/bad_local.htm +file:///set1/index.html +file:///set1/script.html +file:///set1/site%201.html +file:///set1/site2.html +file:///set1/site3.html +file:///set1/site4.html +file:///set1/sub%2520dir/empty%20file.html +/title.html' + +got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort | sed "s#.*/title.html#/title.html#"` + +if [ "$expected" != "$got" ] +then + fail "fourth document: expected +$expected +but got +$got" +fi + + +################################################################################ +#test mime types handling + +expected='' # no "bad local" extensions for file:/// +set_attr max_hop_count 1 # removes "empty%20file.html" +set_attr exclude_urls "site4.html script.html site[3].html" +set_attr bad_extensions .foo +set_attr local_urls_only false + +rm -f var/htdig2/db.md5hash.db +set_attr md5_db '${database_base}.md5.db' + +set_attr mime_types $PWD/mime-without-htm +set_attr content_classifier $PWD/say-text +echo 'text/html html' > mime-without-htm +echo '#!/bin/sh + echo text/plain' > say-text +chmod 700 say-text +got=`$htdig "$@" -t -i -vv -c $config | grep "MIME type:" | sed -e"s-.*/--"` +if [ "$expected" != "$got" ] +then + fail "fifth htdig: expected +$expected +but got +$got" +fi + +expected='db.docdb +db.docs +db.docs.index +db.excerpts +db.md5.db +db.worddump 
+db.words.db +db.words.db_weakcmpr' +got=`/bin/ls var/htdig2` +if [ "$expected" != "$got" ] +then + fail "fifth created files: expected +$expected +but got +$got" +fi + +$htpurge -c $config || fail "couldn't purge fifth time" + +expected='file:///set1/bad_local.htm +file:///set1/index.html +file:///set1/nph-location.cgi +file:///set1/site%201.html +file:///set1/site2.html +file:///set1/site3.html +file:///set1/title.html' + +got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort` + +if [ "$expected" != "$got" ] +then + fail "fifth document: expected +$expected +but got +$got" +fi + +################################################################################ +expected='' # no "bad local" extensions for file:/// +set_attr max_hop_count # removes "empty%20file.html" +set_attr exclude_urls /CVS/ +set_attr valid_extensions ".foo .html" +set_attr bad_extensions + +set_attr mime_types $PWD/mime-without-htm +set_attr content_classifier $PWD/say-text +echo 'text/html html' > mime-without-htm +echo '#!/bin/sh + echo text/plain' > say-text +chmod 700 say-text +got=`$htdig "$@" -t -i -vv -c $config | grep "MIME type:" | sed -e"s-.*/--"` +if [ "$expected" != "$got" ] +then + fail "sixth htdig: expected +$expected +but got +$got" +fi + +$htpurge -c $config || fail "couldn't purge sixth time" + +expected='file:///set1/index.html +file:///set1/nph-location.foo +file:///set1/script.html +file:///set1/site%201.html +file:///set1/site2.html +file:///set1/site3.html +file:///set1/site4.html +file:///set1/sub%2520dir/empty%20file.html +file:///set1/title.html' + +got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort` + +if [ "$expected" != "$got" ] +then + fail "sixth document: expected +$expected +but got +$got" +fi + + +################################################################################ +set_attr local_urls_only +set_attr local_urls "http://somewhere/=$PWD/htdocs/" +set_attr local_user_urls "http://somewhere/=$PWD/,/set1/" +set_attr start_url 
"http://somewhere/~htdocs/" + +set_attr valid_extensions +set_attr local_default_doc index.html +set_attr remove_default_doc index.html + +$htdig "$@" -t -i -c $config || fail "couldn't dig seventh time" +$htpurge -c $config || fail "couldn't purge seventh time" + +#local_urls_only can't handle .../~htdocs/sub%2520dir/empty%20file.html +expected='http://somewhere/~htdocs/ +http://somewhere/~htdocs/script.html +http://somewhere/~htdocs/site%201.html +http://somewhere/~htdocs/site2.html +http://somewhere/~htdocs/site3.html +http://somewhere/~htdocs/site4.html +http://somewhere/~htdocs/title.html' + +got=`./document -c $config -u | sort` + +if [ "$expected" != "$got" ] +then + fail "seventh document: expected +$expected +but got +$got" +fi + + +/bin/rm mime-without-htm say-text + +test_functions_action=--stop-apache +. ./test_functions diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htmerge b/debian/htdig/htdig-3.2.0b6/test/t_htmerge new file mode 100755 index 00000000..11b56e76 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_htmerge @@ -0,0 +1,16 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_htmerge,v 1.7 2004/05/28 13:15:30 lha Exp $ +# + +. 
./test_functions
+
+$htdig "$@" -t -i -c $testdir/conf/htdig.conf
+$htdig "$@" -t -i -c $testdir/conf/htdig.conf2
+
+$htmerge "$@" -c $testdir/conf/htdig.conf -m $testdir/conf/htdig.conf2 >/dev/null 2>/dev/null
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htnet b/debian/htdig/htdig-3.2.0b6/test/t_htnet
new file mode 100755
index 00000000..80ed0861
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_htnet
@@ -0,0 +1,32 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_htnet,v 1.9 2004/05/28 13:15:30 lha Exp $
+#
+
+# Passes when ./testnet exits with status 1 for the nph-hang.cgi URL
+# (presumably a CGI that never answers, going by its name -- TODO confirm).
+
+test_functions_action=--start-apache
+. ./test_functions
+
+verbose=$1
+
+./testnet -U http://localhost:7400/set3/nph-hang.cgi $verbose
+testnet_status=$?
+
+# Stop Apache before reporting the result: `exit 0` below ends the script,
+# so a shutdown placed after the if/else would never run on success.
+test_functions_action=--stop-apache
+. ./test_functions
+
+if test $testnet_status = 1
+then
+	exit 0
+else
+	fail "Could not fetch URL"
+fi
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htsearch b/debian/htdig/htdig-3.2.0b6/test/t_htsearch
new file mode 100755
index 00000000..9041da75
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_htsearch
@@ -0,0 +1,164 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_htsearch,v 1.14 2004/05/28 13:15:30 lha Exp $
+#
+
+test_functions_action=--start-apache
+. ./test_functions
+
+config=$testdir/conf/htdig.conf
+tmp=/tmp/t_htsearch$$
+
+$htdig "$@" -t -i -c $config
+$htpurge -c $config
+
+test_functions_action=--stop-apache
+. 
./test_functions + +try() { + comment="$1" + shift + query="$1" + shift + $htsearch -c $config "$query" > $tmp + for pattern + do + if grep "$pattern" $tmp > /dev/null + then : + else + $htsearch -v -c $config "$query" > /dev/null + echo "Output doesn't match \"$pattern\"" + fail "$htsearch -c $config '$query' >> $tmp -- + $comment" + fi + done +} + +try "Simple search for 'also'" \ + "words=also" \ + '4 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' + +try "Implicit and search with two words 'also movies'" \ + "method=and&words=also+movies" \ + '1 match' 'site2.html' + +try "Explicit and search with two words 'also movies'" \ + "method=boolean&words=also+and+movies" \ + '1 match' 'site2.html' + +try "Implicit or search for 'also distribution'" \ + "method=or&words=also+distribution" \ + '5 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' 'site%201.html' + +try "Explicit or search for 'also distribution'" \ + "method=boolean&words=also+or+distribution" \ + '5 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' 'site%201.html' + +try "Boolean and/or mixed 'also or distribution and ltd'" \ + "method=boolean&words=also+or+distribution+and+ltd" \ + '5 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' 'site%201.html' + +try "Boolean explicit priority '(also or distribution) and ltd'" \ + "method=boolean&words=(also+or+distribution)+and+ltd" \ + '2 matches' 'script.html' 'site%201.html' + +try "Wildcard search for '*'" \ + "method=and&words=*" \ + '10 matches' 'set1/bad_local.htm' 'set1/title.html' 'set1/site2.html' 'set1/script.html' 'set1/"' 'set1/site3.html' 'set1/site4.html' 'set1/site%201.html' 'set1/sub%2520dir/' 'set1/sub%2520dir/empty%20file.html' + +try "Phrase search for '\"who offer\" and \"loans to graduate\"'" \ + "method=boolean&words=%22who+offer%22+and+%22loans+to+graduate%22" \ + '1 match' 'site4.html' + +try "Phrase search for 'may be deferred' -- stop words at start" \ + 
"method=boolean&words=%22may+be+deferred%22" \ + '1 match' 'site4.html' '<strong>may be deferred</strong>' + +try "Phrase search for 'Repayment may be' -- stop words at end" \ + "method=boolean&words=%22Repayment+may+be%22" \ + '1 match' 'site4.html' '<strong>Repayment may be</strong>' + +# This test fails +# +#try "Phrase search for 'may be' -- all stop words" \ +# "method=boolean&words=%22may+be%22" \ +# '1 match' 'site4.html' + +try "Phrase search for 'Repayment may be deferred' -- stop words in middle" \ + "method=boolean&words=%22Repayment+may+be+deferred%22" \ + '1 match' 'site4.html' '<strong>Repayment may be deferred</strong>' + +try "Phrase search for 'Repayment deferred' -- phrase interrupted by stop words" \ + "method=boolean&words=%22Repayment+deferred%22" \ + 'No match' + +try "Syntax error: finishing with 'and'" \ + "method=boolean&words=also+and" \ + 'Expected a search word, a quoted phrase or a boolean expression between () at the end' + +try "Syntax error: excess open brackets" \ + "method=boolean&words=(also+or+distribution" \ + "Expected ')' at the end" + +try "Syntax error: excess close brackets" \ + "method=boolean&words=also+or+distribution)" \ + "Expected end of expression instead of ')'" + +try "Syntax error: missing end quote" \ + "method=boolean&words=also+or+%22distribution" \ + 'Expected quotes at the end' + +try "Unrestricted search for 'group'" \ + "method=and&words=group" \ + '4 matches' 'script.html' 'bad_local.htm' 'site3.html' 'site4.html' + +try "Field-restricted search for 'author:group'" \ + "method=and&words=author:group" \ + '1 match' 'script.html' + +try "Field-restricted search for 'text:group'" \ + "method=and&words=text:group" \ + '3 matches' 'bad_local.htm' 'site3.html' 'site4.html' + +try "Checking prefix parsing using 'text: group'" \ + "method=and&words=text:%20group" \ + '1 match' 'script.html' + +try "Checking prefix parsing using 'text::group'" \ + "method=and&words=text::group" \ + '1 match' 'script.html' + +try 
"Checking prefix parsing using 'unknown:group'" \ + "method=any&words=unknown:group" \ + '5 matches' 'script.html' 'bad_local.htm' 'site3.html' 'site4.html' 'set1/"' + +try "Field-restricted search for 'descr:cost'" \ + "method=and&words=descr:cost" \ + '1 match' 'script.html' + +config=$testdir/conf/htdig.conf3 + +try "Testing boolean_keywords and search_rewrite_urls" \ + "method=boolean&words=also+ou+distribution+et+ltd" \ + '5 matches' 'bad_local.htm' 'place2.html' 'script.html' 'place4.html' 'place%201.html' + +try "Testing boolean_syntax_errors" \ + "method=boolean&words=ou+distribution" \ + "Attendait un mot au lieu de '|' ou 'ou'" + +try "Testing htdig's noindex_start/end overlapping" \ + "words=considered" \ + '1 match' 'script.html' + +try "Testing htdig's noindex_start/end nested" \ + "words=neglected" \ + 'No match' + +rm -f $tmp + +exit 0 diff --git a/debian/htdig/htdig-3.2.0b6/test/t_parsing b/debian/htdig/htdig-3.2.0b6/test/t_parsing new file mode 100755 index 00000000..d11a7068 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_parsing @@ -0,0 +1,189 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_parsing,v 1.4 2004/05/28 13:15:30 lha Exp $ +# + + +# Tests (or should eventually test) the following config attributes: +# description_meta_tag_names +# ignore_alt_text +# max_doc_size +# max_keywords +# max_meta_description_length +# max_description_length +# max_descriptions +# max_head_length +# noindex_end +# noindex_start +# external_parsers +# external_protocols +# use_meta_description + + +test_functions_action=--start-apache +. 
./test_functions + +config=$testdir/conf/htdig.conf.tmp +tmp=/tmp/t_htsearch$$ + +# set up config file with chosen non-default values +cp $testdir/conf/htdig.conf $config + +try() { + comment="$1" + shift + query="$1" + shift + $htsearch -c $config "$query" > $tmp + for pattern + do + if grep "$pattern" $tmp > /dev/null + then : + else + $htsearch -v -c $config "$query" > /dev/null + echo "Output doesn't match \"$pattern\"" + fail "$htsearch -c $config '$query' >> $tmp -- + $comment" + fi + done +} + + +# Tests (or should eventually test) the following config attributes: +# description_meta_tag_names +# ignore_alt_text +# max_doc_size +# max_keywords +# max_meta_description_length +# max_description_length (May put in t_templates) +# max_descriptions (May put in t_templates) +# max_head_length +# noindex_end +# noindex_start +# external_parsers (TODO) +# external_protocols +# use_meta_description + +$htdig "$@" -t -i -c $config || fail "Couldn't do first dig" +$htpurge -c $config || fail "Couldn't do first purge" + +try "Search for alt text 'earth'" \ + "words=earth" \ + '1 matches' 'site3.html' + +try "'claims and collections', unlimited doc size" \ + "words=%22claims+and+collections%22" \ + '1 matches' 'site4.html' + +try "Search for keyword 'martial', default max_keywords" \ + "words=martial" \ + '1 matches' 'site2.html' + +try "Search for 'service', default noindex_start/end" \ + "words=technical" \ + '1 matches' 'site%201.html' + +set_attr use_meta_description true +try "Search for 'call handling' with default max_meta_description_length" \ + "words=%22call+handling%22" \ + '1 matches' 'script.html' 'call handling.*signalling' + +set_attr ignore_alt_text true +set_attr max_doc_size 15112 +set_attr max_keywords 5 +set_attr noindex_start "'Software Distribution'" +set_attr noindex_end "'Contact Information'" +set_attr max_meta_description_length 80 +set_attr description_meta_tag_names "description generator" +set_attr max_head_length 30 + +$htdig "$@" -t -i -c 
$config || fail "Couldn't do second dig" +$htpurge -c $config || fail "Couldn't do second purge" + +try "Search for alt text 'earth' with ignore_alt_text=true" \ + "words=earth" \ + 'No matches' + +try "'claims and collections', max_doc_size 15112" \ + "words=%22claims+and+collections%22" \ + '1 matches' 'site4.html' + +# (Martial is 6th keyword listed in site 2, but "Fu" is too short and omitted.) +try "Search for keyword 'martial', max_keywords = 5" \ + "words=martial" \ + 'No matches' + +# Only occurrence of "technical" is between noindex_start and _end in site 1 +try "Search for 'technical', noindex_start=Software Distribution, noindex_end=Contact Information" \ + "words=technical" \ + 'No matches' + +# Visitor occurs after noindex_end +try "Search for 'visitor', noindex_start=Software Distribution, noindex_end=Contact Information" \ + "words=visitor" \ + '2 matches' 'site%201.html' 'site3.html' + +# Displaying meta description instead of excerpt, check it is truncated +try "Search for 'call handling' with max_meta_description_length=80" \ + "words=%22call+handling%22" \ + '1 matches' 'script.html' 'means of<br>' + +# Check <meta name="generator"...> counts as a description +try "Search for 'category', description_meta_tag_names includes 'generator'" \ + "words=category" \ + '1 matches' 'site3.html' 'FrontPage' + +# Check that only specified number of bytes of header is stored. +# Header size is rounded up to contain the whole of the last word. 
+try "Search for 'also', max_head_length=30" \ + "words=also" \ + '4 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' \ + 'WHERE.*Copyright<br>' + +set_attr max_doc_size 15042 +set_attr max_keywords 6 +set_attr noindex_start "'software distribution'" +set_attr noindex_end "'contact information'" + +$htdig "$@" -t -i -c $config || fail "Couldn't do third dig" +$htpurge -c $config || fail "Couldn't do third purge" + +try "Search for keyword 'martial', max_keywords = 6" \ + "words=martial" \ + '1 matches' 'site2.html' + +try "'claims and collections', max_doc_size 15042" \ + "words=%22claims+and+collections%22" \ + 'No matches' + +# Check noindex_start/end are case-insensitive +try "Search for 'technical', noindex_start=software distribution, noindex_end=contact information" \ + "words=technical" \ + 'No matches' + +PROTOCOL=my-protocol +echo '#!/bin/sh + echo "s 200" + echo "t text/html" + echo + echo "<html>$2</html>"' > $PROTOCOL +chmod 755 $PROTOCOL +set_attr external_protocols "echo: $PWD/$PROTOCOL" +set_attr start_url "echo:foo.html" +$htdig "$@" -t -i -c $config || fail "Couldn't do fourth dig" +try "trying external protocol echo" \ + "words=foo" \ + "1 matches" "echo:foo.html" + + +test_functions_action=--stop-apache +. ./test_functions + +rm -f $tmp $PROTOCOL + +exit 0 diff --git a/debian/htdig/htdig-3.2.0b6/test/t_rdonly b/debian/htdig/htdig-3.2.0b6/test/t_rdonly new file mode 100755 index 00000000..c6a3239b --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_rdonly @@ -0,0 +1,32 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_rdonly,v 1.4 2004/05/28 13:15:30 lha Exp $ +# + +. 
./test_functions + +export MIFLUZ_CONFIG ; MIFLUZ_CONFIG=${srcdir}/mifluz-search.conf +./txt2mifluz -z $VERBOSE < $srcdir/search.txt +chmod a-w test test_weakcmpr +a=`./dbbench -Wz -f world` +b="world <DEF> 0 0 5 2 +world <DEF> 0 0 5 4 +world <DEF> 0 0 11 1 +world <DEF> 0 0 20 5 +world <DEF> 0 0 21 2 +world <DEF> 0 0 51 2 +world <DEF> 0 0 71 2 +world <DEF> 0 0 81 2 " +if [ "$a" != "$b" ] +then + echo "expected +$b +but got +$a" >&2 + exit 1 +fi diff --git a/debian/htdig/htdig-3.2.0b6/test/t_search b/debian/htdig/htdig-3.2.0b6/test/t_search new file mode 100755 index 00000000..bef6a7fe --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/t_search @@ -0,0 +1,519 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: t_search,v 1.4 2004/05/28 13:15:30 lha Exp $ +# + +. 
./test_functions + +export MIFLUZ_CONFIG ; MIFLUZ_CONFIG=${srcdir}/mifluz-search.conf + +# +# Test the query parser +# + +# +# Run $1 and expect $2 as a result +# +runparser() { + command="$1" + expected="$2" + out=`eval "$command"` + if [ "$expected" != "$out" ] + then + echo "running $command: expected +$expected +but got +$out" + exit 1 + fi +} + +# +# Simple test +# +runparser "./search -n -f '( and scope1 the world )' $VERBOSE" \ +'( and "scope1" the world )' + +# +# All boolean constructions +# +runparser "./search -n -f '( and scope1 ( not scope2 the ) world ( or scope3 is coming to ( near scope4 an ( literal scope5 end ) ) ) )' $VERBOSE" \ +'( and "scope1" ( not "scope2" the ) world ( or "scope3" is coming to ( near "scope4" an ( literal "scope5" end ) ) ) )' + +# +# Mandatory and Forbiden nodes +# +runparser "./search -n -f '( or scope1 ( mandatory scope2 the ) world ( forbiden scope3 is ) )' $VERBOSE" \ +'( or "scope1" ( mandatory "scope2" the ) world ( not "scope3" is ) )' + +# +# Test the WordExclude* classes +# +./search -e || exit 1 + +# +# Run queries with various operators on an index built from the content +# of search.txt. +# + +./txt2mifluz $VERBOSE < $srcdir/search.txt + +# +# Run $1 and expect $2 as a result (all lines starting with match:) +# Feed the context variable with output starting with context:, stripping +# context: itself. 
+# +runsearch() { + command="$1" + expected="$2" + if [ "$VERBOSE" ] + then + echo "running $command" >&2 + fi + out=`eval "$command"` + match=`echo "$out" | grep '^match:'` + context=`echo "$out" | sed -n -e 's/^context: *//p'` +# echo "context: $context" >&2 + if [ "$expected" != "$match" ] + then + echo "running $command: expected +$expected +but got +$match" + exit 1 + fi +} + + +# +# Test context restoration on WordTreeLiteral +# +runsearch "./search -c 1 -f 'lazy' $VERBOSE" \ +'match: <UNDEF> <DEF> 0 1 11 <UNDEF> ' + +runsearch "./search -c 1 -C '$context' -f 'lazy' $VERBOSE" \ +'match: <UNDEF> <DEF> 0 1 21 <UNDEF> ' + +# +# Literal search using scope : only want documents with Flags set to 5 +# Be carefull to use tabulation in scope. +# +runsearch "./search -f '( literal \"<UNDEF> <UNDEF> <UNDEF> 5 <UNDEF> <UNDEF>\" lazy )' $VERBOSE" \ +'match: <UNDEF> <DEF> 0 5 9 <UNDEF> +match: <UNDEF> <DEF> 0 5 21 <UNDEF> +match: <UNDEF> <DEF> 0 5 53 <UNDEF> +match: <UNDEF> <DEF> 0 5 56 <UNDEF> ' + +# +# And search using scope : only want documents with Flags set to 5 +# Be carefull to use tabulation in scope. 
+# +runsearch "./search -f '( and \"\" ( literal \"<UNDEF> <UNDEF> <UNDEF> 5 <UNDEF> <UNDEF>\" lazy ) dog )' $VERBOSE" \ +'match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy ) +match: <UNDEF> <DEF> 0 5 53 <UNDEF> (dog lazy ) +match: <UNDEF> <DEF> 0 5 56 <UNDEF> (dog lazy )' + +# +# And/Not : document 20 is excluded because it contains 'an' +# +runsearch "./search -f '( and \"\" world ( not \"\" an ) the )' $VERBOSE" \ +'match: <UNDEF> <DEF> 0 0 5 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 21 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 51 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 71 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 81 <UNDEF> (world the )' + +# +# Or/Not : document 20 is excluded because it contains 'an' +# +runsearch "./search -f '( or \"\" world ( not \"\" an ) the )' $VERBOSE" \ +'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the ) +match: <UNDEF> <DEF> 0 0 5 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 11 <UNDEF> (world ) +match: <UNDEF> <DEF> 0 0 21 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 51 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 71 <UNDEF> (world the ) +match: <UNDEF> <DEF> 0 0 81 <UNDEF> (world the )' + +# +# Or : each word matches only once in separate documents +# +runsearch "./search -c 2 -f '( or \"\" comes end )' $VERBOSE" \ +'match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes ) +match: <UNDEF> <DEF> 0 0 20 <UNDEF> (end )' + +# +# Or : each word matches only once in separate documents +# docid 20 contains 'the' and 'end', therefore first +# docid 6 contains 'comes', is second before any document +# containing 'the' alone because 'comes' is less frequent than 'the' +# other docid only contain 'the'. 
+#
+runsearch "./search -c 8 -f '( or \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (the end )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (the )'
+
+#
+# Run the same search in 3 steps, using the context to resume the search
+#
+runsearch "./search -c 2 -f '( or \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the )'
+
+runsearch "./search -c 2 -C '$context' -f '( or \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (the end )'
+
+runsearch "./search -c 5 -C '$context' -f '( or \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 21 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (the )'
+
+#
+# After a search that stopped because there were no matches left, there
+# must be no context for resuming.
+#
+if test "$context" != ""
+then
+    echo "Expected empty context after fulfilled search"
+    exit 1
+fi
+
+#
+# Or search with word not in database (klkl)
+#
+runsearch "./search -f '( or \"\" the klkl )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (the )'
+
+#
+# Compound boolean query: nested 'and'
+#
+runsearch "./search -f '( and \"\" the ( and \"\" an end ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 20 <UNDEF> (an end the )'
+
+#
+# Compound boolean query: nested 'and' that fails immediately
+# because 'foo' is not in the inverted index.
+#
+runsearch "./search -f '( and \"\" the ( and \"\" an foo ) )' $VERBOSE" \
+'match: none'
+
+#
+# Compound boolean query: 'or' & 'and'
+#
+runsearch "./search -f '( and \"\" the ( or \"\" comes end ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 20 <UNDEF> (end the )'
+
+runsearch "./search -f '( or \"\" comes ( and \"\" the world ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 5 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (world the )'
+
+runsearch "./search -P 1 -f '( or \"\" comes ( near \"\" lazy dog ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 1 11 <UNDEF> (lazy dog proximity )
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (lazy dog proximity )
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (lazy dog proximity )
+match: <UNDEF> <DEF> 0 5 56 <UNDEF> (lazy dog proximity )'
+
+#
+# Compound boolean query: whole result set, then paged with -c and -C
+#
+runsearch "./search -f '( or \"\" comes ( or \"\" the world ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the world )
+match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 0 11 <UNDEF> (world )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (the world )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (the world )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (the world )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (the world )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (the world )'
+
+runsearch "./search -c 1 -f '( or \"\" comes ( or \"\" the world ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )'
+
+runsearch "./search -c 4 -C '$context' -f '( or \"\" comes ( or \"\" the world ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the world )
+match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 0 11 <UNDEF> (world )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (the world )'
+
+
+#
+# Compound boolean query: nested 'optional'
+#
+runsearch "./search -f '( optional \"\" the ( optional \"\" world foo ) )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 5 <UNDEF> (world the proximity)
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 11 <UNDEF> (world )
+match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )'
+
+#
+#
+# Most simple search : single word
+#
+runsearch "./search -f 'the' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> 
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> 
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> 
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> 
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> 
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> 
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> '
+
+#
+# Get all we can
+#
+runsearch "./search -f '( and \"\" the world )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 5 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (world the )'
+
+#
+# First two
+#
+runsearch "./search -c 2 -f '( and \"\" the world )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 5 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (world the )'
+
+#
+# The next two
+#
+runsearch "./search -b 2 -c 2 -f '( and \"\" the world )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 21 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (world the )'
+
+#
+# First four
+#
+runsearch "./search -c 4 -f '( and \"\" the world )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 5 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (world the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (world the )'
+
+#
+# Next document, using last document returned
+#
+runsearch "./search -c 1 -C '$context' -f '( and \"\" the world )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 71 <UNDEF> (world the )'
+
+#
+# Implicit or : each word matches only once in separate documents
+#
+runsearch "./search -c 2 -f '( optional \"\" comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (end )'
+
+#
+# Implicit or : each word matches only once in separate documents
+# docid 20 contains 'the' and 'end', therefore first
+# docid 6 contains 'comes', is second before any document
+# containing 'the' alone because 'comes' is less frequent than 'the'
+# other docid only contain 'the'.
+#
+runsearch "./search -c 8 -f '( optional \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 20 <UNDEF> (end the proximity)
+match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )
+match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (the )'
+
+#
+# Run the same search in 3 steps, using the context to resume the search
+#
+runsearch "./search -c 2 -f '( optional \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 20 <UNDEF> (end the proximity)
+match: <UNDEF> <DEF> 0 0 6 <UNDEF> (comes )'
+
+runsearch "./search -c 2 -C '$context' -f '( optional \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the )'
+
+runsearch "./search -c 5 -C '$context' -f '( optional \"\" the comes end )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 21 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (the )'
+
+#
+# After a search that stopped because there were no matches left, there
+# must be no context for resuming.
+#
+if test "$context" != ""
+then
+    echo "Expected empty context after fulfilled search"
+    exit 1
+fi
+
+#
+# Or search with word not in database (klkl)
+#
+runsearch "./search -f '( optional \"\" the klkl )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 0 3 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 5 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 20 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 21 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 51 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 71 <UNDEF> (the )
+match: <UNDEF> <DEF> 0 0 81 <UNDEF> (the )'
+
+#
+# And search with word not in database (klkl)
+#
+runsearch "./search -c 1 -f '( and \"\" comes klkl )' $VERBOSE" \
+'match: none'
+
+#
+# From there we deal with more complex keys (TAG,SERVER,URL)
+# instead of URL alone above.
+#
+
+#
+# And search with 'dog lazy'
+#
+runsearch "./search -c 3 -f '( and \"\" dog lazy )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy )'
+
+#
+# And search with 'dog lazy' one URL per server only (-S)
+#
+runsearch "./search -S -f '( and \"\" dog lazy )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 6 1 <UNDEF> (dog lazy )'
+
+#
+# Or search with 'dog lazy' one URL per server only (-S)
+#
+runsearch "./search -S -f '( optional \"\" dog lazy )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 6 1 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 9 <UNDEF> (lazy )'
+
+#
+# Near search with 'lazy dog'
+#
+runsearch "./search -f '( near \"\" lazy dog )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (lazy dog proximity)
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (lazy dog proximity)
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (lazy dog proximity)
+match: <UNDEF> <DEF> 0 5 56 <UNDEF> (lazy dog proximity)'
+
+#
+# Near search with 'dog lazy'
+#
+runsearch "./search -f '( near \"\" dog lazy )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy proximity)'
+
+#
+# Near search with 'dog lazy', order of terms is not meaningful
+# matching 'dog lazy' and 'lazy dog'
+#
+runsearch "./search -P -1 -f '( near \"\" dog lazy )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 56 <UNDEF> (dog lazy proximity)'
+
+#
+# Near search with 'dog lazy', order of terms is not meaningful
+# tolerance is -2, adding match for 'dog ? lazy' and 'lazy ? dog'
+#
+runsearch "./search -P -2 -f '( near \"\" dog lazy )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 53 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 56 <UNDEF> (dog lazy proximity)'
+
+#
+# Near search with 'dog lazy', order of terms is meaningful
+# tolerance is 3, adding match for 'dog ? lazy' and 'dog ? ? lazy'
+#
+runsearch "./search -P 3 -f '( near \"\" dog lazy )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 6 1 <UNDEF> (dog lazy proximity)'
+
+#
+# Near search with 'lazy dog', only first 2
+#
+runsearch "./search -c 2 -f '( near \"\" lazy dog )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (lazy dog proximity)
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (lazy dog proximity)'
+
+#
+# Near search with 'lazy dog', resume from previous search
+# and get 2 more.
+#
+runsearch "./search -c 2 -C '$context' -f '( near \"\" lazy dog )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 5 21 <UNDEF> (lazy dog proximity)
+match: <UNDEF> <DEF> 0 5 56 <UNDEF> (lazy dog proximity)'
+
+#
+# Near search with non existent word
+#
+runsearch "./search -f '( near \"\" lazy bar )' $VERBOSE" \
+'match: none'
+
+#
+# Or search using proximity (document 0 5 53 contains lazy ? dog)
+# order of terms is meaningful.
+#
+runsearch "./search -P 2 -f '( optional \"\" lazy dog )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 53 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 56 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 6 1 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 9 <UNDEF> (lazy )'
+
+#
+# Or search using proximity (document 0 5 53 contains lazy ? dog)
+# order of terms is not meaningful.
+#
+runsearch "./search -P -2 -f '( optional \"\" lazy dog )' $VERBOSE" \
+'match: <UNDEF> <DEF> 0 1 11 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 1 21 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 21 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 53 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 5 56 <UNDEF> (dog lazy proximity)
+match: <UNDEF> <DEF> 0 6 1 <UNDEF> (dog lazy )
+match: <UNDEF> <DEF> 0 5 9 <UNDEF> (lazy )'
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_templates b/debian/htdig/htdig-3.2.0b6/test/t_templates
new file mode 100755
index 00000000..48ef47ee
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_templates
@@ -0,0 +1,336 @@
+#!/bin/sh
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_templates,v 1.4 2004/05/28 13:15:30 lha Exp $
+#
+
+# Tests the following config attributes:
+#   add_anchors_to_excerpt
+#   allow_in_form
+#   anchor_target
+#   any_keywords
+#   build_select_lists
+#   date_format
+#   end_ellipses
+#   end_highlight
+#   excerpt_show_top
+#   image_url_prefix
+#   iso_8601
+#   matches_per_page
+#   matches_per_page_list
+#   max_excerpts
+#   max_descriptions
+#   max_description_length
+#   max_stars
+#   maximum_page_buttons
+#   maximum_pages
+#   method_names
+#   next_page_text
+#   no_excerpt_show_top
+#   no_excerpt_text
+#   no_page_list_header
+#   no_page_number_text
+#   no_next_page_text
+#   no_prev_page_text
+#   no_title_text
+#   nothing_found_file
+#   page_list_header
+#   page_number_separator
+#   page_number_text
+#   prev_page_text
+#   plural_suffix
+#   search_results_contenttype
+#   search_results_wrapper
+#   sort_names
+#   star_blank
+#   star_image
+#   star_patterns
+#   start_ellipses
+#   start_highlight
+#   syntax_error_file
+#   use_star_image
+
+
+# try COMMENT QUERY PATTERN...
+#   Run $htsearch with QUERY against $config, saving its standard output in
+#   $tmp, then grep that output for each PATTERN in turn.  For a PATTERN that
+#   matches no line, re-run the search with -vv, report the pattern and call
+#   fail with the command line and COMMENT.
+try() {
+    comment="$1"
+    shift
+    query="$1"
+    shift
+    # $config and $tmp are quoted so a test directory containing whitespace
+    # does not split them into several words.
+    $htsearch -c "$config" "$query" > "$tmp" 2> /dev/null
+    for pattern
+    do
+        if grep "$pattern" "$tmp" > /dev/null
+        then :
+        else
+            $htsearch -vv -c "$config" "$query" > /dev/null
+            echo "Output doesn't match \"$pattern\""
+            fail "$htsearch -c $config '$query' >> $tmp --
+               $comment"
+        fi
+    done
+}
+
+
+test_functions_action=--start-apache
+. ./test_functions
+
+config=$testdir/conf/htdig.conf.tmp
+tmp=/tmp/t_htsearch$$
+
+# set up config file with chosen non-default values
+cp "$testdir/conf/htdig.conf" "$config"
+
+$htdig "$@" -t -i -c "$config" || fail "Couldn't dig"
+
+$htpurge -vv -c "$config" > tmp1 || fail "Couldn't purge"
+
+# How can I check that unretrieved urls have been removed, but bad ones haven't?
+
+
+set_attr search_results_wrapper "$testdir/conf/main-template"
+set_attr any_keywords "true"
+set_attr image_url_prefix "image-prefix"
+
+try ". Single page of results" \
+    "words=also;config=hello;exclude=site2;format=builtin-short;keywords=subject+please+node;matchesperpage=10;method=and;page=1;restrict=http;sort=score;startyear=1999;startmonth=8;startday=1;endyear=2005;endmonth=9;endday=30" \
+    'MATCHES *=3' 'bad_local.htm' 'script.html' 'site4.html' \
+    'CGI *=-' 'CONFIG *=hello' 'EXCLUDE *=site2' 'FIRSTDISPLAYED *=1' \
+    'builtin-short" selected>Short' 'KEYWORDS *=subject please node' \
+    'LASTDISPLAYED *=3' 'LOGICAL_WORDS *=also' \
+    'MATCHES_PER_PAGE *=10' 'MATCH_MESSAGE *=All' 'MAX_STARS *=4' \
+    'METADESCRIPTION *=-' 'selected>All' 'MODIFIED *=-' \
+    'NEXTPAGE *=-' 'NSTARS *=-' 'PAGE *=1' 'PAGEHEADER *=-' \
+    'PAGELIST *=-' 'PAGES *=1' 'PLURAL_MATCHES *=s' \
+    'PREVPAGE *=-' 'RESTRICT *=http' 'SELECTED_FORMAT *=builtin-short' \
+    'SELECTED_METHOD *=and' 'SELECTED_SORT *=score' \
+    'selected>Score' 'VERSION *=3.2' 'WORDS *=also' \
+    'image-prefix/star.gif'
+
+
+set_attr plural_suffix "es"
+set_attr allow_in_form "script_name"
+set_attr page_list_header "head"
+set_attr add_anchors_to_excerpt "true"
+# make sure "script" is the page returned...
+set_attr search_results_order "bad_local script *"
+# set_attr iso_8601 FALSE # no point -- format set by locale
+
+# (Ensure keyword 'moderate' for script.html occurs after 'also', as anchors
+# are broken if keyword precedes anchor, but search term follows it.)
+try "2nd page, 1 match per page, no excludes" \
+    "words=also;script_name=qtest;format=builtin-long;keywords=subject+please+moderate;matchesperpage=1;method=or;page=2;restrict=http;sort=revtime;startyear=1999;startmonth=8;startday=1;endyear=2005;endmonth=9;endday=30" \
+    'MATCHES *=3' 'script.html' \
+    'CGI *=qtest' 'CONFIG *=-' 'EXCLUDE *=-' 'FIRSTDISPLAYED *=2' \
+    'builtin-long" selected>Long' 'KEYWORDS *=subject please moderate' \
+    'LASTDISPLAYED *=2' 'LOGICAL_WORDS *=also' \
+    'MATCHES_PER_PAGE *=1' 'MATCH_MESSAGE *=Any' 'MAX_STARS *=4' \
+    'METADESCRIPTION *=-' 'selected>Any' 'MODIFIED *=-' \
+    'NEXTPAGE *=.*;page=3' 'NSTARS *=-' 'PAGE *=1' 'PAGEHEADER *=head' \
+    'PAGELIST *=.*;page=1.*;page=3' 'PAGES *=3' 'PLURAL_MATCHES *=es' \
+    'PREVPAGE *=.*;page=1' 'RESTRICT *=http' 'SELECTED_FORMAT *=builtin-long' \
+    'SELECTED_METHOD *=or' 'SELECTED_SORT *=revtime' \
+    'selected>Reverse Time' 'VERSION *=3.2' 'WORDS *=also' \
+    '<strong><code>\.\.\. </code></strong>' '<strong><a href="[^"]*">also</a></strong>'
+
+set_attr method_names "or Or and And boolean Logical"
+set_attr page_list_header 'multiple'
+set_attr no_page_list_header 'single'
+set_attr anchor_target "body"
+set_attr maximum_page_buttons "2"
+set_attr no_next_page_text "none"
+set_attr no_prev_page_text "none"
+set_attr next_page_text "following"
+set_attr prev_page_text "preceeding"
+set_attr iso_8601 true
+
+try "2nd page, 1 match per page, no excludes, no keywords" \
+    "words=also;script_name=qtest;format=builtin-long;matchesperpage=1;method=or;page=2;restrict=http;sort=revtime;startyear=1999;startmonth=8;startday=1;endyear=2005;endmonth=9;endday=30" \
+    'MATCHES *=4' \
+    'CGI *=qtest' 'CONFIG *=-' 'EXCLUDE *=-' 'FIRSTDISPLAYED *=2' \
+    'builtin-long" selected>Long' 'KEYWORDS *=-' \
+    'LASTDISPLAYED *=2' 'LOGICAL_WORDS *=also' \
+    'MATCHES_PER_PAGE *=1' 'MATCH_MESSAGE *=Or' 'MAX_STARS *=4' \
+    'METADESCRIPTION *=-' 'selected>Or' 'MODIFIED *=-' \
+    'NEXTPAGE *=.*;page=3.*following' 'NSTARS *=-' 'PAGE *=1' 'PAGEHEADER *=multiple' \
+    'PAGELIST *=.*;page=1.*2[^0-9]*-' 'PAGES *=4' 'PLURAL_MATCHES *=es' \
+    'PREVPAGE *=.*;page=1.*preceeding' 'RESTRICT *=http' 'SELECTED_FORMAT *=builtin-long' \
+    'SELECTED_METHOD *=or' 'SELECTED_SORT *=revtime' \
+    'selected>Reverse Time' 'VERSION *=3.2' 'WORDS *=also' \
+    '<strong><code>\.\.\. </code></strong>' '<strong><a target="body" href="[^"]*">also</a></strong>' \
+    '20[0-9][0-9]-[0-9]*-[0-9]* [0-9]*:[0-9]*:[0-9]* [a-zA-Z]*' # iso date
+
+# ('\\\\' because original line in htdig.conf has a continuation '\' )
+set_attr page_number_text "first second third fourth \\\\"
+set_attr no_page_number_text "FIRST SECOND THIRD FOURTH \\\\"
+set_attr page_number_separator "PAGE_SEP"
+set_attr maximum_pages 2
+set_attr date_format "%Y-date-%m-date-%d-" # overrides iso_8601
+
+try "2nd page, 1 match per page, changed page numbers" \
+    "words=also;script_name=qtest;format=builtin-long;matchesperpage=1;method=or;page=2;restrict=http;sort=revtime;startyear=1999;startmonth=8;startday=1;endyear=2005;endmonth=9;endday=30" \
+    'MATCHES *=4' \
+    'CGI *=qtest' 'CONFIG *=-' 'EXCLUDE *=-' 'FIRSTDISPLAYED *=2' \
+    'builtin-long" selected>Long' 'KEYWORDS *=-' \
+    'LASTDISPLAYED *=2' 'LOGICAL_WORDS *=also' \
+    'MATCHES_PER_PAGE *=1' 'MATCH_MESSAGE *=Or' 'MAX_STARS *=4' \
+    'METADESCRIPTION *=-' 'selected>Or' 'MODIFIED *=-' \
+    'NEXTPAGE *=none' 'NSTARS *=-' 'PAGE *=1' 'PAGEHEADER *=multiple' \
+    'PAGELIST *=.*first.*PAGE_SEP.*SECOND' 'PAGES *=2' 'PLURAL_MATCHES *=es' \
+    'PREVPAGE *=.*;page=1' 'RESTRICT *=http' 'SELECTED_FORMAT *=builtin-long' \
+    'SELECTED_METHOD *=or' 'SELECTED_SORT *=revtime' \
+    'selected>Reverse Time' 'VERSION *=3.2' 'WORDS *=also' \
+    '<strong><code>\.\.\. </code></strong>' '<strong><a target="body" href="[^"]*">also</a></strong>' \
+    '20[0-9][0-9]-date-[0-9]*-date-[0-9]*-'
+
+#'MATCHES *=4' 'bad_local.htm' 'script.html' 'site2.html' 'site4.html'
+
+set_attr any_keywords "false"
+set_attr nothing_found_file "$testdir/conf/main-template"
+set_attr add_anchors_to_excerpt "false"
+try "fails keyword match" \
+    "words=also;script_name=qtest;format=builtin-long;keywords=subject+please+node;matchesperpage=1;method=or;page=2;restrict=http;sort=revtime;startyear=1999;startmonth=8;startday=1;endyear=2005;endmonth=9;endday=30" \
+    'MATCHES *=0' \
+    'CGI *=qtest' 'CONFIG *=-' 'EXCLUDE *=-' \
+    'builtin-long" selected>Long' 'KEYWORDS *=subject please node' \
+    'LOGICAL_WORDS *=also' \
+    'MATCHES_PER_PAGE *=1' 'MATCH_MESSAGE *=Or' 'MAX_STARS *=4' \
+    'METADESCRIPTION *=-' 'selected>Or' 'MODIFIED *=-' \
+    'NSTARS *=-' 'PAGELIST *=-' 'PAGES *=1' \
+    'RESTRICT *=http' 'SELECTED_FORMAT *=builtin-long' \
+    'SELECTED_METHOD *=or' 'SELECTED_SORT *=revtime' \
+    'selected>Reverse Time' 'VERSION *=3.2' 'WORDS *=also' \
+    'starting date =1999/8/1-' \
+    'ending date =2005/9/30-'
+
+set_attr start_ellipses "START_ELLIPSES"
+set_attr end_ellipses "END_ELLIPSES"
+set_attr star_blank "NIL"
+set_attr star_image "STAR"
+set_attr matches_per_page 8
+
+try "test highlighting" \
+    "words=also;script_name=qtest;format=builtin-long;method=or;restrict=http;sort=revscore" \
+    'MATCHES *=4' \
+    'CGI *=qtest' 'CONFIG *=-' 'EXCLUDE *=-' 'FIRSTDISPLAYED *=1' \
+    'LASTDISPLAYED *=4' 'LOGICAL_WORDS *=also' \
+    'MATCHES_PER_PAGE *=8' 'MATCH_MESSAGE *=Or' 'MAX_STARS *=4' \
+    'METADESCRIPTION *=-' 'selected>Or' 'MODIFIED *=-' \
+    'NEXTPAGE *=-' 'NSTARS *=-' 'PAGE *=1' 'PAGEHEADER *=single' \
+    'PAGELIST *=-' 'PAGES *=1' 'PLURAL_MATCHES *=es' \
+    'PREVPAGE *=-' 'RESTRICT *=http' 'SELECTED_FORMAT *=builtin-long' \
+    'SELECTED_METHOD *=or' 'SELECTED_SORT *=revscore' \
+    'selected>Reverse Score' 'VERSION *=3.2' 'WORDS *=also' \
+    'START_ELLIPSES' 'END_ELLIPSES' '<strong>also</strong>' \
+    '^[^S]*"STAR".*"NIL".*"NIL".*"NIL"'
+
+set_attr max_stars 5
+set_attr start_highlight "START"
+set_attr end_highlight "STOP"
+set_attr max_excerpts 2
+set_attr template_map "Long builtin-long builtin-long Short builtin-short builtin-short Very-long user-long $PWD/conf/entry-template"
+try "test max_stars. Also needs max_descriptions >= 2" \
+    "words=also;script_name=qtest;format=user-long;method=or;restrict=http;sort=revscore" \
+    'MATCHES *=4' \
+    'CGI *=qtest' 'CONFIG *=-' 'EXCLUDE *=-' 'FIRSTDISPLAYED *=1' \
+    'LASTDISPLAYED *=4' 'LOGICAL_WORDS *=also' \
+    'MATCHES_PER_PAGE *=8' 'MATCH_MESSAGE *=Or' 'MAX_STARS *=5' \
+    'METADESCRIPTION *=-' 'selected>Or' 'MODIFIED *=-' \
+    'NEXTPAGE *=-' 'NSTARS *=-' 'PAGE *=1' 'PAGEHEADER *=single' \
+    'PAGELIST *=-' 'PAGES *=1' 'PLURAL_MATCHES *=es' \
+    'PREVPAGE *=-' 'RESTRICT *=http' 'SELECTED_FORMAT *=user-long' \
+    'SELECTED_METHOD *=or' 'SELECTED_SORT *=revscore' \
+    'selected>Reverse Score' 'VERSION *=3.2' 'WORDS *=also' \
+    'START_ELLIPSES.*START_ELLIPSES.*END_ELLIPSES' 'STARTalsoSTOP' \
+    '^[^N]*"STAR".*"NIL".*"NIL".*"NIL".*"NIL"' \
+    'DESCRIPTIONS=JavaScript test<br>top<br><br>'
+
+try "Search for 'empty'" \
+    "words=empty;format=builtin-long" \
+    'MATCHES *=2' \
+    'empty%20file.html' 'empty file.html' 'sub%2520dir' 'INDEX OF /SET1/SUB%20DIR'
+set_attr no_title_text "Empty-Title"
+try "Search for 'empty'" \
+    "words=empty;format=builtin-long" \
+    'MATCHES *=2' \
+    'empty%20file.html' 'Empty-Title' 'sub%2520dir'
+
+set_attr star_patterns "site foo bad_local bar"
+try "test star_patterns" \
+    "words=also;format=builtin-long" \
+    'MATCHES *=4' '"foo"' '"bar"'
+
+set_attr star_patterns "site foo bad_local"
+try "test star_patterns syntax error" \
+    "words=also;format=builtin-long" \
+    'MATCHES *=4' '"foo"' '""'
+
+set_attr search_results_contenttype "foo"
+set_attr use_star_image "false"
+try "Checking use_star_image" \
+    "words=bad_local;format=builtin-short" \
+    "^[^*]*bad_local" "Content-type: foo"
+
+set_attr search_results_contenttype ""
+set_attr no_excerpt_text "No-excerpt-found"
+try "Checking no_excerpt_show_top true" \
+    "words=Yuki" \
+    "a#bcd"
+
+set_attr no_excerpt_show_top "false"
+try "Checking no_excerpt_show_top false" \
+    "words=Yuki" \
+    "No-excerpt-found"
+
+try "Checking excerpt_show_top false" \
+    "words=also" \
+    "STARTalsoSTOP"
+
+set_attr build_select_lists 'MATCH_LIST,multiple matchesperpage matches_per_page_list 2 2 1 matches_per_page "Previous Amount"'
+set_attr matches_per_page_list 'one 1 five 5 ten 10 twenty 20 "one hundred" 100 "two hundred" 200'
+set_attr sort_names "score 'Best Match' time Newest title A-Z revscore 'Worst Match' revtime Oldest revtitle Z-A"
+set_attr excerpt_show_top "true"
+
+try "Checking excerpt_show_top true" \
+    "words=also;matchesperpage=40" \
+    "Copyright" \
+    "MATCH_LIST *=<" "selected>Previous Amount" '<option value="1">one' \
+    'SORT *=<select name="sort">' \
+    '<option value="score" selected>Best Match' \
+    '<option value="time">Newest' \
+    '<option value="title">A-Z' \
+    '<option value="revscore">Worst Match' \
+    '<option value="revtime">Oldest' \
+    '<option value="revtitle">Z-A' \
+    '</select>'
+
+set_attr syntax_error_file "$testdir/conf/main-template"
+try "Checking syntax error file" \
+    "words=also+and;method=boolean" \
+    'MATCHES *=0' 'LOGICAL_WORDS *=also and' \
+    'MATCH_MESSAGE *=Logical' 'METADESCRIPTION *=-' 'selected>Logical' \
+    'PAGELIST *=-' 'PAGES *=1' 'SELECTED_METHOD *=boolean' \
+    'VERSION *=3.2' 'WORDS *=also and'
+
+
+set_attr max_descriptions 1
+set_attr max_description_length 5
+$htdig "$@" -t -i -c "$config" || fail "Couldn't dig"
+$htpurge -c "$config" || fail "Couldn't purge"
+try "Check anchor descriptions" \
+    "words=also;script_name=qtest;format=user-long;method=or;restrict=http;sort=revscore" \
+    'DESCRIPTIONS=JavaScript ...<br><br>'
+# Why does it end with "..." when START/END_ELLIPSES aren't ...?
+rm -f "$tmp"    # all patterns matched: drop the saved htsearch output
+test_functions_action=--stop-apache
+. ./test_functions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_trunc b/debian/htdig/htdig-3.2.0b6/test/t_trunc
new file mode 100755
index 00000000..83e1333c
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_trunc
@@ -0,0 +1,27 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_trunc,v 1.4 2004/05/28 13:15:30 lha Exp $
+#
+
+. ./test_functions
+
+export MIFLUZ_CONFIG ; MIFLUZ_CONFIG=${srcdir}/mifluz-search.conf
+
+count1=`head -n 1 $srcdir/search.txt | ./txt2mifluz -z -v $VERBOSE`
+count2=`tail -n 1 $srcdir/search.txt | ./txt2mifluz -z -v $VERBOSE`
+if [ "$count1" != "$count2" ]
+then
+    echo "inserted $count1 then $count2, expected same number" >&2
+    exit 1
+fi
+../htdb/htdb_stat -zW -d test | grep '2 Number of keys in the tree' > /dev/null || {
+    # Dump the statistics before reporting: nothing placed after fail is
+    # relied upon to run (assumes fail aborts the script -- TODO confirm).
+    ../htdb/htdb_stat -zW -d test
+    fail "Wrong number of keys"
+}
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_url b/debian/htdig/htdig-3.2.0b6/test/t_url
new file mode 100755
index 00000000..6a351e22
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_url
@@ -0,0 +1,12 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 2002-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_url,v 1.4 2004/05/28 13:15:30 lha Exp $
+#
+
+# Is there a way to show errors as "Parent + child = url"?
+./url | diff -C1 - "${srcdir:-.}/url.output"
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_validwords b/debian/htdig/htdig-3.2.0b6/test/t_validwords
new file mode 100755
index 00000000..8dc0f9be
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_validwords
@@ -0,0 +1,201 @@
+#!/bin/sh
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_validwords,v 1.2 2004/05/28 13:15:30 lha Exp $
+#
+
+# try COMMENT QUERY PATTERN...
+#   Run $htsearch with QUERY against $config, saving its standard output in
+#   $tmp, then grep that output for each PATTERN in turn.  For a PATTERN that
+#   matches no line, re-run the search with -vv, report the pattern and call
+#   fail with the command line and COMMENT.
+try() {
+    comment="$1"
+    shift
+    query="$1"
+    shift
+    $htsearch -c "$config" "$query" > "$tmp" 2> /dev/null
+    for pattern
+    do
+        if grep "$pattern" "$tmp" > /dev/null
+        then :
+        else
+            $htsearch -vv -c "$config" "$query" > /dev/null
+            echo "Output doesn't match \"$pattern\""
+            fail "$htsearch -c $config '$query' >> $tmp --
+               $comment"
+        fi
+    done
+}
+
+
+
+
+test_functions_action=--start-apache
+. ./test_functions
+
+config=$testdir/conf/htdig.conf.tmp
+tmp=/tmp/t_htsearch$$
+
+# set up config file with chosen non-default values
+cp "$testdir/conf/htdig.conf" "$config"
+
+set_attr allow_numbers "false"
+set_attr minimum_word_length "3"
+set_attr maximum_word_length "10"
+set_attr translate_latin1 "0"
+set_attr valid_punctuation "."
+set_attr extra_word_characters "��"
+#set_attr locale fr
+
+$htdig "$@" -t -i -c "$config" || fail "Couldn't dig"
+
+set_attr remove_bad_urls "false"
+set_attr remove_unretrieved_urls "true"
+$htpurge -vv -c "$config" > tmp1 || fail "Couldn't purge"
+
+# How can I check that unretrieved urls have been removed, but bad ones haven't?
+
+# First pass: searches against the index built without allow_numbers.
+
+try "Search for '2001' without allow_numbers" \
+    "words=2001" \
+    'No matches'
+
+try "Search for '0b3' without allow_numbers" \
+    "words=0b3" \
+    '1 matches' 'bad_local.htm' '3.2.<strong>0b3</strong>'
+
+try "Search for '3.2.0b3' without allow_numbers" \
+    "words=3.2.0b3" \
+    '1 matches' 'bad_local.htm' '<strong>3.2.0b3</strong>'
+
+try "Search for '320b3' without allow_numbers" \
+    "words=320b3" \
+    '1 matches' 'bad_local.htm'
+
+try 'Search for "archive." without . in extra_word_characters' \
+    'words=archive.' \
+    '1 matches' 'bad_local.htm' '<strong>archive</strong>.'
+
+try 'Search for "archive" without . in extra_word_characters' \
+    'words=archive' \
+    '1 matches' 'bad_local.htm' '<strong>archive</strong>.'
+
+try "Search for 'graduateprofessional' which should not match a slash" \
+    "words=graduateprofessional" \
+    'No matches'
+
+try "Search for 'now' with minimum_word_length=3" \
+    "words=now" \
+    '1 matches' 'bad_local.htm'
+
+try "Search for 'fran�ais' without translate_latin1" \
+    "words=fran�ais" \
+    '1 matches' 'site4.html' '<strong>fran�ais</strong>'
+
+try "Search for 'qu�bec' without translate_latin1" \
+    "words=qu�bec" \
+    'No matches'
+
+try "Search for 'with' with default bad_word_list" \
+    "words=with" \
+    'No matches'
+
+try "Search for 'technical' with default bad_word_list" \
+    "words=technical" \
+    '1 matches' 'site%201.html'
+
+
+# Second pass: change the word attributes, then dig and purge again.
+# NOTE(review): the non-ASCII bytes in this file show up as U+FFFD in this
+# copy -- presumably Latin-1 characters; verify against the upstream file.
+
+
+set_attr allow_numbers "true"
+set_attr minimum_word_length "4"
+set_attr maximum_word_length "13"
+set_attr translate_latin1 "yes"
+set_attr valid_punctuation "/"
+set_attr extra_word_characters '.\\\$��' # string is .\$��, chars: .$��
+set_attr bad_word_list "${testdir}/bad_word_list"
+#set_attr locale fr
+
+$htdig "$@" -t -i -c $config || fail "Couldn't dig"
+
+set_attr remove_bad_urls "true"
+set_attr remove_unretrieved_urls "false"
+$htpurge -vv -c $config > tmp || fail "Couldn't purge"
+
+# How can I check that bad urls have been removed, but unretrieved ones haven't?
+
+
+
+try "Search for '2001' " \
+    "words=2001" \
+    '1 matches' '1995-<strong>2001</strong>'
+
+try "Search for '9.00'" \
+    "words=9.00" \
+    '1 matches' 'site4.html' '<strong>9.00</strong>'
+
+try "Search for '9/00' -- checking . is not just valid_punctuation" \
+    "words=9/00" \
+    'No matches'
+
+try 'Search for "archive." with . in extra_word_characters' \
+    'words=archive.' \
+    '1 matches' 'bad_local.htm' '<strong>archive.</strong>'
+
+try 'Search for "archive" with . in extra_word_characters' \
+    'words=archive' \
+    'No matches'
+
+try 'Search for "$195"' \
+    'words=$195' \
+    '1 matches' 'site4.html' '<strong>$195</strong>,000'
+
+try "Search for 'graduateprofessional' which should match a slash" \
+    "words=graduateprofessional" \
+    '1 matches' 'site4.html' '<strong>graduate/professional</strong>'
+
+#try "Search for 'graduateprofexyz' which should match a truncated word" \
+#    "words=graduateprofexyz" \
+#    '1 matches' 'site4.html' '<strong>graduate/professional</strong>'
+
+try "Search for 'graduateprofexyz' which should match a truncated word" \
+    "words=graduateprofexyz" \
+    '1 matches' 'site4.html'
+
+try "Search for 'graduateprofxyz' which should fail to match a truncated word" \
+    "words=graduateprofxyz" \
+    'No matches'
+
+try "Search for 'part' with minimum_word_length=4" \
+    "words=part" \
+    '2 matches' 'bad_local.htm' 'script.html'
+
+try "Search for 'now' with minimum_word_length=4" \
+    "words=now" \
+    'No matches'
+
+try "Search for 'fran�ais' with translate_latin1" \
+    "words=fran�ais" \
+    '1 matches' 'site4.html' '<strong>français</strong>'
+
+try "Search for 'qu�bec' with translate_latin1" \
+    "words=qu�bec" \
+    '1 matches' 'site4.html' '<strong>Québec</strong>'
+
+try "Search for 'with' with new bad_word_list" \
+    "words=with" \
+    '4 matches' 'bad_local.htm' 'script.html' 'site4.html' 'site%201.html'
+
+try "Search for 'technical' with new bad_word_list" \
+    "words=technical" \
+    'No matches'
+rm -f "$tmp"    # all patterns matched: drop the saved htsearch output
+test_functions_action=--stop-apache
+. ./test_functions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_wordbitstream b/debian/htdig/htdig-3.2.0b6/test/t_wordbitstream
new file mode 100755
index 00000000..0c60dbe1
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_wordbitstream
@@ -0,0 +1,17 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_wordbitstream,v 1.4 2004/05/28 13:15:30 lha Exp $
+#
+
+. ./test_functions
+
+# Verbosity comes from the first argument when one is given, otherwise from
+# $VERBOSE like the other word tests.
+verbose=${1:-$VERBOSE}
+
+./word -b $verbose
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_wordkey b/debian/htdig/htdig-3.2.0b6/test/t_wordkey
new file mode 100755
index 00000000..58c404ec
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_wordkey
@@ -0,0 +1,13 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_wordkey,v 1.9 2004/05/28 13:15:30 lha Exp $
+#
+
+. ./test_functions
+
+./word -k $VERBOSE
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_wordlist b/debian/htdig/htdig-3.2.0b6/test/t_wordlist
new file mode 100755
index 00000000..da4629ae
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_wordlist
@@ -0,0 +1,15 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_wordlist,v 1.6 2004/05/28 13:15:30 lha Exp $
+#
+
+. ./test_functions
+
+./word -l $VERBOSE
+rm -f test test_weakcmpr __db*
+./word -l -z $VERBOSE
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_wordskip b/debian/htdig/htdig-3.2.0b6/test/t_wordskip
new file mode 100755
index 00000000..f7ee993f
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_wordskip
@@ -0,0 +1,19 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_wordskip,v 1.5 2004/05/28 13:15:30 lha Exp $
+#
+
+. ./test_functions
+
+if [ ! -f skiptest_db.txt ]
+then
+    cp $srcdir/skiptest_db.txt .
+fi
+
+./txt2mifluz < $srcdir/skiptest_db.txt
+./word -s $VERBOSE
diff --git a/debian/htdig/htdig-3.2.0b6/test/test_functions.in b/debian/htdig/htdig-3.2.0b6/test/test_functions.in
new file mode 100644
index 00000000..25fd5e8a
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/test_functions.in
@@ -0,0 +1,130 @@
+#
+# Initialise variables and directories to prepare to run test suite.
+# If run with argument --start-apache it will also start httpd.
+# If run with argument --stop-apache it will stop httpd and exit.
+# If there is a problem before the test, "exit 77" skips the test (not fails it) +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# $Id: test_functions.in,v 1.16 2004/05/28 13:15:30 lha Exp $ +# + +if [ "$test_functions_action" = --stop-apache ] +then + if [ -n "$httpd" -a -f logs/httpd.pid ] + then + kill -15 `cat logs/httpd.pid` + sleep 2 + rm -f logs/httpd.pid + fi +else + +testdir=`pwd` + +perl=@PERL@ +awk=@AWK@ + +@SET_MAKE@ + +if [ -z "$MAKE" ] +then + echo "no make command found" + exit 77 +fi + +# +# Prepare http server +# +( + cd conf + $MAKE user="@USER@" modules="@APACHE_MODULES@" testdir=$testdir all > /dev/null +) + +httpd=@APACHE@ + +# if apache requested, either start it or warn it was not configured. +if [ "$test_functions_action" = --start-apache ] +then + + if [ -z "$httpd" ] + then + prog_name=`basename $0` + echo "Run configure with --with-apache=<httpd path> to run $prog_name." + exit 77 + fi + + if [ ! -d logs ] + then + mkdir -p logs + else + if [ -f logs/httpd.pid ] + then + kill -15 `cat logs/httpd.pid` + sleep 2 + rm -f logs/httpd.pid + fi + fi + if $httpd -f $testdir/conf/httpd.conf + then + sleep 2 + else + echo + echo "****Could not start apache. 
This test may fail, but that is not ht://Dig's fault" + echo + fi +fi + +# +# Prepare htdig test environment +# +rm -fr var/htdig +rm -fr var/htdig2 +mkdir -p var/htdig +mkdir -p var/htdig2 + +htdig=../htdig/htdig +htsearch=../htsearch/htsearch +htmerge=../httools/htmerge +htpurge=../httools/htpurge +htstat=../httools/htstat +htdump=../httools/htdump +htload=../httools/htload +htfuzzy=../htfuzzy/htfuzzy + +# +# Default index description used for testing +# +export MIFLUZ_CONFIG ; MIFLUZ_CONFIG=${srcdir}/mifluz.conf + +rm -f test test_weakcmpr __db* + +# +# Provide a unified means for scripts to clean up. +# +fail() { + echo "$1" + if [ -n "$httpd" -a -f logs/httpd.pid ] + then + kill -15 `cat logs/httpd.pid` + sleep 2 + rm -f logs/httpd.pid + fi + exit 1 +} + +conf=conf/htdig.conf.tmp +set_attr () +{ + if @EGREP@ "^$1" $conf > /dev/null ; then + @MV@ $conf tmp_conf + @SED@ "s@^$1.*@$1: $2@" < tmp_conf > $conf + else + echo "$1: $2" >> $conf + fi +} + +fi # $1 != --stop-apache diff --git a/debian/htdig/htdig-3.2.0b6/test/test_prepare b/debian/htdig/htdig-3.2.0b6/test/test_prepare new file mode 100755 index 00000000..24b74d17 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/test_prepare @@ -0,0 +1,19 @@ +# +# Part of the ht://Dig package <http://www.htdig.org/> +# Copyright (c) 1999-2004 The ht://Dig Group +# For copyright details, see the file COPYING in your distribution +# or the GNU Library General Public License (LGPL) version 2 or later +# <http://www.gnu.org/copyleft/lgpl.html> +# +# Make sure all data has been copied from the source directory +# +test_prog=$1 + +[ ! -d conf ] && cp -rp $srcdir/conf . +[ ! -d htdocs ] && cp -rp $srcdir/htdocs . + +if [ ! -f skiptest_db.txt ] ; then + cp $srcdir/skiptest_db.txt . 
+fi + +exec $srcdir/$test_prog diff --git a/debian/htdig/htdig-3.2.0b6/test/testnet.cc b/debian/htdig/htdig-3.2.0b6/test/testnet.cc new file mode 100644 index 00000000..9eb0c707 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/testnet.cc @@ -0,0 +1,443 @@ +// $Id: testnet.cc,v 1.11 2003/07/21 08:16:12 angusgb Exp $ +#ifdef HAVE_CONFIG_H +#include <htconfig.h> +#endif /* HAVE_CONFIG_H */ + +#include "Transport.h" +#include "HtHTTP.h" +#include "HtHTTPBasic.h" +#include "HtDateTime.h" +#include <URL.h> + +#ifdef HAVE_STD +#include <iostream> +#include <iomanip> +#ifdef HAVE_NAMESPACES +using namespace std; +#endif +#else +#include <iostream.h> +#include <iomanip.h> +#endif /* HAVE_STD */ + +#include <errno.h> +#include <string.h> + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif + +#include <unistd.h> + +#define DEFAULT_MAX_DOCUMENT_SIZE 40000 + +int debug = 0; +int timesvar = 1; +int persistent = 1; +int timeout = 10; +int head_before_get = 1; +int max_doc = DEFAULT_MAX_DOCUMENT_SIZE; +int retries = 1; +int waittime = 5; + + +URL *url; +Transport *transportConnect = NULL; +HtHTTP *HTTPConnect = NULL; + + +static void usage(); +void reportError(char *msg); +Transport::DocStatus Retrieve(); +int Parser(char *ct); + + + +int main(int ac, char **av) +{ +/////// + // Local variables +/////// + + // Url to be retrieved + String URL_To_Retrieve=""; + + // Character for get_opt function + int c; + + // Transport return Status + Transport::DocStatus _return_status; + + // Variable storing the number of timed out requests + int _timed_out = 0; + + // Flag variable for errors + int _errors = 0; + +/////// + // Retrieving options from command line with getopt +/////// + + while((c = getopt(ac, av, "vU:T:t:ngm:r:w:")) != -1) + { + switch (c) + { + case 'v': + debug++; + break; + case 'U': + URL_To_Retrieve=optarg; + break; + case 'T': + timesvar=atoi(optarg); + break; + case 't': + timeout=atoi(optarg); + break; + case 'r': + 
retries=atoi(optarg); + break; + case 'w': + waittime=atoi(optarg); + break; + case 'm': + max_doc=atoi(optarg); + break; + case 'n': + persistent = 0; + break; + case 'g': + head_before_get = 0; + HtHTTP::DisableHeadBeforeGet(); + break; + case '?': + usage(); + } + } + + if (URL_To_Retrieve.length() == 0) usage(); + + if (!persistent) + head_before_get=0; // No HEAD before GET if no persistent connections + + // Create the new URL + + url = new URL ((char*)URL_To_Retrieve); + + if (!url) reportError(strerror(errno)); + + + if (debug>0) + { + cout << "Testing the net for " << url->get() << endl; + cout << "Host: " << url->host() << " - Port: " << url->port() + << " - Service: " << url->service() << endl; + cout << endl; + } + + Transport::SetDebugLevel(debug); + HtHTTP::SetParsingController(Parser); + int i; + + HtDateTime StartTime; + + for (i=0; i < timesvar; i++) + { + if (debug>0) + cout << setw(5) << i+1 << "/" << timesvar; + + _return_status = Retrieve(); + + if (debug>0) + { + cout << " | Start time: " << transportConnect->GetStartTime()->GetISO8601(); + + cout << " | End time: " << transportConnect->GetEndTime()->GetISO8601() + << " | "; + } + + + switch(_return_status) + { + case Transport::Document_ok: + if(debug>0) cout << "OK (" + << transportConnect->GetResponse()->GetStatusCode() << ")"; + break; + case Transport::Document_not_changed: + if(debug>0) cout << "Not changed (" + << transportConnect->GetResponse()->GetStatusCode() << ")"; + break; + case Transport::Document_not_found: + if(debug>0) cout << "Not found (" + << transportConnect->GetResponse()->GetStatusCode() << ")"; + break; + case Transport::Document_not_parsable: + if(debug>0) cout << "Not parsable (" + << transportConnect->GetResponse()->GetContentType() << ")"; + break; + case Transport::Document_redirect: + if(debug>0) cout << "Redirected (" + << transportConnect->GetResponse()->GetStatusCode() << ")"; + break; + case Transport::Document_not_authorized: + if(debug>0) cout << "Not 
authorized"; + break; + case Transport::Document_no_connection: + if(debug>0) cout << "No Connection"; + break; + case Transport::Document_connection_down: + if(debug>0) cout << "Connection down"; + _timed_out++; + break; + case Transport::Document_no_header: + if(debug>0) cout << "No header"; + break; + case Transport::Document_no_host: + if(debug>0) cout << "No host"; + break; + case Transport::Document_no_port: + if(debug>0) cout << "No port"; + break; + case Transport::Document_not_local: + if(debug>0) cout << "Not local"; + break; + case Transport::Document_not_recognized_service: + if(debug>0) cout << "Service not recognized"; + break; + case Transport::Document_other_error: + if(debug>0) cout << "Other error"; + _errors++; + break; + } + + + if (debug>0) + cout << endl; + } + + HtDateTime EndTime; + + // Memory freeing + + if (HTTPConnect) + delete HTTPConnect; + + if (url) delete url; + + // Show statistics + + if(debug>0) + { + cout << endl; + cout << "HTTP Info" << endl; + cout << "=========" << endl; + + if (persistent) + { + cout << " Persistent connections : On" << endl; + if (head_before_get) + cout << " HTTP/1.1 HEAD before GET : On" << endl; + else + cout << " HTTP/1.1 HEAD before GET : Off" << endl; + } + else + cout << " Persistent connections : Off" << endl; + + + cout << " Timeout value : " << timeout << endl; + + cout << " Retries for timeout : " << retries << endl; + + cout << " Sleep after timeout : " << waittime << endl; + + cout << " Document requests : " << timesvar << endl; + + HtHTTP::ShowStatistics(cout); + + cout << " Timed out : " << _timed_out << endl; + cout << " Unknown errors : " << _errors << endl; + cout << " Elapsed time : approximately " + << HtDateTime::GetDiff(EndTime, StartTime) << " secs" << endl; + + } + + // Return values + + if (_errors) return -1; + + if (_timed_out) return 1; + + return 0; + +} + + +void usage() +{ + cout << "usage: testnet [-v] [-n] [-g] [-U URL] [-t times]" << endl; + cout << "Ht://Dig " << VERSION 
<< endl << endl; + + cout << "Options:" << endl; + + cout << "\t-v\tVerbose mode" << endl << endl; + + cout << "\t-U URL" << endl; + cout << "\t\tURL to be retrieved" << endl << endl; + + cout << "\t-T times" << endl; + cout << "\t\tTimes to retrieve it" << endl << endl; + + cout << "\t-t timeout" << endl; + cout << "\t\tTimeout value" << endl << endl; + + cout << "\t-r retries" << endl; + cout << "\t\tNumber of retries after a timeout" << endl << endl; + + cout << "\t-w wait time" << endl; + cout << "\t\tWait time value after a timeout" << endl << endl; + + cout << "\t-m maxdocsize" << endl; + cout << "\t\tMax Document size to be retrieved" << endl << endl; + + cout << "\t-n\tNormal connection (disable persistent)" << endl << endl; + + cout << "\t-g\tOnly GET requests instead of HEAD+GET" << endl << endl; + + exit(1); +} + + +// +// Report an error and die +// +void reportError(char *msg) +{ + cout << "testnet: " << msg << "\n\n"; + exit(1); +} + + +Transport::DocStatus Retrieve() +{ + // Right now we just handle http:// service + // Soon this will include file:// + // as well as an ExternalTransport system + // eventually maybe ftp:// and a few others + + Transport::DocStatus status; + Transport_Response *response = 0; + HtDateTime *ptrdatetime = 0; + HtDateTime modtime; + + String contents; + String contentType; + int contentLength; + + transportConnect = 0; + + if (mystrncasecmp(url->service(), "http", 4) == 0) + { + + if (!HTTPConnect) + { + + if (debug>1) + cout << "Creating an HtHTTP object" << endl; + + HTTPConnect = new HtHTTPBasic(); + + if (!HTTPConnect) + reportError(strerror(errno)); + } + + if (HTTPConnect) + { + // Here we must set only thing for a HTTP request + + HTTPConnect->SetRequestURL(*url); + + // Let's disable the cookies for this test + HTTPConnect->DisableCookies(); + + // We may issue a config paramater to enable/disable them + if (!persistent) HTTPConnect->DisablePersistentConnection(); + + // 
HTTPConnect->SetRequestMethod(HtHTTP::Method_GET); + if (debug > 2) + { + cout << "Making HTTP request on " << url->get(); + cout << endl; + } + } + + transportConnect = HTTPConnect; + + transportConnect->SetRequestMaxDocumentSize(max_doc); + transportConnect->SetTimeOut(timeout); + transportConnect->SetRetry(retries); + transportConnect->SetWaitTime(waittime); + + } + else + { + if (debug) + { + cout << '"' << url->service() << + "\" not a recognized transport service. Ignoring\n"; + } + } + + // Is a transport object pointer available? + + if (transportConnect) + { + + transportConnect->SetConnection(url); + + // Make the request + // Here is the main operation ... Let's make the request !!! + status = transportConnect->Request(); + + // Let's get out the info we need + response = transportConnect->GetResponse(); + + if (response) + { + // We got the response + + contents = response->GetContents(); + contentType = response->GetContentType(); + contentLength = response->GetContentLength(); + ptrdatetime = response->GetModificationTime(); + + if (ptrdatetime) + { + // We got the modification date/time + modtime = *ptrdatetime; + } + // How to manage it when there's no modification date/time? + + if (debug > 5) + { + cout << "Contents:\n" << contents << endl; + cout << "Content Type: " << contentType << endl; + cout << "Content Lenght: " << contentLength << endl; + cout << "Modification Time: " << modtime.GetISO8601() << endl; + } + } + + return status; + + } + else + return Transport::Document_not_found; + +} + + + +int Parser(char *) +{ + return false; +} diff --git a/debian/htdig/htdig-3.2.0b6/test/txt2mifluz.cc b/debian/htdig/htdig-3.2.0b6/test/txt2mifluz.cc new file mode 100644 index 00000000..de9aeacc --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/txt2mifluz.cc @@ -0,0 +1,179 @@ +// +// txt2mifluz.cc +// +// txt2mifluz: stress test the Berkeley DB database and WordList interface. 
+// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: txt2mifluz.cc,v 1.4 2004/05/28 13:15:30 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include <htconfig.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif /* HAVE_GETOPT_H */ +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ +#include <stdlib.h> + +#include <htString.h> +#include <WordList.h> +#include <WordContext.h> + +/* + * Store all options from the command line + */ +class params_t +{ +public: + char* dbfile; + int compress; +}; + +/* + * Explain options + */ +static void usage(); +/* + * Verbosity level set with -v (++) + */ +static int verbose = 0; + +class WordSearch { +public: + WordSearch(); + + WordKey *Search(const String& expr); + WordKey *Search(const StringList& terms); + WordKey *Search(WordKey* keys); + + WordKey* Terms2WordKey(const StringList& terms); + + void DocumentSet(const WordKey& from, WordKey& to); + void DocumentCompare(const WordKey& a, const WordKey& b); + + int limit_bottom; + int limit_count; + WordList* words; +}; + +WordSearch::WordSearch() +{ + limit_bottom = 0; + limit_count = 0; + words = 0; +} + +WordKey *WordSearch::Search(const String& expr) +{ + return Search(StringList(expr, " \t")); +} + +WordKey *WordSearch::Search(const StringList& terms) +{ + return Search(Terms2WordKey(terms)); +} + +WordKey *WordSearch::Search(WordKey* keys) +{ + // WordKey* AscendingFrequency(ter); + return 0; +} + +WordKey* WordSearch::Terms2WordKey(const StringList& terms) +{ + WordKey* keys = new WordKey[terms.Count() + 1]; + + int i; + String* term; + ListCursor cursor; + 
terms.Start_Get(cursor); + for(i = 0; (term = (String*)terms.Get_Next(cursor)); i++) { + keys[i].SetWord(*term); + } + + for(i = 0; !keys[i].Empty(); i++) { + fprintf(stderr, "%s\n", (char*)keys[i].Get()); + } + return 0; +} + + +// ***************************************************************************** +// Entry point +// +int main(int ac, char **av) +{ + int c; + extern char *optarg; + params_t params; + + params.compress = 0; + params.dbfile = strdup("test"); + + while ((c = getopt(ac, av, "vB:f:z")) != -1) + { + switch (c) + { + case 'v': + verbose++; + break; + case 'B': + free(params.dbfile); + params.dbfile = strdup(optarg); + break; + case 'z': + params.compress = 1; + break; + case '?': + usage(); + break; + } + } + + Configuration* config = WordContext::Initialize(); + if(!config) { + fprintf(stderr, "txt2mifluz: no config file found\n"); + exit(1); + } + + if(params.compress) { + config->Add("wordlist_compress", "true"); + } + + WordList words(*config); + words.Open(params.dbfile, O_RDWR|O_TRUNC); + int inserted = words.Read(stdin); + if(verbose) + printf("inserted %d WordReferences\n", inserted); + words.Close(); + + delete config; + free(params.dbfile); +} + +// ***************************************************************************** +// void usage() +// Display program usage information +// +static void usage() +{ + printf("usage: txt2mifluz [options] < txtfile\n"); + printf("Options:\n"); + printf("\t-v\t\tIncreases the verbosity\n"); + printf("\t-B dbfile\tuse <dbfile> as a db file name (default test).\n"); + exit(0); +} diff --git a/debian/htdig/htdig-3.2.0b6/test/url.cc b/debian/htdig/htdig-3.2.0b6/test/url.cc new file mode 100644 index 00000000..91448643 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/url.cc @@ -0,0 +1,187 @@ +// +// url.cc +// +// url: Implement tests for the URL parser +// Should ensure compliance to RFC2396 +// <http://www.faqs.org/rfcs/rfc2396.html> +// +// Part of the ht://Dig package <http://www.htdig.org/> 
+// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: url.cc,v 1.8 2004/05/28 13:15:30 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> + +#ifdef HAVE_STD +#include <iostream> +#ifdef HAVE_NAMESPACES +using namespace std; +#endif +#else +#include <iostream.h> +#endif /* HAVE_STD */ + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif + +#include "HtConfiguration.h" +#include "URL.h" + + +// These should probably be tested individually +// but for now, we'll just assume they're set to defaults +// (except for external protocol test). +static ConfigDefaults defaults[] = { + { "external_protocols", "https:// dummy.transport help: dummy.transport", 0 }, + { "allow_virtual_hosts", "true", 0 }, + { "case_sensitive", "true", 0 }, + { "remove_default_doc", "index.html", 0 }, + { "server_aliases", "alias.com:443=true.com:443", 0 }, + { 0 } +}; + +typedef struct { + char* url_parents; + char* url_children; + int test_children; +} params_t; + + +static void usage(); +static void dourl(params_t* params); +static void dolist(params_t* params); + +static int verbose = 0; + +//***************************************************************************** +// int main(int ac, char **av) +// +int main(int ac, char **av) +{ + int c; + extern char *optarg; + params_t params; + + params.url_parents = strdup("url.parents"); + params.url_children = strdup("url.children"); + params.test_children = 1; + + while ((c = getopt(ac, av, "vop:c:")) != -1) + { + switch (c) + { + case 'v': + verbose++; + break; + case 'p': + free(params.url_parents); + params.url_parents = strdup(optarg); + break; + case 'c': + free(params.url_children); + params.url_children = 
strdup(optarg); + break; + case 'o' : + params.test_children = 0; + break; + case '?': + usage(); + break; + } + } + + dourl(&params); + + free(params.url_parents); + free(params.url_children); + + return 0; +} + +static void dourl(params_t* params) +{ + if(verbose) cerr << "Test WordKey class with " << + params->url_parents << " and " << params->url_children << "\n"; + HtConfiguration* const config= HtConfiguration::config(); + config->Defaults(defaults); + dolist(params); + + cout << "\nAnd now without turning // into / ...\n\n"; + config->Add(String("allow_double_slash"), "true"); + dolist(params); +} + +static void dolist(params_t* params) +{ + // To start, we read in the list of child URLs into a List object + FILE *urllist = fopen(params->url_children, "r"); + char buffer[1000]; + List children; + + if (params->test_children) + { + while (fgets(buffer, sizeof(buffer), urllist)) + { + buffer [sizeof(buffer) - 1] = '\0'; // make strlen() safe + int len = strlen(buffer); + if (len && buffer [len-1] == '\n') + buffer [len-1] = '\0'; // remove trailing '\n' + children.Add(new String(buffer)); + } + fclose(urllist); + } + + urllist = fopen(params->url_parents, "r"); + URL parent, child; + String *current; + while (fgets(buffer, sizeof(buffer), urllist)) + { + parent = URL(buffer); + cout << "Parent: " << buffer << '(' << parent.signature().get() << ")\n"; + parent.dump(); + if (params->test_children) + { + cout << "\nChildren: \n"; + children.Start_Get(); + while ((current = (String *)children.Get_Next())) + { + cout << "\nChild: " << current->get() << endl; + child = URL(current->get(), parent); + child.dump(); + } + cout << endl; + } + } + + fclose(urllist); + children.Destroy(); +} + +//***************************************************************************** +// void usage() +// Display program usage information +// +static void usage() +{ + cout << "usage: url [options]\n"; + cout << "Options:\n"; + cout << "\t-v\t\tIncreases the verbosity\n"; + cout << "\t-p 
file\tname of the url parent file\n"; + cout << "\t-c file\tname of the url children file\n"; + exit(0); +} + + + diff --git a/debian/htdig/htdig-3.2.0b6/test/url.children b/debian/htdig/htdig-3.2.0b6/test/url.children new file mode 100644 index 00000000..438d41a0 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/url.children @@ -0,0 +1,11 @@ +./ +./../ +../foo.html +.../foo.html +/foo.html +#top +index.html +test.htm +/top/README +next/foo.html +.//relative.html diff --git a/debian/htdig/htdig-3.2.0b6/test/url.output b/debian/htdig/htdig-3.2.0b6/test/url.output new file mode 100644 index 00000000..7b30c4cb --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/url.output @@ -0,0 +1,8913 @@ +Parent: http://www.williams.edu:803/ +(http://www.williams.edu:803/) +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Children: + +Child: ./ +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: ./../ +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: ../foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /foo.html +url = http://www.williams.edu:803/foo.html + +Child: .../foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /.../foo.html +url = http://www.williams.edu:803/.../foo.html + +Child: /foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /foo.html +url = http://www.williams.edu:803/foo.html + +Child: #top +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: index.html +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: test.htm +service = http +user = +host = www.williams.edu +port = 803 +path = /test.htm +url = http://www.williams.edu:803/test.htm + +Child: /top/README +service = 
http +user = +host = www.williams.edu +port = 803 +path = /top/README +url = http://www.williams.edu:803/top/README + +Child: next/foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /next/foo.html +url = http://www.williams.edu:803/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.williams.edu +port = 803 +path = /relative.html +url = http://www.williams.edu:803/relative.html + +Parent: http://wso.williams.edu/ +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /.../foo.html +url = http://wso.williams.edu/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /test.htm +url = http://wso.williams.edu/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /next/foo.html +url = http://wso.williams.edu/next/foo.html + +Child: .//relative.html +service = http +user = 
+host = wso.williams.edu +port = 80 +path = /relative.html +url = http://wso.williams.edu/relative.html + +Parent: https://web.horde.org/williams/ +(https://web.horde.org:443/) +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Children: + +Child: ./ +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Child: ./../ +service = https +user = +host = web.horde.org +port = 443 +path = / +url = https://web.horde.org/ + +Child: ../foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /foo.html +url = https://web.horde.org/foo.html + +Child: .../foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams/.../foo.html +url = https://web.horde.org/williams/.../foo.html + +Child: /foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /foo.html +url = https://web.horde.org/foo.html + +Child: #top +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Child: index.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Child: test.htm +service = https +user = +host = web.horde.org +port = 443 +path = /williams/test.htm +url = https://web.horde.org/williams/test.htm + +Child: /top/README +service = https +user = +host = web.horde.org +port = 443 +path = /top/README +url = https://web.horde.org/top/README + +Child: next/foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams/next/foo.html +url = https://web.horde.org/williams/next/foo.html + +Child: .//relative.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams/relative.html +url = https://web.horde.org/williams/relative.html + +Parent: http://www.williams.edu/Administration/index.html +(http://www.williams.edu:80/) +service = http 
+user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Children: + +Child: ./ +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Child: ./../ +service = http +user = +host = www.williams.edu +port = 80 +path = / +url = http://www.williams.edu/ + +Child: ../foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /foo.html +url = http://www.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/.../foo.html +url = http://www.williams.edu/Administration/.../foo.html + +Child: /foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /foo.html +url = http://www.williams.edu/foo.html + +Child: #top +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Child: index.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Child: test.htm +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/test.htm +url = http://www.williams.edu/Administration/test.htm + +Child: /top/README +service = http +user = +host = www.williams.edu +port = 80 +path = /top/README +url = http://www.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/next/foo.html +url = http://www.williams.edu/Administration/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/relative.html +url = http://www.williams.edu/Administration/relative.html + +Parent: ftp://ghutchis@wso.williams.edu/mail/index +(ftp://ghutchis@wso.williams.edu:21/) +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/index +url = 
ftp://[email protected]/mail/index + +Children: + +Child: ./ +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/ +url = ftp://[email protected]/mail/ + +Child: ./../ +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = / +url = ftp://[email protected]/ + +Child: ../foo.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /foo.html +url = ftp://[email protected]/foo.html + +Child: .../foo.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/.../foo.html +url = ftp://[email protected]/mail/.../foo.html + +Child: /foo.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /foo.html +url = ftp://[email protected]/foo.html + +Child: #top +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/index +url = ftp://[email protected]/mail/index + +Child: index.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/index.html +url = ftp://[email protected]/mail/index.html + +Child: test.htm +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/test.htm +url = ftp://[email protected]/mail/test.htm + +Child: /top/README +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /top/README +url = ftp://[email protected]/top/README + +Child: next/foo.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/next/foo.html +url = ftp://[email protected]/mail/next/foo.html + +Child: .//relative.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/relative.html +url = ftp://[email protected]/mail/relative.html + +Parent: http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open +url = 
http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/ +url = http://wso.williams.edu/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/foo.html +url = http://wso.williams.edu/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/.../foo.html +url = http://wso.williams.edu/cgi-bin/BBS/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open +url = http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/test.htm +url = http://wso.williams.edu/cgi-bin/BBS/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/next/foo.html +url = http://wso.williams.edu/cgi-bin/BBS/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/relative.html +url = http://wso.williams.edu/cgi-bin/BBS/relative.html + +Parent: http://wso/~ghutchis/bookmarks.html#mac +(http://wso:80/) +service = http +user = +host = wso +port = 80 +path = /~ghutchis/bookmarks.html +url = 
http://wso/~ghutchis/bookmarks.html + +Children: + +Child: ./ +service = http +user = +host = wso +port = 80 +path = /~ghutchis/ +url = http://wso/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso +port = 80 +path = / +url = http://wso/ + +Child: ../foo.html +service = http +user = +host = wso +port = 80 +path = /foo.html +url = http://wso/foo.html + +Child: .../foo.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso +port = 80 +path = /foo.html +url = http://wso/foo.html + +Child: #top +service = http +user = +host = wso +port = 80 +path = /~ghutchis/bookmarks.html +url = http://wso/~ghutchis/bookmarks.html + +Child: index.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis/ +url = http://wso/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso +port = 80 +path = /~ghutchis/test.htm +url = http://wso/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso +port = 80 +path = /top/README +url = http://wso/top/README + +Child: next/foo.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis/relative.html +url = http://wso/~ghutchis/relative.html + +Parent: file:///opt/htdig/maindocs/index.html +(file://localhost:0/) +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Children: + +Child: ./ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/ +url = file:///opt/htdig/maindocs/ + +Child: ./../ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/ +url = file:///opt/htdig/ + +Child: ../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/foo.html +url = 
file:///opt/htdig/foo.html + +Child: .../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/.../foo.html +url = file:///opt/htdig/maindocs/.../foo.html + +Child: /foo.html +service = file +user = +host = localhost +port = 0 +path = /foo.html +url = file:///foo.html + +Child: #top +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: index.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: test.htm +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/test.htm +url = file:///opt/htdig/maindocs/test.htm + +Child: /top/README +service = file +user = +host = localhost +port = 0 +path = /top/README +url = file:///top/README + +Child: next/foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/next/foo.html +url = file:///opt/htdig/maindocs/next/foo.html + +Child: .//relative.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/relative.html +url = file:///opt/htdig/maindocs/relative.html + +Parent: http://www.htdig.org/This/Is/A/test.html?dowepass +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/test.html?dowepass +url = http://www.htdig.org/This/Is/A/test.html?dowepass + +Children: + +Child: ./ +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/ +url = http://www.htdig.org/This/Is/A/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/ +url = http://www.htdig.org/This/Is/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/foo.html +url = http://www.htdig.org/This/Is/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/.../foo.html +url = 
http://www.htdig.org/This/Is/A/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/test.html?dowepass +url = http://www.htdig.org/This/Is/A/test.html?dowepass + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/ +url = http://www.htdig.org/This/Is/A/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/test.htm +url = http://www.htdig.org/This/Is/A/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = /top/README +url = http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/next/foo.html +url = http://www.htdig.org/This/Is/A/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/relative.html +url = http://www.htdig.org/This/Is/A/relative.html + +Parent: http://localhost/index.asp?date=11/21/index.asp?date=12/1/98 +(http://localhost:80/) +service = http +user = +host = localhost +port = 80 +path = /index.asp?date=11/21/index.asp?date=12/1/98 +url = http://localhost/index.asp?date=11/21/index.asp?date=12/1/98 + +Children: + +Child: ./ +service = http +user = +host = localhost +port = 80 +path = / +url = http://localhost/ + +Child: ./../ +service = http +user = +host = localhost +port = 80 +path = / +url = http://localhost/ + +Child: ../foo.html +service = http +user = +host = localhost +port = 80 +path = /foo.html +url = http://localhost/foo.html + +Child: .../foo.html +service = http +user = +host = localhost +port = 80 +path = /.../foo.html +url = http://localhost/.../foo.html + +Child: /foo.html +service = http +user = +host = localhost +port = 80 +path = /foo.html +url = http://localhost/foo.html + +Child: #top 
+service = http +user = +host = localhost +port = 80 +path = /index.asp?date=11/21/index.asp?date=12/1/98 +url = http://localhost/index.asp?date=11/21/index.asp?date=12/1/98 + +Child: index.html +service = http +user = +host = localhost +port = 80 +path = / +url = http://localhost/ + +Child: test.htm +service = http +user = +host = localhost +port = 80 +path = /test.htm +url = http://localhost/test.htm + +Child: /top/README +service = http +user = +host = localhost +port = 80 +path = /top/README +url = http://localhost/top/README + +Child: next/foo.html +service = http +user = +host = localhost +port = 80 +path = /next/foo.html +url = http://localhost/next/foo.html + +Child: .//relative.html +service = http +user = +host = localhost +port = 80 +path = /relative.html +url = http://localhost/relative.html + +Parent: http://www.test.com/cgi-bin/test.cgi?http://this.com/url +(http://www.test.com:80/) +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/test.cgi?http://this.com/url +url = http://www.test.com/cgi-bin/test.cgi?http://this.com/url + +Children: + +Child: ./ +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/ +url = http://www.test.com/cgi-bin/ + +Child: ./../ +service = http +user = +host = www.test.com +port = 80 +path = / +url = http://www.test.com/ + +Child: ../foo.html +service = http +user = +host = www.test.com +port = 80 +path = /foo.html +url = http://www.test.com/foo.html + +Child: .../foo.html +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/.../foo.html +url = http://www.test.com/cgi-bin/.../foo.html + +Child: /foo.html +service = http +user = +host = www.test.com +port = 80 +path = /foo.html +url = http://www.test.com/foo.html + +Child: #top +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/test.cgi?http://this.com/url +url = http://www.test.com/cgi-bin/test.cgi?http://this.com/url + +Child: index.html +service = http +user = +host = www.test.com +port = 80 
+path = /cgi-bin/ +url = http://www.test.com/cgi-bin/ + +Child: test.htm +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/test.htm +url = http://www.test.com/cgi-bin/test.htm + +Child: /top/README +service = http +user = +host = www.test.com +port = 80 +path = /top/README +url = http://www.test.com/top/README + +Child: next/foo.html +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/next/foo.html +url = http://www.test.com/cgi-bin/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/relative.html +url = http://www.test.com/cgi-bin/relative.html + +Parent: http://wso.williams.edu/%7Eghutchis/index.html +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso.williams.edu/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/test.htm +url = 
http://wso.williams.edu/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso.williams.edu/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/relative.html +url = http://wso.williams.edu/~ghutchis/relative.html + +Parent: http://wso.williams.edu/~ghutchis/ +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso.williams.edu/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/test.htm +url = http://wso.williams.edu/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = 
http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso.williams.edu/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/relative.html +url = http://wso.williams.edu/~ghutchis/relative.html + +Parent: http://wso.williams.edu/~ghutchis/index.shtml +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/index.shtml +url = http://wso.williams.edu/~ghutchis/index.shtml + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso.williams.edu/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/index.shtml +url = http://wso.williams.edu/~ghutchis/index.shtml + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/test.htm +url = http://wso.williams.edu/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = 
wso.williams.edu +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso.williams.edu/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/relative.html +url = http://wso.williams.edu/~ghutchis/relative.html + +Parent: http://wso.williams.edu//ghutchis///test/index.html +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/test/ +url = http://wso.williams.edu/ghutchis/test/ + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/test/ +url = http://wso.williams.edu/ghutchis/test/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/ +url = http://wso.williams.edu/ghutchis/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/foo.html +url = http://wso.williams.edu/ghutchis/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/test/.../foo.html +url = http://wso.williams.edu/ghutchis/test/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/test/ +url = http://wso.williams.edu/ghutchis/test/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/test/ +url = http://wso.williams.edu/ghutchis/test/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/test/test.htm +url = http://wso.williams.edu/ghutchis/test/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = 
/ghutchis/test/next/foo.html +url = http://wso.williams.edu/ghutchis/test/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /ghutchis/test/relative.html +url = http://wso.williams.edu/ghutchis/test/relative.html + +Parent: http://wso.williams.edu/./ghutchis/../orgs/life.html +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/life.html +url = http://wso.williams.edu/orgs/life.html + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/ +url = http://wso.williams.edu/orgs/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/.../foo.html +url = http://wso.williams.edu/orgs/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/life.html +url = http://wso.williams.edu/orgs/life.html + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/ +url = http://wso.williams.edu/orgs/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/test.htm +url = http://wso.williams.edu/orgs/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/next/foo.html +url = http://wso.williams.edu/orgs/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu 
+port = 80 +path = /orgs/relative.html +url = http://wso.williams.edu/orgs/relative.html + +Parent: http://www.example.com/blank_news.shtml?pages/1.i +(http://www.example.com:80/) +service = http +user = +host = www.example.com +port = 80 +path = /blank_news.shtml?pages/1.i +url = http://www.example.com/blank_news.shtml?pages/1.i + +Children: + +Child: ./ +service = http +user = +host = www.example.com +port = 80 +path = / +url = http://www.example.com/ + +Child: ./../ +service = http +user = +host = www.example.com +port = 80 +path = / +url = http://www.example.com/ + +Child: ../foo.html +service = http +user = +host = www.example.com +port = 80 +path = /foo.html +url = http://www.example.com/foo.html + +Child: .../foo.html +service = http +user = +host = www.example.com +port = 80 +path = /.../foo.html +url = http://www.example.com/.../foo.html + +Child: /foo.html +service = http +user = +host = www.example.com +port = 80 +path = /foo.html +url = http://www.example.com/foo.html + +Child: #top +service = http +user = +host = www.example.com +port = 80 +path = /blank_news.shtml?pages/1.i +url = http://www.example.com/blank_news.shtml?pages/1.i + +Child: index.html +service = http +user = +host = www.example.com +port = 80 +path = / +url = http://www.example.com/ + +Child: test.htm +service = http +user = +host = www.example.com +port = 80 +path = /test.htm +url = http://www.example.com/test.htm + +Child: /top/README +service = http +user = +host = www.example.com +port = 80 +path = /top/README +url = http://www.example.com/top/README + +Child: next/foo.html +service = http +user = +host = www.example.com +port = 80 +path = /next/foo.html +url = http://www.example.com/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.example.com +port = 80 +path = /relative.html +url = http://www.example.com/relative.html + +Parent: http://www.williams.edu:803/Admin/Depts/.test/.news/Index.Html +(http://www.williams.edu:803/) +service = http +user = +host 
= www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Children: + +Child: ./ +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Child: ./../ +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/ +url = http://www.williams.edu:803/Admin/Depts/.test/ + +Child: ../foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/foo.html +url = http://www.williams.edu:803/Admin/Depts/.test/foo.html + +Child: .../foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/.../foo.html +url = http://www.williams.edu:803/Admin/Depts/.test/.news/.../foo.html + +Child: /foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /foo.html +url = http://www.williams.edu:803/foo.html + +Child: #top +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Child: index.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Child: test.htm +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/test.htm +url = http://www.williams.edu:803/Admin/Depts/.test/.news/test.htm + +Child: /top/README +service = http +user = +host = www.williams.edu +port = 803 +path = /top/README +url = http://www.williams.edu:803/top/README + +Child: next/foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/next/foo.html +url = http://www.williams.edu:803/Admin/Depts/.test/.news/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.williams.edu +port = 803 +path = 
/Admin/Depts/.test/.news/relative.html +url = http://www.williams.edu:803/Admin/Depts/.test/.news/relative.html + +Parent: http://test.com/test/ +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /test/.../foo.html +url = http://test.com/test/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /test/test.htm +url = http://test.com/test/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /test/next/foo.html +url = http://test.com/test/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /test/relative.html +url = http://test.com/test/relative.html + +Parent: http://test.com/opt/test/ +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = http://test.com/opt/test/ + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = http://test.com/opt/test/ + +Child: ./../ +service = http +user = +host 
= test.com +port = 80 +path = /opt/ +url = http://test.com/opt/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /opt/foo.html +url = http://test.com/opt/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /opt/test/.../foo.html +url = http://test.com/opt/test/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = http://test.com/opt/test/ + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = http://test.com/opt/test/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /opt/test/test.htm +url = http://test.com/opt/test/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /opt/test/next/foo.html +url = http://test.com/opt/test/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /opt/test/relative.html +url = http://test.com/opt/test/relative.html + +Parent: http://test.com/./opt/../test/ +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /test/.../foo.html +url = http://test.com/test/.../foo.html + +Child: /foo.html +service = http +user = +host = 
test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /test/test.htm +url = http://test.com/test/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /test/next/foo.html +url = http://test.com/test/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /test/relative.html +url = http://test.com/test/relative.html + +Parent: http://test.com/./././orgs/life.html +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/.../foo.html +url = http://test.com/orgs/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /orgs/test.htm +url = 
http://test.com/orgs/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/next/foo.html +url = http://test.com/orgs/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /orgs/relative.html +url = http://test.com/orgs/relative.html + +Parent: http://test.com/./../../orgs/life.html +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/.../foo.html +url = http://test.com/orgs/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /orgs/test.htm +url = http://test.com/orgs/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/next/foo.html +url = http://test.com/orgs/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = 
/orgs/relative.html +url = http://test.com/orgs/relative.html + +Parent: http://test.com/blank_news.shtml?pages/3.i +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /blank_news.shtml?pages/3.i +url = http://test.com/blank_news.shtml?pages/3.i + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /.../foo.html +url = http://test.com/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /blank_news.shtml?pages/3.i +url = http://test.com/blank_news.shtml?pages/3.i + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /test.htm +url = http://test.com/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /next/foo.html +url = http://test.com/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /relative.html +url = http://test.com/relative.html + +Parent: HTTP://www.Yahoo.com/ +(http://www.yahoo.com:80/) +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Children: + +Child: ./ +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Child: ./../ +service = http +user = +host = www.yahoo.com 
+port = 80 +path = / +url = http://www.yahoo.com/ + +Child: ../foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /foo.html +url = http://www.yahoo.com/foo.html + +Child: .../foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /.../foo.html +url = http://www.yahoo.com/.../foo.html + +Child: /foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /foo.html +url = http://www.yahoo.com/foo.html + +Child: #top +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Child: index.html +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Child: test.htm +service = http +user = +host = www.yahoo.com +port = 80 +path = /test.htm +url = http://www.yahoo.com/test.htm + +Child: /top/README +service = http +user = +host = www.yahoo.com +port = 80 +path = /top/README +url = http://www.yahoo.com/top/README + +Child: next/foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /next/foo.html +url = http://www.yahoo.com/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /relative.html +url = http://www.yahoo.com/relative.html + +Parent: http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/chem_102.html +(http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu:80/) +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/chem_102.html +url = 
http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/chem_102.html + +Children: + +Child: ./ +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/ +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/ + +Child: ./../ +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/ +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/ + +Child: ../foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/foo.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/foo.html + +Child: .../foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/.../foo.html +url = 
http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/.../foo.html + +Child: /foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /foo.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/foo.html + +Child: #top +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/chem_102.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/chem_102.html + +Child: index.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/ +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/ + +Child: test.htm +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/test.htm +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/test.htm + +Child: 
/top/README +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /top/README +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/top/README + +Child: next/foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/next/foo.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/next/foo.html + +Child: .//relative.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/relative.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/relative.html + +Parent: http://this-is-a.slashdot.org/slash/elimination/test//////////////////////////././././.././././/./././.././././../../././.test/ +(http://this-is-a.slashdot.org:80/) +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/ +url = http://this-is-a.slashdot.org/.test/ + +Children: + +Child: ./ +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/ +url = http://this-is-a.slashdot.org/.test/ + +Child: ./../ +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = / +url = http://this-is-a.slashdot.org/ + +Child: ../foo.html +service = http +user = +host = 
this-is-a.slashdot.org +port = 80 +path = /foo.html +url = http://this-is-a.slashdot.org/foo.html + +Child: .../foo.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/.../foo.html +url = http://this-is-a.slashdot.org/.test/.../foo.html + +Child: /foo.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /foo.html +url = http://this-is-a.slashdot.org/foo.html + +Child: #top +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/ +url = http://this-is-a.slashdot.org/.test/ + +Child: index.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/ +url = http://this-is-a.slashdot.org/.test/ + +Child: test.htm +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/test.htm +url = http://this-is-a.slashdot.org/.test/test.htm + +Child: /top/README +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /top/README +url = http://this-is-a.slashdot.org/top/README + +Child: next/foo.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/next/foo.html +url = http://this-is-a.slashdot.org/.test/next/foo.html + +Child: .//relative.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /.test/relative.html +url = http://this-is-a.slashdot.org/.test/relative.html + +Parent: http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 +url = http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 + +Children: + +Child: ./ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ 
+url = http://www.htdig.org/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/foo.html +url = http://www.htdig.org/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/.../foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 +url = http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/test.htm +url = http://www.htdig.org/cgi-bin/htdig3.private/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = /top/README +url = http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/next/foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/relative.html +url = http://www.htdig.org/cgi-bin/htdig3.private/relative.html + +Parent: http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=342;user=ghutchis +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=342;user=ghutchis +url = http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=342;user=ghutchis + +Children: + +Child: ./ +service = http +user = +host = 
www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ +url = http://www.htdig.org/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/foo.html +url = http://www.htdig.org/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/.../foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=342;user=ghutchis +url = http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=342;user=ghutchis + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/test.htm +url = http://www.htdig.org/cgi-bin/htdig3.private/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = /top/README +url = http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/next/foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/relative.html +url = http://www.htdig.org/cgi-bin/htdig3.private/relative.html + +Parent: http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 +(http://wso.williams.edu:80/) +service = http +user = +host 
= wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 +url = http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/ +url = http://wso.williams.edu/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/foo.html +url = http://wso.williams.edu/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/.../foo.html +url = http://wso.williams.edu/cgi-bin/BBS/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 +url = http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/test.htm +url = http://wso.williams.edu/cgi-bin/BBS/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/next/foo.html +url = 
http://wso.williams.edu/cgi-bin/BBS/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/relative.html +url = http://wso.williams.edu/cgi-bin/BBS/relative.html + +Parent: http:/www.fail.com/ +(http://:0/) +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Children: + +Child: ./ +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: ./../ +service = http +user = +host = +port = 0 +path = / +url = + +Child: ../foo.html +service = http +user = +host = +port = 0 +path = /foo.html +url = + +Child: .../foo.html +service = http +user = +host = +port = 0 +path = /www.fail.com/.../foo.html +url = + +Child: /foo.html +service = http +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: index.html +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: test.htm +service = http +user = +host = +port = 0 +path = /www.fail.com/test.htm +url = + +Child: /top/README +service = http +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = http +user = +host = +port = 0 +path = /www.fail.com/next/foo.html +url = + +Child: .//relative.html +service = http +user = +host = +port = 0 +path = /www.fail.com/relative.html +url = + +Parent: http:www.fail.com +(http://:0/) +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Children: + +Child: ./ +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: ./../ +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: ../foo.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .../foo.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /foo.html +service = http +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = http +user = 
+host = +port = 0 +path = www.fail.com +url = + +Child: index.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: test.htm +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /top/README +service = http +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .//relative.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Parent: file://localhost/opt/htdig/maindocs/index.html +(file://localhost:0/) +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Children: + +Child: ./ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/ +url = file:///opt/htdig/maindocs/ + +Child: ./../ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/ +url = file:///opt/htdig/ + +Child: ../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/foo.html +url = file:///opt/htdig/foo.html + +Child: .../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/.../foo.html +url = file:///opt/htdig/maindocs/.../foo.html + +Child: /foo.html +service = file +user = +host = localhost +port = 0 +path = /foo.html +url = file:///foo.html + +Child: #top +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: index.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: test.htm +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/test.htm +url = file:///opt/htdig/maindocs/test.htm + +Child: /top/README +service = file +user = +host = localhost +port = 0 +path = /top/README +url = file:///top/README + +Child: next/foo.html 
+service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/next/foo.html +url = file:///opt/htdig/maindocs/next/foo.html + +Child: .//relative.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/relative.html +url = file:///opt/htdig/maindocs/relative.html + +Parent: file://localhost:80/home/ghutchis/www/home.html +(file://localhost:0/) +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/home.html +url = file:///home/ghutchis/www/home.html + +Children: + +Child: ./ +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/ +url = file:///home/ghutchis/www/ + +Child: ./../ +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/ +url = file:///home/ghutchis/ + +Child: ../foo.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/foo.html +url = file:///home/ghutchis/foo.html + +Child: .../foo.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/.../foo.html +url = file:///home/ghutchis/www/.../foo.html + +Child: /foo.html +service = file +user = +host = localhost +port = 0 +path = /foo.html +url = file:///foo.html + +Child: #top +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/home.html +url = file:///home/ghutchis/www/home.html + +Child: index.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/index.html +url = file:///home/ghutchis/www/index.html + +Child: test.htm +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/test.htm +url = file:///home/ghutchis/www/test.htm + +Child: /top/README +service = file +user = +host = localhost +port = 0 +path = /top/README +url = file:///top/README + +Child: next/foo.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/next/foo.html +url = file:///home/ghutchis/www/next/foo.html + +Child: .//relative.html +service = file +user = +host = 
localhost +port = 0 +path = /home/ghutchis/www/relative.html +url = file:///home/ghutchis/www/relative.html + +Parent: http://www.htdig.org/cgi-bin/test.cgi?date=10/1/99#anchor1 +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/test.cgi?date=10/1/99 +url = http://www.htdig.org/cgi-bin/test.cgi?date=10/1/99 + +Children: + +Child: ./ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ +url = http://www.htdig.org/cgi-bin/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = / +url = http://www.htdig.org/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/.../foo.html +url = http://www.htdig.org/cgi-bin/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/test.cgi?date=10/1/99 +url = http://www.htdig.org/cgi-bin/test.cgi?date=10/1/99 + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ +url = http://www.htdig.org/cgi-bin/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/test.htm +url = http://www.htdig.org/cgi-bin/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = /top/README +url = http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/next/foo.html +url = http://www.htdig.org/cgi-bin/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/relative.html +url = http://www.htdig.org/cgi-bin/relative.html + +Parent: ftp://default.removal.com/index.html 
+(ftp://default.removal.com:21/) +service = ftp +user = +host = default.removal.com +port = 21 +path = /index.html +url = ftp://default.removal.com/index.html + +Children: + +Child: ./ +service = ftp +user = +host = default.removal.com +port = 21 +path = / +url = ftp://default.removal.com/ + +Child: ./../ +service = ftp +user = +host = default.removal.com +port = 21 +path = / +url = ftp://default.removal.com/ + +Child: ../foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /foo.html +url = ftp://default.removal.com/foo.html + +Child: .../foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /.../foo.html +url = ftp://default.removal.com/.../foo.html + +Child: /foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /foo.html +url = ftp://default.removal.com/foo.html + +Child: #top +service = ftp +user = +host = default.removal.com +port = 21 +path = /index.html +url = ftp://default.removal.com/index.html + +Child: index.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /index.html +url = ftp://default.removal.com/index.html + +Child: test.htm +service = ftp +user = +host = default.removal.com +port = 21 +path = /test.htm +url = ftp://default.removal.com/test.htm + +Child: /top/README +service = ftp +user = +host = default.removal.com +port = 21 +path = /top/README +url = ftp://default.removal.com/top/README + +Child: next/foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /next/foo.html +url = ftp://default.removal.com/next/foo.html + +Child: .//relative.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /relative.html +url = ftp://default.removal.com/relative.html + +Parent: https://test.com/life.html +(https://test.com:443/) +service = https +user = +host = test.com +port = 443 +path = /life.html +url = https://test.com/life.html + +Children: + +Child: ./ +service = https +user = +host = test.com +port = 443 +path 
= / +url = https://test.com/ + +Child: ./../ +service = https +user = +host = test.com +port = 443 +path = / +url = https://test.com/ + +Child: ../foo.html +service = https +user = +host = test.com +port = 443 +path = /foo.html +url = https://test.com/foo.html + +Child: .../foo.html +service = https +user = +host = test.com +port = 443 +path = /.../foo.html +url = https://test.com/.../foo.html + +Child: /foo.html +service = https +user = +host = test.com +port = 443 +path = /foo.html +url = https://test.com/foo.html + +Child: #top +service = https +user = +host = test.com +port = 443 +path = /life.html +url = https://test.com/life.html + +Child: index.html +service = https +user = +host = test.com +port = 443 +path = / +url = https://test.com/ + +Child: test.htm +service = https +user = +host = test.com +port = 443 +path = /test.htm +url = https://test.com/test.htm + +Child: /top/README +service = https +user = +host = test.com +port = 443 +path = /top/README +url = https://test.com/top/README + +Child: next/foo.html +service = https +user = +host = test.com +port = 443 +path = /next/foo.html +url = https://test.com/next/foo.html + +Child: .//relative.html +service = https +user = +host = test.com +port = 443 +path = /relative.html +url = https://test.com/relative.html + +Parent: https://default.removal.com/index.html +(https://default.removal.com:443/) +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Children: + +Child: ./ +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Child: ./../ +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Child: ../foo.html +service = https +user = +host = default.removal.com +port = 443 +path = /foo.html +url = https://default.removal.com/foo.html + +Child: .../foo.html +service = https +user = +host = default.removal.com +port = 443 +path = 
/.../foo.html +url = https://default.removal.com/.../foo.html + +Child: /foo.html +service = https +user = +host = default.removal.com +port = 443 +path = /foo.html +url = https://default.removal.com/foo.html + +Child: #top +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Child: index.html +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Child: test.htm +service = https +user = +host = default.removal.com +port = 443 +path = /test.htm +url = https://default.removal.com/test.htm + +Child: /top/README +service = https +user = +host = default.removal.com +port = 443 +path = /top/README +url = https://default.removal.com/top/README + +Child: next/foo.html +service = https +user = +host = default.removal.com +port = 443 +path = /next/foo.html +url = https://default.removal.com/next/foo.html + +Child: .//relative.html +service = https +user = +host = default.removal.com +port = 443 +path = /relative.html +url = https://default.removal.com/relative.html + +Parent: https://test.com:803/./../../orgs/life.html +(https://test.com:803/) +service = https +user = +host = test.com +port = 803 +path = /orgs/life.html +url = https://test.com:803/orgs/life.html + +Children: + +Child: ./ +service = https +user = +host = test.com +port = 803 +path = /orgs/ +url = https://test.com:803/orgs/ + +Child: ./../ +service = https +user = +host = test.com +port = 803 +path = / +url = https://test.com:803/ + +Child: ../foo.html +service = https +user = +host = test.com +port = 803 +path = /foo.html +url = https://test.com:803/foo.html + +Child: .../foo.html +service = https +user = +host = test.com +port = 803 +path = /orgs/.../foo.html +url = https://test.com:803/orgs/.../foo.html + +Child: /foo.html +service = https +user = +host = test.com +port = 803 +path = /foo.html +url = https://test.com:803/foo.html + +Child: #top +service = https +user = +host = test.com 
+port = 803 +path = /orgs/life.html +url = https://test.com:803/orgs/life.html + +Child: index.html +service = https +user = +host = test.com +port = 803 +path = /orgs/ +url = https://test.com:803/orgs/ + +Child: test.htm +service = https +user = +host = test.com +port = 803 +path = /orgs/test.htm +url = https://test.com:803/orgs/test.htm + +Child: /top/README +service = https +user = +host = test.com +port = 803 +path = /top/README +url = https://test.com:803/top/README + +Child: next/foo.html +service = https +user = +host = test.com +port = 803 +path = /orgs/next/foo.html +url = https://test.com:803/orgs/next/foo.html + +Child: .//relative.html +service = https +user = +host = test.com +port = 803 +path = /orgs/relative.html +url = https://test.com:803/orgs/relative.html + +Parent: https://alias.com:8080/./../../orgs/life.html +(https://alias.com:8080/) +service = https +user = +host = alias.com +port = 8080 +path = /orgs/life.html +url = https://alias.com:8080/orgs/life.html + +Children: + +Child: ./ +service = https +user = +host = alias.com +port = 8080 +path = /orgs/ +url = https://alias.com:8080/orgs/ + +Child: ./../ +service = https +user = +host = alias.com +port = 8080 +path = / +url = https://alias.com:8080/ + +Child: ../foo.html +service = https +user = +host = alias.com +port = 8080 +path = /foo.html +url = https://alias.com:8080/foo.html + +Child: .../foo.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs/.../foo.html +url = https://alias.com:8080/orgs/.../foo.html + +Child: /foo.html +service = https +user = +host = alias.com +port = 8080 +path = /foo.html +url = https://alias.com:8080/foo.html + +Child: #top +service = https +user = +host = alias.com +port = 8080 +path = /orgs/life.html +url = https://alias.com:8080/orgs/life.html + +Child: index.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs/ +url = https://alias.com:8080/orgs/ + +Child: test.htm +service = https +user = +host = alias.com +port = 
8080 +path = /orgs/test.htm +url = https://alias.com:8080/orgs/test.htm + +Child: /top/README +service = https +user = +host = alias.com +port = 8080 +path = /top/README +url = https://alias.com:8080/top/README + +Child: next/foo.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs/next/foo.html +url = https://alias.com:8080/orgs/next/foo.html + +Child: .//relative.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs/relative.html +url = https://alias.com:8080/orgs/relative.html + +Parent: https://alias.com/./../../orgs/life.html +(https://true.com:443/) +service = https +user = +host = true.com +port = 443 +path = /orgs/life.html +url = https://true.com/orgs/life.html + +Children: + +Child: ./ +service = https +user = +host = true.com +port = 443 +path = /orgs/ +url = https://true.com/orgs/ + +Child: ./../ +service = https +user = +host = true.com +port = 443 +path = / +url = https://true.com/ + +Child: ../foo.html +service = https +user = +host = true.com +port = 443 +path = /foo.html +url = https://true.com/foo.html + +Child: .../foo.html +service = https +user = +host = true.com +port = 443 +path = /orgs/.../foo.html +url = https://true.com/orgs/.../foo.html + +Child: /foo.html +service = https +user = +host = true.com +port = 443 +path = /foo.html +url = https://true.com/foo.html + +Child: #top +service = https +user = +host = true.com +port = 443 +path = /orgs/life.html +url = https://true.com/orgs/life.html + +Child: index.html +service = https +user = +host = true.com +port = 443 +path = /orgs/ +url = https://true.com/orgs/ + +Child: test.htm +service = https +user = +host = true.com +port = 443 +path = /orgs/test.htm +url = https://true.com/orgs/test.htm + +Child: /top/README +service = https +user = +host = true.com +port = 443 +path = /top/README +url = https://true.com/top/README + +Child: next/foo.html +service = https +user = +host = true.com +port = 443 +path = /orgs/next/foo.html +url = 
https://true.com/orgs/next/foo.html + +Child: .//relative.html +service = https +user = +host = true.com +port = 443 +path = /orgs/relative.html +url = https://true.com/orgs/relative.html + +Parent: https:/www.fail.com/ +(https://:0/) +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Children: + +Child: ./ +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: ./../ +service = https +user = +host = +port = 0 +path = / +url = + +Child: ../foo.html +service = https +user = +host = +port = 0 +path = /foo.html +url = + +Child: .../foo.html +service = https +user = +host = +port = 0 +path = /www.fail.com/.../foo.html +url = + +Child: /foo.html +service = https +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: index.html +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: test.htm +service = https +user = +host = +port = 0 +path = /www.fail.com/test.htm +url = + +Child: /top/README +service = https +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = https +user = +host = +port = 0 +path = /www.fail.com/next/foo.html +url = + +Child: .//relative.html +service = https +user = +host = +port = 0 +path = /www.fail.com/relative.html +url = + +Parent: https:www.fail.com +(https://:0/) +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Children: + +Child: ./ +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: ./../ +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: ../foo.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .../foo.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /foo.html +service = https +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = https +user = +host = +port = 0 
+path = www.fail.com +url = + +Child: index.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: test.htm +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /top/README +service = https +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .//relative.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Parent: help:/khelpcenter/ +(help://localhost:0/) +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Children: + +Child: ./ +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: ./../ +service = help +user = +host = localhost +port = 0 +path = / +url = help:/ + +Child: ../foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: .../foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/.../foo.html +url = help:/khelpcenter/.../foo.html + +Child: /foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: #top +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: index.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: test.htm +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/test.htm +url = help:/khelpcenter/test.htm + +Child: /top/README +service = help +user = +host = localhost +port = 0 +path = /top/README +url = help:/top/README + +Child: next/foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/next/foo.html +url = help:/khelpcenter/next/foo.html + +Child: .//relative.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/relative.html +url = 
help:/khelpcenter/relative.html + +Parent: help:/khelpcenter/what-is-kde.html#what-is-kde-introduction +(help://localhost:0/) +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/what-is-kde.html +url = help:/khelpcenter/what-is-kde.html + +Children: + +Child: ./ +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: ./../ +service = help +user = +host = localhost +port = 0 +path = / +url = help:/ + +Child: ../foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: .../foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/.../foo.html +url = help:/khelpcenter/.../foo.html + +Child: /foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: #top +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/what-is-kde.html +url = help:/khelpcenter/what-is-kde.html + +Child: index.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: test.htm +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/test.htm +url = help:/khelpcenter/test.htm + +Child: /top/README +service = help +user = +host = localhost +port = 0 +path = /top/README +url = help:/top/README + +Child: next/foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/next/foo.html +url = help:/khelpcenter/next/foo.html + +Child: .//relative.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/relative.html +url = help:/khelpcenter/relative.html + + +And now without turning // into / ... 
+ +Parent: http://www.williams.edu:803/ +(http://www.williams.edu:803/) +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Children: + +Child: ./ +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: ./../ +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: ../foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /foo.html +url = http://www.williams.edu:803/foo.html + +Child: .../foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /.../foo.html +url = http://www.williams.edu:803/.../foo.html + +Child: /foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /foo.html +url = http://www.williams.edu:803/foo.html + +Child: #top +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: index.html +service = http +user = +host = www.williams.edu +port = 803 +path = / +url = http://www.williams.edu:803/ + +Child: test.htm +service = http +user = +host = www.williams.edu +port = 803 +path = /test.htm +url = http://www.williams.edu:803/test.htm + +Child: /top/README +service = http +user = +host = www.williams.edu +port = 803 +path = /top/README +url = http://www.williams.edu:803/top/README + +Child: next/foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /next/foo.html +url = http://www.williams.edu:803/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.williams.edu +port = 803 +path = //relative.html +url = http://www.williams.edu:803//relative.html + +Parent: http://wso.williams.edu/ +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = 
/ +url = http://wso.williams.edu/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /.../foo.html +url = http://wso.williams.edu/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /test.htm +url = http://wso.williams.edu/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /next/foo.html +url = http://wso.williams.edu/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = //relative.html +url = http://wso.williams.edu//relative.html + +Parent: https://web.horde.org/williams/ +(https://web.horde.org:443/) +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Children: + +Child: ./ +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Child: ./../ +service = https +user = +host = web.horde.org +port = 443 +path = / +url = https://web.horde.org/ + +Child: ../foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /foo.html +url = https://web.horde.org/foo.html + 
+Child: .../foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams/.../foo.html +url = https://web.horde.org/williams/.../foo.html + +Child: /foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /foo.html +url = https://web.horde.org/foo.html + +Child: #top +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Child: index.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams/ +url = https://web.horde.org/williams/ + +Child: test.htm +service = https +user = +host = web.horde.org +port = 443 +path = /williams/test.htm +url = https://web.horde.org/williams/test.htm + +Child: /top/README +service = https +user = +host = web.horde.org +port = 443 +path = /top/README +url = https://web.horde.org/top/README + +Child: next/foo.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams/next/foo.html +url = https://web.horde.org/williams/next/foo.html + +Child: .//relative.html +service = https +user = +host = web.horde.org +port = 443 +path = /williams//relative.html +url = https://web.horde.org/williams//relative.html + +Parent: http://www.williams.edu/Administration/index.html +(http://www.williams.edu:80/) +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Children: + +Child: ./ +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Child: ./../ +service = http +user = +host = www.williams.edu +port = 80 +path = / +url = http://www.williams.edu/ + +Child: ../foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /foo.html +url = http://www.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/.../foo.html +url = 
http://www.williams.edu/Administration/.../foo.html + +Child: /foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /foo.html +url = http://www.williams.edu/foo.html + +Child: #top +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Child: index.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/ +url = http://www.williams.edu/Administration/ + +Child: test.htm +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/test.htm +url = http://www.williams.edu/Administration/test.htm + +Child: /top/README +service = http +user = +host = www.williams.edu +port = 80 +path = /top/README +url = http://www.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration/next/foo.html +url = http://www.williams.edu/Administration/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.williams.edu +port = 80 +path = /Administration//relative.html +url = http://www.williams.edu/Administration//relative.html + +Parent: ftp://[email protected]/mail/index +(ftp://[email protected]:21/) +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/index +url = ftp://[email protected]/mail/index + +Children: + +Child: ./ +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/ +url = ftp://[email protected]/mail/ + +Child: ./../ +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = / +url = ftp://[email protected]/ + +Child: ../foo.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /foo.html +url = ftp://[email protected]/foo.html + +Child: .../foo.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/.../foo.html +url = ftp://[email protected]/mail/.../foo.html + +Child: /foo.html +service = ftp 
+user = ghutchis +host = wso.williams.edu +port = 21 +path = /foo.html +url = ftp://[email protected]/foo.html + +Child: #top +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/index +url = ftp://[email protected]/mail/index + +Child: index.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/index.html +url = ftp://[email protected]/mail/index.html + +Child: test.htm +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/test.htm +url = ftp://[email protected]/mail/test.htm + +Child: /top/README +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /top/README +url = ftp://[email protected]/top/README + +Child: next/foo.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail/next/foo.html +url = ftp://[email protected]/mail/next/foo.html + +Child: .//relative.html +service = ftp +user = ghutchis +host = wso.williams.edu +port = 21 +path = /mail//relative.html +url = ftp://[email protected]/mail//relative.html + +Parent: http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open +url = http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/ +url = http://wso.williams.edu/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/foo.html +url = http://wso.williams.edu/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/.../foo.html +url = http://wso.williams.edu/cgi-bin/BBS/.../foo.html + +Child: /foo.html +service = http 
+user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open +url = http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/test.htm +url = http://wso.williams.edu/cgi-bin/BBS/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/next/foo.html +url = http://wso.williams.edu/cgi-bin/BBS/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS//relative.html +url = http://wso.williams.edu/cgi-bin/BBS//relative.html + +Parent: http://wso/~ghutchis/bookmarks.html#mac +(http://wso:80/) +service = http +user = +host = wso +port = 80 +path = /~ghutchis/bookmarks.html +url = http://wso/~ghutchis/bookmarks.html + +Children: + +Child: ./ +service = http +user = +host = wso +port = 80 +path = /~ghutchis/ +url = http://wso/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso +port = 80 +path = / +url = http://wso/ + +Child: ../foo.html +service = http +user = +host = wso +port = 80 +path = /foo.html +url = http://wso/foo.html + +Child: .../foo.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso +port = 80 +path = /foo.html +url = http://wso/foo.html + +Child: #top +service = http +user = +host = wso +port = 80 +path = /~ghutchis/bookmarks.html +url = 
http://wso/~ghutchis/bookmarks.html + +Child: index.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis/ +url = http://wso/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso +port = 80 +path = /~ghutchis/test.htm +url = http://wso/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso +port = 80 +path = /top/README +url = http://wso/top/README + +Child: next/foo.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso +port = 80 +path = /~ghutchis//relative.html +url = http://wso/~ghutchis//relative.html + +Parent: file:///opt/htdig/maindocs/index.html +(file://localhost:0/) +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Children: + +Child: ./ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/ +url = file:///opt/htdig/maindocs/ + +Child: ./../ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/ +url = file:///opt/htdig/ + +Child: ../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/foo.html +url = file:///opt/htdig/foo.html + +Child: .../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/.../foo.html +url = file:///opt/htdig/maindocs/.../foo.html + +Child: /foo.html +service = file +user = +host = localhost +port = 0 +path = /foo.html +url = file:///foo.html + +Child: #top +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: index.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: test.htm +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/test.htm +url = 
file:///opt/htdig/maindocs/test.htm + +Child: /top/README +service = file +user = +host = localhost +port = 0 +path = /top/README +url = file:///top/README + +Child: next/foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/next/foo.html +url = file:///opt/htdig/maindocs/next/foo.html + +Child: .//relative.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs//relative.html +url = file:///opt/htdig/maindocs//relative.html + +Parent: http://www.htdig.org/This/Is/A/test.html?dowepass +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/test.html?dowepass +url = http://www.htdig.org/This/Is/A/test.html?dowepass + +Children: + +Child: ./ +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/ +url = http://www.htdig.org/This/Is/A/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/ +url = http://www.htdig.org/This/Is/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/foo.html +url = http://www.htdig.org/This/Is/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/.../foo.html +url = http://www.htdig.org/This/Is/A/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/test.html?dowepass +url = http://www.htdig.org/This/Is/A/test.html?dowepass + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/ +url = http://www.htdig.org/This/Is/A/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/test.htm +url = http://www.htdig.org/This/Is/A/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = 
/top/README +url = http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A/next/foo.html +url = http://www.htdig.org/This/Is/A/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.htdig.org +port = 80 +path = /This/Is/A//relative.html +url = http://www.htdig.org/This/Is/A//relative.html + +Parent: http://localhost/index.asp?date=11/21/index.asp?date=12/1/98 +(http://localhost:80/) +service = http +user = +host = localhost +port = 80 +path = /index.asp?date=11/21/index.asp?date=12/1/98 +url = http://localhost/index.asp?date=11/21/index.asp?date=12/1/98 + +Children: + +Child: ./ +service = http +user = +host = localhost +port = 80 +path = / +url = http://localhost/ + +Child: ./../ +service = http +user = +host = localhost +port = 80 +path = / +url = http://localhost/ + +Child: ../foo.html +service = http +user = +host = localhost +port = 80 +path = /foo.html +url = http://localhost/foo.html + +Child: .../foo.html +service = http +user = +host = localhost +port = 80 +path = /.../foo.html +url = http://localhost/.../foo.html + +Child: /foo.html +service = http +user = +host = localhost +port = 80 +path = /foo.html +url = http://localhost/foo.html + +Child: #top +service = http +user = +host = localhost +port = 80 +path = /index.asp?date=11/21/index.asp?date=12/1/98 +url = http://localhost/index.asp?date=11/21/index.asp?date=12/1/98 + +Child: index.html +service = http +user = +host = localhost +port = 80 +path = / +url = http://localhost/ + +Child: test.htm +service = http +user = +host = localhost +port = 80 +path = /test.htm +url = http://localhost/test.htm + +Child: /top/README +service = http +user = +host = localhost +port = 80 +path = /top/README +url = http://localhost/top/README + +Child: next/foo.html +service = http +user = +host = localhost +port = 80 +path = /next/foo.html +url = http://localhost/next/foo.html + +Child: .//relative.html +service = http +user 
= +host = localhost +port = 80 +path = //relative.html +url = http://localhost//relative.html + +Parent: http://www.test.com/cgi-bin/test.cgi?http://this.com/url +(http://www.test.com:80/) +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/test.cgi?http://this.com/url +url = http://www.test.com/cgi-bin/test.cgi?http://this.com/url + +Children: + +Child: ./ +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/ +url = http://www.test.com/cgi-bin/ + +Child: ./../ +service = http +user = +host = www.test.com +port = 80 +path = / +url = http://www.test.com/ + +Child: ../foo.html +service = http +user = +host = www.test.com +port = 80 +path = /foo.html +url = http://www.test.com/foo.html + +Child: .../foo.html +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/.../foo.html +url = http://www.test.com/cgi-bin/.../foo.html + +Child: /foo.html +service = http +user = +host = www.test.com +port = 80 +path = /foo.html +url = http://www.test.com/foo.html + +Child: #top +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/test.cgi?http://this.com/url +url = http://www.test.com/cgi-bin/test.cgi?http://this.com/url + +Child: index.html +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/ +url = http://www.test.com/cgi-bin/ + +Child: test.htm +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/test.htm +url = http://www.test.com/cgi-bin/test.htm + +Child: /top/README +service = http +user = +host = www.test.com +port = 80 +path = /top/README +url = http://www.test.com/top/README + +Child: next/foo.html +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin/next/foo.html +url = http://www.test.com/cgi-bin/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.test.com +port = 80 +path = /cgi-bin//relative.html +url = http://www.test.com/cgi-bin//relative.html + +Parent: http://wso.williams.edu/%7Eghutchis/index.html 
+(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso.williams.edu/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/test.htm +url = http://wso.williams.edu/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso.williams.edu/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis//relative.html +url = http://wso.williams.edu/~ghutchis//relative.html + +Parent: http://wso.williams.edu/~ghutchis/ +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + 
+Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso.williams.edu/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/test.htm +url = http://wso.williams.edu/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso.williams.edu/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis//relative.html +url = http://wso.williams.edu/~ghutchis//relative.html + +Parent: http://wso.williams.edu/~ghutchis/index.shtml +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/index.shtml +url = http://wso.williams.edu/~ghutchis/index.shtml + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = 
http://wso.williams.edu/~ghutchis/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/.../foo.html +url = http://wso.williams.edu/~ghutchis/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/index.shtml +url = http://wso.williams.edu/~ghutchis/index.shtml + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/ +url = http://wso.williams.edu/~ghutchis/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/test.htm +url = http://wso.williams.edu/~ghutchis/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis/next/foo.html +url = http://wso.williams.edu/~ghutchis/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /~ghutchis//relative.html +url = http://wso.williams.edu/~ghutchis//relative.html + +Parent: http://wso.williams.edu//ghutchis///test/index.html +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test/ +url = http://wso.williams.edu//ghutchis///test/ + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test/ +url = http://wso.williams.edu//ghutchis///test/ + +Child: ./../ +service = http +user = +host = 
wso.williams.edu +port = 80 +path = //ghutchis/// +url = http://wso.williams.edu//ghutchis/// + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///foo.html +url = http://wso.williams.edu//ghutchis///foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test/.../foo.html +url = http://wso.williams.edu//ghutchis///test/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test/ +url = http://wso.williams.edu//ghutchis///test/ + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test/ +url = http://wso.williams.edu//ghutchis///test/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test/test.htm +url = http://wso.williams.edu//ghutchis///test/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test/next/foo.html +url = http://wso.williams.edu//ghutchis///test/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = //ghutchis///test//relative.html +url = http://wso.williams.edu//ghutchis///test//relative.html + +Parent: http://wso.williams.edu/./ghutchis/../orgs/life.html +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/life.html +url = http://wso.williams.edu/orgs/life.html + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/ +url = http://wso.williams.edu/orgs/ + +Child: ./../ +service = http +user = +host = 
wso.williams.edu +port = 80 +path = / +url = http://wso.williams.edu/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/.../foo.html +url = http://wso.williams.edu/orgs/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/life.html +url = http://wso.williams.edu/orgs/life.html + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/ +url = http://wso.williams.edu/orgs/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/test.htm +url = http://wso.williams.edu/orgs/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs/next/foo.html +url = http://wso.williams.edu/orgs/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /orgs//relative.html +url = http://wso.williams.edu/orgs//relative.html + +Parent: http://www.example.com/blank_news.shtml?pages/1.i +(http://www.example.com:80/) +service = http +user = +host = www.example.com +port = 80 +path = /blank_news.shtml?pages/1.i +url = http://www.example.com/blank_news.shtml?pages/1.i + +Children: + +Child: ./ +service = http +user = +host = www.example.com +port = 80 +path = / +url = http://www.example.com/ + +Child: ./../ +service = http +user = +host = www.example.com +port = 80 +path = / +url = http://www.example.com/ + +Child: ../foo.html +service = http +user = +host = www.example.com +port = 80 +path = /foo.html +url = 
http://www.example.com/foo.html + +Child: .../foo.html +service = http +user = +host = www.example.com +port = 80 +path = /.../foo.html +url = http://www.example.com/.../foo.html + +Child: /foo.html +service = http +user = +host = www.example.com +port = 80 +path = /foo.html +url = http://www.example.com/foo.html + +Child: #top +service = http +user = +host = www.example.com +port = 80 +path = /blank_news.shtml?pages/1.i +url = http://www.example.com/blank_news.shtml?pages/1.i + +Child: index.html +service = http +user = +host = www.example.com +port = 80 +path = / +url = http://www.example.com/ + +Child: test.htm +service = http +user = +host = www.example.com +port = 80 +path = /test.htm +url = http://www.example.com/test.htm + +Child: /top/README +service = http +user = +host = www.example.com +port = 80 +path = /top/README +url = http://www.example.com/top/README + +Child: next/foo.html +service = http +user = +host = www.example.com +port = 80 +path = /next/foo.html +url = http://www.example.com/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.example.com +port = 80 +path = //relative.html +url = http://www.example.com//relative.html + +Parent: http://www.williams.edu:803/Admin/Depts/.test/.news/Index.Html +(http://www.williams.edu:803/) +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Children: + +Child: ./ +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Child: ./../ +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/ +url = http://www.williams.edu:803/Admin/Depts/.test/ + +Child: ../foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/foo.html +url = http://www.williams.edu:803/Admin/Depts/.test/foo.html + +Child: .../foo.html +service 
= http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/.../foo.html +url = http://www.williams.edu:803/Admin/Depts/.test/.news/.../foo.html + +Child: /foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /foo.html +url = http://www.williams.edu:803/foo.html + +Child: #top +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Child: index.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/ +url = http://www.williams.edu:803/Admin/Depts/.test/.news/ + +Child: test.htm +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/test.htm +url = http://www.williams.edu:803/Admin/Depts/.test/.news/test.htm + +Child: /top/README +service = http +user = +host = www.williams.edu +port = 803 +path = /top/README +url = http://www.williams.edu:803/top/README + +Child: next/foo.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news/next/foo.html +url = http://www.williams.edu:803/Admin/Depts/.test/.news/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.williams.edu +port = 803 +path = /Admin/Depts/.test/.news//relative.html +url = http://www.williams.edu:803/Admin/Depts/.test/.news//relative.html + +Parent: http://test.com/test/ +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = 
/test/.../foo.html +url = http://test.com/test/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /test/test.htm +url = http://test.com/test/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /test/next/foo.html +url = http://test.com/test/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /test//relative.html +url = http://test.com/test//relative.html + +Parent: http://test.com/opt/test/ +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = http://test.com/opt/test/ + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = http://test.com/opt/test/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = /opt/ +url = http://test.com/opt/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /opt/foo.html +url = http://test.com/opt/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /opt/test/.../foo.html +url = http://test.com/opt/test/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = http://test.com/opt/test/ + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /opt/test/ +url = 
http://test.com/opt/test/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /opt/test/test.htm +url = http://test.com/opt/test/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /opt/test/next/foo.html +url = http://test.com/opt/test/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /opt/test//relative.html +url = http://test.com/opt/test//relative.html + +Parent: http://test.com/./opt/../test/ +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /test/.../foo.html +url = http://test.com/test/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /test/ +url = http://test.com/test/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /test/test.htm +url = http://test.com/test/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /test/next/foo.html +url = 
http://test.com/test/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /test//relative.html +url = http://test.com/test//relative.html + +Parent: http://test.com/./././orgs/life.html +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/.../foo.html +url = http://test.com/orgs/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /orgs/test.htm +url = http://test.com/orgs/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/next/foo.html +url = http://test.com/orgs/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /orgs//relative.html +url = http://test.com/orgs//relative.html + +Parent: http://test.com/./../../orgs/life.html +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Children: + +Child: ./ 
+service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/.../foo.html +url = http://test.com/orgs/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /orgs/life.html +url = http://test.com/orgs/life.html + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = /orgs/ +url = http://test.com/orgs/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /orgs/test.htm +url = http://test.com/orgs/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /orgs/next/foo.html +url = http://test.com/orgs/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = /orgs//relative.html +url = http://test.com/orgs//relative.html + +Parent: http://test.com/blank_news.shtml?pages/3.i +(http://test.com:80/) +service = http +user = +host = test.com +port = 80 +path = /blank_news.shtml?pages/3.i +url = http://test.com/blank_news.shtml?pages/3.i + +Children: + +Child: ./ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ./../ +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: ../foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: .../foo.html +service = http +user = +host = 
test.com +port = 80 +path = /.../foo.html +url = http://test.com/.../foo.html + +Child: /foo.html +service = http +user = +host = test.com +port = 80 +path = /foo.html +url = http://test.com/foo.html + +Child: #top +service = http +user = +host = test.com +port = 80 +path = /blank_news.shtml?pages/3.i +url = http://test.com/blank_news.shtml?pages/3.i + +Child: index.html +service = http +user = +host = test.com +port = 80 +path = / +url = http://test.com/ + +Child: test.htm +service = http +user = +host = test.com +port = 80 +path = /test.htm +url = http://test.com/test.htm + +Child: /top/README +service = http +user = +host = test.com +port = 80 +path = /top/README +url = http://test.com/top/README + +Child: next/foo.html +service = http +user = +host = test.com +port = 80 +path = /next/foo.html +url = http://test.com/next/foo.html + +Child: .//relative.html +service = http +user = +host = test.com +port = 80 +path = //relative.html +url = http://test.com//relative.html + +Parent: HTTP://www.Yahoo.com/ +(http://www.yahoo.com:80/) +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Children: + +Child: ./ +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Child: ./../ +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Child: ../foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /foo.html +url = http://www.yahoo.com/foo.html + +Child: .../foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /.../foo.html +url = http://www.yahoo.com/.../foo.html + +Child: /foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /foo.html +url = http://www.yahoo.com/foo.html + +Child: #top +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = http://www.yahoo.com/ + +Child: index.html +service = http +user = +host = www.yahoo.com +port = 80 +path = / +url = 
http://www.yahoo.com/ + +Child: test.htm +service = http +user = +host = www.yahoo.com +port = 80 +path = /test.htm +url = http://www.yahoo.com/test.htm + +Child: /top/README +service = http +user = +host = www.yahoo.com +port = 80 +path = /top/README +url = http://www.yahoo.com/top/README + +Child: next/foo.html +service = http +user = +host = www.yahoo.com +port = 80 +path = /next/foo.html +url = http://www.yahoo.com/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.yahoo.com +port = 80 +path = //relative.html +url = http://www.yahoo.com//relative.html + +Parent: http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/chem_102.html +(http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu:80/) +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/chem_102.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/chem_102.html + +Children: + +Child: ./ +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/ +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/ + +Child: ./../ +service = http +user = +host = 
this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/ +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/ + +Child: ../foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/foo.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/foo.html + +Child: .../foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/.../foo.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/.../foo.html + +Child: /foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /foo.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/foo.html + +Child: #top +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/chem_102.html +url = 
http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/chem_102.html + +Child: index.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/ +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/ + +Child: test.htm +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/test.htm +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/test.htm + +Child: /top/README +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /top/README +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/top/README + +Child: next/foo.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102/next/foo.html +url = 
http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/next/foo.html + +Child: .//relative.html +service = http +user = +host = this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu +port = 80 +path = /go/Departments/Chemistry/classes/102//relative.html +url = http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102//relative.html + +Parent: http://this-is-a.slashdot.org/slash/elimination/test//////////////////////////././././.././././/./././.././././../../././.test/ +(http://this-is-a.slashdot.org:80/) +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test/ +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test/ + +Children: + +Child: ./ +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test/ +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test/ + +Child: ./../ +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test/////////////////////// +url = http://this-is-a.slashdot.org/slash/elimination/test/////////////////////// + +Child: ../foo.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////foo.html +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////foo.html + +Child: .../foo.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test/.../foo.html +url = 
http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test/.../foo.html + +Child: /foo.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /foo.html +url = http://this-is-a.slashdot.org/foo.html + +Child: #top +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test/ +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test/ + +Child: index.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test/ +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test/ + +Child: test.htm +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test/test.htm +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test/test.htm + +Child: /top/README +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /top/README +url = http://this-is-a.slashdot.org/top/README + +Child: next/foo.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test/next/foo.html +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test/next/foo.html + +Child: .//relative.html +service = http +user = +host = this-is-a.slashdot.org +port = 80 +path = /slash/elimination/test///////////////////////.test//relative.html +url = http://this-is-a.slashdot.org/slash/elimination/test///////////////////////.test//relative.html + +Parent: http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 +url = 
http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 + +Children: + +Child: ./ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ +url = http://www.htdig.org/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/foo.html +url = http://www.htdig.org/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/.../foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 +url = http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/test.htm +url = http://www.htdig.org/cgi-bin/htdig3.private/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = /top/README +url = http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/next/foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private//relative.html +url = http://www.htdig.org/cgi-bin/htdig3.private//relative.html + +Parent: 
http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=342;user=ghutchis +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=342;user=ghutchis +url = http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=342;user=ghutchis + +Children: + +Child: ./ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ +url = http://www.htdig.org/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/foo.html +url = http://www.htdig.org/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/.../foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/incoming?id=342;user=ghutchis +url = http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=342;user=ghutchis + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/ +url = http://www.htdig.org/cgi-bin/htdig3.private/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/test.htm +url = http://www.htdig.org/cgi-bin/htdig3.private/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = /top/README +url = http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private/next/foo.html +url = http://www.htdig.org/cgi-bin/htdig3.private/next/foo.html + +Child: .//relative.html +service = http 
+user = +host = www.htdig.org +port = 80 +path = /cgi-bin/htdig3.private//relative.html +url = http://www.htdig.org/cgi-bin/htdig3.private//relative.html + +Parent: http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 +(http://wso.williams.edu:80/) +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 +url = http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 + +Children: + +Child: ./ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: ./../ +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/ +url = http://wso.williams.edu/cgi-bin/ + +Child: ../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/foo.html +url = http://wso.williams.edu/cgi-bin/foo.html + +Child: .../foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/.../foo.html +url = http://wso.williams.edu/cgi-bin/BBS/.../foo.html + +Child: /foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /foo.html +url = http://wso.williams.edu/foo.html + +Child: #top +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 +url = http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 + +Child: index.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/ +url = http://wso.williams.edu/cgi-bin/BBS/ + +Child: test.htm +service = http +user = +host = wso.williams.edu +port = 80 +path = 
/cgi-bin/BBS/test.htm +url = http://wso.williams.edu/cgi-bin/BBS/test.htm + +Child: /top/README +service = http +user = +host = wso.williams.edu +port = 80 +path = /top/README +url = http://wso.williams.edu/top/README + +Child: next/foo.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS/next/foo.html +url = http://wso.williams.edu/cgi-bin/BBS/next/foo.html + +Child: .//relative.html +service = http +user = +host = wso.williams.edu +port = 80 +path = /cgi-bin/BBS//relative.html +url = http://wso.williams.edu/cgi-bin/BBS//relative.html + +Parent: http:/www.fail.com/ +(http://:0/) +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Children: + +Child: ./ +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: ./../ +service = http +user = +host = +port = 0 +path = / +url = + +Child: ../foo.html +service = http +user = +host = +port = 0 +path = /foo.html +url = + +Child: .../foo.html +service = http +user = +host = +port = 0 +path = /www.fail.com/.../foo.html +url = + +Child: /foo.html +service = http +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: index.html +service = http +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: test.htm +service = http +user = +host = +port = 0 +path = /www.fail.com/test.htm +url = + +Child: /top/README +service = http +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = http +user = +host = +port = 0 +path = /www.fail.com/next/foo.html +url = + +Child: .//relative.html +service = http +user = +host = +port = 0 +path = /www.fail.com//relative.html +url = + +Parent: http:www.fail.com +(http://:0/) +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Children: + +Child: ./ +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: ./../ +service = http +user = +host = 
+port = 0 +path = www.fail.com +url = + +Child: ../foo.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .../foo.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /foo.html +service = http +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: index.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: test.htm +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /top/README +service = http +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .//relative.html +service = http +user = +host = +port = 0 +path = www.fail.com +url = + +Parent: file://localhost/opt/htdig/maindocs/index.html +(file://localhost:0/) +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Children: + +Child: ./ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/ +url = file:///opt/htdig/maindocs/ + +Child: ./../ +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/ +url = file:///opt/htdig/ + +Child: ../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/foo.html +url = file:///opt/htdig/foo.html + +Child: .../foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/.../foo.html +url = file:///opt/htdig/maindocs/.../foo.html + +Child: /foo.html +service = file +user = +host = localhost +port = 0 +path = /foo.html +url = file:///foo.html + +Child: #top +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: index.html +service = file +user = +host = localhost +port = 0 +path = 
/opt/htdig/maindocs/index.html +url = file:///opt/htdig/maindocs/index.html + +Child: test.htm +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/test.htm +url = file:///opt/htdig/maindocs/test.htm + +Child: /top/README +service = file +user = +host = localhost +port = 0 +path = /top/README +url = file:///top/README + +Child: next/foo.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs/next/foo.html +url = file:///opt/htdig/maindocs/next/foo.html + +Child: .//relative.html +service = file +user = +host = localhost +port = 0 +path = /opt/htdig/maindocs//relative.html +url = file:///opt/htdig/maindocs//relative.html + +Parent: file://localhost:80/home/ghutchis/www/home.html +(file://localhost:0/) +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/home.html +url = file:///home/ghutchis/www/home.html + +Children: + +Child: ./ +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/ +url = file:///home/ghutchis/www/ + +Child: ./../ +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/ +url = file:///home/ghutchis/ + +Child: ../foo.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/foo.html +url = file:///home/ghutchis/foo.html + +Child: .../foo.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/.../foo.html +url = file:///home/ghutchis/www/.../foo.html + +Child: /foo.html +service = file +user = +host = localhost +port = 0 +path = /foo.html +url = file:///foo.html + +Child: #top +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/home.html +url = file:///home/ghutchis/www/home.html + +Child: index.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/index.html +url = file:///home/ghutchis/www/index.html + +Child: test.htm +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/test.htm +url = 
file:///home/ghutchis/www/test.htm + +Child: /top/README +service = file +user = +host = localhost +port = 0 +path = /top/README +url = file:///top/README + +Child: next/foo.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www/next/foo.html +url = file:///home/ghutchis/www/next/foo.html + +Child: .//relative.html +service = file +user = +host = localhost +port = 0 +path = /home/ghutchis/www//relative.html +url = file:///home/ghutchis/www//relative.html + +Parent: http://www.htdig.org/cgi-bin/test.cgi?date=10/1/99#anchor1 +(http://www.htdig.org:80/) +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/test.cgi?date=10/1/99 +url = http://www.htdig.org/cgi-bin/test.cgi?date=10/1/99 + +Children: + +Child: ./ +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ +url = http://www.htdig.org/cgi-bin/ + +Child: ./../ +service = http +user = +host = www.htdig.org +port = 80 +path = / +url = http://www.htdig.org/ + +Child: ../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: .../foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/.../foo.html +url = http://www.htdig.org/cgi-bin/.../foo.html + +Child: /foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /foo.html +url = http://www.htdig.org/foo.html + +Child: #top +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/test.cgi?date=10/1/99 +url = http://www.htdig.org/cgi-bin/test.cgi?date=10/1/99 + +Child: index.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/ +url = http://www.htdig.org/cgi-bin/ + +Child: test.htm +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/test.htm +url = http://www.htdig.org/cgi-bin/test.htm + +Child: /top/README +service = http +user = +host = www.htdig.org +port = 80 +path = /top/README +url = 
http://www.htdig.org/top/README + +Child: next/foo.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin/next/foo.html +url = http://www.htdig.org/cgi-bin/next/foo.html + +Child: .//relative.html +service = http +user = +host = www.htdig.org +port = 80 +path = /cgi-bin//relative.html +url = http://www.htdig.org/cgi-bin//relative.html + +Parent: ftp://default.removal.com/index.html +(ftp://default.removal.com:21/) +service = ftp +user = +host = default.removal.com +port = 21 +path = /index.html +url = ftp://default.removal.com/index.html + +Children: + +Child: ./ +service = ftp +user = +host = default.removal.com +port = 21 +path = / +url = ftp://default.removal.com/ + +Child: ./../ +service = ftp +user = +host = default.removal.com +port = 21 +path = / +url = ftp://default.removal.com/ + +Child: ../foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /foo.html +url = ftp://default.removal.com/foo.html + +Child: .../foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /.../foo.html +url = ftp://default.removal.com/.../foo.html + +Child: /foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /foo.html +url = ftp://default.removal.com/foo.html + +Child: #top +service = ftp +user = +host = default.removal.com +port = 21 +path = /index.html +url = ftp://default.removal.com/index.html + +Child: index.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /index.html +url = ftp://default.removal.com/index.html + +Child: test.htm +service = ftp +user = +host = default.removal.com +port = 21 +path = /test.htm +url = ftp://default.removal.com/test.htm + +Child: /top/README +service = ftp +user = +host = default.removal.com +port = 21 +path = /top/README +url = ftp://default.removal.com/top/README + +Child: next/foo.html +service = ftp +user = +host = default.removal.com +port = 21 +path = /next/foo.html +url = ftp://default.removal.com/next/foo.html + 
+Child: .//relative.html +service = ftp +user = +host = default.removal.com +port = 21 +path = //relative.html +url = ftp://default.removal.com//relative.html + +Parent: https://test.com/life.html +(https://test.com:443/) +service = https +user = +host = test.com +port = 443 +path = /life.html +url = https://test.com/life.html + +Children: + +Child: ./ +service = https +user = +host = test.com +port = 443 +path = / +url = https://test.com/ + +Child: ./../ +service = https +user = +host = test.com +port = 443 +path = / +url = https://test.com/ + +Child: ../foo.html +service = https +user = +host = test.com +port = 443 +path = /foo.html +url = https://test.com/foo.html + +Child: .../foo.html +service = https +user = +host = test.com +port = 443 +path = /.../foo.html +url = https://test.com/.../foo.html + +Child: /foo.html +service = https +user = +host = test.com +port = 443 +path = /foo.html +url = https://test.com/foo.html + +Child: #top +service = https +user = +host = test.com +port = 443 +path = /life.html +url = https://test.com/life.html + +Child: index.html +service = https +user = +host = test.com +port = 443 +path = / +url = https://test.com/ + +Child: test.htm +service = https +user = +host = test.com +port = 443 +path = /test.htm +url = https://test.com/test.htm + +Child: /top/README +service = https +user = +host = test.com +port = 443 +path = /top/README +url = https://test.com/top/README + +Child: next/foo.html +service = https +user = +host = test.com +port = 443 +path = /next/foo.html +url = https://test.com/next/foo.html + +Child: .//relative.html +service = https +user = +host = test.com +port = 443 +path = //relative.html +url = https://test.com//relative.html + +Parent: https://default.removal.com/index.html +(https://default.removal.com:443/) +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Children: + +Child: ./ +service = https +user = +host = default.removal.com +port = 443 
+path = / +url = https://default.removal.com/ + +Child: ./../ +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Child: ../foo.html +service = https +user = +host = default.removal.com +port = 443 +path = /foo.html +url = https://default.removal.com/foo.html + +Child: .../foo.html +service = https +user = +host = default.removal.com +port = 443 +path = /.../foo.html +url = https://default.removal.com/.../foo.html + +Child: /foo.html +service = https +user = +host = default.removal.com +port = 443 +path = /foo.html +url = https://default.removal.com/foo.html + +Child: #top +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Child: index.html +service = https +user = +host = default.removal.com +port = 443 +path = / +url = https://default.removal.com/ + +Child: test.htm +service = https +user = +host = default.removal.com +port = 443 +path = /test.htm +url = https://default.removal.com/test.htm + +Child: /top/README +service = https +user = +host = default.removal.com +port = 443 +path = /top/README +url = https://default.removal.com/top/README + +Child: next/foo.html +service = https +user = +host = default.removal.com +port = 443 +path = /next/foo.html +url = https://default.removal.com/next/foo.html + +Child: .//relative.html +service = https +user = +host = default.removal.com +port = 443 +path = //relative.html +url = https://default.removal.com//relative.html + +Parent: https://test.com:803/./../../orgs/life.html +(https://test.com:803/) +service = https +user = +host = test.com +port = 803 +path = /orgs/life.html +url = https://test.com:803/orgs/life.html + +Children: + +Child: ./ +service = https +user = +host = test.com +port = 803 +path = /orgs/ +url = https://test.com:803/orgs/ + +Child: ./../ +service = https +user = +host = test.com +port = 803 +path = / +url = https://test.com:803/ + +Child: ../foo.html +service = https +user = 
+host = test.com +port = 803 +path = /foo.html +url = https://test.com:803/foo.html + +Child: .../foo.html +service = https +user = +host = test.com +port = 803 +path = /orgs/.../foo.html +url = https://test.com:803/orgs/.../foo.html + +Child: /foo.html +service = https +user = +host = test.com +port = 803 +path = /foo.html +url = https://test.com:803/foo.html + +Child: #top +service = https +user = +host = test.com +port = 803 +path = /orgs/life.html +url = https://test.com:803/orgs/life.html + +Child: index.html +service = https +user = +host = test.com +port = 803 +path = /orgs/ +url = https://test.com:803/orgs/ + +Child: test.htm +service = https +user = +host = test.com +port = 803 +path = /orgs/test.htm +url = https://test.com:803/orgs/test.htm + +Child: /top/README +service = https +user = +host = test.com +port = 803 +path = /top/README +url = https://test.com:803/top/README + +Child: next/foo.html +service = https +user = +host = test.com +port = 803 +path = /orgs/next/foo.html +url = https://test.com:803/orgs/next/foo.html + +Child: .//relative.html +service = https +user = +host = test.com +port = 803 +path = /orgs//relative.html +url = https://test.com:803/orgs//relative.html + +Parent: https://alias.com:8080/./../../orgs/life.html +(https://alias.com:8080/) +service = https +user = +host = alias.com +port = 8080 +path = /orgs/life.html +url = https://alias.com:8080/orgs/life.html + +Children: + +Child: ./ +service = https +user = +host = alias.com +port = 8080 +path = /orgs/ +url = https://alias.com:8080/orgs/ + +Child: ./../ +service = https +user = +host = alias.com +port = 8080 +path = / +url = https://alias.com:8080/ + +Child: ../foo.html +service = https +user = +host = alias.com +port = 8080 +path = /foo.html +url = https://alias.com:8080/foo.html + +Child: .../foo.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs/.../foo.html +url = https://alias.com:8080/orgs/.../foo.html + +Child: /foo.html +service = https +user = 
+host = alias.com +port = 8080 +path = /foo.html +url = https://alias.com:8080/foo.html + +Child: #top +service = https +user = +host = alias.com +port = 8080 +path = /orgs/life.html +url = https://alias.com:8080/orgs/life.html + +Child: index.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs/ +url = https://alias.com:8080/orgs/ + +Child: test.htm +service = https +user = +host = alias.com +port = 8080 +path = /orgs/test.htm +url = https://alias.com:8080/orgs/test.htm + +Child: /top/README +service = https +user = +host = alias.com +port = 8080 +path = /top/README +url = https://alias.com:8080/top/README + +Child: next/foo.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs/next/foo.html +url = https://alias.com:8080/orgs/next/foo.html + +Child: .//relative.html +service = https +user = +host = alias.com +port = 8080 +path = /orgs//relative.html +url = https://alias.com:8080/orgs//relative.html + +Parent: https://alias.com/./../../orgs/life.html +(https://true.com:443/) +service = https +user = +host = true.com +port = 443 +path = /orgs/life.html +url = https://true.com/orgs/life.html + +Children: + +Child: ./ +service = https +user = +host = true.com +port = 443 +path = /orgs/ +url = https://true.com/orgs/ + +Child: ./../ +service = https +user = +host = true.com +port = 443 +path = / +url = https://true.com/ + +Child: ../foo.html +service = https +user = +host = true.com +port = 443 +path = /foo.html +url = https://true.com/foo.html + +Child: .../foo.html +service = https +user = +host = true.com +port = 443 +path = /orgs/.../foo.html +url = https://true.com/orgs/.../foo.html + +Child: /foo.html +service = https +user = +host = true.com +port = 443 +path = /foo.html +url = https://true.com/foo.html + +Child: #top +service = https +user = +host = true.com +port = 443 +path = /orgs/life.html +url = https://true.com/orgs/life.html + +Child: index.html +service = https +user = +host = true.com +port = 443 +path = /orgs/ 
+url = https://true.com/orgs/ + +Child: test.htm +service = https +user = +host = true.com +port = 443 +path = /orgs/test.htm +url = https://true.com/orgs/test.htm + +Child: /top/README +service = https +user = +host = true.com +port = 443 +path = /top/README +url = https://true.com/top/README + +Child: next/foo.html +service = https +user = +host = true.com +port = 443 +path = /orgs/next/foo.html +url = https://true.com/orgs/next/foo.html + +Child: .//relative.html +service = https +user = +host = true.com +port = 443 +path = /orgs//relative.html +url = https://true.com/orgs//relative.html + +Parent: https:/www.fail.com/ +(https://:0/) +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Children: + +Child: ./ +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: ./../ +service = https +user = +host = +port = 0 +path = / +url = + +Child: ../foo.html +service = https +user = +host = +port = 0 +path = /foo.html +url = + +Child: .../foo.html +service = https +user = +host = +port = 0 +path = /www.fail.com/.../foo.html +url = + +Child: /foo.html +service = https +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: index.html +service = https +user = +host = +port = 0 +path = /www.fail.com/ +url = + +Child: test.htm +service = https +user = +host = +port = 0 +path = /www.fail.com/test.htm +url = + +Child: /top/README +service = https +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = https +user = +host = +port = 0 +path = /www.fail.com/next/foo.html +url = + +Child: .//relative.html +service = https +user = +host = +port = 0 +path = /www.fail.com//relative.html +url = + +Parent: https:www.fail.com +(https://:0/) +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Children: + +Child: ./ +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: 
./../ +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: ../foo.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .../foo.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /foo.html +service = https +user = +host = +port = 0 +path = /foo.html +url = + +Child: #top +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: index.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: test.htm +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: /top/README +service = https +user = +host = +port = 0 +path = /top/README +url = + +Child: next/foo.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Child: .//relative.html +service = https +user = +host = +port = 0 +path = www.fail.com +url = + +Parent: help:/khelpcenter/ +(help://localhost:0/) +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Children: + +Child: ./ +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: ./../ +service = help +user = +host = localhost +port = 0 +path = / +url = help:/ + +Child: ../foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: .../foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/.../foo.html +url = help:/khelpcenter/.../foo.html + +Child: /foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: #top +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: index.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: test.htm +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/test.htm +url = 
help:/khelpcenter/test.htm + +Child: /top/README +service = help +user = +host = localhost +port = 0 +path = /top/README +url = help:/top/README + +Child: next/foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/next/foo.html +url = help:/khelpcenter/next/foo.html + +Child: .//relative.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter//relative.html +url = help:/khelpcenter//relative.html + +Parent: help:/khelpcenter/what-is-kde.html#what-is-kde-introduction +(help://localhost:0/) +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/what-is-kde.html +url = help:/khelpcenter/what-is-kde.html + +Children: + +Child: ./ +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: ./../ +service = help +user = +host = localhost +port = 0 +path = / +url = help:/ + +Child: ../foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: .../foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/.../foo.html +url = help:/khelpcenter/.../foo.html + +Child: /foo.html +service = help +user = +host = localhost +port = 0 +path = /foo.html +url = help:/foo.html + +Child: #top +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/what-is-kde.html +url = help:/khelpcenter/what-is-kde.html + +Child: index.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/ +url = help:/khelpcenter/ + +Child: test.htm +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/test.htm +url = help:/khelpcenter/test.htm + +Child: /top/README +service = help +user = +host = localhost +port = 0 +path = /top/README +url = help:/top/README + +Child: next/foo.html +service = help +user = +host = localhost +port = 0 +path = /khelpcenter/next/foo.html +url = help:/khelpcenter/next/foo.html + +Child: .//relative.html +service = help +user = +host = 
localhost +port = 0 +path = /khelpcenter//relative.html +url = help:/khelpcenter//relative.html + diff --git a/debian/htdig/htdig-3.2.0b6/test/url.parents b/debian/htdig/htdig-3.2.0b6/test/url.parents new file mode 100644 index 00000000..c056c5ea --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/url.parents @@ -0,0 +1,45 @@ +http://www.williams.edu:803/ +http://wso.williams.edu/ +https://web.horde.org/williams/ +http://www.williams.edu/Administration/index.html +ftp://[email protected]/mail/index +http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open +http://wso/~ghutchis/bookmarks.html#mac +file:///opt/htdig/maindocs/index.html +http://www.htdig.org/This/Is/A/test.html?dowepass +http://localhost/index.asp?date=11/21/index.asp?date=12/1/98 +http://www.test.com/cgi-bin/test.cgi?http://this.com/url +http://wso.williams.edu/%7Eghutchis/index.html +http://wso.williams.edu/~ghutchis/ +http://wso.williams.edu/~ghutchis/index.shtml +http://wso.williams.edu//ghutchis///test/index.html +http://wso.williams.edu/./ghutchis/../orgs/life.html +http://www.example.com/blank_news.shtml?pages/1.i +http://www.williams.edu:803/Admin/Depts/.test/.news/Index.Html +http://test.com/test/ +http://test.com/opt/test/ +http://test.com/./opt/../test/ +http://test.com/./././orgs/life.html +http://test.com/./../../orgs/life.html +http://test.com/blank_news.shtml?pages/3.i +HTTP://www.Yahoo.com/ +http://this-is-a-legal.domain-name-with.several-subdomains-in.an-attempt-to.overflow-any-buffers.pointy-haired-boss.science-servers.williamscollege.edu/go/Departments/Chemistry/classes/102/chem_102.html +http://this-is-a.slashdot.org/slash/elimination/test//////////////////////////././././.././././/./././.././././../../././.test/ +http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=338;user=ghutchis;addsignature=1 +http://www.htdig.org/cgi-bin/htdig3.private/incoming?id=342;user=ghutchis 
+http://wso.williams.edu/cgi-bin/BBS/bbs_forum.cgi?forum=open&read=001746-000000.msg&session=36c4f59c0f6ecb26&use_last_read=on&last_read=0 +http:/www.fail.com/ +http:www.fail.com +file://localhost/opt/htdig/maindocs/index.html +file://localhost:80/home/ghutchis/www/home.html +http://www.htdig.org/cgi-bin/test.cgi?date=10/1/99#anchor1 +ftp://default.removal.com/index.html +https://test.com/life.html +https://default.removal.com/index.html +https://test.com:803/./../../orgs/life.html +https://alias.com:8080/./../../orgs/life.html +https://alias.com/./../../orgs/life.html +https:/www.fail.com/ +https:www.fail.com +help:/khelpcenter/ +help:/khelpcenter/what-is-kde.html#what-is-kde-introduction diff --git a/debian/htdig/htdig-3.2.0b6/test/word.cc b/debian/htdig/htdig-3.2.0b6/test/word.cc new file mode 100644 index 00000000..f0f571b1 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/word.cc @@ -0,0 +1,1075 @@ +// +// word.cc +// +// word: Implement tests for the word database related classes. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: word.cc,v 1.19 2004/05/28 13:15:30 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> + +// If we have this, we probably want it. 
+#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif + +#include "WordKey.h" +#include "WordList.h" +#include "WordContext.h" +#include "Configuration.h" + +static ConfigDefaults config_defaults[] = { + { "word_db", "test", 0 }, + { 0 } +}; + +static Configuration* config = 0; + +typedef struct +{ + int key; + int list; + int skip; + int compress; + int env; +} params_t; + +static void usage(); +static void doword(params_t* params); +static void dolist(params_t* params); +static void dokey(params_t* params); +static void doskip(params_t* params); +static void doenv(params_t* params); +static void pack_show_wordreference(const WordReference& wordRef); +static void pack_show_key(const String& key); + +static int verbose = 0; + +// ***************************************************************************** +// int main(int ac, char **av) +// + +int main(int ac, char **av) +{ + int c; + params_t params; + + params.key = 0; + params.list = 0; + params.skip = 0; + params.env = 0; + params.compress = 0; + + while ((c = getopt(ac, av, "ve:klbszw:")) != -1) + { + switch (c) + { + case 'v': + verbose++; + break; + case 'k': + params.key = 1; + break; + case 'l': + params.list = 1; + break; + case 's': + params.skip = 1; + break; + case 'e': + params.env = atoi(optarg); + break; + case 'z': + params.compress = 1; + break; + case '?': + usage(); + break; + } + } + + doword(&params); + + return 0; +} + +// +// mifluz.conf structure +// +#define WORD_DOCID 1 +#define WORD_FLAGS 2 +#define WORD_LOCATION 3 + +static void doword(params_t* params) +{ + if(params->key) { + if(verbose) fprintf(stderr, "Test WordKey class\n"); + dokey(params); + } + + if(params->list || params->skip || params->env) { + config = WordContext::Initialize(config_defaults); + if(params->compress) { + config->Add("wordlist_compress", "true"); + } + if(verbose > 2) { + String tmp; + tmp << (verbose - 2); + config->Add("wordlist_verbose", tmp); + } + if(params->env) { + config->Add("wordlist_env_share", "true"); + 
config->Add("wordlist_env_dir", "."); + } + + WordContext::Initialize(*config); + } + + + if(params->list) { + if(verbose) fprintf(stderr, "Test WordList class\n"); + dolist(params); + } + + if(params->skip) { + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking method\n"); + doskip(params); + } + + if(params->env) { + if(verbose) fprintf(stderr, "Test WordList with shared env\n"); + doenv(params); + } +} + +static void dolist(params_t*) +{ + static char* word_list[] = { + "The", // DocID = 1 + "quick", // DocID = 2 + "brown", // DocID = 3 + "fox", // DocID = 4 + "jumps", // DocID = 5 + "over", // DocID = 6 + "the", // DocID = 7 + "lazy", // DocID = 8 + "dog", // DocID = 9 + 0 + }; + + // + // Most simple case. Insert a few words and + // search them, using exact match. + // + { + + // setup a new wordlist + WordList words(*config); + if(verbose)WordKeyInfo::Instance()->Show(); + words.Open((*config)["word_db"], O_RDWR); + + + // create entries from word_list + WordReference wordRef; + wordRef.Key().Set(WORD_FLAGS, 67); + unsigned int location = 0; + unsigned int anchor = 0; + unsigned int docid = 1; + if(verbose) fprintf(stderr, "Inserting\n"); + + for(char** p = word_list; *p; p++) { + if(verbose > 4) fprintf(stderr, "inserting word: %s\n", *p); + wordRef.Key().SetWord(*p); + wordRef.Key().Set(WORD_DOCID, docid); + wordRef.Key().Set(WORD_LOCATION, location); + wordRef.Record().info.data = anchor; + if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get()); + if(verbose > 2) pack_show_wordreference(wordRef); + words.Insert(wordRef); + location += strlen(*p); + anchor++; + docid++; + } + words.Close(); + + location = anchor = 0; + docid = 1; + + if(verbose) fprintf(stderr, "Searching\n"); + + // reopen wordlist + words.Open((*config)["word_db"], O_RDONLY); + // check if each word (from word_list) is there + for(char** p = word_list; *p; p++) + { + // recreate wordref from each word + wordRef.Key().SetWord(*p); + 
wordRef.Key().Set(WORD_LOCATION, location); + wordRef.Record().info.data = anchor; + wordRef.Key().Set(WORD_DOCID, docid); + + location += strlen(*p); + anchor++; + docid++; + + // + // Skip first word because we don't want to deal with upper/lower case at present. + // + if(p == word_list) continue; + + // check if wordref is in wordlist + if(verbose) fprintf(stderr, "searching for %s ... ", *p); + if(verbose > 2) pack_show_wordreference(wordRef); + if(verbose > 1) fprintf(stderr, "%s\n", (char*)wordRef.Get()); + // find matches in wordlist + List *result = words[wordRef]; + if(!result) { + fprintf(stderr, "dolist: words[wordRef] returned null pointer\n"); + exit(1); + } + result->Start_Get(); + int count = 0; + WordReference* found; + // loop through found matches + while((found = (WordReference*)result->Get_Next())) + { + if(wordRef.Key().GetWord() != found->Key().GetWord()) + { + fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord()); + exit(1); + } + count++; + } + if(count != 1) { + fprintf(stderr, "dolist: simple: searching %s, got %d matches instead of 1\n", (char*)wordRef.Key().GetWord(), count); + exit(1); + } + if(verbose) fprintf(stderr, "done\n"); + + delete result; + + } + } + // + // Print all records as sorted within Berkeley DB with number + // of occurrences. 
+ // + if(verbose) { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + List *result = words.Words(); + if(result == 0) { + fprintf(stderr, "dolist: getting all words failed\n"); + exit(1); + } + result->Start_Get(); + int count = 0; + String* found; + while((found = (String*)result->Get_Next())) { + // Start at 0 in case Noccurrence() fails without setting it (its return value is not checked here) + unsigned int noccurrence = 0; + WordKey key; + key.SetWord(*found); + words.Noccurrence(key, noccurrence); + fprintf(stderr, "%s (%d)\n", (char*)(*found), noccurrence); + count++; + } + if(count != 8) { + fprintf(stderr, "dolist: getting all words, got %d matches instead of 8\n", count); + exit(1); + } + + delete result; + } + // + // Search all occurrences of 'the' + // + { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + WordReference wordRef; + wordRef.Key().SetWord("the"); + + unsigned int noccurrence; + if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) { + fprintf(stderr, "dolist: get ref count of 'the' failed\n"); + exit(1); + } else if(noccurrence != 2) { + fprintf(stderr, "dolist: get ref count of 'the' failed, got %d instead of 2\n", noccurrence); + exit(1); + } + List *result = words[wordRef]; + // Same guard as the exact-match lookup earlier in dolist: never walk a null list + if(!result) { + fprintf(stderr, "dolist: words[wordRef] returned null pointer\n"); + exit(1); + } + result->Start_Get(); + int count = 0; + WordReference* found; + while((found = (WordReference*)result->Get_Next())) { + if(wordRef.Key().GetWord() != found->Key().GetWord()) { + fprintf(stderr, "dolist: simple: expected %s, got %s\n", (char*)wordRef.Key().GetWord(), (char*)found->Key().GetWord()); + exit(1); + } + if(verbose) fprintf(stderr, "%s\n", (char*)found->Get()); + count++; + } + if(count != 2) { + fprintf(stderr, "dolist: searching occurrences of '%s', got %d matches instead of 2\n", (char*)wordRef.Key().GetWord(), count); + exit(1); + } + + delete result; + } + // + // Delete all occurrences of 'the' + // + { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + WordReference wordRef("the"); + if(verbose) { + fprintf(stderr, "**** Delete test:\n"); + words.Write(stderr); + 
fprintf(stderr, "**** Delete test:\n"); + } + int count; + if((count = words.WalkDelete(wordRef)) != 2) { + fprintf(stderr, "dolist: delete occurrences of 'the', got %d deletion instead of 2\n", count); + exit(1); + } + + List* result = words[wordRef]; + // Guard against a null list before asking for its size + if(!result) { + fprintf(stderr, "dolist: words[wordRef] returned null pointer\n"); + exit(1); + } + if(result->Count() != 0) { + fprintf(stderr, "dolist: unexpectedly found 'the' \n"); + exit(1); + } + delete result; + + unsigned int noccurrence; + if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) { + fprintf(stderr, "dolist: get ref count of 'the' failed\n"); + exit(1); + } else if(noccurrence != 0) { + fprintf(stderr, "dolist: get ref count of 'the' failed, got %d instead of 0\n", noccurrence); + exit(1); + } + } + // + // Delete all words in document 5 (only one word : jumps) + // + { + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + + WordReference wordRef; + wordRef.Key().Set(WORD_DOCID, 5); + int count; + if((count = words.WalkDelete(wordRef)) != 1) { + fprintf(stderr, "dolist: delete occurrences in DocID 5, %d deletion instead of 1\n", count); + exit(1); + } + + wordRef.Clear(); + wordRef.Key().SetWord("jumps"); + List* result = words[wordRef]; + // Guard against a null list before asking for its size + if(!result) { + fprintf(stderr, "dolist: words[wordRef] returned null pointer\n"); + exit(1); + } + if(result->Count() != 0) { + fprintf(stderr, "dolist: unexpectedly found 'jumps' \n"); + exit(1); + } + delete result; + + unsigned int noccurrence; + if(words.Noccurrence(wordRef.Key(), noccurrence) != OK) { + fprintf(stderr, "dolist: get ref count of 'jumps' failed\n"); + exit(1); + } else if(noccurrence != 0) { + fprintf(stderr, "dolist: get ref count of 'jumps' failed, got %d instead of 0\n", noccurrence); + exit(1); + } + } +} + +#define WORD_BIT_MASK(b) ((b) == 32 ? 
0xffffffff : ((1U << (b)) - 1)) // shift an unsigned 1: with a signed int, b == 31 would overflow + +// +// See WordKey.h +// Tested: Pack, Unpack, Compare (both forms), accessors, meta information +// +static void +dokey(params_t* params) +{ + static char *key_descs[] = { + "Word/DocID 5/Flags 8/Location 19", + "Word/DocID 3/Location 2/Flags 11", + "Word/DocID 3/Flags 8/Location 5", + "Word/DocID 3/Flags 14/Location 7", + "Word/DocID 3/Flags 9/Location 7/Foo1 13/Foo2 16", + 0, + }; + char** key_desc; + + for(key_desc = key_descs; *key_desc; key_desc++) { + WordKeyInfo::InitializeFromString(*key_desc); + + if(verbose) + WordKeyInfo::Instance()->Show(); + + WordKey word; + word.SetWord("aword"); + int j; + for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) { + WordKeyNum value = (0xdededede & word.MaxValue(j)); + word.Set(j, value); + } + if(verbose > 1) fprintf(stderr, "WORD: %s\n", (char*)word.Get()); + + String packed; + word.Pack(packed); + + WordKey other_word; + other_word.Unpack(packed); + if(verbose > 1) fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get()); + + int failed = 0 ; + for(j = WORD_FIRSTFIELD; j < word.NFields(); j++) { + if(word.Get(j) != other_word.Get(j)) { + failed = 1; + break; + } + } + if(word.GetWord() != other_word.GetWord() || + !word.IsDefined(0) || + !other_word.IsDefined(0)) + failed = 1; + + if(failed) { + fprintf(stderr, "Original and packed/unpacked not equal\n"); + WordKeyInfo::Instance()->Show(); + fprintf(stderr, "WORD: %s\n", (char*)word.Get()); + pack_show_key(packed); + fprintf(stderr, "OTHER_WORD: %s\n", (char*)other_word.Get()); + exit(1); + } + + // + // Compare in packed form + // + if(!word.PackEqual(other_word)) + { + fprintf(stderr, "dokey: %s not equal (object compare)\n", *key_desc); + exit(1); + } + + // + // Pack the other_word + // + String other_packed; + + other_word.Pack(other_packed); + // + // The two (word and other_word) must compare equal + // using the alternate comparison (fast) interface. 
+ // + if(WordKey::Compare(packed, other_packed) != 0) { + fprintf(stderr, "dokey: %s not equal (fast compare)\n", *key_desc); + exit(1); + } + + word.SetWord("Test string"); + word.Set(WORD_DOCID,1); + other_word.SetWord("Test string"); + word.Pack(packed); + // + // Add one char to the word; they must not compare equal and + // the difference must be minus one. + // + other_word.GetWord().append("a"); + other_word.Pack(other_packed); + { + int ret; + if((ret = WordKey::Compare(packed, other_packed)) != -1) + { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: %s different length, expected -1 got %d\n", *key_desc, ret); + exit(1); + } + } + other_word.SetWord("Test string"); + + // + // Change T to S + // the difference must be one. + // + { + String& tmp = other_word.GetWord(); + tmp[tmp.indexOf('T')] = 'S'; + } + other_word.Pack(other_packed); + { + int ret; + if((ret = WordKey::Compare(packed, other_packed)) != 1) + { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: %s different letter (S instead of T), expected 1 got %d\n", *key_desc, ret); + exit(1); + } + } + other_word.SetWord("Test string"); + + // + // Subtract one from the first numeric field + // The difference must be one. 
+ // + other_word.Set(WORD_DOCID,word.Get(WORD_DOCID) - 1); + other_word.Pack(other_packed); + { + int ret; + if((ret = WordKey::Compare(packed, other_packed)) != 1) + { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: %s different numeric field, expected 1 got %d\n", *key_desc, ret); + exit(1); + } + } + } + // + // WordKey::Diff function + // + { + WordKey word("Test1 <DEF> 1 2 3 4 5"); + WordKey other_word("Sest1 <DEF> 1 2 3 4 5"); + // + // Diff must say that field 0 differ and that word is lower than other_word + // + { + int position = 0; + int lower = 0; + if(!word.Diff(other_word, position, lower)) { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: diff failed\n"); + exit(1); + } + if(position != 0 || lower != 1) { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: diff expected position = 0 and lower = 1 but got position = %d and lower = %d\n", position, lower); + exit(1); + } + } + // + // Only compare prefix + // + other_word.SetWord("Test"); + other_word.UndefinedWordSuffix(); + other_word.Set(WORD_DOCID, 5); + { + int position = 0; + int lower = 0; + if(!word.Diff(other_word, position, lower)) { + fprintf(stderr, "dokey: diff failed\n"); + exit(1); + } + if(position != 1 || lower != 1) { + fprintf(stderr, "%s\n", (char*)word.Get()); + fprintf(stderr, "%s\n", (char*)other_word.Get()); + fprintf(stderr, "dokey: diff expected position = 1 and lower = 1 but got position = %d and lower = %d\n", position, lower); + exit(1); + } + } + // + // Same key have no diff + // + { + int position = 0; + int lower = 0; + if(word.Diff(word, position, lower)) { + fprintf(stderr, "dokey: diff found when comparing %s with itself\n", (char*)word.Get()); + exit(1); + } + } + } +} + +static void pack_show_key(const String& key) +{ + int i; + char c; + + 
for(i=0; i < key.length(); i++) { + c = (isprint(key[i]) ? key[i] : '#'); + fprintf(stderr, "%c-%2x ", c, key[i]); + } + fprintf(stderr, "\n"); + + for(i = 0; i < key.length(); i++) { + int j; + for(j = 0; j < 8; j++) + fprintf(stderr, "%c", (key[j] & (1<<(j))) ? '1' : '0'); + } + + fprintf(stderr, "\n"); + fprintf(stderr, "^0 ^1 ^2 ^3 ^4 ^5 ^6 ^7\n"); + fprintf(stderr, "0123456701234567012345670123456701234567012345670123456701234567\n"); +} + +static void pack_show_wordreference(const WordReference& wordRef) +{ + String key; + String record; + + wordRef.Pack(key, record); + + fprintf(stderr, "key = "); + for(int i = 0; i < key.length(); i++) { + fprintf(stderr, "0x%02x(%c) ", key[i] & 0xff, key[i]); + } + fprintf(stderr, " record = "); + for(int i = 0; i < record.length(); i++) { + fprintf(stderr, "0x%02x(%c) ", record[i] & 0xff, record[i]); + } + fprintf(stderr, "\n"); +} + + + +//***************************************************************************** +// void doskip() +// Test SkipUselessSequentialWalking in WordList class +// +static void doskip_normal(params_t*); +static void doskip_harness(params_t*); +static void doskip_overflow(params_t*); +static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string); + +static void doskip(params_t* params) +{ + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking normal\n"); + doskip_normal(params); + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking harness\n"); + doskip_harness(params); + if(verbose) fprintf(stderr, "Test WordList::SkipUselessSequentialWalking overflow\n"); + doskip_overflow(params); +} + +static void doskip_try(WordList& words, WordCursor& search, char* found_string, char* expected_string) +{ + const WordKey& found = search.GetFound().Key(); + ((WordKey&)found).Set(found_string); + if(search.SkipUselessSequentialWalking() == NOTOK) { + fprintf(stderr, "doskip_try: SkipUselessSequentialWalking NOTOK searching %s 
at step %s expecting %s\n", (char*)search.GetSearch().Get(), (char*)found.Get(), (char*)expected_string); + exit(1); + } + + WordKey expected(expected_string); + if(!found.ExactEqual(expected)) { + fprintf(stderr, "doskip_try: expected %s but got %s\n", (char*)expected.Get(), (char*)found.Get()); + exit(1); + } +} + +// +// Create artificial WordCursor context +// in which SkipUselessSequentialWalking calls SetToFollowing +// that triggers overflow condition. +// +static void doskip_overflow(params_t*) +{ +#define WORD_FIELD1 1 +#define WORD_FIELD2 2 +#define WORD_FIELD3 3 + + static ConfigDefaults config_defaults[] = { + { "wordlist_wordkey_description", "Word/FIELD1 32/FIELD2 8/FIELD3 16", 0 }, + { 0 } + }; + Configuration config; + config.Defaults(config_defaults); + if(verbose > 2) config.Add("wordlist_verbose", "3"); + WordContext::Initialize(config); + { + WordList* words = new WordList(config); + + // + // Looking for zebra at location 3 + // + WordKey key("zebra <UNDEF> <UNDEF> <UNDEF> 3"); + WordCursor *search = words->Cursor(key); + + { + // + // Pretend we found zebra <DEF> 3 <MAX> 7 + // That is a valid candidate for SkipUselessSequentialWalking + // + String found; + found << "zebra <DEF> 3 " << WordKey::MaxValue(WORD_FIELD2) << " 7"; + + // + // Overflow on FIELD2 must trigger ++ on FIELD1 + // + String expected("zebra <DEF> 4 0 3"); + doskip_try(*words, *search, found, expected); + } + + { + // + // Pretend we found zebra <DEF> <MAX> <MAX> 7 + // That is a valid candidate for SkipUselessSequentialWalking + // + String found; + found << "zebra <DEF> " << WordKey::MaxValue(WORD_FIELD1) << " " << WordKey::MaxValue(WORD_FIELD2) << " 7"; + + // + // Overflow on FIELD2 must trigger append \001 on word Word + // + String expected("zebra\001 <DEF> 0 0 3"); + doskip_try(*words, *search, found, expected); + + // + // Cannot increment, SkipUselessSequentialWalking returns NOTOK + // + ((WordKey&)search->GetSearch()).SetDefinedWordSuffix(); + 
((WordReference&)search->GetFound()).Key().Set(found); + if(search->SkipUselessSequentialWalking() != WORD_WALK_ATEND) { + fprintf(stderr, "doskip_overflow: SkipUselessSequentialWalking expected NOTOK & WORD_WALK_ATEND searching %s\n", (char*)key.Get()); + exit(1); + } + + } + + delete search; + words->Close(); + delete words; + } + + // + // Restore default configuration + // + WordContext::Initialize(*::config); + +#undef WORD_FIELD1 +#undef WORD_FIELD2 +#undef WORD_FIELD3 +} + +// +// Create artificial WordCursor contexts +// that covers all possible behaviour of SkipUselessSequentialWalking. +// +static void doskip_harness(params_t*) +{ +#define WORD_FIELD1 1 +#define WORD_FIELD2 2 +#define WORD_FIELD3 3 +#define WORD_FIELD4 4 +#define WORD_FIELD5 5 + + static ConfigDefaults config_defaults[] = { + { "wordlist_wordkey_description", "Word/FIELD1 8/FIELD2 8/FIELD3 8/FIELD4 8/FIELD5 8", 0 }, + { 0 } + }; + Configuration config; + config.Defaults(config_defaults); + if(verbose > 2) config.Add("wordlist_verbose", "3"); + WordContext::Initialize(config); + { + WordList* words = new WordList(config); + + // + // Searching + // + // z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF> + // + // in data set + // + // DATA SEE STATUS OPERATION + // zebra <DEF> 1 5 1 4 3 found next + // zebra <DEF> 1 6 1 4 3 a nomatch skip to zebra <DEF> 2 5 0 4 0 + // zebra <DEF> 1 6 2 4 3 ignore + // zebra <DEF> 2 3 1 4 3 ignore + // zebra <DEF> <MAX> 6 1 4 3 b nomatch skip to zebra\001 <DEF> 0 5 0 4 0 + // zebra <DEF> <MAX> 7 1 4 3 ignore + // zebra <DEF> <MAX> 8 1 4 3 ignore + // zebra <DEF> <MAX> 9 1 4 3 ignore + // zippo <DEF> 0 3 1 4 3 ignore + // zippo <DEF> 8 5 1 1 3 c nomatch skip to zippo <DEF> 8 5 1 4 0 + // zippo <DEF> 8 5 1 2 3 ignore + // zippo <DEF> 8 5 1 2 5 ignore + // zippo <DEF> 8 5 1 2 9 ignore + // zippo <DEF> 8 5 1 3 9 ignore + // zorro <DEF> 3 5 <MAX> 6 3 d nomatch skip to zorro <DEF> 4 5 0 4 0 + // zorro <DEF> 3 5 <MAX> 6 5 ignore + // zorro <DEF> 3 5 <MAX> 8 5 ignore + // zorro 
<DEF> 4 5 2 4 3 found + // + // legend: status is what WalkNextStep function says about the key + // nomatch means searchKey.Equal(found.Key()) is false + // found means searchKey.Equal(found.Key()) is true + // ignore means we jump over it + // operation is the next operation decided by WalkNextStep + // always skip if SkipUselessSequentialWalking is called. + // In general SkipUselessSequentialWalking is not always + // called on nomatch. But it is always called if the + // search key is not a prefix key, which is our case. + // see is a reference to the list below + // + // a) Needless to search for keys in which the FIELD1 is equal to 1 since + // the FIELD2 is greater than the searched value. Any key with the FIELD1 + // set to 1 that follows this one will have a FIELD2 greater than the searched + // value (5) since the keys are sorted in ascending order. + // The next possible key is the one that has FIELD1++. + // + // b) Same logic as before, but the FIELD1 has already reached its maximum value + // and can't be incremented. zebra will therefore be incremented by appending + // a \001 to it. This is only possible since we search for words beginning + // with z (z <UNDEF>). We would not do that if searching for (zebra <DEF>). + // + // c) The found key does not match the constraint (FIELD4 is lower than the searched + // value). We only need to set FIELD4 to the searched value to jump to the + // match. No incrementation in this case. + // + // d) The FIELD4 is greater than the searched value, making this very similar + // to the b) case since the FIELD3 value is <MAX>. However FIELD2 matches + // the search key, it is therefore useless to increment it. We must ignore + // it and increment FIELD1. 
+ // + // Looking for zebra with flags 5 + // + WordKey key("z <UNDEF> <UNDEF> 5 <UNDEF> 4 <UNDEF>"); + WordCursor *search = words->Cursor(key); + +#define WORD_NTEST 4 + + static char* found_strings[WORD_NTEST]; + static char* expected_strings[WORD_NTEST]; + + int i = 0; + char tmp[1024]; + + // + // See a) in comment above + // + found_strings[i] = strdup("zebra <DEF> 1 6 1 4 3"); + expected_strings[i] = strdup("zebra <DEF> 2 5 0 4 0"); + i++; + + // + // See b) in comment above + // + sprintf(tmp, "zebra <DEF> %d 6 1 4 3", WordKey::MaxValue(WORD_FIELD1)); + found_strings[i] = strdup(tmp); + expected_strings[i] = strdup("zebra\001 <DEF> 0 5 0 4 0"); + i++; + + // + // See c) in comment above + // + found_strings[i] = strdup("zippo <DEF> 8 5 1 1 3"); + expected_strings[i] = strdup("zippo <DEF> 8 5 1 4 0"); + i++; + + // + // See d) in comment above + // + sprintf(tmp, "zorro <DEF> 3 5 %d 6 3", WordKey::MaxValue(WORD_FIELD3)); + found_strings[i] = strdup(tmp); + expected_strings[i] = strdup("zorro <DEF> 4 5 0 4 0"); + i++; + + for(i = 0; i < WORD_NTEST; i++) { + doskip_try(*words, *search, found_strings[i], expected_strings[i]); + free(found_strings[i]); + free(expected_strings[i]); + } + + delete search; + words->Close(); + delete words; + } + + // + // Restore default configuration + // + WordContext::Initialize(*::config); + +#undef WORD_FIELD1 +#undef WORD_FIELD2 +#undef WORD_FIELD3 +#undef WORD_FIELD4 +#undef WORD_FIELD5 +} + +int +get_int_array(char *s,int **plist,int &n) +{ + int i=0; + for(n=0;s[i];n++) + { + for(;s[i] && !isalnum(s[i]);i++); + if(!s[i]){break;} + for(;s[i] && isalnum(s[i]);i++); + } + if(!n){*plist=NULL;return(NOTOK);} + int *list=new int[n]; + *plist=list; + int j; + i=0; + for(j=0;s[i];j++) + { + for(;s[i] && !isalnum(s[i]);i++); + if(!s[i]){break;} + list[j]=atoi(s+i); + for(;s[i] && isalnum(s[i]);i++); + } + return(OK); +} +class SkipTestEntry +{ +public: + char *searchkey; + char *goodorder; + void GetSearchKey(WordKey &searchKey) + { 
+ searchKey.Set((String)searchkey); + if(verbose) fprintf(stderr, "GetSearchKey: string: %s got: %s\n", (char*)searchkey, (char*)searchKey.Get()); + } + int Check(WordList &WList) + { + WordKey empty; + WordReference srchwrd; + GetSearchKey(srchwrd.Key()); + Object o; + if(verbose) fprintf(stderr, "checking SkipUselessSequentialWalking on: %s\n", (char*)srchwrd.Get()); + if(verbose) fprintf(stderr, "walking all:\n"); + List *all = WList.WordRefs(); + if(verbose) fprintf(stderr, "walking search: searching for: %s\n", (char*)srchwrd.Get()); + + WordCursor *search = WList.Cursor(srchwrd.Key(), HTDIG_WORDLIST_COLLECTOR); + search->SetTraces(new List); + search->Walk(); + List *wresw = search->GetResults(); + List *wres = search->GetTraces(); + wresw->Start_Get(); + wres->Start_Get(); + WordReference *found; + WordReference *correct; + int i; + int ngoodorder; + int *goodorder_a; + get_int_array(goodorder,&goodorder_a,ngoodorder); + for(i=0;(found = (WordReference*)wres->Get_Next());i++) + { + if(i>=ngoodorder) { + fprintf(stderr, "SkipUselessSequentialWalking test failed! i>=ngoodorder\n"); + exit(1); + } + if(verbose) fprintf(stderr, "Check actual %d'th walked: %s\n", i, (char*)found->Get()); + correct = (WordReference*)all->Nth(goodorder_a[i]); + if(verbose) fprintf(stderr, "Check correct %d : %s\n", goodorder_a[i], (char*)correct->Get()); + if(!correct->Key().Equal(found->Key())) { + fprintf(stderr, "SkipUselessSequentialWalking test failed! at position: %d\n", i); + exit(1); + } + } + if(i<ngoodorder) { + fprintf(stderr, "SkipUselessSequentialWalking test failed! 
n<ngoodorder\n"); + exit(1); + } + + delete [] goodorder_a; + delete wresw; + delete wres; + delete all; + delete search; + return OK; + } +}; + +SkipTestEntry SkipTestEntries[]= +{ + { + "et <DEF> <UNDEF> 0 10 ", + "3 4 5 9 10 12 13 14" + }, + { + "et <UNDEF> 20 0 <UNDEF> ", + "3 4 5 6 7 8 9 14 17", + }, +}; + +static void doskip_normal(params_t*) +{ + if(verbose > 0) fprintf(stderr, "doing SkipUselessSequentialWalking test\n"); + // read db into WList from file: skiptest_db.txt + if(verbose) fprintf(stderr, "WList config:minimum_word_length: %d\n", config->Value("minimum_word_length")); + WordList WList(*config); + WList.Open((*config)["word_db"], O_RDWR); + // now check walk order for a few search terms + int i; + if(verbose) fprintf(stderr, "number of entries: %d\n", (int)(sizeof(SkipTestEntries)/sizeof(SkipTestEntry))); + for(i=0;i<(int)(sizeof(SkipTestEntries)/sizeof(SkipTestEntry));i++) { + if(SkipTestEntries[i].Check(WList) == NOTOK) { + fprintf(stderr, "SkipUselessSequentialWalking test failed on SkipTestEntry number: %d\n", i); + exit(1); + } + } + WList.Close(); +} + +static void doenv(params_t* params) +{ + WordReference wordRef; + WordKey& key = wordRef.Key(); + key.Set("the <def> 1 2 3"); + WordList words(*config); + words.Open((*config)["word_db"], O_RDWR); + int i; + for(i = params->env; i < 10000; i += 2) { + key.Set(WORD_DOCID, i); + if(words.Insert(wordRef) != OK) { + fprintf(stderr, "doenv: cannot insert %d\n", i); + exit(1); + } + } + for(i = params->env; i < 10000; i += 2) { + key.Set(WORD_DOCID, i); + if(words.Exists(wordRef) != OK) { + fprintf(stderr, "doenv: cannot find %d\n", i); + exit(1); + } + } + words.Close(); +} + +//***************************************************************************** +// void usage() +// Display program usage information +// +static void usage() +{ + printf("usage: word [options]\n"); + printf("Options:\n"); + printf("\t-v\t\tIncreases the verbosity\n"); + printf("\t-k\t\tTest WordKey\n"); + 
printf("\t-l\t\tTest WordList\n"); + printf("\t-e n\t\tTest WordList with shared environnement, process number <n>\n"); + printf("\t-s\t\tTest WordList::SkipUselessSequentialWalking\n"); + printf("\t-z\t\tActivate compression test (use with -s, -b or -l)\n"); + exit(0); +} diff --git a/debian/htdig/htdig-3.2.0b6/test/word.try1 b/debian/htdig/htdig-3.2.0b6/test/word.try1 new file mode 100644 index 00000000..88166a38 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/word.try1 @@ -0,0 +1,26 @@ +# +# Simple non aligned +# +Key + Definition + Word + type = String + end + DocID + type = unsigned int + bits = 5 + end + Location + type = unsigned int + bits = 16 + end + Flags + type = unsigned int + bits = 8 + end + end + + EncodingOrder = DocID,Flags,Location,Word + + SortOrder = Word asc,DocID asc,Flags asc,Location asc +end diff --git a/debian/htdig/htdig-3.2.0b6/test/word.try2 b/debian/htdig/htdig-3.2.0b6/test/word.try2 new file mode 100644 index 00000000..29229e25 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/word.try2 @@ -0,0 +1,26 @@ +# +# DocID and Location on the same byte +# +Key + Definition + Word + type = String + end + DocID + type = unsigned int + bits = 3 + end + Location + type = unsigned int + bits = 2 + end + Flags + type = unsigned int + bits = 8 + end + end + + EncodingOrder = DocID,Location,Flags,Word + + SortOrder = Word asc,DocID asc,Flags asc,Location asc +end diff --git a/debian/htdig/htdig-3.2.0b6/test/word.try3 b/debian/htdig/htdig-3.2.0b6/test/word.try3 new file mode 100644 index 00000000..074a0539 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/word.try3 @@ -0,0 +1,27 @@ +# +# DocID and Location on the same byte and +# Location ends on a byte boundary. 
+# +Key + Definition + Word + type = String + end + DocID + type = unsigned int + bits = 3 + end + Location + type = unsigned int + bits = 5 + end + Flags + type = unsigned int + bits = 8 + end + end + + EncodingOrder = DocID,Location,Flags,Word + + SortOrder = Word asc,DocID asc,Flags asc,Location asc +end diff --git a/debian/htdig/htdig-3.2.0b6/test/word.try4 b/debian/htdig/htdig-3.2.0b6/test/word.try4 new file mode 100644 index 00000000..b9e165ab --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/word.try4 @@ -0,0 +1,27 @@ +# +# DocID and Location on the same byte and +# Location ends on a byte boundary. +# +Key + Definition + Word + type = String + end + DocID + type = unsigned int + bits = 3 + end + Location + type = unsigned int + bits = 7 + end + Flags + type = unsigned int + bits = 9 + end + end + + EncodingOrder = DocID,Location,Flags,Word + + SortOrder = Word asc,DocID asc,Flags asc,Location asc +end diff --git a/debian/htdig/htdig-3.2.0b6/test/word.try5 b/debian/htdig/htdig-3.2.0b6/test/word.try5 new file mode 100644 index 00000000..53994ce1 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/test/word.try5 @@ -0,0 +1,35 @@ +# +# DocID and Location on the same byte and +# Location ends on a byte boundary. +# +Key + Definition + Word + type = String + end + DocID + type = unsigned int + bits = 3 + end + Location + type = unsigned int + bits = 7 + end + Flags + type = unsigned int + bits = 9 + end + Foo1 + type = unsigned int + bits = 13 + end + Foo2 + type = unsigned int + bits = 9 + end + end + + EncodingOrder = DocID,Location,Flags,Foo1,Foo2,Word + + SortOrder = Word asc,DocID asc,Flags asc,Location asc, Foo1 asc, Foo2 asc +end |