Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/httools')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/.cvsignore     |  12
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/Makefile.am    |  36
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/Makefile.in    | 527
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/Makefile.win32 |  63
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/htdump.cc      | 200
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/htload.cc      | 199
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/htmerge.cc     | 403
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/htnotify.cc    | 613
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/htpurge.cc     | 399
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/httools/htstat.cc      | 200
10 files changed, 2652 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/httools/.cvsignore b/debian/htdig/htdig-3.2.0b6/httools/.cvsignore new file mode 100644 index 00000000..5e084bef --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/.cvsignore @@ -0,0 +1,12 @@ +Makefile +*.lo +*.la +.purify +.pure +.deps +.libs +htpurge +htload +htdump +htstat +htnotify diff --git a/debian/htdig/htdig-3.2.0b6/httools/Makefile.am b/debian/htdig/htdig-3.2.0b6/httools/Makefile.am new file mode 100644 index 00000000..10f29b28 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/Makefile.am @@ -0,0 +1,36 @@ + +include $(top_srcdir)/Makefile.config + +LOCAL_DEFINES= -DSENDMAIL=\"$(SENDMAIL)\" + +bin_PROGRAMS = htpurge htnotify htdump htstat htload htmerge + +htpurge_SOURCES = htpurge.cc +htpurge_DEPENDENCIES = $(HTLIBS) +htpurge_LDFLAGS = $(PROFILING) ${extra_ldflags} +htpurge_LDADD = $(HTLIBS) + +htnotify_SOURCES = htnotify.cc +htnotify_DEPENDENCIES = $(HTLIBS) +htnotify_LDFLAGS = $(PROFILING) ${extra_ldflags} +htnotify_LDADD = $(HTLIBS) + +htdump_SOURCES = htdump.cc +htdump_DEPENDENCIES = $(HTLIBS) +htdump_LDFLAGS = $(PROFILING) ${extra_ldflags} +htdump_LDADD = $(HTLIBS) + +htstat_SOURCES = htstat.cc +htstat_DEPENDENCIES = $(HTLIBS) +htstat_LDFLAGS = $(PROFILING) ${extra_ldflags} +htstat_LDADD = $(HTLIBS) + +htload_SOURCES = htload.cc +htload_DEPENDENCIES = $(HTLIBS) +htload_LDFLAGS = $(PROFILING) ${extra_ldflags} +htload_LDADD = $(HTLIBS) + +htmerge_SOURCES = htmerge.cc +htmerge_DEPENDENCIES = $(HTLIBS) +htmerge_LDFLAGS = $(PROFILING) ${extra_ldflags} +htmerge_LDADD = $(HTLIBS) diff --git a/debian/htdig/htdig-3.2.0b6/httools/Makefile.in b/debian/htdig/htdig-3.2.0b6/httools/Makefile.in new file mode 100644 index 00000000..b0703eb5 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/Makefile.in @@ -0,0 +1,527 @@ +# Makefile.in generated by automake 1.7.9 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# To compile with profiling do the following: +# +# make CFLAGS=-g CXXFLAGS=-g PROFILING=-p all +# + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. 
+ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_triplet = @host@ +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +APACHE = @APACHE@ +APACHE_MODULES = @APACHE_MODULES@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CGIBIN_DIR = @CGIBIN_DIR@ +COMMON_DIR = @COMMON_DIR@ +CONFIG_DIR = @CONFIG_DIR@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DATABASE_DIR = @DATABASE_DIR@ +DEFAULT_CONFIG_FILE = @DEFAULT_CONFIG_FILE@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FIND = @FIND@ +GUNZIP = @GUNZIP@ +HAVE_SSL = @HAVE_SSL@ +HTDIG_MAJOR_VERSION = @HTDIG_MAJOR_VERSION@ +HTDIG_MICRO_VERSION = @HTDIG_MICRO_VERSION@ +HTDIG_MINOR_VERSION = @HTDIG_MINOR_VERSION@ +IMAGE_DIR = @IMAGE_DIR@ +IMAGE_URL_PREFIX = @IMAGE_URL_PREFIX@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@ +MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@ +MAKEINFO = @MAKEINFO@ +MV = @MV@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +RANLIB = @RANLIB@ +RRDTOOL = @RRDTOOL@ +SEARCH_DIR = @SEARCH_DIR@ +SEARCH_FORM = @SEARCH_FORM@ +SED = @SED@ +SENDMAIL = @SENDMAIL@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TAR = @TAR@ +TESTS_FALSE = @TESTS_FALSE@ +TESTS_TRUE = @TESTS_TRUE@ +TIME = @TIME@ +TIMEV = @TIMEV@ +USER = @USER@ +VERSION = @VERSION@ +YACC = @YACC@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_RANLIB = @ac_ct_RANLIB@ +ac_ct_STRIP = @ac_ct_STRIP@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +exec_prefix = @exec_prefix@ +extra_ldflags = @extra_ldflags@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +oldincludedir = @oldincludedir@ +prefix = 
@prefix@ +program_transform_name = @program_transform_name@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +subdirs = @subdirs@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ + +AUTOMAKE_OPTIONS = foreign no-dependencies + +INCLUDES = -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\" \ + -I$(top_srcdir)/include -I$(top_srcdir)/htlib \ + -I$(top_srcdir)/htnet -I$(top_srcdir)/htcommon \ + -I$(top_srcdir)/htword \ + -I$(top_srcdir)/db -I$(top_builddir)/db \ + $(LOCAL_DEFINES) $(PROFILING) + + +HTLIBS = $(top_builddir)/htnet/libhtnet.la \ + $(top_builddir)/htcommon/libcommon.la \ + $(top_builddir)/htword/libhtword.la \ + $(top_builddir)/htlib/libht.la \ + $(top_builddir)/htcommon/libcommon.la \ + $(top_builddir)/htword/libhtword.la \ + $(top_builddir)/db/libhtdb.la \ + $(top_builddir)/htlib/libht.la + + +LOCAL_DEFINES = -DSENDMAIL=\"$(SENDMAIL)\" + +bin_PROGRAMS = htpurge htnotify htdump htstat htload htmerge + +htpurge_SOURCES = htpurge.cc +htpurge_DEPENDENCIES = $(HTLIBS) +htpurge_LDFLAGS = $(PROFILING) ${extra_ldflags} +htpurge_LDADD = $(HTLIBS) + +htnotify_SOURCES = htnotify.cc +htnotify_DEPENDENCIES = $(HTLIBS) +htnotify_LDFLAGS = $(PROFILING) ${extra_ldflags} +htnotify_LDADD = $(HTLIBS) + +htdump_SOURCES = htdump.cc +htdump_DEPENDENCIES = $(HTLIBS) +htdump_LDFLAGS = $(PROFILING) ${extra_ldflags} +htdump_LDADD = $(HTLIBS) + +htstat_SOURCES = htstat.cc +htstat_DEPENDENCIES = $(HTLIBS) +htstat_LDFLAGS = $(PROFILING) ${extra_ldflags} +htstat_LDADD = $(HTLIBS) + +htload_SOURCES = htload.cc +htload_DEPENDENCIES = $(HTLIBS) +htload_LDFLAGS = $(PROFILING) ${extra_ldflags} +htload_LDADD = $(HTLIBS) + +htmerge_SOURCES = htmerge.cc +htmerge_DEPENDENCIES = $(HTLIBS) +htmerge_LDFLAGS = $(PROFILING) ${extra_ldflags} +htmerge_LDADD = $(HTLIBS) +subdir = httools +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/include/config.h +CONFIG_CLEAN_FILES = +bin_PROGRAMS = htpurge$(EXEEXT) htnotify$(EXEEXT) htdump$(EXEEXT) \ + htstat$(EXEEXT) htload$(EXEEXT) htmerge$(EXEEXT) +PROGRAMS = $(bin_PROGRAMS) + +am_htdump_OBJECTS = htdump.$(OBJEXT) +htdump_OBJECTS = $(am_htdump_OBJECTS) +am_htload_OBJECTS = htload.$(OBJEXT) +htload_OBJECTS = $(am_htload_OBJECTS) +am_htmerge_OBJECTS = htmerge.$(OBJEXT) +htmerge_OBJECTS = $(am_htmerge_OBJECTS) +am_htnotify_OBJECTS = htnotify.$(OBJEXT) +htnotify_OBJECTS = $(am_htnotify_OBJECTS) +am_htpurge_OBJECTS = htpurge.$(OBJEXT) +htpurge_OBJECTS = $(am_htpurge_OBJECTS) +am_htstat_OBJECTS = htstat.$(OBJEXT) +htstat_OBJECTS = $(am_htstat_OBJECTS) + +DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir)/include +depcomp = +am__depfiles_maybe = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +DIST_SOURCES = $(htdump_SOURCES) $(htload_SOURCES) $(htmerge_SOURCES) \ + $(htnotify_SOURCES) $(htpurge_SOURCES) $(htstat_SOURCES) +DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/Makefile.config \ + Makefile.am +SOURCES = $(htdump_SOURCES) $(htload_SOURCES) $(htmerge_SOURCES) $(htnotify_SOURCES) $(htpurge_SOURCES) $(htstat_SOURCES) + +all: all-am + +.SUFFIXES: +.SUFFIXES: .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/Makefile.config $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign httools/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) +binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(bindir) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + if test -f $$p \ + || test -f $$p1 \ + ; then \ + f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) $$p $(DESTDIR)$(bindir)/$$f || exit 1; \ + else :; fi; \ + done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " rm -f $(DESTDIR)$(bindir)/$$f"; \ + rm -f $(DESTDIR)$(bindir)/$$f; \ + done + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +htdump$(EXEEXT): $(htdump_OBJECTS) $(htdump_DEPENDENCIES) + @rm -f htdump$(EXEEXT) + $(CXXLINK) $(htdump_LDFLAGS) $(htdump_OBJECTS) $(htdump_LDADD) $(LIBS) +htload$(EXEEXT): $(htload_OBJECTS) $(htload_DEPENDENCIES) + @rm -f htload$(EXEEXT) + $(CXXLINK) $(htload_LDFLAGS) $(htload_OBJECTS) $(htload_LDADD) $(LIBS) +htmerge$(EXEEXT): $(htmerge_OBJECTS) $(htmerge_DEPENDENCIES) + @rm -f htmerge$(EXEEXT) + $(CXXLINK) $(htmerge_LDFLAGS) $(htmerge_OBJECTS) $(htmerge_LDADD) $(LIBS) +htnotify$(EXEEXT): $(htnotify_OBJECTS) $(htnotify_DEPENDENCIES) + @rm -f htnotify$(EXEEXT) + $(CXXLINK) $(htnotify_LDFLAGS) $(htnotify_OBJECTS) $(htnotify_LDADD) $(LIBS) +htpurge$(EXEEXT): $(htpurge_OBJECTS) $(htpurge_DEPENDENCIES) + @rm -f htpurge$(EXEEXT) + $(CXXLINK) $(htpurge_LDFLAGS) $(htpurge_OBJECTS) $(htpurge_LDADD) $(LIBS) +htstat$(EXEEXT): $(htstat_OBJECTS) $(htstat_DEPENDENCIES) + @rm -f htstat$(EXEEXT) + $(CXXLINK) $(htstat_LDFLAGS) $(htstat_OBJECTS) $(htstat_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) core *.core + +distclean-compile: + -rm -f *.tab.c + +.cc.o: + $(CXXCOMPILE) -c -o $@ `test -f '$<' || echo '$(srcdir)/'`$< + +.cc.obj: + $(CXXCOMPILE) -c -o $@ `if test -f '$<'; then $(CYGPATH_W) '$<'; else $(CYGPATH_W) '$(srcdir)/$<'; fi` + +.cc.lo: + $(LTCXXCOMPILE) -c -o $@ `test -f 
'$<' || echo '$(srcdir)/'`$< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: + +ETAGS = etags +ETAGSFLAGS = + +CTAGS = ctags +CTAGSFLAGS = + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$tags$$unique" \ + || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique + +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. +distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + $(mkinstalldirs) $(distdir)/.. 
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) + +installdirs: + $(mkinstalldirs) $(DESTDIR)$(bindir) +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: + +install-exec-am: install-binPROGRAMS + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-info-am + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-libtool ctags distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-exec install-exec-am \ + install-info install-info-am install-man install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-binPROGRAMS \ + uninstall-info-am + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/debian/htdig/htdig-3.2.0b6/httools/Makefile.win32 b/debian/htdig/htdig-3.2.0b6/httools/Makefile.win32 new file mode 100644 index 00000000..9c6e2a94 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/Makefile.win32 @@ -0,0 +1,63 @@ +# +# Makefile - makefile for htsearch +# + +APP_NAME = Right Now Web CGI +RNT_PRODUCT = rnw + +TARGET1 = $(BINDIR)/htdump$(EXESFX) +TARGET2 = $(BINDIR)/htload$(EXESFX) +TARGET3 = $(BINDIR)/htmerge$(EXESFX) +TARGET4 = $(BINDIR)/htnotify$(EXESFX) +TARGET5 = $(BINDIR)/htpurge$(EXESFX) +TARGET6 = $(BINDIR)/htstat$(EXESFX) + +#htdump.cc htload.cc htmerge.cc htnotify.cc htpurge.cc htstat.cc + +include ../Makedefs.win32 + +# ----------------------------------------------------------------------------- +# add new executable members to this list + + +CXXSRC = htdump.cc htload.cc htmerge.cc htnotify.cc htpurge.cc htstat.cc + +OBJS1 = win32/htdump.obj +OBJS2 = win32/htload.obj +OBJS3 = win32/htmerge.obj +OBJS4 = win32/htnotify.obj +OBJS5 = win32/htpurge.obj +OBJS6 = win32/htstat.obj + + +CPPFLAGS += -DHAVE_CONFIG_H -DSENDMAIL=\"\" -I. -I../include -I../htlib -I../htcommon -I../htword -I../db -I../htnet + +LDLIBS = ../lib/$(ARCH)/libhtnet.lib ../lib/$(ARCH)/libcommon.lib ../lib/$(ARCH)/libhtword.lib ../lib/$(ARCH)/libht.lib ../lib/$(ARCH)/libcommon.lib ../lib/$(ARCH)/libhtword.lib ../lib/$(ARCH)/libht.lib ../lib/$(ARCH)/libfuzzy.lib ../lib/$(ARCH)/libhtdb.lib +OTHERLIBS = ws2_32.lib L:/win32/lib/zlib114/zlib.lib + +DEPLIBS += $(LDLIBS) + +win32/%.obj: %.cc %.c + $(CC) $(CPPFLAGS) -c $< -o $@ + +all: $(TARGET1) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) + +$(TARGET1): $(OBJDIRDEP) $(BINDIRDEP) $(OBJS1) $(DEPLIBS) + $(EXELD) $(LDFLAGS) $(OBJS1) $(LDLIBS) $(OTHERLIBS) + +$(TARGET2): $(OBJDIRDEP) $(BINDIRDEP) $(OBJS2) $(DEPLIBS) + $(EXELD) $(LDFLAGS) $(OBJS2) $(LDLIBS) $(OTHERLIBS) + +$(TARGET3): $(OBJDIRDEP) $(BINDIRDEP) $(OBJS3) $(DEPLIBS) + $(EXELD) $(LDFLAGS) $(OBJS3) $(LDLIBS) $(OTHERLIBS) + +$(TARGET4): $(OBJDIRDEP) $(BINDIRDEP) $(OBJS4) $(DEPLIBS) + $(EXELD) $(LDFLAGS) $(OBJS4) $(LDLIBS) $(OTHERLIBS) + +$(TARGET5): $(OBJDIRDEP) $(BINDIRDEP) $(OBJS5) $(DEPLIBS) + $(EXELD) $(LDFLAGS) $(OBJS5) $(LDLIBS) $(OTHERLIBS) + +$(TARGET6): $(OBJDIRDEP) $(BINDIRDEP) $(OBJS6) $(DEPLIBS) + $(EXELD) $(LDFLAGS) $(OBJS6) $(LDLIBS) $(OTHERLIBS) + +include ../Makerules.win32 diff --git a/debian/htdig/htdig-3.2.0b6/httools/htdump.cc b/debian/htdig/htdig-3.2.0b6/httools/htdump.cc new file mode 100644 index 00000000..c52dbda9 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/htdump.cc @@ -0,0 +1,200 @@ +// +// htdump.cc +// +// htdump: A utility to create ASCII text versions of the document +// and/or word databases. These can be used by external programs, +// edited, or used as a platform and version-independent form of the DB. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: htdump.cc,v 1.6 2004/05/28 13:15:25 lha Exp $ +// +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "WordContext.h" +#include "HtURLCodec.h" +#include "HtWordList.h" +#include "HtConfiguration.h" +#include "DocumentDB.h" +#include "defaults.h" + +#include <errno.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif + +// If we have this, we probably want it. 
+#ifdef HAVE_GETOPT_H +#include <getopt.h> +#elif HAVE_GETOPT_LOCAL +#include <getopt_local.h> +#endif + +int verbose = 0; + +void usage(); +void reportError(char *msg); + +//***************************************************************************** +// int main(int ac, char **av) +// +int main(int ac, char **av) +{ + int do_words = 1; + int do_docs = 1; + int alt_work_area = 0; + String configfile = DEFAULT_CONFIG_FILE; + int c; + extern char *optarg; + + while ((c = getopt(ac, av, "vdwc:a")) != -1) + { + switch (c) + { + case 'c': + configfile = optarg; + break; + case 'v': + verbose++; + break; + case 'a': + alt_work_area++; + break; + case 'w': + do_words = 0; + break; + case 'd': + do_docs = 0; + break; + case '?': + usage(); + break; + } + } + + HtConfiguration* config= HtConfiguration::config(); + config->Defaults(&defaults[0]); + + if (access((char*)configfile, R_OK) < 0) + { + reportError(form("Unable to find configuration file '%s'", + configfile.get())); + } + + config->Read(configfile); + + // + // Check url_part_aliases and common_url_parts for + // errors. + String url_part_errors = HtURLCodec::instance()->ErrMsg(); + + if (url_part_errors.length() != 0) + reportError(form("Invalid url_part_aliases or common_url_parts: %s", + url_part_errors.get())); + + + // We may need these through the methods we call + if (alt_work_area != 0) + { + String configValue; + + configValue = config->Find("word_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("word_db", configValue); + } + + configValue = config->Find("doc_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_db", configValue); + } + + configValue = config->Find("doc_index"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_index", configValue); + } + + configValue = config->Find("doc_excerpt"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_excerpt", configValue); + } + } + + if (do_docs) + { + const String doc_list = config->Find("doc_list"); + unlink(doc_list); + DocumentDB docs; + if (docs.Read(config->Find("doc_db"), config->Find("doc_index"), + config->Find("doc_excerpt")) == OK) + { + docs.DumpDB(doc_list, verbose); + docs.Close(); + } + } + if (do_words) + { + + // Initialize htword + WordContext::Initialize(*config); + + const String word_dump = config->Find("word_dump"); + unlink(word_dump); + HtWordList words(*config); + if(words.Open(config->Find("word_db"), O_RDONLY) == OK) { + words.Dump(word_dump); + words.Close(); + } + } + + return 0; +} + + +//***************************************************************************** +// void usage() +// Display program usage information +// +void usage() +{ + cout << "usage: htdump [-v][-d][-w][-a][-c configfile]\n"; + cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "Options:\n"; + cout << "\t-v\tVerbose mode. This increases the verbosity of the\n"; + cout << "\t\tprogram. Using more than 2 is probably only useful\n"; + cout << "\t\tfor debugging purposes. 
The default verbose mode\n"; + cout << "\t\tgives a progress on what it is doing and where it is.\n\n"; + cout << "\t-d\tDo NOT dump the document database.\n\n"; + cout << "\t-w\tDo NOT dump the word database.\n\n"; + cout << "\t-a\tUse alternate work files.\n"; + cout << "\t\tTells htdump to append .work to the database files \n"; + cout << "\t\tallowing it to operate on a second set of databases.\n"; + cout << "\t-c configfile\n"; + cout << "\t\tUse the specified configuration file instead on the\n"; + cout << "\t\tdefault.\n\n"; + exit(0); +} + + +//***************************************************************************** +// Report an error and die +// +void reportError(char *msg) +{ + cout << "htdump: " << msg << "\n\n"; + exit(1); +} diff --git a/debian/htdig/htdig-3.2.0b6/httools/htload.cc b/debian/htdig/htdig-3.2.0b6/httools/htload.cc new file mode 100644 index 00000000..79ec15b7 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/htload.cc @@ -0,0 +1,199 @@ +// +// htload.cc +// +// htload: A utility to read ASCII text versions of the document +// and/or word databases. These can be used by external programs, +// edited, or used as a platform and version-independent form of the DB. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: htload.cc,v 1.6 2004/05/28 13:15:25 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "WordContext.h" +#include "HtURLCodec.h" +#include "HtWordList.h" +#include "HtConfiguration.h" +#include "DocumentDB.h" +#include "defaults.h" + +#include <errno.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#elif HAVE_GETOPT_LOCAL +#include <getopt_local.h> +#endif + +int verbose = 0; + +void usage(); +void reportError(char *msg); + +//***************************************************************************** +// int main(int ac, char **av) +// +int main(int ac, char **av) +{ + int do_words = 1; + int do_docs = 1; + int alt_work_area = 0; + String configfile = DEFAULT_CONFIG_FILE; + int c; + extern char *optarg; + + while ((c = getopt(ac, av, "vdwc:a")) != -1) + { + switch (c) + { + case 'c': + configfile = optarg; + break; + case 'v': + verbose++; + break; + case 'a': + alt_work_area++; + break; + case 'w': + do_words = 0; + break; + case 'd': + do_docs = 0; + break; + case '?': + usage(); + break; + } + } + + HtConfiguration* config= HtConfiguration::config(); + config->Defaults(&defaults[0]); + + if (access((char*)configfile, R_OK) < 0) + { + reportError(form("Unable to find configuration file '%s'", + configfile.get())); + } + + config->Read(configfile); + + // + // Check url_part_aliases and common_url_parts for + // errors. 
+ String url_part_errors = HtURLCodec::instance()->ErrMsg(); + + if (url_part_errors.length() != 0) + reportError(form("Invalid url_part_aliases or common_url_parts: %s", + url_part_errors.get())); + + + // We may need these through the methods we call + if (alt_work_area != 0) + { + String configValue; + + configValue = config->Find("word_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("word_db", configValue); + } + + configValue = config->Find("doc_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_db", configValue); + } + + configValue = config->Find("doc_index"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_index", configValue); + } + + configValue = config->Find("doc_excerpt"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_excerpt", configValue); + } + } + + if (do_docs) + { + const String doc_list = config->Find("doc_list"); + DocumentDB docs; + if (docs.Open(config->Find("doc_db"), config->Find("doc_index"), + config->Find("doc_excerpt")) == OK) + { + docs.LoadDB(doc_list, verbose); + docs.Close(); + } + } + if (do_words) + { + + // Initialize htword + WordContext::Initialize(*config); + + const String word_dump = config->Find("word_dump"); + HtWordList words(*config); + if(words.Open(config->Find("word_db"), O_RDWR) == OK) { + words.Load(word_dump); + words.Close(); + } + } + + return 0; +} + + +//***************************************************************************** +// void usage() +// Display program usage information +// +void usage() +{ + cout << "usage: htload [-v][-d][-w][-a][-c configfile]\n"; + cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "Options:\n"; + cout << "\t-v\tVerbose mode. This increases the verbosity of the\n"; + cout << "\t\tprogram. Using more than 2 is probably only useful\n"; + cout << "\t\tfor debugging purposes. The default verbose mode\n"; + cout << "\t\tgives a progress on what it is doing and where it is.\n\n"; + cout << "\t-d\tDo NOT load the document database.\n\n"; + cout << "\t-w\tDo NOT load the word database.\n\n"; + cout << "\t-a\tUse alternate work files.\n"; + cout << "\t\tTells htload to append .work to the database files \n"; + cout << "\t\tallowing it to operate on a second set of databases.\n"; + cout << "\t-c configfile\n"; + cout << "\t\tUse the specified configuration file instead on the\n"; + cout << "\t\tdefault.\n\n"; + exit(0); +} + + +//***************************************************************************** +// Report an error and die +// +void reportError(char *msg) +{ + cout << "htload: " << msg << "\n\n"; + exit(1); +} diff --git a/debian/htdig/htdig-3.2.0b6/httools/htmerge.cc b/debian/htdig/htdig-3.2.0b6/httools/htmerge.cc new file mode 100644 index 00000000..d25267fb --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/htmerge.cc @@ -0,0 +1,403 @@ +// +// htmerge.cc +// +// htmerge: Merges two databases and/or updates databases to remove +// old documents and ensures the databases are consistent. 
+// Calls db.cc, docs.cc, and/or words.cc as necessary +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: htmerge.cc,v 1.7 2004/05/28 13:15:25 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "WordContext.h" +#include "good_strtok.h" +#include "defaults.h" +#include "DocumentDB.h" +#include "HtURLCodec.h" +#include "HtWordList.h" +#include "HtWordReference.h" +#include "htString.h" + +#ifdef HAVE_STD +#include <fstream> +#ifdef HAVE_NAMESPACES +using namespace std; +#endif +#else +#include <fstream.h> +#endif /* HAVE_STD */ + +#include <stdio.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#elif HAVE_GETOPT_LOCAL +#include <getopt_local.h> +#endif + + +// +// This hash is used to keep track of all the document IDs which have to be +// discarded. +// This is generated from the doc database and is used to prune words +// from the word db +// +Dictionary discard_list; + + +// This config is used for merging multiple databses +HtConfiguration merge_config; + +int verbose = 0; +int stats = 0; + +// Component procedures +void mergeDB(); +void usage(); +void reportError(char *msg); + +//***************************************************************************** +// int main(int ac, char **av) +// +int main(int ac, char **av) +{ + int alt_work_area = 0; + String configfile = DEFAULT_CONFIG_FILE; + String merge_configfile = 0; + int c; + extern char *optarg; + + while ((c = getopt(ac, av, "svm:c:dwa")) != -1) + { + switch (c) + { + case 'd': + break; + case 'w': + break; + case 'c': + configfile = optarg; + break; + case 'm': + merge_configfile = optarg; + break; + case 'v': + verbose++; + break; + case 's': + break; + case 'a': + alt_work_area++; + break; + case '?': + usage(); + break; + } + } + + HtConfiguration* config= HtConfiguration::config(); + config->Defaults(&defaults[0]); + + if (access((char*)configfile, R_OK) < 0) + { + reportError(form("Unable to find configuration file '%s'", + configfile.get())); + } + + config->Read(configfile); + + // + // Check url_part_aliases and common_url_parts for + // errors. 
+ String url_part_errors = HtURLCodec::instance()->ErrMsg(); + + if (url_part_errors.length() != 0) + reportError(form("Invalid url_part_aliases or common_url_parts: %s", + url_part_errors.get())); + + if (merge_configfile.length()) + { + merge_config.Defaults(&defaults[0]); + if (access((char*)merge_configfile, R_OK) < 0) + { + reportError(form("Unable to find configuration file '%s'", + merge_configfile.get())); + } + merge_config.Read(merge_configfile); + } + + if (alt_work_area != 0) + { + String configValue; + + configValue = config->Find("word_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("word_db", configValue); + } + + configValue = config->Find("doc_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_db", configValue); + } + + configValue = config->Find("doc_index"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_index", configValue); + } + + configValue = config->Find("doc_excerpt"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_excerpt", configValue); + } + } + + WordContext::Initialize(*config); + + if (merge_configfile.length()) + { + // Merge the databases specified in merge_configfile into the current + // databases. Do this first then update the other databases as usual + // Note: We don't have to specify anything, it's all in the config vars + + mergeDB(); + } + + return 0; +} + +//***************************************************************************** +// void mergeDB() +// +void +mergeDB() +{ + HtConfiguration* config= HtConfiguration::config(); + DocumentDB merge_db, db; + List *urls; + Dictionary merge_dup_ids, db_dup_ids; // Lists of DocIds to ignore + int docIDOffset; + + const String doc_index = config->Find("doc_index"); + if (access(doc_index, R_OK) < 0) + { + reportError(form("Unable to open document index '%s'", (const char*)doc_index)); + } + const String doc_excerpt = config->Find("doc_excerpt"); + if (access(doc_excerpt, R_OK) < 0) + { + reportError(form("Unable to open document excerpts '%s'", (const char*)doc_excerpt)); + } + const String doc_db = config->Find("doc_db"); + if (db.Open(doc_db, doc_index, doc_excerpt) < 0) + { + reportError(form("Unable to open/create document database '%s'", + (const char*)doc_db)); + } + + + const String merge_doc_index = merge_config["doc_index"]; + if (access(merge_doc_index, R_OK) < 0) + { + reportError(form("Unable to open document index '%s'", (const char*)merge_doc_index)); + } + const String merge_doc_excerpt = merge_config["doc_excerpt"]; + if (access(merge_doc_excerpt, R_OK) < 0) + { + reportError(form("Unable to open document excerpts '%s'", (const char*)merge_doc_excerpt)); + } + const String merge_doc_db = merge_config["doc_db"]; + if (merge_db.Open(merge_doc_db, merge_doc_index, merge_doc_excerpt) < 0) + { + reportError(form("Unable to open document database '%s'", + (const char*)merge_doc_db)); + } + + // Start the merging by going through all the URLs that are in + // the database to be merged + + urls = merge_db.URLs(); + // This ensures that every document added from merge_db has a unique ID + // in the new database + docIDOffset = db.NextDocID(); + + urls->Start_Get(); + String *url; + String id; + while ((url = (String *) urls->Get_Next())) + { + DocumentRef *ref = merge_db[url->get()]; + DocumentRef *old_ref = db[url->get()]; + if (!ref) + continue; + + if (old_ref) + { + // Oh well, we knew this would happen. 
Let's get the duplicate + // And we'll only use the most recent date. + + if ( old_ref->DocTime() >= ref->DocTime() ) + { + // Cool, the ref we're merging is too old, just ignore it + char str[20]; + sprintf(str, "%d", ref->DocID()); + merge_dup_ids.Add(str, 0); + + if (verbose > 1) + { + cout << "htmerge: Duplicate, URL: " << url << " ignoring merging copy \n"; + cout.flush(); + } + } + else + { + // The ref we're merging is newer, delete the old one and add + char str[20]; + sprintf(str, "%d", old_ref->DocID()); + db_dup_ids.Add(str, 0); + db.Delete(old_ref->DocID()); + ref->DocID(ref->DocID() + docIDOffset); + db.Add(*ref); + if (verbose > 1) + { + cout << "htmerge: Duplicate, URL: "; + cout << url->get() << " ignoring destination copy \n"; + cout.flush(); + } + } + } + else + { + // It's a new URL, just add it, making sure to load the excerpt + merge_db.ReadExcerpt(*ref); + ref->DocID(ref->DocID() + docIDOffset); + db.Add(*ref); + if (verbose > 1) + { + cout << "htmerge: Merged URL: " << url->get() << " \n"; + cout.flush(); + } + } + delete ref; + delete old_ref; + } + delete urls; + + // As reported by Roman Dimov, we must update db.NextDocID() + // because of all the added records... + db.IncNextDocID( merge_db.NextDocID() ); + merge_db.Close(); + db.Close(); + + // OK, after merging the doc DBs, we do the same for the words + HtWordList mergeWordDB(*config), wordDB(*config); + List *words; + String docIDKey; + + if (wordDB.Open(config->Find("word_db"), O_RDWR) < 0) + { + reportError(form("Unable to open/create document database '%s'", + (const char*)config->Find("word_db"))); + } + + if (mergeWordDB.Open(merge_config["word_db"], O_RDONLY) < 0) + { + reportError(form("Unable to open document database '%s'", + (const char *)merge_config["word_db"])); + } + + // Start the merging by going through all the URLs that are in + // the database to be merged + + words = mergeWordDB.WordRefs(); + + words->Start_Get(); + HtWordReference *word; + while ((word = (HtWordReference *) words->Get_Next())) + { + docIDKey = word->DocID(); + if (merge_dup_ids.Exists(docIDKey)) + continue; + + word->DocID(word->DocID() + docIDOffset); + wordDB.Override(*word); + } + delete words; + + words = wordDB.WordRefs(); + words->Start_Get(); + while ((word = (HtWordReference *) words->Get_Next())) + { + docIDKey = word->DocID(); + if (db_dup_ids.Exists(docIDKey)) + wordDB.Delete(*word); + } + delete words; + + // Cleanup--just close the two word databases + mergeWordDB.Close(); + wordDB.Close(); +} + + +//***************************************************************************** +// void usage() +// Display program usage information +// +void usage() +{ + cout << "usage: htmerge [-v][-c configfile][-m merge_configfile]\n"; + cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "Options:\n"; + cout << "\t-v\tVerbose mode. This increases the verbosity of the\n"; + cout << "\t\tprogram. Using more than 2 is probably only useful\n"; + cout << "\t\tfor debugging purposes. 
The default verbose mode\n"; + cout << "\t\tgives a progress on what it is doing and where it is.\n\n"; + cout << "\t-m merge_configfile\n"; + cout << "\t\tMerge the databases specified into the databases specified\n"; + cout << "\t\tby -c or the default.\n\n"; + cout << "\t-c configfile\n"; + cout << "\t\tUse the specified configuration file instead on the\n"; + cout << "\t\tdefault.\n\n"; + cout << "\t-a\tUse alternate work files.\n"; + cout << "\t\tTells htmerge to append .work to database files causing\n"; + cout << "\t\ta second copy of the database to be built. This allows\n"; + cout << "\t\toriginal files to be used by htsearch during the indexing\n"; + cout << "\t\trun.\n\n"; + exit(0); +} + + +//***************************************************************************** +// Report an error and die +// +void reportError(char *msg) +{ + cout << "htmerge: " << msg << "\n\n"; + exit(1); +} diff --git a/debian/htdig/htdig-3.2.0b6/httools/htnotify.cc b/debian/htdig/htdig-3.2.0b6/httools/htnotify.cc new file mode 100644 index 00000000..3562433c --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/htnotify.cc @@ -0,0 +1,613 @@ +// +// htnotify.cc +// +// htnotify: Check through databases and look for notify META information +// Send e-mail to addresses mentioned in documents if the doc +// has "expired" +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1995-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: htnotify.cc,v 1.8 2004/05/28 13:15:25 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "HtConfiguration.h" +#include "Dictionary.h" +#include "DocumentDB.h" +#include "DocumentRef.h" +#include "defaults.h" +#include "HtURLCodec.h" + +#include <stdlib.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif +#include <errno.h> + +#ifdef HAVE_STD +#include <fstream> +#ifdef HAVE_NAMESPACES +using namespace std; +#endif +#else +#include <fstream.h> +#endif /* HAVE_STD */ + +#include <time.h> +#include <stdio.h> +#include <ctype.h> + +// If we have this, we probably want it. 
+#ifdef HAVE_GETOPT_H +#include <getopt.h> +#elif HAVE_GETOPT_LOCAL +#include <getopt_local.h> +#endif + +// Declare a record for storing email/URL data for later retrieval +class EmailNotification : public Object +{ +public: + EmailNotification (char* date, char* email, char* url, char* subject); + + // + //accessors + // + String getDate() const { return date; } + String getEmail() const { return email; } + String getUrl() const { return url; } + String getSubject() const { return subject; } + +private: + String date; + String email; + String url; + String subject; +}; + +EmailNotification::EmailNotification (char* pDate, char* pEmail, + char* pUrl, char* pSubject) +{ + date = pDate; + email = pEmail; + url = pUrl; + if (!pSubject || !*pSubject) + { + subject = "page expired"; + } + else + { + subject = pSubject; + } +} + +void htnotify(DocumentRef &); +void usage(); +void readPreAndPostamble(void); +void add_notification(char *date, char *email, char *url, char *subject); +void send_notification(char *email, List * notifList); +void send_email(List * notifList, String& command, String& to, + String& listText, int singleSubject); +int parse_date(char *date, int &year, int &month, int &day); + + +int verbose = 0; +int sendEmail = 1; + +// +// This variable is used to hold today's date. It is global for +// efficiency reasons since computing it is a relatively expensive +// operation +// +struct tm *today; + +// +// This structure holds the set of email notifications requiring +// sending. It is indexed by email address of recipients, and +// each entry is a List of EmailNotification objects. +// +Dictionary * allNotifications; + +// +// These strings holds the preamble/postamble text used in +// email messages. +// +String preambleText; +String postambleText; + +//{{{ main +//***************************************************************************** +// int main(int ac, char **av) +// +int main(int ac, char **av) +{ + int c; + extern char *optarg; + String base; + String configFile = DEFAULT_CONFIG_FILE; + + while ((c = getopt(ac, av, "nvb:c:")) != -1) + { + switch (c) + { + case 'b': + base = optarg; + break; + case 'c': + configFile = optarg; + break; + case 'v': + verbose++; + break; + case 'n': + verbose++; + sendEmail = 0; + break; + case '?': + usage(); + break; + } + } + + HtConfiguration* config= HtConfiguration::config(); + config->Defaults(&defaults[0]); + config->Read(configFile); + + // + // Check url_part_aliases and common_url_parts for + // errors. 
+ String url_part_errors = HtURLCodec::instance()->ErrMsg(); + + if (url_part_errors.length() != 0) + { + cerr << form("htnotify: Invalid url_part_aliases or common_url_parts: %s", + url_part_errors.get()) << endl; + exit (1); + } + + if (base.length()) + { + config->Add("database_base", base); + } + + String doc_db = config->Find("doc_db"); + DocumentDB docdb; + + docdb.Read(doc_db); + List *docs = docdb.DocIDs(); + + // + // Compute today's date + // + time_t now = time(0); + today = localtime(&now); + + readPreAndPostamble(); + + // + // Traverse all the known documents to check for notification requirements + // + allNotifications = new Dictionary(); + DocumentRef *ref; + IntObject *id; + docs->Start_Get(); + while ((id = (IntObject *) docs->Get_Next())) + { + ref = docdb[id->Value()]; + if (ref) + htnotify(*ref); + delete ref; + } + delete docs; + + // + // Iterate through the list of notifications + // + allNotifications->Start_Get(); + char * email; + while ((email = (char *) allNotifications->Get_Next())) + { + List * notifList = (List *) allNotifications->Find (email); + send_notification(email, notifList); + } + + // + // tidy up + // + docdb.Close(); + delete allNotifications; + return 0; +} + + +//}}} +//{{{ readPreAndPostamble +//***************************************************************************** +// void readPreAndPostamble(void) +// +void readPreAndPostamble(void) +{ + HtConfiguration* config= HtConfiguration::config(); + const char* prefixfile = config->Find("htnotify_prefix_file"); + const char* suffixfile = config->Find("htnotify_suffix_file"); + + // define default preamble text - blank string + preambleText = ""; + + if (prefixfile != NULL && *prefixfile) + { + ifstream in(prefixfile); + char buffer[1024]; + + if (! in.bad()) + { + while (! in.bad() && ! in.eof()) + { + in.getline(buffer, sizeof(buffer)); + if (in.eof() && !*buffer) + break; + preambleText << buffer << '\n'; + } + in.close(); + } + } + + // define default postamble text + postambleText = ""; + postambleText << "Note: This message will be sent again if you do not change or\n"; + postambleText << "take away the notification of the above mentioned HTML page.\n"; + postambleText << "\n"; + postambleText << "Find out more about the notification service at\n\n"; + postambleText << " http://www.htdig.org/meta.html\n\n"; + postambleText << "Cheers!\n\nht://Dig Notification Service\n"; + + if (suffixfile != NULL && *suffixfile) + { + ifstream in(suffixfile); + char buffer[1024]; + + if (! in.bad()) + { + postambleText = ""; + while (! in.bad() && ! 
in.eof()) + { + in.getline(buffer, sizeof(buffer)); + if (in.eof() && !*buffer) + break; + postambleText << buffer << '\n'; + } + in.close(); + } + } + + if (verbose > 1) + { + cout << "Preamble text:" << endl; + cout << preambleText << endl << endl; + cout << "Postamble text:" << endl; + cout << postambleText << endl; + cout << endl; + } +} + +//}}} +//{{{ htnotify +//***************************************************************************** +// void htnotify(DocumentRef &ref) +// +void htnotify(DocumentRef &ref) +{ + char *date = ref.DocNotification(); + char *email = ref.DocEmail(); + + if (date && *date && email && *email) + { + if (verbose > 2) + { + cout << "Saw a date:" << endl; + cout << "Date: " << date << endl; + cout << "URL: " << ref.DocURL() << endl; + cout << "Subject: " << ref.DocSubject() << endl; + cout << "Email: " << email << endl; + cout << endl; + } + + int month, day, year; + if (!parse_date(date, year, month, day)) + { + // Parsing Failed + if (verbose > 2) + { + cout << "Malformed date: " << date << endl; + } + + add_notification(date, email, ref.DocURL(), "Malformed Date"); + return; + } + + year -= 1900; + month--; + + // + // Compare this date with today's date + // + if (year < today->tm_year || + (year == today->tm_year && month < today->tm_mon) || + (year == today->tm_year && month == today->tm_mon && + day < today->tm_mday)) + { + // + // It seems that this date is either today or before + // today. Send a notification + // + add_notification(date, email, ref.DocURL(), ref.DocSubject()); + } + else + { + // Page not yet expired + if (verbose > 2) + { + cout << "htnotify: URL " << ref.DocURL() + << " (" << year+1900 << "-" << month+1 + << "-" << day << ")" << endl; + } + } + } +} + + +//}}} +//{{{ add_notification +//***************************************************************************** +// void add_notification(char *date, char *email, char *url, char *subject) +// +void add_notification(char *date, char *email, char *url, char *subject) +{ + + List * list = (List *) allNotifications->Find (email); + if (list == NULL) + { // here's a new recipient so add it + list = new List(); + allNotifications->Add (email, list); + } + + // now add the notification to the selected list + EmailNotification* notif = new EmailNotification(date, email, url, subject); + list->Add (notif); +} + +//}}} +//{{{ send_notification +//***************************************************************************** +// void send_notification(char * email, List * notifList) +// +void send_notification(char* email, List * notifList) +{ + String command = SENDMAIL; + command << " -t"; + + String em = email; + String to = ""; + char *token = strtok(em.get(), " ,\t\r\n"); + while (token) + { + if (*token) + { + if (to.length()) + to << ", "; + to << token; + } + token = strtok(0, " ,\t\r\n"); + } + +// Before we use the email address string, we may want to sanitize it. 
+// static char ok_chars[] = "abcdefghijklmnopqrstuvwxyz +// ABCDEFGHIJKLMNOPQRSTUVWXYZ +// 1234567890_-.@/=+:%!, "; +// char *cursor; // cursor into email address +// for (cursor = to.get(); *(cursor += strspn(cursor, ok_chars));) +// *cursor = '_'; // Set it to something harmless + + EmailNotification* notif = (EmailNotification*) notifList->Get_First(); + String firstSubject = notif->getSubject(); + int singleSubject = 1; + + // + // scan to determine whether the same subject message is used throughout + // + notifList->Start_Get(); + notifList->Get_Next(); + // continue with the second item in the list + while ((notif = (EmailNotification*) notifList->Get_Next())) + { + String current = notif->getSubject(); + if ( firstSubject != current ) + { + singleSubject = 0; + break; + } + } + + + // + // Aggregate the list text + // + String listText = ""; + notifList->Start_Get(); + while ((notif = (EmailNotification*) notifList->Get_Next())) + { + listText << notif->getUrl() << '\n'; + listText << " expired " << notif->getDate() << "\n"; + if (! singleSubject) + { listText << " " << notif->getSubject() << '\n'; } + } + + if (sendEmail) + { + send_email (notifList, command, to, listText, singleSubject); + } + else if (verbose) + { // just list the notifiable pages + cout << endl; + cout << "Notification required to " << to << endl; + cout << listText; + } +} + + +//}}} +//{{{ send_email +//***************************************************************************** +// void send_email(List * notifList, String& command, String& to) +// +void send_email (List * notifList, String& command, + String& to, String& listText, int singleSubject) +{ + HtConfiguration* config= HtConfiguration::config(); + String from = "\""; + from << config->Find("htnotify_webmaster") << "\" <" + << config->Find("htnotify_sender") << ">"; + + String replyto = config->Find("htnotify_replyto"); + + if (verbose) + { + if (verbose > 1) { cout << endl; } + + cout << "From: " << from << endl; + cout << "To: " << to << endl; + + if (verbose > 1) { cout << listText; } + } + + FILE *fileptr; + if ( (fileptr = popen(command.get(), "w")) != NULL ) + { + EmailNotification* notif = (EmailNotification*) notifList->Get_First(); + String out; + out << "From: " << from << '\n'; + out << "To: " << to << '\n'; + if (replyto.length() > 0) + { out << "Reply-To: " << replyto << '\n'; } + + if (singleSubject) + { + out << "Subject: " << notif->getSubject() << '\n'; + } + else + { + out << "Subject: Web page expiry (" << notif->getSubject() << ", inter alia)\n"; + } + + out << '\n'; // this is the important header/body separator + out << preambleText; + out << listText; + out << postambleText; + out << '\n'; + fputs( out.get(), fileptr ); + pclose( fileptr ); + } + else + { + perror( "popen" ); + } +} + + +//***************************************************************************** +// Display usage information for the htnotify program +// +void usage() +{ + cout << "usage: htnotify [-c configfile][-b db_base]\n"; + cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "There can be any number or words.\n"; + cout << "Options:\n"; + cout << "\t-c configfile\n"; + cout << "\t\tUse the specified configuration file instead of the default.\n\n"; + cout << "\t-b db_base\n"; + cout << "\t\tSet the base path of the document database.\n"; + cout << "\t-v\n"; + cout << "\t\tIncrease the verbose level. 
Use two or three times for\n"; + cout << "\t\tmore output.\n"; + cout << "\t-n\n"; + cout << "\t\tDon't send any email, just list what has expired.\n"; + exit(0); +} + + +//}}} +//{{{ parse_date +//***************************************************************************** +// Parse the notification date string from the user's document +// +int parse_date(char *date, int &year, int &month, int &day) +{ + HtConfiguration* config= HtConfiguration::config(); + int mm = -1, dd = -1, yy = -1, t; + String scandate = date; + + for (char *s = scandate.get(); *s; s++) + if (ispunct(*s)) + *s = ' '; + + if (config->Boolean("iso_8601")) + { + // conf file specified ISO standard, so expect [yy]yy mm dd. + sscanf(scandate.get(), "%d%d%d", &yy, &mm, &dd); + } + else + { + // Default to American standard when not specified in conf, + // so expect mm dd [yy]yy. + sscanf(scandate.get(), "%d%d%d", &mm, &dd, &yy); + if (mm > 31 && dd <= 12 && yy <= 31) + { + // probably got yyyy-mm-dd instead of mm/dd/yy + t = mm; mm = dd; dd = yy; yy = t; + } + } + + // OK, we took our best guess at the order the y, m & d should be. + // Now let's see if we guessed wrong, and fix it. This won't work + // for ambiguous dates (e.g. 01/02/03), which must be given in the + // expected format. + if (dd > 31 && yy <= 31) + { + t = yy; yy = dd; dd = t; + } + if (mm > 31 && yy <= 31) + { + t = yy; yy = mm; mm = t; + } + if (mm > 12 && dd <= 12) + { + t = dd; dd = mm; mm = t; + } + if (yy < 0 || mm < 1 || mm > 12 || dd < 1 || dd > 31) + return 0; // Invalid date + + if (yy < 70) // before UNIX Epoch + yy += 2000; + else if (yy < 1900) // before computer age + yy += 1900; + if (verbose > 2) + cout << "Date used (y-m-d): " << yy << '-' << mm << '-' << dd << endl; + + year = yy; + month = mm; + day = dd; + + return 1; +} + + +//}}} diff --git a/debian/htdig/htdig-3.2.0b6/httools/htpurge.cc b/debian/htdig/htdig-3.2.0b6/httools/htpurge.cc new file mode 100644 index 00000000..6e0524d7 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/htpurge.cc @@ -0,0 +1,399 @@ +// +// htpurge.cc +// +// htpurge: A utility to remove specified URLs and any documents +// marked for removal from the word and document databases. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: htpurge.cc,v 1.6 2004/05/28 13:15:25 lha Exp $ +// + +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <fcntl.h> + +#include "WordContext.h" +#include "HtWordReference.h" +#include "HtConfiguration.h" +#include "DocumentDB.h" +#include "DocumentRef.h" +#include "defaults.h" +#include "HtURLCodec.h" + +#include <errno.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif + +// If we have this, we probably want it. 
+#ifdef HAVE_GETOPT_H +#include <getopt.h> +#elif HAVE_GETOPT_LOCAL +#include <getopt_local.h> +#endif + +int verbose = 0; + +Dictionary *purgeDocs(Dictionary *); +void purgeWords(Dictionary *); +void usage(); +void reportError(char *msg); + +//***************************************************************************** +// int main(int ac, char **av) +// +int main(int ac, char **av) +{ + int alt_work_area = 0; + String configfile = DEFAULT_CONFIG_FILE; + int c; + extern char *optarg; + Dictionary *discard_ids = 0; + Dictionary *discard_urls = new Dictionary; + + while ((c = getopt(ac, av, "vc:au:")) != -1) + { + switch (c) + { + case 'c': + configfile = optarg; + break; + case 'v': + verbose++; + break; + case 'a': + alt_work_area++; + break; + case 'u': + discard_urls->Add(optarg, NULL); + break; + case '?': + usage(); + break; + } + } + + HtConfiguration* config= HtConfiguration::config(); + config->Defaults(&defaults[0]); + + if (access((char*)configfile, R_OK) < 0) + { + reportError(form("Unable to find configuration file '%s'", + configfile.get())); + } + + config->Read(configfile); + + // + // Check url_part_aliases and common_url_parts for + // errors. + String url_part_errors = HtURLCodec::instance()->ErrMsg(); + + if (url_part_errors.length() != 0) + reportError(form("Invalid url_part_aliases or common_url_parts: %s", + url_part_errors.get())); + + if (alt_work_area != 0) + { + String configValue; + + configValue = config->Find("word_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("word_db", configValue); + } + + configValue = config->Find("doc_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_db", configValue); + } + + configValue = config->Find("doc_index"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_index", configValue); + } + + configValue = config->Find("doc_excerpt"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_excerpt", configValue); + } + } + + if (optind < ac && strcmp(av[optind], "-") == 0) + { + String str; + while (!cin.eof()) + { + cin >> str; + str.chop("\r\n"); + if (str.length() > 0) + discard_urls->Add(str, NULL); + } + } + + WordContext::Initialize(*config); + + // We pass in our list of URLs (which may be empty) + // and we get back the list of IDs purged from the doc DB + // to make sure words with these IDs are purged + discard_ids = purgeDocs(discard_urls); + delete discard_urls; + discard_urls = 0; + + purgeWords(discard_ids); + delete discard_ids; + discard_ids = 0; + + return 0; +} + +//***************************************************************************** +// Dictionary purgeDocs(Dictionary &purgeURLs) +// Pass in a hash of the URLs to delete (it could be empty) +// Return a hash of the IDs deleted from the doc DB +// +Dictionary *purgeDocs(Dictionary *purgeURLs) +{ + HtConfiguration* config= HtConfiguration::config(); + const String doc_db = config->Find("doc_db"); + const String doc_index = config->Find("doc_index"); + const String doc_excerpt = config->Find("doc_excerpt"); + int remove_unused; + int remove_unretrieved; + DocumentDB db; + List *IDs; + int document_count = 0; + Dictionary *discard_list = new Dictionary; + + // + // Start the conversion by going through all the URLs that are in + // the document database + // + if(db.Open(doc_db, doc_index, doc_excerpt) != OK) + return discard_list; // It's empty right now + + IDs = db.DocIDs(); + + if (IDs->Count() == 0) + reportError("Database is 
empty!"); + + IDs->Start_Get(); + IntObject *id; + String idStr; + String url; + URL u_url; + + while ((id = (IntObject *) IDs->Get_Next())) + { + DocumentRef *ref = db[id->Value()]; + + if (!ref) + continue; + + db.ReadExcerpt(*ref); + url = ref->DocURL(); + u_url = URL((char *)url); + + remove_unused = config->Boolean("server", u_url.host() ,"remove_bad_urls"); + remove_unretrieved = config->Boolean("server", u_url.host(), "remove_unretrieved_urls"); + idStr = 0; + idStr << id->Value(); + + if (ref->DocState() == Reference_noindex) + { + // This document either wasn't found or shouldn't be indexed. + db.Delete(ref->DocID()); + if (verbose) + cout << "Deleted, noindex: ID: " << idStr << " URL: " + << url << endl; + discard_list->Add(idStr.get(), NULL); + } + else if (ref->DocState() == Reference_obsolete) + { + // This document was replaced by a newer one + db.Delete(ref->DocID()); + if (verbose) + cout << "Deleted, obsolete: ID: " << idStr << " URL: " + << url << endl; + discard_list->Add(idStr.get(), NULL); + } + else if (remove_unused && ref->DocState() == Reference_not_found) + { + // This document wasn't actually found + db.Delete(ref->DocID()); + if (verbose) + cout << "Deleted, not found: ID: " << idStr << " URL: " + << url << endl; + discard_list->Add(idStr.get(), NULL); + } + else if (remove_unused && strlen(ref->DocHead()) == 0 + && ref->DocAccessed() != 0) + { + // For some reason, this document was retrieved, but doesn't + // have an excerpt (probably because of a noindex directive) + db.Delete(ref->DocID()); + if (verbose) + cout << "Deleted, no excerpt: ID: " << idStr << " URL: " + << url << endl; + discard_list->Add(idStr.get(), NULL); + } + else if (remove_unretrieved && ref->DocAccessed() == 0) + { + // This document has not been retrieved + db.Delete(ref->DocID()); + if (verbose) + cout << "Deleted, never retrieved: ID: " << idStr << " URL: " + << url << endl; + discard_list->Add(idStr.get(), NULL); + } + else if (purgeURLs->Exists(url)) + { + // This document has been marked to be purged by the user + db.Delete(ref->DocID()); + if (verbose) + cout << "Deleted, marked by user input: ID: " << idStr << " URL: " + << url << endl; + discard_list->Add(idStr.get(), NULL); + } + else + { + // This is a valid document. Let's keep stats on it. 
+ if (verbose > 1) + cout << "ID: " << idStr << " URL: " << url << endl; + + document_count++; + if (verbose && document_count % 10 == 0) + { + cout << "htpurge: " << document_count << '\n'; + cout.flush(); + } + } + delete ref; + } + if (verbose) + cout << "\n"; + + delete IDs; + db.Close(); + + return discard_list; +} + +// +// Callback data dedicated to Dump and dump_word communication +// +class DeleteWordData : public Object +{ +public: + DeleteWordData(const Dictionary& discard_arg) : discard(discard_arg) { deleted = remains = 0; } + + const Dictionary& discard; + int deleted; + int remains; +}; + +//***************************************************************************** +// +// +static int delete_word(WordList *words, WordDBCursor& cursor, const WordReference *word_arg, Object &data) +{ + const HtWordReference *word = (const HtWordReference *)word_arg; + DeleteWordData& d = (DeleteWordData&)data; + static String docIDStr; + + docIDStr = 0; + docIDStr << word->DocID(); + + if(d.discard.Exists(docIDStr)) { + if(words->Delete(cursor) != 0) { + cerr << "htpurge: deletion of " << (char*)word->Get() << " failed " << strerror(errno) << "\n"; + return NOTOK; + } + if (verbose) + { + cout << "htpurge: Discarding "; + if(verbose > 2) + cout << (char*)word->Get(); + else + cout << word->Word(); + cout << "\n"; + cout.flush(); + } + d.deleted++; + } else { + d.remains++; + } + + return OK; +} + +//***************************************************************************** +// void purgeWords(Dictionary *discard_list) +// +void purgeWords(Dictionary *discard_list) +{ + HtConfiguration* config= HtConfiguration::config(); + HtWordList words(*config); + DeleteWordData data(*discard_list); + + words.Open(config->Find("word_db"), O_RDWR); + WordCursor* search = words.Cursor(delete_word, &data); + search->Walk(); + delete search; + + words.Close(); + + if (verbose) + cout << "\n"; + +} + +//***************************************************************************** +// void usage() +// Display program usage information +// +void usage() +{ + cout << "usage: htpurge [-][-u url][-v][-a][-c configfile]\n"; + cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "Options:\n"; + cout << "\t-\tURL input. Read in a list of URLs to remove, one per line.\n\n"; + cout << "\t-u\tURL input. Add this url to the list of URLs to remove.\n"; + cout << "\t\t(can be specified multiple times)\n\n"; + cout << "\t-v\tVerbose mode. This increases the verbosity of the\n"; + cout << "\t\tprogram. Using more than 2 is probably only useful\n"; + cout << "\t\tfor debugging purposes. 
The default verbose mode\n"; + cout << "\t\tgives a progress on what it is doing and where it is.\n\n"; + cout << "\t-a\tUse alternate work files.\n"; + cout << "\t\tTells htpurge to append .work to the database files \n"; + cout << "\t\tallowing it to operate on a second set of databases.\n\n"; + cout << "\t-c configfile\n"; + cout << "\t\tUse the specified configuration file instead on the\n"; + cout << "\t\tdefault.\n\n"; + exit(0); +} + + +//***************************************************************************** +// Report an error and die +// +void reportError(char *msg) +{ + cout << "htpurge: " << msg << "\n\n"; + exit(1); +} diff --git a/debian/htdig/htdig-3.2.0b6/httools/htstat.cc b/debian/htdig/htdig-3.2.0b6/httools/htstat.cc new file mode 100644 index 00000000..b7cc5790 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/httools/htstat.cc @@ -0,0 +1,200 @@ +// +// htstat.cc +// +// htstat: A utility to give statistics on the contents of the word and doc DB. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: htstat.cc,v 1.6 2004/05/28 13:15:25 lha Exp $ +// +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include "WordContext.h" +#include "HtURLCodec.h" +#include "HtWordList.h" +#include "HtConfiguration.h" +#include "DocumentDB.h" +#include "defaults.h" + +#include <errno.h> + +#ifndef _MSC_VER /* _WIN32 */ +#include <unistd.h> +#endif + +// If we have this, we probably want it. +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#elif HAVE_GETOPT_LOCAL +#include <getopt_local.h> +#endif + +int verbose = 0; + +void usage(); +void reportError(char *msg); + +//***************************************************************************** +// int main(int ac, char **av) +// +int main(int ac, char **av) +{ + int alt_work_area = 0; + int url_list = 0; + String configfile = DEFAULT_CONFIG_FILE; + int c; + extern char *optarg; + + while ((c = getopt(ac, av, "vc:au")) != -1) + { + switch (c) + { + case 'c': + configfile = optarg; + break; + case 'v': + verbose++; + break; + case 'a': + alt_work_area++; + break; + case 'u': + url_list++; + break; + case '?': + usage(); + break; + } + } + + HtConfiguration* config= HtConfiguration::config(); + config->Defaults(&defaults[0]); + + if (access((char*)configfile, R_OK) < 0) + { + reportError(form("Unable to find configuration file '%s'", + configfile.get())); + } + + config->Read(configfile); + + // + // Check url_part_aliases and common_url_parts for + // errors. 
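+ // (HtURLCodec::instance()->ErrMsg() returns any problems found while
+ // parsing those two attributes; a non-empty result is fatal via
+ // reportError() below.)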
+ String url_part_errors = HtURLCodec::instance()->ErrMsg(); + + if (url_part_errors.length() != 0) + reportError(form("Invalid url_part_aliases or common_url_parts: %s", + url_part_errors.get())); + + + // We may need these through the methods we call + if (alt_work_area != 0) + { + String configValue; + + configValue = config->Find("word_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("word_db", configValue); + } + + configValue = config->Find("doc_db"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_db", configValue); + } + + configValue = config->Find("doc_index"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_index", configValue); + } + + configValue = config->Find("doc_excerpt"); + if (configValue.length() != 0) + { + configValue << ".work"; + config->Add("doc_excerpt", configValue); + } + } + + DocumentDB docs; + if (docs.Read(config->Find("doc_db"), config->Find("doc_index"), + config->Find("doc_excerpt")) == OK) + { + List *urls = docs.URLs(); + cout << "htstat: Total documents: " << urls->Count() << endl; + if (url_list) + { + // Spit out the list of URLs too + String *url; + + cout << "htstat: URLs in database: " << endl; + urls->Start_Get(); + while ((url = (String *) urls->Get_Next())) + { + cout << "\t" << url->get() << endl; + } + } + + delete urls; + docs.Close(); + } + + // Initialize htword + WordContext::Initialize(*config); + + HtWordList words(*config); + if(words.Open(config->Find("word_db"), O_RDONLY) == OK) + { + cout << "htstat: Total words: " << words.WordRefs()->Count() << endl; + cout << "htstat: Total unique words: " << words.Words()->Count() << endl; + words.Close(); + } + + return 0; +} + + +//***************************************************************************** +// void usage() +// Display program usage information +// +void usage() +{ + cout << "usage: htstat [-v][-a][-c configfile][-u]\n"; + cout << "This program is part of ht://Dig " << VERSION << "\n\n"; + cout << "Options:\n"; + cout << "\t-v\tVerbose mode. This increases the verbosity of the\n"; + cout << "\t\tprogram. Using more than 2 is probably only useful\n"; + cout << "\t\tfor debugging purposes. The default verbose mode\n"; + cout << "\t\tgives a progress on what it is doing and where it is.\n\n"; + cout << "\t-a\tUse alternate work files.\n"; + cout << "\t\tTells htstat to append .work to the database files \n"; + cout << "\t\tallowing it to operate on a second set of databases.\n"; + cout << "\t-c configfile\n"; + cout << "\t\tUse the specified configuration file instead on the\n"; + cout << "\t\tdefault.\n\n"; + cout << "\t-u\tGive a list of URLs in the document database.\n\n"; + exit(0); +} + + +//***************************************************************************** +// Report an error and die +// +void reportError(char *msg) +{ + cout << "htstat: " << msg << "\n\n"; + exit(1); +} |
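The three tools in this patch share one access pattern for the document
database: open it, walk the list of document IDs, inspect each DocumentRef,
and close. As a purely illustrative sketch (the tool name and its per-state
report are hypothetical, it is not part of this patch, and it reuses only
calls that already appear in htpurge.cc and htstat.cc above), a minimal
read-only reporter built on the same skeleton could look like this:

// htstate.cc -- hypothetical example, not part of ht://Dig or this patch.
// Tallies the documents in the doc DB by DocState(), using the same
// DocumentDB/DocumentRef calls as htpurge.cc above.  printf() is used so
// the sketch does not depend on which ht://Dig header pulls in iostream.
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */

#include <stdio.h>

#include "HtConfiguration.h"
#include "DocumentDB.h"
#include "DocumentRef.h"
#include "defaults.h"

int main()
{
    HtConfiguration* config = HtConfiguration::config();
    config->Defaults(&defaults[0]);
    String configfile = DEFAULT_CONFIG_FILE;   // fixed path, for brevity
    config->Read(configfile);

    const String doc_db      = config->Find("doc_db");
    const String doc_index   = config->Find("doc_index");
    const String doc_excerpt = config->Find("doc_excerpt");

    DocumentDB db;
    if (db.Open(doc_db, doc_index, doc_excerpt) != OK)
        return 1;                              // could not open the databases

    List *IDs = db.DocIDs();
    int total = 0, noindex = 0, obsolete = 0, notfound = 0;

    IDs->Start_Get();
    IntObject *id;
    while ((id = (IntObject *) IDs->Get_Next()))
    {
        DocumentRef *ref = db[id->Value()];
        if (!ref)
            continue;
        total++;
        if (ref->DocState() == Reference_noindex)   noindex++;
        if (ref->DocState() == Reference_obsolete)  obsolete++;
        if (ref->DocState() == Reference_not_found) notfound++;
        delete ref;
    }
    delete IDs;
    db.Close();

    printf("total: %d\nnoindex: %d\nobsolete: %d\nnot found: %d\n",
           total, noindex, obsolete, notfound);
    return 0;
}

htpurge.cc builds on exactly this skeleton; it only adds the db.Delete()
calls for the states it wants to drop and a second pass over the word
database to discard the matching word entries.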