summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h')
-rw-r--r--debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h614
1 files changed, 614 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h b/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h
new file mode 100644
index 00000000..5b915e39
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h
@@ -0,0 +1,614 @@
+//----------------------------------------------------------------
+//
+// libhtdig_api.h
+//
+// Header file for the htdig shared library API
+//
+// 1/25/2002 created
+//
+// Neal Richter [email protected]
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: libhtdig_api.h,v 1.4 2004/05/28 13:15:29 lha Exp $
+//
+//----------------------------------------------------------------
+
+#ifndef LIBHTDIG_API_H
+#define LIBHTDIG_API_H
+
+#include <time.h>
+
+#ifndef TRUE //fallback boolean constants: each is defined only if not already defined
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+
+#define HTDIG_MAX_FILENAME_PATH_L 1024 //size of the path, URL and filter buffers in the parameter structs
+#define HTDIG_DOCUMENT_ID_L 32 //size of htdig_simple_doc_struct.documentid
+#define HTDIG_DOCUMENT_TITLE_L 256 //size of the 'title' buffers
+#define HTDIG_DOCUMENT_META_L 4096 //size of htdig_simple_doc_struct.meta
+#define HTDIG_DOCUMENT_CONTENT_TYPE_L 32 //size of htdig_simple_doc_struct.content_type
+#define HTDIG_DOCUMENT_EXCERPT_L 1024 //size of htsearch_query_match_struct.excerpt
+//make sure HTDIG_DOCUMENT_EXCERPT_L is more than config 'excerpt_length'
+
+//default failsafe size of 'excerpt' document
+//make sure it's more than config 'max_head_length'
+#define HTDIG_DEFAULT_EXCERPT_SIZE 524288
+
+//size of htsearch_query_struct.raw_query; should be the same as the default value in HTDIG
+#define HTDIG_MAX_QUERY_L 256
+
+//MIME-like type string for custom text documents (see htdig_simple_doc_struct.content_type)
+#define HTDIG_CUSTOM_TEXT_MIME_TYPE "text/vnd.customdocument"
+
+//htfuzzy: bit flags for htfuzzy_parameters_struct.algorithms_flag, each paired with its name string
+#define HTDIG_ALG_ACCENTS 0x00000100 //"accents"
+#define HTDIG_ALG_ACCENTS_STR "accents"
+
+#define HTDIG_ALG_ENDINGS 0x00001000 //"endings"
+#define HTDIG_ALG_ENDINGS_STR "endings"
+
+#define HTDIG_ALG_METAPHONE 0x00000010 //"metaphone"
+#define HTDIG_ALG_METAPHONE_STR "metaphone"
+
+#define HTDIG_ALG_SOUNDEX 0x00000001 //"soundex"
+#define HTDIG_ALG_SOUNDEX_STR "soundex"
+
+#define HTDIG_ALG_SYNONYMS 0x00010000 //"synonyms"
+#define HTDIG_ALG_SYNONYMS_STR "synonyms"
+
+
+//searching: values for htsearch_query_struct.algorithms_flag ('method' in HTDIG)
+#define HTSEARCH_ALG_AND 0x00000100 //"and"
+#define HTSEARCH_ALG_AND_STR "and"
+
+#define HTSEARCH_ALG_BOOLEAN 0x00000001 //"boolean"
+#define HTSEARCH_ALG_BOOLEAN_STR "boolean"
+
+#define HTSEARCH_ALG_OR 0x00000010 //"or"
+#define HTSEARCH_ALG_OR_STR "or"
+
+//values for htsearch_query_struct.format
+#define HTSEARCH_FORMAT_LONG 0x00000001 //"long"
+#define HTSEARCH_FORMAT_LONG_STR "long"
+
+#define HTSEARCH_FORMAT_SHORT 0x00000010 //"short"
+#define HTSEARCH_FORMAT_SHORT_STR "short"
+
+//values for htsearch_query_struct.sortby_flag
+#define HTSEARCH_SORT_SCORE 0x00000001 //"score"
+#define HTSEARCH_SORT_SCORE_STR "score"
+
+#define HTSEARCH_SORT_REV_SCORE 0x00000010 //"reverse score"
+#define HTSEARCH_SORT_REV_SCORE_STR "reverse score"
+
+#define HTSEARCH_SORT_TIME 0x00000100 //"time"
+#define HTSEARCH_SORT_TIME_STR "time"
+
+#define HTSEARCH_SORT_REV_TIME 0x00001000 //"reverse time"
+#define HTSEARCH_SORT_REV_TIME_STR "reverse time"
+
+#define HTSEARCH_SORT_TITLE 0x00010000 //"title"
+#define HTSEARCH_SORT_TITLE_STR "title"
+
+#define HTSEARCH_SORT_REV_TITLE 0x00100000 //"reverse title"
+#define HTSEARCH_SORT_REV_TITLE_STR "reverse title"
+
+
+
+#define HTDIG_ERROR_CONFIG_READ -101 //-101..-118: HTDIG_ERROR_* (indexing) codes
+#define HTDIG_ERROR_URL_PART -102
+#define HTDIG_ERROR_URL_REWRITE -103
+#define HTDIG_ERROR_URL_CREATE_FILE -104
+#define HTDIG_ERROR_IMAGE_CREATE_FILE -105
+#define HTDIG_ERROR_OPEN_CREATE_DOCDB -106
+#define HTDIG_ERROR_LOGFILE_OPEN -107
+#define HTDIG_ERROR_LOGFILE_CLOSE -108
+//HTDIG_ERROR_TESTURL_*: NOTE(review) presumably the rejection reasons from htdig_index_test_url -- confirm
+#define HTDIG_ERROR_TESTURL_EXCLUDE -109
+#define HTDIG_ERROR_TESTURL_BADQUERY -110
+#define HTDIG_ERROR_TESTURL_EXTENSION -111
+#define HTDIG_ERROR_TESTURL_EXTENSION2 -112
+#define HTDIG_ERROR_TESTURL_LIMITS -113
+#define HTDIG_ERROR_TESTURL_LIMITSNORM -114
+#define HTDIG_ERROR_TESTURL_SRCH_RESTRICT -115
+#define HTDIG_ERROR_TESTURL_SRCH_EXCLUDE -116
+#define HTDIG_ERROR_TESTURL_REWRITE_EMPTY -117
+#define HTDIG_ERROR_TESTURL_ROBOT_FORBID -118
+//-201..-212: HTSEARCH_ERROR_* codes
+#define HTSEARCH_ERROR_NO_MATCH -201
+#define HTSEARCH_ERROR_BAD_MATCH_INDEX -202
+#define HTSEARCH_ERROR_BAD_DOCUMENT -203
+#define HTSEARCH_ERROR_TEMPLATE_ERROR -204
+#define HTSEARCH_ERROR_LOGFILE_OPEN -205
+#define HTSEARCH_ERROR_LOGFILE_CLOSE -206
+#define HTSEARCH_ERROR_CONFIG_READ -207
+#define HTSEARCH_ERROR_URL_PART -208
+#define HTSEARCH_ERROR_WORDDB_READ -209
+#define HTSEARCH_ERROR_DOCINDEX_READ -210
+#define HTSEARCH_ERROR_DOCDB_READ -211
+#define HTSEARCH_ERROR_EXCERPTDB_READ -212
+//-301..-308: HTMERGE_ERROR_* codes
+#define HTMERGE_ERROR_LOGFILE_OPEN -301
+#define HTMERGE_ERROR_LOGFILE_CLOSE -302
+#define HTMERGE_ERROR_CONFIG_READ -303
+#define HTMERGE_ERROR_URL_PART -304
+#define HTMERGE_ERROR_WORDDB_READ -305
+#define HTMERGE_ERROR_DOCINDEX_READ -306
+#define HTMERGE_ERROR_DOCDB_READ -307
+#define HTMERGE_ERROR_EXCERPTDB_READ -308
+
+#define PHP_HTDIG_CONFIGFILE_PARM "configFile" //parameter-name strings; most match a field of htdig_parameters_struct
+#define PHP_HTDIG_URL_PARM "URL"
+#define PHP_HTDIG_LIMITTO_PARM "limit_urls_to"
+#define PHP_HTDIG_LIMITN_PARM "limit_normalized"
+#define PHP_HTDIG_EXCLUDEURLS_PARM "exclude_urls"
+#define PHP_HTDIG_SEARCHRESTRICT_PARM "search_restrict"
+#define PHP_HTDIG_SEARCHEXCLUDE_PARM "search_exclude"
+#define PHP_HTDIG_MAXHOPCOUNT_PARM "max_hop_cont" //NOTE(review): looks like a typo of "max_hop_count" (the struct field); callers may depend on this spelling -- confirm before changing
+#define PHP_HTDIG_URLREWRITE_PARM "url_rewrite_rules"
+#define PHP_HTDIG_BAD_QUERYSTR_PARM "bad_querystr"
+
+//=============================================================================
+//===== HTDIG INDEXING API ====================================================
+
+
+/***************************************************
+ * HTDIG_DOCUMENTATION for htdig_parameters_struct
+ *
+ * DEBUGGING PARAMETERS
+ *
+ * int debug
+ * Verbose mode. This increases the verbosity of the
+ * program. Using more than 2 is probably only useful
+ * for debugging purposes. The default verbose mode
+ * gives a nice progress report while digging.
+ *
+ * char logFile
+ * File to stream debugging & error messages to!
+ *
+ * BOOLEAN PARAMETERS
+ *
+ * int initial
+ * Initial. Do not use any old databases. This is
+ * accomplished by first erasing the databases
+ *
+ * int create_text_database
+ * Create an ASCII version of the document database.
+ * This database is easy to parse with other programs so
+ * that information can be extracted from it.
+ *
+ * int report_statistics
+ * Report statistics after completion.
+ *
+ * int alt_work_area
+ * Use alternate work files.
+ * Tells htdig to append .work to database files, causing
+ * a second copy of the database to be built. This allows
+ * the original files to be used by htsearch during the
+ * indexing run.
+ *
+ *
+ * STRING PARAMETERS
+ *
+ * char configFile
+ * configfile
+ * Use the specified configuration file instead of the
+ * default.
+ *
+ * char credentials
+ * username:password
+ * Tells htdig to send the supplied username and
+ * password with each HTTP request. The credentials
+ * will be encoded using the 'Basic' authentication scheme.
+ * There *HAS* to be a colon (:) between the username
+ * and password.
+ *
+ *
+ * char max_hops //9 digit limit
+ * hopcount
+ * Limit the stored documents to those which are at
+ * most hopcount links away from the start URL.
+ *
+ * char minimalFile
+ *
+ * char URL
+ * 'command-line' URLs from stdin
+ * fetches & indexes these URLs
+ *
+ ******************************************************************/
+
+typedef struct htdig_parameters_struct {
+ //parameter block for htdig_index_open() and htdig_index_test_url(); strings are fixed-size char buffers
+ char configFile[HTDIG_MAX_FILENAME_PATH_L]; //config file to use instead of the default
+ char DBpath[HTDIG_MAX_FILENAME_PATH_L];
+ char credentials[HTDIG_MAX_FILENAME_PATH_L]; //"username:password" -- the colon is required
+ char max_hops[10]; //hop-count limit as text, 9 digit limit
+ char minimalFile[HTDIG_MAX_FILENAME_PATH_L];
+
+ //debugging & logfile
+ char logFile[HTDIG_MAX_FILENAME_PATH_L]; //location of log file
+ int debug; //verbosity level: 0, 1, 2, 3, 4, 5
+
+ //boolean values
+ int initial; //do not use any old databases (they are erased first)
+ int create_text_database; //also create an ASCII version of the document database
+ int report_statistics; //report statistics after completion
+ int alt_work_area; //append .work to database files so a second copy is built
+ int use_cookies;
+
+ //spidering filters
+ char URL[HTDIG_MAX_FILENAME_PATH_L]; //'command-line' URLs to fetch & index
+ char limit_urls_to[HTDIG_MAX_FILENAME_PATH_L];
+ char limit_normalized[HTDIG_MAX_FILENAME_PATH_L];
+ char exclude_urls[HTDIG_MAX_FILENAME_PATH_L];
+ char search_restrict[HTDIG_MAX_FILENAME_PATH_L];
+ char search_exclude[HTDIG_MAX_FILENAME_PATH_L];
+ char url_rewrite_rules[HTDIG_MAX_FILENAME_PATH_L];
+ char bad_querystr[HTDIG_MAX_FILENAME_PATH_L];
+ char locale[16];
+ char title_factor[16];
+ char text_factor[16];
+ char meta_description_factor[16];
+ int max_hop_count; //NOTE(review): overlaps with max_hops (text) above; which one is honored is not visible here
+
+ //the rewritten URL - OUTGOING after htdig_index_test_url
+ char rewritten_URL[HTDIG_MAX_FILENAME_PATH_L];
+
+} htdig_parameters_struct;
+
+/*****************************************************************
+ * HTDIG_DOCUMENTATION for htdig_simple_doc_struct
+ *
+ * STRING PARAMETERS
+ *
+ * char location
+ * the 'URL' of the document. Can be any useful string.
+ *
+ * char documentid
+ * document id of document [NOT CURRENTLY USED - IGNORED]
+ *
+ * char title
+ * document title
+ *
+ * char meta
+ * content that is indexed but won't appear in any search excerpts
+ *
+ * char * contents
+ * pointer to a NULL TERMINATED string of information to be
+ * indexed.
+ *
+ * char content_type
+ * a MIME-like string
+ * custom MIME-type defined above, others are supported by
+ * htdig as well.
+ *
+ *
+ *****************************************************************/
+
+typedef struct htdig_simple_doc_struct {
+ //one caller-supplied document, passed to htdig_index_simple_doc()
+ char location[HTDIG_MAX_FILENAME_PATH_L]; //the 'URL' of the document; any string
+ char documentid[HTDIG_DOCUMENT_ID_L]; //not currently used - ignored
+ char title[HTDIG_DOCUMENT_TITLE_L]; //document title
+ char meta[HTDIG_DOCUMENT_META_L]; //indexed, but not shown in search excerpts
+ char *contents; //NUL-terminated text to index; caller MUST ALLOCATE & FREE!!!
+ char content_type[HTDIG_DOCUMENT_CONTENT_TYPE_L]; //MIME-ISH string, e.g. HTDIG_CUSTOM_TEXT_MIME_TYPE
+ //struct tm time_tm; // use to override index time
+ time_t doc_time; //NOTE(review): presumably the document time that replaced the disabled time_tm field -- confirm
+
+} htdig_simple_doc_struct;
+
+
+int htdig_index_open(htdig_parameters_struct *); //NOTE(review): int results presumably carry HTDIG_ERROR_* codes on failure -- confirm
+int htdig_index_simple_doc(htdig_simple_doc_struct * );
+int htdig_index_urls(void);
+int htdig_index_reset(void);
+int htdig_index_close(void);
+//per the struct's field comment, htparms->rewritten_URL is the outgoing result of this call
+int htdig_index_test_url(htdig_parameters_struct *htparms);
+//NOTE(review): presumably reports the config 'max_head_length' value -- confirm
+int htdig_get_max_head_length(void);
+
+
+
+
+//=============================================================================
+//===== HTDIG MERGING API =====================================================
+
+/**************************************************
+ * HTDIG_DOCUMENTATION for htmerge_parameters_struct
+ *
+ * DEBUGGING PARAMETERS
+ *
+ * int debug
+ * Verbose mode. This increases the verbosity of the
+ * program. Using more than 2 is probably only useful
+ * for debugging purposes. The default verbose mode
+ * gives a progress on what it is doing and where it is.
+ *
+ * char logFile
+ * File to stream debugging & error messages to!
+ *
+ *
+ * BOOLEAN PARAMETERS
+ *
+ * int alt_work_area
+ * Use alternate work files.
+ * Tells htmerge to append .work to database files causing
+ * a second copy of the database to be built. This allows
+ * original files to be used by htsearch during the indexing run.
+ *
+ *
+ * STRING PARAMETERS
+ *
+ * char configFile
+ * configfile
+ * Use the specified configuration file instead of the default.
+ *
+ * char merge_configFile
+ * merge_configfile
+ * Merge the databases specified into the databases specified
+ * by -c or the default.
+ *
+ *
+ *************************************************/
+
+typedef struct htmerge_parameters_struct {
+ //parameter block passed to htmerge_index_merge()
+ char configFile[HTDIG_MAX_FILENAME_PATH_L]; //config file to use instead of the default
+ char merge_configFile[HTDIG_MAX_FILENAME_PATH_L]; //config naming the databases to merge in
+
+ //debugging & logfile
+ char logFile[HTDIG_MAX_FILENAME_PATH_L]; //location of log file
+ int debug; //verbosity level: 0, 1, 2, 3, 4, 5
+
+ //boolean values
+ int alt_work_area; //append .work to database files so a second copy is built
+
+} htmerge_parameters_struct;
+//NOTE(review): presumably returns an HTMERGE_ERROR_* code on failure -- confirm in the implementation
+int htmerge_index_merge(htmerge_parameters_struct *);
+
+
+
+
+
+//=============================================================================
+//===== HTDIG HTFUZZY API =====================================================
+
+
+
+/**************************************************
+ * HTDIG_DOCUMENTATION for htfuzzy_parameters_struct
+ *
+ * DEBUGGING PARAMETERS
+ *
+ * int debug
+ * Verbose mode. This increases the verbosity of the
+ * program. Using more than 2 is probably only useful
+ * for debugging purposes.
+ *
+ * char logFile
+ * File to stream debugging & error messages to!
+ *
+ *
+ * PARAMETERS
+ *
+ * char configFile
+ * configfile
+ * Use the specified configuration file instead of the default.
+ *
+ * int algorithms_flag
+ * Bitwise Flags to signal algorithms to be used
+ *
+ * soundex == HTDIG_ALG_SOUNDEX
+ * metaphone == HTDIG_ALG_METAPHONE
+ * accents == HTDIG_ALG_ACCENTS
+ * endings == HTDIG_ALG_ENDINGS
+ * synonyms == HTDIG_ALG_SYNONYMS
+ *
+ ***************************************************/
+
+
+typedef struct htfuzzy_parameters_struct {
+ //parameter block passed to htfuzzy_index()
+ char configFile[HTDIG_MAX_FILENAME_PATH_L]; //config file to use instead of the default
+ int algorithms_flag; //bitwise OR of HTDIG_ALG_* flags
+
+ //debugging & logfile
+ char logFile[HTDIG_MAX_FILENAME_PATH_L]; //location of log file
+ int debug; //verbosity level: 0, 1, 2, 3, 4, 5
+
+ //boolean values (none at present)
+
+} htfuzzy_parameters_struct;
+
+
+// htfuzzy functions
+int htfuzzy_index(htfuzzy_parameters_struct *);
+
+
+
+
+//==============================================================================
+//===== HTDIG SEARCHING API ====================================================
+
+/************************************************
+ * HTDIG_DOCUMENTATION for htsearch_parameters_struct
+ *
+ * DEBUGGING PARAMETERS
+ *
+ * int debug
+ * Verbose mode. This increases the verbosity of the
+ * program. Using more than 2 is probably only useful
+ * for debugging purposes. The default verbose mode
+ * gives a progress on what it is doing and where it is.
+ *
+ * char logFile
+ * File to stream debugging & error messages to!
+ *
+ * STRING PARAMETERS
+ *
+ * char configFile
+ * configfile
+ * Use the specified configuration file instead of the default.
+ *
+ *
+ **************************************************/
+
+typedef struct htsearch_parameters_struct {
+ //parameter block passed to htsearch_open()
+ char configFile[HTDIG_MAX_FILENAME_PATH_L]; //config file to use instead of the default
+ char DBpath[HTDIG_MAX_FILENAME_PATH_L];
+ char locale[16];
+
+ //debugging & logfile
+ char logFile[HTDIG_MAX_FILENAME_PATH_L]; //location of log file
+ int debug; //verbosity level: 0, 1, 2, 3, 4, 5
+
+ //filters
+ char search_restrict[HTDIG_MAX_FILENAME_PATH_L];
+ char search_exclude[HTDIG_MAX_FILENAME_PATH_L];
+ char title_factor[16];
+ char text_factor[16];
+ char meta_description_factor[16];
+
+} htsearch_parameters_struct;
+
+
+
+
+/*****************************************************************
+ * HTDIG_DOCUMENTATION for htsearch_query_struct
+ *
+ * STRING PARAMETERS
+ *
+ * char raw_query
+ * STRING of text that is the search query -- syntax is important
+ *
+ * INTEGER PARAMETERS
+ *
+ * int algorithms_flag [ALSO CALLED 'method' IN HTDIG]
+ * HTSEARCH_ALG_BOOLEAN
+ * HTSEARCH_ALG_OR
+ * HTSEARCH_ALG_AND
+ *
+ * int sortby_flag
+ * score, date, title & reversed
+ * HTSEARCH_SORT_SCORE
+ * HTSEARCH_SORT_REV_SCORE
+ * HTSEARCH_SORT_TIME
+ * HTSEARCH_SORT_REV_TIME
+ * HTSEARCH_SORT_TITLE
+ * HTSEARCH_SORT_REV_TITLE
+ *
+ * int format
+ * short, long (with excerpt)
+ * HTSEARCH_FORMAT_LONG
+ * HTSEARCH_FORMAT_SHORT
+ *
+ *
+ *
+ * TODO: 'Connect' these htsearch features to this API
+ *
+ * config
+ * Specifies the name of the configuration file.
+ *
+ * exclude
+ * This value is a pattern that specifies which URLs are to be excluded from
+ * the search results.
+ *
+ * keywords
+ * Used to specify a list of required words that have to be in the documents.
+ *
+ * restrict
+ * This value is a pattern that all URLs of the search results will have to
+ * match.
+ *
+ * startyear, startmonth, startday, endyear, endmonth, endday
+ * These values specify the allowed range of document modification dates
+ * allowed in the search results.
+ *
+ *
+ *
+ *****************************************************************/
+
+typedef struct htsearch_query_struct {
+ //one search request, passed to htsearch_query()
+ char raw_query[HTDIG_MAX_QUERY_L]; //the search query text -- syntax is important
+
+ int algorithms_flag; //HTSEARCH_ALG_* ('method' in HTDIG)
+ int sortby_flag; //HTSEARCH_SORT_*
+ int format; //HTSEARCH_FORMAT_LONG (with excerpt) or HTSEARCH_FORMAT_SHORT
+
+} htsearch_query_struct;
+
+
+/*****************************************************************
+ * HTDIG_DOCUMENTATION for htsearch_query_match_struct
+ *
+ * STRING PARAMETERS
+ *
+ * char title
+ * Title of document returned
+ *
+ * char URL
+ * URL/location-string of document returned
+ *
+ * char excerpt
+ * Excerpt with search words highlighted with
+ * <strong>searchword</strong>
+ *
+ * INTEGER PARAMETERS
+ *
+ * int score
+ * score in 'number of stars'
+ * [MAX NUMBER OF STARS DECLARED IN CONFIG FILE]
+ *
+ * int score_percent //top result is 100%
+ *
+ * time_t time [DOCUMENT TIME; NOTE: no such member in the struct below]
+ * struct tm time_tm [DOCUMENT TIME]
+ * int size [TOTAL DOCUMENT SIZE]
+ *
+ *
+ *****************************************************************/
+
+typedef struct htsearch_query_match_struct {
+ //one search hit, filled in through htsearch_get_nth_match()
+ char title[HTDIG_DOCUMENT_TITLE_L]; //title of the document returned
+ char URL[HTDIG_MAX_FILENAME_PATH_L]; //URL/location string of the document returned
+ char excerpt[HTDIG_DOCUMENT_EXCERPT_L]; //excerpt, search words wrapped in <strong>...</strong>
+ int score; //score in 'number of stars' (max is set in the config file)
+ int score_percent; //top result is 100%
+ struct tm time_tm; //document time
+ int size; //total document size
+
+} htsearch_query_match_struct;
+
+
+// htsearch functions -- NOTE(review): int results presumably carry HTSEARCH_ERROR_* codes on failure; confirm
+
+int htsearch_open(htsearch_parameters_struct *);
+int htsearch_query(htsearch_query_struct *);
+
+int htsearch_get_nth_match(int, htsearch_query_match_struct *);
+int htsearch_close(void); //explicit (void): a real prototype in C, matching the htdig_index_* declarations
+
+//htsearch_free(indicator)
+
+char * htsearch_get_error(void); //explicit (void) for the same reason; ownership of the returned string is not visible here
+
+
+#endif /* LIBHTDIG_API_H */
+