diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/libhtdig/libhtdig_api.h | 614 |
1 files changed, 614 insertions, 0 deletions
//----------------------------------------------------------------
//
// libhtdig_api.h
//
// Header file for the htdig shared library API
//
// 1/25/2002 created
//
// Neal Richter [email protected]
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: libhtdig_api.h,v 1.4 2004/05/28 13:15:29 lha Exp $
//
//----------------------------------------------------------------

#ifndef LIBHTDIG_API_H
#define LIBHTDIG_API_H

#include <time.h>

#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif


//fixed buffer sizes (in bytes) of the char arrays in the structs below
#define HTDIG_MAX_FILENAME_PATH_L 1024
#define HTDIG_DOCUMENT_ID_L 32
#define HTDIG_DOCUMENT_TITLE_L 256
#define HTDIG_DOCUMENT_META_L 4096
#define HTDIG_DOCUMENT_CONTENT_TYPE_L 32
#define HTDIG_DOCUMENT_EXCERPT_L 1024
//make sure HTDIG_DOCUMENT_EXCERPT_L is more than config 'excerpt_length'

//default failsafe size of 'excerpt' document
//make sure it's more than config 'max_head_length'
#define HTDIG_DEFAULT_EXCERPT_SIZE 524288

//should be the same as the default value in HTDIG
#define HTDIG_MAX_QUERY_L 256


#define HTDIG_CUSTOM_TEXT_MIME_TYPE "text/vnd.customdocument"

//htfuzzy
//each algorithm has a distinct bit so several can be OR-ed together
//into htfuzzy_parameters_struct.algorithms_flag
#define HTDIG_ALG_ACCENTS 0x00000100 //"accents"
#define HTDIG_ALG_ACCENTS_STR "accents"

#define HTDIG_ALG_ENDINGS 0x00001000 //"endings"
#define HTDIG_ALG_ENDINGS_STR "endings"

#define HTDIG_ALG_METAPHONE 0x00000010 //"metaphone"
#define HTDIG_ALG_METAPHONE_STR "metaphone"

#define HTDIG_ALG_SOUNDEX 0x00000001 //"soundex"
#define HTDIG_ALG_SOUNDEX_STR "soundex"

#define HTDIG_ALG_SYNONYMS 0x00010000 //"synonyms"
#define HTDIG_ALG_SYNONYMS_STR "synonyms"


//searching
#define HTSEARCH_ALG_AND 0x00000100 //"and"
#define HTSEARCH_ALG_AND_STR "and"

#define HTSEARCH_ALG_BOOLEAN 0x00000001 //"boolean"
#define HTSEARCH_ALG_BOOLEAN_STR "boolean"

#define HTSEARCH_ALG_OR 0x00000010 //"or"
#define HTSEARCH_ALG_OR_STR "or"


#define HTSEARCH_FORMAT_LONG 0x00000001 //"long"
#define HTSEARCH_FORMAT_LONG_STR "long"

#define HTSEARCH_FORMAT_SHORT 0x00000010 //"short"
#define HTSEARCH_FORMAT_SHORT_STR "short"


#define HTSEARCH_SORT_SCORE 0x00000001 //"score"
#define HTSEARCH_SORT_SCORE_STR "score"

#define HTSEARCH_SORT_REV_SCORE 0x00000010 //"reverse score"
#define HTSEARCH_SORT_REV_SCORE_STR "reverse score"

#define HTSEARCH_SORT_TIME 0x00000100 //"time"
#define HTSEARCH_SORT_TIME_STR "time"

#define HTSEARCH_SORT_REV_TIME 0x00001000 //"reverse time"
#define HTSEARCH_SORT_REV_TIME_STR "reverse time"

#define HTSEARCH_SORT_TITLE 0x00010000 //"title"
#define HTSEARCH_SORT_TITLE_STR "title"

#define HTSEARCH_SORT_REV_TITLE 0x00100000 //"reverse title"
#define HTSEARCH_SORT_REV_TITLE_STR "reverse title"



//error codes
//negative values are parenthesized so they expand safely inside any expression
#define HTDIG_ERROR_CONFIG_READ (-101)
#define HTDIG_ERROR_URL_PART (-102)
#define HTDIG_ERROR_URL_REWRITE (-103)
#define HTDIG_ERROR_URL_CREATE_FILE (-104)
#define HTDIG_ERROR_IMAGE_CREATE_FILE (-105)
#define HTDIG_ERROR_OPEN_CREATE_DOCDB (-106)
#define HTDIG_ERROR_LOGFILE_OPEN (-107)
#define HTDIG_ERROR_LOGFILE_CLOSE (-108)

#define HTDIG_ERROR_TESTURL_EXCLUDE (-109)
#define HTDIG_ERROR_TESTURL_BADQUERY (-110)
#define HTDIG_ERROR_TESTURL_EXTENSION (-111)
#define HTDIG_ERROR_TESTURL_EXTENSION2 (-112)
#define HTDIG_ERROR_TESTURL_LIMITS (-113)
#define HTDIG_ERROR_TESTURL_LIMITSNORM (-114)
#define HTDIG_ERROR_TESTURL_SRCH_RESTRICT (-115)
#define HTDIG_ERROR_TESTURL_SRCH_EXCLUDE (-116)
#define HTDIG_ERROR_TESTURL_REWRITE_EMPTY (-117)
#define HTDIG_ERROR_TESTURL_ROBOT_FORBID (-118)

#define HTSEARCH_ERROR_NO_MATCH (-201)
#define HTSEARCH_ERROR_BAD_MATCH_INDEX (-202)
#define HTSEARCH_ERROR_BAD_DOCUMENT (-203)
#define HTSEARCH_ERROR_TEMPLATE_ERROR (-204)
#define HTSEARCH_ERROR_LOGFILE_OPEN (-205)
#define HTSEARCH_ERROR_LOGFILE_CLOSE (-206)
#define HTSEARCH_ERROR_CONFIG_READ (-207)
#define HTSEARCH_ERROR_URL_PART (-208)
#define HTSEARCH_ERROR_WORDDB_READ (-209)
#define HTSEARCH_ERROR_DOCINDEX_READ (-210)
#define HTSEARCH_ERROR_DOCDB_READ (-211)
#define HTSEARCH_ERROR_EXCERPTDB_READ (-212)

#define HTMERGE_ERROR_LOGFILE_OPEN (-301)
#define HTMERGE_ERROR_LOGFILE_CLOSE (-302)
#define HTMERGE_ERROR_CONFIG_READ (-303)
#define HTMERGE_ERROR_URL_PART (-304)
#define HTMERGE_ERROR_WORDDB_READ (-305)
#define HTMERGE_ERROR_DOCINDEX_READ (-306)
#define HTMERGE_ERROR_DOCDB_READ (-307)
#define HTMERGE_ERROR_EXCERPTDB_READ (-308)

//parameter-name strings
#define PHP_HTDIG_CONFIGFILE_PARM "configFile"
#define PHP_HTDIG_URL_PARM "URL"
#define PHP_HTDIG_LIMITTO_PARM "limit_urls_to"
#define PHP_HTDIG_LIMITN_PARM "limit_normalized"
#define PHP_HTDIG_EXCLUDEURLS_PARM "exclude_urls"
#define PHP_HTDIG_SEARCHRESTRICT_PARM "search_restrict"
#define PHP_HTDIG_SEARCHEXCLUDE_PARM "search_exclude"
//NOTE(review): "max_hop_cont" looks like a typo for "max_hop_count", but the
//string is a runtime key; confirm what the callers expect before changing it.
#define PHP_HTDIG_MAXHOPCOUNT_PARM "max_hop_cont"
#define PHP_HTDIG_URLREWRITE_PARM "url_rewrite_rules"
#define PHP_HTDIG_BAD_QUERYSTR_PARM "bad_querystr"

//=============================================================================
//===== HTDIG INDEXING API ====================================================


/***************************************************
 * HTDIG_DOCUMENTATION for htdig_parameters_struct
 *
 *   DEBUGGING PARAMETERS
 *
 *   int debug
 *       Verbose mode.  This increases the verbosity of the
 *       program.  Using more than 2 is probably only useful
 *       for debugging purposes.  The default verbose mode
 *       gives a nice progress report while digging.
 *
 *   char logFile
 *       File to stream debugging & error messages to!
 *
 *   BOOLEAN PARAMETERS
 *
 *   int initial
 *       Initial.  Do not use any old databases.  This is
 *       accomplished by first erasing the databases
 *
 *   int create_text_database
 *       Create an ASCII version of the document database.
 *       This database is easy to parse with other programs so
 *       that information can be extracted from it.
 *
 *   int report_statistics
 *       Report statistics after completion.
 *
 *   int alt_work_area
 *       Use alternate work files.
 *       Tells htdig to append .work to database files, causing
 *       a second copy of the database to be built.  This allows
 *       the original files to be used by htsearch during the
 *       indexing run.
 *
 *
 *   STRING PARAMETERS
 *
 *   char configFile
 *       configfile
 *       Use the specified configuration file instead of the
 *       default.
 *
 *   char credentials
 *       username:password
 *       Tells htdig to send the supplied username and
 *       password with each HTTP request.  The credentials
 *       will be encoded using the 'Basic' authentication scheme.
 *       There *HAS* to be a colon (:) between the username
 *       and password.
 *
 *
 *   char max_hops      //9 digit limit
 *       hopcount
 *       Limit the stored documents to those which are at
 *       most hopcount links away from the start URL.
 *
 *   char minimalFile
 *
 *   char URL
 *       'command-line' URLs from stdin
 *       fetches & indexes these URLs
 *
 *   NOTE(review): DBpath, use_cookies, the spidering filters other
 *   than URL, locale, the *_factor strings and max_hop_count are
 *   declared below but not described here.
 *
 ******************************************************************/

typedef struct htdig_parameters_struct {

    char configFile[HTDIG_MAX_FILENAME_PATH_L];
    char DBpath[HTDIG_MAX_FILENAME_PATH_L];
    char credentials[HTDIG_MAX_FILENAME_PATH_L];
    char max_hops[10];          //9 digit limit
    char minimalFile[HTDIG_MAX_FILENAME_PATH_L];

    //debugging & logfile
    char logFile[HTDIG_MAX_FILENAME_PATH_L];    //location of log file
    int debug;                  //0, 1 ,2, 3, 4, 5

    //boolean values
    int initial;
    int create_text_database;
    int report_statistics;
    int alt_work_area;
    int use_cookies;

    //spidering filters
    char URL[HTDIG_MAX_FILENAME_PATH_L];
    char limit_urls_to[HTDIG_MAX_FILENAME_PATH_L];
    char limit_normalized[HTDIG_MAX_FILENAME_PATH_L];
    char exclude_urls[HTDIG_MAX_FILENAME_PATH_L];
    char search_restrict[HTDIG_MAX_FILENAME_PATH_L];
    char search_exclude[HTDIG_MAX_FILENAME_PATH_L];
    char url_rewrite_rules[HTDIG_MAX_FILENAME_PATH_L];
    char bad_querystr[HTDIG_MAX_FILENAME_PATH_L];
    char locale[16];
    char title_factor[16];
    char text_factor[16];
    char meta_description_factor[16];
    int max_hop_count;

    //the rewritten URL - OUTGOING after htdig_index_test_url
    char rewritten_URL[HTDIG_MAX_FILENAME_PATH_L];

} htdig_parameters_struct;

/*****************************************************************
 * HTDIG_DOCUMENTATION for htdig_simple_doc_struct
 *
 *   STRING PARAMETERS
 *
 *   char location
 *       the 'URL' of the document.  Can be any useful string.
 *
 *   char documentid
 *       document id of document [NOT CURRENTLY USED - IGNORED]
 *
 *   char title
 *       document title
 *
 *   char meta
 *       content that is indexed but won't appear in any search excerpts
 *
 *   char * contents
 *       pointer to a NULL TERMINATED string of information to be
 *       indexed.
 *
 *   char content_type
 *       a MIME-like string
 *       custom MIME-type defined above, others are supported by
 *       htdig as well.
 *
 *
 *****************************************************************/

typedef struct htdig_simple_doc_struct {

    char location[HTDIG_MAX_FILENAME_PATH_L];
    char documentid[HTDIG_DOCUMENT_ID_L];
    char title[HTDIG_DOCUMENT_TITLE_L];
    char meta[HTDIG_DOCUMENT_META_L];
    char *contents;             //MUST ALLOCATE & FREE!!!
    char content_type[HTDIG_DOCUMENT_CONTENT_TYPE_L];   //MIME-ISH string
    //struct tm time_tm;        // use to override index time
    time_t doc_time;

} htdig_simple_doc_struct;


// htdig indexing functions
// NOTE(review): the int results presumably carry the HTDIG_ERROR_* codes
// defined above on failure -- confirm against the implementation.
int htdig_index_open(htdig_parameters_struct *htparms);
int htdig_index_simple_doc(htdig_simple_doc_struct *doc);
int htdig_index_urls(void);
int htdig_index_reset(void);
int htdig_index_close(void);

int htdig_index_test_url(htdig_parameters_struct *htparms);

int htdig_get_max_head_length(void);




//=============================================================================
//===== HTDIG MERGING API =====================================================

/**************************************************
 * HTDIG_DOCUMENTATION for htmerge_parameters_struct
 *
 *   DEBUGGING PARAMETERS
 *
 *   int debug
 *       Verbose mode.  This increases the verbosity of the
 *       program.  Using more than 2 is probably only useful
 *       for debugging purposes.  The default verbose mode
 *       gives a progress on what it is doing and where it is.
 *
 *   char logFile
 *       File to stream debugging & error messages to!
 *
 *
 *   BOOLEAN PARAMETERS
 *
 *   int alt_work_area
 *       Use alternate work files.
 *       Tells htmerge to append .work to database files causing
 *       a second copy of the database to be built.  This allows
 *       original files to be used by htsearch during the indexing run.
 *
 *
 *   STRING PARAMETERS
 *
 *   char configFile
 *       configfile
 *       Use the specified configuration file instead of the default.
 *
 *   char merge_configFile
 *       merge_configfile
 *       Merge the databases specified into the databases specified
 *       by -c or the default.
 *
 *
 *************************************************/

typedef struct htmerge_parameters_struct {

    char configFile[HTDIG_MAX_FILENAME_PATH_L];
    char merge_configFile[HTDIG_MAX_FILENAME_PATH_L];

    //debugging & logfile
    char logFile[HTDIG_MAX_FILENAME_PATH_L];    //location of log file
    int debug;                  //0, 1 ,2, 3, 4, 5

    //boolean values
    int alt_work_area;

} htmerge_parameters_struct;

// htmerge functions
int htmerge_index_merge(htmerge_parameters_struct *htparms);





//=============================================================================
//===== HTDIG HTFUZZY API =====================================================



/**************************************************
 * HTDIG_DOCUMENTATION for htfuzzy_parameters_struct
 *
 *   DEBUGGING PARAMETERS
 *
 *   int debug
 *       Verbose mode.  This increases the verbosity of the
 *       program.  Using more than 2 is probably only useful
 *       for debugging purposes.
 *
 *   char logFile
 *       File to stream debugging & error messages to!
 *
 *
 *   PARAMETERS
 *
 *   char configFile
 *       configfile
 *       Use the specified configuration file instead of the default.
 *
 *   int algorithms_flag
 *       Bitwise Flags to signal algorithms to be used
 *
 *       soundex   == HTDIG_ALG_SOUNDEX
 *       metaphone == HTDIG_ALG_METAPHONE
 *       accents   == HTDIG_ALG_ACCENTS
 *       endings   == HTDIG_ALG_ENDINGS
 *       synonyms  == HTDIG_ALG_SYNONYMS
 *
 ***************************************************/


typedef struct htfuzzy_parameters_struct {

    char configFile[HTDIG_MAX_FILENAME_PATH_L];
    int algorithms_flag;

    //debugging & logfile
    char logFile[HTDIG_MAX_FILENAME_PATH_L];    //location of log file
    int debug;                  //0, 1 ,2, 3, 4, 5

    //boolean values

} htfuzzy_parameters_struct;


// htfuzzy functions
int htfuzzy_index(htfuzzy_parameters_struct *htparms);




//==============================================================================
//===== HTDIG SEARCHING API ====================================================

/************************************************
 * HTDIG_DOCUMENTATION for htsearch_parameters_struct
 *
 *   DEBUGGING PARAMETERS
 *
 *   int debug
 *       Verbose mode.  This increases the verbosity of the
 *       program.  Using more than 2 is probably only useful
 *       for debugging purposes.  The default verbose mode
 *       gives a progress on what it is doing and where it is.
 *
 *   char logFile
 *       File to stream debugging & error messages to!
 *
 *   STRING PARAMETERS
 *
 *   char configFile
 *       configfile
 *       Use the specified configuration file instead of the default.
 *
 *   NOTE(review): DBpath, locale, the search filters and the
 *   *_factor strings are declared below but not described here.
 *
 **************************************************/

typedef struct htsearch_parameters_struct {

    char configFile[HTDIG_MAX_FILENAME_PATH_L];
    char DBpath[HTDIG_MAX_FILENAME_PATH_L];
    char locale[16];

    //debugging & logfile
    char logFile[HTDIG_MAX_FILENAME_PATH_L];    //location of log file
    int debug;                  //0, 1 ,2, 3, 4, 5

    //filters
    char search_restrict[HTDIG_MAX_FILENAME_PATH_L];
    char search_exclude[HTDIG_MAX_FILENAME_PATH_L];
    char title_factor[16];
    char text_factor[16];
    char meta_description_factor[16];

} htsearch_parameters_struct;




/*****************************************************************
 * HTDIG_DOCUMENTATION for htsearch_query_struct
 *
 *   STRING PARAMETERS
 *
 *   char raw_query
 *       STRING of text that is the search query -- syntax is important
 *
 *   INTEGER PARAMETERS
 *
 *   int algorithms_flag   [ALSO CALLED 'method' IN HTDIG]
 *       HTSEARCH_ALG_BOOLEAN
 *       HTSEARCH_ALG_OR
 *       HTSEARCH_ALG_AND
 *
 *   int sortby_flag
 *       score, date, title & reversed
 *       HTSEARCH_SORT_SCORE
 *       HTSEARCH_SORT_REV_SCORE
 *       HTSEARCH_SORT_TIME
 *       HTSEARCH_SORT_REV_TIME
 *       HTSEARCH_SORT_TITLE
 *       HTSEARCH_SORT_REV_TITLE
 *
 *   int format
 *       short, long (with excerpt)
 *       HTSEARCH_FORMAT_LONG
 *       HTSEARCH_FORMAT_SHORT
 *
 *
 *
 *   TODO:  'Connect' these htsearch features to this API
 *
 *   config
 *       Specifies the name of the configuration file.
 *
 *   exclude
 *       This value is a pattern that specifies which URLs are to be excluded from
 *       the search results.
 *
 *   keywords
 *       Used to specify a list of required words that have to be in the documents.
 *
 *   restrict
 *       This value is a pattern that all URLs of the search results will have to
 *       match.
 *
 *   startyear, startmonth, startday, endyear, endmonth, endday
 *       These values specify the allowed range of document modification dates
 *       allowed in the search results.
 *
 *
 *
 *****************************************************************/

typedef struct htsearch_query_struct {

    char raw_query[HTDIG_MAX_QUERY_L];

    int algorithms_flag;
    int sortby_flag;
    int format;

} htsearch_query_struct;


/*****************************************************************
 * HTDIG_DOCUMENTATION for htsearch_query_match_struct
 *
 *   STRING PARAMETERS
 *
 *   char title
 *       Title of document returned
 *
 *   char URL
 *       URL/location-string of document returned
 *
 *   char excerpt
 *       Excerpt with search words highlighted with
 *       <strong>searchword</strong>
 *
 *   INTEGER PARAMETERS
 *
 *   int score
 *       score in 'number of stars'
 *       [MAX NUMBER OF STARS DECLARED IN CONFIG FILE]
 *
 *   int score_percent       //top result is 100%
 *
 *   struct tm time_tm       [DOCUMENT TIME]
 *   int size                [TOTAL DOCUMENT SIZE]
 *
 *
 *****************************************************************/

typedef struct htsearch_query_match_struct {

    char title[HTDIG_DOCUMENT_TITLE_L];
    char URL[HTDIG_MAX_FILENAME_PATH_L];
    char excerpt[HTDIG_DOCUMENT_EXCERPT_L];
    int score;
    int score_percent;          //top result is 100%
    struct tm time_tm;
    int size;

} htsearch_query_match_struct;


// htsearch functions
// NOTE(review): the int results presumably carry the HTSEARCH_ERROR_* codes
// defined above on failure -- confirm against the implementation.

int htsearch_open(htsearch_parameters_struct *htparms);
int htsearch_query(htsearch_query_struct *query);

// n selects which match to copy into *match
// (see HTSEARCH_ERROR_BAD_MATCH_INDEX)
int htsearch_get_nth_match(int n, htsearch_query_match_struct *match);

// (void) rather than () so that C callers get a real prototype, matching
// the htdig_index_* declarations above
int htsearch_close(void);

//htsearch_free(indicator)

// NOTE(review): ownership of the returned string is not stated in this
// header -- confirm whether the caller may free or modify it.
char *htsearch_get_error(void);


#endif /* LIBHTDIG_API_H */