1 files changed, 4379 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htcommon/defaults.xml b/debian/htdig/htdig-3.2.0b6/htcommon/defaults.xml
new file mode 100644
index 00000000..f3fd2eb7
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htcommon/defaults.xml
@@ -0,0 +1,4379 @@
+<!DOCTYPE HtdigAttributes SYSTEM "defaults.dtd" >
+<HtdigAttributes>
+   <attribute name="accents_db" 
+              type="string" 
+              programs="htfuzzy htsearch" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_base}.accents.db</default>
+     <example>${database_base}.uml.db</example>
+     <description> 
+	The database file used for the fuzzy "accents" search 
+	algorithm. This database is created by 
+	<ref type="program">htfuzzy</ref> and used by 
+	<ref type="program">htsearch</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="accept_language" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.2.0b4" 
+              category="Indexing:Out" 
+              block="Server" >
+     <default></default>
+     <example>en-us en it</example>
+     <description> 
+	This attribute allows you to restrict the set of natural languages 
+	that are preferred as a response to an HTTP request performed by the 
+	digger. This can be done by putting one or more language tags 
+	(as defined by RFC 1766) in the preferred order, separated by spaces. 
+	By doing this, when the server performs a content negotiation based 
+	on the 'accept-language' given by the HTTP user agent, a different 
+	content can be shown depending on the value of this attribute. If 
+	set to an empty list, no language will be sent and the server default 
+	will be returned. 
+     </description>
+   </attribute>
+
+   <attribute name="add_anchors_to_excerpt" 
+              type="boolean" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Presentation:How" >
+     <default>true</default>
+     <example>no</example>
+     <description> 
+	If set to true, the first occurrence of each matched 
+	word in the excerpt will be linked to the closest 
+	anchor in the document. This only has effect if the 
+	<strong>EXCERPT</strong> variable is used in the output 
+	template and the excerpt is actually going to be displayed. 
+     </description>
+   </attribute>
+
+   <attribute name="allow_double_slash" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b4" 
+              category="Indexing:Out" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	If set to true, strings of multiple slashes ('/') in URL paths 
+	will be left intact, rather than being collapsed. This is necessary 
+	for some search engine URLs which use slashes to separate fields rather 
+	than to separate directory components.  However, it can lead to multiple database 
+	entries referring to the same file, and it causes '/foo//../' to 
+	be equivalent to '/foo/', rather than to '/'. 
+     </description>
+   </attribute>
+
+   <attribute name="allow_in_form" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Searching:UI" >
+     <default></default>
+     <example>search_algorithm search_results_header</example>
+     <description>	Allows the specified config file attributes to be specified
+	in search forms as separate fields. This could be used to
+	allow form writers to design their own headers and footers
+	and specify them in the search form. Another example would
+	be to offer a menu of search_algorithms in the form.
+	<codeblock>
+	&lt;SELECT NAME="search_algorithm"&gt;
+	&lt;OPTION VALUE="exact:1 prefix:0.6 synonyms:0.5 endings:0.1" SELECTED&gt;fuzzy
+	&lt;OPTION VALUE="exact:1"&gt;exact
+	&lt;/SELECT&gt;
+	</codeblock>
+	The general idea behind this is to make an input parameter out
+	of any configuration attribute that's not already automatically
+	handled by an input parameter. You can even make up your own
+	configuration attribute names, for purposes of passing data from
+	the search form to the results output. You're not restricted to
+	the existing attribute names. The attributes listed in the
+	allow_in_form list will be settable in the search form using
+	input parameters of the same name, and will be propagated to
+	the follow-up search form in the results template using template
+	variables of the same name in upper-case.
+	You can also make select lists out of any of these input
+	parameters, in the follow-up search form, using the
+	<ref type="attr">build_select_lists</ref>
+	configuration attribute.
+</description>
+   </attribute>
+
+   <attribute name="allow_numbers" 
+              type="boolean" 
+              programs="htdig htsearch" 
+              version="all" 
+              category="Indexing:What" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	If set to true, numbers are considered words. This 
+	means that searches can be done on numbers as well as 
+	regular words. All the same rules apply to numbers as 
+	to words. See the description of 
+	<ref type="attr">valid_punctuation</ref> for the 
+	rules used to determine what a word is. 
+     </description>
+   </attribute>
+
+   <attribute name="allow_space_in_url" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b6" 
+              category="Indexing:Where" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	If set to true, htdig will handle URLs that contain
+	embedded spaces. Technically, this is a violation of
+	<em>RFC 2396</em>, which says spaces should be stripped out
+	(as htdig does by default).  However, many web browsers
+	and HTML code generators violate this standard already,
+	so enabling this attribute allows htdig to handle these
+	non-compliant URLs.  Even with this attribute set, htdig
+	still strips out all white space (leading, trailing and
+	embedded), except that space characters embedded within
+	the URL will be encoded as %20.
+     </description>
+   </attribute>
+
+   <attribute name="allow_virtual_hosts" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.0.8b2" 
+              category="Indexing:Where" >
+     <default>true</default>
+     <example>false</example>
+     <description> 
+	If set to true, htdig will index virtual web sites as 
+	expected. If false, all URL host names will be 
+	normalized into whatever the DNS server claims the IP 
+	address maps to. If this option is set to false, 
+	there is no way to index either "soft" or "hard" 
+	virtual web sites. 
+     </description>
+   </attribute>
+
+   <attribute name="anchor_target" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Presentation:How" >
+     <default></default>
+     <example>body</example>
+     <description> 
+	When the first matched word in the excerpt is linked 
+	to the closest anchor in the document, this string 
+	can be set to specify a target in the link so the 
+	resulting page is displayed in the desired frame. 
+	This value will only be used if the 
+	<ref type="attr">add_anchors_to_excerpt</ref> 
+	attribute is set to true, the <strong>EXCERPT</strong> 
+	variable is used in the output template and the 
+	excerpt is actually displayed with a link. 
+     </description>
+   </attribute>
+
+   <attribute name="any_keywords" 
+              type="boolean" 
+              programs="htsearch" 
+              version="3.2.0b2" 
+              category="Searching:Method" >
+     <default>false</default>
+     <example>yes</example>
+     <description> 
+	If set to true, the words in the <strong>keywords</strong> 
+	input parameter in the search form will be joined with logical 
+	ORs rather than ANDs, so that any of the words provided will do. 
+	Note that this has nothing to do with limiting the search to 
+	words in META keywords tags. See the <a href="hts_form.html"> 
+	search form</a> documentation for details on this. 
+     </description>
+   </attribute>
+
+   <attribute name="author_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="??" 
+              category="Searching:Ranking" >
+     <default>1</default>
+     <example>1</example>
+     <description> 
+	TO BE COMPLETED<br/> 
+	See also <ref type="attr">heading_factor</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="authorization" 
+              type="string" 
+              programs="htdig" 
+              version="3.1.4" 
+              category="Indexing:Out" 
+              block="URL" >
+     <default></default>
+     <example>myusername:mypassword</example>
+     <description> 
+	This tells htdig to send the supplied 
+	<em>username</em><strong>:</strong><em>password</em> with each HTTP request. 
+	The credentials will be encoded using the "Basic" authentication 
+	scheme. There <em>must</em> be a colon (:) between the username and 
+	password.<br/> 
+	This attribute can also be specified on htdig's command line using 
+	the -u option, and will be blotted out so it won't show up in a 
+	process listing. If you use it directly in a configuration file, 
+	be sure to protect it so it is readable only by you, and do not 
+	use that same configuration file for htsearch. 
+     </description>
+   </attribute>
+
+   <attribute name="backlink_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Searching:Ranking" >
+     <default>1000</default>
+     <example>501.1</example>
+     <description> 
+	This is a weight of "how important" a page is, based on 
+	the number of URLs pointing to it. It's actually 
+	multiplied by the ratio of the incoming URLs (backlinks) 
+	and outgoing URLs (links on the page), to balance out pages 
+	with lots of links to pages that link back to them. The ratio 
+	gives lower weight to "link farms", which often have many 
+	links to them.  This factor can 
+	be changed without changing the database in any way. 
+	However, setting this value to something other than 0 
+	incurs a slowdown on search results. 
+     </description>
+   </attribute>
+
+   <attribute name="bad_extensions" 
+              type="string_list" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:Where" 
+              block="URL" >
+     <default>.wav .gz .z .sit .au .zip .tar .hqx .exe .com .gif .jpg .jpeg .aiff .class .map .ram .tgz .bin .rpm .mpg .mov .avi .css</default>
+     <example>.foo .bar .bad</example>
+     <description> 
+	This is a list of extensions on URLs which are 
+	considered non-parsable. This list is used mainly to 
+	supplement the MIME-types that the HTTP server provides 
+	with documents. Some HTTP servers do not have a correct 
+	list of MIME-types and so can advertise certain 
+	documents as text while they are in some binary format. 
+	If the list is empty, then all extensions are acceptable, 
+	provided they pass other criteria for acceptance or rejection. 
+	See also <ref type="attr">valid_extensions</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="bad_querystr" 
+              type="pattern_list" 
+              programs="htdig" 
+              version="3.1.0" 
+              category="Indexing:Where" 
+              block="URL" >
+     <default></default>
+     <example>forum=private section=topsecret&amp;passwd=required</example>
+     <description> 
+	This is a list of CGI query strings to be excluded from 
+	indexing. This can be used in conjunction with CGI-generated 
+	portions of a website to control which pages are 
+	indexed. 
+     </description>
+   </attribute>
+
+   <attribute name="bad_word_list" 
+              type="string" 
+              programs="htdig htsearch" 
+              version="all" 
+              category="Indexing:What,Searching:Method" >
+     <default>${common_dir}/bad_words</default>
+     <example>${common_dir}/badwords.txt</example>
+     <description> 
+	This specifies a file which contains words which should 
+	be excluded when digging or searching. This list should 
+	include the most common words or other words that you 
+	don't want to be able to search on (things like <em> 
+	sex</em> or <em>smut</em> are examples of these.)<br/> 
+	The file should contain one word per line. A sample 
+	bad words file is located in the <code>contrib/examples</code> 
+	directory. 
+     </description>
+   </attribute>
+
+   <attribute name="bin_dir" 
+              type="string" 
+              programs="all" 
+              version="all" 
+              category="File Layout" >
+     <default configmacro="true">BIN_DIR</default>
+     <example>/usr/local/bin</example>
+     <description> 
+	This is the directory in which the executables 
+	related to ht://Dig are installed. It is never used 
+	directly by any of the programs, but other attributes 
+	can be defined in terms of this one. 
+	<p> 
+	The default value of this attribute is determined at 
+	compile time. 
+	</p> 
+     </description>
+   </attribute>
+
+   <attribute name="boolean_keywords" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.1.6" 
+	      category="Presentation:How" >
+     <default>and or not</default>
+     <example>et ou non</example>
+     <description> 
+	These three strings are used as the keywords used in
+	constructing the LOGICAL_WORDS template variable,
+	and in parsing the <a href="hts_form.html#words">words</a> input
+	parameter when the <a href="hts_form.html#method">method</a> parameter
+	or <ref type="attr">match_method</ref> attribute
+	is set to <code>boolean</code>.
+	See also the <ref type="attr">boolean_syntax_errors</ref> attribute.
+     </description>
+   </attribute>
+
+   <attribute name="boolean_syntax_errors" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="3.1.6" 
+	      category="Presentation:How" >
+     <default>Expected 'a search word, a quoted phrase, a boolean expression between ()' 'at the end' 'instead of' 'end of expression' quotes</default>
+     <example> Attendait "un mot" "&#224; la fin" "au lieu de" "fin d'expression" "points de quotation" </example>
+     <description> 
+	These six strings are used as the keywords used to
+	construct various syntax error messages for errors encountered in
+	parsing the <a href="hts_form.html#words">words</a> input
+	parameter when the <a href="hts_form.html#method">method</a> parameter
+	or <ref type="attr">match_method</ref> attribute
+	is set to <code>boolean</code>.
+	They are used in conjunction with the
+	<ref type="attr">boolean_keywords</ref> attribute, and comprise all
+	English-specific parts of these error messages.  The order in which
+	the strings are put together may not be ideal, or even grammatically
+	correct, for all languages, but they can be used to make fairly
+	intelligible messages in many languages.
+     </description>
+   </attribute>
+
+   <attribute name="build_select_lists" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="3.2.0b1" 
+              category="Searching:UI" >
+     <default></default>
+     <example>MATCH_LIST matchesperpage matches_per_page_list \ 
+				1 1 1 matches_per_page "Previous Amount" \ 
+		RESTRICT_LIST,multiple restrict restrict_names 2 1 2 restrict "" \ 
+		FORMAT_LIST,radio format template_map 3 2 1 template_name ""</example>
+     <description> 
+	This list allows you to define any htsearch input parameter as 
+	a select list for use in templates, provided you also define 
+	the corresponding name list attribute which enumerates all the 
+	choices to put in the list. It can be used for existing input 
+	parameters, as well as any you define using the 
+	<ref type="attr">allow_in_form</ref> 
+	attribute. The entries in this list each consist of an octuple, 
+	a set of eight strings defining the variables and how they are to 
+	be used to build a select list. The attribute can contain many 
+	of these octuples. The strings in the string list are merely 
+	taken eight at a time. For each octuple of strings specified in 
+	build_select_lists, the elements have the following meaning:  
+	<ol> 
+	   <li>the name of the template variable to be defined as a list, 
+	   optionally followed by a comma and the type of list, and 
+	   optional formatting codes</li> 
+	   <li>the input parameter name that the select list will set</li>  
+	   <li>the name of the user-defined attribute containing the 
+	   name list</li> 
+	   <li>the tuple size used in the name list above</li>  
+	   <li>the index into a name list tuple for the value</li>  
+	   <li>the index for the corresponding label on the selector</li> 
+	   <li>the configuration attribute where the default value for 
+	   this input parameter is defined</li> 
+	   <li>the default label, if not an empty string, which will be 
+	   used as the label for an additional list item for the current 
+	   input parameter value if it doesn't match any value in the 
+	   given list</li> 
+	</ol> 
+	See the <a href="hts_selectors.html">select list documentation</a> 
+	for more information on this attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="caps_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="??" 
+              category="Searching:Ranking" >
+     <default>1</default>
+     <example>1</example>
+     <description> 
+	TO BE COMPLETED<br/> 
+	See also <ref type="attr">heading_factor</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="case_sensitive" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.1.0b2" 
+              category="Indexing:Where" >
+     <default>true</default>
+     <example>false</example>
+     <description> 
+	This specifies whether ht://Dig should consider URLs 
+	case-sensitive or not. If your server is case-insensitive, 
+	you should probably set this to false. 
+     </description>
+   </attribute>
+
+   <attribute name="check_unique_date" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b3" 
+              category="" 
+              block="Global" >
+     <default>false</default>
+     <example>false</example>
+     <description> 
+	Include the modification date of the page in the MD5 hash, to reduce the 
+	problem with identical but physically separate pages in different parts of the tree pointing to 
+	different pages.  
+     </description>
+   </attribute>
+
+   <attribute name="check_unique_md5" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b3" 
+              category="" 
+              block="Global" >
+     <default>false</default>
+     <example>false</example>
+     <description> 
+	Uses the MD5 hash of pages to reject aliases. This prevents multiple entries 
+	in the index caused by such things as symbolic links.<br/> 
+	Note: this may not do the right thing for incremental updates. 
+     </description>
+   </attribute>
+
+   <attribute name="collection_names" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.2.0b2" 
+              category="" >
+     <default></default>
+     <example>htdig_docs htdig_bugs</example>
+     <description> 
+	This is a list of config file names that are used for searching multiple databases. 
+	Simply put, htsearch will loop through the databases specified by each of these config 
+	files and present the result of the search on all of the databases. 
+	The corresponding config files are looked up in the <ref type="attr">config_dir</ref> directory. 
+	Each listed config file <strong>must</strong> exist, as well as the corresponding databases. 
+     </description>
+   </attribute>
+
+   <attribute name="common_dir" 
+              type="string" 
+              programs="all" 
+              version="all" 
+              category="File Layout" >
+     <default configmacro="true">COMMON_DIR</default>
+     <example>/tmp</example>
+     <description> 
+	Specifies the directory for files that will or can be 
+	shared among different search databases. The default 
+	value for this attribute is defined at compile time. 
+     </description>
+   </attribute>
+
+   <attribute name="common_url_parts" 
+              type="string_list" 
+              programs="all" 
+              version="3.1.0" 
+              category="URLs" >
+     <default>http:// http://www. ftp:// ftp://ftp. /pub/ .html .htm .gif .jpg .jpeg /index.html /index.htm .com/ .com mailto:</default>
+     <example>//www.htdig.org/ml/ \ 
+.html \ 
+http://dev.htdig.org/ \ 
+http://www.htdig.org/</example>
+     <description> 
+	Sub-strings often found in URLs stored in the 
+	database.  These are replaced in the database by an 
+	internal space-saving encoding.  If a string 
+	specified in <ref type="attr">url_part_aliases</ref> 
+	overlaps any string in common_url_parts, the 
+	common_url_parts string is ignored.<br/> 
+	Note that when this attribute is changed, the 
+	database should be rebuilt, unless the effect of 
+	"changing" the affected URLs in the database is 
+	wanted.<br/> 
+     </description>
+   </attribute>
+
+   <attribute name="compression_level" 
+              type="integer" 
+              programs="htdig" 
+              version="3.1.0" 
+              category="Indexing:How" >
+     <default>0</default>
+     <example>6</example>
+     <description> 
+	If specified and the <a 
+	href="http://www.cdrom.com/pub/infozip/zlib/">zlib</a> 
+	compression library was available when compiled, 
+	this attribute controls 
+	the amount of compression used in the <ref type="attr">doc_excerpt</ref> file. 
+     </description>
+   </attribute>
+
+   <attribute name="config" 
+              type="string" 
+              programs="all" 
+              version="??" 
+              category="File Layout" >
+     <default configmacro="true">DEFAULT_CONFIG_FILE</default>
+     <example></example>
+     <description> 
+	Name of configuration file to load. 
+	For security reasons, restrictions are placed on the values which 
+	can be specified on the command line to 
+	<ref type="program">htsearch</ref>. 
+	The default value of this attribute is determined at 
+	compile time. 
+     </description>
+   </attribute>
+
+   <attribute name="config_dir" 
+              type="string" 
+              programs="all" 
+              version="all" 
+              category="File Layout" >
+     <default configmacro="true">CONFIG_DIR</default>
+     <example>/var/htdig/conf</example>
+     <description> 
+	This is the directory which contains all configuration 
+	files related to ht://Dig. It is never used 
+	directly by any of the programs, but other attributes 
+	or the <ref type="attr">include</ref> directive 
+	can be defined in terms of this one. 
+	<p> 
+	The default value of this attribute is determined at 
+	compile time. 
+	</p> 
+     </description>
+   </attribute>
+
+   <attribute name="cookies_input_file" 
+              type="string" 
+              programs="htdig" 
+              version="3.2.0b4" 
+              category="Indexing:Connection" >
+     <default></default>
+     <example>${common_dir}/cookies.txt</example>
+     <description> 
+	Specifies the location of the file used for importing cookies
+	for the crawl. These cookies will be preloaded into htdig's
+	in-memory cookie jar, but aren't written back to the file.
+	Cookies are specified according to Netscape's format
+	(tab-separated fields). If this attribute is left blank,
+	no cookie file will be read.
+	<p> 
+	For more information, see the sample cookies.txt file in the
+	ht://Dig source distribution.
+	</p> 
+     </description>
+   </attribute>
+
+   <attribute name="create_image_list" 
+              type="boolean" 
+              programs="htdig" 
+              version="all" 
+              category="Extra Output" >
+     <default>false</default>
+     <example>yes</example>
+     <description> 
+	If set to true, a file with all the image URLs that 
+	were seen will be created, one URL per line. This list 
+	will not be in any order and there will be lots of 
+	duplicates, so after htdig has completed, it should be 
+	piped through <code>sort -u</code> to get a unique list. 
+     </description>
+   </attribute>
+
+   <attribute name="create_url_list" 
+              type="boolean" 
+              programs="htdig" 
+              version="all" 
+              category="Extra Output" >
+     <default>false</default>
+     <example>yes</example>
+     <description> 
+	If set to true, a file with all the URLs that were seen 
+	will be created, one URL per line. This list will not 
+	be in any order and there will be lots of duplicates, 
+	so after htdig has completed, it should be piped 
+	through <code>sort -u</code> to get a unique list. 
+     </description>
+   </attribute>
+
+   <attribute name="database_base" 
+              type="string" 
+              programs="all" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_dir}/db</default>
+     <example>${database_dir}/sales</example>
+     <description> 
+	This is the common prefix for files that are specific 
+	to a search database. Many different attributes use 
+	this prefix to specify filenames. Several search 
+	databases can share the same directory by just changing 
+	this value for each of the databases. 
+     </description>
+   </attribute>
+
+   <attribute name="database_dir" 
+              type="string" 
+              programs="all" 
+              version="all" 
+              category="File Layout" >
+     <default configmacro="true">DATABASE_DIR</default>
+     <example>/var/htdig</example>
+     <description> 
+	This is the directory which contains all database and 
+	other files related to ht://Dig. It is never used 
+	directly by any of the programs, but other attributes 
+	are defined in terms of this one. 
+	<p> 
+	The default value of this attribute is determined at 
+	compile time. 
+	</p> 
+     </description>
+   </attribute>
+
+   <attribute name="date_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Searching:Ranking" >
+     <default>0</default>
+     <example>0.35</example>
+     <description> 
+	This factor gives higher 
+	rankings to newer documents and lower rankings to older 
+	documents. Before setting this factor, it's advised to 
+	make sure your servers are returning accurate dates 
+	(check the dates returned in the long format). 
+	Additionally, setting this to a nonzero value incurs a 
+	small performance hit on searching. 
+     </description>
+   </attribute>
+
+   <attribute name="date_format" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.2" 
+              category="Presentation:How" >
+     <default></default>
+     <example>%Y-%m-%d</example>
+     <description> 
+	This format string determines the output format for 
+	modification dates of documents in the search results. 
+	It is interpreted by your system's <em>strftime</em> 
+	function. Please refer to your system's manual page 
+	for this function, for a description of available 
+	format codes. If this format string is empty, as it 
+	is by default,  
+	<ref type="program">htsearch</ref> 
+	will pick a format itself. In this case, the <ref type="attr">iso_8601</ref> attribute can be used 
+	to modify the appearance of the date. 
+     </description>
+   </attribute>
+
+   <attribute name="description_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="3.1.0b3" 
+              category="Searching:Ranking" >
+     <default>150</default>
+     <example>350</example>
+     <description> 
+	Plain old "descriptions" are the text of a link pointing 
+	to a document. This factor gives weight to the words of 
+	these descriptions of the document. Not surprisingly, 
+	these can be pretty accurate summaries of a document's 
+	content. See also <ref type="attr">heading_factor</ref> 
+	and <ref type="attr">meta_description_factor</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="description_meta_tag_names" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.1.6" 
+              category="Searching:Ranking" >
+     <default>description</default>
+     <example>"description htdig-description"</example>
+     <description> 
+	The words in this list are used to search for descriptions in HTML 
+	<em>META</em> tags. This list can contain any number of strings 
+	that each will be seen as the name for whatever description 
+	convention is used. While words in any of the specified 
+	description contents will be indexed, only the last meta tag 
+	containing a description will be kept as the meta description 
+	field for the document, for use in search results. The order in 
+	which the names are specified in this configuration attribute 
+	is irrelevant, as it is the order in which the tags appear in 
+	the documents that matters.<br/> The <em>META</em> tags have the 
+	following format:<br/> 
+	<code>   &lt;META name="<em>somename</em>" 
+	                       content="<em>somevalue</em>"&gt; </code><br/> 
+	See also <ref type="attr">meta_description_factor</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="disable_cookies" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b4" 
+              category="Indexing:Connection" 
+              block="Server" >
+     <default>true</default>
+     <example>true</example>
+     <description> 
+        This option, if set to true, will disable HTTP cookies. 
+     </description>
+   </attribute>
+
+   <attribute name="doc_db" 
+              type="string" 
+              programs="all" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_base}.docdb</default>
+     <example>${database_base}documents.db</example>
+     <description> 
+	This file will contain a Berkeley database of documents 
+	indexed by document number. It contains all the information 
+	gathered for each document, except the document excerpts 
+	which are stored in the <ref type="attr">doc_excerpt</ref> file. 
+     </description>
+   </attribute>
+
+   <attribute name="doc_excerpt" 
+              type="string" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="File Layout" >
+     <default>${database_base}.excerpts</default>
+     <example>${database_base}excerpts.db</example>
+     <description> 
+	This file will contain a Berkeley database of document excerpts 
+	indexed by document number. It contains all the text 
+	gathered for each document, so this file can become 
+	rather large if <ref type="attr">max_head_length</ref> is set to a large value. 
+	The size can be reduced by setting the 
+	<ref type="attr">compression_level</ref>, 
+	if supported on your system. 
+     </description>
+   </attribute>
+
+   <attribute name="doc_index" 
+              type="string" 
+              programs="htdig" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_base}.docs.index</default>
+     <example>documents.index.db</example>
+     <description> 
+	This file contains a mapping of document numbers to URLs and is 
+	used by htdig during indexing. It is used on updates if it exists. 
+     </description>
+   </attribute>
+
+   <attribute name="doc_list" 
+              type="string" 
+              programs="htdig htdump htload" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_base}.docs</default>
+     <example>/tmp/documents.text</example>
+     <description> 
+	This file is basically a text version of the file 
+	specified in <ref type="attr">doc_db</ref>. Its 
+	only use is to have a human readable database of all 
+	documents. The file is easy to parse with tools like 
+	perl or tcl. 
+     </description>
+   </attribute>
+
+   <attribute name="endday" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Searching:Method" >
+     <default></default>
+     <example>31</example>
+     <description> 
+	Day component of last date allowed as last-modified date 
+	of returned documents. 
+	This is most usefully specified as a 
+	<a href="hts_form.html#startyear">CGI argument</a>. 
+	See also <ref type="attr">startyear</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="end_ellipses" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Text" >
+     <default>&lt;strong&gt;&lt;code&gt; ...&lt;/code&gt;&lt;/strong&gt;</default>
+     <example>...</example>
+     <description> 
+	When excerpts are displayed in the search output, this 
+	string will be appended to the excerpt if there is text 
+	following the text displayed. This is just a visual 
+	reminder to the user that the excerpt is only part of 
+	the complete document. 
+     </description>
+   </attribute>
+
+   <attribute name="end_highlight" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.4" 
+              category="Presentation:Text" >
+     <default>&lt;/strong&gt;</default>
+     <example>&lt;/font&gt;</example>
+     <description> 
+	When excerpts are displayed in the search output, matched 
+	words will be highlighted using <ref type="attr">start_highlight</ref> and this string. 
+	You should ensure that highlighting tags are balanced, 
+	that is, this string should close any formatting 
+	tag opened by start_highlight. 
+     </description>
+   </attribute>
+
+   <attribute name="endings_affix_file" 
+              type="string" 
+              programs="htfuzzy" 
+              version="all" 
+              category="File Layout" >
+     <default>${common_dir}/english.aff</default>
+     <example>/var/htdig/affix_rules</example>
+     <description> 
+	Specifies the location of the file which contains the 
+	affix rules used to create the endings search algorithm 
+	databases. Consult the documentation on 
+	<ref type="program">htfuzzy</ref> for more information on the 
+	format of this file. 
+     </description>
+   </attribute>
+
+   <attribute name="endings_dictionary" 
+              type="string" 
+              programs="htfuzzy" 
+              version="all" 
+              category="File Layout" >
+     <default>${common_dir}/english.0</default>
+     <example>/var/htdig/dictionary</example>
+     <description> 
+	Specifies the location of the file which contains the 
+	dictionary used to create the endings search algorithm 
+	databases. Consult the documentation on 
+	<ref type="program">htfuzzy</ref> for more information on the 
+	format of this file. 
+     </description>
+   </attribute>
+
+   <attribute name="endings_root2word_db" 
+              type="string" 
+              programs="htfuzzy htsearch" 
+              version="all" 
+              category="File Layout" >
+     <default>${common_dir}/root2word.db</default>
+     <example>/var/htdig/r2w.db</example>
+     <description> 
+	This attribute specifies the database filename to be 
+	used in the 'endings' fuzzy search algorithm. The 
+	database maps word roots to all legal words with that 
+	root. For more information about this and other fuzzy 
+	search algorithms, consult the 
+	<ref type="program">htfuzzy</ref> documentation.<br/> 
+	Note that the default value uses the 
+	<ref type="attr">common_dir</ref> attribute instead of the 
+	<ref type="attr">database_dir</ref> attribute. 
+	This is because this database can be shared with 
+	different search databases. 
+     </description>
+   </attribute>
+
+   <attribute name="endings_word2root_db" 
+              type="string" 
+              programs="htfuzzy htsearch" 
+              version="all" 
+              category="File Layout" >
+     <default>${common_dir}/word2root.db</default>
+     <example>/var/htdig/w2r.bm</example>
+     <description> 
+	This attribute specifies the database filename to be 
+	used in the 'endings' fuzzy search algorithm. The 
+	database maps words to their root. For more information 
+	about this and other fuzzy search algorithms, consult 
+	the <ref type="program">htfuzzy</ref> 
+	documentation.<br/> 
+	Note that the default value uses the 
+	<ref type="attr">common_dir</ref> attribute instead of the 
+	<ref type="attr">database_dir</ref> attribute. 
+	This is because this database can be shared with 
+	different search databases. 
+     </description>
+   </attribute>
+
+   <attribute name="endmonth" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Searching:Method" >
+     <default></default>
+     <example>12</example>
+     <description> 
+	Month component of last date allowed as last-modified date 
+	of returned documents. 
+	This is most usefully specified as a 
+	<a href="hts_form.html#startyear">CGI argument</a>. 
+	See also <ref type="attr">startyear</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="endyear" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Searching:Method" >
+     <default></default>
+     <example>2002</example>
+     <description> 
+	Year component of last date allowed as last-modified date 
+	of returned documents. 
+	This is most usefully specified as a 
+	<a href="hts_form.html#startyear">CGI argument</a>. 
+	See also <ref type="attr">startyear</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="excerpt_length" 
+              type="integer" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:How" >
+     <default>300</default>
+     <example>500</example>
+     <description> 
+	This is the maximum number of characters the displayed 
+	excerpt will be limited to. The first matched word will 
+	be highlighted in the middle of the excerpt so that there is 
+	some surrounding context.<br/> 
+	The <ref type="attr">start_ellipses</ref> and 
+	<ref type="attr">end_ellipses</ref> are used to 
+	indicate that the document contains text before and 
+	after the displayed excerpt respectively. 
+	The <ref type="attr">start_highlight</ref> and 
+	<ref type="attr">end_highlight</ref> are used to 
+	specify what formatting tags are used to highlight matched words. 
+     </description>
+   </attribute>
+
+   <attribute name="excerpt_show_top" 
+              type="boolean" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:How" >
+     <default>false</default>
+     <example>yes</example>
+     <description> 
+	If set to true, the excerpt of a match will always show 
+	the top of the matching document. If it is false (the 
+	default), the excerpt will attempt to show the part of 
+	the document that actually contains one of the words. 
+     </description>
+   </attribute>
+
+   <attribute name="exclude" 
+              type="pattern_list" 
+              programs="htsearch" 
+              version="3.2.0b4" 
+              category="Searching:Method" >
+     <default></default>
+     <example>myhost.com/mailarchive/</example>
+     <description> 
+	If a URL contains any of the space separated patterns, it will be 
+	discarded in the searching phase. This is used to exclude certain 
+	URLs from search results. The list can be specified from within 
+	the configuration file, and can be overridden with the "exclude" 
+	input parameter in the search form. 
+     </description>
+   </attribute>
+
+   <attribute name="exclude_urls" 
+              type="pattern_list" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:Where" 
+              block="URL" >
+     <default>/cgi-bin/ .cgi</default>
+     <example>students.html cgi-bin</example>
+     <description> 
+	If a URL contains any of the space separated patterns, 
+	it will be rejected. This is used to exclude such 
+	common things as an infinite virtual web-tree 
+	which starts with cgi-bin. 
+     </description>
+   </attribute>
+
+   <attribute name="external_parsers" 
+              type="quoted_string_list" 
+              programs="htdig" 
+              version="3.0.7" 
+              category="External:Parsers" >
+     <default></default>
+     <example>text/html /usr/local/bin/htmlparser \ 
+	application/pdf /usr/local/bin/parse_doc.pl \ 
+	application/msword-&gt;text/plain "/usr/local/bin/mswordtotxt -w" \ 
+	application/x-gunzip-&gt;user-defined /usr/local/bin/ungzipper</example>
+     <description> 
+			This attribute is used to specify a list of 
+			content-type/parsers that are to be used to parse 
+			documents that cannot be parsed by any of the internal 
+			parsers. The list of external parsers is examined 
+			before the builtin parsers are checked, so this can be 
+			used to override the internal behavior without 
+			recompiling htdig.<br/> 
+			 The external parsers are specified as pairs of 
+			strings. The first string of each pair is the 
+			content-type that the parser can handle while the 
+			second string of each pair is the path to the external 
+			parsing program. If quoted, it may contain parameters, 
+			separated by spaces.<br/> 
+			 External parsing can also be done with external 
+			converters, which convert one content-type to 
+			another. To do this, instead of just specifying 
+			a single content-type as the first string 
+			of a pair, you specify two types, in the form 
+			<em>type1</em><strong>-&gt;</strong><em>type2</em>, 
+			as a single string with no spaces. The second 
+			string will define an external converter 
+			rather than an external parser, to convert 
+			the first type to the second. If the second 
+			type is <strong>user-defined</strong>, then 
+			it's up to the converter script to put out a 
+			"Content-Type: <em>type</em>" header followed 
+			by a blank line, to indicate to htdig what type it 
+			should expect for the output, much like what a CGI 
+			script would do. The resulting content-type must 
+			be one that htdig can parse, either internally, 
+			or with another external parser or converter.<br/> 
+			 Only one external parser or converter can be 
+			specified for any given content-type. However, 
+			an external converter for one content-type can be 
+			chained to the internal parser for the same type, 
+			by appending <strong>-internal</strong> to the 
+			second type string (e.g. text/html-&gt;text/html-internal) 
+			to perform external preprocessing on documents of 
+			this type before internal parsing. 
+			There are two internal parsers, for text/html and 
+			text/plain.<p> 
+			 The parser program takes four command-line 
+			parameters, not counting any parameters already 
+			given in the command string:<br/> 
+			<em>infile content-type URL configuration-file</em><br/> 
+			</p>
+<table border="1"> 
+			  <tr> 
+				<th> 
+				  Parameter 
+				</th> 
+				<th> 
+				  Description 
+				</th> 
+				<th> 
+				  Example 
+				</th> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  infile 
+				</td> 
+				<td> 
+				  A temporary file with the contents to be parsed. 
+				</td> 
+				<td> 
+				  /var/tmp/htdext.14242 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  content-type 
+				</td> 
+				<td> 
+				  The MIME-type of the contents. 
+				</td> 
+				<td> 
+				  text/html 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  URL 
+				</td> 
+				<td> 
+				  The URL of the contents. 
+				</td> 
+				<td> 
+				  http://www.htdig.org/attrs.html 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  configuration-file 
+				</td> 
+				<td> 
+				  The configuration-file in effect. 
+				</td> 
+				<td> 
+				  /etc/htdig/htdig.conf 
+				</td> 
+			  </tr> 
+			</table><p> 
+			The external parser is to write information for 
+			htdig on its standard output. Unless it is an 
+			external converter, which will output a document 
+			of a different content-type, then its output must 
+			follow the format described here.<br/> 
+			 The output consists of records, each record terminated 
+			with a newline. Each record is a series of (unless 
+			expressly allowed to be empty) non-empty tab-separated 
+			fields. The first field is a single character 
+			that specifies the record type. The rest of the fields 
+			are determined by the record type. 
+			</p>
+<table border="1"> 
+			  <tr> 
+				<th> 
+				  Record type 
+				</th> 
+				<th> 
+				  Fields 
+				</th> 
+				<th> 
+				  Description 
+				</th> 
+			  </tr> 
+			  <tr> 
+				<th rowspan="3" valign="top"> 
+				  w 
+				</th> 
+				<td valign="top"> 
+				  word 
+				</td> 
+				<td> 
+				  A word that was found in the document. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  location 
+				</td> 
+				<td> 
+				  A number indicating the normalized location of 
+				  the word within the document. The number has to 
+				  fall in the range 0-1000 where 0 means the top of 
+				  the document. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  heading level 
+				</td> 
+				<td> 
+				  A heading level that is used to compute the 
+				  weight of the word depending on its context in 
+				  the document itself. The levels are in the range 
+				  0-10 and are defined as follows: 
+				  <dl compact="true"> 
+					<dt> 
+					  0 
+					</dt> 
+					<dd> 
+					  Normal text 
+					</dd> 
+					<dt> 
+					  1 
+					</dt> 
+					<dd> 
+					  Title text 
+					</dd> 
+					<dt> 
+					  2 
+					</dt> 
+					<dd> 
+					  Heading 1 text 
+					</dd> 
+					<dt> 
+					  3 
+					</dt> 
+					<dd> 
+					  Heading 2 text 
+					</dd> 
+					<dt> 
+					  4 
+					</dt> 
+					<dd> 
+					  Heading 3 text 
+					</dd> 
+					<dt> 
+					  5 
+					</dt> 
+					<dd> 
+					  Heading 4 text 
+					</dd> 
+					<dt> 
+					  6 
+					</dt> 
+					<dd> 
+					  Heading 5 text 
+					</dd> 
+					<dt> 
+					  7 
+					</dt> 
+					<dd> 
+					  Heading 6 text 
+					</dd> 
+					<dt> 
+					  8 
+					</dt> 
+					<dd> 
+					  <em>unused</em> 
+					</dd> 
+					<dt> 
+					  9 
+					</dt> 
+					<dd> 
+					  <em>unused</em> 
+					</dd> 
+					<dt> 
+					  10 
+					</dt> 
+					<dd> 
+					  Keywords 
+					</dd> 
+				  </dl> 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<th rowspan="2" valign="top"> 
+				  u 
+				</th> 
+				<td valign="top"> 
+				  document URL 
+				</td> 
+				<td> 
+				  A hyperlink to another document that is 
+				  referenced by the current document.  It must be 
+				  complete and non-relative, using the URL parameter to 
+				  resolve any relative references found in the document. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  hyperlink description 
+				</td> 
+				<td> 
+				  For HTML documents, this would be the text 
+				  between the &lt;a href...&gt; and &lt;/a&gt; 
+				  tags. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<th valign="top"> 
+				  t 
+				</th> 
+				<td valign="top"> 
+				  title 
+				</td> 
+				<td> 
+				  The title of the document 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<th valign="top"> 
+				  h 
+				</th> 
+				<td valign="top"> 
+				  head 
+				</td> 
+				<td> 
+				  The top of the document itself. This is used to 
+				  build the excerpt. This should only contain 
+				  normal ASCII text 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<th valign="top"> 
+				  a 
+				</th> 
+				<td valign="top"> 
+				  anchor 
+				</td> 
+				<td> 
+				  The label that identifies an anchor that can be 
+				  used as a target in an URL. This really only 
+				  makes sense for HTML documents. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<th valign="top"> 
+				  i 
+				</th> 
+				<td valign="top"> 
+				  image URL 
+				</td> 
+				<td> 
+				  An URL that points at an image that is part of 
+				  the document. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<th rowspan="3" valign="top"> 
+				  m 
+				</th> 
+				<td valign="top"> 
+				  http-equiv 
+				</td> 
+				<td> 
+				  The HTTP-EQUIV attribute of a 
+				  <a href="meta.html"><em>META</em> tag</a>. 
+				  May be empty. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  name 
+				</td> 
+				<td> 
+				  The NAME attribute of this 
+				  <a href="meta.html"><em>META</em> tag</a>. 
+				  May be empty. 
+				</td> 
+			  </tr> 
+			  <tr> 
+				<td valign="top"> 
+				  contents 
+				</td> 
+				<td> 
+				  The CONTENT attribute of this 
+				  <a href="meta.html"><em>META</em> tag</a>. 
+				  May be empty. 
+				</td> 
+			  </tr> 
+			</table> 
+	<p><em>See also FAQ questions <ref type="faq">4.8</ref> and <ref type="faq">4.9</ref> for more 
+	examples.</em></p> 
+     </description>
+   </attribute>
+
+   <attribute name="external_protocols" 
+              type="quoted_string_list" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="External:Protocols" >
+     <default></default>
+     <example>https /usr/local/bin/handler.pl \ 
+	ftp /usr/local/bin/ftp-handler.pl</example>
+     <description> 
+	This attribute is a bit like <ref type="attr">external_parsers</ref> 
+	since it specifies a list of protocols/handlers that are used to download documents 
+	that cannot be retrieved using the internal methods. This enables htdig to index 
+	documents with URL schemes it does not understand, or to use more advanced authentication 
+	for the documents it is retrieving. This list is checked before HTTP or other methods, 
+	so this can override the internal behavior without writing additional code for htdig.<br/> 
+	  The external protocols are specified as pairs of strings, the first being the URL scheme that 
+	the script can handle while the second is the path to the script itself. If the second is 
+	quoted, then additional command-line arguments may be given.<br/> 
+	If the external protocol does not contain a colon (:), it is assumed 
+	to have the standard format 
+	"protocol://[usr[:password]@]address[:port]/path". 
+	If it ends with a colon, then it is assumed to have the simpler format 
+	"protocol:path". If it ends with "://" then the standard form is 
+	again assumed. <br/> 
+	  The program takes three command-line parameters, not counting any parameters already given  
+	in the command string:<br/> 
+	<em>protocol URL configuration-file</em><br/> 
+	<table border="1"> 
+	  <tr> 
+		<th> 
+		  Parameter 
+		</th> 
+		<th> 
+		  Description 
+		</th> 
+		<th> 
+		  Example 
+		</th> 
+	  </tr> 
+	  <tr> 
+		<td valign="top"> 
+		  protocol 
+		</td> 
+		<td> 
+		  The URL scheme to be used. 
+		</td> 
+		<td> 
+		  https 
+		</td> 
+	  </tr> 
+	  <tr> 
+		<td valign="top"> 
+		  URL 
+		</td> 
+		<td> 
+		  The URL to be retrieved. 
+		</td> 
+		<td> 
+		  https://www.htdig.org:8008/attrs.html 
+		</td> 
+	  </tr> 
+	  <tr> 
+		<td valign="top"> 
+		  configuration-file 
+		</td> 
+		<td> 
+		  The configuration-file in effect. 
+		</td> 
+		<td> 
+		  /etc/htdig/htdig.conf 
+		</td> 
+	  </tr> 
+	</table><p> 
+	The external protocol script is to write information for htdig on the  
+	standard output. The output must follow the form described here. The output  
+	consists of a header followed by a blank line, followed by the contents of  
+	the document. Each record in the header is terminated with a newline.  
+	Each record is a series of (unless expressly allowed to be empty) non-empty  
+	tab-separated fields. The first field is a single character that specifies the  
+	record type. The rest of the fields are determined by the record type. 
+	</p>
+<table border="1"> 
+	  <tr> 
+		<th> 
+		  Record type 
+		</th> 
+		<th> 
+		  Fields 
+		</th> 
+		<th> 
+		  Description 
+		</th> 
+	  </tr> 
+	  <tr> 
+		<th valign="top"> 
+		  s 
+		</th> 
+		<td valign="top"> 
+		  status code 
+		</td> 
+		<td> 
+		  An HTTP-style status code, e.g. 200, 404. Typical codes include: 
+		    <dl compact="true"> 
+			<dt> 
+			  200 
+			</dt> 
+			<dd> 
+			  Successful retrieval 
+			</dd> 
+			<dt> 
+			  304 
+			</dt> 
+			<dd> 
+			  Not modified (for example, if the document hasn't changed) 
+			</dd> 
+			<dt> 
+			  301 
+			</dt> 
+			<dd> 
+			  Redirect (to another URL) 
+			</dd> 
+			<dt> 
+			  401 
+			</dt> 
+			<dd> 
+			  Not authorized 
+			</dd> 
+			<dt> 
+			  404 
+			</dt> 
+			<dd> 
+			  Not found 
+			</dd> 
+		  </dl> 
+		</td> 
+	  </tr> 
+	  <tr> 
+		<th valign="top"> 
+		  r 
+		</th> 
+		<td valign="top"> 
+		  reason 
+		</td> 
+		<td> 
+		  A text string describing the status code, e.g. "Redirect" or "Not Found." 
+		</td> 
+	  </tr> 
+	  <tr> 
+		<th valign="top"> 
+		  m 
+		</th> 
+		<td valign="top"> 
+		  modification time 
+		</td> 
+		<td> 
+		  The modification time of this document. While the code is fairly flexible 
+		  about the time/date formats it accepts, it is recommended to use something 
+		  standard, like RFC1123: Sun, 06 Nov 1994 08:49:37 GMT, or ISO-8601:  
+		  1994-11-06 08:49:37 GMT. 
+		</td> 
+	  </tr> 
+	  <tr> 
+		<th valign="top"> 
+		  t 
+		</th> 
+		<td valign="top"> 
+		  content-type 
+		</td> 
+		<td> 
+		  A valid MIME type for the document, like text/html or text/plain. 
+		</td> 
+	  </tr> 
+	  <tr> 
+		<th valign="top"> 
+		  l 
+		</th> 
+		<td valign="top"> 
+		  content-length 
+		</td> 
+		<td> 
+		  The length of the document on the server, which may not necessarily 
+		  be the length of the buffer returned. 
+		</td> 
+	  </tr> 
+	  <tr> 
+		<th valign="top"> 
+		  u 
+		</th> 
+		<td valign="top"> 
+		  url 
+		</td> 
+		<td> 
+		  The URL of the document, or in the case of a redirect, the URL 
+		  that should be indexed as a result of the redirect. 
+		</td> 
+	  </tr> 
+      </table>	   
+     </description>
+   </attribute>
+
+   <attribute name="extra_word_characters" 
+              type="string" 
+              programs="htdig htsearch" 
+              version="3.1.2" 
+              category="Indexing:What" >
+     <default></default>
+     <example>_</example>
+     <description> 
+	These characters are considered part of a word. 
+	In contrast to the characters in the 
+	<ref type="attr">valid_punctuation</ref> 
+	attribute, they are treated just like letter 
+	characters.<br/> 
+	Note that the <ref type="attr">locale</ref> attribute 
+	is normally used to configure which characters 
+	constitute letter characters. 
+     </description>
+   </attribute>
+
+   <attribute name="head_before_get" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:Connection" 
+              block="Server" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	This option works only if we take advantage of persistent connections (see 
+	persistent_connections attribute). If set to true an HTTP/1.1 <em>HEAD</em> 
+	call is made in order to retrieve header information about a document. 
+	If the status code and the content-type returned let the document be parsable, 
+	then a following 'GET' call is made. 
+     </description>
+   </attribute>
+
+   <attribute name="heading_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="3.2.0b1" 
+              category="Searching:Ranking" >
+     <default>5</default>
+     <example>20</example>
+     <description> 
+			This is a factor which will be used to multiply the 
+			weight of words between &lt;h1&gt; and &lt;/h1&gt; 
+			tags, as well as headings of levels &lt;h2&gt; through 
+			&lt;h6&gt;. It is used to assign the level of importance 
+			to headings. Setting a factor to 0 will cause words 
+			in these headings to be ignored. The number may be a 
+	floating point number. See also 
+	<ref type="attr">author_factor</ref> 
+	<ref type="attr">backlink_factor</ref> 
+	<ref type="attr">caps_factor</ref> 
+	<ref type="attr">date_factor</ref> 
+	<ref type="attr">description_factor</ref> 
+	<ref type="attr">keywords_factor</ref> 
+	<ref type="attr">meta_description_factor</ref> 
+	<ref type="attr">text_factor</ref> 
+	<ref type="attr">title_factor</ref> 
+	<ref type="attr">url_text_factor</ref> 
+     </description>
+   </attribute>
+
+   <attribute name="htnotify_prefix_file" 
+              type="string" 
+              programs="htnotify" 
+              version="3.2.0b3" 
+              category="Extra Output" >
+     <default></default>
+     <example>${common_dir}/notify_prefix.txt</example>
+     <description> 
+	Specifies the file containing text to be inserted in each mail  
+	message sent by htnotify before the list of expired webpages. If omitted,  
+	nothing is inserted. 
+     </description>
+   </attribute>
+
+   <attribute name="htnotify_replyto" 
+              type="string" 
+              programs="htnotify" 
+              version="3.2.0b3" 
+              category="Extra Output" >
+     <default></default>
+     <example>design-group@example.com</example>
+     <description> 
+	This specifies the email address that htnotify email messages 
+	include in the Reply-to: field. 
+     </description>
+   </attribute>
+
+   <attribute name="htnotify_sender" 
+              type="string" 
+              programs="htnotify" 
+              version="all" 
+              category="Extra Output" >
+     <default>webmaster@www</default>
+     <example>webmaster@example.com</example>
+     <description> 
+	This specifies the email address that htnotify email 
+	messages get sent out from. The address is forged using 
+	/usr/lib/sendmail. Check htnotify/htnotify.cc for 
+	detail on how this is done. 
+     </description>
+   </attribute>
+
+   <attribute name="htnotify_suffix_file" 
+              type="string" 
+              programs="htnotify" 
+              version="3.2.0b3" 
+              category="Extra Output" >
+     <default></default>
+     <example>${common_dir}/notify_suffix.txt</example>
+     <description> 
+	Specifies the file containing text to be inserted in each mail message  
+	sent by htnotify after the list of expired webpages. If omitted, htnotify  
+	will insert a standard message. 
+     </description>
+   </attribute>
+
+   <attribute name="htnotify_webmaster" 
+              type="string" 
+              programs="htnotify" 
+              version="3.2.0b3" 
+              category="Extra Output" >
+     <default>ht://Dig Notification Service</default>
+     <example>Notification Service</example>
+     <description> 
+	This provides a name for the From field, in addition to the email address 
+	for the email messages sent out by htnotify. 
+     </description>
+   </attribute>
+
+   <attribute name="http_proxy" 
+              type="string" 
+              programs="htdig" 
+              version="3.0" 
+              category="Indexing:Connection" 
+              block="URL" >
+     <default></default>
+     <example>http://proxy.example.com:3128</example>
+     <description> 
+	When this attribute is set, all HTTP document 
+	retrievals will be done using the HTTP-PROXY protocol. 
+	The URL specified in this attribute points to the host 
+	and port where the proxy server resides.<br/> 
+	The use of a proxy server greatly improves performance 
+	of the indexing process. 
+     </description>
+   </attribute>
+
+   <attribute name="http_proxy_authorization" 
+              type="string" 
+              programs="htdig" 
+              version="3.2.0b4" 
+              category="Indexing:Connection" 
+              block="URL" >
+     <default></default>
+     <example>myusername:mypassword</example>
+     <description> 
+	This tells htdig to send the supplied 
+	<em>username</em><strong>:</strong><em>password</em> with each HTTP request, 
+	when using a proxy with authorization requested. 
+	The credentials will be encoded using the "Basic" authentication 
+	scheme. There <em>must</em> be a colon (:) between the username and 
+	password. 
+     </description>
+   </attribute>
+
+   <attribute name="http_proxy_exclude" 
+              type="pattern_list" 
+              programs="htdig" 
+              version="3.1.0b3" 
+              category="Indexing:Connection" >
+     <default></default>
+     <example>//intranet.foo.com/</example>
+     <description> 
+	When this is set, URLs matching this will not use the 
+	proxy. This is useful when you have a mixture of sites 
+	near to the digging server and far away. 
+     </description>
+   </attribute>
+
+   <attribute name="ignore_alt_text" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.1.6" 
+              category="Indexing:What" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	If set, this causes the text of the ALT field in an &lt;IMG...&gt; tag 
+	not to be indexed as part of the text of the document, nor included in 
+	excerpts. 
+     </description>
+   </attribute>
+
+   <attribute name="ignore_dead_servers" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.1.6" 
+              category="Indexing:Connection" >
+     <default>true</default>
+     <example>false</example>
+     <description> 
+	Determines whether htdig will continue to index URLs from a 
+	server after an attempted connection to the server fails as 
+	&quot;no host found&quot; or &quot;host not found (port).&quot; If 
+	set to false, htdig will try <em>every</em> URL from that server. 
+     </description>
+   </attribute>
+
+   <attribute name="image_list" 
+              type="string" 
+              programs="htdig" 
+              version="all" 
+              category="Extra Output" >
+     <default>${database_base}.images</default>
+     <example>allimages</example>
+     <description> 
+	This is the file that a list of image URLs gets written 
+	to by <ref type="program">htdig</ref> when the 
+	<ref type="attr">create_image_list</ref> is set to 
+	true. As image URLs are seen, they are just appended to 
+	this file, so after htdig finishes it is probably a 
+	good idea to run <code>sort -u</code> on the file to 
+	eliminate duplicates from the file. 
+     </description>
+   </attribute>
+
+   <attribute name="image_url_prefix" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Text" >
+     <default configmacro="true">IMAGE_URL_PREFIX</default>
+     <example>/images/htdig</example>
+     <description> 
+	This specifies the directory portion of the URL used 
+	to display star images. This attribute isn't directly 
+	used by htsearch, but is used in the default URL for 
+	the <ref type="attr">star_image</ref> and 
+	<ref type="attr">star_blank</ref> attributes, and 
+	other attributes may be defined in terms of this one. 
+	<p> 
+	The default value of this attribute is determined at 
+	compile time. 
+	</p> 
+     </description>
+   </attribute>
+
+   <attribute name="include" 
+              type="string" 
+              programs="all" 
+              version="3.1.0" 
+              category="" >
+     <default></default>
+     <example>${config_dir}/htdig.conf</example>
+     <description> 
+	This is not quite a configuration attribute, but 
+	rather a directive. It can be used within one 
+	configuration file to include the definitions of 
+	another file. The last definition of an attribute 
+	is the one that applies, so after including a file, 
+	any of its definitions can be overridden with 
+	subsequent definitions. This can be useful when 
+	setting up many configurations that are mostly the 
+	same, so all the common attributes can be maintained 
+	in a single configuration file. The include directives 
+	can be nested, but watch out for nesting loops. 
+     </description>
+   </attribute>
+
+   <attribute name="iso_8601" 
+              type="boolean" 
+              programs="htsearch htnotify" 
+              version="3.1.0b2" 
+              category="Presentation:How,Extra Output" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	This sets whether dates should be output in ISO 8601 
+	format. For example, this was written on: 1998-10-31 11:28:13 EST. 
+	See also the <ref type="attr">date_format</ref> attribute, which 
+	can override any date format that 
+	<ref type="program">htsearch</ref> 
+	picks by default.<br/> 
+	This attribute also affects the format of the date 
+	<ref type="program">htnotify</ref> expects to find 
+	in a <strong>htdig-notification-date</strong> field. 
+     </description>
+   </attribute>
+
+   <attribute name="keywords" 
+              type="string_list" 
+              programs="htsearch" 
+              version="??" 
+              category="Searching:Method" >
+     <default></default>
+     <example>documentation</example>
+     <description> 
+	Keywords which <strong>must</strong> be found on all pages returned, 
+    	even if the "or" ("Any") <ref type="attr">method</ref> is 
+	selected. 
+     </description>
+   </attribute>
+
+   <attribute name="keywords_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="all" 
+              category="Searching:Ranking" >
+     <default>100</default>
+     <example>12</example>
+     <description> 
+	This is a factor which will be used to multiply the 
+	weight of words in the list of keywords of a document. 
+	The number may be a floating point number. See also the 
+	<ref type="attr">heading_factor</ref> attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="keywords_meta_tag_names" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.0.6" 
+              category="Indexing:What" >
+     <default>keywords htdig-keywords</default>
+     <example>keywords description</example>
+     <description>	The words in this list are used to search for keywords 
+	in HTML <em>META</em> tags. This list can contain any 
+	number of strings that each will be seen as the name 
+	for whatever keyword convention is used.<br/> 
+	The <em>META</em> tags have the following format: 
+    <codeblock> 
+    &lt;META name="<em>somename</em>" content="<em>somevalue</em>"&gt; 
+    </codeblock> 
+</description>
+   </attribute>
+
+   <attribute name="limit_normalized" 
+              type="pattern_list" 
+              programs="htdig" 
+              version="3.1.0b2" 
+              category="Indexing:Where" >
+     <default></default>
+     <example>//www.mydomain.com</example>
+     <description> 
+	This specifies a set of patterns that all URLs have to 
+	match against in order for them to be included in the 
+	search. Unlike the limit_urls_to attribute, this is done 
+	<strong>after</strong> the URL is normalized and the 
+	<ref type="attr">server_aliases</ref> 
+	attribute is applied. This allows filtering after any 
+	hostnames and DNS aliases are resolved. Otherwise, this 
+	attribute is the same as the <ref type="attr">limit_urls_to</ref> attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="limit_urls_to" 
+              type="pattern_list" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:Where" >
+     <default>${start_url}</default>
+     <example>.sdsu.edu kpbs [.*\.html]</example>
+     <description> 
+	This specifies a set of patterns that all URLs have to 
+	match against in order for them to be included in the 
+	search. Any number of strings can be specified, 
+	separated by spaces. If multiple patterns are given, at 
+	least one of the patterns has to match the URL.<br/> 
+	Matching, by default, is a case-insensitive string match on the URL 
+	to be used, unless the <ref type="attr">case_sensitive</ref>  
+	attribute is set. The match will be performed <em>after</em> 
+	the relative references have been converted to a valid 
+	URL. This means that the URL will <em>always</em> start 
+	with <code>http://</code>.<br/> 
+	Granted, this is not the perfect way of doing this, 
+	but it is simple enough and it covers most cases. 
+     </description>
+   </attribute>
+
+   <attribute name="local_default_doc" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.0.8b2" 
+              category="Indexing:Where" 
+              block="Server" >
+     <default>index.html</default>
+     <example>default.html default.htm index.html index.htm</example>
+     <description> 
+	Set this to the default documents in a directory used by the 
+	server. This is used for local filesystem access to 
+	translate URLs like http://foo.com/ into something like 
+	/home/foo.com/index.html<br/> 
+	The list should only contain names that the local server 
+	recognizes as default documents for directory URLs, as defined 
+	by the DirectoryIndex setting in Apache's srm.conf, for example. 
+	As of version 3.1.5, this can be a string list rather than a single name, 
+	and htdig will use the first name that works. Since this requires a 
+	loop, setting the most common name first will improve performance. 
+	Special characters can be embedded in these names using %xx hex encoding. 
+     </description>
+   </attribute>
+
+   <attribute name="local_urls" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.0.8b2" 
+              category="Indexing:Where" >
+     <default></default>
+     <example>//www.foo.com/=/usr/www/htdocs/</example>
+     <description> 
+	Set this to tell ht://Dig to access certain URLs through 
+	local filesystems. At first ht://Dig will try to access 
+	pages with URLs matching the patterns through the 
+	filesystems specified. If it cannot find the file, or 
+	if it doesn't recognize the file name extension, it will 
+	try the URL through HTTP instead. Note the example--the 
+	equal sign and the final slashes in both the URL and the 
+	directory path are critical. 
+	<br/>The fallback to HTTP can be disabled by setting the 
+	<ref type="attr">local_urls_only</ref> attribute to true. 
+	To access user directory URLs through the local filesystem, 
+	set <ref type="attr">local_user_urls</ref>.  The only 
+	file name extensions currently recognized for local filesystem 
+	access are .html, .htm, .txt, .asc, .ps, .eps and .pdf. For 
+	anything else, htdig must ask the HTTP server for the file, 
+	so it can determine the MIME content-type of it. 
+	As of version 3.1.5, you can provide multiple mappings of a given 
+	URL to different directories, and htdig will use the first 
+	mapping that works. 
+	Special characters can be embedded in these names using %xx hex encoding. 
+	For example, you can use %3D to embed an "=" sign in a URL pattern. 
+     </description>
+   </attribute>
+
+   <attribute name="local_urls_only" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.1.4" 
+              category="Indexing:Where" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	Set this to tell ht://Dig to access files only through the  
+	local filesystem, for URLs matching the patterns in the 
+	<ref type="attr">local_urls</ref> or 
+	<ref type="attr">local_user_urls</ref> attribute. If it cannot  
+	find the file, it will give up rather than trying HTTP or another protocol. 
+     </description>
+   </attribute>
+
+   <attribute name="local_user_urls" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.0.8b2" 
+              category="Indexing:Where" >
+     <default></default>
+     <example>//www.my.org/=/home/,/www/</example>
+     <description> 
+	Set this to access user directory URLs through the local 
+	filesystem. If you leave the "path" portion out, it will 
+	look up the user's home directory in /etc/passwd (or NIS 
+	or whatever). As with <ref type="attr">local_urls</ref>, 
+	if the files are not found, ht://Dig will try with HTTP or the 
+	appropriate protocol. Again, note the 
+	example's format. To map http://www.my.org/~joe/foo/bar.html 
+	to /home/joe/www/foo/bar.html, try the example below. 
+	<br/>The fallback to HTTP can be disabled by setting the 
+	<ref type="attr">local_urls_only</ref> attribute to true. 
+	As of version 3.1.5, you can provide multiple mappings of a given 
+	URL to different directories, and htdig will use the first 
+	mapping that works. 
+	Special characters can be embedded in these names using %xx hex encoding. 
+	For example, you can use %3D to embed an "=" sign in a URL pattern. 
+     </description>
+   </attribute>
+
+   <attribute name="locale" 
+              type="string" 
+              programs="htdig" 
+              version="3.0" 
+              category="Indexing:What,Presentation:How" >
+     <default>C</default>
+     <example>en_US</example>
+     <description> 
+	Set this to whatever locale you want your search 
+	database to cover. It affects the way international 
+	characters are dealt with. On most systems a list of 
+	legal locales can be found in /usr/lib/locale. Also 
+	check the <strong>setlocale(3C)</strong> man page. 
+	Note that depending on the locale you choose, and whether 
+	your system's locale implementation affects floating 
+	point input, you may need to specify the decimal point 
+	as a comma rather than a period. This will affect 
+	settings of <ref type="attr">search_algorithm</ref> 
+	and any of the scoring factors. 
+     </description>
+   </attribute>
+
+   <attribute name="logging" 
+              type="boolean" 
+              programs="htsearch" 
+              version="3.1.0b2" 
+              category="Extra Output" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	This sets whether htsearch should use syslog() to log 
+	search requests. If set, this will log requests with a 
+	default level of LOG_INFO and a facility of LOG_LOCAL5. For 
+	details on redirecting the log into a separate file or other 
+	actions, see the <strong>syslog.conf(5)</strong> man 
+	page. To set the level and facility used in logging, change 
+	LOG_LEVEL and LOG_FACILITY in the include/htconfig.h file 
+	before compiling. 
+	<dl> 
+	  <dt> 
+	    Each line logged by htsearch contains the following: 
+	  </dt> 
+	  <dd> 
+	    REMOTE_ADDR [config] (match_method) [words] 
+	    [logicalWords] (matches/matches_per_page) - 
+	    page, HTTP_REFERER 
+	  </dd> 
+	</dl> 
+	where any of the above are null or empty, it 
+	either puts in '-' or 'default' (for config). 
+     </description>
+   </attribute>
+
+   <attribute name="maintainer" 
+              type="string" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:Out" 
+              block="Server" >
+     <default>bogus@unconfigured.htdig.user</default>
+     <example>ben.dover@uptight.com</example>
+     <description> 
+	This should be the email address of the person in 
+	charge of the digging operation. This string is added 
+	to the user-agent: field when the digger sends a 
+	request to a server. 
+     </description>
+   </attribute>
+
+   <attribute name="match_method" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Searching:Method" >
+     <default>and</default>
+     <example>boolean</example>
+     <description> 
+	This is the default method for matching that htsearch 
+	uses. The valid choices are: 
+	<ul> 
+	  <li> or </li> 
+	  <li> and </li> 
+	  <li> boolean </li> 
+	</ul> 
+	This attribute will only be used if the HTML form that 
+	calls htsearch didn't have the <a href="hts_form.html#method">method</a> 
+	value set. 
+     </description>
+   </attribute>
+
+   <attribute name="matches_per_page" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Searching:Method" >
+     <default>10</default>
+     <example>999</example>
+     <description> 
+	If this is set to a relatively small number, the 
+	matches will be shown in pages instead of all at once. 
+	This attribute will only be used if the HTML form that 
+	calls htsearch didn't have the 
+	<a href="hts_form.html#matchesperpage">matchesperpage</a> value set. 
+     </description>
+   </attribute>
+
+   <attribute name="max_connection_requests" 
+              type="integer" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:Connection" >
+     <default>-1</default>
+     <example>100</example>
+     <description> 
+	This attribute tells htdig to limit the number of requests it will 
+	send to a server using a single, persistent HTTP connection. This 
+	only applies when the 
+	<ref type="attr">persistent_connections</ref> 
+	attribute is set. You may set the limit as high as you want, 
+	but it must be at least 1. A value of -1 specifies no limit. 
+	Requests in the queue for a server will be combined until either 
+	the limit is reached, or the queue is empty. 
+     </description>
+   </attribute>
+
+   <attribute name="max_description_length" 
+              type="integer" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:What" >
+     <default>60</default>
+     <example>40</example>
+     <description> 
+	While gathering descriptions of URLs, 
+	<ref type="program">htdig</ref> will only record those 
+	descriptions which are shorter than this length. This 
+	is used mostly to deal with broken HTML. (If a 
+	hyperlink is not terminated with a &lt;/a&gt; the 
+	description will go on until the end of the document.) 
+     </description>
+   </attribute>
+
+   <attribute name="max_descriptions" 
+              type="integer" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:What" >
+     <default>5</default>
+     <example>15</example>
+     <description> 
+	While gathering descriptions of URLs, 
+	<ref type="program">htdig</ref> will only record up to this 
+	number of descriptions, in the order in which it encounters 
+	them. This is used to prevent the database entry for a document 
+	from growing out of control if the document has a huge number 
+	of links to it. 
+     </description>
+   </attribute>
+
+   <attribute name="max_doc_size" 
+              type="integer" 
+              programs="htdig" 
+              version="3.0" 
+              category="Indexing:What" 
+              block="URL" >
+     <default>100000</default>
+     <example>5000000</example>
+     <description> 
+	This is the upper limit to the amount of data retrieved 
+	for documents. This is mainly used to prevent 
+	unreasonable memory consumption since each document 
+	will be read into memory by <ref type="program">htdig</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="max_excerpts" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Presentation:How" 
+              block="URL" >
+     <default>1</default>
+     <example>10</example>
+     <description> 
+	This value determines the maximum number of excerpts 
+	that can be displayed for one matching document in the 
+	search results. 
+     </description>
+   </attribute>
+
+   <attribute name="max_head_length" 
+              type="integer" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:How" >
+     <default>512</default>
+     <example>50000</example>
+     <description> 
+	For each document retrieved, the top of the document is 
+	stored. This attribute determines the size of this 
+	block. The text that will be stored is only the text; 
+	no markup is stored.<br/> 
+	We found that storing 50,000 bytes will store about 
+	95% of all the documents completely. This really 
+	depends on how much storage is available and how much 
+	you want to show. 
+     </description>
+   </attribute>
+
+   <attribute name="max_hop_count" 
+              type="integer" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:Where" >
+     <default>999999</default>
+     <example>4</example>
+     <description> 
+	Instead of limiting the indexing process by URL 
+	pattern, it can also be limited by the number of hops 
+	or clicks a document is removed from the starting URL. 
+	<br/> 
+	The starting page or pages will have hop count 0. 
+     </description>
+   </attribute>
+
+   <attribute name="max_keywords" 
+              type="integer" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:What" >
+     <default>-1</default>
+     <example>10</example>
+     <description> 
+	This attribute can be used to limit the number of keywords 
+	per document that htdig will accept from meta keywords tags. 
+	A value of -1 or less means no limit. This can help combat meta 
+	keyword spamming, by limiting the number of keywords that will be 
+	indexed, but it will not completely prevent irrelevant matches 
+	in a search if the first few keywords in an offending document 
+	are not relevant to its contents. 
+     </description>
+   </attribute>
+
+   <attribute name="max_meta_description_length" 
+              type="integer" 
+              programs="htdig" 
+              version="3.1.0b1" 
+              category="Indexing:How" >
+     <default>512</default>
+     <example>1000</example>
+     <description> 
+	While gathering descriptions from meta description tags, 
+	<ref type="program">htdig</ref> will only store up to  
+	this much of the text for each document. 
+     </description>
+   </attribute>
+
+   <attribute name="max_prefix_matches" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.0b1" 
+              category="Searching:Method" >
+     <default>1000</default>
+     <example>100</example>
+     <description> 
+	The Prefix fuzzy algorithm could potentially match a 
+	very large number of words. This value limits the 
+	number of words each prefix can match. Note 
+	that this does not limit the number of documents that 
+	are matched in any way. 
+     </description>
+   </attribute>
+
+   <attribute name="max_retries" 
+              type="number" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:Connection" >
+     <default>3</default>
+     <example>6</example>
+     <description> 
+	 This option sets the maximum number of retries when retrieving a document 
+	 fails (mainly for reasons of connection). 
+     </description>
+   </attribute>
+
+   <attribute name="max_stars" 
+              type="number" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:How" >
+     <default>4</default>
+     <example>6</example>
+     <description> 
+	When stars are used to display the score of a match, 
+	this value determines the maximum number of stars that 
+	can be displayed. 
+     </description>
+   </attribute>
+
+   <attribute name="maximum_page_buttons" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.2.0b3" 
+              category="Presentation:How" >
+     <default>${maximum_pages}</default>
+     <example>20</example>
+     <description> 
+	This value limits the number of page links that will be 
+	included in the page list at the bottom of the search 
+	results page. By default, it takes on the value of the 
+	<ref type="attr">maximum_pages</ref> 
+	attribute, but you can set it to something lower to allow 
+	more pages than buttons. In this case, pages above this 
+	number will have no corresponding button. 
+     </description>
+   </attribute>
+
+   <attribute name="maximum_pages" 
+              type="integer" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:How" >
+     <default>10</default>
+     <example>20</example>
+     <description> 
+	This value limits the number of page links that will be 
+	included in the page list at the bottom of the search 
+	results page. As of version 3.1.4, this will limit the 
+	total number of matching documents that are shown. 
+	You can make the number of page buttons smaller than the 
+	number of allowed pages by setting the 
+	<ref type="attr">maximum_page_buttons</ref> 
+	attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="maximum_word_length" 
+              type="integer" 
+              programs="htdig htsearch" 
+              version="3.1.3" 
+              category="Indexing:What" >
+     <default>32</default>
+     <example>15</example>
+     <description> 
+	This sets the maximum length of words that will be 
+	indexed. Words longer than this value will be silently 
+	truncated when put into the index, or searched in the 
+	index. 
+     </description>
+   </attribute>
+
+   <attribute name="md5_db" 
+              type="string" 
+              programs="htdig" 
+              version="3.2.0b3" 
+              category="File Layout" >
+     <default>${database_base}.md5hash.db</default>
+     <example>${database_base}.md5.db</example>
+     <description> 
+	This file holds a database of md5 and date hashes of pages to 
+	catch and eliminate duplicates of pages. See also the 
+	<ref type="attr">check_unique_md5</ref> and 
+	<ref type="attr">check_unique_date</ref> attributes. 
+     </description>
+   </attribute>
+
+   <attribute name="meta_description_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="3.1.0b1" 
+              category="Searching:Ranking" >
+     <default>50</default>
+     <example>20</example>
+     <description> 
+	This is a factor which will be used to multiply the 
+	weight of words in any META description tags in a document. 
+	The number may be a floating point number. See also the 
+	<ref type="attr">heading_factor</ref> attribute and the 
+	<ref type="attr">description_factor</ref> attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="metaphone_db" 
+              type="string" 
+              programs="htfuzzy htsearch" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_base}.metaphone.db</default>
+     <example>${database_base}.mp.db</example>
+     <description> 
+	The database file used for the fuzzy "metaphone" search 
+	algorithm. This database is created by 
+	<ref type="program">htfuzzy</ref> and used by 
+	<ref type="program">htsearch</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="method_names" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="all" 
+              category="Searching:UI" >
+     <default>and All or Any boolean Boolean</default>
+     <example>or Or and And</example>
+     <description> 
+	These values are used to create the <strong> 
+	method</strong> menu. It consists of pairs. The first 
+	element of each pair is one of the known methods, the 
+	second element is the text that will be shown in the 
+	menu for that method. This text needs to be quoted if 
+	it contains spaces. 
+	See the <a href="hts_selectors.html">select list documentation</a> 
+	for more information on how this attribute is used. 
+     </description>
+   </attribute>
+
+   <attribute name="mime_types" 
+              type="string" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:Where" >
+     <default>${config_dir}/mime.types</default>
+     <example>/etc/mime.types</example>
+     <description> 
+	This file is used by htdig for local file access and resolving 
+	file:// URLs to ensure the files are parsable. If you are running 
+	a webserver with its own MIME file, you should set this attribute 
+	to point to that file. 
+     </description>
+   </attribute>
+
+   <attribute name="minimum_prefix_length" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.0b1" 
+              category="Searching:Method" >
+     <default>1</default>
+     <example>2</example>
+     <description> 
+	This sets the minimum length of prefix matches used by the 
+	"prefix" fuzzy matching algorithm. Words shorter than this 
+	will not be used in prefix matching. 
+     </description>
+   </attribute>
+
+   <attribute name="minimum_speling_length" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.2.0b1" 
+              category="Searching:Method" >
+     <default>5</default>
+     <example>3</example>
+     <description> 
+	This sets the minimum length of words used by the 
+	"speling" fuzzy matching algorithm. Words shorter than this 
+	will not be used in this fuzzy matching. 
+     </description>
+   </attribute>
+
+   <attribute name="minimum_word_length" 
+              type="integer" 
+              programs="htdig htsearch" 
+              version="all" 
+              category="Indexing:What" >
+     <default>3</default>
+     <example>2</example>
+     <description> 
+	This sets the minimum length of words that will be 
+	indexed. Words shorter than this value will be silently 
+	ignored but still put into the excerpt.<br/> 
+	Note that by making this value less than 3, a lot more 
+	words that are very frequent will be indexed. It might 
+	be advisable to add some of these to the
+	<ref type="attr">bad_word_list</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="multimatch_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Searching:Ranking" >
+     <default>1</default>
+     <example>1000</example>
+     <description> 
+	This factor gives higher rankings to documents that have more than
+	one matching search word when the <strong>or</strong>
+	<ref type="attr">match_method</ref> is used.
+	In version 3.1.6, the matching words' combined scores were multiplied
+	by this factor for each additional matching word.  Currently, this
+	multiplier is applied at most once.
+     </description>
+   </attribute>
+
+   <attribute name="next_page_text" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Presentation:Text" >
+     <default>[next]</default>
+     <example>&lt;img src="/htdig/buttonr.gif"&gt;</example>
+     <description> 
+	The text displayed in the hyperlink to go to the next 
+	page of matches. 
+     </description>
+   </attribute>
+
+   <attribute name="no_excerpt_show_top" 
+              type="boolean" 
+              programs="htsearch" 
+              version="3.1.0b3" 
+              category="Presentation:How" >
+     <default>false</default>
+     <example>yes</example>
+     <description> 
+	If no excerpt is available, this option will act the 
+	same as <ref type="attr">excerpt_show_top</ref>, that is, 
+	it will show the top of the document. 
+     </description>
+   </attribute>
+
+   <attribute name="no_excerpt_text" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default>&lt;em&gt;(None of the search words were found in the top of this document.)&lt;/em&gt;</default>
+     <example></example>
+     <description> 
+	This text will be displayed in place of the excerpt if 
+	there is no excerpt available. If this attribute is set 
+	to nothing (blank), the excerpt label will not be 
+	displayed in this case. 
+     </description>
+   </attribute>
+
+   <attribute name="no_next_page_text" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default>[next]</default>
+     <example></example>
+     <description> 
+	The text displayed where there would normally be a 
+	hyperlink to go to the next page of matches. 
+     </description>
+   </attribute>
+
+   <attribute name="no_page_list_header" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default></default>
+     <example>&lt;hr noshade size=2&gt;All results on this page.&lt;br&gt;</example>
+     <description> 
+	This text will be used as the value of the PAGEHEADER 
+	variable, for use in templates or the 
+	<ref type="attr">search_results_footer</ref> 
+	file, when all search results fit on a single page. 
+     </description>
+   </attribute>
+
+   <attribute name="no_page_number_text" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default></default>
+     <example>&lt;strong&gt;1&lt;/strong&gt; &lt;strong&gt;2&lt;/strong&gt; \ 
+				  &lt;strong&gt;3&lt;/strong&gt; &lt;strong&gt;4&lt;/strong&gt; \ 
+				  &lt;strong&gt;5&lt;/strong&gt; &lt;strong&gt;6&lt;/strong&gt; \ 
+				  &lt;strong&gt;7&lt;/strong&gt; &lt;strong&gt;8&lt;/strong&gt; \ 
+				  &lt;strong&gt;9&lt;/strong&gt; &lt;strong&gt;10&lt;/strong&gt; 
+</example>
+     <description> 
+	The text strings in this list will be used when putting 
+	together the PAGELIST variable, for use in templates or 
+	the <ref type="attr">search_results_footer</ref> 
+	file, when search results fit on more than one page. The PAGELIST 
+	is the list of links at the bottom of the search results page. 
+	There should be as many strings in the list as there are 
+	pages allowed by the <ref type="attr">maximum_page_buttons</ref> 
+	attribute. If there are not enough, or the list is empty, 
+	the page numbers alone will be used as the text for the links. 
+	An entry from this list is used for the current page, as the 
+	current page is shown in the page list without a hypertext link, 
+	while entries from the <ref type="attr">page_number_text</ref> list are used for the links to other pages. 
+	The text strings can contain HTML tags to highlight page numbers 
+	or embed images. The strings need to be quoted if they contain 
+	spaces. 
+     </description>
+   </attribute>
+
+   <attribute name="no_prev_page_text" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default>[prev]</default>
+     <example></example>
+     <description> 
+	The text displayed where there would normally be a 
+	hyperlink to go to the previous page of matches. 
+     </description>
+   </attribute>
+
+   <attribute name="no_title_text" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Presentation:Text" >
+     <default>filename</default>
+     <example>"No Title Found"</example>
+     <description> 
+	This specifies the text to use in search results when no 
+	title is found in the document itself. If it is set to 
+	filename, htsearch will use the name of the file itself, 
+	enclosed in brackets (e.g. [index.html]). 
+     </description>
+   </attribute>
+
+   <attribute name="noindex_end" 
+              type="string" 
+              programs="htdig" 
+              version="3.1.0" 
+              category="Indexing:What" >
+     <default>&lt;!--/htdig_noindex--&gt;</default>
+     <example>&lt;/SCRIPT&gt;</example>
+     <description> 
+	This string marks the end of a section of an HTML file that should be 
+	completely ignored when indexing. It works together with 
+	<ref type="attr">noindex_start</ref>. 
+	As in the defaults, this can be SGML comment  
+	declarations that can be inserted anywhere in the documents to exclude  
+	different sections from being indexed. However, existing tags can also be  
+	used; this is especially useful to exclude some sections from being indexed  
+	where the files to be indexed can not be edited. The example shows how 
+	SCRIPT sections in 'uneditable' documents can be skipped. 
+	Note that the match for this string is case insensitive. 
+     </description>
+   </attribute>
+
+   <attribute name="noindex_start" 
+              type="string" 
+              programs="htdig" 
+              version="3.1.0" 
+              category="Indexing:What" >
+     <default>&lt;!--htdig_noindex--&gt;</default>
+     <example>&lt;SCRIPT</example>
+     <description> 
+	This string marks the start of a section of an HTML file that should be 
+	completely ignored when indexing. It works together with 
+	<ref type="attr">noindex_end</ref>. 
+	As in the defaults, this can be SGML comment 
+	declarations that can be inserted anywhere in the documents to exclude 
+	different sections from being indexed. However, existing tags can also be 
+	used; this is especially useful to exclude some sections from being indexed 
+	where the files to be indexed can not be edited. The example shows how 
+	SCRIPT sections in 'uneditable' documents can be skipped; note how 
+	noindex_start does not contain an ending &gt;: this allows for all SCRIPT 
+	tags to be matched regardless of attributes defined (different types or 
+	languages). Note that the match for this string is case insensitive. 
+     </description>
+   </attribute>
+
+   <attribute name="nothing_found_file" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Files" >
+     <default>${common_dir}/nomatch.html</default>
+     <example>/www/searching/nothing.html</example>
+     <description> 
+	This specifies the file which contains the <code> 
+	HTML</code> text to display when no matches were found. 
+	The file should contain a complete <code>HTML</code> 
+	document.<br/> 
+	Note that this attribute could also be defined in 
+	terms of <ref type="attr">database_base</ref> to 
+	make it specific to the current search database. 
+     </description>
+   </attribute>
+
+   <attribute name="nph" 
+              type="boolean" 
+              programs="htsearch" 
+              version="3.2.0b2" 
+              category="Presentation:How" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	This attribute determines whether htsearch sends out full HTTP 
+	headers as required for an NPH (non-parsed header) CGI. Some 
+	servers assume CGIs will act in this fashion, for example MS 
+	IIS. If your server does not send out full HTTP headers, you 
+	should set this to true. 
+     </description>
+   </attribute>
+
+   <attribute name="page_list_header" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default>&lt;hr noshade size=2&gt;Pages:&lt;br&gt;</default>
+     <example></example>
+     <description> 
+	This text will be used as the value of the PAGEHEADER 
+	variable, for use in templates or the 
+	<ref type="attr">search_results_footer</ref> 
+	file, when search results fit on more than one page. 
+     </description>
+   </attribute>
+
+   <attribute name="page_number_separator" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="3.1.4" 
+              category="Presentation:Text" >
+     <default>" "</default>
+     <example>"&lt;/td&gt; &lt;td&gt;"</example>
+     <description> 
+	The text strings in this list will be used when putting 
+	together the PAGELIST variable, for use in templates or 
+	the <ref type="attr">search_results_footer</ref> 
+	file, when search results fit on more than one page. The PAGELIST 
+	is the list of links at the bottom of the search results page. 
+	The strings in the list will be used in rotation, and will 
+	separate individual entries taken from 
+	<ref type="attr">page_number_text</ref> and 
+	<ref type="attr">no_page_number_text</ref>. 
+	There can be as many or as few strings in the list as you like. 
+	If there are not enough for the number of pages listed, it goes 
+	back to the start of the list. If the list is empty, a space is 
+	used. The text strings can contain HTML tags. The strings need 
+	to be quoted if they contain spaces, or to specify an empty string. 
+     </description>
+   </attribute>
+
+   <attribute name="page_number_text" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default></default>
+     <example>&lt;em&gt;1&lt;/em&gt; &lt;em&gt;2&lt;/em&gt; \ 
+				  &lt;em&gt;3&lt;/em&gt; &lt;em&gt;4&lt;/em&gt; \ 
+				  &lt;em&gt;5&lt;/em&gt; &lt;em&gt;6&lt;/em&gt; \ 
+				  &lt;em&gt;7&lt;/em&gt; &lt;em&gt;8&lt;/em&gt; \ 
+				  &lt;em&gt;9&lt;/em&gt; &lt;em&gt;10&lt;/em&gt; 
+</example>
+     <description> 
+	The text strings in this list will be used when putting 
+	together the PAGELIST variable, for use in templates or 
+	the <ref type="attr">search_results_footer</ref> 
+	file, when search results fit on more than one page. The PAGELIST 
+	is the list of links at the bottom of the search results page. 
+	There should be as many strings in the list as there are 
+	pages allowed by the <ref type="attr">maximum_page_buttons</ref> 
+	attribute. If there are not enough, or the list is empty, 
+	the page numbers alone will be used as the text for the links. 
+	Entries from this list are used for the links to other pages, 
+	while an entry from the <ref type="attr">no_page_number_text</ref> list is used for the current page, as the 
+	current page is shown in the page list without a hypertext link. 
+	The text strings can contain HTML tags to highlight page numbers 
+	or embed images. The strings need to be quoted if they contain 
+	spaces. 
+     </description>
+   </attribute>
+
+   <attribute name="persistent_connections" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:Connection" 
+              block="Server" >
+     <default>true</default>
+     <example>false</example>
+     <description> 
+	If set to true, when servers make it possible, htdig can take advantage 
+	of persistent connections, as defined by HTTP/1.1 (<em>RFC2616</em>). This 
+	reduces the number of connection open/close operations when retrieving 
+	documents with HTTP. 
+     </description>
+   </attribute>
+
+   <attribute name="plural_suffix" 
+              type="string" 
+              programs="htsearch" 
+              version="3.2.0b2" 
+              category="Presentation:Text" >
+     <default>s</default>
+     <example>en</example>
+     <description> 
+	Specifies the value of the PLURAL_MATCHES template 
+	variable used in the header, footer and template files. 
+	This can be used for localization for non-English languages 
+	where 's' is not the appropriate suffix. 
+     </description>
+   </attribute>
+
+   <attribute name="prefix_match_character" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.0b1" 
+              category="Searching:Method" >
+     <default>*</default>
+     <example>ing</example>
+     <description> 
+	A null prefix character means that prefix matching should be 
+	applied to every search word. Otherwise, prefix matching is 
+	applied only to search words that end in the characters specified 
+	(e.g. "abc*" with the default value of *). 
+     </description>
+   </attribute>
+
+   <attribute name="prev_page_text" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Text" >
+     <default>[prev]</default>
+     <example>&lt;img src="/htdig/buttonl.gif"&gt;</example>
+     <description> 
+	The text displayed in the hyperlink to go to the 
+	previous page of matches. 
+     </description>
+   </attribute>
+
+   <attribute name="regex_max_words" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.2.0b1" 
+              category="Searching:Method" >
+     <default>25</default>
+     <example>10</example>
+     <description> 
+	The "regex" fuzzy algorithm could potentially match a 
+	very large number of words. This value limits the 
+	number of words each regular expression can match. Note 
+	that this does not limit the number of documents that 
+	are matched in any way. 
+     </description>
+   </attribute>
+
+   <attribute name="remove_bad_urls" 
+              type="boolean" 
+              programs="htpurge" 
+              version="all" 
+              category="Indexing:How" 
+              block="Server" >
+     <default>true</default>
+     <example>true</example>
+     <description> 
+	If TRUE, htpurge will remove any URLs which were marked 
+	as unreachable by htdig from the database. If FALSE, it 
+	will not do this. When htdig is run in initial mode, 
+	documents which were referred to but could not be 
+	accessed should probably be removed, and hence this 
+	option should then be set to TRUE. However, if htdig is 
+	run to update the database, this may cause documents on 
+	a server which is temporarily unavailable to be 
+	removed. This is probably NOT what was intended, so 
+	this option should be set to FALSE in that case. 
+     </description>
+   </attribute>
+
+   <attribute name="remove_default_doc" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.1.0" 
+              category="Indexing:How" >
+     <default>index.html</default>
+     <example>default.html default.htm index.html index.htm</example>
+     <description> 
+	Set this to the default documents in a directory used by the 
+	servers you are indexing. These document names will be stripped 
+	off of URLs when they are normalized, if one of these names appears 
+	after the final slash, to translate URLs like 
+	http://foo.com/index.html into http://foo.com/<br/> 
+	Note that you can disable stripping of these names during 
+	normalization by setting the list to an empty string. 
+	The list should only contain names that all servers you index 
+	recognize as default documents for directory URLs, as defined 
+	by the DirectoryIndex setting in Apache's srm.conf, for example. 
+	This only applies to http:// and https:// URLs. 
+     </description>
+   </attribute>
+
+   <attribute name="remove_unretrieved_urls" 
+              type="boolean" 
+              programs="htpurge" 
+              version="3.2.0b1" 
+              category="Indexing:How" 
+              block="Server" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	If TRUE, htpurge will remove any URLs which were discovered 
+	and included as stubs in the database but not yet retrieved. If FALSE, it 
+	will not do this. When htdig is run in initial mode with no restrictions  
+	on hopcount or maximum documents, these should probably be removed, so set 
+	this to true. However, if you are hoping to index a small set of documents and  
+	eventually get to the rest, you should probably leave this as false. 
+     </description>
+   </attribute>
+
+   <attribute name="restrict" 
+              type="pattern_list" 
+              programs="htsearch" 
+              version="3.2.0b4" 
+              category="Searching:Method" >
+     <default></default>
+     <example>//www.acme.com/widgets/</example>
+     <description> 
+	This specifies a set of patterns that all URLs have to 
+	match against in order for them to be included in the search 
+	results. Any number of strings can be specified, separated by 
+	spaces. If multiple patterns are given, at least one of the 
+	patterns has to match the URL. The list can be specified 
+	from within the configuration file, and can be overridden 
+	with the "restrict" input parameter in the search form. Note 
+	that the restrict list does not take precedence over the 
+	<ref type="attr">exclude</ref> list - if a URL matches patterns 
+	in both lists it is still excluded from the search results. 
+     </description>
+   </attribute>
+
+   <attribute name="robotstxt_name" 
+              type="string" 
+              programs="htdig" 
+              version="3.0.7" 
+              category="Indexing:Out" 
+              block="Server" >
+     <default>htdig</default>
+     <example>myhtdig</example>
+     <description> 
+	Sets the name that htdig will look for when parsing 
+	robots.txt files. This can be used to make htdig appear 
+	as a different spider than ht://Dig. Useful to 
+	distinguish between a private and a global index. 
+     </description>
+   </attribute>
+
+   <attribute name="script_name" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.4" 
+              category="Presentation:Text" >
+     <default></default>
+     <example>/search/results.shtml</example>
+     <description> 
+	Overrides the value of the SCRIPT_NAME 
+	environment variable. This is useful if 
+	htsearch is not being called directly as a CGI 
+	program, but indirectly from within a dynamic 
+	.shtml page using SSI directives. Previously, 
+	you needed a wrapper script to do this, but 
+	this configuration attribute makes wrapper 
+	scripts obsolete for SSI and possibly for 
+	other server scripting languages, as 
+	well. (You still need a wrapper script when 
+	using PHP, though.)<br/> 
+	Check out the <code>contrib/scriptname</code> 
+	directory for a small example. Note that this 
+	attribute also affects the value of the <a 
+	href="hts_templates.html#CGI">CGI</a> variable 
+	used in htsearch templates. 
+     </description>
+   </attribute>
+
+   <attribute name="search_algorithm" 
+              type="string_list" 
+              programs="htsearch" 
+              version="all" 
+              category="Searching:Method" >
+     <default>exact:1</default>
+     <example>exact:1 soundex:0.3</example>
+     <description> 
+			Specifies the search algorithms and their weight to use 
+			when searching. Each entry in the list consists of the 
+			algorithm name, followed by a colon (:) followed by a 
+			weight multiplier. The multiplier is a floating point 
+			number between 0 and 1. Note that depending on your 
+			<ref type="attr">locale</ref> setting, and whether your 
+			system's locale implementation affects floating point 
+			input, you may need to specify the decimal point as a 
+			comma rather than a period.<br/> 
+			<strong>Note:</strong> If the exact  
+			method is not listed, the search may not work since the  
+			original terms will not be used.<br/> 
+			Current algorithms supported are: 
+			<dl> 
+			  <dt> 
+				exact 
+			  </dt> 
+			  <dd> 
+				The default exact word matching algorithm. This 
+				will find only exactly matched words. 
+			  </dd> 
+			  <dt> 
+				soundex 
+			  </dt> 
+			  <dd> 
+				Uses a slightly modified soundex algorithm to match 
+				words. This requires that the soundex database be 
+				present. It is generated with the 
+				<ref type="program">htfuzzy</ref> program. 
+			  </dd> 
+			  <dt> 
+				metaphone 
+			  </dt> 
+			  <dd> 
+				Uses the metaphone algorithm for matching words. 
+				This algorithm is more specific to the English 
+				language than soundex. It requires the metaphone 
+				database, which is generated with the <ref type="program">htfuzzy</ref> program. 
+			  </dd> 
+			  <dt> 
+				accents 
+			  </dt> 
+			  <dd> 
+				Uses the accents algorithm for matching words. 
+				This algorithm will treat all accented letters 
+				as equivalent to their unaccented counterparts. 
+				It requires the accents database, which is 
+				generated with the <ref type="program">htfuzzy</ref> program. 
+			  </dd> 
+			  <dt> 
+				endings 
+			  </dt> 
+			  <dd> 
+				This algorithm uses language specific word endings 
+				to find matches. Each word is first reduced to its 
+				word root and then all known legal endings are used 
+				for the matching. This algorithm uses two databases 
+				which are generated with <ref type="program">htfuzzy</ref>. 
+			  </dd> 
+			  <dt> 
+				synonyms 
+			  </dt> 
+			  <dd> 
+				Performs a dictionary lookup on all the words. This 
+				algorithm uses a database generated with the <ref type="program">htfuzzy</ref> program. 
+			  </dd> 
+			<dt> 
+			substring 
+			</dt> 
+			<dd> 
+			  Matches all words containing the queries as 
+			  substrings. Since this requires checking every word in 
+			  the database, this can really slow down searches 
+			  considerably. 
+			</dd> 
+			<dt> 
+			  prefix 
+			</dt> 
+			<dd> 
+			  Matches all words beginning with the query 
+			  strings. Uses the option <ref type="attr">prefix_match_character</ref> 
+			  to decide whether a query requires prefix 
+			  matching. For example "abc*" would perform prefix 
+			  matching on "abc" since * is the default 
+			  prefix_match_character. 
+			</dd> 
+			<dt> 
+			regex 
+			</dt> 
+			<dd> 
+			  Matches all words that match the patterns given as regular  
+			  expressions. Since this requires checking every word in 
+			  the database, this can really slow down searches 
+			  considerably. 
+			</dd> 
+			<dt> 
+			speling 
+			</dt> 
+			<dd> 
+			  A simple fuzzy algorithm that tries to find one-off spelling  
+			  mistakes, such as transposition of two letters or an extra character. 
+			  Since this usually generates just a few possibilities, it is  
+			  relatively quick. 
+			</dd> 
+			</dl> 
+     </description>
+   </attribute>
+
+   <attribute name="search_results_footer" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Files" >
+     <default>${common_dir}/footer.html</default>
+     <example>/usr/local/etc/ht/end-stuff.html</example>
+     <description> 
+			This specifies a filename to be output at the end of 
+			search results. While outputting the footer, some 
+			variables will be expanded. Variables use the same 
+			syntax as the Bourne shell. If there is a variable VAR, 
+			the following will all be recognized: 
+			<ul> 
+			  <li> 
+				$VAR 
+			  </li> 
+			  <li> 
+				$(VAR) 
+			  </li> 
+			  <li> 
+				${VAR} 
+			  </li> 
+			</ul> 
+	The following variables are available.  See 
+	<a href="hts_templates.html">hts_templates.html</a> for a complete 
+	list. 
+			<dl> 
+			  <dt> 
+				MATCHES 
+			  </dt> 
+			  <dd> 
+				The number of documents that were matched. 
+			  </dd> 
+			  <dt> 
+				PLURAL_MATCHES 
+			  </dt> 
+			  <dd> 
+				If MATCHES is not 1, this will be the string "s", 
+				else it is an empty string. This can be used to say 
+				something like "$(MATCHES) 
+				document$(PLURAL_MATCHES) were found" 
+			  </dd> 
+			  <dt> 
+				MAX_STARS 
+			  </dt> 
+			  <dd> 
+				The value of the <ref type="attr">max_stars</ref> 
+				attribute. 
+			  </dd> 
+			  <dt> 
+				LOGICAL_WORDS 
+			  </dt> 
+			  <dd> 
+				A string of the search words with either "and" or 
+				"or" between the words, depending on the type of 
+				search. 
+			  </dd> 
+			  <dt> 
+				WORDS 
+			  </dt> 
+			  <dd> 
+				A string of the search words with spaces in 
+				between. 
+			  </dd> 
+			  <dt> 
+				PAGEHEADER 
+			  </dt> 
+			  <dd> 
+				This expands to either the value of the 
+				<ref type="attr">page_list_header</ref> or 
+				<ref type="attr">no_page_list_header</ref> 
+				attribute depending on how many pages there are. 
+			  </dd> 
+			</dl> 
+			Note that this file will <strong>NOT</strong> be output 
+			if no matches were found. In this case the 
+			<ref type="attr">nothing_found_file</ref> 
+			attribute is used instead. 
+			Also, this file will not be output if it is 
+			overridden by defining the 
+			<ref type="attr">search_results_wrapper</ref> 
+			attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="search_results_header" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Files" >
+     <default>${common_dir}/header.html</default>
+     <example>/usr/local/etc/ht/start-stuff.html</example>
+     <description> 
+			This specifies a filename to be output at the start of 
+			search results. While outputting the header, some 
+			variables will be expanded. Variables use the same 
+			syntax as the Bourne shell. If there is a variable VAR, 
+			the following will all be recognized: 
+			<ul> 
+			  <li> 
+				$VAR 
+			  </li> 
+			  <li> 
+				$(VAR) 
+			  </li> 
+			  <li> 
+				${VAR} 
+			  </li> 
+			</ul> 
+	The following variables are available.  See 
+	<a href="hts_templates.html">hts_templates.html</a> for a complete 
+	list. 
+	<!-- Do these need to be listed for both _footer and _header? --> 
+			<dl> 
+			  <dt> 
+				MATCHES 
+			  </dt> 
+			  <dd> 
+				The number of documents that were matched. 
+			  </dd> 
+			  <dt> 
+				PLURAL_MATCHES 
+			  </dt> 
+			  <dd> 
+				If MATCHES is not 1, this will be the string "s", 
+				else it is an empty string. This can be used to say 
+				something like "$(MATCHES) 
+				document$(PLURAL_MATCHES) were found" 
+			  </dd> 
+			  <dt> 
+				MAX_STARS 
+			  </dt> 
+			  <dd> 
+				The value of the <ref type="attr">max_stars</ref> 
+				attribute. 
+			  </dd> 
+			  <dt> 
+				LOGICAL_WORDS 
+			  </dt> 
+			  <dd> 
+				A string of the search words with either "and" or 
+				"or" between the words, depending on the type of 
+				search. 
+			  </dd> 
+			  <dt> 
+				WORDS 
+			  </dt> 
+			  <dd> 
+				A string of the search words with spaces in 
+				between. 
+			  </dd> 
+			</dl> 
+			Note that this file will <strong>NOT</strong> be output 
+			if no matches were found. In this case the 
+			<ref type="attr">nothing_found_file</ref> 
+			attribute is used instead. 
+			Also, this file will not be output if it is 
+			overridden by defining the 
+			<ref type="attr">search_results_wrapper</ref> 
+			attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="search_results_order" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.2.0b2" 
+              category="Searching:Ranking" >
+     <default></default>
+     <example>/docs/|faq.html * /maillist/ /testresults/</example>
+     <description> 
+	This specifies a list of patterns for URLs in 
+	search results.  Results will be displayed in the 
+	specified order, with the search algorithm result 
+	as the second order.  Remaining areas, that do not 
+	match any of the specified patterns, can be placed 
+	by using * as the pattern.  If no * is specified, 
+	one will be implicitly placed at the end of the 
+	list.<br/> 
+	See also <ref type="attr">url_seed_score</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="search_results_wrapper" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Presentation:Files" >
+     <default></default>
+     <example>${common_dir}/wrapper.html</example>
+     <description> 
+	This specifies a filename to be output at the start and 
+	end of search results. This file replaces the 
+	<ref type="attr">search_results_header</ref> and 
+	<ref type="attr">search_results_footer</ref> 
+	files, with the contents of both in one file, and uses the 
+	pseudo-variable <strong>$(HTSEARCH_RESULTS)</strong> as a 
+	separator for the header and footer sections. 
+	If the filename is not specified, the file is unreadable, 
+	or the pseudo-variable above is not found, htsearch reverts 
+	to the separate header and footer files instead. 
+	While outputting the wrapper, 
+	some variables will be expanded, just as for the 
+	<ref type="attr">search_results_header</ref> and 
+	<ref type="attr">search_results_footer</ref> 
+	files.<br/> 
+	Note that this file will <strong>NOT</strong> be output 
+	if no matches were found. In this case the 
+	<ref type="attr">nothing_found_file</ref> 
+	attribute is used instead. 
+     </description>
+   </attribute>
+
+   <attribute name="search_rewrite_rules" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="URLs" >
+     <default></default>
+     <example> http://(.*)\\.mydomain\\.org/([^/]*)  http://\\2.\\1.com \
+	       http://www\\.myschool\\.edu/myorgs/([^/]*)  http://\\1.org
+     </example>
+     <description> 
+	This is a list of pairs, <em>regex</em> <em>replacement</em>, used
+	to rewrite URLs in the search results. The left hand string is a
+	regular expression; the right hand string is a literal string with
+	embedded placeholders for fragments that matched inside brackets in the
+	regular expression. \0 is the whole matched string, \1 to \9 are
+	bracketed substrings. The backslash must be doubled-up in the
+	attribute setting to get past the variable expansion parsing. Rewrite
+	rules are applied sequentially to each URL before it is displayed
+	or checked against the <ref type="attr">restrict</ref> or
+	<ref type="attr">exclude</ref> lists. Rewriting does not stop once a
+	match has been made, so multiple rules may affect a given URL. See
+	also <ref type="attr">url_part_aliases</ref> which allows URLs
+	to be of one form during indexing and translated for results,
+	and <ref type="attr">url_rewrite_rules</ref> which allows URLs
+	to be rewritten while indexing.
+     </description>
+   </attribute>
+
+   <attribute name="server_aliases" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.1.0b2" 
+              category="Indexing:Where" >
+     <default></default>
+     <example>foo.mydomain.com:80=www.mydomain.com:80 \ 
+				  bar.mydomain.com:80=www.mydomain.com:80 
+</example>
+     <description> 
+	This attribute tells the indexer that servers have several 
+	DNS aliases, which all point to the same machine and are NOT 
+	virtual hosts. This allows you to ensure pages are indexed 
+	only once on a given machine, despite the alias used in a URL. 
+	As shown in the example, the mapping goes from left to right, 
+	so the server name on the right hand side is the one that is 
+	used. As of version 3.1.3, the port number is optional, and is 
+	assumed to be 80 if omitted. There is no easy way to map all 
+	ports from one alias to another without listing them all. 
+     </description>
+   </attribute>
+
+   <attribute name="server_max_docs" 
+              type="integer" 
+              programs="htdig" 
+              version="3.1.0b3" 
+              category="Indexing:Where" 
+              block="Server" >
+     <default>-1</default>
+     <example>50</example>
+     <description> 
+	This attribute tells htdig to limit the dig to retrieve a maximum 
+	number of documents from each server. This can cause 
+	unusual behavior on update digs since the old URLs are 
+	stored alphabetically. Therefore, update digs will add 
+	additional URLs in pseudo-alphabetical order, up to the 
+	limit of the attribute. However, it is most useful to 
+	partially index a server as the URLs of additional 
+	documents are entered into the database, marked as never 
+	retrieved.<br/> 
+	A value of -1 specifies no limit. 
+     </description>
+   </attribute>
+
+   <attribute name="server_wait_time" 
+              type="integer" 
+              programs="htdig" 
+              version="3.1.0b3" 
+              category="Indexing:Connection" 
+              block="Server" >
+     <default>0</default>
+     <example>20</example>
+     <description> 
+	This attribute tells htdig to ensure a server has had a 
+	delay (in seconds) from the beginning of the last 
+	connection. This can be used to prevent "server abuse" 
+	by digging without delay. It's recommended to set this 
+	to 10-30 (seconds) when indexing servers that you don't 
+	monitor yourself. Additionally, this attribute can slow 
+	down local indexing if set, which may or may not be what 
+	you intended. 
+     </description>
+   </attribute>
+
+   <attribute name="sort" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Presentation:How" >
+     <default>score</default>
+     <example>revtime</example>
+     <description> 
+	This is the default sorting method that htsearch 
+	uses to determine the order in which matches are displayed. 
+	The valid choices are: 
+	<table border="0"> 
+	<tr> 
+	<td> 
+	<ul> 
+	     <li> score </li> 
+	     <li> time </li> 
+	     <li> title </li> 
+	</ul> 
+	</td> 
+	<td> 
+	<ul> 
+	     <li> revscore </li> 
+	     <li> revtime </li> 
+	     <li> revtitle </li> 
+	</ul> 
+	</td> 
+	</tr> 
+	</table> 
+	This attribute will only be used if the HTML form that 
+	calls htsearch didn't have the <strong>sort</strong> 
+	value set. The words date and revdate can be used instead 
+	of time and revtime, as both will sort by the time that 
+	the document was last modified, if this information is 
+	given by the server. The default is to sort by the score, 
+	which ranks documents by best match. The sort methods that 
+	begin with "rev" simply reverse the order of the 
+	sort. Note that setting this to something other than 
+	"score" will incur a slowdown in searches. 
+     </description>
+   </attribute>
+
+   <attribute name="sort_names" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="3.1.0" 
+              category="Searching:UI" >
+     <default>score Score time Time title Title revscore 'Reverse Score' revtime 'Reverse Time' revtitle 'Reverse Title'</default>
+     <example>score 'Best Match' time Newest title A-Z \ 
+				  revscore 'Worst Match' revtime Oldest revtitle Z-A 
+</example>
+     <description> 
+	These values are used to create the <strong> 
+	sort</strong> menu. It consists of pairs. The first 
+	element of each pair is one of the known sort methods, the 
+	second element is the text that will be shown in the 
+	menu for that sort method. This text needs to be quoted if 
+	it contains spaces. 
+	See the <a href="hts_selectors.html">select list documentation</a> 
+	for more information on how this attribute is used. 
+     </description>
+   </attribute>
+
+   <attribute name="soundex_db" 
+              type="string" 
+              programs="htfuzzy htsearch" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_base}.soundex.db</default>
+     <example>${database_base}.snd.db</example>
+     <description> 
+	The database file used for the fuzzy "soundex" search 
+	algorithm. This database is created by 
+	<ref type="program">htfuzzy</ref> and used by 
+	<ref type="program">htsearch</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="star_blank" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Text" >
+     <default>${image_url_prefix}/star_blank.gif</default>
+     <example>//www.somewhere.org/icons/noelephant.gif</example>
+     <description> 
+	This specifies the URL to use to display a blank of the 
+	same size as the star defined in the 
+	<ref type="attr">star_image</ref> attribute or in the 
+	<ref type="attr">star_patterns</ref> attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="star_image" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Text" >
+     <default>${image_url_prefix}/star.gif</default>
+     <example>//www.somewhere.org/icons/elephant.gif</example>
+     <description> 
+	This specifies the URL to use to display a star. This 
+	allows you to use some other icon instead of a star. 
+	(We like the star...)<br/> 
+	The display of stars can be turned on or off with the 
+	<ref type="attr">use_star_image</ref> 
+	attribute and the maximum number of stars that can be 
+	displayed is determined by the 
+	<ref type="attr">max_stars</ref> attribute.<br/> 
+	Even though the image can be changed, the ALT value 
+	for the image will always be a '*'. 
+     </description>
+   </attribute>
+
+   <attribute name="star_patterns" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:How" >
+     <default></default>
+     <example>http://www.sdsu.edu /sdsu.gif \ 
+				  http://www.ucsd.edu /ucsd.gif 
+</example>
+     <description> 
+	This attribute allows the star image to be changed 
+	depending on the URL or the match it is used for. This 
+	is mainly to make a visual distinction between matches 
+	on different web sites. The star image could be 
+	replaced with the logo of the company the match refers 
+	to.<br/> 
+	It is advisable to keep all the images the same size 
+	in order to line things up properly in a short result 
+	listing.<br/> 
+	The format is simple. It is a list of pairs. The first 
+	element of each pair is a pattern, the second element 
+	is a URL to the image for that pattern. 
+     </description>
+   </attribute>
+
+   <attribute name="startday" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Searching:Method" >
+     <default></default>
+     <example>1</example>
+     <description> 
+	Day component of first date allowed as last-modified date 
+	of returned documents. 
+	This is most usefully specified as a 
+	<a href="hts_form.html#startyear">CGI argument</a>. 
+	See also <ref type="attr">startyear</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="start_ellipses" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Text" >
+     <default>&lt;strong&gt;&lt;code&gt;... &lt;/code&gt;&lt;/strong&gt;</default>
+     <example>...</example>
+     <description> 
+	When excerpts are displayed in the search output, this 
+	string will be prepended to the excerpt if there is 
+	text before the text displayed. This is just a visual 
+	reminder to the user that the excerpt is only part of 
+	the complete document. 
+     </description>
+   </attribute>
+
+   <attribute name="start_highlight" 
+              type="string" 
+              programs="htsearch" 
+              version="3.1.4" 
+              category="Presentation:Text" >
+     <default>&lt;strong&gt;</default>
+     <example>&lt;font color="#FF0000"&gt;</example>
+     <description> 
+	When excerpts are displayed in the search output, matched 
+	words will be highlighted using this string and 
+	<ref type="attr">end_highlight</ref>. 
+	You should ensure that highlighting tags are balanced, 
+	that is, any formatting tags that this string 
+	opens should be closed by end_highlight. 
+     </description>
+   </attribute>
+
+   <attribute name="startmonth" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Searching:Method" >
+     <default></default>
+     <example>1</example>
+     <description> 
+	Month component of first date allowed as last-modified date 
+	of returned documents. 
+	This is most usefully specified as a 
+	<a href="hts_form.html#startyear">CGI argument</a>. 
+	See also <ref type="attr">startyear</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="start_url" 
+              type="string_list" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:Where" >
+     <default>http://www.htdig.org/</default>
+     <example>http://www.somewhere.org/alldata/index.html</example>
+     <description> 
+	This is the list of URLs that will be used to start a 
+	dig when there was no existing database. Note that 
+	multiple URLs can be given here. 
+	<br/>Note also that the value of <em>start_url</em>
+	will be the default value for
+	<ref type="attr">limit_urls_to</ref>, so if
+	you set start_url to the URLs for specific files,
+	rather than a site or subdirectory URL, you may need
+	to set limit_urls_to to something less restrictive
+	so htdig doesn't reject links in the documents.
+     </description>
+   </attribute>
+
+   <attribute name="startyear" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.1.6" 
+              category="Searching:Method" >
+     <default>1970</default>
+     <example>2001</example>
+     <description> 
+	This specifies the year of the cutoff start date for 
+	search results. If the start or end date are specified, 
+	only results with a last modified date within this 
+	range are shown. 
+	See also <ref type="attr">startday</ref>, 
+	<ref type="attr">startmonth</ref>, 
+	<ref type="attr">endday</ref>, 
+	<ref type="attr">endmonth</ref>, 
+	<ref type="attr">endyear</ref>. 
+	These are most usefully specified as a 
+	<a href="hts_form.html#startyear">CGI argument</a>.<br/> 
+	For each component, if a negative number is given, 
+	it is taken as relative to the current date. 
+	Relative days can span several months or even years if desired, 
+	and relative months can span several years. A startday of 
+	-90 will select matching documents modified within 
+	the last 90 days. 
+     </description>
+   </attribute>
+
+   <attribute name="substring_max_words" 
+              type="integer" 
+              programs="htsearch" 
+              version="3.0.8b1" 
+              category="Searching:Method" >
+     <default>25</default>
+     <example>100</example>
+     <description> 
+	The Substring fuzzy algorithm could potentially match a 
+	very large number of words. This value limits the 
+	number of words each substring pattern can match. Note 
+	that this does not limit the number of documents that 
+	are matched in any way. 
+     </description>
+   </attribute>
+
+   <attribute name="synonym_db" 
+              type="string" 
+              programs="htsearch htfuzzy" 
+              version="3.0" 
+              category="File Layout" >
+     <default>${common_dir}/synonyms.db</default>
+     <example>${database_base}.syn.db</example>
+     <description> 
+	Points to the database that <ref type="program">htfuzzy</ref> creates when the <strong>synonyms</strong> 
+	algorithm is used.<br/> 
+	<ref type="program">htsearch</ref> 
+	uses this to perform synonym dictionary lookups. 
+     </description>
+   </attribute>
+
+   <attribute name="synonym_dictionary" 
+              type="string" 
+              programs="htfuzzy" 
+              version="3.0" 
+              category="File Layout" >
+     <default>${common_dir}/synonyms</default>
+     <example>/usr/dict/synonyms</example>
+     <description> 
+	This points to a text file containing the synonym 
+	dictionary used for the synonyms search algorithm.<br/> 
+	Each line of this file has at least two words. The 
+	first word is the word to replace, the rest of the 
+	words are synonyms for that word. 
+     </description>
+   </attribute>
+
+   <attribute name="syntax_error_file" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Files" >
+     <default>${common_dir}/syntax.html</default>
+     <example>${common_dir}/synerror.html</example>
+     <description> 
+	This points to the file which will be displayed if a 
+	boolean expression syntax error was found. 
+     </description>
+   </attribute>
+
+   <attribute name="tcp_max_retries" 
+              type="integer" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:Connection" 
+              block="Server" >
+     <default>1</default>
+     <example>6</example>
+     <description> 
+	 This option sets the maximum number of attempts when a connection 
+	 <ref type="attr">timeout</ref>s. 
+	 After all these retries, the connection attempt results in &lt;timed out&gt;. 
+     </description>
+   </attribute>
+
+   <attribute name="tcp_wait_time" 
+              type="integer" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:Connection" 
+              block="Server" >
+     <default>5</default>
+     <example>10</example>
+     <description> 
+	 This attribute sets the wait time (in seconds) after a connection 
+	 fails and the <ref type="attr">timeout</ref> is raised. 
+     </description>
+   </attribute>
+
+   <attribute name="template_map" 
+              type="quoted_string_list" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Presentation:Files,Searching:UI" >
+     <default>Long builtin-long builtin-long Short builtin-short builtin-short</default>
+     <example>Short short ${common_dir}/short.html \ 
+				  Normal normal builtin-long \ 
+				  Detailed detail ${common_dir}/detail.html 
+</example>
+     <description> 
+	This maps match template names to internal names and 
+	template file names. It is a list of triplets. The 
+	first element in each triplet is the name that will be 
+	displayed in the FORMAT menu. The second element is the 
+	name used internally and the third element is a 
+	filename of the template to use.<br/> 
+	There are two predefined templates, namely <strong> 
+	builtin-long</strong> and <strong> 
+	builtin-short</strong>. If the filename is one of 
+	those, they will be used instead.<br/> 
+	More information about templates can be found in the 
+	<ref type="program">htsearch</ref> 
+	documentation.  The particular template is selected by the 
+	<a href="hts_form.html#format">format</a> cgi argument, and the 
+	default is given by <ref type="attr">template_name</ref> in 
+	the config file. 
+     </description>
+   </attribute>
+
+   <attribute name="template_name" 
+              type="string" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Searching:UI,Presentation:How" >
+     <default>builtin-long</default>
+     <example>long</example>
+     <description> 
+	Specifies the default template if no
+	<a href="hts_form.html#format">format</a> field is given by the 
+	search form. This needs to map to the 
+	<ref type="attr">template_map</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="template_patterns" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.1.4" 
+              category="Presentation:How" >
+     <default></default>
+     <example>http://www.sdsu.edu ${common_dir}/sdsu.html \ 
+				  http://www.ucsd.edu ${common_dir}/ucsd.html 
+</example>
+     <description> 
+	This attribute allows the results template to be changed 
+	depending on the URL or the match it is used for. This 
+	is mainly to make a visual distinction between matches 
+	on different web sites. The results for each site could 
+	thus be shown in a style matching that site.<br/> 
+	The format is simply a list of pairs. The first 
+	element of each pair is a pattern, the second element 
+	is the name of the template file for that pattern.<br/> 
+	More information about templates can be found in the 
+	<ref type="program">htsearch</ref> 
+	documentation.<br/> 
+	Normally, when using this template selection method, you 
+	would disable user selection of templates via the <strong>format</strong> 
+	input parameter in search forms, as the two methods were not 
+	really designed to interact. Templates selected by URL patterns 
+	would override any user selection made in the form. If you want 
+	to use the two methods together, see the notes on 
+	<a href="hts_selectors.html#template_patterns">combining</a> 
+	them for an example of how to do this. 
+     </description>
+   </attribute>
+
+   <attribute name="text_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="3.0" 
+              category="Searching:Ranking" >
+     <default>1</default>
+     <example>0</example>
+     <description> 
+	This is a factor which will be used to multiply the 
+	weight of words that are not in any special part of a 
+	document. Setting a factor to 0 will cause normal words 
+	to be ignored. The number may be a floating point 
+	number. See also the <ref type="attr">heading_factor</ref> 
+	attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="timeout" 
+              type="integer" 
+              programs="htdig" 
+              version="all" 
+              category="Indexing:Connection" 
+              block="Server" >
+     <default>30</default>
+     <example>42</example>
+     <description> 
+	Specifies the time the digger will wait to complete a 
+	network read. This is just a safeguard against 
+	unforeseen things like the all too common 
+	transformation from a network to a notwork.<br/> 
+	The timeout is specified in seconds. 
+     </description>
+   </attribute>
+
+   <attribute name="title_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="all" 
+              category="Searching:Ranking" >
+     <default>100</default>
+     <example>12</example>
+     <description> 
+	This is a factor which will be used to multiply the 
+	weight of words in the title of a document. Setting a 
+	factor to 0 will cause words in the title to be 
+	ignored. The number may be a floating point number. See 
+	also the <ref type="attr">heading_factor</ref> attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="url_list" 
+              type="string" 
+              programs="htdig" 
+              version="all" 
+              category="Extra Output" >
+     <default>${database_base}.urls</default>
+     <example>/tmp/urls</example>
+     <description> 
+	This file is only created if 
+	<ref type="attr">create_url_list</ref> is set to 
+	true. It will contain a list of all URLs that were 
+	seen. 
+     </description>
+   </attribute>
+
+   <attribute name="url_log" 
+              type="string" 
+              programs="htdig" 
+              version="3.1.0" 
+              category="Extra Output" >
+     <default>${database_base}.log</default>
+     <example>/tmp/htdig.progress</example>
+     <description> 
+	If <ref type="program">htdig</ref> is run with the -l option 
+	and interrupted, it will write out its progress to this 
+	file. Note that if it has a large number of URLs to write, 
+	it may take some time to exit. This can especially happen 
+	when running update digs and the run is interrupted soon 
+	after beginning. 
+     </description>
+   </attribute>
+
+   <attribute name="url_part_aliases" 
+              type="string_list" 
+              programs="all" 
+              version="3.1.0" 
+              category="URLs" >
+     <default></default>
+     <example>http://search.example.com/~htdig *site \ 
+				   http://www.htdig.org/this/ *1 \ 
+				   .html *2
+     </example>
+     <example>http://www.htdig.org/ *site \ 
+				   http://www.htdig.org/that/ *1 \ 
+				   .htm *2 
+</example>
+     <description> 
+	A list of translation pairs <em>from</em> and 
+	<em>to</em>, used when accessing the database. 
+	If a part of a URL matches the 
+	<em>from</em>-string of a pair, it will be 
+	translated into the <em>to</em>-string just before 
+	writing the URL to the database, and translated 
+	back just after reading it from the database.<br/> 
+	This is primarily used to provide an easy way to 
+	rename parts of URLs for e.g. changing 
+	www.example.com/~htdig to www.htdig.org.  Two 
+	different configuration files for digging and 
+	searching are then used, with url_part_aliases 
+	having different <em>from</em> strings, but 
+	identical <em>to</em>-strings.<br/> 
+	See also <ref type="attr">common_url_parts</ref>.<br/> 
+	Strings that are normally incorrect in URLs or 
+	very seldom used, should be used as 
+	<em>to</em>-strings, since extra storage will be 
+	used each time one is found as normal part of a 
+	URL.  Translations will be performed with priority 
+	for the leftmost longest match.	 Each 
+	<em>to</em>-string must be unique and not be a 
+	part of any other <em>to</em>-string.<br/> 
+	Note that when this attribute is changed, the 
+	database should be rebuilt, unless the effect of 
+	"moving" the affected URLs in the database is 
+	wanted, as described above.<br/> 
+	<strong>Please note:</strong> Don't just copy the 
+	example below into a single configuration file. 
+	There are two separate settings of 
+	<em>url_part_aliases</em> below; the first one is 
+	for the configuration file to be used by htdig, 
+	htmerge, and htnotify, and the second one is for the 
+	configuration file to be used by htsearch. 
+     </description>
+   </attribute>
+
+   <attribute name="url_rewrite_rules" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.2.0b3" 
+              category="URLs" >
+     <default></default>
+     <example>(.*)\\?JServSessionIdroot=.*		\\1 \ 
+			(.*)\\&amp;JServSessionIdroot=.*		\\1 \ 
+			(.*)&amp;context=.*				\\1</example>
+     <description> 
+	This is a list of pairs, <em>regex</em> <em>replacement</em> used to 
+	permanently rewrite URLs as they are indexed. The left hand string is 
+	a regex; the right hand string is  a literal string with embedded 
+	placeholders for fragments that matched  inside brackets in the 
+	regex. \0 is the whole matched string, \1 to \9 are  bracketed 
+	substrings. Rewrite rules are applied sequentially to each  
+	incoming URL  before normalization occurs. Rewriting does not stop 
+	once a match has been made, so multiple rules may affect a given URL. 
+	See also <ref type="attr">url_part_aliases</ref> which 
+	allows URLs to be of one  
+form during indexing and translated for results. 
+     </description>
+   </attribute>
+
+   <attribute name="url_seed_score" 
+              type="string_list" 
+              programs="htsearch" 
+              version="3.2.0b2" 
+              category="Searching:Ranking" >
+     <default></default>
+     <example>/mailinglist/ *.5-1e6  
+	      /docs/|/news/ *1.5  
+	      /testresults/ &quot;*.7 -200&quot;  
+	      /faq-area/ *2+10000</example>
+     <description> 
+	This is a list of pairs, <em>pattern</em> 
+	<em>formula</em>, used to weigh the score of 
+	hits, depending on the URL of the document.<br/> 
+	The <em>pattern</em> part is a substring to match 
+	against the URL.  Pipe ('|') characters can be 
+	used in the pattern to concatenate substrings for 
+	web-areas that have the same formula.<br/> 
+	The formula describes a <em>factor</em> and a 
+	<em>constant</em>, by which the hit score is 
+	weighed.  The <em>factor</em> part is multiplied 
+	to the original score, then the <em>constant</em> 
+	part is added.<br/> 
+	The format of the formula is the factor part: 
+	&quot;*<em>N</em>&quot; optionally followed by comma and 
+	spaces, followed by the constant part : 
+	&quot;+<em>M</em>&quot;, where the plus sign may be omitted 
+	for negative numbers.  Either part is optional, 
+	but must come in this order.<br/> 
+	The numbers <em>N</em> and <em>M</em> are floating 
+	point constants.<br/> 
+	More straightforward is to think of the format as 
+	&quot;newscore = oldscore*<em>N</em>+<em>M</em>&quot;, 
+	but with the &quot;newscore = oldscore&quot; part left out. 
+     </description>
+   </attribute>
+
+   <attribute name="url_text_factor" 
+              type="number" 
+              programs="htsearch" 
+              version="??" 
+              category="Searching:Ranking" >
+     <default>1</default>
+     <example>1</example>
+     <description> 
+	TO BE COMPLETED<br/> 
+	See also <ref type="attr">heading_factor</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="use_doc_date" 
+              type="boolean" 
+              programs="htdig" 
+              version="3.2.0b1" 
+              category="Indexing:How" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	If set to true, htdig will use META date tags in documents, 
+	overriding the modification date returned by the server. 
+	Any documents that do not have META date tags will retain 
+	the last modified date returned by the server or found on 
+	the local file system. 
+     </description>
+   </attribute>
+
+   <attribute name="use_meta_description" 
+              type="boolean" 
+              programs="htsearch" 
+              version="3.1.0b1" 
+              category="Presentation:How" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	If set to true, any META description tags will be used as 
+	excerpts by htsearch. Any documents that do not have META 
+	descriptions will retain their normal excerpts. 
+     </description>
+   </attribute>
+
+   <attribute name="use_star_image" 
+              type="boolean" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:How" >
+     <default>true</default>
+     <example>no</example>
+     <description> 
+	If set to true, the <ref type="attr">star_image</ref> attribute is used to display up to 
+	<ref type="attr">max_stars</ref> images for 
+	each match. 
+     </description>
+   </attribute>
+
+   <attribute name="user_agent" 
+              type="string" 
+              programs="htdig" 
+              version="3.1.0b2" 
+              category="Indexing:Out" 
+              block="Server" >
+     <default>htdig</default>
+     <example>htdig-digger</example>
+     <description> 
+	This allows customization of the user_agent: field sent when 
+	the digger requests a file from a server. 
+     </description>
+   </attribute>
+
+   <attribute name="valid_extensions" 
+              type="string_list" 
+              programs="htdig" 
+              version="3.1.4" 
+              category="Indexing:Where" 
+              block="URL" >
+     <default></default>
+     <example>.html .htm .shtml</example>
+     <description> 
+	This is a list of extensions on URLs which are 
+	the only ones considered acceptable. This list is used to 
+	supplement the MIME-types that the HTTP server provides 
+	with documents. Some HTTP servers do not have a correct 
+	list of MIME-types and so can advertise certain 
+	documents as text while they are some binary format. 
+	If the list is empty, then all extensions are acceptable, 
+	provided they pass other criteria for acceptance or rejection. 
+	If the list is not empty, only documents with one of the 
+	extensions in the list are parsed. 
+	See also <ref type="attr">bad_extensions</ref>. 
+     </description>
+   </attribute>
+
+   <attribute name="valid_punctuation" 
+              type="string" 
+              programs="htdig htsearch" 
+              version="all" 
+              category="Indexing:What" >
+     <default>.-_/!#$%^&amp;'</default>
+     <example>-'</example>
+     <description> 
+	This is the set of characters which will be deleted 
+	from the document before determining what a word is. 
+	This means that if a document contains something like 
+	<code>Andrew's</code> the digger will see this as <code> 
+	Andrews</code>.<br/> 
+	The same transformation is performed on the keywords 
+	the search engine gets.<br/> 
+	See also the <ref type="attr">extra_word_characters</ref> 
+	attribute. 
+     </description>
+   </attribute>
+
+   <attribute name="version" 
+              type="string" 
+              programs="htsearch" 
+              version="all" 
+              category="Presentation:Text" >
+     <default configmacro="true">VERSION</default>
+     <example>3.2.0</example>
+     <description> 
+	This specifies the value of the VERSION 
+	variable which can be used in search templates. 
+	The default value of this attribute is determined 
+	at compile time, and will not normally be set 
+	in configuration files. 
+     </description>
+   </attribute>
+
+   <attribute name="word_db" 
+              type="string" 
+              programs="all" 
+              version="all" 
+              category="File Layout" >
+     <default>${database_base}.words.db</default>
+     <example>${database_base}.allwords.db</example>
+     <description> 
+	This is the main word database. It is an index of all 
+	the words to a list of documents that contain the 
+	words. This database can grow large pretty quickly. 
+     </description>
+   </attribute>
+
+   <attribute name="word_dump" 
+              type="string" 
+              programs="htdig htdump htload" 
+              version="3.2.0b1" 
+              category="File Layout" >
+     <default>${database_base}.worddump</default>
+     <example>/tmp/words.txt</example>
+     <description> 
+	This file is basically a text version of the file 
+	specified in <ref type="attr">word_db</ref>. Its 
+	only use is to have a human readable database of all 
+	words. The file is easy to parse with tools like 
+	perl or tcl. 
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_cache_size" 
+              type="integer" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Indexing:How" >
+     <default>10000000</default>
+     <example>40000000</example>
+     <description> 
+	Size of memory cache used by Berkeley DB (DB used by the indexer). 
+	IMPORTANT: It  makes a <strong>huge</strong> difference. The rule  
+	is that the cache size should be at least 2% of the expected index size. The 
+	Berkeley DB file has 1% of internal pages that *must* be cached for good 
+	performance. Giving an additional 1% leaves room for caching leaf pages. 
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_compress" 
+              type="boolean" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Indexing:How" >
+     <default>true</default>
+     <example>true</example>
+     <description> 
+	Enables or disables the default compression system for the indexer. 
+	This currently compresses the index by a factor of 8. If the 
+	Zlib library is not found on the system, the default is false. 
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_compress_zlib" 
+              type="boolean" 
+              programs="all" 
+              version="3.2.0b4" 
+              category="Indexing:How" >
+     <default>true</default>
+     <example>true</example>
+     <description> 
+	Enables or disables the zlib compression system for the indexer. 
+	wordlist_compress must be true to use this option!
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_monitor" 
+              type="boolean" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Extra Output" >
+     <default>false</default>
+     <example>true</example>
+     <description> 
+	This enables monitoring of what's happening in the indexer. 
+	It can help to detect performance/configuration problems. 
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_monitor_period" 
+              type="number" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Extra Output" >
+     <default>0</default>
+     <example>.1</example>
+     <description> 
+	Sets the number of seconds between each monitor output. 
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_monitor_output" 
+              type="string" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Extra Output" >
+     <default></default>
+     <example>myfile</example>
+     <description> 
+	Print monitoring output on file instead of the default stderr. 
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_page_size" 
+              type="integer" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Indexing:How" >
+     <default>0</default>
+     <example>8192</example>
+     <description> 
+	Size of pages used by Berkeley DB (DB used by the indexer) 
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_verbose" 
+              type="integer" 
+              programs="" 
+              version="" 
+              category="" >
+     <default></default>
+     <example>true</example>
+     <description> 
+	wordlist_verbose 1 walk logic<br/>    
+	wordlist_verbose 2 walk logic details<br/>    
+	wordlist_verbose 3 walk logic lots of details<br/>    
+     </description>
+   </attribute>
+
+   <attribute name="wordlist_wordkey_description" 
+              type="string" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Indexing:How" >
+     <default>Word/DocID 32/Flags 8/Location 16</default>
+     <nodocs/>
+   </attribute>
+
+   <attribute name="wordlist_wordrecord_description" 
+              type="string" 
+              programs="all" 
+              version="3.2.0b1" 
+              category="Indexing:How" >
+     <default>DATA</default>
+     <nodocs/>
+   </attribute>
+
+</HtdigAttributes>