summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htword/WordKeyInfo.h
blob: 039dbf4fda86ed00491a39d9ad33cb4889c86357 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
// WordKeyInfo.h
//
// NAME
// information on the key structure of the inverted index.
//
// SYNOPSIS
//
// Use the WordKey::NField() method instead.
//
// DESCRIPTION
//
// Describe the structure of the index key (<i>WordKey</i>).
// The description includes the layout of the packed version
// stored on disk.
//
// CONFIGURATION
//
// wordlist_wordkey_description <desc> (no default)
//   Describe the structure of the inverted index key.
//   In the following explanation of the <i><desc></i> format
//   mandatory words are
//   in bold and values that must be replaced in italic.
//   <br>
//   <b>Word</b>/<i>name bits</i>[/...]
//   <br>
//   The <i>name</i> is an alphanumerical symbolic name for the key field.
//   The <i>bits</i> is the number of bits required to store this field.
//   Note that all values are stored in unsigned integers (unsigned int).
//
//
// END
//   
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 1999-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
//

#ifndef _WordKeyInfo_h_
#define _WordKeyInfo_h_

#include "Configuration.h"

//
// Type number associated with each possible type of a key element
// (stored in the <type> member of class WordKeyField).
//
#define WORD_ISA_NUMBER		1
#define WORD_ISA_STRING		2

//
// Maximum number of fields in a key description
//
#define WORD_KEY_MAX_NFIELDS 20

//
// All numerical fields of the key are typed WordKeyNum.
// Most of the code strongly assumes that it is unsigned.
// Mainly provided so that it can be replaced by
// unsigned long long WordKeyNum on 64 bit machines.
//
typedef unsigned int WordKeyNum;

//
// WORD_KEY_MAXBITS:  maximum number of bits in a field, i.e. the
//                    width of WordKeyNum in bits (as a signed int).
// WORD_KEY_MAXVALUE: WordKeyNum with every bit set, i.e. the largest
//                    value a WordKeyNum can hold.
//
#define WORD_KEY_MAXBITS	((int)(sizeof(WordKeyNum) * 8))
#define WORD_KEY_MAXVALUE	((WordKeyNum)~(WordKeyNum)0)

//
// Description of a single field of the key: its symbolic name, its
// type (WORD_ISA_NUMBER or WORD_ISA_STRING) and the precomputed
// bit/byte positions used to pack/unpack it.
//
class WordKeyField
{
 public:
    //
    // Every numerical member starts at 0; <name> is default constructed.
    //
    WordKeyField()
      : type(0),
        lowbits(0),
        lastbits(0),
        bytesize(0),
        bytes_offset(0),
        bits(0),
        bits_offset(0)
    {
    }

    //
    // Precompute information that will be needed to pack/unpack the key
    // to/from disk.
    // 
    // The <previous> field is used to compute the position of the field
    // in the packed string. <nname> is the symbolic name of the field,
    // <nbits> is the number of bits actually used in a number.
    //
    int SetNum(WordKeyField *previous, char *nname, int nbits);
    //
    // Set the one and only string field
    //
    int SetString();

    //
    // Maximum possible value for this field: a WordKeyNum whose <bits>
    // low order bits are set. A field as wide as (or wider than)
    // WordKeyNum yields WORD_KEY_MAXVALUE.
    //
    // The shift is done on a WordKeyNum, not on a plain int:
    // (1 << bits) - 1 overflows a signed int when bits is 31 and
    // shifts by more than the width of int if WordKeyNum is ever
    // widened to 64 bits; both are undefined behaviour.
    //
    WordKeyNum MaxValue() const {
      if(bits >= WORD_KEY_MAXBITS)
        return WORD_KEY_MAXVALUE;
      return (WordKeyNum)(((WordKeyNum)1 << bits) - 1);
    }

    //
    // Debugging and printing
    //
    void Show();

    String name;                // Symbolic name of the field
    int type;                   // WORD_ISA_{STRING|NUMBER}
    //
    // 01234567012345670123456701234567
    // +-------+-------+-------+-------+--
    //    100101010011100111101011110
    // ^^^                     ^^^^^^
    //   |                        |
    // lowbits = 3           lastbits = 6
    //
    int lowbits;
    int lastbits;
    int bytesize;               // Number of bytes involved
    int bytes_offset;           // Offset of first byte from start
    int bits;                   // Size of field in bits
    int bits_offset;            // Offset of first bit from start
};

//
// Description of the key structure: the array of field descriptions
// plus the process wide unique instance used to reach it.
//
class WordKeyInfo 
{
 public:
    WordKeyInfo(const Configuration& config);
    //
    // Release the <sort> array. delete [] on a null pointer is a
    // no-op, hence no explicit test.
    //
    ~WordKeyInfo() {
      delete [] sort;
    }

    //
    // Unique instance handlers 
    //
    static void Initialize(const Configuration& config);
    static void InitializeFromString(const String &desc);
    //
    // Return the unique instance. If none was set, complain on
    // stderr and return a null pointer.
    //
    static WordKeyInfo* Instance() {
      if(!instance) {
        fprintf(stderr, "WordKeyInfo::Instance: no instance\n");
        return 0;
      }
      return instance;
    }

    int         Alloc(int nnfields);
    int         Set(const String &desc);

    //
    // Debugging and printing
    //
    void  Show();

    //
    // Array describing the fields, in sort order.
    // Released with delete [] by the destructor.
    //
    WordKeyField *sort;
    //
    // Total number of fields
    //
    int nfields;
    //
    // Total number of bytes used by numerical fields
    //
    int num_length;

    //
    // Unique instance pointer
    //
    static WordKeyInfo* instance;
};

#endif