summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/db/mp.h
blob: 5a51739a68376480bdeb811f089518910f7ec231 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998, 1999
 *	Sleepycat Software.  All rights reserved.
 *
 *	@(#)mp.h	11.3 (Sleepycat) 10/6/99
 */

/*
 * Opaque handles for every memory-pool structure defined in this header.
 * Each typedef also introduces its struct tag, so the names can be used
 * in pointer declarations before the full definitions appear below.
 */
typedef struct __bh		BH;
typedef struct __db_mpool	DB_MPOOL;
typedef struct __db_mpreg	DB_MPREG;
typedef struct __mcache		MCACHE;
typedef struct __mpool		MPOOL;
typedef struct __mpoolfile	MPOOLFILE;
typedef struct __cmpr		CMPR;
typedef struct __cmpr_context	CMPR_CONTEXT;

/* Smallest cache size accepted: 20 kilobytes, expressed in bytes. */
#define	DB_CACHESIZE_MIN	(20 * 1024)

/*
 * Default number of files an environment has room for.
 */
#define	DB_MPOOLFILE_DEF	500

/*
 * DB_MPOOL --
 *	Per-process memory pool structure.
 *
 *	Groups the per-process bookkeeping for a pool: the registered
 *	pgin/pgout routines, the list of DB_MPOOLFILE handles, and the
 *	REGINFO descriptors for the main region and the cache regions.
 *	c_reginfo is indexed by cache number (see BH_TO_CACHE).
 */
struct __db_mpool {
	/* These fields need to be protected for multi-threaded support. */
	MUTEX	   *mutexp;		/* Structure thread lock. */

					/* List of pgin/pgout routines. */
	LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;

					/* List of DB_MPOOLFILE's. */
	TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;

	/* These fields are not thread-protected. */
	DB_ENV     *dbenv;		/* Reference to error information. */

	REGINFO	    reginfo;		/* Main shared region. */

	int	    nc_reg;		/* N underlying cache regions. */
	REGINFO	   *c_reginfo;		/* Underlying cache regions. */

	/*
	 * I'm not sure if these need to be thread-protected...
	 * NOTE(review): the field below is declared without any lock
	 * annotation; confirm how concurrent callers update it.
	 */
	int         recursion_level;	/* Limits recursion from weak compression. */

};

/*
 * DB_MPREG --
 *	DB_MPOOL registry of pgin/pgout functions.
 *
 *	One entry per file type; entries are chained on the dbregq list
 *	of the owning DB_MPOOL.
 */
struct __db_mpreg {
	LIST_ENTRY(__db_mpreg) q;	/* Linked list. */

	int ftype;			/* File type. */
					/*
					 * Pgin, pgout routines.  Each takes a
					 * page number, an untyped pointer and
					 * a DBT, and returns an int.
					 */
	int (*pgin) __P((db_pgno_t, void *, DBT *));
	int (*pgout) __P((db_pgno_t, void *, DBT *));
};

/*
 * CMPR_CONTEXT --
 *	Shared compression information.
 *
 *	Embedded in each DB_MPOOLFILE (see the cmpr_context member there).
 */
struct __cmpr_context {
/* Suffix string associated with the weak-compression database. */
#define DB_CMPR_SUFFIX	"_weakcmpr"
	DB 	     *weakcmpr;		/* Free weakcmpr pages pool. */
};

/*
 * DB_MPOOLFILE --
 *	Per-process DB_MPOOLFILE information.
 *
 *	Pairs a file handle with its shared MPOOLFILE descriptor and the
 *	owning DB_MPOOL.  Note that three different protection regimes
 *	apply to the members below: the structure thread lock, the region
 *	lock (pinref and q), and no protection at all.
 */
struct __db_mpoolfile {
	/* These fields need to be protected for multi-threaded support. */
	MUTEX	  *mutexp;		/* Structure thread lock. */

	DB_FH	   fh;			/* Underlying file handle. */

	u_int32_t ref;			/* Reference count. */

	/*
	 * !!!
	 * This field is a special case -- it's protected by the region lock
	 * NOT the thread lock.  The reason for this is that we always have
	 * the region lock immediately before or after we modify the field,
	 * and we don't want to use the structure lock to protect it because
	 * then I/O (which is done with the structure lock held because of
	 * the race between the seek and write of the file descriptor) will
	 * block any other put/get calls using this DB_MPOOLFILE structure.
	 */
	u_int32_t pinref;		/* Pinned block reference count. */

	/*
	 * !!!
	 * This field is a special case -- it's protected by the region lock
	 * since it's manipulated only when new files are added to the list.
	 */
	TAILQ_ENTRY(__db_mpoolfile) q;	/* Linked list of DB_MPOOLFILE's. */

	/* These fields are not thread-protected. */
	DB_MPOOL  *dbmp;		/* Overlying DB_MPOOL. */
	MPOOLFILE *mfp;			/* Underlying MPOOLFILE. */

	void	  *addr;		/* Address of mmap'd region. */
	size_t	   len;			/* Length of mmap'd region. */

	/* These fields need to be protected for multi-threaded support. */
	/* Bit values stored in the flags member below. */
#define	MP_READONLY	0x01		/* File is readonly. */
#define	MP_UPGRADE	0x02		/* File descriptor is readwrite. */
#define	MP_UPGRADE_FAIL	0x04		/* Upgrade wasn't possible. */
#define	MP_CMPR		0x08		/* Transparent I/O compression. */
	u_int32_t  flags;

        CMPR_CONTEXT   cmpr_context;    /* Shared compression information */

};

/*
 * NCACHE --
 *	Select a cache based on the page number.  This assumes accesses are
 *	uniform across pages, which is probably OK -- what we really want to
 *	avoid is anything that puts all the pages for any single file in the
 *	same cache, as we expect that file access will be bursty.
 *
 *	mp:   any pointer expression addressing the MPOOL region; it is
 *	      cast to MPOOL * as a whole, so expressions such as
 *	      "base + offset" are handled correctly.
 *	pgno: page number.
 *
 *	Evaluates to pgno modulo the region's nc_reg, which therefore must
 *	be non-zero.
 */
#define	NCACHE(mp, pgno)						\
	((pgno) % ((MPOOL *)(mp))->nc_reg)

/*
 * NBUCKET --
 *	Pick the hash bucket for a page within a cache.
 *
 *	Early pages of a file are assumed to be retrieved more often than
 *	later ones, so the top bits of the page number would be the more
 *	interesting ones to hash on, being less likely to collide.  In
 *	practice 512 8K pages already make up a 4MB file, so only
 *	reasonably large files have page numbers with anything other than
 *	the bottom 9 bits set.  The MPOOL offset of the MPOOLFILE backing
 *	the page is therefore shifted left by 9 and XOR'd in, since it
 *	should also be unique for the page.  Nothing fancier is attempted:
 *	speed matters more here than hash quality.
 *
 *	The result is reduced modulo the cache's htab_buckets.
 */
#define	NBUCKET(mc, mf_offset, pgno)					\
	((((mf_offset) << 9) ^ (pgno)) % (mc)->htab_buckets)

/*
 * MPOOL --
 *	Shared memory pool region.  One of these is allocated in shared
 *	memory, and describes the entire pool.
 *
 *	NCACHE() casts the primary region address to this type and reads
 *	nc_reg to choose a cache for a page.
 */
struct __mpool {
	SH_TAILQ_HEAD(__mpfq) mpfq;	/* List of MPOOLFILEs. */

	/*
	 * We single-thread CDB_memp_sync and CDB_memp_fsync calls.
	 *
	 * This mutex is intended *only* to single-thread access to the call,
	 * it is not used to protect the lsn and lsn_cnt fields, the region
	 * lock is used to protect them.
	 */
	MUTEX	  sync_mutex;		/* Checkpoint lock. */
	DB_LSN	  lsn;			/* Maximum checkpoint LSN. */
	u_int32_t lsn_cnt;		/* Checkpoint buffers left to write. */

	u_int32_t nc_reg;		/* Number of underlying REGIONS. */
	roff_t	  c_regids;		/* Array of underlying REGION Ids. */

#define	MP_LSN_RETRY	0x01		/* Retry all BH_WRITE buffers. */
	u_int32_t  flags;

	/*
	 * HACK!!
	 * A pointer allocated for this structure is (erroneously?) used
	 * in CDB___memp_alloc() to refer to an MCACHE structure.  The
	 * padding below makes sure the allocation is big enough.
	 * NOTE(review): the 100-int size is not derived from
	 * sizeof(MCACHE) here; confirm it still covers that structure.
	 */
	int	    dummy [100];

};

/*
 * MCACHE --
 *	The memory pool may be broken up into individual pieces/files.  Not
 *	what we would have liked, but on Solaris you can allocate only a
 *	little more than 2GB of memory in a single contiguous chunk, and I
 *	expect to see more systems with similar issues.  An MCACHE structure
 *	describes a backing piece of memory used as a cache.
 *
 *	NBUCKET() reduces its hash value modulo htab_buckets.
 */
struct __mcache {
	SH_TAILQ_HEAD(__bhq) bhq;	/* LRU list of buffer headers. */

	int	    htab_buckets;	/* Number of hash table entries. */
	roff_t	    htab;		/* Hash table offset. */

	DB_MPOOL_STAT stat;		/* Per-cache mpool statistics. */
};

/*
 * MPOOLFILE --
 *	Shared DB_MPOOLFILE information.
 *
 *	The per-process DB_MPOOLFILE refers to one of these through its
 *	mfp member.  Locations are stored as roff_t offsets rather than
 *	pointers.
 */
struct __mpoolfile {
	SH_TAILQ_ENTRY  q;		/* List of MPOOLFILEs */

	int	  ftype;		/* File type. */

	int32_t	  lsn_off;		/* Page's LSN offset. */
	u_int32_t clear_len;		/* Bytes to clear on page create. */

	roff_t	  path_off;		/* File name location. */
	roff_t	  fileid_off;		/* File identification location. */

	roff_t	  pgcookie_len;		/* Pgin/pgout cookie length. */
	roff_t	  pgcookie_off;		/* Pgin/pgout cookie location. */

	u_int32_t lsn_cnt;		/* Checkpoint buffers left to write. */

	db_pgno_t last_pgno;		/* Last page in the file. */
	db_pgno_t orig_last_pgno;	/* Original last page in the file. */

	/* Bit values stored in the flags member below. */
#define	MP_CAN_MMAP	0x01		/* If the file can be mmap'd. */
#define	MP_REMOVED	0x02		/* Backing file has been removed. */
#define	MP_TEMP		0x04		/* Backing file is a temporary. */
	u_int32_t  flags;

	DB_MPOOL_FSTAT stat;		/* Per-file mpool statistics. */
};

/*
 * BH_TO_CACHE --
 *	Yield the primary address of the cache region that holds the given
 *	buffer header.  The region is chosen by running the header's page
 *	number through NCACHE against the pool's main region, and the
 *	result indexes the pool's c_reginfo array.
 */
#define	BH_TO_CACHE(dbmp, bhp)						\
	(dbmp)->c_reginfo[						\
	    NCACHE((dbmp)->reginfo.primary, (bhp)->pgno)		\
	].primary

/*
 * DB_CMPR --
 *      Page compression information
 *
 * !!!
 * There is no need to keep the length of the data written
 * in the page since it's already encoded in the compressed
 * data.
 */

/*
 * Convert between an uncompressed size and the expected compressed size.
 * The environment's compression coefficient is used as a shift count:
 * DB_CMPR_DIVIDE shifts the size right by it, DB_CMPR_MULTIPLY shifts
 * the size left by it.
 */
#define DB_CMPR_DIVIDE(dbenv, size)					\
	((size) >> CDB___memp_cmpr_coefficient(dbenv))
#define DB_CMPR_MULTIPLY(dbenv, size)					\
	((size) << CDB___memp_cmpr_coefficient(dbenv))


/*
 * CMPR --
 *	Header reserved at the beginning of each compressed page; its size
 *	is what DB_CMPR_OVERHEAD evaluates to.
 */
struct __cmpr {
/* Bit values stored in the flags member below. */
#define DB_CMPR_FIRST	 	0x01 /* Head of chain. */
#define DB_CMPR_INTERNAL	0x02 /* Weak compression data. */
#define DB_CMPR_CHAIN	 	0x04 /* More data in next page. */
#define DB_CMPR_FREE		0x08 /* Not in use. */

  u_int16_t flags; 

  /* 
   * Filled if DB_CMPR_CHAIN set
   */
  db_pgno_t next;
};

/*
 * Reserved information at the beginning of each compressed page:
 * the size in bytes of struct __cmpr.
 */
#define DB_CMPR_OVERHEAD	(sizeof(struct __cmpr))
/*
 * Size of IO page, without the reserved information.
 *
 * io may be any pointer expression addressing an object with a pagesize
 * member; the argument is parenthesized so forms such as "&obj" or
 * "ptr + 1" expand correctly.  The result is only meaningful when
 * pagesize is at least DB_CMPR_OVERHEAD.
 */
#define DB_CMPR_PAGESIZE(io)	((io)->pagesize - DB_CMPR_OVERHEAD)
/*
 * Pointer to data within raw compressed buffer: the io object's buf
 * member advanced past the reserved header.  The argument is
 * parenthesized for the same reason as in DB_CMPR_PAGESIZE.
 */
#define DB_CMPR_DATA(io)	((io)->buf + DB_CMPR_OVERHEAD)

/*
 * BH --
 *	Buffer header.
 *
 *	Precedes the data of a cached page.  A header sits on two queues
 *	at once (the LRU queue and a hash bucket queue) and records which
 *	MPOOLFILE page it holds.  BH_TO_CACHE() uses pgno to find the
 *	cache region for a header.
 */
struct __bh {
	MUTEX	        mutex;		/* Buffer thread/process lock. */

	u_int16_t	ref;		/* Reference count. */

	/*
	 * Bit values stored in the flags member below.  The highest value,
	 * 0x100, still fits in the 16-bit flags field.
	 */
#define	BH_CALLPGIN	0x001		/* Page needs to be reworked... */
#define	BH_DIRTY	0x002		/* Page was modified. */
#define	BH_DISCARD	0x004		/* Page is useless. */
#define	BH_LOCKED	0x008		/* Page is locked (I/O in progress). */
#define	BH_TRASH	0x010		/* Page is garbage. */
#define	BH_WRITE	0x020		/* Page scheduled for writing. */
#define	BH_CMPR		0x040		/* Chain contains valid data. */
#define	BH_CMPR_POOL	0x080		/* Chain allocated in pool. */
#define	BH_CMPR_OS	0x100		/* Chain allocated with malloc. */
	u_int16_t  flags;

        db_pgno_t *chain;         	/* Compression chain. */

	SH_TAILQ_ENTRY	q;		/* LRU queue. */
	SH_TAILQ_ENTRY	hq;		/* MPOOL hash bucket queue. */

	db_pgno_t pgno;			/* Underlying MPOOLFILE page number. */
	roff_t	  mf_offset;		/* Associated MPOOLFILE offset. */

	/*
	 * !!!
	 * This array must be size_t aligned -- the DB access methods put PAGE
	 * and other structures into it, and expect to be able to access them
	 * directly.  (We guarantee size_t alignment in the documentation too.)
	 *
	 * NOTE(review): declared with one element as a pre-C99 variable
	 * length trailer.  Switching to a flexible array member would
	 * change sizeof(struct __bh); audit callers' size arithmetic
	 * before doing so.
	 */
	u_int8_t   buf[1];		/* Variable length data. */
};

#include "mp_ext.h"