1 *** dbinc/mp.h.orig 2004-02-02 10:24:53.000000000 -0800
2 --- dbinc/mp.h 2004-02-02 10:26:27.000000000 -0800
8 DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */
11 + * We track page puts so that we can decide when allocation is never
12 + * going to succeed. We don't lock the field, all we care about is
15 + u_int32_t put_counter; /* Count of page put calls. */
18 struct __db_mpool_hash {
19 *** mp/mp_fput.c.orig 2002-08-13 06:26:41.000000000 -0700
20 --- mp/mp_fput.c 2004-02-02 10:22:35.000000000 -0800
24 #include "dbinc/db_shash.h"
27 + static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
31 * Mpool file put function.
36 MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
39 + * On every buffer put we update the buffer generation number and check
42 + if (++c_mp->lru_count == UINT32_T_MAX)
43 + __memp_reset_lru(dbenv, dbmp->reginfo);
49 + * __memp_reset_lru --
50 + * Reset the cache LRU counter.
53 + __memp_reset_lru(dbenv, memreg)
62 + c_mp = memreg->primary;
65 + * Update the counter so all future allocations will start at the
68 + c_mp->lru_count -= MPOOL_BASE_DECREMENT;
70 + /* Adjust the priority of every buffer in the system. */
71 + for (hp = R_ADDR(memreg, c_mp->htab),
72 + bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
74 + * Skip empty buckets.
76 + * We can check for empty buckets before locking as we
77 + * only care if the pointer is zero or non-zero.
79 + if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
82 + MUTEX_LOCK(dbenv, &hp->hash_mutex);
83 + for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
84 + bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
85 + if (bhp->priority != UINT32_T_MAX &&
86 + bhp->priority > MPOOL_BASE_DECREMENT)
87 + bhp->priority -= MPOOL_BASE_DECREMENT;
88 + MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
91 *** mp/mp_alloc.c.orig 2002-08-17 07:23:25.000000000 -0700
92 --- mp/mp_alloc.c 2004-02-02 10:28:15.000000000 -0800
97 static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
98 - static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
108 ! u_int32_t buckets, buffers, high_priority, max_na, priority;
109 ! int aggressive, ret;
117 ! u_int32_t buckets, buffers, high_priority, priority, put_counter;
118 ! u_int32_t total_buckets;
119 ! int aggressive, giveup, ret;
125 dbht = R_ADDR(memreg, c_mp->htab);
126 hp_end = &dbht[c_mp->htab_buckets];
128 ! buckets = buffers = 0;
131 c_mp->stat.st_alloc++;
134 - * Get aggressive if we've tried to flush the number of pages as are
135 - * in the system without finding space.
137 - max_na = 5 * c_mp->htab_buckets;
140 * If we're allocating a buffer, and the one we're discarding is the
141 * same size, we don't want to waste the time to re-integrate it into
142 * the shared memory free list. If the DB_MPOOLFILE argument isn't
144 dbht = R_ADDR(memreg, c_mp->htab);
145 hp_end = &dbht[c_mp->htab_buckets];
147 ! buckets = buffers = put_counter = total_buckets = 0;
148 ! aggressive = giveup = 0;
151 c_mp->stat.st_alloc++;
154 * If we're allocating a buffer, and the one we're discarding is the
155 * same size, we don't want to waste the time to re-integrate it into
156 * the shared memory free list. If the DB_MPOOLFILE argument isn't
159 len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
161 R_LOCK(dbenv, memreg);
164 - * On every buffer allocation we update the buffer generation number
165 - * and check for wraparound.
167 - if (++c_mp->lru_count == UINT32_T_MAX)
168 - __memp_reset_lru(dbenv, memreg, c_mp);
171 * Anything newer than 1/10th of the buffer pool is ignored during
172 * allocation (unless allocation starts failing).
174 - DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
175 high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
181 * We're not holding the region locked here, these statistics
184 ! if (buckets != 0) {
185 ! if (buckets > c_mp->stat.st_alloc_max_buckets)
186 ! c_mp->stat.st_alloc_max_buckets = buckets;
187 ! c_mp->stat.st_alloc_buckets += buckets;
190 if (buffers > c_mp->stat.st_alloc_max_pages)
192 * We're not holding the region locked here, these statistics
195 ! total_buckets += buckets;
196 ! if (total_buckets != 0) {
197 ! if (total_buckets > c_mp->stat.st_alloc_max_buckets)
198 ! c_mp->stat.st_alloc_max_buckets = total_buckets;
199 ! c_mp->stat.st_alloc_buckets += total_buckets;
202 if (buffers > c_mp->stat.st_alloc_max_pages)
206 c_mp->stat.st_alloc_pages += buffers;
209 + } else if (giveup || c_mp->stat.st_pages == 0) {
210 + R_UNLOCK(dbenv, memreg);
213 + "unable to allocate space from the buffer cache");
220 * we need. Reset our free-space counter.
225 * Walk the hash buckets and find the next two with potentially useful
226 * buffers. Free the buffer with the lowest priority from the buckets'
229 ! for (hp_tmp = NULL;;) {
230 /* Check for wrap around. */
231 hp = &dbht[c_mp->last_checked++];
233 c_mp->last_checked = 0;
236 ! * If we've gone through all of the hash buckets, try
237 ! * an allocation. If the cache is small, the old page
238 ! * size is small, and the new page size is large, we
239 ! * might have freed enough memory (but not 3 times the
247 * we need. Reset our free-space counter.
250 + total_buckets += buckets;
254 * Walk the hash buckets and find the next two with potentially useful
255 * buffers. Free the buffer with the lowest priority from the buckets'
259 ! /* All pages have been freed; make one last try. */
260 ! if (c_mp->stat.st_pages == 0)
263 /* Check for wrap around. */
264 hp = &dbht[c_mp->last_checked++];
266 c_mp->last_checked = 0;
267 ! hp = &dbht[c_mp->last_checked++];
274 * The failure mode is when there are too many buffers we can't
275 * write or there's not enough memory in the system. We don't
276 ! * have a metric for deciding if allocation has no possible way
277 ! * to succeed, so we don't ever fail, we assume memory will be
278 ! * available if we wait long enough.
280 ! * Get aggressive if we've tried to flush 5 times the number of
281 ! * hash buckets as are in the system -- it's possible we have
282 ! * been repeatedly trying to flush the same buffers, although
283 ! * it's unlikely. Aggressive means:
285 * a: set a flag to attempt to flush high priority buffers as
286 * well as other buffers.
287 * b: sync the mpool to force out queue extent pages. While we
288 * might not have enough space for what we want and flushing
289 * is expensive, why not?
290 ! * c: sleep for a second -- hopefully someone else will run and
291 ! * free up some memory. Try to allocate memory too, in case
292 ! * the other thread returns its memory to the region.
293 ! * d: look at a buffer in every hash bucket rather than choose
294 * the more preferable of two.
297 * This test ignores pathological cases like no buffers in the
298 * system -- that shouldn't be possible.
300 ! if ((++buckets % max_na) == 0) {
303 R_UNLOCK(dbenv, memreg);
305 ! (void)__memp_sync_int(
306 ! dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
308 ! (void)__os_sleep(dbenv, 1, 0);
310 R_LOCK(dbenv, memreg);
314 * The failure mode is when there are too many buffers we can't
315 * write or there's not enough memory in the system. We don't
316 ! * have a way to know that allocation has no way to succeed.
317 ! * We fail if there were no pages returned to the cache after
318 ! * we've been trying for a relatively long time.
320 ! * Get aggressive if we've tried to flush as many hash
321 ! * buckets as are in the system and have not found any more
322 ! * space. Aggressive means:
324 * a: set a flag to attempt to flush high priority buffers as
325 * well as other buffers.
326 * b: sync the mpool to force out queue extent pages. While we
327 * might not have enough space for what we want and flushing
328 * is expensive, why not?
329 ! * c: look at a buffer in every hash bucket rather than choose
330 * the more preferable of two.
331 + * d: start to think about giving up.
333 + * If we get here twice, sleep for a second; hopefully someone
334 + * else will run and free up some memory.
336 + * Always try to allocate memory too, in case some other thread
337 + * returns its memory to the region.
340 * This test ignores pathological cases like no buffers in the
341 * system -- that shouldn't be possible.
343 ! if ((++buckets % c_mp->htab_buckets) == 0) {
344 ! if (freed_space > 0)
346 R_UNLOCK(dbenv, memreg);
348 ! switch (++aggressive) {
352 ! put_counter = c_mp->put_counter;
358 ! (void)__memp_sync_int(
359 ! dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
361 ! (void)__os_sleep(dbenv, 1, 0);
365 ! if (put_counter == c_mp->put_counter)
370 R_LOCK(dbenv, memreg);
374 * thread may have acquired this buffer and incremented the ref
375 * count after we wrote it, in which case we can't have it.
377 ! * If there's a write error, avoid selecting this buffer again
378 * by making it the bucket's least-desirable buffer.
380 if (ret != 0 || bhp->ref != 0) {
382 * thread may have acquired this buffer and incremented the ref
383 * count after we wrote it, in which case we can't have it.
385 ! * If there's a write error and we're having problems finding
386 ! * something to allocate, avoid selecting this buffer again
387 * by making it the bucket's least-desirable buffer.
389 if (ret != 0 || bhp->ref != 0) {
394 freed_space += __db_shsizeof(bhp);
395 __memp_bhfree(dbmp, hp, bhp, 1);
396 + if (aggressive > 1)
400 * Unlock this hash bucket and re-acquire the region lock. If
403 hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
407 - * __memp_reset_lru --
408 - * Reset the cache LRU counter.
411 - __memp_reset_lru(dbenv, memreg, c_mp)
421 - * Update the counter so all future allocations will start at the
424 - c_mp->lru_count -= MPOOL_BASE_DECREMENT;
426 - /* Release the region lock. */
427 - R_UNLOCK(dbenv, memreg);
429 - /* Adjust the priority of every buffer in the system. */
430 - for (hp = R_ADDR(memreg, c_mp->htab),
431 - bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
433 - * Skip empty buckets.
435 - * We can check for empty buckets before locking as we
436 - * only care if the pointer is zero or non-zero.
438 - if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
441 - MUTEX_LOCK(dbenv, &hp->hash_mutex);
442 - for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
443 - bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
444 - if (bhp->priority != UINT32_T_MAX &&
445 - bhp->priority > MPOOL_BASE_DECREMENT)
446 - bhp->priority -= MPOOL_BASE_DECREMENT;
447 - MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
450 - /* Reacquire the region lock. */
451 - R_LOCK(dbenv, memreg);
456 * __memp_check_order --
458 *** dbreg/dbreg_rec.c.orig 2002-08-17 07:22:52.000000000 -0700
459 --- dbreg/dbreg_rec.c 2003-11-08 10:59:19.000000000 -0800
462 * Typically, closes should match an open which means
463 * that if this is a close, there should be a valid
464 * entry in the dbentry table when we get here,
465 ! * however there is an exception. If this is an
466 * OPENFILES pass, then we may have started from
467 * a log file other than the first, and the
468 * corresponding open appears in an earlier file.
469 ! * We can ignore that case, but all others are errors.
471 dbe = &dblp->dbentry[argp->fileid];
472 if (dbe->dbp == NULL && !dbe->deleted) {
473 /* No valid entry here. */
474 ! if ((argp->opcode != LOG_CLOSE &&
475 ! argp->opcode != LOG_RCLOSE) ||
476 ! (op != DB_TXN_OPENFILES &&
477 ! op !=DB_TXN_POPENFILES)) {
479 "Improper file close at %lu/%lu",
482 * Typically, closes should match an open which means
483 * that if this is a close, there should be a valid
484 * entry in the dbentry table when we get here,
485 ! * however there are exceptions. 1. If this is an
486 * OPENFILES pass, then we may have started from
487 * a log file other than the first, and the
488 * corresponding open appears in an earlier file.
489 ! * 2. If we are undoing an open on an abort or
490 ! * recovery, it's possible that we failed after
491 ! * the log record, but before we actually entered
494 dbe = &dblp->dbentry[argp->fileid];
495 if (dbe->dbp == NULL && !dbe->deleted) {
496 /* No valid entry here. */
498 ! argp->opcode == LOG_CHECKPOINT) {
500 "Improper file close at %lu/%lu",
502 *** env/env_recover.c.orig.1 2002-08-22 14:52:51.000000000 -0700
503 --- env/env_recover.c 2003-11-15 08:20:59.000000000 -0800
506 * we'll still need to do a vtruncate based on information we haven't
509 ! if (ret == DB_NOTFOUND) {
511 ! if (max_lsn == NULL)
519 * we'll still need to do a vtruncate based on information we haven't
522 ! if (ret == DB_NOTFOUND)
531 /* Find a low txnid. */
534 /* txnid is after rectype, which is a u_int32. */
536 (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
539 /* Find a low txnid. */
541 ! if (hi_txn != 0) do {
542 /* txnid is after rectype, which is a u_int32. */
544 (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
547 * There are no transactions and we're not recovering to an LSN (see
548 * above), so there is nothing to do.
550 ! if (ret == DB_NOTFOUND) {
552 - if (max_lsn == NULL)
556 /* Reset to the first lsn. */
557 if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
559 * There are no transactions and we're not recovering to an LSN (see
560 * above), so there is nothing to do.
562 ! if (ret == DB_NOTFOUND)
565 /* Reset to the first lsn. */
566 if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
570 txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
573 + /* If there were no transactions, then we can bail out early. */
574 + if (hi_txn == 0 && max_lsn == NULL)
583 if ((ret = __dbreg_close_files(dbenv)) != 0)
587 if (max_lsn != NULL) {
588 region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
592 __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
593 __db_err(dbenv, "%s %lx %s [%lu][%lu]",
594 "Maximum transaction ID",
595 ! ((DB_TXNHEAD *)txninfo)->maxid,
596 "Recovery checkpoint",
597 (u_long)region->last_ckp.file,
598 (u_long)region->last_ckp.offset);
600 __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
601 __db_err(dbenv, "%s %lx %s [%lu][%lu]",
602 "Maximum transaction ID",
603 ! txninfo == NULL ? TXN_MINIMUM :
604 ! ((DB_TXNHEAD *)txninfo)->maxid,
605 "Recovery checkpoint",
606 (u_long)region->last_ckp.file,
607 (u_long)region->last_ckp.offset);
610 (u_long)lsn.file, (u_long)lsn.offset, pass);
614 err: if (lockid != DB_LOCK_INVALIDID) {
615 if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)