git.pld-linux.org Git - packages/db4.1.git/blame - patch.4.1.25.2
- added 4.1.25.[23] patches
[packages/db4.1.git] / patch.4.1.25.2
CommitLineData
929f69b8
JB
1*** dbinc/mp.h.orig 2004-02-02 10:24:53.000000000 -0800
2--- dbinc/mp.h 2004-02-02 10:26:27.000000000 -0800
3***************
4*** 149,154 ****
5--- 149,161 ----
6 * region lock).
7 */
8 DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */
9+
10+ /*
11+ * We track page puts so that we can decide when allocation is never
12+ * going to succeed. We don't lock the field; all we care about is
13+ * whether it changes.
14+ */
15+ u_int32_t put_counter; /* Count of page put calls. */
16 };
17
18 struct __db_mpool_hash {
19*** mp/mp_fput.c.orig 2002-08-13 06:26:41.000000000 -0700
20--- mp/mp_fput.c 2004-02-02 10:22:35.000000000 -0800
21***************
22*** 19,24 ****
23--- 19,26 ----
24 #include "dbinc/db_shash.h"
25 #include "dbinc/mp.h"
26
27+ static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
28+
29 /*
30 * __memp_fput --
31 * Mpool file put function.
32***************
33*** 198,202 ****
34--- 200,255 ----
35
36 MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
37
38+ /*
39+ * On every buffer put we update the buffer generation number and check
40+ * for wraparound.
41+ */
42+ if (++c_mp->lru_count == UINT32_T_MAX)
43+ __memp_reset_lru(dbenv, dbmp->reginfo);
44+
45 return (0);
46 }
47+
48+ /*
49+ * __memp_reset_lru --
50+ * Reset the cache LRU counter.
51+ */
52+ static void
53+ __memp_reset_lru(dbenv, memreg)
54+ DB_ENV *dbenv;
55+ REGINFO *memreg;
56+ {
57+ BH *bhp;
58+ DB_MPOOL_HASH *hp;
59+ MPOOL *c_mp;
60+ int bucket;
61+
62+ c_mp = memreg->primary;
63+
64+ /*
65+ * Update the counter so all future allocations will start at the
66+ * bottom.
67+ */
68+ c_mp->lru_count -= MPOOL_BASE_DECREMENT;
69+
70+ /* Adjust the priority of every buffer in the system. */
71+ for (hp = R_ADDR(memreg, c_mp->htab),
72+ bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
73+ /*
74+ * Skip empty buckets.
75+ *
76+ * We can check for empty buckets before locking as we
77+ * only care if the pointer is zero or non-zero.
78+ */
79+ if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
80+ continue;
81+
82+ MUTEX_LOCK(dbenv, &hp->hash_mutex);
83+ for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
84+ bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
85+ if (bhp->priority != UINT32_T_MAX &&
86+ bhp->priority > MPOOL_BASE_DECREMENT)
87+ bhp->priority -= MPOOL_BASE_DECREMENT;
88+ MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
89+ }
90+ }
91*** mp/mp_alloc.c.orig 2002-08-17 07:23:25.000000000 -0700
92--- mp/mp_alloc.c 2004-02-02 10:28:15.000000000 -0800
93***************
94*** 25,31 ****
95 } HS;
96
97 static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
98- static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
99
100 /*
101 * __memp_alloc --
102--- 25,30 ----
103***************
104*** 50,57 ****
105 MPOOL *c_mp;
106 MPOOLFILE *bh_mfp;
107 size_t freed_space;
108! u_int32_t buckets, buffers, high_priority, max_na, priority;
109! int aggressive, ret;
110 void *p;
111
112 dbenv = dbmp->dbenv;
113--- 49,57 ----
114 MPOOL *c_mp;
115 MPOOLFILE *bh_mfp;
116 size_t freed_space;
117! u_int32_t buckets, buffers, high_priority, priority, put_counter;
118! u_int32_t total_buckets;
119! int aggressive, giveup, ret;
120 void *p;
121
122 dbenv = dbmp->dbenv;
123***************
124*** 59,76 ****
125 dbht = R_ADDR(memreg, c_mp->htab);
126 hp_end = &dbht[c_mp->htab_buckets];
127
128! buckets = buffers = 0;
129! aggressive = 0;
130
131 c_mp->stat.st_alloc++;
132
133 /*
134- * Get aggressive if we've tried to flush the number of pages as are
135- * in the system without finding space.
136- */
137- max_na = 5 * c_mp->htab_buckets;
138-
139- /*
140 * If we're allocating a buffer, and the one we're discarding is the
141 * same size, we don't want to waste the time to re-integrate it into
142 * the shared memory free list. If the DB_MPOOLFILE argument isn't
143--- 59,71 ----
144 dbht = R_ADDR(memreg, c_mp->htab);
145 hp_end = &dbht[c_mp->htab_buckets];
146
147! buckets = buffers = put_counter = total_buckets = 0;
148! aggressive = giveup = 0;
149! hp_tmp = NULL;
150
151 c_mp->stat.st_alloc++;
152
153 /*
154 * If we're allocating a buffer, and the one we're discarding is the
155 * same size, we don't want to waste the time to re-integrate it into
156 * the shared memory free list. If the DB_MPOOLFILE argument isn't
157***************
158*** 81,99 ****
159 len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
160
161 R_LOCK(dbenv, memreg);
162-
163- /*
164- * On every buffer allocation we update the buffer generation number
165- * and check for wraparound.
166- */
167- if (++c_mp->lru_count == UINT32_T_MAX)
168- __memp_reset_lru(dbenv, memreg, c_mp);
169-
170 /*
171 * Anything newer than 1/10th of the buffer pool is ignored during
172 * allocation (unless allocation starts failing).
173 */
174- DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
175 high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
176
177 /*
178--- 76,85 ----
179***************
180*** 120,129 ****
181 * We're not holding the region locked here, these statistics
182 * can't be trusted.
183 */
184! if (buckets != 0) {
185! if (buckets > c_mp->stat.st_alloc_max_buckets)
186! c_mp->stat.st_alloc_max_buckets = buckets;
187! c_mp->stat.st_alloc_buckets += buckets;
188 }
189 if (buffers != 0) {
190 if (buffers > c_mp->stat.st_alloc_max_pages)
191--- 106,116 ----
192 * We're not holding the region locked here, these statistics
193 * can't be trusted.
194 */
195! total_buckets += buckets;
196! if (total_buckets != 0) {
197! if (total_buckets > c_mp->stat.st_alloc_max_buckets)
198! c_mp->stat.st_alloc_max_buckets = total_buckets;
199! c_mp->stat.st_alloc_buckets += total_buckets;
200 }
201 if (buffers != 0) {
202 if (buffers > c_mp->stat.st_alloc_max_pages)
203***************
204*** 131,136 ****
205--- 118,129 ----
206 c_mp->stat.st_alloc_pages += buffers;
207 }
208 return (0);
209+ } else if (giveup || c_mp->stat.st_pages == 0) {
210+ R_UNLOCK(dbenv, memreg);
211+
212+ __db_err(dbenv,
213+ "unable to allocate space from the buffer cache");
214+ return (ret);
215 }
216
217 /*
218***************
219*** 138,163 ****
220 * we need. Reset our free-space counter.
221 */
222 freed_space = 0;
223
224 /*
225 * Walk the hash buckets and find the next two with potentially useful
226 * buffers. Free the buffer with the lowest priority from the buckets'
227 * chains.
228 */
229! for (hp_tmp = NULL;;) {
230 /* Check for wrap around. */
231 hp = &dbht[c_mp->last_checked++];
232 if (hp >= hp_end) {
233 c_mp->last_checked = 0;
234!
235! /*
236! * If we've gone through all of the hash buckets, try
237! * an allocation. If the cache is small, the old page
238! * size is small, and the new page size is large, we
239! * might have freed enough memory (but not 3 times the
240! * memory).
241! */
242! goto alloc;
243 }
244
245 /*
246--- 131,154 ----
247 * we need. Reset our free-space counter.
248 */
249 freed_space = 0;
250+ total_buckets += buckets;
251+ buckets = 0;
252
253 /*
254 * Walk the hash buckets and find the next two with potentially useful
255 * buffers. Free the buffer with the lowest priority from the buckets'
256 * chains.
257 */
258! for (;;) {
259! /* All pages have been freed, make one last try */
260! if (c_mp->stat.st_pages == 0)
261! goto alloc;
262!
263 /* Check for wrap around. */
264 hp = &dbht[c_mp->last_checked++];
265 if (hp >= hp_end) {
266 c_mp->last_checked = 0;
267! hp = &dbht[c_mp->last_checked++];
268 }
269
270 /*
271***************
272*** 172,210 ****
273 /*
274 * The failure mode is when there are too many buffers we can't
275 * write or there's not enough memory in the system. We don't
276! * have a metric for deciding if allocation has no possible way
277! * to succeed, so we don't ever fail, we assume memory will be
278! * available if we wait long enough.
279 *
280! * Get aggressive if we've tried to flush 5 times the number of
281! * hash buckets as are in the system -- it's possible we have
282! * been repeatedly trying to flush the same buffers, although
283! * it's unlikely. Aggressive means:
284 *
285 * a: set a flag to attempt to flush high priority buffers as
286 * well as other buffers.
287 * b: sync the mpool to force out queue extent pages. While we
288 * might not have enough space for what we want and flushing
289 * is expensive, why not?
290! * c: sleep for a second -- hopefully someone else will run and
291! * free up some memory. Try to allocate memory too, in case
292! * the other thread returns its memory to the region.
293! * d: look at a buffer in every hash bucket rather than choose
294 * the more preferable of two.
295 *
296 * !!!
297 * This test ignores pathological cases like no buffers in the
298 * system -- that shouldn't be possible.
299 */
300! if ((++buckets % max_na) == 0) {
301! aggressive = 1;
302!
303 R_UNLOCK(dbenv, memreg);
304
305! (void)__memp_sync_int(
306! dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
307!
308! (void)__os_sleep(dbenv, 1, 0);
309
310 R_LOCK(dbenv, memreg);
311 goto alloc;
312--- 163,221 ----
313 /*
314 * The failure mode is when there are too many buffers we can't
315 * write or there's not enough memory in the system. We don't
316! * have a way to know that allocation has no way to succeed.
317! * We fail if there were no pages returned to the cache after
318! * we've been trying for a relatively long time.
319 *
320! * Get aggressive if we've tried to flush as many hash
321! * buckets as are in the system and have not found any more
322! * space. Aggressive means:
323 *
324 * a: set a flag to attempt to flush high priority buffers as
325 * well as other buffers.
326 * b: sync the mpool to force out queue extent pages. While we
327 * might not have enough space for what we want and flushing
328 * is expensive, why not?
329! * c: look at a buffer in every hash bucket rather than choose
330 * the more preferable of two.
331+ * d: start to think about giving up.
332+ *
333+ * If we get here twice, sleep for a second, hopefully someone
334+ * else will run and free up some memory.
335+ *
336+ * Always try to allocate memory too, in case some other thread
337+ * returns its memory to the region.
338 *
339 * !!!
340 * This test ignores pathological cases like no buffers in the
341 * system -- that shouldn't be possible.
342 */
343! if ((++buckets % c_mp->htab_buckets) == 0) {
344! if (freed_space > 0)
345! goto alloc;
346 R_UNLOCK(dbenv, memreg);
347
348! switch (++aggressive) {
349! case 1:
350! break;
351! case 2:
352! put_counter = c_mp->put_counter;
353! /* FALLTHROUGH */
354! case 3:
355! case 4:
356! case 5:
357! case 6:
358! (void)__memp_sync_int(
359! dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
360!
361! (void)__os_sleep(dbenv, 1, 0);
362! break;
363! default:
364! aggressive = 1;
365! if (put_counter == c_mp->put_counter)
366! giveup = 1;
367! break;
368! }
369
370 R_LOCK(dbenv, memreg);
371 goto alloc;
372***************
373*** 277,283 ****
374 * thread may have acquired this buffer and incremented the ref
375 * count after we wrote it, in which case we can't have it.
376 *
377! * If there's a write error, avoid selecting this buffer again
378 * by making it the bucket's least-desirable buffer.
379 */
380 if (ret != 0 || bhp->ref != 0) {
381--- 288,295 ----
382 * thread may have acquired this buffer and incremented the ref
383 * count after we wrote it, in which case we can't have it.
384 *
385! * If there's a write error and we're having problems finding
386! * something to allocate, avoid selecting this buffer again
387 * by making it the bucket's least-desirable buffer.
388 */
389 if (ret != 0 || bhp->ref != 0) {
390***************
391*** 301,306 ****
392--- 313,320 ----
393
394 freed_space += __db_shsizeof(bhp);
395 __memp_bhfree(dbmp, hp, bhp, 1);
396+ if (aggressive > 1)
397+ aggressive = 1;
398
399 /*
400 * Unlock this hash bucket and re-acquire the region lock. If
401***************
402*** 362,415 ****
403 hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
404 }
405
406- /*
407- * __memp_reset_lru --
408- * Reset the cache LRU counter.
409- */
410- static void
411- __memp_reset_lru(dbenv, memreg, c_mp)
412- DB_ENV *dbenv;
413- REGINFO *memreg;
414- MPOOL *c_mp;
415- {
416- BH *bhp;
417- DB_MPOOL_HASH *hp;
418- int bucket;
419-
420- /*
421- * Update the counter so all future allocations will start at the
422- * bottom.
423- */
424- c_mp->lru_count -= MPOOL_BASE_DECREMENT;
425-
426- /* Release the region lock. */
427- R_UNLOCK(dbenv, memreg);
428-
429- /* Adjust the priority of every buffer in the system. */
430- for (hp = R_ADDR(memreg, c_mp->htab),
431- bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
432- /*
433- * Skip empty buckets.
434- *
435- * We can check for empty buckets before locking as we
436- * only care if the pointer is zero or non-zero.
437- */
438- if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
439- continue;
440-
441- MUTEX_LOCK(dbenv, &hp->hash_mutex);
442- for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
443- bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
444- if (bhp->priority != UINT32_T_MAX &&
445- bhp->priority > MPOOL_BASE_DECREMENT)
446- bhp->priority -= MPOOL_BASE_DECREMENT;
447- MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
448- }
449-
450- /* Reacquire the region lock. */
451- R_LOCK(dbenv, memreg);
452- }
453-
454 #ifdef DIAGNOSTIC
455 /*
456 * __memp_check_order --
457--- 376,381 ----
458*** dbreg/dbreg_rec.c.orig 2002-08-17 07:22:52.000000000 -0700
459--- dbreg/dbreg_rec.c 2003-11-08 10:59:19.000000000 -0800
460***************
461*** 174,192 ****
462 * Typically, closes should match an open which means
463 * that if this is a close, there should be a valid
464 * entry in the dbentry table when we get here,
465! * however there is an exception. If this is an
466 * OPENFILES pass, then we may have started from
467 * a log file other than the first, and the
468 * corresponding open appears in an earlier file.
469! * We can ignore that case, but all others are errors.
470 */
471 dbe = &dblp->dbentry[argp->fileid];
472 if (dbe->dbp == NULL && !dbe->deleted) {
473 /* No valid entry here. */
474! if ((argp->opcode != LOG_CLOSE &&
475! argp->opcode != LOG_RCLOSE) ||
476! (op != DB_TXN_OPENFILES &&
477! op !=DB_TXN_POPENFILES)) {
478 __db_err(dbenv,
479 "Improper file close at %lu/%lu",
480 (u_long)lsnp->file,
481--- 174,193 ----
482 * Typically, closes should match an open which means
483 * that if this is a close, there should be a valid
484 * entry in the dbentry table when we get here,
485! * however there are exceptions. 1. If this is an
486 * OPENFILES pass, then we may have started from
487 * a log file other than the first, and the
488 * corresponding open appears in an earlier file.
489! * 2. If we are undoing an open on an abort or
490! * recovery, it's possible that we failed after
491! * the log record, but before we actually entered
492! * a handle here.
493 */
494 dbe = &dblp->dbentry[argp->fileid];
495 if (dbe->dbp == NULL && !dbe->deleted) {
496 /* No valid entry here. */
497! if (DB_REDO(op) ||
498! argp->opcode == LOG_CHECKPOINT) {
499 __db_err(dbenv,
500 "Improper file close at %lu/%lu",
501 (u_long)lsnp->file,
502*** env/env_recover.c.orig.1 2002-08-22 14:52:51.000000000 -0700
503--- env/env_recover.c 2003-11-15 08:20:59.000000000 -0800
504***************
505*** 232,243 ****
506 * we'll still need to do a vtruncate based on information we haven't
507 * yet collected.
508 */
509! if (ret == DB_NOTFOUND) {
510 ret = 0;
511! if (max_lsn == NULL)
512! goto done;
513! }
514! if (ret != 0)
515 goto err;
516
517 hi_txn = txnid;
518--- 232,240 ----
519 * we'll still need to do a vtruncate based on information we haven't
520 * yet collected.
521 */
522! if (ret == DB_NOTFOUND)
523 ret = 0;
524! else if (ret != 0)
525 goto err;
526
527 hi_txn = txnid;
528***************
529*** 331,337 ****
530
531 /* Find a low txnid. */
532 ret = 0;
533! do {
534 /* txnid is after rectype, which is a u_int32. */
535 memcpy(&txnid,
536 (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
537--- 328,334 ----
538
539 /* Find a low txnid. */
540 ret = 0;
541! if (hi_txn != 0) do {
542 /* txnid is after rectype, which is a u_int32. */
543 memcpy(&txnid,
544 (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
545***************
546*** 344,354 ****
547 * There are no transactions and we're not recovering to an LSN (see
548 * above), so there is nothing to do.
549 */
550! if (ret == DB_NOTFOUND) {
551 ret = 0;
552- if (max_lsn == NULL)
553- goto done;
554- }
555
556 /* Reset to the first lsn. */
557 if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
558--- 341,348 ----
559 * There are no transactions and we're not recovering to an LSN (see
560 * above), so there is nothing to do.
561 */
562! if (ret == DB_NOTFOUND)
563 ret = 0;
564
565 /* Reset to the first lsn. */
566 if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
567***************
568*** 367,372 ****
569--- 361,370 ----
570 txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
571 goto err;
572
573+ /* If there were no transactions, then we can bail out early. */
574+ if (hi_txn == 0 && max_lsn == NULL)
575+ goto done;
576+
577 /*
578 * Pass #2.
579 *
580***************
581*** 483,488 ****
582--- 481,487 ----
583 if ((ret = __dbreg_close_files(dbenv)) != 0)
584 goto err;
585
586+ done:
587 if (max_lsn != NULL) {
588 region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
589
590***************
591*** 538,544 ****
592 __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
593 __db_err(dbenv, "%s %lx %s [%lu][%lu]",
594 "Maximum transaction ID",
595! ((DB_TXNHEAD *)txninfo)->maxid,
596 "Recovery checkpoint",
597 (u_long)region->last_ckp.file,
598 (u_long)region->last_ckp.offset);
599--- 537,544 ----
600 __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
601 __db_err(dbenv, "%s %lx %s [%lu][%lu]",
602 "Maximum transaction ID",
603! txninfo == NULL ? TXN_MINIMUM :
604! ((DB_TXNHEAD *)txninfo)->maxid,
605 "Recovery checkpoint",
606 (u_long)region->last_ckp.file,
607 (u_long)region->last_ckp.offset);
608***************
609*** 550,556 ****
610 (u_long)lsn.file, (u_long)lsn.offset, pass);
611 }
612
613- done:
614 err: if (lockid != DB_LOCK_INVALIDID) {
615 if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)
616 ret = t_ret;
617--- 550,555 ----
This page took 0.091161 seconds and 4 git commands to generate.