- added 4.1.25.[23] patches
diff --git a/patch.4.1.25.2 b/patch.4.1.25.2
new file mode 100644 (file)
index 0000000..64b5d71
--- /dev/null
@@ -0,0 +1,617 @@
+*** dbinc/mp.h.orig    2004-02-02 10:24:53.000000000 -0800
+--- dbinc/mp.h 2004-02-02 10:26:27.000000000 -0800
+***************
+*** 149,154 ****
+--- 149,161 ----
+        * region lock).
+        */
+       DB_MPOOL_STAT stat;             /* Per-cache mpool statistics. */
++  
++       /*
++        * We track page puts so that we can decide when allocation is never
++        * going to succeed.  We don't lock the field, all we care about is
++        * if it changes.
++        */
++       u_int32_t  put_counter;                /* Count of page put calls. */
+  };
+  
+  struct __db_mpool_hash {
+*** mp/mp_fput.c.orig  2002-08-13 06:26:41.000000000 -0700
+--- mp/mp_fput.c       2004-02-02 10:22:35.000000000 -0800
+***************
+*** 19,24 ****
+--- 19,26 ----
+  #include "dbinc/db_shash.h"
+  #include "dbinc/mp.h"
+  
++ static void __memp_reset_lru __P((DB_ENV *, REGINFO *));
++ 
+  /*
+   * __memp_fput --
+   *   Mpool file put function.
+***************
+*** 198,202 ****
+--- 200,255 ----
+  
+       MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
+  
++      /*
++       * On every buffer put we update the buffer generation number and check
++       * for wraparound.
++       */
++      if (++c_mp->lru_count == UINT32_T_MAX)
++              __memp_reset_lru(dbenv, dbmp->reginfo);
++ 
+       return (0);
+  }
++ 
++ /*
++  * __memp_reset_lru --
++  *   Reset the cache LRU counter.
++  */
++ static void
++ __memp_reset_lru(dbenv, memreg)
++      DB_ENV *dbenv;
++      REGINFO *memreg;
++ {
++      BH *bhp;
++      DB_MPOOL_HASH *hp;
++      MPOOL *c_mp;
++      int bucket;
++ 
++      c_mp = memreg->primary;
++ 
++      /*
++       * Update the counter so all future allocations will start at the
++       * bottom.
++       */
++      c_mp->lru_count -= MPOOL_BASE_DECREMENT;
++ 
++      /* Adjust the priority of every buffer in the system. */
++      for (hp = R_ADDR(memreg, c_mp->htab),
++          bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
++              /*
++               * Skip empty buckets.
++               *
++               * We can check for empty buckets before locking as we
++               * only care if the pointer is zero or non-zero.
++               */
++              if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
++                      continue;
++ 
++              MUTEX_LOCK(dbenv, &hp->hash_mutex);
++              for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
++                  bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
++                      if (bhp->priority != UINT32_T_MAX &&
++                          bhp->priority > MPOOL_BASE_DECREMENT)
++                              bhp->priority -= MPOOL_BASE_DECREMENT;
++              MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
++      }
++ }
+*** mp/mp_alloc.c.orig 2002-08-17 07:23:25.000000000 -0700
+--- mp/mp_alloc.c      2004-02-02 10:28:15.000000000 -0800
+***************
+*** 25,31 ****
+  } HS;
+  
+  static void __memp_bad_buffer __P((DB_MPOOL_HASH *));
+- static void __memp_reset_lru __P((DB_ENV *, REGINFO *, MPOOL *));
+  
+  /*
+   * __memp_alloc --
+--- 25,30 ----
+***************
+*** 50,57 ****
+       MPOOL *c_mp;
+       MPOOLFILE *bh_mfp;
+       size_t freed_space;
+!      u_int32_t buckets, buffers, high_priority, max_na, priority;
+!      int aggressive, ret;
+       void *p;
+  
+       dbenv = dbmp->dbenv;
+--- 49,57 ----
+       MPOOL *c_mp;
+       MPOOLFILE *bh_mfp;
+       size_t freed_space;
+!      u_int32_t buckets, buffers, high_priority, priority, put_counter;
+!      u_int32_t total_buckets;
+!      int aggressive, giveup, ret;
+       void *p;
+  
+       dbenv = dbmp->dbenv;
+***************
+*** 59,76 ****
+       dbht = R_ADDR(memreg, c_mp->htab);
+       hp_end = &dbht[c_mp->htab_buckets];
+  
+!      buckets = buffers = 0;
+!      aggressive = 0;
+  
+       c_mp->stat.st_alloc++;
+  
+       /*
+-       * Get aggressive if we've tried to flush the number of pages as are
+-       * in the system without finding space.
+-       */
+-      max_na = 5 * c_mp->htab_buckets;
+- 
+-      /*
+        * If we're allocating a buffer, and the one we're discarding is the
+        * same size, we don't want to waste the time to re-integrate it into
+        * the shared memory free list.  If the DB_MPOOLFILE argument isn't
+--- 59,71 ----
+       dbht = R_ADDR(memreg, c_mp->htab);
+       hp_end = &dbht[c_mp->htab_buckets];
+  
+!      buckets = buffers = put_counter = total_buckets = 0;
+!      aggressive = giveup = 0;
+!      hp_tmp = NULL;
+  
+       c_mp->stat.st_alloc++;
+  
+       /*
+        * If we're allocating a buffer, and the one we're discarding is the
+        * same size, we don't want to waste the time to re-integrate it into
+        * the shared memory free list.  If the DB_MPOOLFILE argument isn't
+***************
+*** 81,99 ****
+               len = (sizeof(BH) - sizeof(u_int8_t)) + mfp->stat.st_pagesize;
+  
+       R_LOCK(dbenv, memreg);
+- 
+-      /*
+-       * On every buffer allocation we update the buffer generation number
+-       * and check for wraparound.
+-       */
+-      if (++c_mp->lru_count == UINT32_T_MAX)
+-              __memp_reset_lru(dbenv, memreg, c_mp);
+- 
+       /*
+        * Anything newer than 1/10th of the buffer pool is ignored during
+        * allocation (unless allocation starts failing).
+        */
+-      DB_ASSERT(c_mp->lru_count > c_mp->stat.st_pages / 10);
+       high_priority = c_mp->lru_count - c_mp->stat.st_pages / 10;
+  
+       /*
+--- 76,85 ----
+***************
+*** 120,129 ****
+                * We're not holding the region locked here, these statistics
+                * can't be trusted.
+                */
+!              if (buckets != 0) {
+!                      if (buckets > c_mp->stat.st_alloc_max_buckets)
+!                              c_mp->stat.st_alloc_max_buckets = buckets;
+!                      c_mp->stat.st_alloc_buckets += buckets;
+               }
+               if (buffers != 0) {
+                       if (buffers > c_mp->stat.st_alloc_max_pages)
+--- 106,116 ----
+                * We're not holding the region locked here, these statistics
+                * can't be trusted.
+                */
+!              total_buckets += buckets;
+!              if (total_buckets != 0) {
+!                      if (total_buckets > c_mp->stat.st_alloc_max_buckets)
+!                              c_mp->stat.st_alloc_max_buckets = total_buckets;
+!                      c_mp->stat.st_alloc_buckets += total_buckets;
+               }
+               if (buffers != 0) {
+                       if (buffers > c_mp->stat.st_alloc_max_pages)
+***************
+*** 131,136 ****
+--- 118,129 ----
+                       c_mp->stat.st_alloc_pages += buffers;
+               }
+               return (0);
++      } else if (giveup || c_mp->stat.st_pages == 0) {
++              R_UNLOCK(dbenv, memreg);
++ 
++              __db_err(dbenv,
++                  "unable to allocate space from the buffer cache");
++              return (ret);
+       }
+  
+       /*
+***************
+*** 138,163 ****
+        * we need.  Reset our free-space counter.
+        */
+       freed_space = 0;
+  
+       /*
+        * Walk the hash buckets and find the next two with potentially useful
+        * buffers.  Free the buffer with the lowest priority from the buckets'
+        * chains.
+        */
+!      for (hp_tmp = NULL;;) {
+               /* Check for wrap around. */
+               hp = &dbht[c_mp->last_checked++];
+               if (hp >= hp_end) {
+                       c_mp->last_checked = 0;
+! 
+!                      /*
+!                       * If we've gone through all of the hash buckets, try
+!                       * an allocation.  If the cache is small, the old page
+!                       * size is small, and the new page size is large, we
+!                       * might have freed enough memory (but not 3 times the
+!                       * memory).
+!                       */
+!                      goto alloc;
+               }
+  
+               /*
+--- 131,154 ----
+        * we need.  Reset our free-space counter.
+        */
+       freed_space = 0;
++      total_buckets += buckets;
++      buckets = 0;
+  
+       /*
+        * Walk the hash buckets and find the next two with potentially useful
+        * buffers.  Free the buffer with the lowest priority from the buckets'
+        * chains.
+        */
+!      for (;;) {
+!              /* All pages have been freed, make one last try */
+!              if (c_mp->stat.st_pages == 0)
+!                      goto alloc;
+! 
+               /* Check for wrap around. */
+               hp = &dbht[c_mp->last_checked++];
+               if (hp >= hp_end) {
+                       c_mp->last_checked = 0;
+!                      hp = &dbht[c_mp->last_checked++];
+               }
+  
+               /*
+***************
+*** 172,210 ****
+               /*
+                * The failure mode is when there are too many buffers we can't
+                * write or there's not enough memory in the system.  We don't
+!               * have a metric for deciding if allocation has no possible way
+!               * to succeed, so we don't ever fail, we assume memory will be
+!               * available if we wait long enough.
+                *
+!               * Get aggressive if we've tried to flush 5 times the number of
+!               * hash buckets as are in the system -- it's possible we have
+!               * been repeatedly trying to flush the same buffers, although
+!               * it's unlikely.  Aggressive means:
+                *
+                * a: set a flag to attempt to flush high priority buffers as
+                *    well as other buffers.
+                * b: sync the mpool to force out queue extent pages.  While we
+                *    might not have enough space for what we want and flushing
+                *    is expensive, why not?
+!               * c: sleep for a second -- hopefully someone else will run and
+!               *    free up some memory.  Try to allocate memory too, in case
+!               *    the other thread returns its memory to the region.
+!               * d: look at a buffer in every hash bucket rather than choose
+                *    the more preferable of two.
+                *
+                * !!!
+                * This test ignores pathological cases like no buffers in the
+                * system -- that shouldn't be possible.
+                */
+!              if ((++buckets % max_na) == 0) {
+!                      aggressive = 1;
+! 
+                       R_UNLOCK(dbenv, memreg);
+  
+!                      (void)__memp_sync_int(
+!                          dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
+! 
+!                      (void)__os_sleep(dbenv, 1, 0);
+  
+                       R_LOCK(dbenv, memreg);
+                       goto alloc;
+--- 163,221 ----
+               /*
+                * The failure mode is when there are too many buffers we can't
+                * write or there's not enough memory in the system.  We don't
+!               * have a way to know that allocation has no way to succeed.
+!               * We fail if there were no pages returned to the cache after
+!               * we've been trying for a relatively long time.
+                *
+!               * Get aggressive if we've tried to flush the number of hash
+!               * buckets as are in the system and have not found any more
+!               * space.  Aggressive means:
+                *
+                * a: set a flag to attempt to flush high priority buffers as
+                *    well as other buffers.
+                * b: sync the mpool to force out queue extent pages.  While we
+                *    might not have enough space for what we want and flushing
+                *    is expensive, why not?
+!               * c: look at a buffer in every hash bucket rather than choose
+                *    the more preferable of two.
++               * d: start to think about giving up.
++               *
++               * If we get here twice, sleep for a second, hopefully someone
++               * else will run and free up some memory.
++               *
++               * Always try to allocate memory too, in case some other thread
++               * returns its memory to the region.
+                *
+                * !!!
+                * This test ignores pathological cases like no buffers in the
+                * system -- that shouldn't be possible.
+                */
+!              if ((++buckets % c_mp->htab_buckets) == 0) {
+!                      if (freed_space > 0)
+!                              goto alloc;
+                       R_UNLOCK(dbenv, memreg);
+  
+!                      switch (++aggressive) {
+!                      case 1:
+!                              break;
+!                      case 2:
+!                              put_counter = c_mp->put_counter;
+!                              /* FALLTHROUGH */
+!                      case 3:
+!                      case 4:
+!                      case 5:
+!                      case 6:
+!                              (void)__memp_sync_int(
+!                                  dbenv, NULL, 0, DB_SYNC_ALLOC, NULL);
+! 
+!                              (void)__os_sleep(dbenv, 1, 0);
+!                              break;
+!                      default:
+!                              aggressive = 1;
+!                              if (put_counter == c_mp->put_counter)
+!                                      giveup = 1;
+!                              break;
+!                      }
+  
+                       R_LOCK(dbenv, memreg);
+                       goto alloc;
+***************
+*** 277,283 ****
+                * thread may have acquired this buffer and incremented the ref
+                * count after we wrote it, in which case we can't have it.
+                *
+!               * If there's a write error, avoid selecting this buffer again
+                * by making it the bucket's least-desirable buffer.
+                */
+               if (ret != 0 || bhp->ref != 0) {
+--- 288,295 ----
+                * thread may have acquired this buffer and incremented the ref
+                * count after we wrote it, in which case we can't have it.
+                *
+!               * If there's a write error and we're having problems finding
+!               * something to allocate, avoid selecting this buffer again
+                * by making it the bucket's least-desirable buffer.
+                */
+               if (ret != 0 || bhp->ref != 0) {
+***************
+*** 301,306 ****
+--- 313,320 ----
+  
+               freed_space += __db_shsizeof(bhp);
+               __memp_bhfree(dbmp, hp, bhp, 1);
++              if (aggressive > 1)
++                      aggressive = 1;
+  
+               /*
+                * Unlock this hash bucket and re-acquire the region lock. If
+***************
+*** 362,415 ****
+       hp->hash_priority = SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority;
+  }
+  
+- /*
+-  * __memp_reset_lru --
+-  *   Reset the cache LRU counter.
+-  */
+- static void
+- __memp_reset_lru(dbenv, memreg, c_mp)
+-      DB_ENV *dbenv;
+-      REGINFO *memreg;
+-      MPOOL *c_mp;
+- {
+-      BH *bhp;
+-      DB_MPOOL_HASH *hp;
+-      int bucket;
+- 
+-      /*
+-       * Update the counter so all future allocations will start at the
+-       * bottom.
+-       */
+-      c_mp->lru_count -= MPOOL_BASE_DECREMENT;
+- 
+-      /* Release the region lock. */
+-      R_UNLOCK(dbenv, memreg);
+- 
+-      /* Adjust the priority of every buffer in the system. */
+-      for (hp = R_ADDR(memreg, c_mp->htab),
+-          bucket = 0; bucket < c_mp->htab_buckets; ++hp, ++bucket) {
+-              /*
+-               * Skip empty buckets.
+-               *
+-               * We can check for empty buckets before locking as we
+-               * only care if the pointer is zero or non-zero.
+-               */
+-              if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
+-                      continue;
+- 
+-              MUTEX_LOCK(dbenv, &hp->hash_mutex);
+-              for (bhp = SH_TAILQ_FIRST(&hp->hash_bucket, __bh);
+-                  bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, hq, __bh))
+-                      if (bhp->priority != UINT32_T_MAX &&
+-                          bhp->priority > MPOOL_BASE_DECREMENT)
+-                              bhp->priority -= MPOOL_BASE_DECREMENT;
+-              MUTEX_UNLOCK(dbenv, &hp->hash_mutex);
+-      }
+- 
+-      /* Reacquire the region lock. */
+-      R_LOCK(dbenv, memreg);
+- }
+- 
+  #ifdef DIAGNOSTIC
+  /*
+   * __memp_check_order --
+--- 376,381 ----
+*** dbreg/dbreg_rec.c.orig     2002-08-17 07:22:52.000000000 -0700
+--- dbreg/dbreg_rec.c  2003-11-08 10:59:19.000000000 -0800
+***************
+*** 174,192 ****
+                        * Typically, closes should match an open which means
+                        * that if this is a close, there should be a valid
+                        * entry in the dbentry table when we get here,
+!                       * however there is an exception.  If this is an
+                        * OPENFILES pass, then we may have started from
+                        * a log file other than the first, and the
+                        * corresponding open appears in an earlier file.
+!                       * We can ignore that case, but all others are errors.
+                        */
+                       dbe = &dblp->dbentry[argp->fileid];
+                       if (dbe->dbp == NULL && !dbe->deleted) {
+                               /* No valid entry here. */
+!                              if ((argp->opcode != LOG_CLOSE &&
+!                                  argp->opcode != LOG_RCLOSE) ||
+!                                  (op != DB_TXN_OPENFILES &&
+!                                  op !=DB_TXN_POPENFILES)) {
+                                       __db_err(dbenv,
+                                           "Improper file close at %lu/%lu",
+                                           (u_long)lsnp->file,
+--- 174,193 ----
+                        * Typically, closes should match an open which means
+                        * that if this is a close, there should be a valid
+                        * entry in the dbentry table when we get here,
+!                       * however there are exceptions.  1. If this is an
+                        * OPENFILES pass, then we may have started from
+                        * a log file other than the first, and the
+                        * corresponding open appears in an earlier file.
+!                       * 2. If we are undoing an open on an abort or
+!                       * recovery, it's possible that we failed after
+!                       * the log record, but before we actually entered
+!                       * a handle here.
+                        */
+                       dbe = &dblp->dbentry[argp->fileid];
+                       if (dbe->dbp == NULL && !dbe->deleted) {
+                               /* No valid entry here. */
+!                              if (DB_REDO(op) ||
+!                                  argp->opcode == LOG_CHECKPOINT) {
+                                       __db_err(dbenv,
+                                           "Improper file close at %lu/%lu",
+                                           (u_long)lsnp->file,
+*** env/env_recover.c.orig.1   2002-08-22 14:52:51.000000000 -0700
+--- env/env_recover.c  2003-11-15 08:20:59.000000000 -0800
+***************
+*** 232,243 ****
+        * we'll still need to do a vtruncate based on information we haven't
+        * yet collected.
+        */
+!      if (ret == DB_NOTFOUND) {
+               ret = 0;
+!              if (max_lsn == NULL)
+!                      goto done;
+!      }
+!      if (ret != 0)
+               goto err;
+  
+       hi_txn = txnid;
+--- 232,240 ----
+        * we'll still need to do a vtruncate based on information we haven't
+        * yet collected.
+        */
+!      if (ret == DB_NOTFOUND) 
+               ret = 0;
+!      else if (ret != 0)
+               goto err;
+  
+       hi_txn = txnid;
+***************
+*** 331,337 ****
+  
+       /* Find a low txnid. */
+       ret = 0;
+!      do {
+               /* txnid is after rectype, which is a u_int32. */
+               memcpy(&txnid,
+                   (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
+--- 328,334 ----
+  
+       /* Find a low txnid. */
+       ret = 0;
+!      if (hi_txn != 0) do {
+               /* txnid is after rectype, which is a u_int32. */
+               memcpy(&txnid,
+                   (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid));
+***************
+*** 344,354 ****
+        * There are no transactions and we're not recovering to an LSN (see
+        * above), so there is nothing to do.
+        */
+!      if (ret == DB_NOTFOUND) {
+               ret = 0;
+-              if (max_lsn == NULL)
+-                      goto done;
+-      }
+  
+       /* Reset to the first lsn. */
+       if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
+--- 341,348 ----
+        * There are no transactions and we're not recovering to an LSN (see
+        * above), so there is nothing to do.
+        */
+!      if (ret == DB_NOTFOUND) 
+               ret = 0;
+  
+       /* Reset to the first lsn. */
+       if (ret != 0 || (ret = logc->get(logc, &first_lsn, &data, DB_SET)) != 0)
+***************
+*** 367,372 ****
+--- 361,370 ----
+           txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0)
+               goto err;
+  
++      /* If there were no transactions, then we can bail out early. */
++      if (hi_txn == 0 && max_lsn == NULL)
++              goto done;
++              
+       /*
+        * Pass #2.
+        *
+***************
+*** 483,488 ****
+--- 481,487 ----
+       if ((ret = __dbreg_close_files(dbenv)) != 0)
+               goto err;
+  
++ done:
+       if (max_lsn != NULL) {
+               region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn;
+  
+***************
+*** 538,544 ****
+               __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
+               __db_err(dbenv, "%s %lx %s [%lu][%lu]",
+                   "Maximum transaction ID",
+!                  ((DB_TXNHEAD *)txninfo)->maxid,
+                   "Recovery checkpoint",
+                   (u_long)region->last_ckp.file,
+                   (u_long)region->last_ckp.offset);
+--- 537,544 ----
+               __db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
+               __db_err(dbenv, "%s %lx %s [%lu][%lu]",
+                   "Maximum transaction ID",
+!                  txninfo == NULL ? TXN_MINIMUM :
+!                      ((DB_TXNHEAD *)txninfo)->maxid,
+                   "Recovery checkpoint",
+                   (u_long)region->last_ckp.file,
+                   (u_long)region->last_ckp.offset);
+***************
+*** 550,556 ****
+                   (u_long)lsn.file, (u_long)lsn.offset, pass);
+       }
+  
+- done:
+  err: if (lockid != DB_LOCK_INVALIDID) {
+               if ((t_ret = __rep_unlockpages(dbenv, lockid)) != 0 && ret == 0)
+                       ret = t_ret;
+--- 550,555 ----
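
The following is a small, self-contained C sketch of the LRU wraparound handling that the mp_fput.c and mp_alloc.c hunks above relocate into the buffer-put path. It is not part of the patch and not the Berkeley DB API; names such as pool_t, buf_t, buffer_put and BASE_DECREMENT are hypothetical. The idea, as in the patch, is that the pool's generation counter is bumped on every buffer put, and when it is about to wrap, the counter and every unpinned buffer priority are shifted down by the same fixed decrement so relative eviction order is preserved.

/*
 * Illustrative sketch only: periodic renormalisation of a 32-bit LRU
 * generation counter so it never wraps.  All names are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define NBUF            8
#define BASE_DECREMENT  (UINT32_MAX / 4)   /* assumed shift amount */

typedef struct {
        uint32_t priority;      /* LRU generation when last put back */
} buf_t;

typedef struct {
        uint32_t lru_count;     /* bumped on every buffer put */
        buf_t    buf[NBUF];
} pool_t;

/* Shift the counter and every eligible priority down by the same amount. */
static void
reset_lru(pool_t *mp)
{
        int i;

        mp->lru_count -= BASE_DECREMENT;
        for (i = 0; i < NBUF; i++)
                if (mp->buf[i].priority != UINT32_MAX &&  /* "never evict" */
                    mp->buf[i].priority > BASE_DECREMENT)
                        mp->buf[i].priority -= BASE_DECREMENT;
}

/* Called when a buffer is returned to the pool. */
static void
buffer_put(pool_t *mp, buf_t *bhp)
{
        bhp->priority = mp->lru_count;
        if (++mp->lru_count == UINT32_MAX)  /* about to wrap: renormalise */
                reset_lru(mp);
}

int
main(void)
{
        pool_t mp = { UINT32_MAX - 2, { { 0 } } }; /* force a wrap quickly */
        int i;

        for (i = 0; i < NBUF; i++) {
                buffer_put(&mp, &mp.buf[i]);
                printf("put %d: lru_count=%u priority=%u\n",
                    i, mp.lru_count, mp.buf[i].priority);
        }
        return (0);
}

Built with any C89/C99 compiler (for example cc sketch.c), it prints the counter before and after the renormalisation. The real patch does the same shift under each hash bucket's mutex, uses UINT32_T_MAX as the "never evict" marker, and, by moving the check into __memp_fput(), only pays for it when a page is actually returned to the cache.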