1 *** dbinc/os.h.orig 2002/03/27 04:34:55 11.14
2 --- dbinc/os.h 2002/09/26 18:10:10
5 int fd; /* POSIX file descriptor. */
6 char *name; /* File name. */
8 u_int32_t log_size; /* XXX: Log file size. */
9 ! u_int32_t pagesize; /* XXX: Page size. */
11 #define DB_FH_NOSYNC 0x01 /* Handle doesn't need to be sync'd. */
12 #define DB_FH_UNLINK 0x02 /* Unlink on close */
14 int fd; /* POSIX file descriptor. */
15 char *name; /* File name. */
17 + u_int32_t pagesize; /* Underlying page size. */
19 u_int32_t log_size; /* XXX: Log file size. */
21 ! u_int32_t pgno; /* Last seek. */
25 #define DB_FH_NOSYNC 0x01 /* Handle doesn't need to be sync'd. */
26 #define DB_FH_UNLINK 0x02 /* Unlink on close */
27 *** os/os_rw.c.orig 2002/07/12 18:56:52 11.24
28 --- os/os_rw.c 2002/09/16 20:46:14 11.25
35 + /* Check for illegal usage. */
36 + DB_ASSERT(F_ISSET(db_iop->fhp, DB_FH_VALID) && db_iop->fhp->fd != -1);
38 #if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
47 + /* Check for illegal usage. */
48 + DB_ASSERT(F_ISSET(fhp, DB_FH_VALID) && fhp->fd != -1);
51 offset = 0; offset < len; taddr += nr, offset += nr) {
52 retry: if ((nr = DB_GLOBAL(j_read) != NULL ?
60 + /* Check for illegal usage. */
61 + DB_ASSERT(F_ISSET(fhp, DB_FH_VALID) && fhp->fd != -1);
64 offset = 0; offset < len; taddr += nw, offset += nw)
65 *** os/os_rw.c.orig 2002/09/16 20:46:14 11.25
66 --- os/os_rw.c 2002/09/26 18:10:20
71 #ifndef NO_SYSTEM_INCLUDES
72 #include <sys/types.h>
73 + #include <sys/stat.h>
83 + #ifdef HAVE_FILESYSTEM_NOTZERO
84 + static int __os_zerofill __P((DB_ENV *, DB_FH *));
86 + static int __os_physwrite __P((DB_ENV *, DB_FH *, void *, size_t, size_t *));
95 if (DB_GLOBAL(j_write) != NULL)
97 + #ifdef HAVE_FILESYSTEM_NOTZERO
98 + if (__os_fs_notzero())
101 *niop = pwrite(db_iop->fhp->fd, db_iop->buf,
102 db_iop->bytes, (off_t)db_iop->pgno * db_iop->pagesize);
114 ! /* Check for illegal usage. */
115 ! DB_ASSERT(F_ISSET(fhp, DB_FH_VALID) && fhp->fd != -1);
118 offset = 0; offset < len; taddr += nw, offset += nw)
123 + /* Check for illegal usage. */
124 + DB_ASSERT(F_ISSET(fhp, DB_FH_VALID) && fhp->fd != -1);
126 + #ifdef HAVE_FILESYSTEM_NOTZERO
127 + /* Zero-fill as necessary. */
128 + if (__os_fs_notzero()) {
130 + if ((ret = __os_zerofill(dbenv, fhp)) != 0)
134 + return (__os_physwrite(dbenv, fhp, addr, len, nwp));
138 + * __os_physwrite --
139 + * Physical write to a file handle.
142 + __os_physwrite(dbenv, fhp, addr, len, nwp)
154 ! #if defined(HAVE_FILESYSTEM_NOTZERO) && defined(DIAGNOSTIC)
155 ! if (__os_fs_notzero()) {
159 ! DB_ASSERT(fstat(fhp->fd, &sb) != -1 &&
160 ! (cur_off = lseek(fhp->fd, (off_t)0, SEEK_CUR)) != -1 &&
161 ! cur_off <= sb.st_size);
166 offset = 0; offset < len; taddr += nw, offset += nw)
174 + #ifdef HAVE_FILESYSTEM_NOTZERO
177 + * Zero out bytes in the file.
179 + * On some systems, pages allocated by writing pages past end-of-file
180 + * are not zeroed. Recovery could theoretically be fooled by a page
181 + * showing up that contained garbage. In order to avoid this, we
182 + * have to write the pages out to disk, and flush them. The flush is
183 + * needed because, if we don't sync, the allocation of another
184 + * page subsequent to this one might reach the disk first and, if we
185 + * crashed at the right moment, leave us with this page as the one
186 + * allocated by writing a page past it in the file.
189 + __os_zerofill(dbenv, fhp)
193 + off_t stat_offset, write_offset;
195 + u_int32_t bytes, mbytes;
196 + int group_sync, need_free, ret;
197 + u_int8_t buf[8 * 1024], *bp;
199 + /* Calculate the byte offset of the next write. */
200 + write_offset = (off_t)fhp->pgno * fhp->pgsize + fhp->offset;
202 + /* Stat the file. */
203 + if ((ret = __os_ioinfo(dbenv, NULL, fhp, &mbytes, &bytes, NULL)) != 0)
205 + stat_offset = (off_t)mbytes * MEGABYTE + bytes;
207 + /* Check if the file is large enough. */
208 + if (stat_offset >= write_offset)
211 + /* Get a large buffer if we're writing lots of data. */
212 + #undef ZF_LARGE_WRITE
213 + #define ZF_LARGE_WRITE (64 * 1024)
214 + if (write_offset - stat_offset > ZF_LARGE_WRITE) {
215 + if ((ret = __os_calloc(dbenv, 1, ZF_LARGE_WRITE, &bp)) != 0)
217 + blen = ZF_LARGE_WRITE;
221 + blen = sizeof(buf);
223 + memset(buf, 0, sizeof(buf));
226 + /* Seek to the current end of the file. */
227 + if ((ret = __os_seek(
228 + dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET)) != 0)
232 + * Hash is the only access method that allocates groups of pages. Hash
233 + * uses the existence of the last page in a group to signify the entire
234 + * group is OK; so, write all the pages but the last one in the group,
235 + * flush them to disk, then write the last one to disk and flush it.
237 + for (group_sync = 0; stat_offset < write_offset; group_sync = 1) {
238 + if (write_offset - stat_offset <= blen) {
239 + blen = (size_t)(write_offset - stat_offset);
240 + if (group_sync && (ret = __os_fsync(dbenv, fhp)) != 0)
243 + if ((ret = __os_physwrite(dbenv, fhp, bp, blen, &nw)) != 0)
245 + stat_offset += blen;
247 + if ((ret = __os_fsync(dbenv, fhp)) != 0)
250 + /* Seek back to where we started. */
251 + mbytes = (u_int32_t)(write_offset / MEGABYTE);
252 + bytes = (u_int32_t)(write_offset % MEGABYTE);
253 + ret = __os_seek(dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET);
255 + err: if (need_free)
256 + __os_free(dbenv, bp);
260 *** os/os_seek.c.orig Mon Jul 15 22:03:38 2002
261 --- os/os_seek.c Thu Sep 26 14:13:52 2002
264 } while (ret == EINTR);
268 __db_err(dbenv, "seek: %lu %d %d: %s",
269 (u_long)pgsize * pageno + relative,
270 isrewind, db_whence, strerror(ret));
272 } while (ret == EINTR);
276 ! fhp->pgsize = pgsize;
277 ! fhp->pgno = pageno;
278 ! fhp->offset = relative;
280 __db_err(dbenv, "seek: %lu %d %d: %s",
281 (u_long)pgsize * pageno + relative,
282 isrewind, db_whence, strerror(ret));
283 *** os_win32/os_rw.c.orig 2002/08/06 04:56:19 11.28
284 --- os_win32/os_rw.c 2002/09/26 18:10:20
291 + #ifdef HAVE_FILESYSTEM_NOTZERO
292 + static int __os_zerofill __P((DB_ENV *, DB_FH *));
294 + static int __os_physwrite __P((DB_ENV *, DB_FH *, void *, size_t, size_t *));
303 if (DB_GLOBAL(j_write) != NULL)
305 + #ifdef HAVE_FILESYSTEM_NOTZERO
306 + if (__os_fs_notzero())
309 if (!WriteFile(db_iop->fhp->handle,
310 db_iop->buf, (DWORD)db_iop->bytes, &nbytes, &over))
320 + #ifdef HAVE_FILESYSTEM_NOTZERO
321 + /* Zero-fill as necessary. */
322 + if (__os_fs_notzero() && (ret = __os_zerofill(dbenv, fhp)) != 0)
325 + return (__os_physwrite(dbenv, fhp, addr, len, nwp));
329 + * __os_physwrite --
330 + * Physical write to a file handle.
333 + __os_physwrite(dbenv, fhp, addr, len, nwp)
350 + #ifdef HAVE_FILESYSTEM_NOTZERO
353 + * Zero out bytes in the file.
355 + * On some systems, pages allocated by writing pages past end-of-file
356 + * are not zeroed. Recovery could theoretically be fooled by a page
357 + * showing up that contained garbage. In order to avoid this, we
358 + * have to write the pages out to disk, and flush them. The flush is
359 + * needed because, if we don't sync, the allocation of another
360 + * page subsequent to this one might reach the disk first and, if we
361 + * crashed at the right moment, leave us with this page as the one
362 + * allocated by writing a page past it in the file.
365 + __os_zerofill(dbenv, fhp)
369 + unsigned __int64 stat_offset, write_offset;
371 + u_int32_t bytes, mbytes;
372 + int group_sync, need_free, ret;
373 + u_int8_t buf[8 * 1024], *bp;
375 + /* Calculate the byte offset of the next write. */
376 + write_offset = (unsigned __int64)fhp->pgno * fhp->pgsize + fhp->offset;
378 + /* Stat the file. */
379 + if ((ret = __os_ioinfo(dbenv, NULL, fhp, &mbytes, &bytes, NULL)) != 0)
381 + stat_offset = (unsigned __int64)mbytes * MEGABYTE + bytes;
383 + /* Check if the file is large enough. */
384 + if (stat_offset >= write_offset)
387 + /* Get a large buffer if we're writing lots of data. */
388 + #undef ZF_LARGE_WRITE
389 + #define ZF_LARGE_WRITE (64 * 1024)
390 + if (write_offset - stat_offset > ZF_LARGE_WRITE) {
391 + if ((ret = __os_calloc(dbenv, 1, ZF_LARGE_WRITE, &bp)) != 0)
393 + blen = ZF_LARGE_WRITE;
397 + blen = sizeof(buf);
399 + memset(buf, 0, sizeof(buf));
402 + /* Seek to the current end of the file. */
403 + if ((ret = __os_seek(
404 + dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET)) != 0)
408 + * Hash is the only access method that allocates groups of pages. Hash
409 + * uses the existence of the last page in a group to signify the entire
410 + * group is OK; so, write all the pages but the last one in the group,
411 + * flush them to disk, then write the last one to disk and flush it.
413 + for (group_sync = 0; stat_offset < write_offset; group_sync = 1) {
414 + if (write_offset - stat_offset <= blen) {
415 + blen = (size_t)(write_offset - stat_offset);
416 + if (group_sync && (ret = __os_fsync(dbenv, fhp)) != 0)
419 + if ((ret = __os_physwrite(dbenv, fhp, bp, blen, &nw)) != 0)
421 + stat_offset += blen;
423 + if ((ret = __os_fsync(dbenv, fhp)) != 0)
426 + /* Seek back to where we started. */
427 + mbytes = (u_int32_t)(write_offset / MEGABYTE);
428 + bytes = (u_int32_t)(write_offset % MEGABYTE);
429 + ret = __os_seek(dbenv, fhp, MEGABYTE, mbytes, bytes, 0, DB_OS_SEEK_SET);
431 + err: if (need_free)
432 + __os_free(dbenv, bp);
436 *** os_win32/os_seek.c.orig 2002/08/06 04:56:20 11.17
437 --- os_win32/os_seek.c 2002/09/26 18:10:20
440 __os_win32_errno() : 0;
444 __db_err(dbenv, "seek: %lu %d %d: %s",
445 (u_long)pgsize * pageno + relative,
446 isrewind, db_whence, strerror(ret));
451 __os_win32_errno() : 0;
455 ! fhp->pgsize = pgsize;
456 ! fhp->pgno = pageno;
457 ! fhp->offset = relative;
459 __db_err(dbenv, "seek: %lu %d %d: %s",
460 (u_long)pgsize * pageno + relative,
461 isrewind, db_whence, strerror(ret));