]>
Commit | Line | Data |
---|---|---|
9cb7906f ER |
1 | diff -ru rsync-3.1.2.orig/checksum.c rsync-3.1.2/checksum.c |
2 | --- rsync-3.1.2.orig/checksum.c 2015-08-08 22:47:03.000000000 +0300 | |
3 | +++ rsync-3.1.2/checksum.c 2016-10-24 15:38:28.002415712 +0300 | |
4 | @@ -24,6 +24,9 @@ | |
5 | extern int checksum_seed; | |
6 | extern int protocol_version; | |
7 | extern int proper_seed_order; | |
8 | +#ifdef WITH_DROP_CACHE | |
9 | +#define close(fd) fadv_close(fd) | |
10 | +#endif | |
11 | ||
12 | /* | |
13 | a simple 32 bit checksum that can be upadted from either end | |
14 | diff -ru rsync-3.1.2.orig/cleanup.c rsync-3.1.2/cleanup.c | |
15 | --- rsync-3.1.2.orig/cleanup.c 2015-08-08 22:47:03.000000000 +0300 | |
16 | +++ rsync-3.1.2/cleanup.c 2016-10-24 15:38:28.002415712 +0300 | |
17 | @@ -53,7 +53,11 @@ | |
18 | int fd; | |
19 | int ret; | |
20 | STRUCT_STAT st; | |
21 | - | |
22 | +#endif | |
23 | +#ifdef WITH_DROP_CACHE | |
24 | + fadv_close_all(); | |
25 | +#endif | |
26 | +#ifdef SHUTDOWN_ALL_SOCKETS | |
27 | max_fd = sysconf(_SC_OPEN_MAX) - 1; | |
28 | for (fd = max_fd; fd >= 0; fd--) { | |
29 | if ((ret = do_fstat(fd, &st)) == 0) { | |
30 | diff -ru rsync-3.1.2.orig/config.h.in rsync-3.1.2/config.h.in | |
31 | --- rsync-3.1.2.orig/config.h.in 2015-12-21 22:20:53.000000000 +0200 | |
32 | +++ rsync-3.1.2/config.h.in 2016-10-24 15:38:28.006415712 +0300 | |
33 | @@ -275,6 +275,9 @@ | |
34 | /* Define to 1 if you have the <memory.h> header file. */ | |
35 | #undef HAVE_MEMORY_H | |
36 | ||
37 | +/* Define to 1 if you have the `mincore' function. */ | |
38 | +#undef HAVE_MINCORE | |
39 | + | |
40 | /* Define to 1 if you have the `mkfifo' function. */ | |
41 | #undef HAVE_MKFIFO | |
42 | ||
43 | @@ -287,6 +290,9 @@ | |
44 | /* Define to 1 if the system has the type `mode_t'. */ | |
45 | #undef HAVE_MODE_T | |
46 | ||
47 | +/* Define to 1 if you have the `mmap' function. */ | |
48 | +#undef HAVE_MMAP | |
49 | + | |
50 | /* Define to 1 if you have the `mtrace' function. */ | |
51 | #undef HAVE_MTRACE | |
52 | ||
53 | @@ -329,6 +335,9 @@ | |
54 | /* true if you have posix ACLs */ | |
55 | #undef HAVE_POSIX_ACLS | |
56 | ||
57 | +/* Define to 1 if you have the `posix_fadvise64' function. */ | |
58 | +#undef HAVE_POSIX_FADVISE64 | |
59 | + | |
60 | /* Define to 1 if you have the `posix_fallocate' function. */ | |
61 | #undef HAVE_POSIX_FALLOCATE | |
62 | ||
63 | diff -ru rsync-3.1.2.orig/configure.ac rsync-3.1.2/configure.ac | |
64 | --- rsync-3.1.2.orig/configure.ac 2015-12-21 22:00:49.000000000 +0200 | |
65 | +++ rsync-3.1.2/configure.ac 2016-10-24 15:38:28.006415712 +0300 | |
66 | @@ -598,6 +598,7 @@ | |
67 | setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ | |
68 | seteuid strerror putenv iconv_open locale_charset nl_langinfo getxattr \ | |
69 | extattr_get_link sigaction sigprocmask setattrlist getgrouplist \ | |
70 | + mmap mincore posix_fadvise64 \ | |
71 | initgroups utimensat posix_fallocate attropen setvbuf usleep) | |
72 | ||
73 | dnl cygwin iconv.h defines iconv_open as libiconv_open | |
74 | diff -ru rsync-3.1.2.orig/configure.sh rsync-3.1.2/configure.sh | |
75 | --- rsync-3.1.2.orig/configure.sh 2015-12-21 22:20:53.000000000 +0200 | |
76 | +++ rsync-3.1.2/configure.sh 2016-10-24 15:38:28.006415712 +0300 | |
77 | @@ -7692,6 +7692,7 @@ | |
78 | setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ | |
79 | seteuid strerror putenv iconv_open locale_charset nl_langinfo getxattr \ | |
80 | extattr_get_link sigaction sigprocmask setattrlist getgrouplist \ | |
81 | + mmap mincore posix_fadvise64 \ | |
82 | initgroups utimensat posix_fallocate attropen setvbuf usleep | |
83 | do : | |
84 | as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` | |
85 | diff -ru rsync-3.1.2.orig/fileio.c rsync-3.1.2/fileio.c | |
86 | --- rsync-3.1.2.orig/fileio.c 2015-08-08 22:47:03.000000000 +0300 | |
87 | +++ rsync-3.1.2/fileio.c 2016-10-24 15:38:28.006415712 +0300 | |
88 | @@ -51,7 +51,7 @@ | |
89 | ret = -1; | |
90 | else { | |
91 | do { | |
92 | - ret = write(f, "", 1); | |
93 | + ret = fadv_write(f, "", 1); | |
94 | } while (ret < 0 && errno == EINTR); | |
95 | ||
96 | ret = ret <= 0 ? -1 : 0; | |
97 | @@ -81,7 +81,7 @@ | |
98 | do_lseek(f, sparse_seek, SEEK_CUR); | |
99 | sparse_seek = l2; | |
100 | ||
101 | - while ((ret = write(f, buf + l1, len - (l1+l2))) <= 0) { | |
102 | + while ((ret = fadv_write(f, buf + l1, len - (l1+l2))) <= 0) { | |
103 | if (ret < 0 && errno == EINTR) | |
104 | continue; | |
105 | sparse_seek = 0; | |
106 | @@ -107,7 +107,7 @@ | |
107 | char *bp = wf_writeBuf; | |
108 | ||
109 | while (wf_writeBufCnt > 0) { | |
110 | - if ((ret = write(f, bp, wf_writeBufCnt)) < 0) { | |
111 | + if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) { | |
112 | if (errno == EINTR) | |
113 | continue; | |
114 | return ret; | |
115 | @@ -254,7 +254,7 @@ | |
116 | map->p_len = window_size; | |
117 | ||
118 | while (read_size > 0) { | |
119 | - int32 nread = read(map->fd, map->p + read_offset, read_size); | |
120 | + int32 nread = fadv_read(map->fd, map->p + read_offset, read_size); | |
121 | if (nread <= 0) { | |
122 | if (!map->status) | |
123 | map->status = nread ? errno : ENODATA; | |
124 | diff -ru rsync-3.1.2.orig/generator.c rsync-3.1.2/generator.c | |
125 | --- rsync-3.1.2.orig/generator.c 2015-12-05 21:10:24.000000000 +0200 | |
126 | +++ rsync-3.1.2/generator.c 2016-10-24 15:38:28.006415712 +0300 | |
127 | @@ -111,6 +111,10 @@ | |
128 | static int need_retouch_dir_perms; | |
129 | static const char *solo_file = NULL; | |
130 | ||
131 | +#ifdef WITH_DROP_CACHE | |
132 | +#define close(fd) fadv_close(fd) | |
133 | +#endif | |
134 | + | |
135 | enum nonregtype { | |
136 | TYPE_DIR, TYPE_SPECIAL, TYPE_DEVICE, TYPE_SYMLINK | |
137 | }; | |
138 | diff -ru rsync-3.1.2.orig/options.c rsync-3.1.2/options.c | |
139 | --- rsync-3.1.2.orig/options.c 2015-12-19 00:46:28.000000000 +0200 | |
140 | +++ rsync-3.1.2/options.c 2016-10-24 15:38:28.006415712 +0300 | |
141 | @@ -62,6 +62,9 @@ | |
142 | int preserve_gid = 0; | |
143 | int preserve_times = 0; | |
144 | int update_only = 0; | |
145 | +#ifdef WITH_DROP_CACHE | |
146 | +int drop_cache = 0; | |
147 | +#endif | |
148 | int cvs_exclude = 0; | |
149 | int dry_run = 0; | |
150 | int do_xfers = 1; | |
151 | @@ -680,6 +683,9 @@ | |
152 | rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n"); | |
153 | rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX); | |
154 | rprintf(F," -u, --update skip files that are newer on the receiver\n"); | |
155 | +#ifdef WITH_DROP_CACHE | |
156 | + rprintf(F," --drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); | |
157 | +#endif | |
158 | rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n"); | |
159 | rprintf(F," --append append data onto shorter files\n"); | |
160 | rprintf(F," --append-verify like --append, but with old data in file checksum\n"); | |
161 | @@ -914,6 +920,9 @@ | |
162 | {"no-one-file-system",0, POPT_ARG_VAL, &one_file_system, 0, 0, 0 }, | |
163 | {"no-x", 0, POPT_ARG_VAL, &one_file_system, 0, 0, 0 }, | |
164 | {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, | |
165 | +#ifdef WITH_DROP_CACHE | |
166 | + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, | |
167 | +#endif | |
168 | {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, | |
169 | {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, | |
170 | {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 }, | |
171 | @@ -1065,6 +1074,9 @@ | |
172 | rprintf(F," --log-file=FILE override the \"log file\" setting\n"); | |
173 | rprintf(F," --log-file-format=FMT override the \"log format\" setting\n"); | |
174 | rprintf(F," --sockopts=OPTIONS specify custom TCP options\n"); | |
175 | +#ifdef WITH_DROP_CACHE | |
176 | + rprintf(F," --drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); | |
177 | +#endif | |
178 | rprintf(F," -v, --verbose increase verbosity\n"); | |
179 | rprintf(F," -4, --ipv4 prefer IPv4\n"); | |
180 | rprintf(F," -6, --ipv6 prefer IPv6\n"); | |
181 | @@ -1089,6 +1101,9 @@ | |
182 | {"log-file", 0, POPT_ARG_STRING, &logfile_name, 0, 0, 0 }, | |
183 | {"log-file-format", 0, POPT_ARG_STRING, &logfile_format, 0, 0, 0 }, | |
184 | {"port", 0, POPT_ARG_INT, &rsync_port, 0, 0, 0 }, | |
185 | +#ifdef WITH_DROP_CACHE | |
186 | + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, | |
187 | +#endif | |
188 | {"sockopts", 0, POPT_ARG_STRING, &sockopts, 0, 0, 0 }, | |
189 | {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 }, | |
190 | {"server", 0, POPT_ARG_NONE, &am_server, 0, 0, 0 }, | |
191 | @@ -2376,6 +2391,11 @@ | |
192 | if (!am_sender) | |
193 | args[ac++] = "--sender"; | |
194 | ||
195 | +#ifdef WITH_DROP_CACHE | |
196 | + if (drop_cache) | |
197 | + args[ac++] = "--drop-cache"; | |
198 | +#endif | |
199 | + | |
200 | x = 1; | |
201 | argstr[0] = '-'; | |
202 | ||
203 | diff -ru rsync-3.1.2.orig/proto.h rsync-3.1.2/proto.h | |
204 | --- rsync-3.1.2.orig/proto.h 2015-12-21 22:22:53.000000000 +0200 | |
205 | +++ rsync-3.1.2/proto.h 2016-10-24 15:38:28.010415712 +0300 | |
206 | @@ -348,6 +348,10 @@ | |
207 | uid_t recv_user_name(int f, uid_t uid); | |
208 | gid_t recv_group_name(int f, gid_t gid, uint16 *flags_ptr); | |
209 | void recv_id_list(int f, struct file_list *flist); | |
210 | +ssize_t fadv_write(int fd, const void *buf, size_t count); | |
211 | +ssize_t fadv_read(int fd, void *buf, size_t count); | |
212 | +void fadv_close_all(void); | |
213 | +int fadv_close(int fd); | |
214 | void parse_name_map(char *map, BOOL usernames); | |
215 | const char *getallgroups(uid_t uid, item_list *gid_list); | |
216 | void set_nonblocking(int fd); | |
217 | diff -ru rsync-3.1.2.orig/receiver.c rsync-3.1.2/receiver.c | |
218 | --- rsync-3.1.2.orig/receiver.c 2015-09-07 20:07:17.000000000 +0300 | |
219 | +++ rsync-3.1.2/receiver.c 2016-10-24 15:38:28.010415712 +0300 | |
220 | @@ -62,6 +62,10 @@ | |
221 | extern struct file_list *cur_flist, *first_flist, *dir_flist; | |
222 | extern filter_rule_list daemon_filter_list; | |
223 | ||
224 | +#ifdef WITH_DROP_CACHE | |
225 | +#define close(fd) fadv_close(fd) | |
226 | +#endif | |
227 | + | |
228 | static struct bitbag *delayed_bits = NULL; | |
229 | static int phase = 0, redoing = 0; | |
230 | static flist_ndx_list batch_redo_list; | |
231 | diff -ru rsync-3.1.2.orig/rsync.1 rsync-3.1.2/rsync.1 | |
232 | --- rsync-3.1.2.orig/rsync.1 2015-12-21 22:22:41.000000000 +0200 | |
233 | +++ rsync-3.1.2/rsync.1 2016-10-24 15:38:28.010415712 +0300 | |
234 | @@ -453,6 +453,7 @@ | |
235 | \-\-super receiver attempts super\-user activities | |
236 | \-\-fake\-super store/recover privileged attrs using xattrs | |
237 | \-S, \-\-sparse handle sparse files efficiently | |
238 | + \-\-drop\-cache drop cache continuously using fadvise | |
239 | \-\-preallocate allocate dest files before writing | |
240 | \-n, \-\-dry\-run perform a trial run with no changes made | |
241 | \-W, \-\-whole\-file copy files whole (w/o delta\-xfer algorithm) | |
242 | @@ -1426,6 +1427,13 @@ | |
243 | up less space on the destination. Conflicts with \fB\-\-inplace\fP because it\(cq\&s | |
244 | not possible to overwrite data in a sparse fashion. | |
245 | .IP | |
246 | +.IP "\fB\-\-drop\-cache\fP" | |
247 | +Stop rsync from filling up the file system cache with the files it copies\&. Without this | |
248 | +option, other processes that had been crunching along happily on your system will suddenly | |
249 | +become slow as they find their data being ousted from the cache. The \fB\-\-drop\-cache\fP function | |
250 | +uses posix_fadvise64 and mincore to do its work\&. It will only get compiled if configure can find posix_fadvise64 and mincore\&. | |
251 | +Rsync only tries to drop data from the cache that was not already cached before rsync accessed it. | |
252 | +.IP | |
253 | .IP "\fB\-\-preallocate\fP" | |
254 | This tells the receiver to allocate each destination | |
255 | file to its eventual size before writing data to the file. Rsync will only use | |
256 | diff -ru rsync-3.1.2.orig/rsync.h rsync-3.1.2/rsync.h | |
257 | --- rsync-3.1.2.orig/rsync.h 2015-08-08 22:47:03.000000000 +0300 | |
258 | +++ rsync-3.1.2/rsync.h 2016-10-24 15:38:28.010415712 +0300 | |
259 | @@ -1291,3 +1291,13 @@ | |
260 | #ifdef MAINTAINER_MODE | |
261 | const char *get_panic_action(void); | |
262 | #endif | |
263 | + | |
264 | +#if defined HAVE_POSIX_FADVISE64 && defined HAVE_MINCORE && defined HAVE_MMAP | |
265 | +#define WITH_DROP_CACHE 1 | |
266 | +#include <sys/mman.h> | |
267 | +int fadv_close(int fd); | |
268 | +void fadv_close_all(void); | |
269 | +#endif | |
270 | + | |
271 | +ssize_t fadv_write(int fd, const void *buf, size_t count); | |
272 | +ssize_t fadv_read(int fd, void *buf, size_t count); | |
273 | diff -ru rsync-3.1.2.orig/rsync.yo rsync-3.1.2/rsync.yo | |
274 | --- rsync-3.1.2.orig/rsync.yo 2015-12-21 22:00:49.000000000 +0200 | |
275 | +++ rsync-3.1.2/rsync.yo 2016-10-24 15:38:28.010415712 +0300 | |
276 | @@ -1244,6 +1244,17 @@ | |
277 | up less space on the destination. Conflicts with bf(--inplace) because it's | |
278 | not possible to overwrite data in a sparse fashion. | |
279 | ||
280 | +dit(bf(--drop-cache)) Stop rsync from disturbing the file system cache with | |
281 | +the data from the files it copies. Without this option other processes, that | |
282 | +had been crunching along happily using cached data, will suddenly become | |
283 | +slow as they find their favorite data blocks being evicted from the | |
284 | +cache by the files read and written by rsync. Since rsync has to wait until | |
285 | +the data is written to disk, before it can drop the cache, this option will | |
286 | +slow rsync down considerably, especially with small files and short copy | |
287 | +jobs. The bf(--drop-cache) function uses posix_fadvise64 and mincore to do | |
288 | +its work. It will only get compiled if configure can find posix_fadvise64 | |
289 | +and mincore. | |
290 | + | |
291 | dit(bf(--preallocate)) This tells the receiver to allocate each destination | |
292 | file to its eventual size before writing data to the file. Rsync will only use | |
293 | the real filesystem-level preallocation support provided by Linux's | |
294 | diff -ru rsync-3.1.2.orig/sender.c rsync-3.1.2/sender.c | |
295 | --- rsync-3.1.2.orig/sender.c 2015-09-07 20:07:17.000000000 +0300 | |
296 | +++ rsync-3.1.2/sender.c 2016-10-24 15:38:28.010415712 +0300 | |
297 | @@ -46,6 +46,9 @@ | |
298 | extern int file_old_total; | |
299 | extern struct stats stats; | |
300 | extern struct file_list *cur_flist, *first_flist, *dir_flist; | |
301 | +#ifdef WITH_DROP_CACHE | |
302 | +#define close(fd) fadv_close(fd) | |
303 | +#endif | |
304 | ||
305 | BOOL extra_flist_sending_enabled; | |
306 | ||
307 | diff -ru rsync-3.1.2.orig/t_unsafe.c rsync-3.1.2/t_unsafe.c | |
308 | --- rsync-3.1.2.orig/t_unsafe.c 2015-08-08 22:47:03.000000000 +0300 | |
309 | +++ rsync-3.1.2/t_unsafe.c 2016-10-24 15:38:28.010415712 +0300 | |
310 | @@ -24,6 +24,7 @@ | |
311 | #include "rsync.h" | |
312 | ||
313 | int dry_run = 0; | |
314 | +int drop_cache = 0; | |
315 | int am_root = 0; | |
316 | int am_sender = 1; | |
317 | int read_only = 0; | |
318 | diff -ru rsync-3.1.2.orig/util.c rsync-3.1.2/util.c | |
319 | --- rsync-3.1.2.orig/util.c 2015-12-21 20:54:02.000000000 +0200 | |
320 | +++ rsync-3.1.2/util.c 2016-10-24 15:38:28.014415712 +0300 | |
321 | @@ -37,6 +37,10 @@ | |
322 | extern unsigned int module_dirlen; | |
323 | extern char *partial_dir; | |
324 | extern filter_rule_list daemon_filter_list; | |
325 | +#ifdef WITH_DROP_CACHE | |
326 | +#include <sys/mman.h> | |
327 | +extern int drop_cache; | |
328 | +#endif | |
329 | ||
330 | int sanitize_paths = 0; | |
331 | ||
332 | @@ -44,6 +48,218 @@ | |
333 | unsigned int curr_dir_len; | |
334 | int curr_dir_depth; /* This is only set for a sanitizing daemon. */ | |
335 | ||
336 | +#ifdef WITH_DROP_CACHE | |
337 | +#define FADV_BUFFER_SIZE (1024*1024*16)	/* bytes the offset must move past the last drop point before the next drop */ | |
338 | + | |
339 | +static struct stat fadv_fd_stat[1024];	/* per fd: device, inode and size of the file last snapshotted */ | |
340 | +static off_t fadv_fd_pos[1024];	/* per fd: offset up to which the cache was last dropped */ | |
341 | +static unsigned char *fadv_core_ptr[1024];	/* per fd: mincore() residency vector, or NULL when there is none */ | |
342 | +static int fadv_max_fd = 0;	/* close-ring capacity and highest fd fadv_drop() accepts; 0 until initialised */ | |
343 | +static int fadv_close_ring_tail = 0;	/* ring index of the oldest parked descriptor */ | |
344 | +static int fadv_close_ring_head = 0;	/* ring index where the next descriptor is parked */ | |
345 | +static int fadv_close_ring_size = 0;	/* number of descriptors currently parked */ | |
346 | +static int fadv_close_ring[1024];	/* dup()ed descriptors waiting for flush, drop and close */ | |
347 | +static off_t fadv_close_buffer_size = 0;	/* not-yet-dropped bytes of parked files; off_t (was int) so files over 2 GB cannot overflow it */ | |
348 | +static size_t fadv_pagesize;	/* page size as reported by getpagesize() */ | |
349 | + | |
350 | +static void fadv_fd_init_func(void) | |
351 | +{ | |
352 | +	/* One-time setup of the drop-cache bookkeeping: page size, the cap on tracked and | |
353 | +	   parked descriptors (fadv_max_fd) and zeroed per-descriptor state. */ | |
354 | +	static int fadv_fd_init = 0; | |
355 | +	int i; | |
356 | +	if (fadv_fd_init != 0) | |
357 | +		return; | |
358 | +	fadv_fd_init = 1; | |
359 | +	fadv_pagesize = getpagesize(); | |
360 | +	if (fadv_max_fd == 0){ | |
361 | +		/* Clamp to 1..1000 while still a long. The floor is 1 (was 0): the close ring computes "% fadv_max_fd". */ | |
362 | +		long limit = sysconf(_SC_OPEN_MAX) - 20; | |
363 | +		fadv_max_fd = limit < 1 ? 1 : (limit > 1000 ? 1000 : (int)limit); | |
364 | +	} | |
365 | +	for (i=0;i<fadv_max_fd;i++){ | |
366 | +		fadv_fd_pos[i] = 0; | |
367 | +		fadv_fd_stat[i].st_dev = 0; | |
368 | +		fadv_fd_stat[i].st_ino = 0; | |
369 | +		fadv_fd_stat[i].st_size = 0; | |
370 | +		fadv_core_ptr[i] = NULL; | |
371 | +	} | |
372 | +} | |
373 | + | |
374 | +static void fadv_get_core(int fd) | |
375 | +{ | |
376 | +	/* Snapshot, once per file seen on fd, which of its pages are already in the page | |
377 | +	   cache (mmap + mincore).  On any failure the slot stays NULL (no snapshot). */ | |
378 | +	struct stat stat; | |
379 | +	void *pa; | |
380 | +	size_t pi; | |
381 | +	if (fd < 0 || fd >= (int)(sizeof fadv_core_ptr / sizeof fadv_core_ptr[0]) || fstat(fd,&stat) != 0) | |
382 | +		return;	/* no table slot for fd, or stat would be left uninitialised */ | |
383 | +	if ( fadv_fd_stat[fd].st_dev == stat.st_dev && fadv_fd_stat[fd].st_ino == stat.st_ino ) { | |
384 | +		return;	/* same file as last time: the snapshot was already attempted */ | |
385 | +	} | |
386 | +	fadv_fd_stat[fd].st_dev = stat.st_dev; | |
387 | +	fadv_fd_stat[fd].st_ino = stat.st_ino; | |
388 | +	fadv_fd_stat[fd].st_size = stat.st_size; | |
389 | +	/* free(NULL) is a no-op; clear the slot so no failure path below can leave a dangling pointer */ | |
390 | +	free(fadv_core_ptr[fd]); | |
391 | +	fadv_core_ptr[fd] = NULL; | |
392 | +	if (stat.st_size <= 0) | |
393 | +		return;	/* mmap() rejects a zero length; an empty file has nothing to record */ | |
394 | +	pa = mmap((void *)0, stat.st_size, PROT_READ, MAP_SHARED, fd, 0); | |
395 | +	if (MAP_FAILED == pa) { | |
396 | +		perror("mmap"); | |
397 | +		return; | |
398 | +	} | |
399 | +	fadv_core_ptr[fd] = calloc(1, (stat.st_size+fadv_pagesize)/fadv_pagesize); | |
400 | +	if ( fadv_core_ptr[fd] == NULL ){ | |
401 | +		perror("calloc"); | |
402 | +	} else if ( mincore(pa, stat.st_size, (fadv_core_ptr[fd])) != 0){ | |
403 | +		perror("mincore"); | |
404 | +		free(fadv_core_ptr[fd]); | |
405 | +		fadv_core_ptr[fd] = NULL; | |
406 | +	} else if (DEBUG_GTE(IO, 4)) { | |
407 | +		rprintf(FINFO, "fadv_get_core(fd=%d): ", fd); | |
408 | +		for (pi = 0; pi <= stat.st_size/fadv_pagesize; pi++) { | |
409 | +			if ((fadv_core_ptr[fd])[pi]&1) { | |
410 | +				rprintf(FINFO,"%lu ", (unsigned long)pi); | |
411 | +			} | |
412 | +		} | |
413 | +		rprintf(FINFO,"\n"); | |
414 | +	} | |
415 | +	munmap(pa, stat.st_size);	/* unmapped on every path now, including calloc failure */ | |
416 | +} | |
417 | + | |
418 | +static void fadv_drop(int fd, int sync) | |
419 | +{ | |
420 | +	/* Advise the kernel to drop cached pages of fd behind its current offset; a non-zero | |
421 | +	   sync flushes first.  errno is preserved so callers can still test it for EINTR. */ | |
422 | +	const int saved_errno = errno; | |
423 | +	off_t pos; | |
424 | +	fadv_fd_init_func();	/* fadv_max_fd is 0 (every fd > 0 rejected) until this has run */ | |
425 | +	if (fd < 0 || fd > fadv_max_fd)	/* checked before fd is used as a table index */ | |
426 | +		return; | |
427 | +	/* trail 1 MB behind so that the same block or stripe does not have to be written twice */ | |
428 | +	pos = lseek(fd,0,SEEK_CUR) - 1024*1024; | |
429 | +	if ( fadv_fd_pos[fd] < pos - FADV_BUFFER_SIZE ) { | |
430 | +		if (sync) { | |
431 | +			/* the advice is ignored for data that is not yet flushed to disk, | |
432 | +			   so flush first even though this is a severe performance hit */ | |
433 | +			fdatasync(fd); | |
434 | +		} | |
435 | +		if (fadv_core_ptr[fd] != NULL) { | |
436 | +			size_t pi; | |
437 | +			if (pos < fadv_fd_stat[fd].st_size){ | |
438 | +				for (pi = fadv_fd_pos[fd]/fadv_pagesize; pi <= pos/fadv_pagesize; pi++) { | |
439 | +					if (! (fadv_core_ptr[fd][pi]&1)) { | |
440 | +						posix_fadvise64(fd, pi*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED); | |
441 | +					} | |
442 | +				} | |
443 | +			} else { | |
444 | +				posix_fadvise64(fd, fadv_fd_stat[fd].st_size, pos-fadv_fd_stat[fd].st_size, POSIX_FADV_DONTNEED); | |
445 | +			} | |
446 | +		} else { | |
447 | +			posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED); | |
448 | +		} | |
449 | +		fadv_fd_pos[fd] = pos; | |
450 | +	} | |
451 | +	errno = saved_errno; | |
452 | +} | |
453 | + | |
454 | +#endif | |
455 | + | |
456 | +ssize_t fadv_write(int fd, const void *buf, size_t count) | |
457 | +{ | |
458 | +	/* write(2) wrapper: same arguments and result; when drop_cache is set it then calls fadv_drop(fd, 1). */ | |
459 | +	ssize_t ret = write(fd, buf, count);	/* ssize_t (was int) so the result is never truncated */ | |
460 | +#ifdef WITH_DROP_CACHE | |
461 | +	if (drop_cache) | |
462 | +		fadv_drop(fd,1); | |
463 | +#endif | |
464 | +	return ret; | |
465 | +} | |
466 | + | |
467 | +ssize_t fadv_read(int fd, void *buf, size_t count) | |
468 | +{ | |
469 | +	/* read(2) wrapper: same arguments and result, plus drop-cache bookkeeping when drop_cache is set. */ | |
470 | +	ssize_t ret;	/* ssize_t (was int) so the result of read() is never truncated */ | |
471 | +#ifdef WITH_DROP_CACHE | |
472 | +	if (drop_cache) { | |
473 | +		fadv_fd_init_func(); | |
474 | +		fadv_get_core(fd);	/* record page residency before this read touches the file */ | |
475 | +	} | |
476 | +#endif | |
477 | +	ret = read(fd, buf, count); | |
478 | +#ifdef WITH_DROP_CACHE | |
479 | +	if (drop_cache) | |
480 | +		fadv_drop(fd,0);	/* sync argument 0: no fdatasync() on the read path */ | |
481 | +#endif | |
482 | +	return ret; | |
483 | +} | |
484 | + | |
485 | +#ifdef WITH_DROP_CACHE | |
486 | +void fadv_close_all(void) | |
487 | +{ | |
488 | +	/* Drain the close ring: flush each parked descriptor, advise the kernel to drop its | |
489 | +	   pages (skipping pages the residency vector marks as set), then close it. */ | |
490 | +	for (; fadv_close_ring_size > 0; fadv_close_ring_size--) { | |
491 | +		const int parked = fadv_close_ring[fadv_close_ring_tail]; | |
492 | +		unsigned char *resident = fadv_core_ptr[parked]; | |
493 | +		fdatasync(parked); | |
494 | +		if (resident == NULL) { | |
495 | +			posix_fadvise64(parked, 0, 0,POSIX_FADV_DONTNEED); | |
496 | +		} else { | |
497 | +			size_t page; | |
498 | +			for (page = 0; page <= fadv_fd_stat[parked].st_size/fadv_pagesize; page++) { | |
499 | +				if ((resident[page]&1) == 0) { | |
500 | +					posix_fadvise64(parked, page*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED); | |
501 | +				} | |
502 | +			} | |
503 | +			/* anything past the recorded st_size (a file that has grown) is not dropped here */ | |
504 | +			free(resident); | |
505 | +			fadv_core_ptr[parked] = NULL; | |
506 | +			fadv_fd_stat[parked].st_size = 0; | |
507 | +			fadv_fd_stat[parked].st_ino = 0; | |
508 | +			fadv_fd_stat[parked].st_dev = 0; | |
509 | +		} | |
510 | +		close(parked); | |
511 | +		fadv_close_ring_tail = (fadv_close_ring_tail + 1) % fadv_max_fd; | |
512 | +		fadv_close_buffer_size = 0; | |
513 | +	} | |
514 | +} | |
515 | + | |
516 | +int fadv_close(int fd) | |
517 | +{ | |
518 | +	/* Close fd and return close()'s result.  When drop_cache is set, a dup of fd is parked in | |
519 | +	   the close ring so fadv_close_all() can flush and drop in batches instead of per file. */ | |
520 | +	const int nslots = (int)(sizeof fadv_fd_pos / sizeof fadv_fd_pos[0]); | |
521 | +	int newfd = (drop_cache && fd >= 0 && fd < nslots) ? dup(fd) : -1; | |
522 | +	if (newfd >= nslots) {	/* no bookkeeping slot for the copy: do not park it */ | |
523 | +		close(newfd); | |
524 | +		newfd = -1; | |
525 | +	} | |
526 | +	if (newfd >= 0) {	/* dup() can fail (EBADF, EMFILE); then fd is simply closed below */ | |
527 | +		off_t pos = lseek(fd,0,SEEK_CUR); | |
528 | +		fadv_fd_init_func(); | |
529 | +		fadv_core_ptr[newfd] = fadv_core_ptr[fd]; | |
530 | +		fadv_fd_stat[newfd].st_size = fadv_fd_stat[fd].st_size; | |
531 | +		fadv_core_ptr[fd] = NULL; | |
532 | +		if (pos > fadv_fd_pos[fd])	/* a failed lseek() (-1) must not shrink the running total */ | |
533 | +			fadv_close_buffer_size += pos - fadv_fd_pos[fd]; | |
534 | +		fadv_fd_pos[fd] = 0;	/* the next file opened on this fd starts from offset 0 */ | |
535 | +		fadv_fd_stat[fd].st_dev = fadv_fd_stat[fd].st_ino = 0;	/* and gets a fresh residency snapshot */ | |
536 | +		fadv_close_ring[fadv_close_ring_head] = newfd; | |
537 | +		fadv_close_ring_head = (fadv_close_ring_head + 1) % fadv_max_fd; | |
538 | +		if (++fadv_close_ring_size == fadv_max_fd || fadv_close_buffer_size > 1024*1024) | |
539 | +			fadv_close_all();	/* it seems fastest to drop things 'in groups' */ | |
540 | +	} | |
541 | +	return close(fd); | |
542 | +} | |
543 | + | |
544 | + | |
545 | +#define close(fd) fadv_close(fd) | |
546 | +#endif | |
547 | + | |
548 | /* Set a fd into nonblocking mode. */ | |
549 | void set_nonblocking(int fd) | |
550 | { | |
551 | @@ -273,7 +489,7 @@ | |
552 | ||
553 | total_written = 0; | |
554 | while (len > 0) { | |
555 | - int written = write(desc, ptr, len); | |
556 | + int written = fadv_write(desc, ptr, len); | |
557 | if (written < 0) { | |
558 | if (errno == EINTR) | |
559 | continue; | |
560 | @@ -305,7 +521,7 @@ | |
561 | return len; | |
562 | ||
563 | do { | |
564 | - n_chars = read(desc, ptr, len); | |
565 | + n_chars = fadv_read(desc, ptr, len); | |
566 | } while (n_chars < 0 && errno == EINTR); | |
567 | ||
568 | return n_chars; |