commit 102118f8
diff --git a/fs/exec.c b/fs/exec.c
index f9e8f6f..b060dce 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -51,6 +51,7 @@
 #include <linux/syscalls.h>
 #include <linux/rmap.h>
 #include <linux/tsacct_kern.h>
+#include <linux/prefetch_core.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
 #include <linux/signalfd.h>
@@ -1167,6 +1168,8 @@ int do_execve(char * filename,
 	if (IS_ERR(file))
 		goto out_kfree;

+	prefetch_exec_hook(filename);
+
 	sched_exec();

 	bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
diff --git a/include/linux/prefetch_core.h b/include/linux/prefetch_core.h
new file mode 100644
index 0000000..a5fbd56
--- /dev/null
+++ b/include/linux/prefetch_core.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl>
+ *
+ * This is the prefetch core - common code used for tracing and saving trace files.
+ * It is used by the prefetching modules, such as boot and app.
+ */
+
+#ifndef _LINUX_PREFETCH_CORE_H
+#define _LINUX_PREFETCH_CORE_H
+
+#include <linux/types.h>
+#include <linux/mm_types.h>
+
+/**
+ * Trace record; describes one range of pages of an inode recorded in the trace.
+ */
+struct prefetch_trace_record {
+	dev_t device;
+	unsigned long inode_no;
+	pgoff_t range_start;
+	pgoff_t range_length;
+};
+
+extern char trace_file_magic[4];
+
+enum {
+	PREFETCH_FORMAT_VERSION_MAJOR = 1,
+	PREFETCH_FORMAT_VERSION_MINOR = 0
+};
+
+/**
+ * Trace on-disk header.
+ * The major version is increased on major changes of the format.
+ * If you do not support this format explicitly, do not read the other fields.
+ * The minor version is increased on backward-compatible changes and
+ * you can read the other fields and raw data, provided that you read
+ * trace data from the @data_start offset in the file.
+ */
+struct prefetch_trace_header {
+	char magic[4];		/* Trace file signature - should contain trace_file_magic */
+	u16 version_major;	/* Major version of trace file format */
+	u16 version_minor;	/* Minor version of trace file format */
+	u16 data_start;		/* Offset in the file at which raw trace data starts */
+};
+
+struct trace_marker {
+	unsigned position;
+	unsigned generation;
+};
+
+int prefetch_start_trace(struct trace_marker *marker);
+int prefetch_continue_trace(struct trace_marker *marker);
+int prefetch_stop_trace(struct trace_marker *marker);
+int prefetch_release_trace(struct trace_marker end_marker);
+
+int prefetch_trace_fragment_size(struct trace_marker start_marker,
+				 struct trace_marker end_marker);
+
+int get_prefetch_trace_fragment(struct trace_marker start_marker,
+				struct trace_marker end_marker,
+				void **fragment_result,
+				int *fragment_size_result);
+
+void *alloc_trace_buffer(int len);
+void free_trace_buffer(void *buffer, int len);
+void sort_trace_fragment(void *trace, int trace_size);
+
+int prefetch_save_trace_between_markers(char *filename,
+					struct trace_marker start_marker,
+					struct trace_marker end_marker);
+int prefetch_save_trace_fragment(char *filename,
+				 void *trace_buffer, int trace_size);
+int prefetch_load_trace_fragment(char *filename,
+				 void **trace_buffer, int *trace_size);
+
+int prefetch_start_prefetch(void *trace, int trace_size, int async);
+int do_prefetch_from_file(char *filename);
+
+void print_marker(char *msg, struct trace_marker marker);
+
+/* Hook for mm page release code */
+#ifdef CONFIG_PREFETCH_CORE
+void prefetch_page_release_hook(struct page *page);
+#else
+#define prefetch_page_release_hook(param) do {} while (0)
+#endif
+
+struct proc_dir_entry;
+extern struct proc_dir_entry *prefetch_proc_dir;
+
+int param_match(char *line, char *param_name);
+int param_match_prefix(char *line, char *param_name);
+
+/* Auxiliary functions for reading and writing files in the kernel */
+struct file *kernel_open(char const *file_name, int flags, int mode);
+int kernel_write(struct file *file, unsigned long offset, const char *addr,
+		 unsigned long count);
+/* NOTE: kernel_read is already available in the kernel */
+int kernel_close(struct file *file);
+
+/* App prefetching hooks */
+#ifdef CONFIG_PREFETCH_APP
+void prefetch_exec_hook(char *filename);
+void prefetch_exit_hook(pid_t pid);
+#else
+#define prefetch_exec_hook(param) do {} while (0)
+#define prefetch_exit_hook(param) do {} while (0)
+#endif
+
+#endif /* _LINUX_PREFETCH_CORE_H */
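
The header above is the whole contract between the core and its client modules: a client brackets an interval of page-cache activity with trace markers, then saves or later prefetches the recorded ranges. A minimal sketch of a client of this API follows (not part of the patch; "/.prefetch-test" is a made-up path and error handling is trimmed to the essentials):

	#include <linux/prefetch_core.h>

	static char example_path[] = "/.prefetch-test";

	static int example_trace_and_save(void)
	{
		struct trace_marker start, end;
		int ret;

		ret = prefetch_start_trace(&start);	/* begin recording page-cache activity */
		if (ret < 0)
			return ret;

		/* ... the workload to be traced runs here ... */

		ret = prefetch_stop_trace(&end);
		if (ret < 0)
			return ret;

		/* persist everything recorded between the two markers */
		ret = prefetch_save_trace_between_markers(example_path, start, end);
		prefetch_release_trace(end);	/* let the core drop the traced data */
		return ret;
	}

On a later run the same client would call do_prefetch_from_file(example_path) before the workload starts; that is exactly the pattern the app and boot modules below implement.
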
diff --git a/init/Kconfig b/init/Kconfig
index a9e99f8..df3d532 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -104,6 +104,38 @@ config SWAP
 	  for so called swap devices or swap files in your kernel that are
 	  used to provide more virtual memory than the actual RAM present
 	  in your computer. If unsure say Y.
+config PREFETCH_CORE
+	bool "Prefetching support (core)"
+	default n
+	depends on MMU && BLOCK && EXPERIMENTAL
+	select TASK_DELAY_ACCT
+	help
+	  This option enables the core of the tracing and prefetching facility.
+	  The core provides functions used by the real prefetching modules,
+	  so you have to enable one of them as well.
+config PREFETCH_BOOT
+	tristate "Boot prefetching support"
+	default n
+	depends on PREFETCH_CORE && PROC_FS && EXPERIMENTAL
+	help
+	  This option enables a facility for tracing and prefetching during system boot.
+	  In order to use it you have to install the appropriate prefetch init scripts.
+config PREFETCH_APP
+	bool "Application prefetching support"
+	default n
+	depends on PREFETCH_CORE && PROC_FS && EXPERIMENTAL
+	help
+	  This option enables a facility for tracing and prefetching during application start.
+	  Upon application start, tracing is started and, after a configurable time,
+	  tracing is stopped and the trace is written to a file. Upon the next start,
+	  the files recorded in the saved trace are prefetched.
+config PREFETCH_DEBUG
+	bool "Prefetching debug interface and debugging facilities"
+	default n
+	depends on PREFETCH_CORE && PROC_FS
+	help
+	  This option enables facilities for testing and debugging tracing and prefetching.
+	  Do not enable it on production systems.

 config SYSVIPC
 	bool "System V IPC"
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b888c2..c136765 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -44,6 +44,7 @@
 #include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h>	/* for audit_free() */
+#include <linux/prefetch_core.h>
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
@@ -864,6 +865,8 @@ fastcall NORET_TYPE void do_exit(long co
 	struct task_struct *tsk = current;
 	int group_dead;

+	prefetch_exit_hook(tsk->pid);
+
 	profile_task_exit(tsk);

 	WARN_ON(atomic_read(&tsk->fs_excl));
diff --git a/mm/Makefile b/mm/Makefile
index a9148ea..5433e6e 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -31,4 +31,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
+obj-$(CONFIG_PREFETCH_CORE) += prefetch_core.o
+obj-$(CONFIG_PREFETCH_BOOT) += prefetch_boot.o
+obj-$(CONFIG_PREFETCH_APP) += prefetch_app.o

diff --git a/mm/filemap.c b/mm/filemap.c
index edb1b0b..405487c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -30,6 +30,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
+#include <linux/prefetch_core.h>
 #include "filemap.h"
 #include "internal.h"

@@ -115,7 +116,9 @@ generic_file_direct_IO(int rw, struct ki
 void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
-
+
+	prefetch_page_release_hook(page);
+
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
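
The hook is placed before radix_tree_delete() on purpose: at that point page->mapping is still valid, so the core can turn the page into a (device, inode, offset) triple for the trace before the page leaves the page cache. The core's implementation is not part of this excerpt; conceptually it does something like the following sketch (field names taken from struct prefetch_trace_record above; locking and buffer management omitted):

	/* sketch only - not the core's actual code */
	static void record_released_page(struct page *page)
	{
		struct address_space *mapping = page->mapping;
		struct prefetch_trace_record rec;

		if (mapping == NULL || mapping->host == NULL)
			return;	/* anonymous or already detached page */

		rec.device = mapping->host->i_sb->s_dev;
		rec.inode_no = mapping->host->i_ino;
		rec.range_start = page->index;
		rec.range_length = 1;
		/* ... append rec to the in-memory trace buffer ... */
	}
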
diff --git a/mm/prefetch_app.c b/mm/prefetch_app.c
new file mode 100644
index 0000000..b7f3d43
--- /dev/null
+++ b/mm/prefetch_app.c
@@ -0,0 +1,1071 @@
+/*
+ * linux/mm/prefetch_app.c
+ *
+ * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl>
+ *
+ * This is the application tracing and prefetching module. It traces an application's
+ * start for a specified time; upon the next start it prefetches the traced files.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/prefetch_core.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <asm/current.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/crc32.h>
+#include <linux/delayacct.h>
+#include <linux/seq_file.h>
+
+/* Enables/disables whole functionality of the module */
+static int enabled = 1;
+module_param(enabled, bool, 0);
+MODULE_PARM_DESC(enabled,
+		 "Enables or disables whole app prefetching module functionality (tracing and prefetching)");
+
+static int initialized = 0;
+
+/* Controls whether prefetching should be done along with tracing. */
+static int prefetch_enabled = 1;
+module_param(prefetch_enabled, bool, 0);
+MODULE_PARM_DESC(prefetch_enabled,
+		 "Enables or disables prefetching during app start. If disabled, only tracing will be done");
+
+/* Size of the shortened name; together with the hash it should be <= DNAME_INLINE_LEN_MIN */
+static int short_name_len = 10;
+module_param(short_name_len, uint, 0);
+MODULE_PARM_DESC(short_name_len,
+		 "Length of shortened file name, used to name prefetch file together with hash of whole name");
+
+#define DEFAULT_APP_TRACE_FILENAME_TEMPLATE "/.prefetch/%s"
+static char *filename_template = DEFAULT_APP_TRACE_FILENAME_TEMPLATE;
+module_param(filename_template, charp, 0);
+MODULE_PARM_DESC(filename_template,
+		 "Template for application trace name, where trace will be saved and read from. %s will be replaced with name of application and hash. The default is: "
+		 DEFAULT_APP_TRACE_FILENAME_TEMPLATE);
+
+/* Size of hashtable for filenames */
+static int filename_hashtable_size = 128;
+module_param(filename_hashtable_size, uint, 0);
+MODULE_PARM_DESC(filename_hashtable_size, "Size of hashtable for filenames");
+
+/**
+ * Time (in seconds) after which app tracing is stopped.
+ */
+static int tracing_timeout = 10;
+module_param(tracing_timeout, uint, 0);
+MODULE_PARM_DESC(tracing_timeout,
+		 "Time (in seconds) after which app tracing is stopped");
+
+/**
+ * IO ticks (in centisecs) threshold above which an application will be traced and prefetching done.
+ */
+static int tracing_ticks_threshold = 200;
+module_param(tracing_ticks_threshold, uint, 0);
+MODULE_PARM_DESC(tracing_ticks_threshold,
+		 "IO ticks (in centisecs) threshold above which application will be traced and prefetching done");
+
+/**
+ * Hashtable of names of apps blacklisted from tracing/prefetching.
+ * If a filename is on this list, it will not be traced.
+ * Protected by prefetch_apps_blacklist_mutex.
+ */
+struct hlist_head *prefetch_apps_blacklist;
+DEFINE_MUTEX(prefetch_apps_blacklist_mutex);
+
+/**
+ * Hashtable of names of apps which should be traced/prefetched.
+ * If a filename is on this list, it means it has been decided that
+ * tracing/prefetching should be done for it.
+ * This list is protected by prefetch_apps_list_mutex.
+ */
+struct hlist_head *prefetch_apps_list;
+DEFINE_MUTEX(prefetch_apps_list_mutex);
+
+/**
+ * Entry in filename hashtable list.
+ */
+struct filename_entry {
+	struct hlist_node entries_list;
+	char *filename;
+};
+
+struct trace_job;
+
+/**
+ * Entry in traced pids hashtable list.
+ */
+struct traced_pid_entry {
+	struct hlist_node entries_list;
+	pid_t pid;
+	struct trace_job *trace_job;
+};
+
+#define TRACED_HASH_SIZE 16
+/**
+ * Hashtable of concurrently traced applications.
+ * The key is the pid.
+ * Protected by traced_pids_mutex.
+ */
+struct hlist_head *traced_pids;
+
+DEFINE_MUTEX(traced_pids_mutex);
+
+/**
+ * Frees filename entry contents and entry itself.
+ */
+void free_filename_entry(struct filename_entry *entry)
+{
+	kfree(entry->filename);
+	kfree(entry);
+}
+
+void __clear_hashtable(struct hlist_head *list, int hashtable_size)
+{
+	struct filename_entry *entry;
+	struct hlist_node *cursor;
+	struct hlist_node *tmp;
+	int i;
+
+	for (i = 0; i < hashtable_size; ++i) {
+		hlist_for_each_entry_safe(entry, cursor, tmp, &list[i],
+					  entries_list) {
+			free_filename_entry(entry);
+		}
+		/* clear whole list at once */
+		INIT_HLIST_HEAD(&list[i]);
+	}
+}
+
+void clear_hashtable(struct hlist_head *list, int hashtable_size,
+		     struct mutex *mutex)
+{
+	mutex_lock(mutex);
+	__clear_hashtable(list, hashtable_size);
+	mutex_unlock(mutex);
+}
+
+int initialize_hashtable(struct hlist_head **list, int hashtable_size)
+{
+	struct hlist_head *h;
+	int i;
+
+	h = kmalloc(sizeof(struct hlist_head) * hashtable_size, GFP_KERNEL);
+	if (h == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < hashtable_size; ++i) {
+		INIT_HLIST_HEAD(&h[i]);
+	}
+
+	*list = h;
+	return 0;
+}
+
+u32 filename_hash(char *s)
+{
+	return crc32_le(0, s, strlen(s));
+}
+
+static inline unsigned filename_hashtable_index(char *filename)
+{
+	return filename_hash(filename) % filename_hashtable_size;
+}
+
+/**
+ * Checks if filename @filename is in hashtable @list
+ */
+int filename_on_list(char *filename, struct hlist_head *list)
+{
+	struct filename_entry *entry;
+	struct hlist_node *cursor;
+	unsigned hashtable_index = filename_hashtable_index(filename);
+
+	hlist_for_each_entry(entry, cursor, &list[hashtable_index],
+			     entries_list) {
+		if (strcmp(entry->filename, filename) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * Adds filename @filename to hashtable @list.
+ * The filename contents are copied.
+ * The appropriate mutex must be held.
+ */
+static int __add_filename_to_list(char *filename, struct hlist_head *list)
+{
+	int ret = 0;
+	struct filename_entry *entry = NULL;
+	unsigned hashtable_index = filename_hashtable_index(filename);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		ret = -ENOMEM;
+		goto out_error;
+	}
+	INIT_HLIST_NODE(&entry->entries_list);
+
+	entry->filename = kstrdup(filename, GFP_KERNEL);
+	if (entry->filename == NULL) {
+		ret = -ENOMEM;
+		goto out_error;
+	}
+
+	hlist_add_head(&entry->entries_list, &list[hashtable_index]);
+
+	return ret;
+
+out_error:
+	if (entry != NULL) {
+		if (entry->filename != NULL)
+			kfree(entry->filename);
+		kfree(entry);
+	}
+	return ret;
+}
+
+static int add_filename_to_list_unique(char *filename, struct hlist_head *list,
+				       struct mutex *mutex)
+{
+	int ret = 0;
+
+	mutex_lock(mutex);
+	if (!filename_on_list(filename, list))
+		ret = __add_filename_to_list(filename, list);
+	mutex_unlock(mutex);
+
+	return ret;
+}
+
+/**
+ * Removes filename @filename from hashtable @list
+ * Frees filename entry and its contents.
+ * Returns true (non-zero) if entry was found and removed.
+ */
+int remove_filename_from_list(char *filename, struct hlist_head *list)
+{
+	struct filename_entry *entry;
+	struct hlist_node *cursor;
+	unsigned hashtable_index = filename_hashtable_index(filename);
+
+	hlist_for_each_entry(entry, cursor, &list[hashtable_index],
+			     entries_list) {
+		if (strcmp(entry->filename, filename) == 0) {
+			hlist_del(&entry->entries_list);
+			free_filename_entry(entry);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static inline unsigned traced_pid_hash(pid_t pid)
+{
+	return pid % TRACED_HASH_SIZE;
+}
+
+/**
+ * Adds pid @pid to traced pids with trace job @job.
+ */
+int add_traced_pid(pid_t pid, struct trace_job *job,
+		   struct hlist_head *hashtable)
+{
+	int ret = 0;
+	struct traced_pid_entry *entry = NULL;
+	unsigned hashtable_index = traced_pid_hash(pid);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (entry == NULL) {
+		ret = -ENOMEM;
+		goto out_error;
+	}
+	INIT_HLIST_NODE(&entry->entries_list);
+	entry->trace_job = job;
+	entry->pid = pid;
+
+	hlist_add_head(&entry->entries_list, &hashtable[hashtable_index]);
+
+	return ret;
+
+out_error:
+	kfree(entry);
+	return ret;
+}
+
+/**
+ * Removes the trace job entry for pid @pid.
+ * Frees the entry and its contents.
+ * Does not free the job itself.
+ */
+int remove_traced_pid(pid_t pid, struct hlist_head *hashtable)
+{
+	struct traced_pid_entry *entry = NULL;
+	unsigned hashtable_index = traced_pid_hash(pid);
+	struct hlist_node *cursor;
+
+	hlist_for_each_entry(entry, cursor, &hashtable[hashtable_index],
+			     entries_list) {
+		if (entry->pid == pid) {
+			hlist_del(&entry->entries_list);
+			kfree(entry);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+struct traced_pid_entry *find_traced_pid(pid_t pid,
+					 struct hlist_head *hashtable)
+{
+	struct traced_pid_entry *entry = NULL;
+	unsigned hashtable_index = traced_pid_hash(pid);
+	struct hlist_node *cursor;
+
+	hlist_for_each_entry(entry, cursor, &hashtable[hashtable_index],
+			     entries_list) {
+		if (entry->pid == pid)
+			return entry;
+	}
+	return NULL;
+}
+
+/**
+ * Structure describing a tracing or monitoring job.
+ */
+struct trace_job {
+	struct delayed_work work;
+	char *filename;
+	pid_t pid;
+	struct trace_marker start_marker;
+};
+
+char *create_trace_filename(char *filename)
+{
+	char *basename = NULL;
+	u32 hash;
+	int filename_len = strlen(filename);
+	char *file_name = NULL;
+	char *short_name = NULL;
+	char *slash_pos;
+
+	hash = crc32_le(0, filename, filename_len);
+
+	slash_pos = strrchr(filename, '/');
+	if (slash_pos == NULL) {
+		printk(KERN_WARNING "File name does not contain a slash\n");
+		goto out;
+	}
+
+	basename = kmalloc(short_name_len + 1, GFP_KERNEL);
+
+	if (basename == NULL) {
+		printk(KERN_WARNING "Cannot allocate memory for basename\n");
+		goto out;
+	}
+	strncpy(basename, slash_pos + 1, short_name_len);
+	basename[short_name_len] = '\0';
+
+	file_name = kasprintf(GFP_KERNEL, "%s-%x", basename, hash);
+	if (file_name == NULL) {
+		printk(KERN_WARNING "Cannot allocate memory for file name\n");
+		goto out;
+	}
+
+	short_name = kasprintf(GFP_KERNEL, filename_template, file_name);
+	if (short_name == NULL) {
+		printk(KERN_WARNING "Cannot allocate memory for short name\n");
+		goto out;
+	}
+
+out:
+	if (file_name != NULL)
+		kfree(file_name);
+	if (basename != NULL)
+		kfree(basename);
+	return short_name;
+}
+
+static void do_finish_monitoring(struct trace_job *trace_job)
+{
+	struct task_struct *process = NULL;
+	int ticks = -1;
+
+	read_lock(&tasklist_lock);
+	process = find_task_by_pid(trace_job->pid);
+	if (process != NULL)
+		ticks = delayacct_blkio_ticks(process);
+	read_unlock(&tasklist_lock);
+
+	if (ticks == -1) {
+		/* Process terminated before our timeout; the exit hook already handled stopping monitoring */
+		goto out;
+	}
+
+	if (ticks > tracing_ticks_threshold) {
+		/* Add the app to the tracing list if it does not appear there yet */
+#ifdef CONFIG_PREFETCH_DEBUG
+		printk(KERN_INFO
+		       "Application %s qualifies for prefetching, ticks=%d\n",
+		       trace_job->filename, ticks);
+#endif
+		mutex_lock(&prefetch_apps_list_mutex);
+		if (!filename_on_list(trace_job->filename, prefetch_apps_list)) {
+			__add_filename_to_list(trace_job->filename,
+					       prefetch_apps_list);
+#ifdef CONFIG_PREFETCH_DEBUG
+			printk(KERN_INFO
+			       "Added application %s to prefetching list\n",
+			       trace_job->filename);
+#endif
+		}
+		mutex_unlock(&prefetch_apps_list_mutex);
+	} else {
+		/* App does not require prefetching; remove it from the tracing list if it is there */
+		mutex_lock(&prefetch_apps_list_mutex);
+		remove_filename_from_list(trace_job->filename,
+					  prefetch_apps_list);
+		mutex_unlock(&prefetch_apps_list_mutex);
+	}
+out:
+	return;
+}
+
+static void finish_trace_job(struct trace_job *trace_job)
+{
+	mutex_lock(&traced_pids_mutex);
+	if (!remove_traced_pid(trace_job->pid, traced_pids))
+		printk(KERN_WARNING
+		       "Did not remove pid %d from traced pids, inconsistency in pids handling, filename for job=%s\n",
+		       trace_job->pid, trace_job->filename);
+	mutex_unlock(&traced_pids_mutex);
+
+	kfree(trace_job->filename);
+	kfree(trace_job);
+}
+
+static void finish_monitoring(struct work_struct *work)
+{
+	struct trace_job *trace_job =
+	    container_of(container_of(work, struct delayed_work, work),
+			 struct trace_job, work);
+	do_finish_monitoring(trace_job);
+	finish_trace_job(trace_job);
+}
+
+static void finish_tracing(struct work_struct *work)
+{
+	struct trace_marker end_marker;
+	void *trace_fragment = NULL;
+	int trace_fragment_size = 0;
+	int ret;
+	struct trace_job *trace_job =
+	    container_of(container_of(work, struct delayed_work, work),
+			 struct trace_job, work);
+	char *trace_filename = NULL;
+
+	do_finish_monitoring(trace_job);
+
+	ret = prefetch_stop_trace(&end_marker);
+
+	if (ret < 0) {
+		printk(KERN_WARNING "Failed to stop trace for application %s\n",
+		       trace_job->filename);
+		end_marker = trace_job->start_marker;	/* best effort: release as much as possible */
+		goto out_release;
+	}
+
+	ret = get_prefetch_trace_fragment(trace_job->start_marker,
+					  end_marker,
+					  &trace_fragment,
+					  &trace_fragment_size);
+	if (ret < 0) {
+		printk(KERN_WARNING
+		       "Failed to fetch trace fragment for application %s, error=%d\n",
+		       trace_job->filename, ret);
+		goto out_release;
+	}
+
+	if (trace_fragment_size <= 0) {
+		printk(KERN_WARNING "Empty trace for application %s\n",
+		       trace_job->filename);
+		goto out_release;
+	}
+
+	trace_filename = create_trace_filename(trace_job->filename);
+	if (trace_filename == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory for short filename, trace for application %s not saved\n",
+		       trace_job->filename);
+		goto out_free_release;
+	}
+
+	sort_trace_fragment(trace_fragment, trace_fragment_size);
+	/*
+	 * NOTE: a race between saving and loading the trace is possible, but it should
+	 * only result in reading the prefetch file failing or prefetching being less efficient.
+	 */
+	ret = prefetch_save_trace_fragment(trace_filename, trace_fragment,
+					   trace_fragment_size);
+	if (ret < 0) {
+		printk(KERN_WARNING
+		       "Failed to save trace for application %s to file %s, error=%d\n",
+		       trace_job->filename, trace_filename, ret);
+		goto out_free_release;
+	}
+
+out_free_release:
+	free_trace_buffer(trace_fragment, trace_fragment_size);
+
+out_release:
+	ret = prefetch_release_trace(end_marker);
+	if (ret < 0)
+		printk(KERN_WARNING
+		       "Releasing trace for app tracing returned error, error=%d\n",
+		       ret);
+	if (trace_filename != NULL)
+		kfree(trace_filename);
+	finish_trace_job(trace_job);
+}
+
+static int start_tracing_job(char *filename)
+{
+	int ret = 0;
+	struct trace_job *trace_job;
+
+	trace_job = kzalloc(sizeof(*trace_job), GFP_KERNEL);
+
+	if (trace_job == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory to start tracing for app %s\n",
+		       filename);
+		ret = -ENOMEM;
+		goto out_error;
+	}
+
+	trace_job->filename = kstrdup(filename, GFP_KERNEL);
+
+	if (trace_job->filename == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory for filename to start tracing for app %s\n",
+		       filename);
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	ret = prefetch_start_trace(&trace_job->start_marker);
+	if (ret < 0) {
+		printk(KERN_WARNING "Failed to start tracing for app %s\n",
+		       filename);
+		goto out_free;
+	}
+
+	trace_job->pid = current->pid;
+
+	mutex_lock(&traced_pids_mutex);
+	add_traced_pid(trace_job->pid, trace_job, traced_pids);
+	mutex_unlock(&traced_pids_mutex);
+
+	INIT_DELAYED_WORK(&trace_job->work, finish_tracing);
+	schedule_delayed_work(&trace_job->work, HZ * tracing_timeout);
+
+#ifdef CONFIG_PREFETCH_DEBUG
+	printk(KERN_INFO "Successfully started tracing for application %s\n",
+	       filename);
+#endif
+
+	return 0;
+
+out_free:
+	if (trace_job != NULL) {
+		if (trace_job->filename != NULL)
+			kfree(trace_job->filename);
+		kfree(trace_job);
+	}
+out_error:
+	return ret;
+}
+
+static int start_monitoring_job(char *filename)
+{
+	int ret = 0;
+	struct trace_job *trace_job;
+
+	trace_job = kzalloc(sizeof(*trace_job), GFP_KERNEL);
+
+	if (trace_job == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory to start monitoring for app %s\n",
+		       filename);
+		ret = -ENOMEM;
+		goto out_error;
+	}
+
+	trace_job->filename = kstrdup(filename, GFP_KERNEL);
+
+	if (trace_job->filename == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory for filename to start monitoring for app %s\n",
+		       filename);
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	trace_job->pid = current->pid;
+
+	mutex_lock(&traced_pids_mutex);
+	add_traced_pid(trace_job->pid, trace_job, traced_pids);
+	mutex_unlock(&traced_pids_mutex);
+
+	INIT_DELAYED_WORK(&trace_job->work, finish_monitoring);
+	schedule_delayed_work(&trace_job->work, HZ * tracing_timeout);
+
+	return 0;
+
+out_free:
+	if (trace_job != NULL) {
+		if (trace_job->filename != NULL)
+			kfree(trace_job->filename);
+		kfree(trace_job);
+	}
+out_error:
+	return ret;
+}
+
+int start_app_prefetch(char *filename)
+{
+	char *trace_filename = NULL;
+	int ret = 0;
+
+	trace_filename = create_trace_filename(filename);
+	if (trace_filename == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory for short filename, cannot start prefetching for application %s\n",
+		       filename);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = do_prefetch_from_file(trace_filename);
+	if (ret < 0) {
+		printk(KERN_WARNING
+		       "Failed to start prefetching for application %s, error=%d\n",
+		       filename, ret);
+		goto out_free;
+	}
+
+out_free:
+	kfree(trace_filename);
+
+out:
+	return ret;
+}
+
+void try_app_prefetch(char *filename)
+{
+	int app_on_list;
+
+	if (!enabled)
+		return;
+
+	mutex_lock(&prefetch_apps_blacklist_mutex);
+	if (filename_on_list(filename, prefetch_apps_blacklist)) {
+#ifdef CONFIG_PREFETCH_DEBUG
+		printk(KERN_INFO
+		       "Not tracing or prefetching blacklisted file %s\n",
+		       filename);
+#endif
+		mutex_unlock(&prefetch_apps_blacklist_mutex);
+		return;
+	}
+	mutex_unlock(&prefetch_apps_blacklist_mutex);
+
+	mutex_lock(&prefetch_apps_list_mutex);
+	app_on_list = filename_on_list(filename, prefetch_apps_list);
+	mutex_unlock(&prefetch_apps_list_mutex);
+
+	if (app_on_list) {
+		/* Start tracing and schedule the end-of-tracing work */
+		start_tracing_job(filename);
+
+		if (prefetch_enabled) {
+			start_app_prefetch(filename);
+		}
+	} else {
+		start_monitoring_job(filename);
+	}
+}
+
+void prefetch_exec_hook(char *filename)
+{
+	try_app_prefetch(filename);
+}
+
+/**
+ * Prefetch hook for intercepting exit() of a process.
+ */
+void prefetch_exit_hook(pid_t pid)
+{
+	struct traced_pid_entry *entry = NULL;
+
+	if (!initialized || !enabled)
+		return;
+
+	mutex_lock(&traced_pids_mutex);
+	entry = find_traced_pid(pid, traced_pids);
+	if (entry != NULL)
+		do_finish_monitoring(entry->trace_job);
+	mutex_unlock(&traced_pids_mutex);
+	/* NOTE: the job is not cancelled; it will wake up and clean up after itself */
+}
+
+#define PREFETCH_PATH_MAX 512
+#define PREFETCH_PATH_MAX_S "512"
+
+ssize_t app_prefetch_proc_write(struct file *proc_file,
+				const char __user *buffer, size_t count,
+				loff_t *ppos)
+{
+	char *name;
+	int e = 0;
+	int tmp;
+	int r;
+	char *s = NULL;
+
+	if (count >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	name = kmalloc(count + 1, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	if (copy_from_user(name, buffer, count)) {
+		e = -EFAULT;
+		goto out;
+	}
+
+	/* strip the optional newline */
+	if (count && name[count - 1] == '\n')
+		name[count - 1] = '\0';
+	else
+		name[count] = '\0';
+
+	if (param_match(name, "prefetch enable")) {
+		printk(KERN_INFO "Prefetching for apps enabled\n");
+		prefetch_enabled = 1;
+		goto out;
+	}
+
+	if (param_match(name, "prefetch disable")) {
+		printk(KERN_INFO "Prefetching for apps disabled\n");
+		prefetch_enabled = 0;
+		goto out;
+	}
+
+	if (param_match(name, "enable")) {
+		printk(KERN_INFO "App prefetching module enabled\n");
+		enabled = 1;
+		goto out;
+	}
+
+	if (param_match(name, "disable")) {
+		printk(KERN_INFO "App prefetching module disabled\n");
+		enabled = 0;
+		goto out;
+	}
+
+	if (param_match_prefix(name, "set tracing timeout")) {
+		r = sscanf(name, "set tracing timeout %d", &tmp);
+		if (r != 1) {
+			e = -EINVAL;
+			printk(KERN_WARNING
+			       "Wrong parameter to set tracing timeout command, command was: %s\n",
+			       name);
+			goto out;
+		}
+		if (tmp <= 0) {
+			e = -EINVAL;
+			printk(KERN_WARNING
+			       "Wrong timeout specified, must be >0, timeout was: %d\n",
+			       tmp);
+			goto out;
+		}
+		tracing_timeout = tmp;
+		printk(KERN_INFO "Set tracing timeout to %d seconds\n",
+		       tracing_timeout);
+		goto out;
+	}
+
+	if (param_match(name, "clear app-list")) {
+		clear_hashtable(prefetch_apps_list, filename_hashtable_size,
+				&prefetch_apps_list_mutex);
+		printk(KERN_INFO "List of traced applications cleared\n");
+		goto out;
+	}
+
+	if (param_match_prefix(name, "add app-list")) {
+		s = kzalloc(PREFETCH_PATH_MAX + 1, GFP_KERNEL);
+		if (s == NULL) {
+			printk(KERN_WARNING
+			       "Cannot allocate memory for path\n");
+			e = -ENOMEM;
+			goto out;
+		}
+		r = sscanf(name, "add app-list %" PREFETCH_PATH_MAX_S "s", s);
+		if (r != 1) {
+			e = -EINVAL;
+			printk(KERN_WARNING
+			       "Wrong parameter to add app-list command, command was: %s\n",
+			       name);
+		} else {
+			e = add_filename_to_list_unique(s, prefetch_apps_list,
+							&prefetch_apps_list_mutex);
+			if (e < 0)
+				printk(KERN_WARNING
+				       "Failed to add application %s to prefetched applications list, error=%d\n",
+				       s, e);
+		}
+		kfree(s);
+		goto out;
+	}
+
+	if (param_match(name, "clear app-blacklist")) {
+		clear_hashtable(prefetch_apps_blacklist,
+				filename_hashtable_size,
+				&prefetch_apps_blacklist_mutex);
+		printk(KERN_INFO "Blacklist of traced applications cleared\n");
+		goto out;
+	}
+
+	if (param_match_prefix(name, "add app-blacklist")) {
+		s = kzalloc(PREFETCH_PATH_MAX + 1, GFP_KERNEL);
+		if (s == NULL) {
+			printk(KERN_WARNING
+			       "Cannot allocate memory for path\n");
+			e = -ENOMEM;
+			goto out;
+		}
+
+		/* bound the scan so an overlong path cannot overflow s */
+		r = sscanf(name, "add app-blacklist %" PREFETCH_PATH_MAX_S "s", s);
+		if (r != 1) {
+			e = -EINVAL;
+			printk(KERN_WARNING
+			       "Wrong parameter to add app-blacklist command, command was: %s\n",
+			       name);
+		} else {
+			e = add_filename_to_list_unique(s,
+							prefetch_apps_blacklist,
+							&prefetch_apps_blacklist_mutex);
+			if (e < 0)
+				printk(KERN_WARNING
+				       "Failed to add application %s to blacklisted applications list, error=%d\n",
+				       s, e);
+		}
+		kfree(s);
+		goto out;
+	}
+out:
+	kfree(name);
+
+	return e ? e : count;
+}
+
+void seq_print_filename_list(struct seq_file *m, struct hlist_head *list)
+{
+	struct filename_entry *entry;
+	struct hlist_node *cursor;
+	int i;
+
+	for (i = 0; i < filename_hashtable_size; ++i) {
+		hlist_for_each_entry(entry, cursor, &list[i], entries_list) {
+			seq_printf(m, "%s\n", entry->filename);
+		}
+	}
+}
+
+static void *app_prefetch_proc_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos != 0)
+		return NULL;
+
+	return &tracing_ticks_threshold;	/* any non-NULL pointer will do */
+}
+
+static void *app_prefetch_proc_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return NULL;
+}
+
+static int app_prefetch_proc_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "### Traced applications: ###\n");
+	mutex_lock(&prefetch_apps_list_mutex);
+	seq_print_filename_list(m, prefetch_apps_list);
+	mutex_unlock(&prefetch_apps_list_mutex);
+
+	seq_printf(m, "### Blacklisted applications: ###\n");
+	mutex_lock(&prefetch_apps_blacklist_mutex);
+	seq_print_filename_list(m, prefetch_apps_blacklist);
+	mutex_unlock(&prefetch_apps_blacklist_mutex);
+
+	return 0;
+}
+
+static void app_prefetch_proc_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations seq_app_prefetch_op = {
+	.start = app_prefetch_proc_start,
+	.next = app_prefetch_proc_next,
+	.stop = app_prefetch_proc_stop,
+	.show = app_prefetch_proc_show,
+};
+
+static int app_prefetch_proc_open(struct inode *inode, struct file *proc_file)
+{
+	return seq_open(proc_file, &seq_app_prefetch_op);
+}
+
+static int app_prefetch_proc_release(struct inode *inode,
+				     struct file *proc_file)
+{
+	return seq_release(inode, proc_file);
+}
+
+static struct file_operations proc_app_prefetch_fops = {
+	.owner = THIS_MODULE,
+	.open = app_prefetch_proc_open,
+	.release = app_prefetch_proc_release,
+	.write = app_prefetch_proc_write,
+	.read = seq_read,
+	.llseek = seq_lseek,
+};
+
+static int app_list_show(struct seq_file *m, void *v)
+{
+	mutex_lock(&prefetch_apps_list_mutex);
+	seq_print_filename_list(m, prefetch_apps_list);
+	mutex_unlock(&prefetch_apps_list_mutex);
+
+	return 0;
+}
+
+static int app_list_open(struct inode *inode, struct file *proc_file)
+{
+	return single_open(proc_file, app_list_show, NULL);
+}
+
+static int app_list_release_generic(struct inode *inode, struct file *proc_file)
+{
+	return single_release(inode, proc_file);
+}
+
+static struct file_operations proc_app_list_fops = {
+	.owner = THIS_MODULE,
+	.open = app_list_open,
+	.release = app_list_release_generic,
+	.read = seq_read,
+	.llseek = seq_lseek,
+};
+
+static int app_blacklist_show(struct seq_file *m, void *v)
+{
+	mutex_lock(&prefetch_apps_blacklist_mutex);
+	seq_print_filename_list(m, prefetch_apps_blacklist);
+	mutex_unlock(&prefetch_apps_blacklist_mutex);
+
+	return 0;
+}
+
+static int app_blacklist_open(struct inode *inode, struct file *proc_file)
+{
+	return single_open(proc_file, app_blacklist_show, NULL);
+}
+
+static struct file_operations proc_app_blacklist_fops = {
+	.owner = THIS_MODULE,
+	.open = app_blacklist_open,
+	.release = app_list_release_generic,
+	.read = seq_read,
+	.llseek = seq_lseek,
+};
+
+static __init int app_prefetch_init(void)
+{
+	struct proc_dir_entry *entry;
+	int ret;
+
+	/* Initialize hashtables */
+	ret = initialize_hashtable(&prefetch_apps_blacklist,
+				   filename_hashtable_size);
+	if (ret < 0) {
+		printk(KERN_WARNING
+		       "Cannot initialize app blacklist hashtable, error=%d\n",
+		       ret);
+		goto out_error;
+	}
+
+	ret = initialize_hashtable(&prefetch_apps_list, filename_hashtable_size);
+	if (ret < 0) {
+		printk(KERN_WARNING
+		       "Cannot initialize app hashtable, error=%d\n", ret);
+		goto out_error;
+	}
+
+	ret = initialize_hashtable(&traced_pids, TRACED_HASH_SIZE);
+	if (ret < 0) {
+		printk(KERN_WARNING
+		       "Cannot initialize traced pids hashtable, error=%d\n",
+		       ret);
+		goto out_error;
+	}
+
+	if (prefetch_proc_dir == NULL) {
+		printk(KERN_WARNING
+		       "Prefetch proc directory not present, proc interface for app prefetching will not be available\n");
+	} else {
+		entry = create_proc_entry("app", 0600, prefetch_proc_dir);
+		if (entry)
+			entry->proc_fops = &proc_app_prefetch_fops;
+		entry = create_proc_entry("app-list", 0600, prefetch_proc_dir);
+		if (entry)
+			entry->proc_fops = &proc_app_list_fops;
+		entry = create_proc_entry("app-blacklist", 0600, prefetch_proc_dir);
+		if (entry)
+			entry->proc_fops = &proc_app_blacklist_fops;
+	}
+
+	printk(KERN_INFO
+	       "App prefetching module started, enabled=%d, prefetching=%d\n",
+	       enabled, prefetch_enabled);
+
+	initialized = 1;
+
+	return 0;
+
+out_error:
+	return ret;
+}
+
+static void app_prefetch_exit(void)
+{
+	/* remove all proc entries registered by this module, not just "app" */
+	remove_proc_entry("app", prefetch_proc_dir);
+	remove_proc_entry("app-list", prefetch_proc_dir);
+	remove_proc_entry("app-blacklist", prefetch_proc_dir);
+}
+
+MODULE_AUTHOR("Krzysztof Lichota <lichota@mimuw.edu.pl>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Application tracing and prefetching during startup");
+
+module_init(app_prefetch_init);
+module_exit(app_prefetch_exit);
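
The proc file above accepts plain-text commands ("enable", "prefetch disable", "set tracing timeout N", "add app-list PATH", "add app-blacklist PATH", "clear app-list", ...). A tiny userspace sketch of driving it follows; it assumes the core registers prefetch_proc_dir as /proc/prefetch (registration happens in prefetch_core.c, which is not part of this excerpt), and the two application paths are made-up examples:

	/* userspace sketch, not part of the patch */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/prefetch/app", "w");

		if (f == NULL) {
			perror("open /proc/prefetch/app");
			return 1;
		}
		/* commands parsed by app_prefetch_proc_write() above */
		fputs("set tracing timeout 15\n", f);
		fputs("add app-list /usr/bin/oowriter\n", f);
		fputs("add app-blacklist /bin/sh\n", f);
		fclose(f);
		return 0;
	}

Reading the same file (or app-list/app-blacklist) back prints the current lists via the seq_file handlers.
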
diff --git a/mm/prefetch_boot.c b/mm/prefetch_boot.c
new file mode 100644
index 0000000..da7f89b
--- /dev/null
+++ b/mm/prefetch_boot.c
@@ -0,0 +1,396 @@
+/*
+ * linux/mm/prefetch_boot.c
+ *
+ * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl>
+ *
+ * This is the boot prefetch support implementation.
+ * It consists mainly of the proc interface; the rest is done by init scripts using that interface.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/prefetch_core.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/limits.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <asm/current.h>
+
+/*************** Boot tracing **************/
+#define DEFAULT_BOOT_TRACE_FILENAME_TEMPLATE "/.prefetch-boot-trace.%s"
+static char *filename_template = DEFAULT_BOOT_TRACE_FILENAME_TEMPLATE;
+module_param(filename_template, charp, 0);
+MODULE_PARM_DESC(filename_template,
+		 "Template for boot trace name, where trace will be saved and read from. %s will be replaced with name of phase. The default is: "
+		 DEFAULT_BOOT_TRACE_FILENAME_TEMPLATE);
+
+/* maximum size of phase name, not including the trailing NUL */
+#define PHASE_NAME_MAX 10
+/* maximum size as string, keep in sync with PHASE_NAME_MAX */
+#define PHASE_NAME_MAX_S "10"
+
+/* maximum size of command name, not including the trailing NUL */
+#define CMD_NAME_MAX 10
+/* maximum size as string, keep in sync with CMD_NAME_MAX */
+#define CMD_NAME_MAX_S "10"
+
+/* Enables/disables whole functionality of the module */
+static int enabled = 1;
+module_param(enabled, bool, 0);
+MODULE_PARM_DESC(enabled,
+		 "Enables or disables whole boot prefetching module functionality (tracing and prefetching)");
+
+/* Controls whether prefetching should be done along with tracing. */
+static int prefetch_enabled = 1;
+module_param(prefetch_enabled, bool, 0);
+MODULE_PARM_DESC(prefetch_enabled,
+		 "Enables or disables prefetching during boot. If disabled, only tracing will be done");
+
+/* statically initialized so it is usable even before module init runs */
+static DEFINE_MUTEX(boot_prefetch_mutex);
+/**
+ * Phase start marker, protected by boot_prefetch_mutex.
+ */
+static struct trace_marker boot_start_marker;
+static char boot_tracing_phase[PHASE_NAME_MAX + 1] = "init";
+static int boot_tracing_running = 0;
+
+/**
+ * Saves the boot trace fragment for phase @phase_name which
+ * starts at boot_start_marker and ends at @end_phase_marker.
+ *
+ * boot_prefetch_mutex must be held while calling this function.
+ */
+static int prefetch_save_boot_trace(char *phase_name,
+				    struct trace_marker end_phase_marker)
+{
+	char *boot_trace_filename = NULL;
+	int ret = 0;
+
+	boot_trace_filename = kasprintf(GFP_KERNEL, filename_template,
+					phase_name);
+
+	if (boot_trace_filename == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory for trace filename in phase %s\n",
+		       phase_name);
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = prefetch_save_trace_between_markers(boot_trace_filename,
+						  boot_start_marker,
+						  end_phase_marker);
+out:
+	if (boot_trace_filename != NULL)
+		kfree(boot_trace_filename);
+	return ret;
+}
+
+/**
+ * Starts tracing for the given boot phase.
+ * boot_prefetch_mutex is taken by this function.
+ */
+int prefetch_start_boot_tracing_phase(char *phase_name)
+{
+	int r;
+	int ret = 0;
+	struct trace_marker marker;
+
+	mutex_lock(&boot_prefetch_mutex);
+
+	if (boot_tracing_running) {
+		/* boot tracing was already running */
+		ret = prefetch_continue_trace(&marker);
+		if (ret < 0) {
+			printk(KERN_WARNING
+			       "Cannot continue tracing, error=%d\n", ret);
+			goto out_unlock;
+		}
+
+		r = prefetch_save_boot_trace(boot_tracing_phase, marker);
+		if (r < 0)
+			/* NOTE: just warn and continue, prefetching might still succeed and the phase has been started */
+			printk(KERN_WARNING
+			       "Saving boot trace failed, phase %s, error=%d\n",
+			       boot_tracing_phase, r);
+
+		boot_start_marker = marker;
+	} else {
+		/* first phase of tracing */
+		ret = prefetch_start_trace(&boot_start_marker);
+		if (ret < 0) {
+			printk(KERN_WARNING "Cannot start tracing, error=%d\n",
+			       ret);
+			goto out_unlock;
+		}
+	}
+
+	strncpy(boot_tracing_phase, phase_name, PHASE_NAME_MAX);
+	boot_tracing_phase[PHASE_NAME_MAX] = 0;
+
+	boot_tracing_running = 1;
+
+#ifdef CONFIG_PREFETCH_DEBUG
+	/* NOTE: only the phase name is in scope here, so only it is printed */
+	printk(KERN_INFO "Boot tracing phase %s started\n", phase_name);
+	print_marker("Marker: ", boot_start_marker);
+#endif
+out_unlock:
+	mutex_unlock(&boot_prefetch_mutex);
+	return ret;
+}
+
+int prefetch_start_boot_prefetching_phase(char *phase_name)
+{
+	char *boot_trace_filename = NULL;
+	int ret = 0;
+
+	if (!prefetch_enabled) {
+		printk(KERN_INFO
+		       "Prefetching disabled, not starting prefetching for boot phase: %s\n",
+		       phase_name);
+		return 0;
+	}
+
+	boot_trace_filename = kasprintf(GFP_KERNEL, filename_template,
+					phase_name);
+
+	if (boot_trace_filename == NULL) {
+		printk(KERN_WARNING
+		       "Cannot allocate memory for trace filename\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	printk(KERN_INFO "Starting prefetching for boot phase: %s\n",
+	       phase_name);
+	ret = do_prefetch_from_file(boot_trace_filename);
+
+	if (ret < 0)
+		printk(KERN_WARNING
+		       "Failed to prefetch trace from file %s, error=%d\n",
+		       boot_trace_filename, ret);
+
+out:
+	if (boot_trace_filename != NULL)
+		kfree(boot_trace_filename);
+
+	return ret;
+}
+
+/**
+ * Starts the next phase of boot.
+ * Starts tracing. Then, if a trace is available, loads it and starts
+ * prefetching.
+ * @cmd_name is the name of the action; if you want to keep its contents,
+ * copy it somewhere, as it will be deallocated.
+ * @phase_name is the name of the new phase; if you want to keep its contents,
+ * copy it somewhere, as it will be deallocated.
+ */
+static int prefetch_start_boot_phase(char *cmd_name, char *phase_name)
+{
+	int ret = 0;
+	int start_prefetching = 0;
+	int start_tracing = 0;
+
+	if (strcmp(cmd_name, "prefetch") == 0)
+		start_prefetching = 1;
+	else if (strcmp(cmd_name, "trace") == 0)
+		start_tracing = 1;
+	else if (strcmp(cmd_name, "both") == 0) {
+		start_prefetching = 1;
+		start_tracing = 1;
+	} else {
+		printk(KERN_WARNING
+		       "Boot prefetch: unknown command: %s for phase %s\n",
+		       cmd_name, phase_name);
+		return -EINVAL;
+	}
+	if (start_tracing)
+		prefetch_start_boot_tracing_phase(phase_name);
+
+	if (start_prefetching)
+		ret = prefetch_start_boot_prefetching_phase(phase_name);
+
+	return ret;
+}
+
1537 | +static int prefetch_stop_boot_tracing(void) | |
1538 | +{ | |
1539 | + struct trace_marker marker; | |
1540 | + int ret = 0; | |
1541 | + printk(KERN_INFO "Stopping boot tracing and prefetching\n"); | |
1542 | + | |
1543 | + mutex_lock(&boot_prefetch_mutex); | |
1544 | + | |
1545 | + if (!boot_tracing_running) { | |
1546 | + printk(KERN_WARNING | |
1547 | + "Trying to stop boot tracing although tracing is not running\n"); | |
1548 | + ret = -EINVAL; | |
1549 | + goto out_unlock; | |
1550 | + } | |
1551 | + | |
1552 | + ret = prefetch_stop_trace(&marker); | |
1553 | + if (ret < 0) | |
1554 | + printk(KERN_WARNING | |
1555 | + "Stopping tracing for boot tracing returned error, error=%d\n", | |
1556 | + ret); | |
1557 | + | |
1558 | + boot_tracing_running = 0; | |
1559 | + | |
1560 | +#ifdef PREFETCH_DEBUG | |
1561 | + print_marker("Boot stop marker: ", marker); | |
1562 | +#endif | |
1563 | + | |
1564 | + ret = prefetch_save_boot_trace(boot_tracing_phase, marker); | |
1565 | + if (ret < 0) { | |
1566 | + printk(KERN_WARNING | |
1567 | + "Saving final boot trace failed, phase %s, error=%d\n", | |
1568 | + boot_tracing_phase, ret); | |
1569 | + goto out_unlock_release; | |
1570 | + } | |
1571 | + | |
1572 | + out_unlock_release: | |
1573 | + /*do not let the release path clobber an earlier error in ret */ | |
| + err = prefetch_release_trace(marker); | |
1574 | + if (err < 0) | |
1575 | + printk(KERN_WARNING | |
1576 | + "Releasing trace for boot tracing returned error, error=%d\n", | |
1577 | + err); | |
| + if (ret == 0) | |
| + ret = err; | |
1578 | + | |
1579 | + out_unlock: | |
1580 | + mutex_unlock(&boot_prefetch_mutex); | |
1581 | + return ret; | |
1582 | +} | |
1583 | + | |
1584 | +ssize_t boot_prefetch_proc_write(struct file * proc_file, | |
1585 | + const char __user * buffer, size_t count, | |
1586 | + loff_t * ppos) | |
1587 | +{ | |
1588 | + char *name; | |
1589 | + int e = 0; | |
1590 | + int r; | |
1591 | + char *phase_name; | |
1592 | + char *cmd_name; | |
1593 | + | |
1594 | + if (count >= PATH_MAX) | |
1595 | + return -ENAMETOOLONG; | |
1596 | + | |
1597 | + name = kmalloc(count + 1, GFP_KERNEL); | |
1598 | + if (!name) | |
1599 | + return -ENOMEM; | |
1600 | + | |
1601 | + if (copy_from_user(name, buffer, count)) { | |
1602 | + e = -EFAULT; | |
1603 | + goto out; | |
1604 | + } | |
1605 | + | |
1606 | + /* strip the optional newline */ | |
1607 | + if (count && name[count - 1] == '\n') | |
1608 | + name[count - 1] = '\0'; | |
1609 | + else | |
1610 | + name[count] = '\0'; | |
1611 | + | |
1612 | + if (param_match(name, "prefetch enable")) { | |
1613 | + printk(KERN_INFO "Prefetching enabled\n"); | |
1614 | + prefetch_enabled = 1; | |
1615 | + goto out; | |
1616 | + } | |
1617 | + | |
1618 | + if (param_match(name, "prefetch disable")) { | |
1619 | + printk(KERN_INFO "Prefetching disabled\n"); | |
1620 | + prefetch_enabled = 0; | |
1621 | + goto out; | |
1622 | + } | |
1623 | + | |
1624 | + if (param_match_prefix(name, "start ")) { | |
1625 | + phase_name = kzalloc(PHASE_NAME_MAX + 1, GFP_KERNEL); /*1 for terminating NULL */ | |
1626 | + if (phase_name == NULL) { | |
1627 | + printk(KERN_WARNING | |
1628 | + "Cannot allocate memory for phase name\n"); | |
1629 | + goto out; | |
1630 | + } | |
1631 | + cmd_name = kzalloc(CMD_NAME_MAX + 1, GFP_KERNEL); /*1 for terminating NULL */ | |
1632 | + if (cmd_name == NULL) { | |
1633 | + printk(KERN_WARNING | |
1634 | + "Cannot allocate memory for command name\n"); | |
1635 | + goto out; | |
1636 | + } | |
1637 | + r = sscanf(name, | |
1638 | + "start %" CMD_NAME_MAX_S "s phase %" PHASE_NAME_MAX_S | |
1639 | + "s", cmd_name, phase_name); | |
1640 | + if (r != 2) { | |
1641 | + e = -EINVAL; | |
1642 | + printk(KERN_WARNING | |
1643 | + "Wrong parameter to start command, command was: %s\n", | |
1644 | + name); | |
1645 | + kfree(phase_name); | |
1646 | + kfree(cmd_name); | |
1647 | + goto out; | |
1648 | + } | |
1649 | + e = prefetch_start_boot_phase(cmd_name, phase_name); | |
1650 | + kfree(phase_name); | |
1651 | + kfree(cmd_name); | |
1652 | + goto out; | |
1653 | + } | |
1654 | + | |
1655 | + if (param_match(name, "boot tracing stop")) { | |
1656 | + e = prefetch_stop_boot_tracing(); | |
1657 | + goto out; | |
1658 | + } | |
1659 | + out: | |
1660 | + kfree(name); | |
1661 | + | |
1662 | + return e ? e : count; | |
1663 | +} | |
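/*
 * A minimal user-space sketch of driving the commands parsed by
 * boot_prefetch_proc_write() above; it assumes the /proc/prefetch/boot
 * entry created by proc_mkdir("prefetch", NULL) in prefetch_core_init()
 * plus create_proc_entry("boot", ...) in boot_prefetch_init().  The
 * command strings are exactly the ones matched above.
 */
#include <stdio.h>

static int boot_prefetch_cmd(const char *cmd)
{
	FILE *f = fopen("/proc/prefetch/boot", "w");

	if (f == NULL)
		return -1;
	fprintf(f, "%s\n", cmd);	/* the trailing newline is stripped by the kernel */
	return fclose(f);
}

int main(void)
{
	boot_prefetch_cmd("start both phase init");	/* trace and prefetch phase "init" */
	/* ... boot phase runs ... */
	boot_prefetch_cmd("boot tracing stop");		/* stop tracing, save the final trace */
	return 0;
}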
1664 | + | |
1665 | +static int boot_prefetch_proc_open(struct inode *inode, struct file *proc_file) | |
1666 | +{ | |
1667 | + return 0; | |
1668 | +} | |
1669 | + | |
1670 | +static int boot_prefetch_proc_release(struct inode *inode, | |
1671 | + struct file *proc_file) | |
1672 | +{ | |
1673 | + return 0; | |
1674 | +} | |
1675 | + | |
1676 | +static struct file_operations proc_boot_prefetch_fops = { | |
1677 | + .owner = THIS_MODULE, | |
1678 | + .open = boot_prefetch_proc_open, | |
1679 | + .release = boot_prefetch_proc_release, | |
1680 | + .write = boot_prefetch_proc_write, | |
1681 | +}; | |
1682 | + | |
1683 | +static __init int boot_prefetch_init(void) | |
1684 | +{ | |
1685 | + struct proc_dir_entry *entry; | |
1686 | + | |
1687 | + mutex_init(&boot_prefetch_mutex); | |
1688 | + | |
1689 | + if (prefetch_proc_dir == NULL) { | |
1690 | + printk(KERN_WARNING | |
1691 | + "Prefetch proc directory not present, proc interface for boot prefetching will not be available\n"); | |
1692 | + } else { | |
1693 | + entry = create_proc_entry("boot", 0600, prefetch_proc_dir); | |
1694 | + if (entry) | |
1695 | + entry->proc_fops = &proc_boot_prefetch_fops; | |
1696 | + } | |
1697 | + printk(KERN_INFO | |
1698 | + "Boot prefetching module started, enabled=%d, prefetching=%d\n", | |
1699 | + enabled, prefetch_enabled); | |
1700 | + | |
1701 | + return 0; | |
1702 | +} | |
1703 | + | |
1704 | +static void boot_prefetch_exit(void) | |
1705 | +{ | |
1706 | + remove_proc_entry("boot", prefetch_proc_dir); | |
1707 | +} | |
1708 | + | |
1709 | +MODULE_AUTHOR("Krzysztof Lichota <lichota@mimuw.edu.pl>"); | |
1710 | +MODULE_LICENSE("GPL"); | |
1711 | +MODULE_DESCRIPTION | |
1712 | + ("Boot prefetching - support for tracing and prefetching during system boot"); | |
1713 | + | |
1714 | +module_init(boot_prefetch_init); | |
1715 | +module_exit(boot_prefetch_exit); | |
1716 | diff --git a/mm/prefetch_core.c b/mm/prefetch_core.c | |
1717 | new file mode 100644 | |
1718 | index 0000000..001470b | |
1719 | --- /dev/null | |
1720 | +++ b/mm/prefetch_core.c | |
1721 | @@ -0,0 +1,1527 @@ | |
1722 | +/* | |
1723 | + * linux/mm/prefetch_core.c | |
1724 | + * | |
1725 | + * Copyright (C) 2006 Fengguang Wu <wfg@ustc.edu> | |
1726 | + * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl> | |
1727 | + * | |
1728 | + * This is prefetch core - common code used for tracing and saving trace files. | |
1729 | + * It is used by prefetching modules, such as boot and app. | |
1730 | + * | |
1731 | + * Based on filecache code by Fengguang Wu. | |
1732 | + * | |
1733 | + * This program is free software; you can redistribute it and/or modify | |
1734 | + * it under the terms of the GNU General Public License version 2 as | |
1735 | + * published by the Free Software Foundation. | |
1736 | + */ | |
1737 | + | |
1738 | +#include <linux/prefetch_core.h> | |
1739 | +#include <linux/fs.h> | |
1740 | +#include <linux/mm.h> | |
1741 | +#include <linux/radix-tree.h> | |
1742 | +#include <linux/page-flags.h> | |
1743 | +#include <linux/pagevec.h> | |
1744 | +#include <linux/pagemap.h> | |
1745 | +#include <linux/vmalloc.h> | |
1746 | +#include <linux/writeback.h> | |
1747 | +#include <linux/proc_fs.h> | |
1748 | +#include <linux/module.h> | |
1749 | +#include <asm/uaccess.h> | |
1750 | +#include <linux/spinlock.h> | |
1751 | +#include <linux/time.h> | |
1752 | +#include <linux/file.h> | |
1753 | +#include <linux/delayacct.h> | |
1754 | +#include <linux/file.h> | |
1755 | +#include <linux/sort.h> | |
1756 | + | |
1757 | +char trace_file_magic[4] = { 'P', 'F', 'C', 'H' }; | |
1758 | + | |
1759 | +/*Inode walk session*/ | |
1760 | +struct inode_walk_session { | |
1761 | + int private_session; | |
1762 | + pgoff_t next_offset; | |
1763 | + struct { | |
1764 | + unsigned long cursor; | |
1765 | + unsigned long origin; | |
1766 | + unsigned long size; | |
1767 | + struct inode **inodes; | |
1768 | + } ivec; | |
1769 | + struct { | |
1770 | + unsigned long pos; | |
1771 | + unsigned long i_state; | |
1772 | + struct inode *inode; | |
1773 | + struct inode *pinned_inode; | |
1774 | + } icur; | |
1775 | + int inodes_walked; | |
1776 | + int pages_walked; | |
1777 | + int pages_referenced; | |
1778 | + int page_blocks; | |
1779 | +}; | |
1780 | + | |
1781 | +/*Disables/enables the whole module functionality*/ | |
1782 | +static int enabled = 1; | |
1783 | +module_param(enabled, bool, 0); | |
1784 | +MODULE_PARM_DESC(enabled, | |
1785 | + "Enables or disables whole prefetching module functionality (tracing and prefetching)"); | |
1786 | + | |
1787 | +#define DEFAULT_TRACE_SIZE_KB 256 | |
1788 | + | |
1789 | +/*NOTE: changing trace size in runtime is not supported - do not do it.*/ | |
1790 | +unsigned trace_size_kb = DEFAULT_TRACE_SIZE_KB; /*in kilobytes */ | |
1791 | +module_param(trace_size_kb, uint, 0); | |
1792 | +MODULE_PARM_DESC(trace_size_kb, | |
1793 | + "Size of memory allocated for trace (in KB), set to 0 to use default"); | |
1794 | + | |
1795 | +static inline unsigned prefetch_trace_size(void) | |
1796 | +{ | |
1797 | + if (likely(trace_size_kb > 0)) | |
1798 | + return trace_size_kb << 10; | |
1799 | + | |
1800 | + /*if set to 0, then use default */ | |
1801 | + return DEFAULT_TRACE_SIZE_KB * 1024; | |
1802 | +} | |
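/*
 * Worked example: with the default trace_size_kb of 256 the buffer is
 * 256 << 10 = 262144 bytes.  Assuming a 32-bit build, where
 * sizeof(struct prefetch_trace_record) is 16 (dev_t, unsigned long and
 * two pgoff_t at 4 bytes each), that is room for 262144 / 16 = 16384
 * page-range records before prefetch_trace_add() sets the overflow flag.
 */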
1803 | + | |
1804 | +enum tracing_command { | |
1805 | + START_TRACING, | |
1806 | + STOP_TRACING, | |
1807 | + CONTINUE_TRACING | |
1808 | +}; | |
1809 | + | |
1810 | +/*Structure holding all information needed for trace*/ | |
1811 | +struct prefetch_trace_t { | |
1812 | + spinlock_t prefetch_trace_lock; | |
1813 | + unsigned int buffer_used; | |
1814 | + unsigned int buffer_size; | |
1815 | + void *buffer; | |
1816 | + int generation; | |
1817 | + int overflow; | |
1818 | + int overflow_reported; | |
1819 | + /*fields above protected by prefetch_trace_lock */ | |
1820 | + int page_release_traced; | |
1821 | + /** | |
1822 | + * Number of traces started and not finished. | |
1823 | + * Used to check if it is necessary to add entries to trace. | |
1824 | + */ | |
1825 | + atomic_t tracers_count; | |
1826 | + int trace_users; /*number of trace users, protected by prefetch_trace_mutex */ | |
1827 | + struct mutex prefetch_trace_mutex; | |
1828 | +}; | |
1829 | + | |
1830 | +struct prefetch_trace_t prefetch_trace = { | |
1831 | + .prefetch_trace_lock = SPIN_LOCK_UNLOCKED, | |
1832 | + .buffer_used = 0, | |
1833 | + .buffer_size = 0, | |
1834 | + .buffer = NULL, | |
1835 | + .generation = 0, | |
1836 | + .overflow = 0, | |
1837 | + .overflow_reported = 0, | |
1838 | + .page_release_traced = 0, | |
1839 | + .tracers_count = ATOMIC_INIT(0), | |
1840 | + .trace_users = 0, | |
1841 | + .prefetch_trace_mutex = __MUTEX_INITIALIZER(prefetch_trace.prefetch_trace_mutex) | |
1842 | +}; | |
1843 | + | |
1844 | +/** | |
1845 | + Set if walk_pages() decided that it is the start of tracing | |
1846 | + and bits should be cleared, not recorded. | |
1847 | + Access to it is protected by inode_lock. | |
1848 | + If lock breaking is enabled, this variable makes sure that | |
1849 | + a second caller of walk_pages(START_TRACING) will not | |
1850 | + race with the first caller and start recording changes. | |
1851 | +*/ | |
1852 | +static int clearing_in_progress = 0; | |
1853 | + | |
1854 | +/** | |
1855 | + * Timer used for measuring tracing and prefetching time. | |
1856 | +*/ | |
1857 | +struct prefetch_timer { | |
1858 | + struct timespec ts_start; | |
1859 | + struct timespec ts_end; | |
1860 | + char *name; | |
1861 | +}; | |
1862 | + | |
1863 | +static void clear_trace(void); | |
1864 | + | |
1865 | +/** | |
1866 | + * Starts timer. | |
1867 | +*/ | |
1868 | +void prefetch_start_timing(struct prefetch_timer *timer, char *name) | |
1869 | +{ | |
1870 | + timer->name = name; | |
1871 | + do_posix_clock_monotonic_gettime(&timer->ts_start); | |
1872 | +} | |
1873 | + | |
1874 | +/** | |
1875 | + * Stops timer. | |
1876 | +*/ | |
1877 | +void prefetch_end_timing(struct prefetch_timer *timer) | |
1878 | +{ | |
1879 | + do_posix_clock_monotonic_gettime(&timer->ts_end); | |
1880 | +} | |
1881 | + | |
1882 | +/** | |
1883 | + * Prints timer name and time duration into kernel log. | |
1884 | +*/ | |
1885 | +void prefetch_print_timing(struct prefetch_timer *timer) | |
1886 | +{ | |
1887 | + struct timespec ts = timespec_sub(timer->ts_end, timer->ts_start); | |
1888 | + s64 ns = timespec_to_ns(&ts); | |
1889 | + | |
1890 | + printk(KERN_INFO "Prefetch timing (%s): %lld ns, %ld.%.9ld\n", | |
1891 | + timer->name, ns, ts.tv_sec, ts.tv_nsec); | |
1892 | +} | |
1893 | + | |
1894 | +struct async_prefetch_params { | |
1895 | + void *trace; | |
1896 | + int trace_size; | |
1897 | +}; | |
1898 | + | |
1899 | +static int prefetch_do_prefetch(void *trace, int trace_size); | |
1900 | + | |
1901 | +static int async_prefetch_thread(void *p) | |
1902 | +{ | |
1903 | + int ret; | |
1904 | + struct async_prefetch_params *params = | |
1905 | + (struct async_prefetch_params *)p; | |
1906 | +#ifdef PREFETCH_DEBUG | |
1907 | + printk(KERN_INFO "Started async prefetch thread\n"); | |
1908 | +#endif | |
1909 | + ret = prefetch_do_prefetch(params->trace, params->trace_size); | |
1910 | + kfree(params); | |
1911 | + return ret; | |
1912 | +} | |
1913 | + | |
1914 | +static int prefetch_start_prefetch_async(void *trace, int trace_size) | |
1915 | +{ | |
1916 | + struct async_prefetch_params *params = | |
1917 | + kmalloc(sizeof(struct async_prefetch_params), GFP_KERNEL); | |
1918 | + if (params == NULL) | |
1919 | + return -ENOMEM; | |
1920 | + params->trace = trace; | |
1921 | + params->trace_size = trace_size; | |
1922 | + | |
1923 | + if (kernel_thread(async_prefetch_thread, params, 0) < 0) { | |
1924 | + printk(KERN_WARNING "Cannot start async prefetch thread\n"); | |
| + kfree(params); /*the thread was not created, so it cannot free params */ | |
1925 | + return -EINVAL; | |
1926 | + } | |
1927 | + return 0; | |
1928 | +} | |
1929 | + | |
1930 | +static int prefetch_start_prefetch_sync(void *trace, int trace_size) | |
1931 | +{ | |
1932 | + return prefetch_do_prefetch(trace, trace_size); | |
1933 | +} | |
1934 | + | |
1935 | +/** | |
1936 | + * Starts prefetch based on given @trace, whose length (in bytes) is @trace_size. | |
1937 | + * If async is false, the function will return only after prefetching is finished. | |
1938 | + * Otherwise, prefetching will be started in a separate thread and the | |
1939 | + * function will return immediately. | |
1940 | +*/ | |
1941 | +int prefetch_start_prefetch(void *trace, int trace_size, int async) | |
1942 | +{ | |
1943 | + if (async) | |
1944 | + return prefetch_start_prefetch_async(trace, trace_size); | |
1945 | + else | |
1946 | + return prefetch_start_prefetch_sync(trace, trace_size); | |
1947 | +} | |
1948 | + | |
1949 | +EXPORT_SYMBOL(prefetch_start_prefetch); | |
1950 | + | |
1951 | +static int prefetch_do_prefetch(void *trace, int trace_size) | |
1952 | +{ | |
1953 | + struct prefetch_trace_record *record = trace; | |
1954 | + struct prefetch_trace_record *prev_record = NULL; | |
1955 | +#ifdef PREFETCH_DEBUG | |
1956 | + struct prefetch_timer timer; | |
1957 | +#endif | |
1958 | + struct super_block *sb = NULL; | |
1959 | + struct file *file = NULL; | |
1960 | + struct inode *inode = NULL; | |
1961 | + int ret = 0; | |
1962 | + int readaheads_failed = 0; | |
1963 | + int readahead_ret; | |
1964 | + | |
1965 | + if (!enabled) | |
1966 | + return -ENODEV; /*module disabled */ | |
1967 | + | |
1968 | +#ifdef PREFETCH_DEBUG | |
1969 | + printk(KERN_INFO "Delay io ticks before prefetching: %d\n", | |
1970 | + (int)delayacct_blkio_ticks(current)); | |
1971 | + prefetch_start_timing(&timer, "Prefetching"); | |
1972 | +#endif | |
1973 | + | |
1974 | + for (; | |
1975 | + /*record + 1, not record + sizeof(): pointer arithmetic already scales */ | |
| + (void *)(record + 1) <= | |
1976 | + trace + trace_size; prev_record = record, ++record) { | |
1977 | + if (prev_record == NULL | |
1978 | + || prev_record->device != record->device) { | |
1979 | + /*open next device */ | |
1980 | + if (sb) | |
1981 | + drop_super(sb); | |
1982 | + sb = user_get_super(record->device); | |
1983 | + } | |
1984 | + if (sb == NULL) | |
1985 | + continue; /*no such device or error getting device */ | |
1986 | + | |
1987 | + if (prev_record == NULL || prev_record->device != record->device | |
1988 | + || prev_record->inode_no != record->inode_no) { | |
1989 | + /*open next file */ | |
1990 | + if (inode) | |
1991 | + iput(inode); | |
1992 | + | |
1993 | + inode = iget(sb, record->inode_no); | |
1994 | + /*iget() returns NULL on failure, not an ERR_PTR() value; | |
1995 | + continuing here also protects the file setup below | |
1996 | + from a NULL dereference */ | |
| + if (inode == NULL) | |
1997 | + continue; | |
1999 | + | |
2000 | + if (file) | |
2001 | + put_filp(file); | |
2002 | + | |
2003 | + file = get_empty_filp(); | |
2004 | + if (file == NULL) { | |
2005 | + ret = -ENFILE; | |
2006 | + goto out; | |
2007 | + } | |
2008 | + /*only most important file fields filled, ext3_readpages doesn't use it anyway. */ | |
2009 | + file->f_op = inode->i_fop; | |
2010 | + file->f_mapping = inode->i_mapping; | |
2011 | + file->f_mode = FMODE_READ; | |
2012 | + file->f_flags = O_RDONLY; | |
2013 | + } | |
2014 | + if (inode == NULL) | |
2015 | + continue; | |
2016 | + | |
2017 | + readahead_ret = | |
2018 | + force_page_cache_readahead(inode->i_mapping, file, | |
2019 | + record->range_start, | |
2020 | + record->range_length); | |
2021 | + if (readahead_ret < 0) { | |
2022 | + readaheads_failed++; | |
2023 | +#ifdef PREFETCH_DEBUG | |
2024 | + if (readaheads_failed < 10) { | |
2025 | + printk(KERN_WARNING | |
2026 | + "Readahead failed, device=%d:%d, inode=%ld, start=%ld, length=%ld, error=%d\n", | |
2027 | + MAJOR(record->device), | |
2028 | + MINOR(record->device), record->inode_no, | |
2029 | + record->range_start, | |
2030 | + record->range_length, readahead_ret); | |
2031 | + } | |
2032 | + if (readaheads_failed == 10) | |
2033 | + printk(KERN_WARNING | |
2034 | + "Readaheads failed reached limit, not printing next failures\n"); | |
2035 | +#endif | |
2036 | + } | |
2037 | + } | |
2038 | + | |
2039 | + out: | |
2040 | + if (readaheads_failed > 0) | |
2041 | + printk(KERN_INFO "Readaheads not performed: %d\n", | |
2042 | + readaheads_failed); | |
2043 | + | |
2044 | + if (sb) | |
2045 | + drop_super(sb); | |
2046 | + if (inode) | |
2047 | + iput(inode); | |
2048 | + if (file) | |
2049 | + put_filp(file); | |
2050 | + | |
2051 | +#ifdef PREFETCH_DEBUG | |
2052 | + printk(KERN_INFO "Delay io ticks after prefetching: %d\n", | |
2053 | + (int)delayacct_blkio_ticks(current)); | |
2054 | + prefetch_end_timing(&timer); | |
2055 | + prefetch_print_timing(&timer); | |
2056 | +#endif | |
2057 | + return ret; | |
2058 | +} | |
2059 | + | |
2060 | +/** | |
2061 | + * Adds trace record. Does not sleep. | |
2062 | +*/ | |
2063 | +void prefetch_trace_add(dev_t device, | |
2064 | + unsigned long inode_no, | |
2065 | + pgoff_t range_start, pgoff_t range_length) | |
2066 | +{ | |
2067 | + struct prefetch_trace_record *record; | |
2068 | + | |
2069 | + spin_lock(&prefetch_trace.prefetch_trace_lock); | |
2070 | + | |
2071 | + if (prefetch_trace.buffer_used + sizeof(struct prefetch_trace_record) > | |
2072 | + prefetch_trace.buffer_size) { | |
2073 | + prefetch_trace.overflow = 1; | |
2074 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
2075 | + return; | |
2076 | + } | |
2077 | + | |
2078 | + record = | |
2079 | + (struct prefetch_trace_record *)(prefetch_trace.buffer + | |
2080 | + prefetch_trace.buffer_used); | |
2081 | + prefetch_trace.buffer_used += sizeof(struct prefetch_trace_record); | |
2082 | + | |
2083 | + record->device = device; | |
2084 | + record->inode_no = inode_no; | |
2085 | + record->range_start = range_start; | |
2086 | + record->range_length = range_length; | |
2087 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
2088 | +} | |
2089 | + | |
2090 | +#define IVEC_SIZE (PAGE_SIZE / sizeof(struct inode *)) | |
2091 | + | |
2092 | +/* | |
2093 | + * Full: there is more data following. | |
2094 | + */ | |
2095 | +static int ivec_full(struct inode_walk_session *s) | |
2096 | +{ | |
2097 | + return !s->ivec.cursor || | |
2098 | + s->ivec.cursor > s->ivec.origin + s->ivec.size; | |
2099 | +} | |
2100 | + | |
2101 | +static int ivec_push(struct inode_walk_session *s, struct inode *inode) | |
2102 | +{ | |
2103 | + if (!atomic_read(&inode->i_count)) | |
2104 | + return 0; | |
2105 | + if (!inode->i_mapping) | |
2106 | + return 0; | |
2107 | + | |
2108 | + s->ivec.cursor++; | |
2109 | + | |
2110 | + if (s->ivec.size >= IVEC_SIZE) | |
2111 | + return 1; | |
2112 | + | |
2113 | + if (s->ivec.cursor > s->ivec.origin) | |
2114 | + s->ivec.inodes[s->ivec.size++] = inode; | |
2115 | + return 0; | |
2116 | +} | |
2117 | + | |
2118 | +/* | |
2119 | + * Traverse the inode lists in order - newest first. | |
2120 | + * Fill @s->ivec.inodes with inodes positioned in [@pos, @pos+IVEC_SIZE). | |
2121 | + */ | |
2122 | +static int ivec_fill(struct inode_walk_session *s, unsigned long pos) | |
2123 | +{ | |
2124 | + struct inode *inode; | |
2125 | + struct super_block *sb; | |
2126 | + | |
2127 | + s->ivec.origin = pos; | |
2128 | + s->ivec.cursor = 0; | |
2129 | + s->ivec.size = 0; | |
2130 | + | |
2131 | + /* | |
2132 | + * We have a cursor inode, clean and expected to be unchanged. | |
2133 | + */ | |
2134 | + if (s->icur.inode && pos >= s->icur.pos && | |
2135 | + !(s->icur.i_state & I_DIRTY) && | |
2136 | + s->icur.i_state == s->icur.inode->i_state) { | |
2137 | + inode = s->icur.inode; | |
2138 | + s->ivec.cursor = s->icur.pos; | |
2139 | + goto continue_from_saved; | |
2140 | + } | |
2141 | + | |
2142 | + spin_lock(&sb_lock); | |
2143 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
2144 | + list_for_each_entry(inode, &sb->s_dirty, i_list) { | |
2145 | + if (ivec_push(s, inode)) | |
2146 | + goto out_full_unlock; | |
2147 | + } | |
2148 | + list_for_each_entry(inode, &sb->s_io, i_list) { | |
2149 | + if (ivec_push(s, inode)) | |
2150 | + goto out_full_unlock; | |
2151 | + } | |
2152 | + } | |
2153 | + spin_unlock(&sb_lock); | |
2154 | + | |
2155 | + list_for_each_entry(inode, &inode_in_use, i_list) { | |
2156 | + if (ivec_push(s, inode)) | |
2157 | + goto out_full; | |
2158 | + continue_from_saved: | |
2159 | + ; | |
2160 | + } | |
2161 | + | |
2162 | + list_for_each_entry(inode, &inode_unused, i_list) { | |
2163 | + if (ivec_push(s, inode)) | |
2164 | + goto out_full; | |
2165 | + } | |
2166 | + | |
2167 | + return 0; | |
2168 | + | |
2169 | + out_full_unlock: | |
2170 | + spin_unlock(&sb_lock); | |
2171 | + out_full: | |
2172 | + return 1; | |
2173 | +} | |
2174 | + | |
2175 | +static struct inode *ivec_inode(struct inode_walk_session *s, unsigned long pos) | |
2176 | +{ | |
2177 | + if ((ivec_full(s) && pos >= s->ivec.origin + s->ivec.size) | |
2178 | + || pos < s->ivec.origin) | |
2179 | + ivec_fill(s, pos); | |
2180 | + | |
2181 | + if (pos >= s->ivec.cursor) | |
2182 | + return NULL; | |
2183 | + | |
2184 | + s->icur.pos = pos; | |
2185 | + s->icur.inode = s->ivec.inodes[pos - s->ivec.origin]; | |
2186 | + return s->icur.inode; | |
2187 | +} | |
2188 | + | |
2189 | +static void add_referenced_page_range(struct inode_walk_session *s, | |
2190 | + struct address_space *mapping, | |
2191 | + pgoff_t start, pgoff_t len) | |
2192 | +{ | |
2193 | + struct inode *inode; | |
2194 | + | |
2195 | + s->pages_referenced += len; | |
2196 | + s->page_blocks++; | |
2197 | + if (!clearing_in_progress) { | |
2198 | + inode = mapping->host; | |
2199 | + if (inode && inode->i_sb && inode->i_sb->s_bdev) | |
2200 | + prefetch_trace_add(inode->i_sb->s_bdev->bd_dev, | |
2201 | + inode->i_ino, start, len); | |
2202 | + } | |
2203 | +} | |
2204 | + | |
2205 | +/** | |
2206 | + Add page to trace if it was referenced. | |
2207 | + | |
2208 | + NOTE: spinlock might be held while this function is called. | |
2209 | +*/ | |
2210 | +void prefetch_add_page_to_trace(struct page *page) | |
2211 | +{ | |
2212 | + struct address_space *mapping; | |
2213 | + struct inode *inode; | |
2214 | + | |
2215 | + /*if not tracing, nothing to be done */ | |
2216 | + if (atomic_read(&prefetch_trace.tracers_count) <= 0) | |
2217 | + return; | |
2218 | + | |
2219 | + /*if page was not touched */ | |
2220 | + if (!PageReferenced(page)) | |
2221 | + return; | |
2222 | + | |
2223 | + /*swap pages are not interesting */ | |
2224 | + if (PageSwapCache(page)) | |
2225 | + return; | |
2226 | + | |
2227 | + /*no locking, just stats */ | |
2228 | + prefetch_trace.page_release_traced++; | |
2229 | + | |
2230 | + mapping = page_mapping(page); | |
| + if (mapping == NULL) | |
| + return; /*anonymous page, no backing file to trace */ | |
2231 | + | |
2232 | + inode = mapping->host; | |
2233 | + if (inode && inode->i_sb && inode->i_sb->s_bdev) | |
2234 | + prefetch_trace_add(inode->i_sb->s_bdev->bd_dev, inode->i_ino, | |
2235 | + page_index(page), 1); | |
2236 | +} | |
2237 | + | |
2238 | +/** | |
2239 | + Hook called when page is about to be freed, so we have to check | |
2240 | + if it was referenced, as inode walk will not notice it. | |
2241 | + | |
2242 | + NOTE: spinlock is held while this function is called. | |
2243 | +*/ | |
2244 | +void prefetch_page_release_hook(struct page *page) | |
2245 | +{ | |
2246 | + prefetch_add_page_to_trace(page); | |
2247 | +} | |
2248 | + | |
2249 | +static void walk_file_cache(struct inode_walk_session *s, | |
2250 | + struct address_space *mapping) | |
2251 | +{ | |
2252 | + int i; | |
2253 | + pgoff_t len = 0; | |
2254 | + struct pagevec pvec; | |
2255 | + struct page *page; | |
2256 | + struct page *page0 = NULL; | |
2257 | + int current_page_referenced = 0; | |
2258 | + int previous_page_referenced = 0; | |
2259 | + pgoff_t start = 0; | |
2260 | + | |
2261 | + for (;;) { | |
2262 | + pagevec_init(&pvec, 0); | |
2263 | + pvec.nr = radix_tree_gang_lookup(&mapping->page_tree, | |
2264 | + (void **)pvec.pages, | |
2265 | + start + len, PAGEVEC_SIZE); | |
2266 | + | |
2267 | + if (pvec.nr == 0) { | |
2268 | + /*no more pages present - | |
2269 | + add the last range, if any */ | |
2270 | + if (previous_page_referenced) | |
2271 | + add_referenced_page_range(s, mapping, start, | |
2272 | + len); | |
2273 | + goto out; | |
2274 | + } | |
2275 | + | |
2276 | + if (!page0) { | |
2277 | + page0 = pvec.pages[0]; | |
2278 | + previous_page_referenced = PageReferenced(page0); | |
2279 | + } | |
2280 | + | |
2281 | + for (i = 0; i < pvec.nr; i++) { | |
2282 | + | |
2283 | + page = pvec.pages[i]; | |
2284 | + current_page_referenced = TestClearPageReferenced(page); | |
2285 | + | |
2286 | + s->pages_walked++; | |
2287 | + | |
2288 | + if (page->index == start + len | |
2289 | + && previous_page_referenced == | |
2290 | + current_page_referenced) | |
2291 | + len++; | |
2292 | + else { | |
2293 | + if (previous_page_referenced) | |
2294 | + add_referenced_page_range(s, mapping, | |
2295 | + start, len); | |
2296 | + | |
2297 | + page0 = page; | |
2298 | + start = page->index; | |
2299 | + len = 1; | |
2300 | + } | |
2301 | + previous_page_referenced = current_page_referenced; | |
2302 | + } | |
2303 | + } | |
2304 | + | |
2305 | + out: | |
2306 | + return; | |
2307 | +} | |
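/*
 * Worked example of the run-length encoding above: if an inode's pages at
 * indices 3, 4, 5 and 9 have the referenced bit set and its other cached
 * pages do not, walk_file_cache() emits two records through
 * add_referenced_page_range(): (start=3, len=3) and (start=9, len=1).
 * Runs of unreferenced pages only terminate ranges; they are never
 * recorded themselves.
 */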
2308 | + | |
2309 | +static void show_inode(struct inode_walk_session *s, struct inode *inode) | |
2310 | +{ | |
2311 | + ++s->inodes_walked; /*just for stats, so not using atomic_inc() */ | |
2312 | + | |
2313 | + if (inode->i_mapping) | |
2314 | + walk_file_cache(s, inode->i_mapping); | |
2315 | +} | |
2316 | + | |
2317 | +/** | |
2318 | + Allocates memory for trace buffer. | |
2319 | + This memory should be freed using free_trace_buffer(). | |
2320 | +*/ | |
2321 | +void *alloc_trace_buffer(int len) | |
2322 | +{ | |
2323 | + return (void *)__get_free_pages(GFP_KERNEL, get_order(len)); | |
2324 | +} | |
2325 | + | |
2326 | +EXPORT_SYMBOL(alloc_trace_buffer); | |
2327 | + | |
2328 | +/** | |
2329 | + Frees memory allocated using alloc_trace_buffer(). | |
2330 | +*/ | |
2331 | +void free_trace_buffer(void *buffer, int len) | |
2332 | +{ | |
2333 | + free_pages((unsigned long)buffer, get_order(len)); | |
2334 | +} | |
2335 | + | |
2336 | +EXPORT_SYMBOL(free_trace_buffer); | |
2337 | + | |
2338 | +/*NOTE: this function is called with inode_lock spinlock held*/ | |
2339 | +static int inode_walk_show(struct inode_walk_session *s, loff_t pos) | |
2340 | +{ | |
2341 | + unsigned long index = pos; | |
2342 | + struct inode *inode; | |
2343 | + | |
2344 | + inode = ivec_inode(s, index); | |
2345 | + BUG_ON(!inode); | |
2346 | + show_inode(s, inode); | |
2347 | + | |
2348 | + return 0; | |
2349 | +} | |
2350 | + | |
2351 | +static void *inode_walk_start(struct inode_walk_session *s, loff_t * pos) | |
2352 | +{ | |
2353 | + s->ivec.inodes = (struct inode **)__get_free_page(GFP_KERNEL); | |
2354 | + if (!s->ivec.inodes) | |
2355 | + return NULL; | |
2356 | + s->ivec.size = 0; | |
2357 | + | |
2358 | + spin_lock(&inode_lock); | |
2359 | + | |
2360 | + BUG_ON(s->icur.pinned_inode); | |
2361 | + s->icur.pinned_inode = s->icur.inode; | |
2362 | + return ivec_inode(s, *pos) ? pos : NULL; | |
2363 | +} | |
2364 | + | |
2365 | +static void inode_walk_stop(struct inode_walk_session *s) | |
2366 | +{ | |
2367 | + if (s->icur.inode) { | |
2368 | + __iget(s->icur.inode); | |
2369 | + s->icur.i_state = s->icur.inode->i_state; | |
2370 | + } | |
2371 | + | |
2372 | + spin_unlock(&inode_lock); | |
2373 | + free_page((unsigned long)s->ivec.inodes); | |
2374 | + | |
2375 | + if (s->icur.pinned_inode) { | |
2376 | + iput(s->icur.pinned_inode); | |
2377 | + s->icur.pinned_inode = NULL; | |
2378 | + } | |
2379 | +} | |
2380 | + | |
2381 | +/*NOTE: this function is called with inode_lock spinlock held*/ | |
2382 | +static void *inode_walk_next(struct inode_walk_session *s, loff_t * pos) | |
2383 | +{ | |
2384 | + (*pos)++; | |
2385 | + | |
2386 | + return ivec_inode(s, *pos) ? pos : NULL; | |
2387 | +} | |
2388 | + | |
2389 | +static struct inode_walk_session *inode_walk_session_create(void) | |
2390 | +{ | |
2391 | + struct inode_walk_session *s; | |
2392 | + int err = 0; | |
2393 | + | |
2394 | + s = kzalloc(sizeof(*s), GFP_KERNEL); | |
2395 | + if (!s) | |
2396 | + err = -ENOMEM; | |
2397 | + | |
2398 | + return err ? ERR_PTR(err) : s; | |
2399 | +} | |
2400 | + | |
2401 | +static void inode_walk_session_release(struct inode_walk_session *s) | |
2402 | +{ | |
2403 | + if (s->icur.inode) | |
2404 | + iput(s->icur.inode); | |
2405 | + kfree(s); | |
2406 | +} | |
2407 | + | |
2408 | +/** | |
2409 | + * Prints message followed by marker. | |
2410 | +*/ | |
2411 | +void print_marker(char *msg, struct trace_marker marker) | |
2412 | +{ | |
2413 | + printk("%s %u.%u\n", msg, marker.generation, marker.position); | |
2414 | +} | |
2415 | + | |
2416 | +EXPORT_SYMBOL(print_marker); | |
2417 | + | |
2418 | +/** | |
2419 | + Returns current trace marker. | |
2420 | + Note: marker ranges are open on the right side, i.e. | |
2421 | + [start_marker, end_marker) | |
2422 | +*/ | |
2423 | +static struct trace_marker get_trace_marker(void) | |
2424 | +{ | |
2425 | + struct trace_marker marker; | |
2426 | + | |
2427 | + spin_lock(&prefetch_trace.prefetch_trace_lock); | |
2428 | + marker.position = prefetch_trace.buffer_used; | |
2429 | + marker.generation = prefetch_trace.generation; | |
2430 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
2431 | + | |
2432 | + return marker; | |
2433 | +} | |
2434 | + | |
2435 | +/** | |
2436 | + Returns size of prefetch trace between start and end marker. | |
2437 | + Returns <0 if error occurs. | |
2438 | +*/ | |
2439 | +int prefetch_trace_fragment_size(struct trace_marker start_marker, | |
2440 | + struct trace_marker end_marker) | |
2441 | +{ | |
2442 | + if (start_marker.generation != end_marker.generation) | |
2443 | + return -EINVAL; /*trace must have wrapped around and trace is no longer available */ | |
2444 | + if (end_marker.position < start_marker.position) | |
2445 | + return -ERANGE; /*invalid markers */ | |
2446 | + | |
2447 | + return end_marker.position - start_marker.position; | |
2448 | +} | |
2449 | + | |
2450 | +EXPORT_SYMBOL(prefetch_trace_fragment_size); | |
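/*
 * Example of the half-open marker semantics: a start marker taken at
 * buffer offset 128 and an end marker taken at offset 288 within the same
 * generation describe 288 - 128 = 160 bytes of trace.  Once clear_trace()
 * bumps the generation, the same pair yields -EINVAL, because positions
 * from different generations are not comparable.
 */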
2451 | + | |
2452 | +/** | |
2453 | + Returns position in trace buffer for given marker. | |
2454 | + prefetch_trace_lock spinlock must be held when calling this function. | |
2455 | + Returns < 0 in case of error. | |
2456 | + Returns -ENOSPC if this marker is not in buffer. | |
2457 | + Note: marker ranges are open on right side, so this position | |
2458 | + might point to first byte after the buffer for end markers. | |
2459 | +*/ | |
2460 | +static int trace_marker_position_in_buffer(struct trace_marker marker) | |
2461 | +{ | |
2462 | + if (marker.generation != prefetch_trace.generation) | |
2463 | + return -EINVAL; /*trace must have wrapped around and trace is no longer available */ | |
2464 | + | |
2465 | + if (prefetch_trace.buffer_used < marker.position) | |
2466 | + return -ENOSPC; | |
2467 | + | |
2468 | + /*for now simple, not circular buffer */ | |
2469 | + return marker.position; | |
2470 | +} | |
2471 | + | |
2472 | +/** | |
2473 | + Fetches the fragment of trace between @start_marker and @end_marker. | |
2474 | + On success returns 0, storing the fragment (allocated using | |
2475 | + alloc_trace_buffer()) on @fragment_result and its size on | |
| + @fragment_size_result; on error returns <0. | |
2476 | + The fragment should be freed using free_trace_buffer(). | |
2477 | + If the fragment size is 0, the returned fragment is NULL. | |
2478 | +*/ | |
2479 | +int get_prefetch_trace_fragment(struct trace_marker start_marker, | |
2480 | + struct trace_marker end_marker, | |
2481 | + void **fragment_result, | |
2482 | + int *fragment_size_result) | |
2483 | +{ | |
2484 | + int start_position; | |
2485 | + int end_position; | |
2486 | + int len; | |
2487 | + int ret; | |
2488 | + void *fragment; | |
2489 | + int fragment_size; | |
2490 | + | |
2491 | + fragment_size = prefetch_trace_fragment_size(start_marker, end_marker); | |
2492 | + if (fragment_size < 0) | |
2493 | + return fragment_size; | |
2494 | + if (fragment_size == 0) { | |
2495 | + *fragment_size_result = 0; | |
2496 | + *fragment_result = NULL; | |
2497 | + return 0; | |
2498 | + } | |
2499 | + | |
2500 | + fragment = alloc_trace_buffer(fragment_size); | |
2501 | + if (fragment == NULL) | |
2502 | + return -ENOMEM; | |
2503 | + | |
2504 | + spin_lock(&prefetch_trace.prefetch_trace_lock); | |
2505 | + | |
2506 | + start_position = trace_marker_position_in_buffer(start_marker); | |
2507 | + end_position = trace_marker_position_in_buffer(end_marker); | |
2508 | + | |
2509 | + if (start_position < 0) { | |
2510 | + ret = -ESRCH; | |
2511 | + goto out_free; | |
2512 | + } | |
2513 | + if (end_position < 0) { | |
2514 | + ret = -ESRCH; | |
2515 | + goto out_free; | |
2516 | + } | |
2517 | + | |
2518 | + len = end_position - start_position; | |
2519 | + BUG_ON(len <= 0 || len != fragment_size); | |
2520 | + | |
2521 | + memcpy(fragment, prefetch_trace.buffer + start_position, len); | |
2522 | + | |
2523 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
2524 | + | |
2525 | + *fragment_result = fragment; | |
2526 | + *fragment_size_result = fragment_size; | |
2527 | + return 0; | |
2528 | + | |
2529 | + out_free: | |
2530 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
2531 | + free_trace_buffer(fragment, fragment_size); | |
2532 | + return ret; | |
2533 | +} | |
2534 | + | |
2535 | +EXPORT_SYMBOL(get_prefetch_trace_fragment); | |
2536 | + | |
2537 | +struct file *kernel_open(char const *file_name, int flags, int mode) | |
2538 | +{ | |
2539 | + int orig_fsuid = current->fsuid; | |
2540 | + int orig_fsgid = current->fsgid; | |
2541 | + struct file *file = NULL; | |
2542 | +#if BITS_PER_LONG != 32 | |
2543 | + flags |= O_LARGEFILE; | |
2544 | +#endif | |
2545 | + current->fsuid = 0; | |
2546 | + current->fsgid = 0; | |
2547 | + | |
2548 | + file = filp_open(file_name, flags, mode); | |
2549 | + current->fsuid = orig_fsuid; | |
2550 | + current->fsgid = orig_fsgid; | |
2551 | + return file; | |
2552 | +} | |
2553 | + | |
2554 | +int kernel_close(struct file *file) | |
2555 | +{ | |
2556 | + if (file->f_op && file->f_op->flush) { | |
2557 | + file->f_op->flush(file, current->files); | |
2558 | + } | |
2559 | + fput(file); | |
2560 | + | |
2561 | + return 0; /*no errors known for now */ | |
2562 | +} | |
2563 | + | |
2564 | +int kernel_write(struct file *file, unsigned long offset, const char *addr, | |
2565 | + unsigned long count) | |
2566 | +{ | |
2567 | + mm_segment_t old_fs; | |
2568 | + loff_t pos = offset; | |
2569 | + int result = -ENOSYS; | |
2570 | + | |
2571 | + if (!file->f_op->write) | |
2572 | + goto fail; | |
2573 | + old_fs = get_fs(); | |
2574 | + set_fs(get_ds()); | |
2575 | + result = file->f_op->write(file, addr, count, &pos); | |
2576 | + set_fs(old_fs); | |
2577 | + fail: | |
2578 | + return result; | |
2579 | +} | |
2580 | + | |
2581 | +/** | |
2582 | + * Compares two trace records and returns -1, 0 or 1, depending on the result of the comparison. | |
2583 | + * Comparison is lexicographical on device, inode, range_start and range_length (range_length descending). | |
2584 | + */ | |
2585 | +static int trace_cmp(const void *p1, const void *p2) | |
2586 | +{ | |
2587 | + struct prefetch_trace_record *r1 = (struct prefetch_trace_record *)p1; | |
2588 | + struct prefetch_trace_record *r2 = (struct prefetch_trace_record *)p2; | |
2589 | + | |
2590 | + if (r1->device < r2->device) | |
2591 | + return -1; | |
2592 | + if (r1->device > r2->device) | |
2593 | + return 1; | |
2594 | + | |
2595 | + if (r1->inode_no < r2->inode_no) | |
2596 | + return -1; | |
2597 | + if (r1->inode_no > r2->inode_no) | |
2598 | + return 1; | |
2599 | + | |
2600 | + if (r1->range_start < r2->range_start) | |
2601 | + return -1; | |
2602 | + if (r1->range_start > r2->range_start) | |
2603 | + return 1; | |
2604 | + | |
2605 | + /*longer range_length is preferred as we want to fetch large fragments first */ | |
2606 | + if (r1->range_length < r2->range_length) | |
2607 | + return 1; | |
2608 | + if (r1->range_length > r2->range_length) | |
2609 | + return -1; | |
2610 | + return 0; | |
2611 | +} | |
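/*
 * Example ordering produced by trace_cmp() (illustrative values):
 *   (dev 8:1, ino 12, start  0, len 32)
 *   (dev 8:1, ino 12, start 64, len  8)
 *   (dev 8:1, ino 97, start  5, len  1)
 *   (dev 8:2, ino  3, start  0, len 16)
 * Records of one inode become adjacent and sorted by offset (longest
 * range first on equal offsets), so prefetch_do_prefetch() can reuse one
 * superblock and inode across consecutive records and issue
 * mostly-sequential readaheads.
 */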
2612 | + | |
2613 | +/** | |
2614 | + * Sorts trace fragment by device, inode and start. | |
2615 | +*/ | |
2616 | +void sort_trace_fragment(void *trace, int trace_size) | |
2617 | +{ | |
2618 | + sort(trace, trace_size / sizeof(struct prefetch_trace_record), | |
2619 | + sizeof(struct prefetch_trace_record), trace_cmp, NULL); | |
2620 | +} | |
2621 | + | |
2622 | +EXPORT_SYMBOL(sort_trace_fragment); | |
2623 | + | |
2624 | +/** | |
2625 | + * Saves trace fragment from buffer @fragment of size @fragment_size into file @filename. | |
2626 | + * Returns 0 on success, <0 on error (with error code). | |
2627 | +*/ | |
2628 | +int prefetch_save_trace_fragment(char *filename, | |
2629 | + void *fragment, int fragment_size) | |
2630 | +{ | |
2631 | + int ret = 0; | |
2632 | + int written = 0; | |
2633 | + struct file *file; | |
2634 | + struct prefetch_trace_header header; | |
2635 | + int data_start = 0; | |
2636 | + | |
2637 | + file = kernel_open(filename, O_CREAT | O_TRUNC | O_RDWR, 0600); | |
2638 | + | |
2639 | + if (IS_ERR(file)) { | |
2640 | + ret = PTR_ERR(file); | |
2641 | + printk(KERN_WARNING | |
2642 | + "Cannot open file %s for writing to save trace, error=%d\n", | |
2643 | + filename, ret); | |
2644 | + goto out; | |
2645 | + } | |
2646 | + | |
2647 | + data_start = sizeof(header); | |
2648 | + /*copy magic signature */ | |
2649 | + memcpy(&header.magic[0], trace_file_magic, sizeof(header.magic)); | |
2650 | + header.version_major = PREFETCH_FORMAT_VERSION_MAJOR; | |
2651 | + header.version_minor = PREFETCH_FORMAT_VERSION_MINOR; | |
2652 | + header.data_start = data_start; | |
2653 | + | |
2654 | + ret = kernel_write(file, 0, (char *)&header, sizeof(header)); | |
2655 | + if (ret < 0 || ret != sizeof(header)) { | |
2656 | + printk(KERN_WARNING "Error while writing header to file %s, error=%d\n", | |
2657 | + filename, ret); | |
| + if (ret >= 0) | |
| + ret = -EIO; /*short write, keep a negative error code */ | |
2658 | + goto out_close; | |
2659 | + } | |
2660 | + | |
2661 | + while (written < fragment_size) { | |
2662 | + ret = | |
2663 | + kernel_write(file, data_start + written, fragment + written, | |
2664 | + fragment_size - written); | |
2665 | + | |
2666 | + if (ret < 0) { | |
2667 | + printk(KERN_WARNING "Error while writing to file %s, error=%d\n", | |
2668 | + filename, ret); | |
2669 | + goto out_close; | |
2670 | + } | |
2671 | + written += ret; | |
2672 | + } | |
| + ret = 0; /*as documented: 0 on success, not the byte count of the last write */ | |
2673 | + out_close: | |
2674 | + kernel_close(file); | |
2675 | + out: | |
2676 | + return ret; | |
2677 | +} | |
2678 | + | |
2679 | +EXPORT_SYMBOL(prefetch_save_trace_fragment); | |
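/*
 * Resulting on-disk layout, assuming the compiler inserts no padding into
 * struct prefetch_trace_header (sizeof(header) == 10):
 *
 *   offset 0   char magic[4]       "PFCH"
 *   offset 4   u16  version_major  1
 *   offset 6   u16  version_minor  0
 *   offset 8   u16  data_start     10
 *   offset 10  struct prefetch_trace_record[]  raw trace records
 *
 * Readers should seek to data_start rather than sizeof(header), as minor
 * format revisions may move the data.
 */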
2680 | + | |
2681 | +/** | |
2682 | + * Saves trace fragment between @start_marker and @end_marker into file @filename. | |
2683 | + * Returns 0, if success <0 if error (with error code). | |
2684 | +*/ | |
2685 | +int prefetch_save_trace_between_markers(char *filename, | |
2686 | + struct trace_marker start_marker, | |
2687 | + struct trace_marker end_marker) | |
2688 | +{ | |
2689 | + void *fragment = NULL; | |
2690 | + int fragment_size = 0; | |
2691 | + int ret = 0; | |
2692 | + | |
2693 | + ret = get_prefetch_trace_fragment(start_marker, | |
2694 | + end_marker, | |
2695 | + &fragment, &fragment_size); | |
2696 | + | |
2697 | + if (ret < 0) { | |
2698 | + printk(KERN_WARNING | |
2699 | + "Cannot save trace fragment - cannot get trace fragment, error=%d\n", | |
2700 | + ret); | |
2701 | + goto out; | |
2702 | + } | |
2703 | + | |
2704 | + ret = prefetch_save_trace_fragment(filename, fragment, fragment_size); | |
2705 | + if (ret < 0) { | |
2706 | + printk(KERN_WARNING | |
2707 | + "Cannot save trace fragment - error saving file, error=%d\n", | |
2708 | + ret); | |
2709 | + goto out_free; | |
2710 | + } | |
2711 | + | |
2712 | + out_free: | |
2713 | + if (fragment_size > 0) | |
2714 | + free_trace_buffer(fragment, fragment_size); | |
2715 | + out: | |
2716 | + return ret; | |
2717 | +} | |
2718 | + | |
2719 | +EXPORT_SYMBOL(prefetch_save_trace_between_markers); | |
2720 | + | |
2721 | +static int walk_pages(enum tracing_command command, struct trace_marker *marker) | |
2722 | +{ | |
2723 | + void *retptr; | |
2724 | + loff_t pos = 0; | |
2725 | + int ret; | |
2726 | + loff_t next; | |
2727 | + struct inode_walk_session *s; | |
2728 | + int clearing = 0; | |
2729 | + int invalid_trace_counter = 0; | |
2730 | + int report_overflow = 0; | |
2731 | +#ifdef PREFETCH_DEBUG | |
2732 | + struct prefetch_timer walk_pages_timer; | |
2733 | +#endif | |
2734 | + | |
2735 | + spin_lock(&prefetch_trace.prefetch_trace_lock); | |
2736 | + if (prefetch_trace.overflow && !prefetch_trace.overflow_reported) { | |
2737 | + prefetch_trace.overflow_reported = 1; | |
2738 | + report_overflow = 1; | |
2739 | + } | |
2740 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
2741 | + | |
2742 | + if (report_overflow) { | |
2743 | + if (command == STOP_TRACING) { | |
2744 | + if (atomic_dec_return(&prefetch_trace.tracers_count) < | |
2745 | + 0) | |
2746 | + printk(KERN_WARNING | |
2747 | + "Trace counter is invalid\n"); | |
2748 | + } | |
2749 | + printk(KERN_WARNING "Prefetch buffer overflow\n"); | |
2750 | + return -ENOSPC; | |
2751 | + } | |
2752 | + | |
2753 | + s = inode_walk_session_create(); | |
2754 | + if (IS_ERR(s)) { | |
2755 | + retptr = s; | |
2756 | + goto out; | |
2757 | + } | |
2758 | + | |
2759 | + retptr = inode_walk_start(s, &pos); | |
2760 | + | |
2761 | + if (IS_ERR(retptr)) | |
2762 | + goto out_error_session_release; | |
2763 | + | |
2764 | + /*inode_lock spinlock held from here */ | |
2765 | + if (command == START_TRACING) { | |
2766 | + if (atomic_inc_return(&prefetch_trace.tracers_count) == 1) { | |
2767 | + /*prefetch_trace.tracers_count was 0, this is first tracer, so just clear bits */ | |
2768 | + clearing = 1; | |
2769 | + clearing_in_progress = 1; | |
2770 | + *marker = get_trace_marker(); | |
2771 | + } | |
2772 | + } | |
2773 | +#ifdef PREFETCH_DEBUG | |
2774 | + if (!clearing) { | |
2775 | + prefetch_start_timing(&walk_pages_timer, "walk pages"); | |
2776 | + } else | |
2777 | + prefetch_start_timing(&walk_pages_timer, "clearing pages"); | |
2778 | +#endif | |
2779 | + | |
2780 | + while (retptr != NULL) { | |
2781 | + /*FIXME: add lock breaking */ | |
2782 | + ret = inode_walk_show(s, pos); | |
2783 | + if (ret < 0) { | |
2784 | + retptr = ERR_PTR(ret); | |
2785 | + goto out_error; | |
2786 | + } | |
2787 | + | |
2788 | + next = pos; | |
2789 | + retptr = inode_walk_next(s, &next); | |
2790 | + if (IS_ERR(retptr)) | |
2791 | + goto out_error; | |
2792 | + pos = next; | |
2793 | + } | |
2794 | + | |
2795 | + if (command == STOP_TRACING) { | |
2796 | + if (atomic_dec_return(&prefetch_trace.tracers_count) < 0) { | |
2797 | + invalid_trace_counter = 1; | |
2798 | + } | |
2799 | + *marker = get_trace_marker(); | |
2800 | + } else if (command == CONTINUE_TRACING) { | |
2801 | + *marker = get_trace_marker(); | |
2802 | + } | |
2803 | + | |
2804 | + out_error: | |
2805 | + if (clearing) | |
2806 | + clearing_in_progress = 0; | |
2807 | + | |
2808 | + inode_walk_stop(s); | |
2809 | + /*inode_lock spinlock released */ | |
2810 | +#ifdef PREFETCH_DEBUG | |
2811 | + if (clearing) | |
2812 | + printk(KERN_INFO "Clearing run finished\n"); | |
2813 | +#endif | |
2814 | + if (invalid_trace_counter) | |
2815 | + printk(KERN_WARNING "Trace counter is invalid\n"); | |
2816 | + | |
2817 | +#ifdef PREFETCH_DEBUG | |
2818 | + if (!IS_ERR(retptr)) { | |
2819 | + prefetch_end_timing(&walk_pages_timer); | |
2820 | + prefetch_print_timing(&walk_pages_timer); | |
2821 | + printk(KERN_INFO | |
2822 | + "Inodes walked: %d, pages walked: %d, referenced: %d" | |
2823 | + " blocks: %d\n", s->inodes_walked, s->pages_walked, | |
2824 | + s->pages_referenced, s->page_blocks); | |
2825 | + } | |
2826 | +#endif | |
2827 | + | |
2828 | + out_error_session_release: | |
2829 | + inode_walk_session_release(s); | |
2830 | + out: | |
2831 | + return PTR_ERR(retptr); | |
2832 | +} | |
2833 | + | |
2834 | +/** | |
2835 | + Starts tracing; if no error happens, returns a marker pointing to the start of the trace on @marker. | |
2836 | +*/ | |
2837 | +int prefetch_start_trace(struct trace_marker *marker) | |
2838 | +{ | |
2839 | + int ret; | |
2840 | + if (!enabled) | |
2841 | + return -ENODEV; /*module disabled */ | |
2842 | + | |
2843 | + ret = walk_pages(START_TRACING, marker); | |
2844 | + | |
2845 | + if (ret >= 0) { | |
2846 | + mutex_lock(&prefetch_trace.prefetch_trace_mutex); | |
2847 | + prefetch_trace.trace_users++; | |
2848 | + mutex_unlock(&prefetch_trace.prefetch_trace_mutex); | |
2849 | + } | |
2850 | + return ret; | |
2851 | +} | |
2852 | + | |
2853 | +EXPORT_SYMBOL(prefetch_start_trace); | |
2854 | + | |
2855 | +/** | |
2856 | + Performs an interim tracing run; returns a marker pointing to the current place in the trace on @marker. | |
2857 | +*/ | |
2858 | +int prefetch_continue_trace(struct trace_marker *marker) | |
2859 | +{ | |
2860 | + if (!enabled) | |
2861 | + return -ENODEV; /*module disabled */ | |
2862 | + | |
2863 | + return walk_pages(CONTINUE_TRACING, marker); | |
2864 | +} | |
2865 | + | |
2866 | +EXPORT_SYMBOL(prefetch_continue_trace); | |
2867 | + | |
2868 | +/** | |
2869 | + Stops tracing, returns marker which points to end of trace. | |
2870 | +*/ | |
2871 | +int prefetch_stop_trace(struct trace_marker *marker) | |
2872 | +{ | |
2873 | + if (!enabled) { | |
2874 | + /*trace might have been started when module was enabled */ | |
2875 | + if (atomic_dec_return(&prefetch_trace.tracers_count) < 0) | |
2876 | + printk(KERN_WARNING | |
2877 | + "Trace counter is invalid after decrementing it in disabled module\n"); | |
2878 | + | |
2879 | + return -ENODEV; /*module disabled */ | |
2880 | + } | |
2881 | +#ifdef PREFETCH_DEBUG | |
2882 | + printk(KERN_INFO "Released pages traced: %d\n", | |
2883 | + prefetch_trace.page_release_traced); | |
2884 | +#endif | |
2885 | + return walk_pages(STOP_TRACING, marker); | |
2886 | +} | |
2887 | + | |
2888 | +EXPORT_SYMBOL(prefetch_stop_trace); | |
2889 | + | |
2890 | +/** | |
2891 | + Releases trace up to @end_marker. | |
2892 | + Each successful call to prefetch_start_trace() should | |
2893 | + be matched with exactly one call to prefetch_release_trace(). | |
2894 | + NOTE: end_marker is currently not used, but might | |
2895 | + be used in the future to release only part of trace. | |
2896 | +*/ | |
2897 | +int prefetch_release_trace(struct trace_marker end_marker) | |
2898 | +{ | |
2899 | + mutex_lock(&prefetch_trace.prefetch_trace_mutex); | |
2900 | + | |
2901 | + prefetch_trace.trace_users--; | |
2902 | + if (prefetch_trace.trace_users == 0) | |
2903 | + clear_trace(); | |
2904 | + if (prefetch_trace.trace_users < 0) | |
2905 | + printk(KERN_WARNING "Trace users count is invalid, count=%d\n", | |
2906 | + prefetch_trace.trace_users); | |
2907 | + | |
2908 | + mutex_unlock(&prefetch_trace.prefetch_trace_mutex); | |
2909 | + | |
2910 | + return 0; | |
2911 | +} | |
2912 | + | |
2913 | +EXPORT_SYMBOL(prefetch_release_trace); | |
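/*
 * A minimal sketch of how a prefetching module combines the exported
 * calls above; error handling is trimmed and "/mytrace" is a placeholder
 * filename.  Each successful prefetch_start_trace() is paired with
 * exactly one prefetch_release_trace(); end_marker is currently unused
 * by the release path.
 */
static int example_trace_session(void)
{
	struct trace_marker start, end = { 0, 0 };
	void *fragment = NULL;
	int fragment_size = 0;
	int ret;

	ret = prefetch_start_trace(&start);
	if (ret < 0)
		return ret;

	/* ... traced workload runs here ... */

	ret = prefetch_stop_trace(&end);
	if (ret == 0 &&
	    get_prefetch_trace_fragment(start, end,
					&fragment, &fragment_size) == 0 &&
	    fragment_size > 0) {
		sort_trace_fragment(fragment, fragment_size);
		ret = prefetch_save_trace_fragment("/mytrace", fragment,
						   fragment_size);
		free_trace_buffer(fragment, fragment_size);
	}

	prefetch_release_trace(end);
	return ret;
}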
2914 | + | |
2915 | +/** | |
2916 | + * Loads trace fragment from @filename. | |
2917 | + * Returns <0 in case of errors. | |
2918 | + * If successful, returns pointer to trace data on @trace_buffer and its size on @trace_size; | |
2919 | + * in that case the caller is responsible for freeing the buffer using free_trace_buffer(). | |
2920 | +*/ | |
2921 | +int prefetch_load_trace_fragment(char *filename, void **trace_buffer, | |
2922 | + int *trace_size) | |
2923 | +{ | |
2924 | + struct file *file; | |
2925 | + void *buffer; | |
2926 | + int data_start; | |
2927 | + int data_read = 0; | |
2928 | + int raw_data_size; | |
2929 | + int file_size; | |
2930 | + int ret = 0; | |
2931 | + struct prefetch_trace_header header; | |
2932 | + | |
2933 | + file = kernel_open(filename, O_RDONLY, 0600); | |
2934 | + | |
2935 | + if (IS_ERR(file)) { | |
2936 | + ret = PTR_ERR(file); | |
2937 | + printk("Cannot open file %s for reading, error=%d\n", filename, | |
2938 | + ret); | |
2939 | + return ret; | |
2940 | + } | |
2941 | + | |
2942 | + file_size = file->f_mapping->host->i_size; | |
2943 | + | |
2944 | + ret = kernel_read(file, 0, (char *)&header, sizeof(header)); | |
2945 | + | |
2946 | + if (ret < 0 || ret != sizeof(header)) { | |
2947 | + printk(KERN_WARNING | |
2948 | + "Cannot read trace header for trace file %s, error=%d\n", | |
2949 | + filename, ret); | |
2950 | + ret = -EINVAL; | |
2951 | + goto out_close; | |
2952 | + } | |
2953 | + | |
2954 | + if (strncmp | |
2955 | + (&header.magic[0], &trace_file_magic[0], | |
2956 | + sizeof(header.magic)) != 0) { | |
2957 | + printk(KERN_WARNING | |
2958 | + "Trace file %s does not have valid trace file signature\n", | |
2959 | + filename); | |
2960 | + ret = -EINVAL; | |
2961 | + goto out_close; | |
2962 | + } | |
2963 | + | |
2964 | + if (header.version_major != PREFETCH_FORMAT_VERSION_MAJOR) { | |
2965 | + printk(KERN_WARNING | |
2966 | + "Trace file %s has unsupported major version %d\n", | |
2967 | + filename, header.version_major); | |
2968 | + ret = -EINVAL; | |
2969 | + goto out_close; | |
2970 | + } | |
2971 | + data_start = header.data_start; | |
2972 | + if (data_start < sizeof(header)) { | |
2973 | + /*NOTE: exceeding file size is checked implicitly below with the raw_data_size check */ | |
2974 | + printk(KERN_WARNING | |
2975 | + "Trace file %s contains invalid data start: %d\n", | |
2976 | + filename, data_start); | |
2977 | + ret = -EINVAL; | |
2978 | + goto out_close; | |
2979 | + } | |
2980 | + | |
2981 | + raw_data_size = file_size - data_start; | |
2982 | + if (raw_data_size < 0) { | |
2983 | + ret = -EINVAL; | |
2984 | + printk(KERN_WARNING "Invalid trace file %s, not loading\n", | |
2985 | + filename); | |
2986 | + goto out_close; | |
2987 | + } | |
2988 | + | |
2989 | + if (raw_data_size == 0) { | |
2990 | + ret = -EINVAL; | |
2991 | + printk(KERN_INFO "Empty trace file %s, not loading\n", | |
2992 | + filename); | |
2993 | + goto out_close; | |
2994 | + } | |
2995 | + | |
2996 | + buffer = alloc_trace_buffer(raw_data_size); | |
2997 | + if (buffer == NULL) { | |
2998 | + printk(KERN_INFO "Cannot allocate memory for trace %s\n", | |
2999 | + filename); | |
3000 | + ret = -ENOMEM; | |
3001 | + goto out_close; | |
3002 | + } | |
3003 | + | |
3004 | + while (data_read < raw_data_size) { | |
3005 | + ret = | |
3006 | + kernel_read(file, data_start + data_read, | |
3007 | + buffer + data_read, raw_data_size - data_read); | |
3008 | + | |
3009 | + if (ret < 0) { | |
3010 | + printk("Error while reading from file %s, error=%d\n", | |
3011 | + filename, ret); | |
3012 | + goto out_close_free; | |
3013 | + } | |
3014 | + if (ret == 0) { | |
3015 | + printk(KERN_WARNING | |
3016 | + "File too short, data read=%d, expected size=%d\n", | |
3017 | + data_read, raw_data_size); | |
3018 | + break; | |
3019 | + } | |
3020 | + | |
3021 | + data_read += ret; | |
3022 | + } | |
3023 | + | |
3024 | + if (data_read == raw_data_size) { | |
3025 | + *trace_size = raw_data_size; | |
3026 | + *trace_buffer = buffer; | |
3027 | + } else { | |
3028 | + printk(KERN_WARNING | |
3029 | + "Trace file size changed beneath us, cancelling read\n"); | |
3030 | + ret = -ETXTBSY; | |
3031 | + goto out_close_free; | |
3032 | + } | |
3033 | + | |
3034 | + /*everything OK, caller will free the buffer */ | |
3035 | + kernel_close(file); | |
3036 | + return 0; | |
3037 | + | |
3038 | + out_close_free: | |
3039 | + free_trace_buffer(buffer, raw_data_size); /*buffer was sized from raw_data_size, not file_size */ | |
3040 | + out_close: | |
3041 | + kernel_close(file); | |
3042 | + return ret; | |
3043 | +} | |
3044 | + | |
3045 | +/** | |
3046 | + * Prefetches files based on trace read from @filename. | |
3047 | +*/ | |
3048 | +int do_prefetch_from_file(char *filename) | |
3049 | +{ | |
3050 | + int ret = 0; | |
3051 | + void *buffer = NULL; | |
3052 | + int buffer_size; | |
3053 | + | |
3054 | + ret = prefetch_load_trace_fragment(filename, &buffer, &buffer_size); | |
3055 | + if (ret < 0) { | |
3056 | + printk(KERN_WARNING "Reading trace file %s failed, error=%d\n", | |
3057 | + filename, ret); | |
3058 | + goto out; | |
3059 | + } | |
3060 | + | |
3061 | + ret = prefetch_start_prefetch(buffer, buffer_size, 0); | |
3062 | + if (ret < 0) { | |
3063 | + printk(KERN_WARNING | |
3064 | + "Prefetching for trace file %s failed, error=%d\n", | |
3065 | + filename, ret); | |
3066 | + goto out_free; | |
3067 | + } | |
3068 | +#ifdef PREFETCH_DEBUG /*same macro as the rest of the file */ | |
3069 | + printk(KERN_INFO "Prefetch from file %s successful\n", filename); | |
3070 | +#endif | |
3071 | + | |
3072 | + out_free: | |
3073 | + free_trace_buffer(buffer, buffer_size); | |
3074 | + out: | |
3075 | + return ret; | |
3076 | +} | |
3077 | + | |
3078 | +EXPORT_SYMBOL(do_prefetch_from_file); | |
3079 | + | |
3080 | +static void clear_trace(void) | |
3081 | +{ | |
3082 | + void *new_buffer = NULL; | |
3083 | + | |
3084 | +#ifdef PREFETCH_DEBUG | |
3085 | + printk(KERN_INFO "Clearing prefetch trace buffer\n"); | |
3086 | +#endif | |
3087 | + | |
3088 | + spin_lock(&prefetch_trace.prefetch_trace_lock); | |
3089 | + | |
3090 | + if (prefetch_trace.buffer == NULL) { | |
3091 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
3092 | + | |
3093 | + new_buffer = alloc_trace_buffer(prefetch_trace_size()); | |
3094 | + | |
3095 | + if (new_buffer == NULL) { | |
3096 | + printk(KERN_WARNING | |
3097 | + "Cannot allocate memory for trace buffer\n"); | |
3098 | + goto out; | |
3099 | + } | |
3100 | + | |
3101 | + spin_lock(&prefetch_trace.prefetch_trace_lock); | |
3102 | + | |
3103 | + if (prefetch_trace.buffer != NULL) { | |
3104 | + /*someone already allocated it */ | |
3105 | + free_trace_buffer(new_buffer, prefetch_trace_size()); | |
3106 | + } else { | |
3107 | + prefetch_trace.buffer = new_buffer; | |
3108 | + prefetch_trace.buffer_size = prefetch_trace_size(); | |
3109 | + } | |
3110 | + } | |
3111 | + /*reset used buffer counter */ | |
3112 | + prefetch_trace.buffer_used = 0; | |
3113 | + prefetch_trace.overflow = 0; | |
3114 | + prefetch_trace.overflow_reported = 0; | |
3115 | + prefetch_trace.page_release_traced = 0; | |
3116 | + prefetch_trace.generation++; /*next generation, markers are not comparable */ | |
3117 | + | |
3118 | + spin_unlock(&prefetch_trace.prefetch_trace_lock); | |
3119 | + out: | |
3120 | + return; | |
3121 | +} | |
3122 | + | |
3123 | +/** | |
3124 | + * Checks if @line is exactly the same as @param_name. | |
3125 | + */ | |
3126 | +int param_match(char *line, char *param_name) | |
3127 | +{ | |
3128 | + if (strcmp(line, param_name) == 0) | |
3129 | + return 1; | |
3130 | + | |
3131 | + return 0; | |
3132 | +} | |
3133 | + | |
3134 | +EXPORT_SYMBOL(param_match); | |
3135 | + | |
3136 | +/** | |
3137 | + * Checks if @line starts with @param_name, comparing at most strlen(@param_name) characters. | |
3138 | + */ | |
3139 | +int param_match_prefix(char *line, char *param_name) | |
3140 | +{ | |
3141 | + unsigned param_len = strlen(param_name); | |
3142 | + if (strncmp(line, param_name, param_len) == 0) | |
3143 | + return 1; | |
3144 | + | |
3145 | + return 0; | |
3146 | +} | |
3147 | + | |
3148 | +EXPORT_SYMBOL(param_match_prefix); | |
3149 | + | |
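To show how the two helpers are meant to be combined: param_match() handles bare keywords, param_match_prefix() handles keywords followed by an argument. A hedged sketch of such a dispatcher; the "save" command and the function itself are made up, only the calling convention comes from the patch.

	#include <linux/kernel.h>
	#include <linux/string.h>
	#include <linux/errno.h>

	/* Hypothetical dispatcher, called with a NUL-terminated command line. */
	static int demo_handle_command(char *line)
	{
		if (param_match(line, "enable"))
			return 0;		/* exact keyword, no argument */

		if (param_match_prefix(line, "save ")) {
			char *filename = line + strlen("save ");	/* argument follows the keyword */
			printk(KERN_INFO "would save trace to %s\n", filename);
			return 0;
		}

		return -EINVAL;			/* unknown command */
	}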
3150 | +ssize_t prefetch_proc_write(struct file *proc_file, const char __user *buffer, | |
3151 | + size_t count, loff_t *ppos) | |
3152 | +{ | |
3153 | + char *name; | |
3154 | + int e = 0; | |
3155 | + | |
3156 | + if (count >= PATH_MAX) | |
3157 | + return -ENAMETOOLONG; | |
3158 | + | |
3159 | + name = kmalloc(count + 1, GFP_KERNEL); | |
3160 | + if (!name) | |
3161 | + return -ENOMEM; | |
3162 | + | |
3163 | + if (copy_from_user(name, buffer, count)) { | |
3164 | + e = -EFAULT; | |
3165 | + goto out; | |
3166 | + } | |
3167 | + | |
3168 | + /* strip the optional newline */ | |
3169 | + if (count && name[count - 1] == '\n') | |
3170 | + name[count - 1] = '\0'; | |
3171 | + else | |
3172 | + name[count] = '\0'; | |
3173 | + | |
3174 | + if (param_match(name, "enable")) { | |
3175 | + printk(KERN_INFO "Prefetch module enabled\n"); | |
3176 | + enabled = 1; | |
3177 | + goto out; | |
3178 | + } | |
3179 | + | |
3180 | + if (param_match(name, "disable")) { | |
3181 | + printk(KERN_INFO "Prefetch module disabled\n"); | |
3182 | + enabled = 0; | |
3183 | + goto out; | |
3184 | + } | |
3185 | + out: | |
3186 | + kfree(name); | |
3187 | + | |
3188 | + return e ? e : count; | |
3189 | +} | |
3190 | + | |
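The handler above is reached through /proc/prefetch/control, registered in prefetch_core_init() below with mode 0600, so writes require root; from a shell, echo enable > /proc/prefetch/control is sufficient. For completeness, a small userspace sketch (not part of the patch; the file name prefetchctl.c is made up):

	/* Build with: cc -o prefetchctl prefetchctl.c; run as root, e.g. ./prefetchctl disable */
	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		const char *cmd = (argc > 1) ? argv[1] : "enable";
		int fd = open("/proc/prefetch/control", O_WRONLY);

		if (fd < 0) {
			perror("open /proc/prefetch/control");
			return 1;
		}
		if (write(fd, cmd, strlen(cmd)) < 0) {
			perror("write");
			close(fd);
			return 1;
		}
		close(fd);
		return 0;
	}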
3191 | +static int prefetch_proc_open(struct inode *inode, struct file *proc_file) | |
3192 | +{ | |
3193 | + return 0; | |
3194 | +} | |
3195 | + | |
3196 | +static int prefetch_proc_release(struct inode *inode, struct file *proc_file) | |
3197 | +{ | |
3198 | + return 0; | |
3199 | +} | |
3200 | + | |
3201 | +static struct file_operations proc_prefetch_fops = { | |
3202 | + .owner = THIS_MODULE, | |
3203 | + .open = prefetch_proc_open, | |
3204 | + .release = prefetch_proc_release, | |
3205 | + .write = prefetch_proc_write | |
3206 | +}; | |
3207 | + | |
3208 | +struct proc_dir_entry *prefetch_proc_dir = NULL; | |
3209 | +EXPORT_SYMBOL(prefetch_proc_dir); | |
3210 | + | |
3211 | +static __init int prefetch_core_init(void) | |
3212 | +{ | |
3213 | + struct proc_dir_entry *entry; | |
3214 | + | |
3215 | + mutex_lock(&prefetch_trace.prefetch_trace_mutex); | |
3216 | + clear_trace(); | |
3217 | + mutex_unlock(&prefetch_trace.prefetch_trace_mutex); | |
3218 | + | |
3219 | + prefetch_proc_dir = proc_mkdir("prefetch", NULL); | |
3220 | + | |
3221 | + if (prefetch_proc_dir == NULL) { | |
3222 | + printk(KERN_WARNING | |
3223 | + "Creating prefetch proc directory failed, proc interface will not be available\n"); | |
3224 | + } else { | |
3225 | + entry = create_proc_entry("control", 0600, prefetch_proc_dir); | |
3226 | + if (entry) | |
3227 | + entry->proc_fops = &proc_prefetch_fops; | |
3228 | + } | |
3229 | + | |
3230 | + printk(KERN_INFO "Prefetching core module started, enabled=%d\n", | |
3231 | + enabled); | |
3232 | + | |
3233 | + return 0; | |
3234 | +} | |
3235 | + | |
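prefetch_proc_dir is exported so that other prefetching modules can publish their own files under /proc/prefetch, using the same create_proc_entry()/proc_fops idiom as prefetch_core_init() above. A hypothetical sketch of that pattern; the "demo" entry and demo_fops are assumptions, not part of the patch.

	#include <linux/init.h>
	#include <linux/proc_fs.h>

	extern struct proc_dir_entry *prefetch_proc_dir;	/* exported by the core above */
	extern struct file_operations demo_fops;		/* assumed to be defined elsewhere */

	static struct proc_dir_entry *demo_entry;

	static int __init prefetch_demo_proc_init(void)
	{
		if (prefetch_proc_dir == NULL)
			return 0;	/* core has no /proc/prefetch; run without a proc file */

		demo_entry = create_proc_entry("demo", 0600, prefetch_proc_dir);
		if (demo_entry)
			demo_entry->proc_fops = &demo_fops;
		return 0;
	}

	static void prefetch_demo_proc_exit(void)
	{
		if (demo_entry)
			remove_proc_entry("demo", prefetch_proc_dir);
	}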
3236 | +static void __exit prefetch_core_exit(void) | |
3237 | +{ | |
3238 | + remove_proc_entry("control", prefetch_proc_dir); | |
3239 | + remove_proc_entry("prefetch", NULL); /*remove directory */ | |
3240 | +} | |
3241 | + | |
3242 | +MODULE_AUTHOR("Krzysztof Lichota <lichota@mimuw.edu.pl>"); | |
3243 | +MODULE_LICENSE("GPL"); | |
3244 | +MODULE_DESCRIPTION | |
3245 | + ("Prefetching core - functions used for tracing and prefetching by prefetching modules"); | |
3246 | + | |
3247 | +module_init(prefetch_core_init); | |
3248 | +module_exit(prefetch_core_exit); |