diff --git a/fs/exec.c b/fs/exec.c
index f9e8f6f..b060dce 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -51,6 +51,7 @@
 #include <linux/syscalls.h>
 #include <linux/rmap.h>
 #include <linux/tsacct_kern.h>
+#include <linux/prefetch_core.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
 #include <linux/signalfd.h>
@@ -1167,6 +1168,8 @@ int do_execve(char * filename,
 if (IS_ERR(file))
 goto out_kfree;

+ prefetch_exec_hook(filename);
+
 sched_exec();

 bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
diff --git a/include/linux/prefetch_core.h b/include/linux/prefetch_core.h
new file mode 100644
index 0000000..a5fbd56
--- /dev/null
+++ b/include/linux/prefetch_core.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl>
+ *
+ * This is the prefetch core: common code used for tracing and for saving trace files.
+ * It is used by the prefetching modules, such as boot and app.
+ */
+
+#ifndef _LINUX_PREFETCH_CORE_H
+#define _LINUX_PREFETCH_CORE_H
+
+#include <linux/types.h>
+#include <linux/mm_types.h>
+
+/**
+ * Trace record; describes one range of pages of an inode put into the trace.
+*/
+struct prefetch_trace_record {
+ dev_t device;
+ unsigned long inode_no;
+ pgoff_t range_start;
+ pgoff_t range_length;
+};
+
+extern char trace_file_magic[4];
+
+enum {
+ PREFETCH_FORMAT_VERSION_MAJOR = 1,
+ PREFETCH_FORMAT_VERSION_MINOR = 0
+};
+
+/**
+ * Trace on-disk header.
+ * The major version is increased with incompatible changes of the format.
+ * If you do not support this format explicitly, do not read the other fields.
+ * The minor version is increased with backward-compatible changes;
+ * you can read the other fields and the raw data, provided that you read
+ * trace data from the @data_start offset in the file.
+*/
+struct prefetch_trace_header {
+ char magic[4]; /*Trace file signature - should contain trace_file_magic */
+ u16 version_major; /*Major version of trace file format */
+ u16 version_minor; /*Minor version of trace file format */
+ u16 data_start; /*Trace raw data start */
+};
+
+struct trace_marker {
+ unsigned position;
+ unsigned generation;
+};
+
+int prefetch_start_trace(struct trace_marker *marker);
+int prefetch_continue_trace(struct trace_marker *marker);
+int prefetch_stop_trace(struct trace_marker *marker);
+int prefetch_release_trace(struct trace_marker end_marker);
+
+int prefetch_trace_fragment_size(struct trace_marker start_marker,
+ struct trace_marker end_marker);
+
+int get_prefetch_trace_fragment(struct trace_marker start_marker,
+ struct trace_marker end_marker,
+ void **fragment_result,
+ int *fragment_size_result);
+
+void *alloc_trace_buffer(int len);
+void free_trace_buffer(void *buffer, int len);
+void sort_trace_fragment(void *trace, int trace_size);
+
+int prefetch_save_trace_between_markers(char *filename,
+ struct trace_marker start_marker,
+ struct trace_marker end_marker);
+int prefetch_save_trace_fragment(char *filename,
+ void *trace_buffer, int trace_size);
+int prefetch_load_trace_fragment(char *filename,
+ void **trace_buffer, int *trace_size);
+
+int prefetch_start_prefetch(void *trace, int trace_size, int async);
+int do_prefetch_from_file(char *filename);
+
+void print_marker(char *msg, struct trace_marker marker);
+
+/* Hook for mm page release code */
+#ifdef CONFIG_PREFETCH_CORE
+void prefetch_page_release_hook(struct page *page);
+#else
+#define prefetch_page_release_hook(param) do {} while (0)
+#endif
+
+struct proc_dir_entry;
+extern struct proc_dir_entry *prefetch_proc_dir;
+
+int param_match(char *line, char *param_name);
+int param_match_prefix(char *line, char *param_name);
+
+/*Auxiliary functions for reading and writing in kernel*/
+struct file *kernel_open(char const *file_name, int flags, int mode);
+int kernel_write(struct file *file, unsigned long offset, const char *addr,
+ unsigned long count);
+/*NOTE: kernel_read is already available in kernel*/
+int kernel_close(struct file *file);
+
+/* App prefetching hooks */
+#ifdef CONFIG_PREFETCH_APP
+void prefetch_exec_hook(char *filename);
+void prefetch_exit_hook(pid_t pid);
+#else
+#define prefetch_exec_hook(param) do {} while (0)
+#define prefetch_exit_hook(param) do {} while (0)
+#endif
+
+#endif /*_LINUX_PREFETCH_CORE_H*/
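
The header above fixes the on-disk trace layout: a small header (4-byte magic "PFCH", per trace_file_magic in mm/prefetch_core.c, plus three u16 fields) followed by raw struct prefetch_trace_record entries starting at the data_start offset. As a minimal userspace sketch of the version rules described in the comment (reject an unknown major version, tolerate newer minors); the exact on-disk padding and record field widths are architecture-dependent assumptions of this sketch, not part of the patch:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>

	/* Mirrors struct prefetch_trace_header from the patch. */
	struct trace_header {
		char magic[4];		/* 'P','F','C','H' */
		uint16_t version_major;
		uint16_t version_minor;
		uint16_t data_start;	/* file offset of the raw records */
	};

	int open_trace(FILE *f, struct trace_header *h)
	{
		if (fread(h, sizeof(*h), 1, f) != 1)
			return -1;
		if (memcmp(h->magic, "PFCH", 4) != 0)
			return -1;	/* not a prefetch trace file */
		if (h->version_major != 1)
			return -1;	/* unsupported major version: stop here */
		/* minor bumps are backward compatible: records simply
		   start at data_start */
		return fseek(f, h->data_start, SEEK_SET);
	}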
diff --git a/init/Kconfig b/init/Kconfig
index a9e99f8..df3d532 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -104,6 +104,38 @@ config SWAP
 for so called swap devices or swap files in your kernel that are
 used to provide more virtual memory than the actual RAM present
 in your computer. If unsure say Y.
+config PREFETCH_CORE
+ bool "Prefetching support (core)"
+ default n
+ depends on MMU && BLOCK && EXPERIMENTAL
+ select TASK_DELAY_ACCT
+ help
+ This option enables the core of the tracing and prefetching facility.
+ The core provides functions used by the real prefetching modules,
+ so you have to enable one of them as well.
+config PREFETCH_BOOT
+ tristate "Boot prefetching support"
+ default n
+ depends on PREFETCH_CORE && PROC_FS && EXPERIMENTAL
+ help
+ This option enables the facility for tracing and prefetching during system boot.
+ In order to use it you have to install the appropriate prefetch init scripts.
+config PREFETCH_APP
+ bool "Application prefetching support"
+ default n
+ depends on PREFETCH_CORE && PROC_FS && EXPERIMENTAL
+ help
+ This option enables the facility for tracing and prefetching during application start.
+ When an application starts, tracing runs for a configurable time,
+ after which the trace is stopped and written to a file. Upon the
+ next start, the files recorded in the saved trace are prefetched.
+config PREFETCH_DEBUG
+ bool "Prefetching debug interface and debugging facilities"
+ default n
+ depends on PREFETCH_CORE && PROC_FS
+ help
+ This option enables facilities for testing and debugging tracing
+ and prefetching. Do not enable it on production systems.

 config SYSVIPC
 bool "System V IPC"
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b888c2..c136765 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -44,6 +44,7 @@
 #include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
+#include <linux/prefetch_core.h>
 #include <linux/resource.h>
 #include <linux/blkdev.h>
 #include <linux/task_io_accounting_ops.h>
@@ -864,6 +865,8 @@ fastcall NORET_TYPE void do_exit(long co
 struct task_struct *tsk = current;
 int group_dead;

+ prefetch_exit_hook(tsk->pid);
+
 profile_task_exit(tsk);

 WARN_ON(atomic_read(&tsk->fs_excl));
diff --git a/mm/Makefile b/mm/Makefile
index a9148ea..5433e6e 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -31,4 +31,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
+obj-$(CONFIG_PREFETCH_CORE) += prefetch_core.o
+obj-$(CONFIG_PREFETCH_BOOT) += prefetch_boot.o
+obj-$(CONFIG_PREFETCH_APP) += prefetch_app.o

diff --git a/mm/filemap.c b/mm/filemap.c
index edb1b0b..405487c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -30,6 +30,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
+#include <linux/prefetch_core.h>
 #include "filemap.h"
 #include "internal.h"

@@ -115,7 +116,9 @@ generic_file_direct_IO(int rw, struct ki
 void __remove_from_page_cache(struct page *page)
 {
 struct address_space *mapping = page->mapping;
-
+
+ prefetch_page_release_hook(page);
+
 radix_tree_delete(&mapping->page_tree, page->index);
 page->mapping = NULL;
 mapping->nrpages--;
diff --git a/mm/prefetch_app.c b/mm/prefetch_app.c
new file mode 100644
index 0000000..b7f3d43
--- /dev/null
+++ b/mm/prefetch_app.c
@@ -0,0 +1,1071 @@
+/*
+ * linux/mm/prefetch_app.c
+ *
+ * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl>
+ *
+ * This is the application tracing and prefetching module. It traces an application's
+ * start for a specified time; upon the next start it prefetches the traced files.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/prefetch_core.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <asm/current.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/crc32.h>
+#include <linux/sched.h>
+#include <linux/delayacct.h>
+#include <linux/seq_file.h>
+
+/*Enables/disables whole functionality of the module*/
+static int enabled = 1;
+module_param(enabled, bool, 0);
+MODULE_PARM_DESC(enabled,
+ "Enables or disables whole app prefetching module functionality (tracing and prefetching)");
+
+static int initialized = 0;
+
+/*Controls whether prefetching should be done along with tracing.*/
+static int prefetch_enabled = 1;
+module_param(prefetch_enabled, bool, 0);
+MODULE_PARM_DESC(prefetch_enabled,
+ "Enables or disables prefetching during app start. If disabled, only tracing will be done");
+
+/*Size of shortened name, together with hash it should be <=DNAME_INLINE_LEN_MIN*/
+static int short_name_len = 10;
+module_param(short_name_len, int, 0);
+MODULE_PARM_DESC(short_name_len,
+ "Length of shortened file name, used to name prefetch file together with hash of whole name");
+
+#define DEFAULT_APP_TRACE_FILENAME_TEMPLATE "/.prefetch/%s"
+static char *filename_template = DEFAULT_APP_TRACE_FILENAME_TEMPLATE;
+module_param(filename_template, charp, 0);
+MODULE_PARM_DESC(filename_template,
+ "Template for application trace name, where trace will be saved and read from. %s will be replaced with name of application and hash. The default is: "
+ DEFAULT_APP_TRACE_FILENAME_TEMPLATE);
+
+/*Size of hashtable for filenames*/
+static int filename_hashtable_size = 128;
+module_param(filename_hashtable_size, uint, 0);
+MODULE_PARM_DESC(filename_hashtable_size, "Size of hashtable for filenames");
+
+/**
+ * Time (in seconds) after which app tracing is stopped.
+*/
+static int tracing_timeout = 10;
+module_param(tracing_timeout, uint, 0);
+MODULE_PARM_DESC(tracing_timeout,
+ "Time (in seconds) after which app tracing is stopped");
+
+/**
+ * IO ticks (in centisecs) threshold above which application will be traced and prefetching done.
+*/
+static int tracing_ticks_threshold = 200;
+module_param(tracing_ticks_threshold, uint, 0);
+MODULE_PARM_DESC(tracing_ticks_threshold,
+ "IO ticks (in centisecs) threshold above which application will be traced and prefetching done");
+
+/**
+ * Hashtable of app names blacklisted from tracing/prefetching.
+ * If a filename is on this list, it will not be traced.
+ * Protected by prefetch_apps_blacklist_mutex.
+*/
+struct hlist_head *prefetch_apps_blacklist;
+DEFINE_MUTEX(prefetch_apps_blacklist_mutex);
+
+/**
+ * Hashtable of app names which should be traced/prefetched.
+ * If a filename is on this list, it has been decided that tracing/prefetching
+ * should be done for it.
+ * This list is protected by prefetch_apps_list_mutex.
+*/
+struct hlist_head *prefetch_apps_list;
+DEFINE_MUTEX(prefetch_apps_list_mutex);
+
+/**
+ * Entry in filename hashtable list.
+*/
+struct filename_entry {
+ struct hlist_node entries_list;
+ char *filename;
+};
+
+struct trace_job;
+
+/**
+ * Entry in traced pids hashtable list.
+*/
+struct traced_pid_entry {
+ struct hlist_node entries_list;
+ pid_t pid;
+ struct trace_job *trace_job;
+};
+
+#define TRACED_HASH_SIZE 16
+/**
+ * Hashtable of concurrently traced applications.
+ * The key is pid.
+ * Protected by traced_pids_mutex.
+*/
+struct hlist_head *traced_pids;
+
+DEFINE_MUTEX(traced_pids_mutex);
+
+/**
+ * Frees filename entry contents and entry itself.
+*/
+void free_filename_entry(struct filename_entry *entry)
+{
+ kfree(entry->filename);
+ kfree(entry);
+}
+
+void __clear_hashtable(struct hlist_head *list, int hashtable_size)
+{
+ struct filename_entry *entry;
+ struct hlist_node *cursor;
+ struct hlist_node *tmp;
+ int i;
+
+ for (i = 0; i < hashtable_size; ++i) {
+ hlist_for_each_entry_safe(entry, cursor, tmp, &list[i],
+ entries_list) {
+ free_filename_entry(entry);
+ }
+ /* clear whole list at once */
+ INIT_HLIST_HEAD(&list[i]);
+ }
+}
+
+void clear_hashtable(struct hlist_head *list, int hashtable_size,
+ struct mutex *mutex)
+{
+ mutex_lock(mutex);
+ __clear_hashtable(list, hashtable_size);
+ mutex_unlock(mutex);
+}
+
+int initialize_hashtable(struct hlist_head **list, int hashtable_size)
+{
+ struct hlist_head *h;
+ int i;
+
+ h = kmalloc(sizeof(struct hlist_head) * hashtable_size, GFP_KERNEL);
+ if (h == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < hashtable_size; ++i) {
+ INIT_HLIST_HEAD(&h[i]);
+ }
+
+ *list = h;
+ return 0;
+}
+
+u32 filename_hash(char *s)
+{
+ return crc32_le(0, s, strlen(s));
+}
+
+static inline unsigned filename_hashtable_index(char *filename)
+{
+ return filename_hash(filename) % filename_hashtable_size;
+}
+
+/**
+ * Checks if filename @filename is in hashtable @list
+ */
+int filename_on_list(char *filename, struct hlist_head *list)
+{
+ struct filename_entry *entry;
+ struct hlist_node *cursor;
+ unsigned hashtable_index = filename_hashtable_index(filename);
+
+ hlist_for_each_entry(entry, cursor, &list[hashtable_index],
+ entries_list) {
+ if (strcmp(entry->filename, filename) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * Adds filename @filename to hashtable @list.
+ * The filename contents are copied.
+ * The proper mutex must be held.
+ */
+static int __add_filename_to_list(char *filename, struct hlist_head *list)
+{
+ int ret = 0;
+ struct filename_entry *entry = NULL;
+ unsigned hashtable_index = filename_hashtable_index(filename);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+ ret = -ENOMEM;
+ goto out_error;
+ }
+ INIT_HLIST_NODE(&entry->entries_list);
+
+ entry->filename = kstrdup(filename, GFP_KERNEL);
+ if (entry->filename == NULL) {
+ ret = -ENOMEM;
+ goto out_error;
+ }
+
+ hlist_add_head(&entry->entries_list, &list[hashtable_index]);
+
+ return ret;
+
+ out_error:
+ if (entry != NULL) {
+ if (entry->filename != NULL)
+ kfree(entry->filename);
+ kfree(entry);
+ }
+ return ret;
+}
+
+static int add_filename_to_list_unique(char *filename, struct hlist_head *list,
+ struct mutex *mutex)
+{
+ int ret = 0;
+
+ mutex_lock(mutex);
+ if (!filename_on_list(filename, list))
+ ret = __add_filename_to_list(filename, list);
+ mutex_unlock(mutex);
+
+ return ret;
+}
+
+/**
+ * Removes filename @filename from hashtable @list
+ * Frees filename entry and its contents.
+ * Returns true (non-zero) if entry was found and removed.
+ */
+int remove_filename_from_list(char *filename, struct hlist_head *list)
+{
+ struct filename_entry *entry;
+ struct hlist_node *cursor;
+ unsigned hashtable_index = filename_hashtable_index(filename);
+
+ hlist_for_each_entry(entry, cursor, &list[hashtable_index],
+ entries_list) {
+ if (strcmp(entry->filename, filename) == 0) {
+ hlist_del(&entry->entries_list);
+ free_filename_entry(entry);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static inline unsigned traced_pid_hash(pid_t pid)
+{
+ return pid % TRACED_HASH_SIZE;
+}
+
+/**
+ * Adds pid @pid to traced pids with trace job @job.
+ */
+int add_traced_pid(pid_t pid, struct trace_job *job,
+ struct hlist_head *hashtable)
+{
+ int ret = 0;
+ struct traced_pid_entry *entry = NULL;
+ unsigned hashtable_index = traced_pid_hash(pid);
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+ ret = -ENOMEM;
+ goto out_error;
+ }
+ INIT_HLIST_NODE(&entry->entries_list);
+ entry->trace_job = job;
+ entry->pid = pid;
+
+ hlist_add_head(&entry->entries_list, &hashtable[hashtable_index]);
+
+ return ret;
+
+ out_error:
+ kfree(entry);
+ return ret;
+}
+
+/**
+ * Removes trace job for pid @pid.
+ * Frees entry and its contents.
+ * Does not free job.
+ */
+int remove_traced_pid(pid_t pid, struct hlist_head *hashtable)
+{
+ struct traced_pid_entry *entry = NULL;
+ unsigned hashtable_index = traced_pid_hash(pid);
+ struct hlist_node *cursor;
+
+ hlist_for_each_entry(entry, cursor, &hashtable[hashtable_index],
+ entries_list) {
+ if (entry->pid == pid) {
+ hlist_del(&entry->entries_list);
+ kfree(entry);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+struct traced_pid_entry *find_traced_pid(pid_t pid,
+ struct hlist_head *hashtable)
+{
+ struct traced_pid_entry *entry = NULL;
+ unsigned hashtable_index = traced_pid_hash(pid);
+ struct hlist_node *cursor;
+
+ hlist_for_each_entry(entry, cursor, &hashtable[hashtable_index],
+ entries_list) {
+ if (entry->pid == pid)
+ return entry;
+ }
+ return NULL;
+}
+
+/**
+ Structure describing tracing or monitoring job.
+*/
+struct trace_job {
+ struct delayed_work work;
+ char *filename;
+ pid_t pid;
+ struct trace_marker start_marker;
+};
+
+char *create_trace_filename(char *filename)
+{
+ char *basename = NULL;
+ u32 hash;
+ int filename_len = strlen(filename);
+ char *file_name = NULL;
+ char *short_name = NULL;
+ char *slash_pos;
+
+ hash = crc32_le(0, filename, filename_len);
+
+ slash_pos = strrchr(filename, '/');
+ if (slash_pos == NULL) {
+ printk(KERN_WARNING "File name does not contain slash\n");
+ goto out;
+ }
+
+ basename = kmalloc(short_name_len + 1, GFP_KERNEL);
+
+ if (basename == NULL) {
+ printk(KERN_WARNING "Cannot allocate memory for basename\n");
+ goto out;
+ }
+ strncpy(basename, slash_pos + 1, short_name_len);
+ basename[short_name_len] = '\0';
+
+ file_name = kasprintf(GFP_KERNEL, "%s-%x", basename, hash);
+ if (file_name == NULL) {
+ printk(KERN_WARNING "Cannot allocate memory for file name\n");
+ goto out;
+ }
+
+ short_name = kasprintf(GFP_KERNEL, filename_template, file_name);
+ if (short_name == NULL) {
+ printk(KERN_WARNING "Cannot allocate memory for short name\n");
+ goto out;
+ }
+
+ out:
+ if (file_name != NULL)
+ kfree(file_name);
+ if (basename != NULL)
+ kfree(basename);
+ return short_name;
+}
+
+static void do_finish_monitoring(struct trace_job *trace_job)
+{
+ struct task_struct *process = NULL;
+ int ticks = -1;
+
+ read_lock(&tasklist_lock);
+ process = find_task_by_pid(trace_job->pid);
+ if (process != NULL)
+ ticks = delayacct_blkio_ticks(process);
+ read_unlock(&tasklist_lock);
+
+ if (ticks == -1) {
+ /* Process was terminated earlier than our timeout, stopping monitoring was handled by exit hook */
+ goto out;
+ }
+
+ if (ticks > tracing_ticks_threshold) {
+ /* Add app to tracing list if it does not appear there yet */
+#ifdef CONFIG_PREFETCH_DEBUG
+ printk(KERN_INFO
+ "Application %s qualifies for prefetching, ticks=%d\n",
+ trace_job->filename, ticks);
+#endif
+ mutex_lock(&prefetch_apps_list_mutex);
+ if (!filename_on_list(trace_job->filename, prefetch_apps_list)) {
+ __add_filename_to_list(trace_job->filename,
+ prefetch_apps_list);
+#ifdef CONFIG_PREFETCH_DEBUG
+ printk(KERN_INFO
+ "Added application %s to prefetching list\n",
+ trace_job->filename);
+#endif
+ }
+ mutex_unlock(&prefetch_apps_list_mutex);
+ } else {
+ /* App does not require prefetching; remove it from the tracing list if it is there */
+ mutex_lock(&prefetch_apps_list_mutex);
+ remove_filename_from_list(trace_job->filename,
+ prefetch_apps_list);
+ mutex_unlock(&prefetch_apps_list_mutex);
+ }
+ out:
+ return;
+}
+
+static void finish_trace_job(struct trace_job *trace_job)
+{
+ mutex_lock(&traced_pids_mutex);
+ if (!remove_traced_pid(trace_job->pid, traced_pids))
+ printk(KERN_WARNING
+ "Did not remove pid %d from traced pids, inconsistency in pids handling, filename for job=%s\n",
+ trace_job->pid, trace_job->filename);
+ mutex_unlock(&traced_pids_mutex);
+
+ kfree(trace_job->filename);
+ kfree(trace_job);
+}
+
+static void finish_monitoring(struct work_struct *work)
+{
+ struct trace_job *trace_job =
+ container_of(container_of(work, struct delayed_work, work),
+ struct trace_job, work);
+ do_finish_monitoring(trace_job);
+ finish_trace_job(trace_job);
+}
+
+static void finish_tracing(struct work_struct *work)
+{
+ struct trace_marker end_marker;
+ void *trace_fragment = NULL;
+ int trace_fragment_size = 0;
+ int ret;
+ struct trace_job *trace_job =
+ container_of(container_of(work, struct delayed_work, work),
+ struct trace_job, work);
+ char *trace_filename = NULL;
+
+ do_finish_monitoring(trace_job);
+
+ ret = prefetch_stop_trace(&end_marker);
+
+ if (ret < 0) {
+ printk(KERN_WARNING "Failed to stop trace for application %s\n",
+ trace_job->filename);
+ end_marker = trace_job->start_marker; /*at least release as much as possible */
+ goto out_release;
+ }
+
+ ret = get_prefetch_trace_fragment(trace_job->start_marker,
+ end_marker,
+ &trace_fragment,
+ &trace_fragment_size);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Failed to fetch trace fragment for application %s, error=%d\n",
+ trace_job->filename, ret);
+ goto out_release;
+ }
+
+ if (trace_fragment_size <= 0) {
+ printk(KERN_WARNING "Empty trace for application %s\n",
+ trace_job->filename);
+ goto out_release;
+ }
+
+ trace_filename = create_trace_filename(trace_job->filename);
+ if (trace_filename == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for short filename, trace for application %s not saved\n",
+ trace_job->filename);
+ goto out_free_release;
+ }
+
+ sort_trace_fragment(trace_fragment, trace_fragment_size);
+ /*
+ * NOTE: a race between saving and loading the trace is possible, but it should
+ * only result in reading the prefetch file failing or prefetching being less efficient.
+ */
+ ret =
+ prefetch_save_trace_fragment(trace_filename, trace_fragment,
+ trace_fragment_size);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Failed to save trace for application %s to file %s, error=%d\n",
+ trace_job->filename, trace_filename, ret);
+ goto out_free_release;
+ }
+
+ out_free_release:
+ free_trace_buffer(trace_fragment, trace_fragment_size);
+
+ out_release:
+ ret = prefetch_release_trace(end_marker);
+ if (ret < 0)
+ printk(KERN_WARNING
+ "Releasing trace for app tracing returned error, error=%d\n",
+ ret);
+ if (trace_filename != NULL)
+ kfree(trace_filename);
+ finish_trace_job(trace_job);
+}
+
+static int start_tracing_job(char *filename)
+{
+ int ret = 0;
+ struct trace_job *trace_job;
+
+ trace_job = kzalloc(sizeof(*trace_job), GFP_KERNEL);
+
+ if (trace_job == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory to start tracing for app %s\n",
+ filename);
+ ret = -ENOMEM;
+ goto out_error;
+ }
+
+ trace_job->filename = kstrdup(filename, GFP_KERNEL);
+
+ if (trace_job->filename == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for filename to start tracing for app %s\n",
+ filename);
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ ret = prefetch_start_trace(&trace_job->start_marker);
+ if (ret < 0) {
+ printk(KERN_WARNING "Failed to start tracing for app %s\n",
+ filename);
+ goto out_free;
+ }
+
+ trace_job->pid = current->pid;
+
+ mutex_lock(&traced_pids_mutex);
+ add_traced_pid(trace_job->pid, trace_job, traced_pids);
+ mutex_unlock(&traced_pids_mutex);
+
+ INIT_DELAYED_WORK(&trace_job->work, finish_tracing);
+ schedule_delayed_work(&trace_job->work, HZ * tracing_timeout);
+
+#ifdef CONFIG_PREFETCH_DEBUG
+ printk(KERN_INFO "Successfully started tracing for application %s\n",
+ filename);
+#endif
+
+ return 0;
+
+ out_free:
+ if (trace_job != NULL) {
+ if (trace_job->filename != NULL)
+ kfree(trace_job->filename);
+ kfree(trace_job);
+ }
+ out_error:
+ return ret;
+}
+
+static int start_monitoring_job(char *filename)
+{
+ int ret = 0;
+ struct trace_job *trace_job;
+
+ trace_job = kzalloc(sizeof(*trace_job), GFP_KERNEL);
+
+ if (trace_job == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory to start monitoring for app %s\n",
+ filename);
+ ret = -ENOMEM;
+ goto out_error;
+ }
+
+ trace_job->filename = kstrdup(filename, GFP_KERNEL);
+
+ if (trace_job->filename == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for filename to start monitoring for app %s\n",
+ filename);
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ trace_job->pid = current->pid;
+
+ mutex_lock(&traced_pids_mutex);
+ add_traced_pid(trace_job->pid, trace_job, traced_pids);
+ mutex_unlock(&traced_pids_mutex);
+
+ INIT_DELAYED_WORK(&trace_job->work, finish_monitoring);
+ schedule_delayed_work(&trace_job->work, HZ * tracing_timeout);
+
+ return 0;
+
+ out_free:
+ if (trace_job != NULL) {
+ if (trace_job->filename != NULL)
+ kfree(trace_job->filename);
+ kfree(trace_job);
+ }
+ out_error:
+ return ret;
+}
+
+int start_app_prefetch(char *filename)
+{
+ char *trace_filename = NULL;
+ int ret = 0;
+
+ trace_filename = create_trace_filename(filename);
+ if (trace_filename == NULL) {
+ printk(KERN_WARNING
893+ "Cannot allocate memory for short filename, cannot start prefetetching for application %s\n",
+ filename);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = do_prefetch_from_file(trace_filename);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Failed to start prefetching for application %s, error=%d\n",
+ filename, ret);
+ goto out_free;
+ }
+
+ out_free:
+ kfree(trace_filename);
+
+ out:
+ return ret;
+}
+
+void try_app_prefetch(char *filename)
+{
+ int app_on_list;
+
+ if (!enabled)
+ return;
+
+ mutex_lock(&prefetch_apps_blacklist_mutex);
+ if (filename_on_list(filename, prefetch_apps_blacklist)) {
+#ifdef CONFIG_PREFETCH_DEBUG
+ printk(KERN_INFO
+ "Not doing tracing nor prefetching for blacklisted file %s\n",
+ filename);
+#endif
+ mutex_unlock(&prefetch_apps_blacklist_mutex);
+ return;
+ }
+ mutex_unlock(&prefetch_apps_blacklist_mutex);
+
+ mutex_lock(&prefetch_apps_list_mutex);
+ app_on_list = filename_on_list(filename, prefetch_apps_list);
+ mutex_unlock(&prefetch_apps_list_mutex);
+
+ if (app_on_list) {
+ /* Start tracing and schedule end tracing work */
+ start_tracing_job(filename);
+
+ if (prefetch_enabled) {
+ start_app_prefetch(filename);
+ }
+ } else {
+ start_monitoring_job(filename);
+ }
+}
+
+void prefetch_exec_hook(char *filename)
+{
+ try_app_prefetch(filename);
+}
+
+/**
+ Prefetch hook for intercepting exit() of process.
+*/
+void prefetch_exit_hook(pid_t pid)
+{
+ struct traced_pid_entry *entry = NULL;
+ if (!initialized || !enabled)
+ return;
+
+ mutex_lock(&traced_pids_mutex);
+ entry = find_traced_pid(pid, traced_pids);
+ if (entry != NULL)
+ do_finish_monitoring(entry->trace_job);
+ mutex_unlock(&traced_pids_mutex);
+ /*NOTE: job is not cancelled, it will wake up and clean up after itself */
+}
+
+#define PREFETCH_PATH_MAX 512
+#define PREFETCH_PATH_MAX_S "512"
+
+ssize_t app_prefetch_proc_write(struct file *proc_file,
+ const char __user * buffer, size_t count,
+ loff_t * ppos)
+{
+ char *name;
+ int e = 0;
+ int tmp;
+ int r;
+ char *s = NULL;
+
+ if (count >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ name = kmalloc(count + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ if (copy_from_user(name, buffer, count)) {
+ e = -EFAULT;
+ goto out;
+ }
+
+ /* strip the optional newline */
+ if (count && name[count - 1] == '\n')
+ name[count - 1] = '\0';
+ else
+ name[count] = '\0';
+
+ if (param_match(name, "prefetch enable")) {
+ printk(KERN_INFO "Prefetching for apps enabled\n");
+ prefetch_enabled = 1;
+ goto out;
+ }
+
+ if (param_match(name, "prefetch disable")) {
+ printk(KERN_INFO "Prefetching for apps disabled\n");
+ prefetch_enabled = 0;
+ goto out;
+ }
+
+ if (param_match(name, "enable")) {
+ printk(KERN_INFO "App prefetching module enabled\n");
+ enabled = 1;
+ goto out;
+ }
+
+ if (param_match(name, "disable")) {
+ printk(KERN_INFO "App prefetching module disabled\n");
+ enabled = 0;
+ goto out;
+ }
+
+ if (param_match_prefix(name, "set tracing timeout")) {
+ r = sscanf(name, "set tracing timeout %d", &tmp);
+ if (r != 1) {
+ e = -EINVAL;
+ printk(KERN_WARNING
+ "Wrong parameter to set tracing timeout command, command was: %s\n",
+ name);
+ goto out;
+ }
+ if (tmp <= 0) {
+ e = -EINVAL;
+ printk(KERN_WARNING
+ "Wrong timeout specified, must be >0, timeout was: %d\n",
+ tmp);
+ goto out;
+ }
+ tracing_timeout = tmp;
+ printk(KERN_INFO "Set tracing timeout to %d seconds\n",
+ tracing_timeout);
+ goto out;
+ }
+
+ if (param_match(name, "clear app-list")) {
+ clear_hashtable(prefetch_apps_list, filename_hashtable_size,
+ &prefetch_apps_list_mutex);
+ printk(KERN_INFO "List of traced applications cleared\n");
+ goto out;
+ }
+
+ if (param_match_prefix(name, "add app-list")) {
+ s = kzalloc(PREFETCH_PATH_MAX + 1, GFP_KERNEL);
+ if (s == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for path\n");
+ e = -ENOMEM;
+ goto out;
+ }
+ r = sscanf(name, "add app-list %" PREFETCH_PATH_MAX_S "s", s);
+ if (r != 1) {
+ e = -EINVAL;
+ printk(KERN_WARNING
+ "Wrong parameter to add app-list command, command was: %s\n",
+ name);
+ } else {
+ e = add_filename_to_list_unique(s, prefetch_apps_list,
+ &prefetch_apps_list_mutex);
+ if (e < 0)
+ printk(KERN_WARNING
+ "Failed to add application %s to prefetched applications list, error=%d\n",
+ s, e);
+ }
+ kfree(s);
+ goto out;
+ }
+
+ if (param_match(name, "clear app-blacklist")) {
+ clear_hashtable(prefetch_apps_blacklist,
+ filename_hashtable_size,
+ &prefetch_apps_blacklist_mutex);
+ printk(KERN_INFO "Blacklist of traced applications cleared\n");
+ goto out;
+ }
+
+ if (param_match_prefix(name, "add app-blacklist")) {
+ s = kzalloc(PREFETCH_PATH_MAX + 1, GFP_KERNEL);
+ if (s == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for path\n");
+ e = -ENOMEM;
+ goto out;
+ }
+
+ r = sscanf(name, "add app-blacklist %s", s);
+ if (r != 1) {
+ e = -EINVAL;
+ printk(KERN_WARNING
+ "Wrong parameter to add app-blacklist command, command was: %s\n",
+ name);
+ } else {
+ e = add_filename_to_list_unique(s,
+ prefetch_apps_blacklist,
+ &prefetch_apps_blacklist_mutex);
+ if (e < 0)
+ printk(KERN_WARNING
+ "Failed to add application %s to blacklisted applications list, error=%d\n",
+ s, e);
+ }
+ kfree(s);
+ goto out;
+ }
+ out:
+ kfree(name);
+
+ return e ? e : count;
+}
+
+void seq_print_filename_list(struct seq_file *m, struct hlist_head *list)
+{
+ struct filename_entry *entry;
+ struct hlist_node *cursor;
+ int i;
+
+ for (i = 0; i < filename_hashtable_size; ++i) {
+ hlist_for_each_entry(entry, cursor, &list[i], entries_list) {
+ seq_printf(m, "%s\n", entry->filename);
+ }
+ }
+}
+
+static void *app_prefetch_proc_start(struct seq_file *m, loff_t * pos)
+{
+ if (*pos != 0)
+ return NULL;
+
+ return &tracing_ticks_threshold; /*whatever pointer, must not be NULL */
+}
+
+static void *app_prefetch_proc_next(struct seq_file *m, void *v, loff_t * pos)
+{
+ return NULL;
+}
+
+static int app_prefetch_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "### Traced applications: ###\n");
+ mutex_lock(&prefetch_apps_list_mutex);
+ seq_print_filename_list(m, prefetch_apps_list);
+ mutex_unlock(&prefetch_apps_list_mutex);
+
+ seq_printf(m, "### Blacklisted applications: ###\n");
+ mutex_lock(&prefetch_apps_blacklist_mutex);
+ seq_print_filename_list(m, prefetch_apps_blacklist);
+ mutex_unlock(&prefetch_apps_blacklist_mutex);
+
+ return 0;
+}
+
+static void app_prefetch_proc_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations seq_app_prefetch_op = {
+ .start = app_prefetch_proc_start,
+ .next = app_prefetch_proc_next,
+ .stop = app_prefetch_proc_stop,
+ .show = app_prefetch_proc_show,
+};
+
+static int app_prefetch_proc_open(struct inode *inode, struct file *proc_file)
+{
+ return seq_open(proc_file, &seq_app_prefetch_op);
+}
+
+static int app_prefetch_proc_release(struct inode *inode,
+ struct file *proc_file)
+{
+ return seq_release(inode, proc_file);
+}
+
+static struct file_operations proc_app_prefetch_fops = {
+ .owner = THIS_MODULE,
+ .open = app_prefetch_proc_open,
+ .release = app_prefetch_proc_release,
+ .write = app_prefetch_proc_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static int app_list_show(struct seq_file *m, void *v)
+{
+ mutex_lock(&prefetch_apps_list_mutex);
+ seq_print_filename_list(m, prefetch_apps_list);
+ mutex_unlock(&prefetch_apps_list_mutex);
+
+ return 0;
+}
+
+static int app_list_open(struct inode *inode, struct file *proc_file)
+{
+ return single_open(proc_file, app_list_show, NULL);
+}
+
+static int app_list_release_generic(struct inode *inode, struct file *proc_file)
+{
+ return single_release(inode, proc_file);
+}
+
+static struct file_operations proc_app_list_fops = {
+ .owner = THIS_MODULE,
+ .open = app_list_open,
+ .release = app_list_release_generic,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static int app_blacklist_show(struct seq_file *m, void *v)
+{
+ mutex_lock(&prefetch_apps_blacklist_mutex);
+ seq_print_filename_list(m, prefetch_apps_blacklist);
+ mutex_unlock(&prefetch_apps_blacklist_mutex);
+
+ return 0;
+}
+
+static int app_blacklist_open(struct inode *inode, struct file *proc_file)
+{
+ return single_open(proc_file, app_blacklist_show, NULL);
+}
+
+static struct file_operations proc_app_blacklist_fops = {
+ .owner = THIS_MODULE,
+ .open = app_blacklist_open,
+ .release = app_list_release_generic,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static __init int app_prefetch_init(void)
+{
+ struct proc_dir_entry *entry;
+ int ret;
+
+ /* Initialize hashtables */
+ ret =
+ initialize_hashtable(&prefetch_apps_blacklist,
+ filename_hashtable_size);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Cannot initialize app blacklist hashtable, error=%d\n",
+ ret);
+ goto out_error;
+ }
+
+ ret =
+ initialize_hashtable(&prefetch_apps_list, filename_hashtable_size);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Cannot initialize app hashtable, error=%d\n", ret);
+ goto out_error;
+ }
+
+ ret = initialize_hashtable(&traced_pids, TRACED_HASH_SIZE);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Cannot initialize traced pids hashtable, error=%d\n",
+ ret);
+ goto out_error;
+ }
+
+ if (prefetch_proc_dir == NULL) {
+ printk(KERN_WARNING
+ "Prefetch proc directory not present, proc interface for app prefetching will not be available\n");
+ } else {
+ entry = create_proc_entry("app", 0600, prefetch_proc_dir);
+ if (entry)
+ entry->proc_fops = &proc_app_prefetch_fops;
+ entry = create_proc_entry("app-list", 0600, prefetch_proc_dir);
+ if (entry)
+ entry->proc_fops = &proc_app_list_fops;
+ entry =
+ create_proc_entry("app-blacklist", 0600, prefetch_proc_dir);
+ if (entry)
+ entry->proc_fops = &proc_app_blacklist_fops;
+ }
+
+ printk(KERN_INFO
+ "App prefetching module started, enabled=%d, prefetching=%d\n",
+ enabled, prefetch_enabled);
+
+ initialized = 1;
+
+ return 0;
+
+ out_error:
+ return ret;
+}
+
+static void app_prefetch_exit(void)
+{
+ remove_proc_entry("app", prefetch_proc_dir);
+}
+
+MODULE_AUTHOR("Krzysztof Lichota <lichota@mimuw.edu.pl>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Application tracing and prefetching during startup");
+
+module_init(app_prefetch_init);
+module_exit(app_prefetch_exit);
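
The proc write handler above accepts plain-text commands: "enable"/"disable" for the whole module, "prefetch enable"/"prefetch disable", "set tracing timeout <seconds>", "clear app-list", "add app-list <path>", "clear app-blacklist" and "add app-blacklist <path>"; the current lists can be read back from the app-list and app-blacklist entries. A minimal userspace sketch of driving it, assuming the core module registers prefetch_proc_dir as /proc/prefetch (the directory is created in prefetch_core.c and its name may differ):

	#include <stdio.h>

	/* Write one command to the app prefetching proc file. */
	static int app_prefetch_cmd(const char *cmd)
	{
		FILE *f = fopen("/proc/prefetch/app", "w");	/* assumed path */
		if (!f)
			return -1;
		fprintf(f, "%s\n", cmd);	/* the handler strips the trailing newline */
		return fclose(f);
	}

	int main(void)
	{
		app_prefetch_cmd("set tracing timeout 15");
		app_prefetch_cmd("add app-blacklist /usr/bin/updatedb");
		return 0;
	}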
diff --git a/mm/prefetch_boot.c b/mm/prefetch_boot.c
new file mode 100644
index 0000000..da7f89b
--- /dev/null
+++ b/mm/prefetch_boot.c
@@ -0,0 +1,396 @@
+/*
+ * linux/mm/prefetch_boot.c
+ *
+ * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl>
+ *
+ * This is the boot prefetch support implementation.
+ * It consists mainly of the proc interface; the rest is done by init scripts using that interface.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/prefetch_core.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/limits.h>
+#include <asm/uaccess.h>
+#include <linux/proc_fs.h>
+#include <asm/current.h>
+
+/*************** Boot tracing **************/
+#define DEFAULT_BOOT_TRACE_FILENAME_TEMPLATE "/.prefetch-boot-trace.%s"
+static char *filename_template = DEFAULT_BOOT_TRACE_FILENAME_TEMPLATE;
+module_param(filename_template, charp, 0);
+MODULE_PARM_DESC(filename_template,
+ "Template for boot trace name, where trace will be saved and read from. %s will be replaced with name of phase. The default is: "
+ DEFAULT_BOOT_TRACE_FILENAME_TEMPLATE);
+
+/*maximum size of phase name, not including trailing NULL*/
+#define PHASE_NAME_MAX 10
+/*maximum size as string, keep in sync with PHASE_NAME_MAX*/
+#define PHASE_NAME_MAX_S "10"
+
+/*maximum size of command name, not including trailing NULL*/
+#define CMD_NAME_MAX 10
+/*maximum size as string, keep in sync with CMD_NAME_MAX*/
+#define CMD_NAME_MAX_S "10"
+
+/*Enables/disables whole functionality of the module*/
+static int enabled = 1;
+module_param(enabled, bool, 0);
+MODULE_PARM_DESC(enabled,
+ "Enables or disables whole boot prefetching module functionality (tracing and prefetching)");
+
+/*Controls whether prefetching should be done along with tracing.*/
+static int prefetch_enabled = 1;
+module_param(prefetch_enabled, bool, 0);
+MODULE_PARM_DESC(prefetch_enabled,
+ "Enables or disables prefetching during boot. If disabled, only tracing will be done");
+
+static struct mutex boot_prefetch_mutex;
+/**
+ * Phase start marker, protected by boot_prefetch_mutex.
+*/
+static struct trace_marker boot_start_marker;
+static char boot_tracing_phase[PHASE_NAME_MAX + 1] = "init";
+static int boot_tracing_running = 0;
+
+/**
+ Saves boot trace fragment for phase @phase_name which
+ starts at boot_start_marker and ends at @end_phase_marker.
+
+ boot_prefetch_mutex must be held while calling this function.
+*/
+static int prefetch_save_boot_trace(char *phase_name,
+ struct trace_marker end_phase_marker)
+{
+ char *boot_trace_filename = NULL;
+ int ret = 0;
+
+ boot_trace_filename = kasprintf(GFP_KERNEL, filename_template,
+ phase_name);
+
+ if (boot_trace_filename == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for trace filename in phase %s\n",
+ phase_name);
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = prefetch_save_trace_between_markers(boot_trace_filename,
+ boot_start_marker,
+ end_phase_marker);
+ out:
+ if (boot_trace_filename != NULL)
+ kfree(boot_trace_filename);
+ return ret;
+}
+
+/**
+ Starts tracing for given boot phase.
+ boot_prefetch_mutex is taken by this function.
+*/
+int prefetch_start_boot_tracing_phase(char *phase_name)
+{
+ int r;
+ int ret = 0;
+ struct trace_marker marker;
+
+ mutex_lock(&boot_prefetch_mutex);
+
+ if (boot_tracing_running) {
+ /*boot tracing was already running */
+ ret = prefetch_continue_trace(&marker);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Cannot continue tracing, error=%d\n", ret);
+ goto out_unlock;
+ }
+
+ r = prefetch_save_boot_trace(boot_tracing_phase, marker);
+ if (r < 0)
+ /*NOTE: just warn and continue, prefetching might still succeed and phase has been started */
+ printk(KERN_WARNING
+ "Saving boot trace failed, phase %s, error=%d\n",
+ boot_tracing_phase, r);
+
+ boot_start_marker = marker;
+ } else {
+ /*first phase of tracing */
+ ret = prefetch_start_trace(&boot_start_marker);
+ if (ret < 0) {
+ printk(KERN_WARNING "Cannot start tracing, error=%d\n",
+ ret);
+ goto out_unlock;
+ }
+ }
+
+ strncpy(boot_tracing_phase, phase_name, PHASE_NAME_MAX);
+ boot_tracing_phase[PHASE_NAME_MAX] = 0;
+
+ boot_tracing_running = 1;
+
+#ifdef PREFETCH_DEBUG
+ printk(KERN_INFO "Boot tracing phase %s started\n",
+ phase_name);
+ print_marker("Marker: ", boot_start_marker);
+#endif
+ out_unlock:
+ mutex_unlock(&boot_prefetch_mutex);
+ return ret;
+}
+
+int prefetch_start_boot_prefetching_phase(char *phase_name)
+{
+ char *boot_trace_filename = NULL;
+ int ret = 0;
+ if (!prefetch_enabled) {
+ printk(KERN_INFO
+ "Prefetching disabled, not starting prefetching for boot phase: %s\n",
+ phase_name);
+ return 0;
+ }
+
+ boot_trace_filename = kasprintf(GFP_KERNEL, filename_template,
+ phase_name);
+
+ if (boot_trace_filename == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for trace filename\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ printk(KERN_INFO "Starting prefetching for boot phase: %s\n",
+ phase_name);
+ ret = do_prefetch_from_file(boot_trace_filename);
+
+ if (ret < 0)
+ printk(KERN_WARNING "Failed to prefetch trace from file %s, error=%d\n",
+ boot_trace_filename, ret);
+
+ out:
+ if (boot_trace_filename != NULL)
+ kfree(boot_trace_filename);
+
+ return ret;
+}
+
+/**
+ Starts the next phase of boot.
+ Starts tracing. Then, if a trace is available, loads it and starts
+ prefetching.
+ @cmd_name is the name of the action; if you want to keep its contents,
+ copy them somewhere, as the buffer will be deallocated.
+ @phase_name is the name of the new phase; if you want to keep its contents,
+ copy them somewhere, as the buffer will be deallocated.
+*/
+static int prefetch_start_boot_phase(char *cmd_name, char *phase_name)
+{
+ int ret = 0;
+ int start_prefetching = 0;
+ int start_tracing = 0;
+
+ if (strcmp(cmd_name, "prefetch") == 0)
+ start_prefetching = 1;
+ else if (strcmp(cmd_name, "trace") == 0)
+ start_tracing = 1;
+ else if (strcmp(cmd_name, "both") == 0) {
+ start_prefetching = 1;
+ start_tracing = 1;
+ } else {
+ printk(KERN_WARNING
+ "Boot prefetch: unknown command: %s for phase %s\n",
+ cmd_name, phase_name);
+ return -EINVAL;
+ }
+ if (start_tracing)
+ prefetch_start_boot_tracing_phase(phase_name);
+
+ if (start_prefetching)
+ ret = prefetch_start_boot_prefetching_phase(phase_name);
+
+ return ret;
+}
+
+static int prefetch_stop_boot_tracing(void)
+{
+ struct trace_marker marker;
+ int ret = 0;
+ printk(KERN_INFO "Stopping boot tracing and prefetching\n");
+
+ mutex_lock(&boot_prefetch_mutex);
+
+ if (!boot_tracing_running) {
+ printk(KERN_WARNING
+ "Trying to stop boot tracing although tracing is not running\n");
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ ret = prefetch_stop_trace(&marker);
+ if (ret < 0)
+ printk(KERN_WARNING
+ "Stopping tracing for boot tracing returned error, error=%d\n",
+ ret);
+
+ boot_tracing_running = 0;
+
+#ifdef PREFETCH_DEBUG
+ print_marker("Boot stop marker: ", marker);
+#endif
+
+ ret = prefetch_save_boot_trace(boot_tracing_phase, marker);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "Saving final boot trace failed, phase %s, error=%d\n",
+ boot_tracing_phase, ret);
+ goto out_unlock_release;
+ }
+
+ out_unlock_release:
+ ret = prefetch_release_trace(marker);
+ if (ret < 0)
+ printk(KERN_WARNING
+ "Releasing trace for boot tracing returned error, error=%d\n",
+ ret);
+
+ out_unlock:
+ mutex_unlock(&boot_prefetch_mutex);
+ return ret;
+}
+
+ssize_t boot_prefetch_proc_write(struct file * proc_file,
+ const char __user * buffer, size_t count,
+ loff_t * ppos)
+{
+ char *name;
+ int e = 0;
+ int r;
+ char *phase_name;
+ char *cmd_name;
+
+ if (count >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ name = kmalloc(count + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ if (copy_from_user(name, buffer, count)) {
+ e = -EFAULT;
+ goto out;
+ }
+
+ /* strip the optional newline */
+ if (count && name[count - 1] == '\n')
+ name[count - 1] = '\0';
+ else
+ name[count] = '\0';
+
+ if (param_match(name, "prefetch enable")) {
+ printk(KERN_INFO "Prefetching enabled\n");
+ prefetch_enabled = 1;
+ goto out;
+ }
+
+ if (param_match(name, "prefetch disable")) {
+ printk(KERN_INFO "Prefetching disabled\n");
+ prefetch_enabled = 0;
+ goto out;
+ }
+
+ if (param_match_prefix(name, "start ")) {
+ phase_name = kzalloc(PHASE_NAME_MAX + 1, GFP_KERNEL); /*1 for terminating NULL */
+ if (phase_name == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for phase name\n");
+ goto out;
+ }
+ cmd_name = kzalloc(CMD_NAME_MAX + 1, GFP_KERNEL); /*1 for terminating NULL */
+ if (cmd_name == NULL) {
+ printk(KERN_WARNING
+ "Cannot allocate memory for command name\n");
+ goto out;
+ }
+ r = sscanf(name,
+ "start %" CMD_NAME_MAX_S "s phase %" PHASE_NAME_MAX_S
+ "s", cmd_name, phase_name);
+ if (r != 2) {
+ e = -EINVAL;
+ printk(KERN_WARNING
+ "Wrong parameter to start command, command was: %s\n",
+ name);
+ kfree(phase_name);
+ kfree(cmd_name);
+ goto out;
+ }
+ e = prefetch_start_boot_phase(cmd_name, phase_name);
+ kfree(phase_name);
+ kfree(cmd_name);
+ goto out;
+ }
+
+ if (param_match(name, "boot tracing stop")) {
+ e = prefetch_stop_boot_tracing();
+ goto out;
+ }
+ out:
+ kfree(name);
+
+ return e ? e : count;
+}
+
+static int boot_prefetch_proc_open(struct inode *inode, struct file *proc_file)
+{
+ return 0;
+}
+
+static int boot_prefetch_proc_release(struct inode *inode,
+ struct file *proc_file)
+{
+ return 0;
+}
+
+static struct file_operations proc_boot_prefetch_fops = {
+ .owner = THIS_MODULE,
+ .open = boot_prefetch_proc_open,
+ .release = boot_prefetch_proc_release,
+ .write = boot_prefetch_proc_write,
+};
+
+static __init int boot_prefetch_init(void)
+{
+ struct proc_dir_entry *entry;
+
+ mutex_init(&boot_prefetch_mutex);
+
+ if (prefetch_proc_dir == NULL) {
+ printk(KERN_WARNING
+ "Prefetch proc directory not present, proc interface for boot prefetching will not be available\n");
+ } else {
+ entry = create_proc_entry("boot", 0600, prefetch_proc_dir);
+ if (entry)
+ entry->proc_fops = &proc_boot_prefetch_fops;
+ }
+ printk(KERN_INFO
+ "Boot prefetching module started, enabled=%d, prefetching=%d\n",
+ enabled, prefetch_enabled);
+
+ return 0;
+}
+
+static void boot_prefetch_exit(void)
+{
+ remove_proc_entry("boot", prefetch_proc_dir);
+}
+
+MODULE_AUTHOR("Krzysztof Lichota <lichota@mimuw.edu.pl>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION
+ ("Boot prefetching - support for tracing and prefetching during system boot");
+
+module_init(boot_prefetch_init);
+module_exit(boot_prefetch_exit);
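
The boot interface above is meant to be driven by init scripts: "start <command> phase <name>" (where <command> is "trace", "prefetch" or "both", and <name> is at most PHASE_NAME_MAX characters) saves the trace of the previous phase and begins the next one, and "boot tracing stop" ends tracing and saves the trace for the last phase. A sketch of a two-phase boot sequence, again assuming the proc directory is /proc/prefetch; the phase names are illustrative:

	#include <stdio.h>

	static int boot_prefetch_cmd(const char *cmd)
	{
		FILE *f = fopen("/proc/prefetch/boot", "w");	/* assumed path */
		if (!f)
			return -1;
		fprintf(f, "%s\n", cmd);
		return fclose(f);
	}

	int main(void)
	{
		boot_prefetch_cmd("start both phase init");	/* early boot */
		/* ... init proceeds ... */
		boot_prefetch_cmd("start both phase gui");	/* e.g. when X starts */
		/* ... */
		boot_prefetch_cmd("boot tracing stop");		/* save the last phase */
		return 0;
	}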
diff --git a/mm/prefetch_core.c b/mm/prefetch_core.c
new file mode 100644
index 0000000..001470b
--- /dev/null
+++ b/mm/prefetch_core.c
@@ -0,0 +1,1527 @@
+/*
+ * linux/mm/prefetch_core.c
+ *
+ * Copyright (C) 2006 Fengguang Wu <wfg@ustc.edu>
+ * Copyright (C) 2007 Krzysztof Lichota <lichota@mimuw.edu.pl>
+ *
+ * This is the prefetch core: common code used for tracing and for saving trace files.
+ * It is used by the prefetching modules, such as boot and app.
+ *
+ * Based on filecache code by Fengguang Wu.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/prefetch_core.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/radix-tree.h>
+#include <linux/page-flags.h>
+#include <linux/pagevec.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/writeback.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <linux/file.h>
+#include <linux/delayacct.h>
+#include <linux/file.h>
+#include <linux/sort.h>
+
+char trace_file_magic[4] = { 'P', 'F', 'C', 'H' };
+
+/*Inode walk session*/
+struct inode_walk_session {
+ int private_session;
+ pgoff_t next_offset;
+ struct {
+ unsigned long cursor;
+ unsigned long origin;
+ unsigned long size;
+ struct inode **inodes;
+ } ivec;
+ struct {
+ unsigned long pos;
+ unsigned long i_state;
+ struct inode *inode;
+ struct inode *pinned_inode;
+ } icur;
+ int inodes_walked;
+ int pages_walked;
+ int pages_referenced;
+ int page_blocks;
+};
+
+/*Disables/enables the whole module functionality*/
+static int enabled = 1;
+module_param(enabled, bool, 0);
+MODULE_PARM_DESC(enabled,
+ "Enables or disables whole prefetching module functionality (tracing and prefetching)");
+
+#define DEFAULT_TRACE_SIZE_KB 256
+
+/*NOTE: changing the trace size at runtime is not supported - do not do it.*/
+unsigned trace_size_kb = DEFAULT_TRACE_SIZE_KB; /*in kilobytes */
+module_param(trace_size_kb, uint, 0);
+MODULE_PARM_DESC(trace_size_kb,
+ "Size of memory allocated for trace (in KB), set to 0 to use default");
+
+static inline unsigned prefetch_trace_size(void)
+{
+ if (likely(trace_size_kb > 0))
+ return trace_size_kb << 10;
+
+ /*if set to 0, then use default */
+ return DEFAULT_TRACE_SIZE_KB * 1024;
+}
+
+enum tracing_command {
+ START_TRACING,
+ STOP_TRACING,
+ CONTINUE_TRACING
+};
+
+/*Structure holding all information needed for trace*/
+struct prefetch_trace_t {
+ spinlock_t prefetch_trace_lock;
+ unsigned int buffer_used;
+ unsigned int buffer_size;
+ void *buffer;
+ int generation;
+ int overflow;
+ int overflow_reported;
+ /*fields above protected by prefetch_trace_lock */
+ int page_release_traced;
+ /**
+ * Number of traces started and not finished.
+ * Used to check if it is necessary to add entries to trace.
+ */
+ atomic_t tracers_count;
+ int trace_users; /*number of trace users, protected by prefetch_trace_mutex */
+ struct mutex prefetch_trace_mutex;
+};
+
+struct prefetch_trace_t prefetch_trace = {
+ SPIN_LOCK_UNLOCKED, /*prefetch_trace_lock */
+ 0, /*buffer_used */
+ 0, /*buffer_size */
+ NULL, /*buffer */
+ 0, /*generation */
+ 0, /*overflow */
+ 0, /*overflow_reported */
+ 0, /*page_release_traced */
+ ATOMIC_INIT(0), /*tracers_count */
+ 0, /*trace_users */
+ __MUTEX_INITIALIZER(prefetch_trace.prefetch_trace_mutex) /*prefetch_trace_mutex */
+};
+
+/**
+ Set if walk_pages() decided that it is the start of tracing
+ and bits should be cleared, not recorded.
+ Its use is protected by inode_lock.
+ If lock breaking is enabled, this variable makes sure that a
+ second caller of walk_pages(START_TRACING) will not
+ race with the first caller and will not start recording changes.
+*/
+static int clearing_in_progress = 0;
1853+
1854+/**
1855+ * Timer used for measuring tracing and prefetching time.
1856+*/
1857+struct prefetch_timer {
1858+ struct timespec ts_start;
1859+ struct timespec ts_end;
1860+ char *name;
1861+};
1862+
1863+static void clear_trace(void);
1864+
1865+/**
1866+ * Starts timer.
1867+*/
1868+void prefetch_start_timing(struct prefetch_timer *timer, char *name)
1869+{
1870+ timer->name = name;
1871+ do_posix_clock_monotonic_gettime(&timer->ts_start);
1872+}
1873+
1874+/**
1875+ * Stops timer.
1876+*/
1877+void prefetch_end_timing(struct prefetch_timer *timer)
1878+{
1879+ do_posix_clock_monotonic_gettime(&timer->ts_end);
1880+}
1881+
1882+/**
1883+ * Prints timer name and time duration into kernel log.
1884+*/
1885+void prefetch_print_timing(struct prefetch_timer *timer)
1886+{
1887+ struct timespec ts = timespec_sub(timer->ts_end, timer->ts_start);
1888+ s64 ns = timespec_to_ns(&ts);
1889+
1890+ printk(KERN_INFO "Prefetch timing (%s): %lld ns, %ld.%.9ld\n",
1891+ timer->name, ns, ts.tv_sec, ts.tv_nsec);
1892+}
1893+
1894+struct async_prefetch_params {
1895+ void *trace;
1896+ int trace_size;
1897+};
1898+
1899+static int prefetch_do_prefetch(void *trace, int trace_size);
1900+
1901+static int async_prefetch_thread(void *p)
1902+{
1903+ int ret;
1904+ struct async_prefetch_params *params =
1905+ (struct async_prefetch_params *)p;
1906+#ifdef PREFETCH_DEBUG
1907+ printk(KERN_INFO "Started async prefetch thread\n");
1908+#endif
1909+ ret = prefetch_do_prefetch(params->trace, params->trace_size);
1910+ kfree(params);
1911+ return ret;
1912+}
1913+
1914+static int prefetch_start_prefetch_async(void *trace, int trace_size)
1915+{
1916+ struct async_prefetch_params *params =
1917+ kmalloc(sizeof(struct async_prefetch_params), GFP_KERNEL);
1918+ if (params == NULL)
1919+ return -ENOMEM;
1920+ params->trace = trace;
1921+ params->trace_size = trace_size;
1922+
1923+	if (kernel_thread(async_prefetch_thread, params, 0) < 0) {
1924+		printk(KERN_WARNING "Cannot start async prefetch thread\n");
+		kfree(params);	/*thread was not started, so it cannot free params */
1925+		return -EINVAL;
1926+	}
1927+ return 0;
1928+}
1929+
1930+static int prefetch_start_prefetch_sync(void *trace, int trace_size)
1931+{
1932+ return prefetch_do_prefetch(trace, trace_size);
1933+}
1934+
1935+/**
1936+ * Starts prefetch based on given @trace, whose length (in bytes) is @trace_size.
1937+ * If @async is false, the function returns only after prefetching has finished.
1938+ * Otherwise, prefetching is started in a separate thread and the function
1939+ * returns immediately.
1940+*/
1941+int prefetch_start_prefetch(void *trace, int trace_size, int async)
1942+{
1943+ if (async)
1944+ return prefetch_start_prefetch_async(trace, trace_size);
1945+ else
1946+ return prefetch_start_prefetch_sync(trace, trace_size);
1947+}
1948+
1949+EXPORT_SYMBOL(prefetch_start_prefetch);
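+/*
+ * Usage sketch (hypothetical caller, e.g. a boot or app prefetching
+ * module built on top of this core):
+ *
+ *	int err = prefetch_start_prefetch(trace, trace_size, 1);
+ *	if (err < 0)
+ *		printk(KERN_WARNING "prefetch failed, error=%d\n", err);
+ *
+ * With async=1 the readaheads are issued from a separate kernel thread,
+ * so the caller must not free @trace until prefetching has finished;
+ * with async=0 the buffer may be freed as soon as the call returns.
+ */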
1950+
1951+static int prefetch_do_prefetch(void *trace, int trace_size)
1952+{
1953+ struct prefetch_trace_record *record = trace;
1954+ struct prefetch_trace_record *prev_record = NULL;
1955+#ifdef PREFETCH_DEBUG
1956+ struct prefetch_timer timer;
1957+#endif
1958+ struct super_block *sb = NULL;
1959+ struct file *file = NULL;
1960+ struct inode *inode = NULL;
1961+ int ret = 0;
1962+ int readaheads_failed = 0;
1963+ int readahead_ret;
1964+
1965+ if (!enabled)
1966+ return -ENODEV; /*module disabled */
1967+
1968+#ifdef PREFETCH_DEBUG
1969+ printk(KERN_INFO "Delay io ticks before prefetching: %d\n",
1970+ (int)delayacct_blkio_ticks(current));
1971+ prefetch_start_timing(&timer, "Prefetching");
1972+#endif
1973+
1974+	/*note: record + 1 advances the pointer by one whole record */
1975+	for (; (void *)(record + 1) <= trace + trace_size;
1976+	     prev_record = record, ++record) {
1977+ if (prev_record == NULL
1978+ || prev_record->device != record->device) {
1979+ /*open next device */
1980+ if (sb)
1981+ drop_super(sb);
1982+ sb = user_get_super(record->device);
1983+ }
1984+ if (sb == NULL)
1985+ continue; /*no such device or error getting device */
1986+
1987+ if (prev_record == NULL || prev_record->device != record->device
1988+ || prev_record->inode_no != record->inode_no) {
1989+ /*open next file */
1990+ if (inode)
1991+ iput(inode);
1992+
1993+ inode = iget(sb, record->inode_no);
1994+			if (inode == NULL || IS_ERR(inode)) {	/*iget() may return NULL on failure */
1995+ /*no such inode or other error */
1996+ inode = NULL;
1997+ continue;
1998+ }
1999+
2000+ if (file)
2001+ put_filp(file);
2002+
2003+ file = get_empty_filp();
2004+ if (file == NULL) {
2005+ ret = -ENFILE;
2006+ goto out;
2007+ }
2008+			/*only the most important file fields are filled; ext3_readpages does not use the rest anyway */
2009+ file->f_op = inode->i_fop;
2010+ file->f_mapping = inode->i_mapping;
2011+ file->f_mode = FMODE_READ;
2012+ file->f_flags = O_RDONLY;
2013+ }
2014+ if (inode == NULL)
2015+ continue;
2016+
2017+ readahead_ret =
2018+ force_page_cache_readahead(inode->i_mapping, file,
2019+ record->range_start,
2020+ record->range_length);
2021+ if (readahead_ret < 0) {
2022+ readaheads_failed++;
2023+#ifdef PREFETCH_DEBUG
2024+ if (readaheads_failed < 10) {
2025+ printk(KERN_WARNING
2026+ "Readahead failed, device=%d:%d, inode=%ld, start=%ld, length=%ld, error=%d\n",
2027+ MAJOR(record->device),
2028+ MINOR(record->device), record->inode_no,
2029+ record->range_start,
2030+ record->range_length, readahead_ret);
2031+ }
2032+ if (readaheads_failed == 10)
2033+ printk(KERN_WARNING
2034+				       "Readahead failure limit reached, not printing further failures\n");
2035+#endif
2036+ }
2037+ }
2038+
2039+ out:
2040+ if (readaheads_failed > 0)
2041+ printk(KERN_INFO "Readaheads not performed: %d\n",
2042+ readaheads_failed);
2043+
2044+ if (sb)
2045+ drop_super(sb);
2046+ if (inode)
2047+ iput(inode);
2048+ if (file)
2049+ put_filp(file);
2050+
2051+#ifdef PREFETCH_DEBUG
2052+ printk(KERN_INFO "Delay io ticks after prefetching: %d\n",
2053+ (int)delayacct_blkio_ticks(current));
2054+ prefetch_end_timing(&timer);
2055+ prefetch_print_timing(&timer);
2056+#endif
2057+ return ret;
2058+}
2059+
2060+/**
2061+ * Adds trace record. Does not sleep.
2062+*/
2063+void prefetch_trace_add(dev_t device,
2064+ unsigned long inode_no,
2065+ pgoff_t range_start, pgoff_t range_length)
2066+{
2067+ struct prefetch_trace_record *record;
2068+
2069+ spin_lock(&prefetch_trace.prefetch_trace_lock);
2070+
2071+ if (prefetch_trace.buffer_used + sizeof(struct prefetch_trace_record) >=
2072+ prefetch_trace.buffer_size) {
2073+ prefetch_trace.overflow = 1;
2074+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
2075+ return;
2076+ }
2077+
2078+ record =
2079+ (struct prefetch_trace_record *)(prefetch_trace.buffer +
2080+ prefetch_trace.buffer_used);
2081+ prefetch_trace.buffer_used += sizeof(struct prefetch_trace_record);
2082+
2083+ record->device = device;
2084+ record->inode_no = inode_no;
2085+ record->range_start = range_start;
2086+ record->range_length = range_length;
2087+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
2088+}
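+/*
+ * Example (values are illustrative): a call such as
+ *	prefetch_trace_add(dev, 1523, 10, 4);
+ * records "pages 10-13 of inode 1523 on device dev were referenced" and
+ * is later replayed as a single force_page_cache_readahead() call.
+ */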
2089+
2090+#define IVEC_SIZE (PAGE_SIZE / sizeof(struct inode *))
2091+
2092+/*
2093+ * Full: there is more data following.
2094+ */
2095+static int ivec_full(struct inode_walk_session *s)
2096+{
2097+ return !s->ivec.cursor ||
2098+ s->ivec.cursor > s->ivec.origin + s->ivec.size;
2099+}
2100+
2101+static int ivec_push(struct inode_walk_session *s, struct inode *inode)
2102+{
2103+ if (!atomic_read(&inode->i_count))
2104+ return 0;
2105+ if (!inode->i_mapping)
2106+ return 0;
2107+
2108+ s->ivec.cursor++;
2109+
2110+ if (s->ivec.size >= IVEC_SIZE)
2111+ return 1;
2112+
2113+ if (s->ivec.cursor > s->ivec.origin)
2114+ s->ivec.inodes[s->ivec.size++] = inode;
2115+ return 0;
2116+}
2117+
2118+/*
2119+ * Traverse the inode lists in order - newest first -
2120+ * and fill @s->ivec.inodes with inodes positioned in [@pos, @pos+IVEC_SIZE).
2121+ */
2122+static int ivec_fill(struct inode_walk_session *s, unsigned long pos)
2123+{
2124+ struct inode *inode;
2125+ struct super_block *sb;
2126+
2127+ s->ivec.origin = pos;
2128+ s->ivec.cursor = 0;
2129+ s->ivec.size = 0;
2130+
2131+ /*
2132+ * We have a cursor inode, clean and expected to be unchanged.
2133+ */
2134+ if (s->icur.inode && pos >= s->icur.pos &&
2135+ !(s->icur.i_state & I_DIRTY) &&
2136+ s->icur.i_state == s->icur.inode->i_state) {
2137+ inode = s->icur.inode;
2138+ s->ivec.cursor = s->icur.pos;
2139+ goto continue_from_saved;
2140+ }
2141+
2142+ spin_lock(&sb_lock);
2143+ list_for_each_entry(sb, &super_blocks, s_list) {
2144+ list_for_each_entry(inode, &sb->s_dirty, i_list) {
2145+ if (ivec_push(s, inode))
2146+ goto out_full_unlock;
2147+ }
2148+ list_for_each_entry(inode, &sb->s_io, i_list) {
2149+ if (ivec_push(s, inode))
2150+ goto out_full_unlock;
2151+ }
2152+ }
2153+ spin_unlock(&sb_lock);
2154+
2155+ list_for_each_entry(inode, &inode_in_use, i_list) {
2156+ if (ivec_push(s, inode))
2157+ goto out_full;
2158+ continue_from_saved:
2159+ ;
2160+ }
2161+
2162+ list_for_each_entry(inode, &inode_unused, i_list) {
2163+ if (ivec_push(s, inode))
2164+ goto out_full;
2165+ }
2166+
2167+ return 0;
2168+
2169+ out_full_unlock:
2170+ spin_unlock(&sb_lock);
2171+ out_full:
2172+ return 1;
2173+}
2174+
2175+static struct inode *ivec_inode(struct inode_walk_session *s, unsigned long pos)
2176+{
2177+ if ((ivec_full(s) && pos >= s->ivec.origin + s->ivec.size)
2178+ || pos < s->ivec.origin)
2179+ ivec_fill(s, pos);
2180+
2181+ if (pos >= s->ivec.cursor)
2182+ return NULL;
2183+
2184+ s->icur.pos = pos;
2185+ s->icur.inode = s->ivec.inodes[pos - s->ivec.origin];
2186+ return s->icur.inode;
2187+}
2188+
2189+static void add_referenced_page_range(struct inode_walk_session *s,
2190+ struct address_space *mapping,
2191+ pgoff_t start, pgoff_t len)
2192+{
2193+ struct inode *inode;
2194+
2195+ s->pages_referenced += len;
2196+ s->page_blocks++;
2197+ if (!clearing_in_progress) {
2198+ inode = mapping->host;
2199+ if (inode && inode->i_sb && inode->i_sb->s_bdev)
2200+ prefetch_trace_add(inode->i_sb->s_bdev->bd_dev,
2201+ inode->i_ino, start, len);
2202+ }
2203+}
2204+
2205+/**
2206+ Add page to trace if it was referenced.
2207+
2208+ NOTE: spinlock might be held while this function is called.
2209+*/
2210+void prefetch_add_page_to_trace(struct page *page)
2211+{
2212+ struct address_space *mapping;
2213+ struct inode *inode;
2214+
2215+ /*if not tracing, nothing to be done */
2216+ if (atomic_read(&prefetch_trace.tracers_count) <= 0)
2217+ return;
2218+
2219+ /*if page was not touched */
2220+ if (!PageReferenced(page))
2221+ return;
2222+
2223+ /*swap pages are not interesting */
2224+ if (PageSwapCache(page))
2225+ return;
2226+
2227+ /*no locking, just stats */
2228+ prefetch_trace.page_release_traced++;
2229+
2230+	mapping = page_mapping(page);
+	if (mapping == NULL)
+		return;	/*not a file-backed page, nothing to trace */
2231+
2232+	inode = mapping->host;
2233+ if (inode && inode->i_sb && inode->i_sb->s_bdev)
2234+ prefetch_trace_add(inode->i_sb->s_bdev->bd_dev, inode->i_ino,
2235+ page_index(page), 1);
2236+}
2237+
2238+/**
2239+ Hook called when page is about to be freed, so we have to check
2240+ if it was referenced, as inode walk will not notice it.
2241+
2242+ NOTE: spinlock is held while this function is called.
2243+*/
2244+void prefetch_page_release_hook(struct page *page)
2245+{
2246+ prefetch_add_page_to_trace(page);
2247+}
2248+
2249+static void walk_file_cache(struct inode_walk_session *s,
2250+ struct address_space *mapping)
2251+{
2252+ int i;
2253+ pgoff_t len = 0;
2254+ struct pagevec pvec;
2255+ struct page *page;
2256+ struct page *page0 = NULL;
2257+ int current_page_referenced = 0;
2258+ int previous_page_referenced = 0;
2259+ pgoff_t start = 0;
2260+
2261+ for (;;) {
2262+ pagevec_init(&pvec, 0);
2263+ pvec.nr = radix_tree_gang_lookup(&mapping->page_tree,
2264+ (void **)pvec.pages,
2265+ start + len, PAGEVEC_SIZE);
2266+
2267+ if (pvec.nr == 0) {
2268+			/*no more pages present -
2269+			   add the last range, if any */
2270+ if (previous_page_referenced)
2271+ add_referenced_page_range(s, mapping, start,
2272+ len);
2273+ goto out;
2274+ }
2275+
2276+ if (!page0) {
2277+ page0 = pvec.pages[0];
2278+ previous_page_referenced = PageReferenced(page0);
2279+ }
2280+
2281+ for (i = 0; i < pvec.nr; i++) {
2282+
2283+ page = pvec.pages[i];
2284+ current_page_referenced = TestClearPageReferenced(page);
2285+
2286+ s->pages_walked++;
2287+
2288+ if (page->index == start + len
2289+ && previous_page_referenced ==
2290+ current_page_referenced)
2291+ len++;
2292+ else {
2293+ if (previous_page_referenced)
2294+ add_referenced_page_range(s, mapping,
2295+ start, len);
2296+
2297+ page0 = page;
2298+ start = page->index;
2299+ len = 1;
2300+ }
2301+ previous_page_referenced = current_page_referenced;
2302+ }
2303+ }
2304+
2305+ out:
2306+ return;
2307+}
2308+
2309+static void show_inode(struct inode_walk_session *s, struct inode *inode)
2310+{
2311+ ++s->inodes_walked; /*just for stats, so not using atomic_inc() */
2312+
2313+ if (inode->i_mapping)
2314+ walk_file_cache(s, inode->i_mapping);
2315+}
2316+
2317+/**
2318+ Allocates memory for trace buffer.
2319+ This memory should be freed using free_trace_buffer().
2320+*/
2321+void *alloc_trace_buffer(int len)
2322+{
2323+ return (void *)__get_free_pages(GFP_KERNEL, get_order(len));
2324+}
2325+
2326+EXPORT_SYMBOL(alloc_trace_buffer);
2327+
2328+/**
2329+ Frees memory allocated using alloc_trace_buffer().
2330+*/
2331+void free_trace_buffer(void *buffer, int len)
2332+{
2333+ free_pages((unsigned long)buffer, get_order(len));
2334+}
2335+
2336+EXPORT_SYMBOL(free_trace_buffer);
2337+
2338+/*NOTE: this function is called with inode_lock spinlock held*/
2339+static int inode_walk_show(struct inode_walk_session *s, loff_t pos)
2340+{
2341+ unsigned long index = pos;
2342+ struct inode *inode;
2343+
2344+ inode = ivec_inode(s, index);
2345+ BUG_ON(!inode);
2346+ show_inode(s, inode);
2347+
2348+ return 0;
2349+}
2350+
2351+static void *inode_walk_start(struct inode_walk_session *s, loff_t * pos)
2352+{
2353+ s->ivec.inodes = (struct inode **)__get_free_page(GFP_KERNEL);
2354+ if (!s->ivec.inodes)
2355+ return NULL;
2356+ s->ivec.size = 0;
2357+
2358+ spin_lock(&inode_lock);
2359+
2360+ BUG_ON(s->icur.pinned_inode);
2361+ s->icur.pinned_inode = s->icur.inode;
2362+ return ivec_inode(s, *pos) ? pos : NULL;
2363+}
2364+
2365+static void inode_walk_stop(struct inode_walk_session *s)
2366+{
2367+ if (s->icur.inode) {
2368+ __iget(s->icur.inode);
2369+ s->icur.i_state = s->icur.inode->i_state;
2370+ }
2371+
2372+ spin_unlock(&inode_lock);
2373+ free_page((unsigned long)s->ivec.inodes);
2374+
2375+ if (s->icur.pinned_inode) {
2376+ iput(s->icur.pinned_inode);
2377+ s->icur.pinned_inode = NULL;
2378+ }
2379+}
2380+
2381+/*NOTE: this function is called with inode_lock spinlock held*/
2382+static void *inode_walk_next(struct inode_walk_session *s, loff_t * pos)
2383+{
2384+ (*pos)++;
2385+
2386+ return ivec_inode(s, *pos) ? pos : NULL;
2387+}
2388+
2389+static struct inode_walk_session *inode_walk_session_create(void)
2390+{
2391+ struct inode_walk_session *s;
2392+ int err = 0;
2393+
2394+ s = kzalloc(sizeof(*s), GFP_KERNEL);
2395+ if (!s)
2396+ err = -ENOMEM;
2397+
2398+ return err ? ERR_PTR(err) : s;
2399+}
2400+
2401+static void inode_walk_session_release(struct inode_walk_session *s)
2402+{
2403+ if (s->icur.inode)
2404+ iput(s->icur.inode);
2405+ kfree(s);
2406+}
2407+
2408+/**
2409+ * Prints message followed by marker.
2410+*/
2411+void print_marker(char *msg, struct trace_marker marker)
2412+{
2413+	printk(KERN_INFO "%s %u.%u\n", msg, marker.generation, marker.position);
2414+}
2415+
2416+EXPORT_SYMBOL(print_marker);
2417+
2418+/**
2419+ Returns current trace marker.
2420+ Note: marker ranges are open on the right side, i.e.
2421+ [start_marker, end_marker)
2422+*/
2423+static struct trace_marker get_trace_marker(void)
2424+{
2425+ struct trace_marker marker;
2426+
2427+ spin_lock(&prefetch_trace.prefetch_trace_lock);
2428+ marker.position = prefetch_trace.buffer_used;
2429+ marker.generation = prefetch_trace.generation;
2430+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
2431+
2432+ return marker;
2433+}
2434+
2435+/**
2436+ Returns size of prefetch trace between start and end marker.
2437+ Returns <0 if error occurs.
2438+*/
2439+int prefetch_trace_fragment_size(struct trace_marker start_marker,
2440+ struct trace_marker end_marker)
2441+{
2442+ if (start_marker.generation != end_marker.generation)
2443+ return -EINVAL; /*trace must have wrapped around and trace is no longer available */
2444+ if (end_marker.position < start_marker.position)
2445+ return -ERANGE; /*invalid markers */
2446+
2447+ return end_marker.position - start_marker.position;
2448+}
2449+
2450+EXPORT_SYMBOL(prefetch_trace_fragment_size);
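+/*
+ * Usage sketch: the result is the distance between two markers of the
+ * same trace generation, in bytes of raw record data:
+ *
+ *	int size = prefetch_trace_fragment_size(start_marker, end_marker);
+ *	if (size < 0)
+ *		return size;	(markers invalid or trace wrapped around)
+ *
+ * size / sizeof(struct prefetch_trace_record) gives the record count.
+ */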
2451+
2452+/**
2453+ Returns position in trace buffer for given marker.
2454+ prefetch_trace_lock spinlock must be held when calling this function.
2455+ Returns < 0 in case of error.
2456+ Returns -ENOSPC if this marker is not in the buffer.
2457+ Note: marker ranges are open on the right side, so this position
2458+ might point to the first byte after the buffer for end markers.
2459+*/
2460+static int trace_marker_position_in_buffer(struct trace_marker marker)
2461+{
2462+ if (marker.generation != prefetch_trace.generation)
2463+ return -EINVAL; /*trace must have wrapped around and trace is no longer available */
2464+
2465+ if (prefetch_trace.buffer_used < marker.position)
2466+ return -ENOSPC;
2467+
2468+ /*for now simple, not circular buffer */
2469+ return marker.position;
2470+}
2471+
2472+/**
2473+ Fetches the fragment of trace between @start_marker and @end_marker.
2474+ On success, returns on @fragment_result memory (allocated using alloc_trace_buffer())
2475+ which holds the trace fragment, and its size on @fragment_size_result; <0 on error.
2476+ This memory should be freed using free_trace_buffer().
2477+ If the fragment size is 0, the returned fragment is NULL.
2478+*/
2479+int get_prefetch_trace_fragment(struct trace_marker start_marker,
2480+ struct trace_marker end_marker,
2481+ void **fragment_result,
2482+ int *fragment_size_result)
2483+{
2484+ int start_position;
2485+ int end_position;
2486+ int len;
2487+ int ret;
2488+ void *fragment;
2489+ int fragment_size;
2490+
2491+ fragment_size = prefetch_trace_fragment_size(start_marker, end_marker);
2492+ if (fragment_size < 0)
2493+ return fragment_size;
2494+ if (fragment_size == 0) {
2495+ *fragment_size_result = 0;
2496+ *fragment_result = NULL;
2497+ return 0;
2498+ }
2499+
2500+ fragment = alloc_trace_buffer(fragment_size);
2501+ if (fragment == NULL)
2502+ return -ENOMEM;
2503+
2504+ spin_lock(&prefetch_trace.prefetch_trace_lock);
2505+
2506+ start_position = trace_marker_position_in_buffer(start_marker);
2507+ end_position = trace_marker_position_in_buffer(end_marker);
2508+
2509+ if (start_position < 0) {
2510+ ret = -ESRCH;
2511+ goto out_free;
2512+ }
2513+ if (end_position < 0) {
2514+ ret = -ESRCH;
2515+ goto out_free;
2516+ }
2517+
2518+ len = end_position - start_position;
2519+ BUG_ON(len <= 0 || len != fragment_size);
2520+
2521+ memcpy(fragment, prefetch_trace.buffer + start_position, len);
2522+
2523+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
2524+
2525+ *fragment_result = fragment;
2526+ *fragment_size_result = fragment_size;
2527+ return 0;
2528+
2529+ out_free:
2530+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
2531+ free_trace_buffer(fragment, fragment_size);
2532+ return ret;
2533+}
2534+
2535+EXPORT_SYMBOL(get_prefetch_trace_fragment);
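+/*
+ * Usage sketch (hypothetical caller): fetch a private copy of the trace
+ * between two markers and hand it to the prefetcher:
+ *
+ *	void *fragment;
+ *	int fragment_size;
+ *
+ *	if (get_prefetch_trace_fragment(start_marker, end_marker,
+ *					&fragment, &fragment_size) == 0
+ *	    && fragment_size > 0) {
+ *		sort_trace_fragment(fragment, fragment_size);
+ *		prefetch_start_prefetch(fragment, fragment_size, 0);
+ *		free_trace_buffer(fragment, fragment_size);
+ *	}
+ */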
2536+
2537+struct file *kernel_open(char const *file_name, int flags, int mode)
2538+{
2539+ int orig_fsuid = current->fsuid;
2540+ int orig_fsgid = current->fsgid;
2541+ struct file *file = NULL;
2542+#if BITS_PER_LONG != 32
2543+ flags |= O_LARGEFILE;
2544+#endif
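+	/*temporarily act as root so trace files can always be opened */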
2545+ current->fsuid = 0;
2546+ current->fsgid = 0;
2547+
2548+ file = filp_open(file_name, flags, mode);
2549+ current->fsuid = orig_fsuid;
2550+ current->fsgid = orig_fsgid;
2551+ return file;
2552+}
2553+
2554+int kernel_close(struct file *file)
2555+{
2556+ if (file->f_op && file->f_op->flush) {
2557+ file->f_op->flush(file, current->files);
2558+ }
2559+ fput(file);
2560+
2561+ return 0; /*no errors known for now */
2562+}
2563+
2564+int kernel_write(struct file *file, unsigned long offset, const char *addr,
2565+ unsigned long count)
2566+{
2567+ mm_segment_t old_fs;
2568+ loff_t pos = offset;
2569+ int result = -ENOSYS;
2570+
2571+ if (!file->f_op->write)
2572+ goto fail;
2573+ old_fs = get_fs();
2574+ set_fs(get_ds());
2575+ result = file->f_op->write(file, addr, count, &pos);
2576+ set_fs(old_fs);
2577+ fail:
2578+ return result;
2579+}
2580+
2581+/**
2582+ * Compares 2 traces records and returns -1, 0 or 1, depending on result of comparison.
2583+ * Comparison is lexicographical on device, inode, range_start and range_length (range_length descending).
2584+ */
2585+static int trace_cmp(const void *p1, const void *p2)
2586+{
2587+ struct prefetch_trace_record *r1 = (struct prefetch_trace_record *)p1;
2588+ struct prefetch_trace_record *r2 = (struct prefetch_trace_record *)p2;
2589+
2590+ if (r1->device < r2->device)
2591+ return -1;
2592+ if (r1->device > r2->device)
2593+ return 1;
2594+
2595+ if (r1->inode_no < r2->inode_no)
2596+ return -1;
2597+ if (r1->inode_no > r2->inode_no)
2598+ return 1;
2599+
2600+ if (r1->range_start < r2->range_start)
2601+ return -1;
2602+ if (r1->range_start > r2->range_start)
2603+ return 1;
2604+
2605+ /*longer range_length is preferred as we want to fetch large fragments first */
2606+ if (r1->range_length < r2->range_length)
2607+ return 1;
2608+ if (r1->range_length > r2->range_length)
2609+ return -1;
2610+ return 0;
2611+}
2612+
2613+/**
2614+ * Sorts trace fragment by device, inode and start.
2615+*/
2616+void sort_trace_fragment(void *trace, int trace_size)
2617+{
2618+ sort(trace, trace_size / sizeof(struct prefetch_trace_record),
2619+ sizeof(struct prefetch_trace_record), trace_cmp, NULL);
2620+}
2621+
2622+EXPORT_SYMBOL(sort_trace_fragment);
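+/*
+ * Design note: sorting by (device, inode, range_start) lets
+ * prefetch_do_prefetch() reuse the open superblock and inode between
+ * consecutive records and issue readaheads in roughly sequential disk
+ * order; longer ranges sort first so large fragments are fetched early.
+ */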
2623+
2624+/**
2625+ * Saves trace fragment from buffer @fragment of size @fragment_size into file @filename.
2626+ * Returns 0 on success, <0 on error.
2627+*/
2628+int prefetch_save_trace_fragment(char *filename,
2629+ void *fragment, int fragment_size)
2630+{
2631+ int ret = 0;
2632+ int written = 0;
2633+ struct file *file;
2634+ struct prefetch_trace_header header;
2635+ int data_start = 0;
2636+
2637+ file = kernel_open(filename, O_CREAT | O_TRUNC | O_RDWR, 0600);
2638+
2639+ if (IS_ERR(file)) {
2640+ ret = PTR_ERR(file);
2641+ printk(KERN_WARNING
2642+ "Cannot open file %s for writing to save trace, error=%d\n",
2643+ filename, ret);
2644+ goto out;
2645+ }
2646+
2647+ data_start = sizeof(header);
2648+ /*copy magic signature */
2649+ memcpy(&header.magic[0], trace_file_magic, sizeof(header.magic));
2650+ header.version_major = PREFETCH_FORMAT_VERSION_MAJOR;
2651+ header.version_minor = PREFETCH_FORMAT_VERSION_MINOR;
2652+ header.data_start = data_start;
2653+
2654+ ret = kernel_write(file, 0, (char *)&header, sizeof(header));
2655+ if (ret < 0 || ret != sizeof(header)) {
2656+		printk(KERN_WARNING "Error while writing header to file %s, error=%d\n",
2657+ filename, ret);
2658+ goto out_close;
2659+ }
2660+
2661+ while (written < fragment_size) {
2662+ ret =
2663+ kernel_write(file, data_start + written, fragment + written,
2664+ fragment_size - written);
2665+
2666+ if (ret < 0) {
2667+			printk(KERN_WARNING "Error while writing to file %s, error=%d\n",
2668+ filename, ret);
2669+ goto out_close;
2670+ }
2671+ written += ret;
2672+ }
2673+	}
+	ret = 0;	/*whole fragment was written */
2674+ kernel_close(file);
2675+ out:
2676+ return ret;
2677+}
2678+
2679+EXPORT_SYMBOL(prefetch_save_trace_fragment);
2680+
2681+/**
2682+ * Saves trace fragment between @start_marker and @end_marker into file @filename.
2683+ * Returns 0 on success, <0 on error.
+ * See the usage sketch after prefetch_release_trace() below.
2684+*/
2685+int prefetch_save_trace_between_markers(char *filename,
2686+ struct trace_marker start_marker,
2687+ struct trace_marker end_marker)
2688+{
2689+ void *fragment = NULL;
2690+ int fragment_size = 0;
2691+ int ret = 0;
2692+
2693+ ret = get_prefetch_trace_fragment(start_marker,
2694+ end_marker,
2695+ &fragment, &fragment_size);
2696+
2697+ if (ret < 0) {
2698+ printk(KERN_WARNING
2699+ "Cannot save trace fragment - cannot get trace fragment, error=%d\n",
2700+ ret);
2701+ goto out;
2702+ }
2703+
2704+ ret = prefetch_save_trace_fragment(filename, fragment, fragment_size);
2705+ if (ret < 0) {
2706+ printk(KERN_WARNING
2707+ "Cannot save trace fragment - error saving file, error=%d\n",
2708+ ret);
2709+ goto out_free;
2710+ }
2711+
2712+ out_free:
2713+ if (fragment_size > 0)
2714+ free_trace_buffer(fragment, fragment_size);
2715+ out:
2716+ return ret;
2717+}
2718+
2719+EXPORT_SYMBOL(prefetch_save_trace_between_markers);
2720+
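+/**
+ Walks all inodes held in memory and their page cache pages, recording
+ ranges of referenced pages into the trace (or, for the first
+ START_TRACING caller, only clearing the referenced bits).
+ On success returns 0 and sets @marker as appropriate for @command;
+ returns <0 on error or when the trace buffer has overflowed.
+*/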
2721+static int walk_pages(enum tracing_command command, struct trace_marker *marker)
2722+{
2723+ void *retptr;
2724+ loff_t pos = 0;
2725+ int ret;
2726+ loff_t next;
2727+ struct inode_walk_session *s;
2728+ int clearing = 0;
2729+ int invalid_trace_counter = 0;
2730+ int report_overflow = 0;
2731+#ifdef PREFETCH_DEBUG
2732+ struct prefetch_timer walk_pages_timer;
2733+#endif
2734+
2735+ spin_lock(&prefetch_trace.prefetch_trace_lock);
2736+ if (prefetch_trace.overflow && !prefetch_trace.overflow_reported) {
2737+ prefetch_trace.overflow_reported = 1;
2738+ report_overflow = 1;
2739+ }
2740+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
2741+
2742+ if (report_overflow) {
2743+ if (command == STOP_TRACING) {
2744+ if (atomic_dec_return(&prefetch_trace.tracers_count) <
2745+ 0)
2746+ printk(KERN_WARNING
2747+ "Trace counter is invalid\n");
2748+ }
2749+ printk(KERN_WARNING "Prefetch buffer overflow\n");
2750+ return -ENOSPC;
2751+ }
2752+
2753+ s = inode_walk_session_create();
2754+ if (IS_ERR(s)) {
2755+ retptr = s;
2756+ goto out;
2757+ }
2758+
2759+ retptr = inode_walk_start(s, &pos);
2760+
2761+ if (IS_ERR(retptr))
2762+ goto out_error_session_release;
2763+
2764+ /*inode_lock spinlock held from here */
2765+ if (command == START_TRACING) {
2766+ if (atomic_inc_return(&prefetch_trace.tracers_count) == 1) {
2767+ /*prefetch_trace.tracers_count was 0, this is first tracer, so just clear bits */
2768+ clearing = 1;
2769+ clearing_in_progress = 1;
2770+ *marker = get_trace_marker();
2771+ }
2772+ }
2773+#ifdef PREFETCH_DEBUG
2774+ if (!clearing) {
2775+ prefetch_start_timing(&walk_pages_timer, "walk pages");
2776+ } else
2777+ prefetch_start_timing(&walk_pages_timer, "clearing pages");
2778+#endif
2779+
2780+ while (retptr != NULL) {
2781+ /*FIXME: add lock breaking */
2782+ ret = inode_walk_show(s, pos);
2783+ if (ret < 0) {
2784+ retptr = ERR_PTR(ret);
2785+ goto out_error;
2786+ }
2787+
2788+ next = pos;
2789+ retptr = inode_walk_next(s, &next);
2790+ if (IS_ERR(retptr))
2791+ goto out_error;
2792+ pos = next;
2793+ }
2794+
2795+ if (command == STOP_TRACING) {
2796+ if (atomic_dec_return(&prefetch_trace.tracers_count) < 0) {
2797+ invalid_trace_counter = 1;
2798+ }
2799+ *marker = get_trace_marker();
2800+ } else if (command == CONTINUE_TRACING) {
2801+ *marker = get_trace_marker();
2802+ }
2803+
2804+ out_error:
2805+ if (clearing)
2806+ clearing_in_progress = 0;
2807+
2808+ inode_walk_stop(s);
2809+ /*inode_lock spinlock released */
2810+#ifdef PREFETCH_DEBUG
2811+ if (clearing)
2812+ printk(KERN_INFO "Clearing run finished\n");
2813+#endif
2814+ if (invalid_trace_counter)
2815+ printk(KERN_WARNING "Trace counter is invalid\n");
2816+
2817+#ifdef PREFETCH_DEBUG
2818+ if (!IS_ERR(retptr)) {
2819+ prefetch_end_timing(&walk_pages_timer);
2820+ prefetch_print_timing(&walk_pages_timer);
2821+ printk(KERN_INFO
2822+ "Inodes walked: %d, pages walked: %d, referenced: %d"
2823+ " blocks: %d\n", s->inodes_walked, s->pages_walked,
2824+ s->pages_referenced, s->page_blocks);
2825+ }
2826+#endif
2827+
2828+ out_error_session_release:
2829+ inode_walk_session_release(s);
2830+ out:
2831+ return PTR_ERR(retptr);
2832+}
2833+
2834+/**
2835+ Starts tracing. If no error happens, returns on @marker a marker which points to the start of the trace.
2836+*/
2837+int prefetch_start_trace(struct trace_marker *marker)
2838+{
2839+ int ret;
2840+ if (!enabled)
2841+ return -ENODEV; /*module disabled */
2842+
2843+ ret = walk_pages(START_TRACING, marker);
2844+
2845+ if (ret >= 0) {
2846+ mutex_lock(&prefetch_trace.prefetch_trace_mutex);
2847+ prefetch_trace.trace_users++;
2848+ mutex_unlock(&prefetch_trace.prefetch_trace_mutex);
2849+ }
2850+ return ret;
2851+}
2852+
2853+EXPORT_SYMBOL(prefetch_start_trace);
2854+
2855+/**
2856+ Performs an interim tracing run; returns on @marker the current position in the trace.
2857+*/
2858+int prefetch_continue_trace(struct trace_marker *marker)
2859+{
2860+ if (!enabled)
2861+ return -ENODEV; /*module disabled */
2862+
2863+ return walk_pages(CONTINUE_TRACING, marker);
2864+}
2865+
2866+EXPORT_SYMBOL(prefetch_continue_trace);
2867+
2868+/**
2869+ Stops tracing; returns on @marker the end of the trace.
2870+*/
2871+int prefetch_stop_trace(struct trace_marker *marker)
2872+{
2873+ if (!enabled) {
2874+ /*trace might have been started when module was enabled */
2875+ if (atomic_dec_return(&prefetch_trace.tracers_count) < 0)
2876+ printk(KERN_WARNING
2877+ "Trace counter is invalid after decrementing it in disabled module\n");
2878+
2879+ return -ENODEV; /*module disabled */
2880+ }
2881+#ifdef PREFETCH_DEBUG
2882+ printk(KERN_INFO "Released pages traced: %d\n",
2883+ prefetch_trace.page_release_traced);
2884+#endif
2885+ return walk_pages(STOP_TRACING, marker);
2886+}
2887+
2888+EXPORT_SYMBOL(prefetch_stop_trace);
2889+
2890+/**
2891+ Releases the trace up to @end_marker.
2892+ Each successful call to prefetch_start_trace() should
2893+ be matched with exactly one call to prefetch_release_trace().
2894+ NOTE: end_marker is currently not used, but might
2895+ be used in the future to release only part of the trace.
2896+*/
2897+int prefetch_release_trace(struct trace_marker end_marker)
2898+{
2899+ mutex_lock(&prefetch_trace.prefetch_trace_mutex);
2900+
2901+ prefetch_trace.trace_users--;
2902+ if (prefetch_trace.trace_users == 0)
2903+ clear_trace();
2904+ if (prefetch_trace.trace_users < 0)
2905+ printk(KERN_WARNING "Trace users count is invalid, count=%d\n",
2906+ prefetch_trace.trace_users);
2907+
2908+ mutex_unlock(&prefetch_trace.prefetch_trace_mutex);
2909+
2910+ return 0;
2911+}
2912+
2913+EXPORT_SYMBOL(prefetch_release_trace);
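+/*
+ * Usage sketch of the whole tracing lifecycle (hypothetical caller; the
+ * file name is made up):
+ *
+ *	struct trace_marker start, end;
+ *
+ *	if (prefetch_start_trace(&start) < 0)
+ *		return;
+ *	(traced workload runs, referenced pages get recorded)
+ *	if (prefetch_stop_trace(&end) >= 0)
+ *		prefetch_save_trace_between_markers("/prefetch/app.trace",
+ *						    start, end);
+ *	prefetch_release_trace(end);
+ */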
2914+
2915+/**
2916+ * Loads trace fragment from @filename.
2917+ * Returns <0 in case of errors.
2918+ * If successful, returns a pointer to the trace data on @trace_buffer and its size on @trace_size;
2919+ * in that case the caller is responsible for freeing the buffer using free_trace_buffer().
2920+*/
2921+int prefetch_load_trace_fragment(char *filename, void **trace_buffer,
2922+ int *trace_size)
2923+{
2924+ struct file *file;
2925+ void *buffer;
2926+ int data_start;
2927+ int data_read = 0;
2928+ int raw_data_size;
2929+ int file_size;
2930+ int ret = 0;
2931+ struct prefetch_trace_header header;
2932+
2933+ file = kernel_open(filename, O_RDONLY, 0600);
2934+
2935+ if (IS_ERR(file)) {
2936+ ret = PTR_ERR(file);
2937+		printk(KERN_WARNING "Cannot open file %s for reading, error=%d\n",
2938+		       filename, ret);
2939+ return ret;
2940+ }
2941+
2942+ file_size = file->f_mapping->host->i_size;
2943+
2944+ ret = kernel_read(file, 0, (char *)&header, sizeof(header));
2945+
2946+ if (ret < 0 || ret != sizeof(header)) {
2947+ printk(KERN_WARNING
2948+ "Cannot read trace header for trace file %s, error=%d\n",
2949+ filename, ret);
2950+ ret = -EINVAL;
2951+ goto out_close;
2952+ }
2953+
2954+	if (strncmp(&header.magic[0], &trace_file_magic[0],
2955+		    sizeof(header.magic)) != 0) {
2957+ printk(KERN_WARNING
2958+ "Trace file %s does not have valid trace file signature\n",
2959+ filename);
2960+ ret = -EINVAL;
2961+ goto out_close;
2962+ }
2963+
2964+ if (header.version_major != PREFETCH_FORMAT_VERSION_MAJOR) {
2965+ printk(KERN_WARNING
2966+ "Trace file %s has unsupported major version %d\n",
2967+ filename, header.version_major);
2968+ ret = -EINVAL;
2969+ goto out_close;
2970+ }
2971+ data_start = header.data_start;
2972+ if (data_start < sizeof(header)) {
2973+		/*NOTE: exceeding the file size is checked implicitly below by the raw_data_size check */
2974+ printk(KERN_WARNING
2975+ "Trace file %s contains invalid data start: %d\n",
2976+ filename, data_start);
2977+ ret = -EINVAL;
2978+ goto out_close;
2979+ }
2980+
2981+ raw_data_size = file_size - data_start;
2982+ if (raw_data_size < 0) {
2983+ ret = -EINVAL;
2984+ printk(KERN_WARNING "Invalid trace file %s, not loading\n",
2985+ filename);
2986+ goto out_close;
2987+ }
2988+
2989+ if (raw_data_size == 0) {
2990+ ret = -EINVAL;
2991+ printk(KERN_INFO "Empty trace file %s, not loading\n",
2992+ filename);
2993+ goto out_close;
2994+ }
2995+
2996+ buffer = alloc_trace_buffer(raw_data_size);
2997+ if (buffer == NULL) {
2998+ printk(KERN_INFO "Cannot allocate memory for trace %s\n",
2999+ filename);
3000+ ret = -ENOMEM;
3001+ goto out_close;
3002+ }
3003+
3004+ while (data_read < raw_data_size) {
3005+ ret =
3006+ kernel_read(file, data_start + data_read,
3007+ buffer + data_read, raw_data_size - data_read);
3008+
3009+ if (ret < 0) {
3010+			printk(KERN_WARNING "Error while reading from file %s, error=%d\n",
3011+ filename, ret);
3012+ goto out_close_free;
3013+ }
3014+ if (ret == 0) {
3015+ printk(KERN_WARNING
3016+ "File too short, data read=%d, expected size=%d\n",
3017+ data_read, raw_data_size);
3018+ break;
3019+ }
3020+
3021+ data_read += ret;
3022+ }
3023+
3024+ if (data_read == raw_data_size) {
3025+ *trace_size = raw_data_size;
3026+ *trace_buffer = buffer;
3027+ } else {
3028+ printk(KERN_WARNING
3029+ "Trace file size changed beneath us, cancelling read\n");
3030+ ret = -ETXTBSY;
3031+ goto out_close_free;
3032+ }
3033+
3034+ /*everything OK, caller will free the buffer */
3035+ kernel_close(file);
3036+ return 0;
3037+
3038+ out_close_free:
3039+	free_trace_buffer(buffer, raw_data_size);	/*buffer was allocated with raw_data_size, not file_size */
3040+ out_close:
3041+ kernel_close(file);
3042+ return ret;
3043+}
3044+
3045+/**
3046+ * Prefetches files based on trace read from @filename.
3047+*/
3048+int do_prefetch_from_file(char *filename)
3049+{
3050+ int ret = 0;
3051+ void *buffer = NULL;
3052+ int buffer_size;
3053+
3054+ ret = prefetch_load_trace_fragment(filename, &buffer, &buffer_size);
3055+ if (ret < 0) {
3056+ printk(KERN_WARNING "Reading trace file %s failed, error=%d\n",
3057+ filename, ret);
3058+ goto out;
3059+ }
3060+
3061+ ret = prefetch_start_prefetch(buffer, buffer_size, 0);
3062+ if (ret < 0) {
3063+ printk(KERN_WARNING
3064+ "Prefetching for trace file %s failed, error=%d\n",
3065+ filename, ret);
3066+ goto out_free;
3067+ }
3068+#ifdef PREFETCH_DEBUG
3069+ printk(KERN_INFO "Prefetch from file %s successful\n", filename);
3070+#endif
3071+
3072+ out_free:
3073+ free_trace_buffer(buffer, buffer_size);
3074+ out:
3075+ return ret;
3076+}
3077+
3078+EXPORT_SYMBOL(do_prefetch_from_file);
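+/*
+ * Usage sketch (the path is illustrative):
+ *
+ *	do_prefetch_from_file("/prefetch/boot.trace");
+ *
+ * The file must start with the header written by
+ * prefetch_save_trace_fragment(), otherwise loading is rejected.
+ */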
3079+
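+/**
+ Resets the trace buffer (allocating it on first use) and bumps the
+ generation, so markers from previous traces become invalid.
+ Both callers hold prefetch_trace_mutex while calling it.
+*/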
3080+static void clear_trace(void)
3081+{
3082+ void *new_buffer = NULL;
3083+
3084+#ifdef PREFETCH_DEBUG
3085+ printk(KERN_INFO "Clearing prefetch trace buffer\n");
3086+#endif
3087+
3088+ spin_lock(&prefetch_trace.prefetch_trace_lock);
3089+
3090+ if (prefetch_trace.buffer == NULL) {
3091+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
3092+
3093+ new_buffer = alloc_trace_buffer(prefetch_trace_size());
3094+
3095+ if (new_buffer == NULL) {
3096+ printk(KERN_WARNING
3097+ "Cannot allocate memory for trace buffer\n");
3098+ goto out;
3099+ }
3100+
3101+ spin_lock(&prefetch_trace.prefetch_trace_lock);
3102+
3103+ if (prefetch_trace.buffer != NULL) {
3104+ /*someone already allocated it */
3105+ free_trace_buffer(new_buffer, prefetch_trace_size());
3106+ } else {
3107+ prefetch_trace.buffer = new_buffer;
3108+ prefetch_trace.buffer_size = prefetch_trace_size();
3109+ }
3110+ }
3111+ /*reset used buffer counter */
3112+ prefetch_trace.buffer_used = 0;
3113+ prefetch_trace.overflow = 0;
3114+ prefetch_trace.overflow_reported = 0;
3115+ prefetch_trace.page_release_traced = 0;
3116+ prefetch_trace.generation++; /*next generation, markers are not comparable */
3117+
3118+ spin_unlock(&prefetch_trace.prefetch_trace_lock);
3119+ out:
3120+ return;
3121+}
3122+
3123+/**
3124+ * Checks if @line is exactly the same as @param_name.
3125+ */
3126+int param_match(char *line, char *param_name)
3127+{
3128+ if (strcmp(line, param_name) == 0)
3129+ return 1;
3130+
3131+ return 0;
3132+}
3133+
3134+EXPORT_SYMBOL(param_match);
3135+
3136+/**
3137+ * Checks if @line starts with @param_name, not exceeding param_name length for safety.
3138+ */
3139+int param_match_prefix(char *line, char *param_name)
3140+{
3141+ unsigned param_len = strlen(param_name);
3142+ if (strncmp(line, param_name, param_len) == 0)
3143+ return 1;
3144+
3145+ return 0;
3146+}
3147+
3148+EXPORT_SYMBOL(param_match_prefix);
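+/*
+ * Usage sketch: a proc handler can dispatch commands with these helpers -
+ * exact commands via param_match(), commands carrying an argument via
+ * param_match_prefix() (the "save " command is hypothetical):
+ *
+ *	if (param_match(name, "enable"))
+ *		enabled = 1;
+ *	else if (param_match_prefix(name, "save "))
+ *		filename = name + strlen("save ");
+ */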
3149+
3150+ssize_t prefetch_proc_write(struct file * proc_file, const char __user * buffer,
3151+ size_t count, loff_t * ppos)
3152+{
3153+ char *name;
3154+ int e = 0;
3155+
3156+ if (count >= PATH_MAX)
3157+ return -ENAMETOOLONG;
3158+
3159+ name = kmalloc(count + 1, GFP_KERNEL);
3160+ if (!name)
3161+ return -ENOMEM;
3162+
3163+ if (copy_from_user(name, buffer, count)) {
3164+ e = -EFAULT;
3165+ goto out;
3166+ }
3167+
3168+ /* strip the optional newline */
3169+ if (count && name[count - 1] == '\n')
3170+ name[count - 1] = '\0';
3171+ else
3172+ name[count] = '\0';
3173+
3174+ if (param_match(name, "enable")) {
3175+ printk(KERN_INFO "Prefetch module enabled\n");
3176+ enabled = 1;
3177+ goto out;
3178+ }
3179+
3180+ if (param_match(name, "disable")) {
3181+ printk(KERN_INFO "Prefetch module disabled\n");
3182+ enabled = 0;
3183+ goto out;
3184+ }
3185+ out:
3186+ kfree(name);
3187+
3188+ return e ? e : count;
3189+}
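+/*
+ * The control file accepts one command per write, e.g. from a shell:
+ *
+ *	echo enable >/proc/prefetch/control
+ *	echo disable >/proc/prefetch/control
+ */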
3190+
3191+static int prefetch_proc_open(struct inode *inode, struct file *proc_file)
3192+{
3193+ return 0;
3194+}
3195+
3196+static int prefetch_proc_release(struct inode *inode, struct file *proc_file)
3197+{
3198+ return 0;
3199+}
3200+
3201+static struct file_operations proc_prefetch_fops = {
3202+ .owner = THIS_MODULE,
3203+ .open = prefetch_proc_open,
3204+ .release = prefetch_proc_release,
3205+ .write = prefetch_proc_write
3206+};
3207+
3208+struct proc_dir_entry *prefetch_proc_dir = NULL;
3209+EXPORT_SYMBOL(prefetch_proc_dir);
3210+
3211+static __init int prefetch_core_init(void)
3212+{
3213+ struct proc_dir_entry *entry;
3214+
3215+ mutex_lock(&prefetch_trace.prefetch_trace_mutex);
3216+ clear_trace();
3217+ mutex_unlock(&prefetch_trace.prefetch_trace_mutex);
3218+
3219+ prefetch_proc_dir = proc_mkdir("prefetch", NULL);
3220+
3221+ if (prefetch_proc_dir == NULL) {
3222+ printk(KERN_WARNING
3223+ "Creating prefetch proc directory failed, proc interface will not be available\n");
3224+ } else {
3225+ entry = create_proc_entry("control", 0600, prefetch_proc_dir);
3226+ if (entry)
3227+ entry->proc_fops = &proc_prefetch_fops;
3228+ }
3229+
3230+ printk(KERN_INFO "Prefetching core module started, enabled=%d\n",
3231+ enabled);
3232+
3233+ return 0;
3234+}
3235+
3236+static void prefetch_core_exit(void)
3237+{
+	/*the proc directory might not exist - see prefetch_core_init() */
+	if (prefetch_proc_dir) {
3238+		remove_proc_entry("control", prefetch_proc_dir);
3239+		remove_proc_entry("prefetch", NULL);	/*remove directory */
+	}
3240+}
3241+
3242+MODULE_AUTHOR("Krzysztof Lichota <lichota@mimuw.edu.pl>");
3243+MODULE_LICENSE("GPL");
3244+MODULE_DESCRIPTION
3245+ ("Prefetching core - functions used for tracing and prefetching by prefetching modules");
3246+
3247+module_init(prefetch_core_init);
3248+module_exit(prefetch_core_exit);