]>
Commit | Line | Data |
---|---|---|
5dd10c98 | 1 | diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt |
5bd2511a | 2 | index 1808f11..8e8d3b7 100644 |
5dd10c98 AM |
3 | --- a/Documentation/kernel-parameters.txt |
4 | +++ b/Documentation/kernel-parameters.txt | |
5bd2511a | 5 | @@ -2773,6 +2773,9 @@ and is between 256 and 4096 characters. It is defined in the file |
de6743ae AM |
6 | HIGHMEM regardless of setting |
7 | of CONFIG_HIGHPTE. | |
5dd10c98 AM |
8 | |
9 | + uuid_debug= (Boolean) whether to enable debugging of TuxOnIce's | |
10 | + uuid support. | |
11 | + | |
12 | vdso= [X86,SH] | |
13 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) | |
14 | vdso=1: enable VDSO (default) | |
2380c486 JR |
15 | diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt |
16 | new file mode 100644 | |
e999739a | 17 | index 0000000..7a96186 |
2380c486 JR |
18 | --- /dev/null |
19 | +++ b/Documentation/power/tuxonice-internals.txt | |
e999739a | 20 | @@ -0,0 +1,477 @@ |
2380c486 | 21 | + TuxOnIce 3.0 Internal Documentation. |
e999739a | 22 | + Updated to 26 March 2009 |
2380c486 JR |
23 | + |
24 | +1. Introduction. | |
25 | + | |
26 | + TuxOnIce 3.0 is an addition to the Linux Kernel, designed to | |
27 | + allow the user to quickly shutdown and quickly boot a computer, without | |
28 | + needing to close documents or programs. It is equivalent to the | |
29 | + hibernate facility in some laptops. This implementation, however, | |
30 | + requires no special BIOS or hardware support. | |
31 | + | |
32 | + The code in these files is based upon the original implementation | |
33 | + prepared by Gabor Kuti and additional work by Pavel Machek and a | |
34 | + host of others. This code has been substantially reworked by Nigel | |
35 | + Cunningham, again with the help and testing of many others, not the | |
36 | + least of whom is Michael Frank. At its heart, however, the operation is | |
37 | + essentially the same as Gabor's version. | |
38 | + | |
39 | +2. Overview of operation. | |
40 | + | |
41 | + The basic sequence of operations is as follows: | |
42 | + | |
43 | + a. Quiesce all other activity. | |
44 | + b. Ensure enough memory and storage space are available, and attempt | |
45 | + to free memory/storage if necessary. | |
46 | + c. Allocate the required memory and storage space. | |
47 | + d. Write the image. | |
48 | + e. Power down. | |
49 | + | |
50 | + There are a number of complicating factors which mean that things are | |
51 | + not as simple as the above would imply, however... | |
52 | + | |
53 | + o The activity of each process must be stopped at a point where it will | |
54 | + not be holding locks necessary for saving the image, or unexpectedly | |
55 | + restart operations due to something like a timeout and thereby make | |
56 | + our image inconsistent. | |
57 | + | |
58 | + o It is desirable that we sync outstanding I/O to disk before calculating | 
59 | + image statistics. This reduces corruption if one should suspend but | |
60 | + then not resume, and also makes later parts of the operation safer (see | |
61 | + below). | |
62 | + | |
63 | + o We need to get as close as we can to an atomic copy of the data. | |
64 | + Inconsistencies in the image will result in inconsistent memory contents at | |
65 | + resume time, and thus in instability of the system and/or file system | |
66 | + corruption. This would appear to imply a maximum image size of one half of | |
67 | + the amount of RAM, but we have a solution... (again, below). | |
68 | + | |
69 | + o In 2.6, we choose to play nicely with the other suspend-to-disk | |
70 | + implementations. | |
71 | + | |
72 | +3. Detailed description of internals. | |
73 | + | |
74 | + a. Quiescing activity. | |
75 | + | |
76 | + Safely quiescing the system is achieved using three separate but related | |
77 | + aspects. | |
78 | + | |
79 | + First, we note that the vast majority of processes don't need to run during | |
80 | + suspend. They can be 'frozen'. We therefore implement a refrigerator | |
81 | + routine, which processes enter and in which they remain until the cycle is | |
82 | + complete. Processes enter the refrigerator via try_to_freeze() invocations | |
83 | + at appropriate places. A process cannot be frozen in any old place. It | |
84 | + must not be holding locks that will be needed for writing the image or | |
85 | + freezing other processes. For this reason, userspace processes generally | |
86 | + enter the refrigerator via the signal handling code, and kernel threads at | |
87 | + the place in their event loops where they drop locks and yield to other | |
88 | + processes or sleep. | |
89 | + | |
90 | + The task of freezing processes is complicated by the fact that there can be | |
91 | + interdependencies between processes. Freezing process A before process B may | |
92 | + mean that process B cannot be frozen, because it stops at waiting for | |
93 | + process A rather than in the refrigerator. This issue is seen where | |
94 | + userspace waits on freezeable kernel threads or fuse filesystem threads. To | |
95 | + address this issue, we implement the following algorithm for quiescing | |
96 | + activity: | |
97 | + | |
98 | + - Freeze filesystems (including fuse - userspace programs starting | |
99 | + new requests are immediately frozen; programs already running | |
100 | + requests complete their work before being frozen in the next | |
101 | + step) | |
102 | + - Freeze userspace | |
103 | + - Thaw filesystems (this is safe now that userspace is frozen and no | |
104 | + fuse requests are outstanding). | |
105 | + - Invoke sys_sync (noop on fuse). | |
106 | + - Freeze filesystems | |
107 | + - Freeze kernel threads | |
108 | + | |
109 | + If we need to free memory, we thaw kernel threads and filesystems, but not | |
110 | + userspace. We can then free caches without worrying about deadlocks due to | |
111 | + swap files being on frozen filesystems or such like. | |
112 | + | |
113 | + b. Ensure enough memory & storage are available. | |
114 | + | |
115 | + We have a number of constraints to meet in order to be able to successfully | |
116 | + suspend and resume. | |
117 | + | |
118 | + First, the image will be written in two parts, described below. One of these | |
119 | + parts needs to have an atomic copy made, which of course implies a maximum | |
120 | + size of one half of the amount of system memory. The other part ('pageset') | |
121 | + is not atomically copied, and can therefore be as large or small as desired. | |
122 | + | |
123 | + Second, we have constraints on the amount of storage available. In these | |
124 | + calculations, we may also consider any compression that will be done. The | |
125 | + cryptoapi module allows the user to configure an expected compression ratio. | |
126 | + | |
127 | + Third, the user can specify an arbitrary limit on the image size, in | |
128 | + megabytes. This limit is treated as a soft limit, so that we don't fail the | |
129 | + attempt to suspend if we cannot meet this constraint. | |
130 | + | |
131 | + c. Allocate the required memory and storage space. | |
132 | + | |
133 | + Having done the initial freeze, we determine whether the above constraints | |
134 | + are met, and seek to allocate the metadata for the image. If the constraints | |
135 | + are not met, or we fail to allocate the required space for the metadata, we | |
136 | + seek to free the amount of memory that we calculate is needed and try again. | |
137 | + We allow up to four iterations of this loop before aborting the cycle. If we | |
138 | + do fail, it should only be because of a bug in TuxOnIce's calculations. | |
139 | + | |
140 | + These steps are merged together in the prepare_image function, found in | |
141 | + prepare_image.c. The functions are merged because of the cyclical nature | |
142 | + of the problem of calculating how much memory and storage is needed. Since | |
143 | + the data structures containing the information about the image must | |
144 | + themselves take memory and use storage, the amount of memory and storage | |
145 | + required changes as we prepare the image. Since the changes are not large, | |
146 | + only one or two iterations will be required to achieve a solution. | |
147 | + | |
148 | + The recursive nature of the algorithm is minimised by keeping user space | 
149 | + frozen while preparing the image, and by the fact that our records of which | |
150 | + pages are to be saved and which pageset they are saved in use bitmaps (so | |
151 | + that changes in number or fragmentation of the pages to be saved don't | |
152 | + feed back via changes in the amount of memory needed for metadata). The | 
153 | + recursiveness is thus limited to any extra slab pages allocated to store the | |
154 | + extents that record storage used, and the effects of seeking to free memory. | |
155 | + | |
156 | + d. Write the image. | |
157 | + | |
158 | + We previously mentioned the need to create an atomic copy of the data, and | |
159 | + the half-of-memory limitation that is implied in this. This limitation is | |
160 | + circumvented by dividing the memory to be saved into two parts, called | |
161 | + pagesets. | |
162 | + | |
e999739a | 163 | + Pageset2 contains most of the page cache - the pages on the active and |
164 | + inactive LRU lists that aren't needed or modified while TuxOnIce is | |
165 | + running, so they can be safely written without an atomic copy. They are | |
166 | + therefore saved first and reloaded last. While saving these pages, | |
167 | + TuxOnIce carefully ensures that the work of writing the pages doesn't make | |
168 | + the image inconsistent. With the support for Kernel (Video) Mode Setting | |
169 | + going into the kernel at the time of writing, we need to check for pages | |
170 | + on the LRU that are used by KMS, and exclude them from pageset2. They are | |
171 | + atomically copied as part of pageset 1. | |
2380c486 JR |
172 | + |
173 | + Once pageset2 has been saved, we prepare to do the atomic copy of remaining | |
174 | + memory. As part of the preparation, we power down drivers, thereby providing | |
175 | + them with the opportunity to have their state recorded in the image. The | |
176 | + amount of memory allocated by drivers for this is usually negligible, but if | |
177 | + DRI is in use, video drivers may require significant amounts. Ideally we | 
178 | + would be able to query drivers while preparing the image as to the amount of | |
179 | + memory they will need. Unfortunately no such mechanism exists at the time of | |
180 | + writing. For this reason, TuxOnIce allows the user to set an | |
181 | + 'extra_pages_allowance', which is used to seek to ensure sufficient memory | |
182 | + is available for drivers at this point. TuxOnIce also lets the user set this | |
183 | + value to 0. In this case, a test driver suspend is done while preparing the | |
e999739a | 184 | + image, and the difference (plus a margin) used instead. TuxOnIce will also |
185 | + automatically restart the hibernation process (twice at most) if it finds | |
186 | + that the extra pages allowance is not sufficient. It will then use what was | |
187 | + actually needed (plus a margin, again). Failure to hibernate should thus | |
188 | + be an extremely rare occurrence. | 
2380c486 JR |
189 | + |
190 | + Having suspended the drivers, we save the CPU context before making an | |
191 | + atomic copy of pageset1, resuming the drivers and saving the atomic copy. | |
192 | + After saving the two pagesets, we just need to save our metadata before | |
193 | + powering down. | |
194 | + | |
195 | + As we mentioned earlier, the contents of pageset2 pages aren't needed once | |
196 | + they've been saved. We therefore use them as the destination of our atomic | |
197 | + copy. In the unlikely event that pageset1 is larger, extra pages are | |
198 | + allocated while the image is being prepared. This is normally only a real | |
199 | + possibility when the system has just been booted and the page cache is | |
200 | + small. | |
201 | + | |
202 | + This is where we need to be careful about syncing, however. Pageset2 will | |
203 | + probably contain filesystem meta data. If this is overwritten with pageset1 | |
204 | + and then a sync occurs, the filesystem will be corrupted - at least until | |
205 | + resume time and another sync of the restored data. Since there is a | |
206 | + possibility that the user might not resume or (may it never be!) that | |
e999739a | 207 | + TuxOnIce might oops, we do our utmost to avoid syncing filesystems after |
2380c486 JR |
208 | + copying pageset1. |
209 | + | |
210 | + e. Power down. | |
211 | + | |
212 | + Powering down uses standard kernel routines. TuxOnIce supports powering down | |
213 | + using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off. | |
214 | + Supporting suspend to ram (S3) as a power off option might sound strange, | |
215 | + but it allows the user to quickly get their system up and running again if | |
216 | + the battery doesn't run out (we just need to re-read the overwritten pages) | |
217 | + and if the battery does run out (or the user removes power), they can still | |
218 | + resume. | |
219 | + | |
220 | +4. Data Structures. | |
221 | + | |
222 | + TuxOnIce uses three main structures to store its metadata and configuration | |
223 | + information: | |
224 | + | |
225 | + a) Pageflags bitmaps. | |
226 | + | |
227 | + TuxOnIce records which pages will be in pageset1, pageset2, the destination | |
228 | + of the atomic copy and the source of the atomically restored image using | |
e999739a | 229 | + bitmaps. The code used is that written for swsusp, with small improvements |
230 | + to match TuxOnIce's requirements. | |
2380c486 JR |
231 | + |
232 | + The pageset1 bitmap is thus easily stored in the image header for use at | |
233 | + resume time. | |
234 | + | |
235 | + As mentioned above, using bitmaps also means that the amount of memory and | |
236 | + storage required for recording the above information is constant. This | |
237 | + greatly simplifies the work of preparing the image. In earlier versions of | |
238 | + TuxOnIce, extents were used to record which pages would be stored. In that | |
239 | + case, however, eating memory could result in greater fragmentation of the | |
240 | + lists of pages, which in turn required more memory to store the extents and | |
241 | + more storage in the image header. These could in turn require further | |
242 | + freeing of memory, and another iteration. All of this complexity is removed | |
243 | + by having bitmaps. | |
244 | + | |
245 | + Bitmaps also make a lot of sense because TuxOnIce only ever iterates | |
246 | + through the lists. There is therefore no cost to not being able to find the | |
247 | + nth page in order 0 time. We only need to worry about the cost of finding | |
248 | + the n+1th page, given the location of the nth page. Bitwise optimisations | |
249 | + help here. | |
250 | + | |
2380c486 JR |
251 | + b) Extents for block data. |
252 | + | |
253 | + TuxOnIce supports writing the image to multiple block devices. In the case | |
254 | + of swap, multiple partitions and/or files may be in use, and we happily use | |
e999739a | 255 | + them all (with the exception of compcache pages, which we allocate but do |
256 | + not use). This use of multiple block devices is accomplished as follows: | |
2380c486 JR |
257 | + |
258 | + Whatever the actual source of the allocated storage, the destination of the | |
259 | + image can be viewed in terms of one or more block devices, and on each | |
260 | + device, a list of sectors. To simplify matters, we only use contiguous, | |
261 | + PAGE_SIZE aligned sectors, like the swap code does. | |
262 | + | |
263 | + Since sector numbers on each bdev may well not start at 0, it makes much | |
264 | + more sense to use extents here. Contiguous ranges of pages can thus be | |
265 | + represented in the extents by contiguous values. | |
266 | + | |
267 | + Variations in block size are taken account of in transforming this data | |
268 | + into the parameters for bio submission. | |
269 | + | |
270 | + We can thus implement a layer of abstraction wherein the core of TuxOnIce | |
271 | + doesn't have to worry about which device we're currently writing to or | |
272 | + where in the device we are. It simply requests that the next page in the | |
273 | + pageset or header be written, leaving the details to this lower layer. | |
274 | + The lower layer remembers where in the sequence of devices and blocks each | |
275 | + pageset starts. The header always starts at the beginning of the allocated | |
276 | + storage. | |
277 | + | |
278 | + So extents are: | |
279 | + | |
280 | + struct extent { | |
281 | + unsigned long minimum, maximum; | |
282 | + struct extent *next; | |
283 | + } | |
284 | + | |
285 | + These are combined into chains of extents for a device: | |
286 | + | |
287 | + struct extent_chain { | |
288 | + int size; /* size of the extent ie sum (max-min+1) */ | |
289 | + int allocs, frees; | |
290 | + char *name; | |
291 | + struct extent *first, *last_touched; | |
292 | + }; | |
293 | + | |
294 | + For each bdev, we need to store a little more info: | |
295 | + | |
296 | + struct suspend_bdev_info { | |
297 | + struct block_device *bdev; | |
298 | + dev_t dev_t; | |
299 | + int bmap_shift; | |
300 | + int blocks_per_page; | |
301 | + }; | |
302 | + | |
303 | + The dev_t is used to identify the device in the stored image. As a result, | |
304 | + we expect devices at resume time to have the same major and minor numbers | |
305 | + as they had while suspending. This is primarily a concern where the user | |
306 | + utilises LVM for storage, as they will need to dmsetup their partitions in | |
307 | + such a way as to maintain this consistency at resume time. | |
308 | + | |
e999739a | 309 | + bmap_shift and blocks_per_page apply the effects of variations in blocks |
310 | + per page settings for the filesystem and underlying bdev. For most | |
2380c486 JR |
311 | + filesystems, these are the same, but for xfs, they can have independent |
312 | + values. | |
313 | + | |
314 | + Combining these two structures together, we have everything we need to | |
315 | + record what devices and what blocks on each device are being used to | |
316 | + store the image, and to submit i/o using bio_submit. | |
317 | + | |
318 | + The last elements in the picture are a means of recording how the storage | |
319 | + is being used. | |
320 | + | |
321 | + We do this first and foremost by implementing a layer of abstraction on | |
322 | + top of the devices and extent chains which allows us to view however many | |
323 | + devices there might be as one long storage tape, with a single 'head' that | |
324 | + tracks a 'current position' on the tape: | |
325 | + | |
326 | + struct extent_iterate_state { | |
327 | + struct extent_chain *chains; | |
328 | + int num_chains; | |
329 | + int current_chain; | |
330 | + struct extent *current_extent; | |
331 | + unsigned long current_offset; | |
332 | + }; | |
333 | + | |
334 | + That is, *chains points to an array of size num_chains of extent chains. | |
335 | + For the filewriter, this is always a single chain. For the swapwriter, the | |
336 | + array is of size MAX_SWAPFILES. | |
337 | + | |
338 | + current_chain, current_extent and current_offset thus point to the current | |
339 | + index in the chains array (and into a matching array of struct | |
340 | + suspend_bdev_info), the current extent in that chain (to optimise access), | |
341 | + and the current value in the offset. | |
342 | + | |
343 | + The image is divided into three parts: | |
344 | + - The header | |
345 | + - Pageset 1 | |
346 | + - Pageset 2 | |
347 | + | |
348 | + The header always starts at the first device and first block. We know its | |
349 | + size before we begin to save the image because we carefully account for | |
350 | + everything that will be stored in it. | |
351 | + | |
352 | + The second pageset (LRU) is stored first. It begins on the next page after | |
353 | + the end of the header. | |
354 | + | |
355 | + The first pageset is stored second. Its start location is only known once | 
356 | + pageset2 has been saved, since pageset2 may be compressed as it is written. | |
357 | + This location is thus recorded at the end of saving pageset2. It is page | |
358 | + aligned also. | |
359 | + | |
360 | + Since this information is needed at resume time, and the location of extents | |
361 | + in memory will differ at resume time, this needs to be stored in a portable | |
362 | + way: | |
363 | + | |
364 | + struct extent_iterate_saved_state { | |
365 | + int chain_num; | |
366 | + int extent_num; | |
367 | + unsigned long offset; | |
368 | + }; | |
369 | + | |
370 | + We can thus implement a layer of abstraction wherein the core of TuxOnIce | |
371 | + doesn't have to worry about which device we're currently writing to or | |
372 | + where in the device we are. It simply requests that the next page in the | |
373 | + pageset or header be written, leaving the details to this layer, and | |
374 | + invokes the routines to remember and restore the position, without having | |
375 | + to worry about the details of how the data is arranged on disk or such like. | |
376 | + | |
377 | + c) Modules | |
378 | + | |
379 | + One aim in designing TuxOnIce was to make it flexible. We wanted to allow | |
380 | + for the implementation of different methods of transforming a page to be | |
381 | + written to disk and different methods of getting the pages stored. | |
382 | + | |
383 | + In early versions (the betas and perhaps Suspend1), compression support was | |
384 | + inlined in the image writing code, and the data structures and code for | |
385 | + managing swap were intertwined with the rest of the code. A number of people | |
386 | + had expressed interest in implementing image encryption, and alternative | |
387 | + methods of storing the image. | |
388 | + | |
389 | + In order to achieve this, TuxOnIce was given a modular design. | |
390 | + | |
391 | + A module is a single file which encapsulates the functionality needed | |
392 | + to transform a pageset of data (encryption or compression, for example), | |
393 | + or to write the pageset to a device. The former type of module is called | |
394 | + a 'page-transformer', the latter a 'writer'. | 
395 | + | |
396 | + Modules are linked together in pipeline fashion. There may be zero or more | |
397 | + page transformers in a pipeline, and there is always exactly one writer. | |
398 | + The pipeline follows this pattern: | |
399 | + | |
400 | + --------------------------------- | |
401 | + | TuxOnIce Core | | |
402 | + --------------------------------- | |
403 | + | | |
404 | + | | |
405 | + --------------------------------- | |
406 | + | Page transformer 1 | | |
407 | + --------------------------------- | |
408 | + | | |
409 | + | | |
410 | + --------------------------------- | |
411 | + | Page transformer 2 | | |
412 | + --------------------------------- | |
413 | + | | |
414 | + | | |
415 | + --------------------------------- | |
416 | + | Writer | | |
417 | + --------------------------------- | |
418 | + | |
419 | + During the writing of an image, the core code feeds pages one at a time | |
420 | + to the first module. This module performs whatever transformations it | |
421 | + implements on the incoming data, completely consuming the incoming data and | |
e999739a | 422 | + feeding output in a similar manner to the next module. |
2380c486 JR |
423 | + |
424 | + All routines are SMP safe, and the final result of the transformations is | |
425 | + written with an index (provided by the core) and size of the output by the | |
426 | + writer. As a result, we can have multithreaded I/O without needing to | |
427 | + worry about the sequence in which pages are written (or read). | |
428 | + | |
429 | + During reading, the pipeline works in the reverse direction. The core code | |
430 | + calls the first module with the address of a buffer which should be filled. | |
431 | + (Note that the buffer size is always PAGE_SIZE at this time). This module | |
432 | + will in turn request data from the next module and so on down until the | |
433 | + writer is made to read from the stored image. | |
434 | + | |
435 | + Part of definition of the structure of a module thus looks like this: | |
436 | + | |
437 | + int (*rw_init) (int rw, int stream_number); | |
438 | + int (*rw_cleanup) (int rw); | |
439 | + int (*write_chunk) (struct page *buffer_page); | |
440 | + int (*read_chunk) (struct page *buffer_page, int sync); | |
441 | + | |
442 | + It should be noted that the _cleanup routine may be called before the | |
443 | + full stream of data has been read or written. While writing the image, | |
444 | + the user may (depending upon settings) choose to abort suspending, and | |
445 | + if we are in the midst of writing the last portion of the image, a portion | |
446 | + of the second pageset may be reread. This may also happen if an error | |
447 | + occurs and we seek to abort the process of writing the image. | |
448 | + | |
449 | + The modular design is also useful in a number of other ways. It provides | |
450 | + a means whereby we can add support for: | 
451 | + | |
452 | + - providing overall initialisation and cleanup routines; | |
453 | + - serialising configuration information in the image header; | |
454 | + - providing debugging information to the user; | |
455 | + - determining memory and image storage requirements; | |
456 | + - dis/enabling components at run-time; | |
457 | + - configuring the module (see below); | |
458 | + | |
459 | + ...and routines for writers specific to their work: | |
460 | + - Parsing a resume= location; | |
461 | + - Determining whether an image exists; | |
462 | + - Marking a resume as having been attempted; | |
463 | + - Invalidating an image; | |
464 | + | |
465 | + Since some parts of the core - the user interface and storage manager | |
466 | + support - have use for some of these functions, they are registered as | |
467 | + 'miscellaneous' modules as well. | |
468 | + | |
469 | + d) Sysfs data structures. | |
470 | + | |
471 | + This brings us naturally to support for configuring TuxOnIce. We desired to | |
472 | + provide a way to make TuxOnIce as flexible and configurable as possible. | |
e999739a | 473 | + The user shouldn't have to reboot just because they now want to hibernate to |
2380c486 JR |
474 | + a file instead of a partition, for example. |
475 | + | |
476 | + To accomplish this, TuxOnIce implements a very generic means whereby the | |
477 | + core and modules can register new sysfs entries. All TuxOnIce entries use | |
e999739a | 478 | + a single _store and _show routine, both of which are found in |
479 | + tuxonice_sysfs.c in the kernel/power directory. These routines handle the | |
480 | + most common operations - getting and setting the values of bits, integers, | |
481 | + longs, unsigned longs and strings in one place, and allow overrides for | |
482 | + customised get and set options as well as side-effect routines for all | |
483 | + reads and writes. | |
2380c486 JR |
484 | + |
485 | + When combined with some simple macros, a new sysfs entry can then be defined | |
486 | + in just a couple of lines: | |
487 | + | |
e999739a | 488 | + SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, |
489 | + 2048, 0, NULL), | |
2380c486 JR |
490 | + |
491 | + This defines a sysfs entry named "progress_granularity" which is rw and | |
492 | + allows the user to access an integer stored at &progress_granularity, giving | |
493 | + it a value between 1 and 2048 inclusive. | |
494 | + | |
495 | + Sysfs entries are registered under /sys/power/tuxonice, and entries for | |
496 | + modules are located in a subdirectory named after the module. | |
497 | + | |
498 | diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt | |
499 | new file mode 100644 | |
92bca44c | 500 | index 0000000..3bf0575 |
2380c486 JR |
501 | --- /dev/null |
502 | +++ b/Documentation/power/tuxonice.txt | |
9474138d | 503 | @@ -0,0 +1,948 @@ |
2380c486 JR |
504 | + --- TuxOnIce, version 3.0 --- |
505 | + | |
506 | +1. What is it? | |
507 | +2. Why would you want it? | |
508 | +3. What do you need to use it? | |
509 | +4. Why not just use the version already in the kernel? | |
510 | +5. How do you use it? | |
511 | +6. What do all those entries in /sys/power/tuxonice do? | |
512 | +7. How do you get support? | |
513 | +8. I think I've found a bug. What should I do? | |
514 | +9. When will XXX be supported? | |
515 | +10. How does it work? | 
516 | +11. Who wrote TuxOnIce? | |
517 | + | |
518 | +1. What is it? | |
519 | + | |
520 | + Imagine you're sitting at your computer, working away. For some reason, you | |
521 | + need to turn off your computer for a while - perhaps it's time to go home | |
522 | + for the day. When you come back to your computer next, you're going to want | |
523 | + to carry on where you left off. Now imagine that you could push a button and | |
524 | + have your computer store the contents of its memory to disk and power down. | |
525 | + Then, when you next start up your computer, it loads that image back into | |
526 | + memory and you can carry on from where you were, just as if you'd never | |
527 | + turned the computer off. You have far less time to start up, no reopening of | |
528 | + applications or finding what directory you put that file in yesterday. | |
529 | + That's what TuxOnIce does. | |
530 | + | |
531 | + TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who, | |
532 | + with some help from Pavel Machek, got an early version going in 1999. The | |
533 | + project was then taken over by Florent Chabaud while still in alpha version | |
534 | + numbers. Nigel Cunningham came on the scene when Florent was unable to | |
535 | + continue, moving the project into betas, then 1.0, 2.0 and so on up to | |
536 | + the present series. During the 2.0 series, the name was contracted to | |
537 | + Suspend2 and the website suspend2.net created. Beginning around July 2007, | |
538 | + a transition to calling the software TuxOnIce was made, to seek to help | |
539 | + make it clear that TuxOnIce is more concerned with hibernation than suspend | |
540 | + to ram. | |
541 | + | |
542 | + Pavel Machek's swsusp code, which was merged around 2.5.17 retains the | |
543 | + original name, and was essentially a fork of the beta code until Rafael | |
544 | + Wysocki came on the scene in 2005 and began to improve it further. | |
545 | + | |
546 | +2. Why would you want it? | |
547 | + | |
548 | + Why wouldn't you want it? | |
549 | + | |
550 | + Being able to save the state of your system and quickly restore it improves | |
551 | + your productivity - you get a useful system in far less time than through | |
552 | + the normal boot process. You also get to be completely 'green', using zero | |
553 | + power, or as close to that as possible (the computer may still provide | |
554 | + minimal power to some devices, so they can initiate a power on, but that | |
555 | + will be the same amount of power as would be used if you told the computer | |
556 | + to shutdown). | 
557 | + | |
558 | +3. What do you need to use it? | |
559 | + | |
560 | + a. Kernel Support. | |
561 | + | |
562 | + i) The TuxOnIce patch. | |
563 | + | |
564 | + TuxOnIce is part of the Linux Kernel. This version is not part of Linus's | |
565 | + 2.6 tree at the moment, so you will need to download the kernel source and | |
566 | + apply the latest patch. Having done that, enable the appropriate options in | |
567 | + make [menu|x]config (under Power Management Options - look for "Enhanced | |
568 | + Hibernation"), compile and install your kernel. TuxOnIce works with SMP, | |
569 | + Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64. | |
570 | + | |
571 | + TuxOnIce patches are available from http://tuxonice.net. | |
572 | + | |
573 | + ii) Compression support. | |
574 | + | |
575 | + Compression support is implemented via the cryptoapi. You will therefore want | |
576 | + to select any Cryptoapi transforms that you want to use on your image from | |
9474138d AM |
577 | + the Cryptoapi menu while configuring your kernel. We recommend the use of the |
578 | + LZO compression method - it is very fast and still achieves good compression. | |
2380c486 | 579 | + |
9474138d | 580 | + You can also tell TuxOnIce to write its image to an encrypted and/or |
2380c486 JR |
581 | + compressed filesystem/swap partition. In that case, you don't need to do |
582 | + anything special for TuxOnIce when it comes to kernel configuration. | |
583 | + | |
584 | + iii) Configuring other options. | |
585 | + | |
586 | + While you're configuring your kernel, try to configure as much as possible | |
587 | + to build as modules. We recommend this because there are a number of drivers | |
588 | + that are still in the process of implementing proper power management | |
589 | + support. In those cases, the best way to work around their current lack is | |
590 | + to build them as modules and remove the modules while hibernating. You might | |
591 | + also bug the driver authors to get their support up to speed, or even help! | |
592 | + | |
593 | + b. Storage. | |
594 | + | |
595 | + i) Swap. | |
596 | + | |
597 | + TuxOnIce can store the hibernation image in your swap partition, a swap file or | |
598 | + a combination thereof. Whichever combination you choose, you will probably | |
599 | + want to create enough swap space to store the largest image you could have, | |
600 | + plus the space you'd normally use for swap. A good rule of thumb would be | |
601 | + to calculate the amount of swap you'd want without using TuxOnIce, and then | |
602 | + add the amount of memory you have. This swapspace can be arranged in any way | |
603 | + you'd like. It can be in one partition or file, or spread over a number. The | |
604 | + only requirement is that they be active when you start a hibernation cycle. | |
605 | + | |
606 | + There is one exception to this requirement. TuxOnIce has the ability to turn | |
607 | + on one swap file or partition at the start of hibernating and turn it back off | |
608 | + at the end. If you want to ensure you have enough memory to store an image | |
609 | + when your memory is fully used, you might want to make one swap partition or | |
610 | + file for 'normal' use, and another for TuxOnIce to activate & deactivate | |
611 | + automatically. (Further details below). | |
612 | + | |
613 | + ii) Normal files. | |
614 | + | |
615 | + TuxOnIce includes a 'file allocator'. The file allocator can store your | |
616 | + image in a simple file. Since Linux has the concept of everything being a | |
617 | + file, this is more powerful than it initially sounds. If, for example, you | |
618 | + were to set up a network block device file, you could hibernate to a network | |
619 | + server. This has been tested and works to a point, but nbd itself isn't | |
620 | + stateless enough for our purposes. | |
621 | + | |
622 | + Take extra care when setting up the file allocator. If you just type | |
623 | + commands without thinking and then try to hibernate, you could cause | |
624 | + irreversible corruption on your filesystems! Make sure you have backups. | |
625 | + | |
626 | + Most people will only want to hibernate to a local file. To achieve that, do | |
627 | + something along the lines of: | |
628 | + | |
629 | + echo "TuxOnIce" > /hibernation-file | |
92bca44c | 630 | + dd if=/dev/zero bs=1M count=512 >> /hibernation-file |
2380c486 JR |
631 | + |
632 | + This will create a 512MB file called /hibernation-file. To get TuxOnIce to use | |
633 | + it: | |
634 | + | |
635 | + echo /hibernation-file > /sys/power/tuxonice/file/target | |
636 | + | |
637 | + Then | |
638 | + | |
639 | + cat /sys/power/tuxonice/resume | |
640 | + | |
641 | + Put the results of this into your bootloader's configuration (see also step | |
642 | + C, below): | |
643 | + | |
644 | + ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- | |
645 | + # cat /sys/power/tuxonice/resume | |
646 | + file:/dev/hda2:0x1e001 | |
647 | + | |
648 | + In this example, we would edit the append= line of our lilo.conf|menu.lst | |
649 | + so that it included: | |
650 | + | |
651 | + resume=file:/dev/hda2:0x1e001 | |
652 | + ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- | |
653 | + | |
654 | + For those who are thinking 'Could I make the file sparse?', the answer is | |
655 | + 'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in | |
656 | + a sparse file while hibernating. In the longer term (post merge!), I'd like | |
657 | + to change things so that the file could be dynamically resized and have | |
658 | + holes filled as needed. Right now, however, that's not possible and not a | |
659 | + priority. | |
660 | + | |
661 | + c. Bootloader configuration. | |
662 | + | |
663 | + Using TuxOnIce also requires that you add an extra parameter to | |
664 | + your lilo.conf or equivalent. Here's an example for a swap partition: | |
665 | + | |
666 | + append="resume=swap:/dev/hda1" | |
667 | + | |
668 | + This would tell TuxOnIce that /dev/hda1 is a swap partition you | |
669 | + have. TuxOnIce will use the swap signature of this partition as a | |
670 | + pointer to your data when you hibernate. This means that (in this example) | |
671 | + /dev/hda1 doesn't need to be _the_ swap partition where all of your data | |
672 | + is actually stored. It just needs to be a swap partition that has a | |
673 | + valid signature. | |
674 | + | |
675 | + You don't need to have a swap partition for this purpose. TuxOnIce | |
676 | + can also use a swap file, but usage is a little more complex. Having made | |
677 | + your swap file, turn it on and do | |
678 | + | |
679 | + cat /sys/power/tuxonice/swap/headerlocations | |
680 | + | |
681 | + (this assumes you've already compiled your kernel with TuxOnIce | |
682 | + support and booted it). The results of the cat command will tell you | |
683 | + what you need to put in lilo.conf: | |
684 | + | |
685 | + For swap partitions like /dev/hda1, simply use resume=/dev/hda1. | |
686 | + For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d. | |
687 | + | |
688 | + If the swapfile changes for any reason (it is moved to a different | |
689 | + location, it is deleted and recreated, or the filesystem is | |
690 | + defragmented) then you will have to check | |
691 | + /sys/power/tuxonice/swap/headerlocations for a new resume_block value. | |
692 | + | |
693 | + Once you've compiled and installed the kernel and adjusted your bootloader | |
694 | + configuration, you should only need to reboot for the most basic part | |
695 | + of TuxOnIce to be ready. | |
696 | + | |
697 | + If you only compile in the swap allocator, or only compile in the file | |
698 | + allocator, you don't need to add the "swap:" part of the resume= | |
699 | + parameters above. resume=/dev/hda2:0x242d will work just as well. If you | |
700 | + have compiled both and your storage is on swap, you can also use this | |
701 | + format (the swap allocator is the default allocator). | |
702 | + | |
703 | + When compiling your kernel, one of the options in the 'Power Management | |
704 | + Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is | |
705 | + called 'Default resume partition'. This can be used to set a default value | |
706 | + for the resume= parameter. | |
707 | + | |
708 | + d. The hibernate script. | |
709 | + | |
710 | + Since the driver model in 2.6 kernels is still being developed, you may need | |
711 | + to do more than just configure TuxOnIce. Users of TuxOnIce usually start the | |
712 | + process via a script which prepares for the hibernation cycle, tells the | |
713 | + kernel to do its stuff and then restore things afterwards. This script might | |
714 | + involve: | |
715 | + | |
716 | + - Switching to a text console and back if X doesn't like the video card | |
717 | + status on resume. | |
718 | + - Un/reloading drivers that don't play well with hibernation. | |
719 | + | |
720 | + Note that you might not be able to unload some drivers if there are | |
721 | + processes using them. You might have to kill off processes that hold | |
722 | + devices open. Hint: if your X server accesses a USB mouse, doing a | |
723 | + 'chvt' to a text console releases the device and you can unload the | |
724 | + module. | |
725 | + | |
726 | + Check out the latest script (available on tuxonice.net). | |
727 | + | |
728 | + e. The userspace user interface. | |
729 | + | |
730 | + TuxOnIce has very limited support for displaying status if you only apply | |
731 | + the kernel patch - it can printk messages, but that is all. In addition, | |
732 | + some of the functions mentioned in this document (such as cancelling a cycle | |
733 | + or performing interactive debugging) are unavailable. To utilise these | |
734 | + functions, or simply get a nice display, you need the 'userui' component. | |
735 | + Userui comes in three flavours, usplash, fbsplash and text. Text should | |
736 | + work on any console. Usplash and fbsplash require the appropriate | |
737 | + (distro specific?) support. | |
738 | + | |
739 | + To utilise a userui, TuxOnIce just needs to be told where to find the | |
740 | + userspace binary: | |
741 | + | |
742 | + echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program | |
743 | + | |
744 | + The hibernate script can do this for you, and a default value for this | |
745 | + setting can be configured when compiling the kernel. This path is also | |
746 | + stored in the image header, so if you have an initrd or initramfs, you can | |
747 | + use the userui during the first part of resuming (prior to the atomic | |
748 | + restore) by putting the binary in the same path in your initrd/ramfs. | |
749 | + Alternatively, you can put it in a different location and do an echo | |
750 | + similar to the above prior to the echo > do_resume. The value saved in the | |
751 | + image header will then be ignored. | |
752 | + | |
753 | +4. Why not just use the version already in the kernel? | |
754 | + | |
755 | + The version in the vanilla kernel has a number of drawbacks. The most | |
756 | + serious of these are: | |
757 | + - it has a maximum image size of 1/2 total memory; | |
758 | + - it doesn't allocate storage until after it has snapshotted memory. | |
759 | + This means that you can't be sure hibernating will work until you | |
760 | + see it start to write the image; | |
761 | + - it does not allow you to press escape to cancel a cycle; | |
762 | + - it does not allow you to press escape to cancel resuming; | |
763 | + - it does not allow you to automatically swapon a file when | |
764 | + starting a cycle; | |
765 | + - it does not allow you to use multiple swap partitions or files; | |
766 | + - it does not allow you to use ordinary files; | |
767 | + - it just invalidates an image and continues to boot if you | |
768 | + accidentally boot the wrong kernel after hibernating; | |
769 | + - it doesn't support any sort of nice display while hibernating; | |
770 | + - it is moving toward requiring that you have an initrd/initramfs | |
771 | + to ever have a hope of resuming (uswsusp). While uswsusp will | |
772 | + address some of the concerns above, it won't address all of them, | |
773 | + and will be more complicated to get set up; | |
774 | + - it doesn't have support for suspend-to-both (write a hibernation | |
775 | + image, then suspend to ram; I think this is known as ReadySafe | |
776 | + under M$). | |
777 | + | |
778 | +5. How do you use it? | |
779 | + | |
780 | + A hibernation cycle can be started directly by doing: | |
781 | + | |
782 | + echo > /sys/power/tuxonice/do_hibernate | |
783 | + | |
784 | + In practice, though, you'll probably want to use the hibernate script | |
785 | + to unload modules, configure the kernel the way you like it and so on. | |
786 | + In that case, you'd do (as root): | |
787 | + | |
788 | + hibernate | |
789 | + | |
790 | + See the hibernate script's man page for more details on the options it | |
791 | + takes. | |
792 | + | |
793 | + If you're using the text or splash user interface modules, one feature of | |
794 | + TuxOnIce that you might find useful is that you can press Escape at any time | |
795 | + during hibernating, and the process will be aborted. | |
796 | + | |
797 | + Due to the way hibernation works, this means you'll have your system back and | |
798 | + perfectly usable almost instantly. The only exception is when it's at the | |
799 | + very end of writing the image. Then it will need to reload a small (usually | |
800 | + 4-50MBs, depending upon the image characteristics) portion first. | |
801 | + | |
802 | + Likewise, when resuming, you can press escape and resuming will be aborted. | |
803 | + The computer will then powerdown again according to settings at that time for | |
804 | + the powerdown method or rebooting. | |
805 | + | |
806 | + You can change the settings for powering down while the image is being | |
807 | + written by pressing 'R' to toggle rebooting and 'O' to toggle between | |
808 | + suspending to ram and powering down completely. | |
809 | + | |
810 | + If you run into problems with resuming, adding the "noresume" option to | |
811 | + the kernel command line will let you skip the resume step and recover your | |
812 | + system. This option shouldn't normally be needed, because TuxOnIce modifies | |
813 | + the image header prior to the atomic restore, and will thus prompt you | |
814 | + if it detects that you've tried to resume an image before (this flag is | |
815 | + removed if you press Escape to cancel a resume, so you won't be prompted | |
816 | + then). | |
817 | + | |
818 | + Recent kernels (2.6.24 onwards) add support for resuming from a different | |
819 | + kernel to the one that was hibernated (thanks to Rafael for his work on | |
820 | + this - I've just embraced and enhanced the support for TuxOnIce). This | |
821 | + should further reduce the need for you to use the noresume option. | |
822 | + | |
823 | +6. What do all those entries in /sys/power/tuxonice do? | |
824 | + | |
825 | + /sys/power/tuxonice is the directory which contains files you can use to | |
826 | + tune and configure TuxOnIce to your liking. The exact contents of | |
827 | + the directory will depend upon the version of TuxOnIce you're | |
828 | + running and the options you selected at compile time. In the following | |
829 | + descriptions, names in brackets refer to compile time options. | |
830 | + (Note that they're all dependent upon you having selected CONFIG_TUXONICE | |
831 | + in the first place!). | |
832 | + | |
833 | + Since the values of these settings can open potential security risks, the | |
834 | + writeable ones are accessible only to the root user. You may want to | |
835 | + configure sudo to allow you to invoke your hibernate script as an ordinary | |
836 | + user. | |
837 | + | |
9474138d AM |
838 | + - alloc/failure_test |
839 | + | |
840 | + This debugging option provides a way of testing TuxOnIce's handling of | |
841 | + memory allocation failures. Each allocation type that TuxOnIce makes has | |
842 | + been given a unique number (see the source code). Echo the appropriate | |
843 | + number into this entry, and when TuxOnIce attempts to do that allocation, | |
844 | + it will pretend there was a failure and act accordingly. | |
845 | + | |
846 | + - alloc/find_max_mem_allocated | |
847 | + | |
848 | + This debugging option will cause TuxOnIce to find the maximum amount of | |
849 | + memory it used during a cycle, and report that information in debugging | |
850 | + information at the end of the cycle. | |
851 | + | |
852 | + - alt_resume_param | |
853 | + | |
854 | + Instead of powering down after writing a hibernation image, TuxOnIce | |
855 | + supports resuming from a different image. This entry lets you set the | |
856 | + location of the signature for that image (the resume= value you'd use | |
857 | + for it). Using an alternate image and keep_image mode, you can do things | |
858 | + like using an alternate image to power down an uninterruptible power | |
859 | + supply. | |
860 | + | |
861 | + - block_io/target_outstanding_io | |
862 | + | |
863 | + This value controls the amount of memory that the block I/O code says it | |
864 | + needs when the core code is calculating how much memory is needed for | |
865 | + hibernating and for resuming. It doesn't directly control the amount of | |
866 | + I/O that is submitted at any one time - that depends on the amount of | |
867 | + available memory (we may have more available than we asked for), the | |
868 | + throughput that is being achieved and the ability of the CPU to keep up | |
869 | + with disk throughput (particularly where we're compressing pages). | |
870 | + | |
2380c486 JR |
871 | + - checksum/enabled |
872 | + | |
873 | + Use cryptoapi hashing routines to verify that Pageset2 pages don't change | |
874 | + while we're saving the first part of the image, and to get any pages that | |
875 | + do change resaved in the atomic copy. This should normally not be needed, | |
876 | + but if you're seeing issues, please enable this. If your issues stop you | |
877 | + being able to resume, enable this option, hibernate and cancel the cycle | |
878 | + after the atomic copy is done. If the debugging info shows a non-zero | |
879 | + number of pages resaved, please report this to Nigel. | |
880 | + | |
881 | + - compression/algorithm | |
882 | + | |
883 | + Set the cryptoapi algorithm used for compressing the image. | |
884 | + | |
885 | + - compression/expected_compression | |
886 | + | |
887 | + These values allow you to set an expected compression ratio, which TuxOnIce | |
888 | + will use in calculating whether it meets constraints on the image size. If | |
889 | + this expected compression ratio is not attained, the hibernation cycle will | |
890 | + abort, so it is wise to allow some spare. You can see what compression | |
891 | + ratio is achieved in the logs after hibernating. | |
892 | + | |
893 | + - debug_info: | |
894 | + | |
895 | + This file returns information about your configuration that may be helpful | |
896 | + in diagnosing problems with hibernating. | |
897 | + | |
9474138d AM |
898 | + - did_suspend_to_both: |
899 | + | |
900 | + This file can be used when you hibernate with powerdown method 3 (ie suspend | |
901 | + to ram after writing the image). There can be two outcomes in this case. We | |
902 | + can resume from the suspend-to-ram before the battery runs out, or we can run | |
903 | + out of juice and end up resuming like normal. This entry lets you find out, | |
904 | + post resume, which way we went. If the value is 1, we resumed from suspend | |
905 | + to ram. This can be useful when actions need to be run post suspend-to-ram | |
906 | + that don't need to be run if we did the normal resume from power off. | |
907 | + | |
2380c486 JR |
908 | + - do_hibernate: |
909 | + | |
910 | + When anything is written to this file, the kernel side of TuxOnIce will | |
911 | + begin to attempt to write an image to disk and power down. You'll normally | |
912 | + want to run the hibernate script instead, to get modules unloaded first. | |
913 | + | |
914 | + - do_resume: | |
915 | + | |
916 | + When anything is written to this file TuxOnIce will attempt to read and | |
917 | + restore an image. If there is no image, it will return almost immediately. | |
918 | + If an image exists, the echo > will never return. Instead, the original | |
919 | + kernel context will be restored and the original echo > do_hibernate will | |
920 | + return. | |
921 | + | |
922 | + - */enabled | |
923 | + | |
924 | + These options can be used to temporarily disable various parts of TuxOnIce. | |
925 | + | |
926 | + - extra_pages_allowance | |
927 | + | |
928 | + When TuxOnIce does its atomic copy, it calls the driver model suspend | |
929 | + and resume methods. If you have DRI enabled with a driver such as fglrx, | |
930 | + this can result in the driver allocating a substantial amount of memory | |
931 | + for storing its state. Extra_pages_allowance tells TuxOnIce how much | |
932 | + extra memory it should ensure is available for those allocations. If | |
933 | + your attempts at hibernating end with a message in dmesg indicating that | |
934 | + insufficient extra pages were allowed, you need to increase this value. | |
935 | + | |
936 | + - file/target: | |
937 | + | |
938 | + Read this value to get the current setting. Write to it to point TuxOnIce | |
939 | + at a new storage location for the file allocator. See section 3.b.ii above | |
940 | + for details of how to set up the file allocator. | |
941 | + | |
942 | + - freezer_test | |
943 | + | |
944 | + This entry can be used to get TuxOnIce to just test the freezer and prepare | |
945 | + an image without actually doing a hibernation cycle. It is useful for | |
946 | + diagnosing freezing and image preparation issues. | |
947 | + | |
9474138d AM |
948 | + - full_pageset2 |
949 | + | |
950 | + TuxOnIce divides the pages that are stored in an image into two sets. The | |
951 | + difference between the two sets is that pages in pageset 1 are atomically | |
952 | + copied, and pages in pageset 2 are written to disk without being copied | |
953 | + first. A page CAN be written to disk without being copied first if and only | |
954 | + if its contents will not be modified or used at any time after userspace | |
955 | + processes are frozen. A page MUST be in pageset 1 if its contents are | |
956 | + modified or used at any time after userspace processes have been frozen. | |
957 | + | |
958 | + Normally (ie if this option is enabled), TuxOnIce will put all pages on the | |
959 | + per-zone LRUs in pageset2, then remove those pages used by any userspace | |
960 | + user interface helper and TuxOnIce storage manager that are running, | |
961 | + together with pages used by the GEM memory manager introduced around 2.6.28 | |
962 | + kernels. | |
963 | + | |
964 | + If this option is disabled, a much more conservative approach will be taken. | |
965 | + The only pages in pageset2 will be those belonging to userspace processes, | |
966 | + with the exclusion of those belonging to the TuxOnIce userspace helpers | |
967 | + mentioned above. This will result in a much smaller pageset2, and will | |
968 | + therefore result in smaller images than are possible with this option | |
969 | + enabled. | |
970 | + | |
971 | + - ignore_rootfs | |
972 | + | |
973 | + TuxOnIce records which device is mounted as the root filesystem when | |
974 | + writing the hibernation image. It will normally check at resume time that | |
975 | + this device isn't already mounted - that would be a cause of filesystem | |
976 | + corruption. In some particular cases (RAM based root filesystems), you | |
977 | + might want to disable this check. This option allows you to do that. | |
978 | + | |
2380c486 JR |
979 | + - image_exists: |
980 | + | |
981 | + Can be used in a script to determine whether a valid image exists at the | |
982 | + location currently pointed to by resume=. Returns up to three lines. | |
983 | + The first is whether an image exists (-1 for unsure, otherwise 0 or 1). | |
984 | + If an image exists, additional lines will return the machine and version. | |
985 | + Echoing anything to this entry removes any current image. | |
986 | + | |
987 | + - image_size_limit: | |
988 | + | |
989 | + The maximum size of hibernation image written to disk, measured in megabytes | |
990 | + (1024*1024). | |
991 | + | |
992 | + - last_result: | |
993 | + | |
994 | + The result of the last hibernation cycle, as defined in | |
995 | + include/linux/suspend-debug.h with the values SUSPEND_ABORTED to | |
996 | + SUSPEND_KEPT_IMAGE. This is a bitmask. | |
997 | + | |
9474138d AM |
998 | + - late_cpu_hotplug: |
999 | + | |
1000 | + This sysfs entry controls whether cpu hotplugging is done - as normal - just | |
1001 | + before (unplug) and after (replug) the atomic copy/restore (so that all | |
1002 | + CPUs/cores are available for multithreaded I/O). The alternative is to | |
1003 | + unplug all secondary CPUs/cores at the start of hibernating/resuming, and | |
1004 | + replug them at the end of resuming. No multithreaded I/O will be possible in | |
1005 | + this configuration, but the odd machine has been reported to require it. | |
1006 | + | |
1007 | + - lid_file: | |
1008 | + | |
1009 | + This determines which ACPI button file we look in to determine whether the | |
1010 | + lid is open or closed after resuming from suspend to disk or power off. | |
1011 | + If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state | |
1012 | + and check its contents at the appropriate moment. See post_wake_state below | |
1013 | + for more details on how this entry is used. | |
1014 | + | |
2380c486 JR |
1015 | + - log_everything (CONFIG_PM_DEBUG): |
1016 | + | |
1017 | + Setting this option results in all messages printed being logged. Normally, | |
1018 | + only a subset are logged, so as to not slow the process and not clutter the | |
1019 | + logs. Useful for debugging. It can be toggled during a cycle by pressing | |
1020 | + 'L'. | |
1021 | + | |
9474138d AM |
1022 | + - no_load_direct: |
1023 | + | |
1024 | + This is a debugging option. If, when loading the atomically copied pages of | |
1025 | + an image, TuxOnIce finds that the destination address for a page is free, | |
1026 | + it will normally allocate the image, load the data directly into that | |
1027 | + address and skip it in the atomic restore. If this option is disabled, the | |
1028 | + page will be loaded somewhere else and atomically restored like other pages. | |
1029 | + | |
1030 | + - no_flusher_thread: | |
1031 | + | |
1032 | + When doing multithreaded I/O (see below), the first online CPU can be used | |
1033 | + to _just_ submit compressed pages when writing the image, rather than | |
1034 | + compressing and submitting data. This option is normally disabled, but has | |
1035 | + been included because Nigel would like to see whether it will be more useful | |
1036 | + as the number of cores/cpus in computers increases. | |
1037 | + | |
1038 | + - no_multithreaded_io: | |
1039 | + | |
1040 | + TuxOnIce will normally create one thread per cpu/core on your computer, | |
1041 | + each of which will then perform I/O. This will generally result in | |
1042 | + throughput that's the maximum the storage medium can handle. There | |
1043 | + shouldn't be any reason to disable multithreaded I/O now, but this option | |
1044 | + has been retained for debugging purposes. | |
1045 | + | |
1046 | + - no_pageset2 | |
1047 | + | |
1048 | + See the entry for full_pageset2 above for an explanation of pagesets. | |
1049 | + Enabling this option causes TuxOnIce to do an atomic copy of all pages, | |
1050 | + thereby limiting the maximum image size to 1/2 of memory, as swsusp does. | |
1051 | + | |
1052 | + - no_pageset2_if_unneeded | |
1053 | + | |
1054 | + See the entry for full_pageset2 above for an explanation of pagesets. | |
1055 | + Enabling this option causes TuxOnIce to act like no_pageset2 was enabled | |
1056 | + if and only if it isn't needed anyway. This option may still make TuxOnIce | |
1057 | + less reliable because pageset2 pages are normally used to store the | |
1058 | + atomic copy - drivers that want to do allocations of larger amounts of | |
1059 | + memory in one shot will be more likely to find that those amounts aren't | |
1060 | + available if this option is enabled. | |
1061 | + | |
2380c486 JR |
1062 | + - pause_between_steps (CONFIG_PM_DEBUG): |
1063 | + | |
1064 | + This option is used during debugging, to make TuxOnIce pause between | |
1065 | + each step of the process. It is ignored when the nice display is on. | |
1066 | + | |
9474138d AM |
1067 | + - post_wake_state: |
1068 | + | |
1069 | + TuxOnIce provides support for automatically waking after a user-selected | |
1070 | + delay, and using a different powerdown method if the lid is still closed. | |
1071 | + (Yes, we're assuming a laptop). This entry lets you choose what state | |
1072 | + should be entered next. The values are those described under | |
1073 | + powerdown_method, below. It can be used to suspend to RAM after hibernating, | |
1074 | + then powerdown properly after (say) 20 minutes. It can also be used to power down | |
1075 | + properly, then wake at (say) 6.30am and suspend to RAM until you're ready | |
1076 | + to use the machine. | |
1077 | + | |
2380c486 JR |
1078 | + - powerdown_method: |
1079 | + | |
1080 | + Used to select a method by which TuxOnIce should powerdown after writing the | |
1081 | + image. Currently: | |
1082 | + | |
1083 | + 0: Don't use ACPI to power off. | |
1084 | + 3: Attempt to enter Suspend-to-ram. | |
1085 | + 4: Attempt to enter ACPI S4 mode. | |
1086 | + 5: Attempt to power down via ACPI S5 mode. | |
1087 | + | |
1088 | + Note that these options are highly dependent upon your hardware & software: | |
1089 | + | |
1090 | + 3: When successful, your machine suspends to ram instead of powering off. | |
1091 | + The advantage of using this mode is that it doesn't matter whether your | |
1092 | + battery has enough charge to make it through to your next resume. If it | |
1093 | + lasts, you will simply resume from suspend to ram (and the image on disk | |
1094 | + will be discarded). If the battery runs out, you will resume from disk | |
1095 | + instead. The disadvantage is that it takes longer than a normal | |
1096 | + suspend-to-ram to enter the state, since the suspend-to-disk image needs | |
1097 | + to be written first. | |
1098 | + 4/5: When successful, your machine will be off and consume (almost) no power. | |
1099 | + But it might still react to some external events like opening the lid or | |
1100 | + traffic on a network or usb device. For the bios, resume is then the same | |
1101 | + as warm boot, similar to a situation where you used the command `reboot' | |
1102 | + to reboot your machine. If your machine has problems on warm boot or if | |
1103 | + you want to protect your machine with the bios password, this is probably | |
1104 | + not the right choice. Mode 4 may be necessary on some machines where ACPI | |
1105 | + wake up methods need to be run to properly reinitialise hardware after a | |
1106 | + hibernation cycle. | |
1107 | + 0: Switch the machine completely off. The only possible wakeup is the power | |
1108 | + button. For the bios, resume is then the same as a cold boot, in | |
1109 | + particular you would have to provide your bios boot password if your | |
1110 | + machine uses that feature for booting. | |
1111 | + | |
1112 | + - progressbar_granularity_limit: | |
1113 | + | |
1114 | + This option can be used to limit the granularity of the progress bar | |
1115 | + displayed with a bootsplash screen. The value is the maximum number of | |
1116 | + steps. That is, 10 will make the progress bar jump in 10% increments. | |
1117 | + | |
1118 | + - reboot: | |
1119 | + | |
1120 | + This option causes TuxOnIce to reboot rather than powering down | |
1121 | + at the end of saving an image. It can be toggled during a cycle by pressing | |
1122 | + 'R'. | |
1123 | + | |
9474138d AM |
1124 | + - resume: |
1125 | + | |
1126 | + This sysfs entry can be used to read and set the location in which TuxOnIce | |
1127 | + will look for the signature of an image - the value set using resume= at | |
1128 | + boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By | |
1129 | + writing to this file as well as modifying your bootloader's configuration | |
1130 | + file (eg menu.lst), you can set or reset the location of your image or the | |
1131 | + method of storing the image without rebooting. | |
1132 | + | |
1133 | + - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP): | |
1134 | + | |
1135 | + This option makes | |
1136 | + | |
1137 | + echo disk > /sys/power/state | |
1138 | + | |
1139 | + activate TuxOnIce instead of swsusp. Regardless of whether this option is | |
1140 | + enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce | |
1141 | + to check for an image too. This is due to the fact that at resume time, we | |
1142 | + can't know whether this option was enabled until we see if an image is there | |
1143 | + for us to resume from. (And when an image exists, we don't care whether we | |
1144 | + did replace swsusp anyway - we just want to resume). | |
1145 | + | |
2380c486 JR |
1146 | + - resume_commandline: |
1147 | + | |
1148 | + This entry can be read after resuming to see the commandline that was used | |
1149 | + when resuming began. You might use this to set up two bootloader entries | |
1150 | + that are the same apart from the fact that one includes an extra append= | |
1151 | + argument "at_work=1". You could then grep resume_commandline in your | |
1152 | + post-resume scripts and configure networking (for example) differently | |
1153 | + depending upon whether you're at home or work. resume_commandline can be | |
1154 | + set to arbitrary text if you wish to remove sensitive contents. | |
1155 | + | |
1156 | + - swap/swapfilename: | |
1157 | + | |
1158 | + This entry is used to specify the swapfile or partition that | |
1159 | + TuxOnIce will attempt to swapon/swapoff automatically. Thus, if | |
1160 | + I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically | |
1161 | + for my hibernation image, I would | |
1162 | + | |
1163 | + echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile | |
1164 | + | |
1165 | + /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the | |
1166 | + swapon and swapoff occur while other processes are frozen (including kswapd) | |
1167 | + so this swap file will not be used up when attempting to free memory. The | |
1168 | + partition/file is also given the highest priority, so other swapfiles/partitions | |
1169 | + will only be used to save the image when this one is filled. | |
1170 | + | |
1171 | + The value of this file is used by headerlocations along with any currently | |
1172 | + activated swapfiles/partitions. | |
1173 | + | |
1174 | + - swap/headerlocations: | |
1175 | + | |
1176 | + This option tells you the resume= options to use for swap devices you | |
1177 | + currently have activated. It is particularly useful when you only want to | |
1178 | + use a swap file to store your image. See above for further details. | |
1179 | + | |
9474138d | 1180 | + - test_bio |
2380c486 | 1181 | + |
9474138d AM |
1182 | + This is a debugging option. When enabled, TuxOnIce will not hibernate. |
1183 | + Instead, when asked to write an image, it will skip the atomic copy, | |
1184 | + just doing the writing of the image and then returning control to the | |
1185 | + user at the point where it would have powered off. This is useful for | |
1186 | + testing throughput in different configurations. | |
1187 | + | |
1188 | + - test_filter_speed | |
1189 | + | |
1190 | + This is a debugging option. When enabled, TuxOnIce will not hibernate. | |
1191 | + Instead, when asked to write an image, it will not write anything or do | |
1192 | + an atomic copy, but will only run any enabled compression algorithm on the | |
1193 | + data that would have been written (the source pages of the atomic copy in | |
1194 | + the case of pageset 1). This is useful for comparing the performance of | |
1195 | + compression algorithms and for determining the extent to which an upgrade | |
1196 | + to your storage method would improve hibernation speed. | |
2380c486 JR |
1197 | + |
1198 | + - user_interface/debug_sections (CONFIG_PM_DEBUG): | |
1199 | + | |
1200 | + This value, together with the console log level, controls what debugging | |
1201 | + information is displayed. The console log level determines the level of | |
1202 | + detail, and this value determines what detail is displayed. This value is | |
1203 | + a bit vector, and the meaning of the bits can be found in the kernel tree | |
1204 | + in include/linux/tuxonice.h. It can be overridden using the kernel's | |
1205 | + command line option suspend_dbg. | |
1206 | + | |
1207 | + - user_interface/default_console_level (CONFIG_PM_DEBUG): | |
1208 | + | |
1209 | + This determines the value of the console log level at the start of a | |
1210 | + hibernation cycle. If debugging is compiled in, the console log level can be | |
1211 | + changed during a cycle by pressing the digit keys. Meanings are: | |
1212 | + | |
1213 | + 0: Nice display. | |
1214 | + 1: Nice display plus numerical progress. | |
1215 | + 2: Errors only. | |
1216 | + 3: Low level debugging info. | |
1217 | + 4: Medium level debugging info. | |
1218 | + 5: High level debugging info. | |
1219 | + 6: Verbose debugging info. | |
1220 | + | |
1221 | + - user_interface/enable_escape: | |
1222 | + | |
1223 | + Setting this to "1" will enable you to abort a hibernation cycle or resuming by | |
1224 | + pressing escape, "0" (default) disables this feature. Note that enabling | |
1225 | + this option means that you cannot initiate a hibernation cycle and then walk | |
9474138d | 1226 | + away from your computer, expecting it to be secure. With this feature disabled, |
2380c486 JR |
1227 | + you can validly have this expectation once TuxOnIce begins to write the |
1228 | + image to disk. (Prior to this point, it is possible that TuxOnIce might | |
1229 | + abort because of failure to freeze all processes or because constraints | |
1230 | + on its ability to save the image are not met). | |
1231 | + | |
9474138d AM |
1232 | + - user_interface/program |
1233 | + | |
1234 | + This entry is used to tell TuxOnice what userspace program to use for | |
1235 | + providing a user interface while hibernating. The program uses a netlink | |
1236 | + socket to pass messages back and forward to the kernel, allowing all of the | |
1237 | + functions formerly implemented in the kernel user interface components. | |
1238 | + | |
2380c486 JR |
1239 | + - version: |
1240 | + | |
1241 | + The version of TuxOnIce you have compiled into the currently running kernel. | |
1242 | + | |
9474138d AM |
1243 | + - wake_alarm_dir: |
1244 | + | |
1245 | + As mentioned above (post_wake_state), TuxOnIce supports automatically waking | |
1246 | + after some delay. This entry allows you to select which wake alarm to use. | |
1247 | + It should contain the value "rtc0" if you're wanting to use | |
1248 | + /sys/class/rtc/rtc0. | |
1249 | + | |
1250 | + - wake_delay: | |
1251 | + | |
1252 | + This value determines the delay from the end of writing the image until the | |
1253 | + wake alarm is triggered. You can set an absolute time by writing the desired | |
1254 | + time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values | |
1255 | + empty. | |
1256 | + | |
1257 | + Note that for the wakeup to actually occur, you may need to modify entries | |
1258 | + in /proc/acpi/wakeup. This is done by echoing the name of the button in the | |
1259 | + first column (eg PBTN) into the file. | |
1260 | + | |
2380c486 JR |
1261 | +7. How do you get support? |
1262 | + | |
1263 | + Glad you asked. TuxOnIce is being actively maintained and supported | |
1264 | + by Nigel (the guy doing most of the kernel coding at the moment), Bernard | |
1265 | + (who maintains the hibernate script and userspace user interface components) | |
1266 | + and its users. | |
1267 | + | |
1268 | + Resources available include HowTos, FAQs and a Wiki, all available via | |
1269 | + tuxonice.net. You can find the mailing lists there. | |
1270 | + | |
1271 | +8. I think I've found a bug. What should I do? | |
1272 | + | |
1273 | + By far and away, the most common problems people have with TuxOnIce | |
1274 | + relate to drivers not having adequate power management support. In this | |
1275 | + case, it is not a bug with TuxOnIce, but we can still help you. As we | |
1276 | + mentioned above, such issues can usually be worked around by building the | |
1277 | + functionality as modules and unloading them while hibernating. Please visit | |
1278 | + the Wiki for up-to-date lists of known issues and work arounds. | |
1279 | + | |
1280 | + If this information doesn't help, try running: | |
1281 | + | |
1282 | + hibernate --bug-report | |
1283 | + | |
1284 | + ..and sending the output to the users mailing list. | |
1285 | + | |
1286 | + Good information on how to provide us with useful information from an | |
1287 | + oops is found in the file REPORTING-BUGS, in the top level directory | |
1288 | + of the kernel tree. If you get an oops, please especially note the | |
1289 | + information about running what is printed on the screen through ksymoops. | |
1290 | + The raw information is useless. | |
1291 | + | |
1292 | +9. When will XXX be supported? | |
1293 | + | |
1294 | + If there's a feature missing from TuxOnIce that you'd like, feel free to | |
1295 | + ask. We try to be obliging, within reason. | |
1296 | + | |
1297 | + Patches are welcome. Please send to the list. | |
1298 | + | |
1299 | +10. How does it work? | |
1300 | + | |
1301 | + TuxOnIce does its work in a number of steps. | |
1302 | + | |
1303 | + a. Freezing system activity. | |
1304 | + | |
1305 | + The first main stage in hibernating is to stop all other activity. This is | |
1306 | + achieved in stages. Processes are considered in four groups, which we will | |
1307 | + describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE | |
1308 | + flag, kernel threads without this flag, userspace processes with the | |
1309 | + PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are | |
1310 | + untouched by the refrigerator code. They are allowed to run during hibernating | |
1311 | + and resuming, and are used to support user interaction, storage access or the | |
1312 | + like. Other kernel threads (those unneeded while hibernating) are frozen last. | |
1313 | + This leaves us with userspace processes that need to be frozen. When a | |
1314 | + process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on | |
1315 | + that process for the duration of that call. Processes that have this flag are | |
1316 | + frozen after processes without it, so that we can seek to ensure that dirty | |
1317 | + data is synced to disk as quickly as possible in a situation where other | |
1318 | + processes may be submitting writes at the same time. Freezing the processes | |
1319 | + that are submitting data stops new I/O from being submitted. Syncthreads can | |
1320 | + then cleanly finish their work. So the order is: | |
1321 | + | |
1322 | + - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE; | |
1323 | + - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE); | |
1324 | + - Kernel processes without PF_NOFREEZE. | |
1325 | + | |
1326 | + b. Eating memory. | |
1327 | + | |
1328 | + For a successful hibernation cycle, you need to have enough disk space to store the | |
1329 | + image and enough memory for the various limitations of TuxOnIce's | |
1330 | + algorithm. You can also specify a maximum image size. In order to meet | |
1331 | + those constraints, TuxOnIce may 'eat' memory. If, after freezing | |
1332 | + processes, the constraints aren't met, TuxOnIce will thaw all the | |
1333 | + other processes and begin to eat memory until its calculations indicate | |
1334 | + the constraints are met. It will then freeze processes again and recheck | |
1335 | + its calculations. | |
1336 | + | |
1337 | + c. Allocation of storage. | |
1338 | + | |
1339 | + Next, TuxOnIce allocates the storage that will be used to save | |
1340 | + the image. | |
1341 | + | |
1342 | + The core of TuxOnIce knows nothing about how or where pages are stored. We | |
1343 | + therefore request the active allocator (remember you might have compiled in | |
1344 | + more than one!) to allocate enough storage for our expected image size. If | |
1345 | + this request cannot be fulfilled, we eat more memory and try again. If it | |
1346 | + is fulfilled, we seek to allocate additional storage, just in case our | |
1347 | + expected compression ratio (if any) isn't achieved. This time, however, we | |
1348 | + just continue if we can't allocate enough storage. | |
1349 | + | |
1350 | + If these calls to our allocator change the characteristics of the image | |
1351 | + such that we haven't allocated enough memory, we also loop. (The allocator | |
1352 | + may well need to allocate space for its storage information). | |
1353 | + | |
1354 | + d. Write the first part of the image. | |
1355 | + | |
1356 | + TuxOnIce stores the image in two sets of pages called 'pagesets'. | |
1357 | + Pageset 2 contains pages on the active and inactive lists; essentially | |
1358 | + the page cache. Pageset 1 contains all other pages, including the kernel. | |
1359 | + We use two pagesets for one important reason: We need to make an atomic copy | |
1360 | + of the kernel to ensure consistency of the image. Without a second pageset, | |
1361 | + that would limit us to an image that was at most half the amount of memory | |
1362 | + available. Using two pagesets allows us to store a full image. Since pageset | |
1363 | + 2 pages won't be needed in saving pageset 1, we first save pageset 2 pages. | |
1364 | + We can then make our atomic copy of the remaining pages using both pageset 2 | |
1365 | + pages and any other pages that are free. While saving both pagesets, we are | |
1366 | + careful not to corrupt the image. Among other things, we use lowlevel block | |
1367 | + I/O routines that don't change the pagecache contents. | |
1368 | + | |
1369 | + The next step, then, is writing pageset 2. | |
1370 | + | |
1371 | + e. Suspending drivers and storing processor context. | |
1372 | + | |
1373 | + Having written pageset2, TuxOnIce calls the power management functions to | |
1374 | + notify drivers of the hibernation, and saves the processor state in preparation | |
1375 | + for the atomic copy of memory we are about to make. | |
1376 | + | |
1377 | + f. Atomic copy. | |
1378 | + | |
1379 | + At this stage, everything else but the TuxOnIce code is halted. Processes | |
1380 | + are frozen or idling, drivers are quiesced and have stored (ideally and where | |
1381 | + necessary) their configuration in memory we are about to atomically copy. | |
1382 | + In our lowlevel architecture specific code, we have saved the CPU state. | |
1383 | + We can therefore now do our atomic copy before resuming drivers etc. | |
1384 | + | |
1385 | + g. Save the atomic copy (pageset 1). | |
1386 | + | |
1387 | + TuxOnIce can then write the atomic copy of the remaining pages. Since we | |
1388 | + have copied the pages into other locations, we can continue to use the | |
1389 | + normal block I/O routines without fear of corrupting our image. | |
1390 | + | |
1391 | + h. Save the image header. | |
1392 | + | |
1393 | + Nearly there! We save our settings and other parameters needed for | |
1394 | + reloading pageset 1 in an 'image header'. We also tell our allocator to | |
1395 | + serialise its data at this stage, so that it can reread the image at resume | |
1396 | + time. | |
1397 | + | |
1398 | + i. Set the image header. | |
1399 | + | |
1400 | + Finally, we edit the header at our resume= location. The signature is | |
1401 | + changed by the allocator to reflect the fact that an image exists, and to | |
1402 | + point to the start of that data if necessary (swap allocator). | |
1403 | + | |
1404 | + j. Power down. | |
1405 | + | |
1406 | + Or reboot if we're debugging and the appropriate option is selected. | |
1407 | + | |
1408 | + Whew! | |
1409 | + | |
1410 | + Reloading the image. | |
1411 | + -------------------- | |
1412 | + | |
1413 | + Reloading the image is essentially the reverse of all the above. We load | |
1414 | + our copy of pageset 1, being careful to choose locations that aren't going | |
1415 | + to be overwritten as we copy it back (We start very early in the boot | |
1416 | + process, so there are no other processes to quiesce here). We then copy | |
1417 | + pageset 1 back to its original location in memory and restore the process | |
1418 | + context. We are now running with the original kernel. Next, we reload the | |
1419 | + pageset 2 pages, free the memory and swap used by TuxOnIce, restore | |
1420 | + the pageset header and restart processes. Sounds easy in comparison to | |
1421 | + hibernating, doesn't it! | |
1422 | + | |
1423 | + There is of course more to TuxOnIce than this, but this explanation | |
1424 | + should be a good start. If there's interest, I'll write further | |
1425 | + documentation on range pages and the low level I/O. | |
1426 | + | |
1427 | +11. Who wrote TuxOnIce? | |
1428 | + | |
1429 | + (Answer based on the writings of Florent Chabaud, credits in files and | |
1430 | + Nigel's limited knowledge; apologies to anyone missed out!) | |
1431 | + | |
1432 | + The main developers of TuxOnIce have been... | |
1433 | + | |
1434 | + Gabor Kuti | |
1435 | + Pavel Machek | |
1436 | + Florent Chabaud | |
1437 | + Bernard Blackham | |
1438 | + Nigel Cunningham | |
1439 | + | |
1440 | + Significant portions of swsusp, the code in the vanilla kernel which | |
1441 | + TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should | |
1442 | + also be expressed to him. | |
1443 | + | |
1444 | + The above mentioned developers have been aided in their efforts by a host | |
1445 | + of hundreds, if not thousands of testers and people who have submitted bug | |
1446 | + fixes & suggestions. Of special note are the efforts of Michael Frank, who | |
1447 | + had his computers repetitively hibernate and resume for literally tens of | |
1448 | + thousands of cycles and developed scripts to stress the system and test | |
1449 | + TuxOnIce far beyond the point most of us (Nigel included!) would consider | |
1450 | + testing. His efforts have contributed as much to TuxOnIce as any of the | |
1451 | + names above. | |
1452 | diff --git a/MAINTAINERS b/MAINTAINERS | |
5bd2511a | 1453 | index 6d119c9..a2f6ce6 100644 |
2380c486 JR |
1454 | --- a/MAINTAINERS |
1455 | +++ b/MAINTAINERS | |
5bd2511a | 1456 | @@ -5679,6 +5679,13 @@ S: Maintained |
9474138d AM |
1457 | F: drivers/tc/ |
1458 | F: include/linux/tc.h | |
2380c486 JR |
1459 | |
1460 | +TUXONICE (ENHANCED HIBERNATION) | |
1461 | +P: Nigel Cunningham | |
1462 | +M: nigel@tuxonice.net | |
e999739a | 1463 | +L: tuxonice-devel@tuxonice.net |
2380c486 JR |
1464 | +W: http://tuxonice.net |
1465 | +S: Maintained | |
1466 | + | |
1467 | U14-34F SCSI DRIVER | |
92bca44c AM |
1468 | M: Dario Ballabio <ballabio_dario@emc.com> |
1469 | L: linux-scsi@vger.kernel.org | |
2380c486 | 1470 | diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c |
5bd2511a | 1471 | index 9fc02dc..a99d7e7 100644 |
2380c486 JR |
1472 | --- a/arch/powerpc/mm/pgtable_32.c |
1473 | +++ b/arch/powerpc/mm/pgtable_32.c | |
5bd2511a | 1474 | @@ -427,6 +427,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) |
2380c486 JR |
1475 | |
1476 | change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); | |
1477 | } | |
1478 | +EXPORT_SYMBOL_GPL(kernel_map_pages); | |
1479 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | |
1480 | ||
1481 | static int fixmaps; | |
1482 | diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c | |
de6743ae | 1483 | index 8e1aac8..84568af 100644 |
2380c486 JR |
1484 | --- a/arch/x86/kernel/reboot.c |
1485 | +++ b/arch/x86/kernel/reboot.c | |
de6743ae | 1486 | @@ -718,6 +718,7 @@ void machine_restart(char *cmd) |
2380c486 JR |
1487 | { |
1488 | machine_ops.restart(cmd); | |
1489 | } | |
1490 | +EXPORT_SYMBOL_GPL(machine_restart); | |
1491 | ||
1492 | void machine_halt(void) | |
1493 | { | |
1494 | diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c | |
5bd2511a | 1495 | index 532e793..bad27ae 100644 |
2380c486 JR |
1496 | --- a/arch/x86/mm/pageattr.c |
1497 | +++ b/arch/x86/mm/pageattr.c | |
5bd2511a | 1498 | @@ -1354,6 +1354,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) |
2380c486 JR |
1499 | */ |
1500 | __flush_tlb_all(); | |
1501 | } | |
1502 | +EXPORT_SYMBOL_GPL(kernel_map_pages); | |
1503 | ||
1504 | #ifdef CONFIG_HIBERNATION | |
1505 | ||
5bd2511a | 1506 | @@ -1368,7 +1369,7 @@ bool kernel_page_present(struct page *page) |
2380c486 JR |
1507 | pte = lookup_address((unsigned long)page_address(page), &level); |
1508 | return (pte_val(*pte) & _PAGE_PRESENT); | |
1509 | } | |
1510 | - | |
1511 | +EXPORT_SYMBOL_GPL(kernel_page_present); | |
1512 | #endif /* CONFIG_HIBERNATION */ | |
1513 | ||
1514 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | |
92bca44c | 1515 | diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c |
5bd2511a | 1516 | index 1290ba5..2280003 100644 |
92bca44c AM |
1517 | --- a/arch/x86/power/cpu.c |
1518 | +++ b/arch/x86/power/cpu.c | |
5bd2511a | 1519 | @@ -114,9 +114,7 @@ void save_processor_state(void) |
2380c486 JR |
1520 | { |
1521 | __save_processor_state(&saved_context); | |
1522 | } | |
92bca44c AM |
1523 | -#ifdef CONFIG_X86_32 |
1524 | EXPORT_SYMBOL(save_processor_state); | |
1525 | -#endif | |
2380c486 JR |
1526 | |
1527 | static void do_fpu_end(void) | |
1528 | { | |
1529 | diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c | |
de6743ae | 1530 | index 3769079..4dabd68 100644 |
2380c486 JR |
1531 | --- a/arch/x86/power/hibernate_32.c |
1532 | +++ b/arch/x86/power/hibernate_32.c | |
de6743ae AM |
1533 | @@ -9,6 +9,7 @@ |
1534 | #include <linux/gfp.h> | |
2380c486 JR |
1535 | #include <linux/suspend.h> |
1536 | #include <linux/bootmem.h> | |
1537 | +#include <linux/module.h> | |
1538 | ||
1539 | #include <asm/system.h> | |
1540 | #include <asm/page.h> | |
de6743ae | 1541 | @@ -164,6 +165,7 @@ int swsusp_arch_resume(void) |
2380c486 JR |
1542 | restore_image(); |
1543 | return 0; | |
1544 | } | |
1545 | +EXPORT_SYMBOL_GPL(swsusp_arch_resume); | |
1546 | ||
1547 | /* | |
1548 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | |
1549 | diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c | |
de6743ae | 1550 | index d24f983..803b20a 100644 |
2380c486 JR |
1551 | --- a/arch/x86/power/hibernate_64.c |
1552 | +++ b/arch/x86/power/hibernate_64.c | |
de6743ae AM |
1553 | @@ -11,6 +11,7 @@ |
1554 | #include <linux/gfp.h> | |
2380c486 JR |
1555 | #include <linux/smp.h> |
1556 | #include <linux/suspend.h> | |
1557 | +#include <linux/module.h> | |
1558 | #include <asm/proto.h> | |
1559 | #include <asm/page.h> | |
1560 | #include <asm/pgtable.h> | |
de6743ae | 1561 | @@ -119,6 +120,7 @@ int swsusp_arch_resume(void) |
2380c486 JR |
1562 | restore_image(); |
1563 | return 0; | |
1564 | } | |
1565 | +EXPORT_SYMBOL_GPL(swsusp_arch_resume); | |
1566 | ||
1567 | /* | |
1568 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | |
de6743ae | 1569 | @@ -169,3 +171,4 @@ int arch_hibernation_header_restore(void *addr) |
2380c486 JR |
1570 | restore_cr3 = rdr->cr3; |
1571 | return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; | |
1572 | } | |
1573 | +EXPORT_SYMBOL_GPL(arch_hibernation_header_restore); | |
7e46296a | 1574 | diff --git a/block/Makefile b/block/Makefile |
5bd2511a | 1575 | index 0bb499a..49f36d0 100644 |
7e46296a AM |
1576 | --- a/block/Makefile |
1577 | +++ b/block/Makefile | |
5bd2511a | 1578 | @@ -5,7 +5,8 @@ |
7e46296a AM |
1579 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
1580 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | |
1581 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | |
5bd2511a AM |
1582 | - blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o |
1583 | + blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \ | |
1584 | + uuid.o | |
7e46296a AM |
1585 | |
1586 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | |
5dd10c98 | 1587 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o |
7e46296a | 1588 | diff --git a/block/blk-core.c b/block/blk-core.c |
5bd2511a | 1589 | index f84cce4..6c28098 100644 |
7e46296a AM |
1590 | --- a/block/blk-core.c |
1591 | +++ b/block/blk-core.c | |
1592 | @@ -37,6 +37,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); | |
1593 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | |
1594 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | |
1595 | ||
1596 | +int trap_non_toi_io; | |
1597 | +EXPORT_SYMBOL_GPL(trap_non_toi_io); | |
1598 | + | |
1599 | static int __make_request(struct request_queue *q, struct bio *bio); | |
1600 | ||
1601 | /* | |
5bd2511a | 1602 | @@ -1582,6 +1585,9 @@ void submit_bio(int rw, struct bio *bio) |
7e46296a AM |
1603 | |
1604 | bio->bi_rw |= rw; | |
1605 | ||
1606 | + if (unlikely(trap_non_toi_io)) | |
1607 | + BUG_ON(!bio_rw_flagged(bio, BIO_RW_TUXONICE)); | |
1608 | + | |
1609 | /* | |
1610 | * If it's a regular read/write or a barrier with data attached, | |
1611 | * go through the normal accounting stuff before submission. | |
1612 | diff --git a/block/genhd.c b/block/genhd.c | |
5bd2511a | 1613 | index 59a2db6..6875d7d 100644 |
7e46296a AM |
1614 | --- a/block/genhd.c |
1615 | +++ b/block/genhd.c | |
1616 | @@ -18,6 +18,8 @@ | |
1617 | #include <linux/buffer_head.h> | |
1618 | #include <linux/mutex.h> | |
1619 | #include <linux/idr.h> | |
1620 | +#include <linux/ctype.h> | |
cacc47f8 | 1621 | +#include <linux/fs_uuid.h> |
7e46296a AM |
1622 | |
1623 | #include "blk.h" | |
1624 | ||
cacc47f8 | 1625 | @@ -1286,3 +1288,84 @@ int invalidate_partition(struct gendisk *disk, int partno) |
7e46296a AM |
1626 | } |
1627 | ||
1628 | EXPORT_SYMBOL(invalidate_partition); | |
1629 | + | |
cacc47f8 | 1630 | +dev_t blk_lookup_fs_info(struct fs_info *seek) |
7e46296a AM |
1631 | +{ |
1632 | + dev_t devt = MKDEV(0, 0); | |
1633 | + struct class_dev_iter iter; | |
1634 | + struct device *dev; | |
cacc47f8 | 1635 | + int best_score = 0; |
7e46296a AM |
1636 | + |
1637 | + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); | |
cacc47f8 | 1638 | + while (best_score < 3 && (dev = class_dev_iter_next(&iter))) { |
7e46296a AM |
1639 | + struct gendisk *disk = dev_to_disk(dev); |
1640 | + struct disk_part_iter piter; | |
1641 | + struct hd_struct *part; | |
1642 | + | |
1643 | + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | |
1644 | + | |
cacc47f8 AM |
1645 | + while (best_score < 3 && (part = disk_part_iter_next(&piter))) { |
1646 | + int score = part_matches_fs_info(part, seek); | |
1647 | + if (score > best_score) { | |
7e46296a | 1648 | + devt = part_devt(part); |
cacc47f8 | 1649 | + best_score = score; |
7e46296a AM |
1650 | + } |
1651 | + } | |
1652 | + disk_part_iter_exit(&piter); | |
1653 | + } | |
1654 | + class_dev_iter_exit(&iter); | |
1655 | + return devt; | |
1656 | +} | |
cacc47f8 | 1657 | +EXPORT_SYMBOL_GPL(blk_lookup_fs_info); |
5dd10c98 AM |
1658 | + |
1659 | +/* Caller uses NULL, key to start. For each match found, we return a bdev on | |
1660 | + * which we have done blkdev_get, and we do the blkdev_put on block devices | |
1661 | + * that are passed to us. When no more matches are found, we return NULL. | |
1662 | + */ | |
1663 | +struct block_device *next_bdev_of_type(struct block_device *last, | |
1664 | + const char *key) | |
1665 | +{ | |
1666 | + dev_t devt = MKDEV(0, 0); | |
1667 | + struct class_dev_iter iter; | |
1668 | + struct device *dev; | |
1669 | + struct block_device *next = NULL, *bdev; | |
1670 | + int got_last = 0; | |
1671 | + | |
1672 | + if (!key) | |
1673 | + goto out; | |
1674 | + | |
1675 | + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); | |
1676 | + while (!devt && (dev = class_dev_iter_next(&iter))) { | |
1677 | + struct gendisk *disk = dev_to_disk(dev); | |
1678 | + struct disk_part_iter piter; | |
1679 | + struct hd_struct *part; | |
1680 | + | |
1681 | + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | |
1682 | + | |
1683 | + while ((part = disk_part_iter_next(&piter))) { | |
1684 | + bdev = bdget(part_devt(part)); | |
1685 | + if (last && !got_last) { | |
1686 | + if (last == bdev) | |
1687 | + got_last = 1; | |
1688 | + continue; | |
1689 | + } | |
1690 | + | |
1691 | + if (blkdev_get(bdev, FMODE_READ)) | |
1692 | + continue; | |
1693 | + | |
1694 | + if (bdev_matches_key(bdev, key)) { | |
1695 | + next = bdev; | |
1696 | + break; | |
1697 | + } | |
1698 | + | |
1699 | + blkdev_put(bdev, FMODE_READ); | |
1700 | + } | |
1701 | + disk_part_iter_exit(&piter); | |
1702 | + } | |
1703 | + class_dev_iter_exit(&iter); | |
1704 | +out: | |
1705 | + if (last) | |
1706 | + blkdev_put(last, FMODE_READ); | |
1707 | + return next; | |
1708 | +} | |
1709 | +EXPORT_SYMBOL_GPL(next_bdev_of_type); | |
7e46296a AM |
1710 | diff --git a/block/uuid.c b/block/uuid.c |
1711 | new file mode 100644 | |
cacc47f8 | 1712 | index 0000000..37fd8e4 |
7e46296a AM |
1713 | --- /dev/null |
1714 | +++ b/block/uuid.c | |
cacc47f8 | 1715 | @@ -0,0 +1,492 @@ |
7e46296a AM |
1716 | +#include <linux/blkdev.h> |
1717 | +#include <linux/ctype.h> | |
cacc47f8 | 1718 | +#include <linux/fs_uuid.h> |
de6743ae | 1719 | +#include <linux/slab.h> |
7e46296a | 1720 | + |
5dd10c98 AM |
1721 | +static int debug_enabled; |
1722 | + | |
1723 | +#define PRINTK(fmt, args...) do { \ | |
1724 | + if (debug_enabled) \ | |
1725 | + printk(KERN_DEBUG fmt, ## args); \ | |
1726 | + } while(0) | |
1727 | + | |
1728 | +#define PRINT_HEX_DUMP(v1, v2, v3, v4, v5, v6, v7, v8) \ | |
1729 | + do { \ | |
1730 | + if (debug_enabled) \ | |
1731 | + print_hex_dump(v1, v2, v3, v4, v5, v6, v7, v8); \ | |
1732 | + } while(0) | |
7e46296a AM |
1733 | + |
1734 | +/* | |
1735 | + * Simple UUID translation | |
1736 | + */ | |
1737 | + | |
1738 | +struct uuid_info { | |
5dd10c98 | 1739 | + const char *key; |
7e46296a AM |
1740 | + const char *name; |
1741 | + long bkoff; | |
1742 | + unsigned sboff; | |
1743 | + unsigned sig_len; | |
1744 | + const char *magic; | |
1745 | + int uuid_offset; | |
5dd10c98 AM |
1746 | + int last_mount_offset; |
1747 | + int last_mount_size; | |
7e46296a AM |
1748 | +}; |
1749 | + | |
1750 | +/* | |
1751 | + * Based on libuuid's blkid_magic array. Note that I don't | |
1752 | + have uuid offsets for all of these yet - missing ones are 0x0. | |
1753 | + * Further information welcome. | |
1754 | + * | |
5dd10c98 | 1755 | + * Rearranged by page of fs signature for optimisation. |
7e46296a AM |
1756 | + */ |
1757 | +static struct uuid_info uuid_list[] = { | |
5dd10c98 AM |
1758 | + { NULL, "oracleasm", 0, 32, 8, "ORCLDISK", 0x0, 0, 0 }, |
1759 | + { "ntfs", "ntfs", 0, 3, 8, "NTFS ", 0x0, 0, 0 }, | |
1760 | + { "vfat", "vfat", 0, 0x52, 5, "MSWIN", 0x0, 0, 0 }, | |
1761 | + { "vfat", "vfat", 0, 0x52, 8, "FAT32 ", 0x0, 0, 0 }, | |
1762 | + { "vfat", "vfat", 0, 0x36, 5, "MSDOS", 0x0, 0, 0 }, | |
1763 | + { "vfat", "vfat", 0, 0x36, 8, "FAT16 ", 0x0, 0, 0 }, | |
1764 | + { "vfat", "vfat", 0, 0x36, 8, "FAT12 ", 0x0, 0, 0 }, | |
1765 | + { "vfat", "vfat", 0, 0, 1, "\353", 0x0, 0, 0 }, | |
1766 | + { "vfat", "vfat", 0, 0, 1, "\351", 0x0, 0, 0 }, | |
1767 | + { "vfat", "vfat", 0, 0x1fe, 2, "\125\252", 0x0, 0, 0 }, | |
1768 | + { "xfs", "xfs", 0, 0, 4, "XFSB", 0x14, 0, 0 }, | |
1769 | + { "romfs", "romfs", 0, 0, 8, "-rom1fs-", 0x0, 0, 0 }, | |
1770 | + { "bfs", "bfs", 0, 0, 4, "\316\372\173\033", 0, 0, 0 }, | |
1771 | + { "cramfs", "cramfs", 0, 0, 4, "E=\315\050", 0x0, 0, 0 }, | |
1772 | + { "qnx4", "qnx4", 0, 4, 6, "QNX4FS", 0, 0, 0 }, | |
1773 | + { NULL, "crypt_LUKS", 0, 0, 6, "LUKS\xba\xbe", 0x0, 0, 0 }, | |
1774 | + { "squashfs", "squashfs", 0, 0, 4, "sqsh", 0, 0, 0 }, | |
1775 | + { "squashfs", "squashfs", 0, 0, 4, "hsqs", 0, 0, 0 }, | |
1776 | + { "ocfs", "ocfs", 0, 8, 9, "OracleCFS", 0x0, 0, 0 }, | |
1777 | + { "lvm2pv", "lvm2pv", 0, 0x018, 8, "LVM2 001", 0x0, 0, 0 }, | |
1778 | + { "sysv", "sysv", 0, 0x3f8, 4, "\020~\030\375", 0, 0, 0 }, | |
1779 | + { "ext", "ext", 1, 0x38, 2, "\123\357", 0x468, 0x42c, 4 }, | |
1780 | + { "minix", "minix", 1, 0x10, 2, "\177\023", 0, 0, 0 }, | |
1781 | + { "minix", "minix", 1, 0x10, 2, "\217\023", 0, 0, 0 }, | |
1782 | + { "minix", "minix", 1, 0x10, 2, "\150\044", 0, 0, 0 }, | |
1783 | + { "minix", "minix", 1, 0x10, 2, "\170\044", 0, 0, 0 }, | |
1784 | + { "lvm2pv", "lvm2pv", 1, 0x018, 8, "LVM2 001", 0x0, 0, 0 }, | |
1785 | + { "vxfs", "vxfs", 1, 0, 4, "\365\374\001\245", 0, 0, 0 }, | |
1786 | + { "hfsplus", "hfsplus", 1, 0, 2, "BD", 0x0, 0, 0 }, | |
1787 | + { "hfsplus", "hfsplus", 1, 0, 2, "H+", 0x0, 0, 0 }, | |
1788 | + { "hfsplus", "hfsplus", 1, 0, 2, "HX", 0x0, 0, 0 }, | |
1789 | + { "hfs", "hfs", 1, 0, 2, "BD", 0x0, 0, 0 }, | |
1790 | + { "ocfs2", "ocfs2", 1, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1791 | + { "lvm2pv", "lvm2pv", 0, 0x218, 8, "LVM2 001", 0x0, 0, 0 }, | |
1792 | + { "lvm2pv", "lvm2pv", 1, 0x218, 8, "LVM2 001", 0x0, 0, 0 }, | |
1793 | + { "ocfs2", "ocfs2", 2, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1794 | + { "swap", "swap", 0, 0xff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1795 | + { "swap", "swap", 0, 0xff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1796 | + { "swap", "swsuspend", 0, 0xff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1797 | + { "swap", "swsuspend", 0, 0xff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1798 | + { "swap", "swsuspend", 0, 0xff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1799 | + { "ocfs2", "ocfs2", 4, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1800 | + { "ocfs2", "ocfs2", 8, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1801 | + { "hpfs", "hpfs", 8, 0, 4, "I\350\225\371", 0, 0, 0 }, | |
1802 | + { "reiserfs", "reiserfs", 8, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 }, | |
1803 | + { "reiserfs", "reiserfs", 8, 20, 8, "ReIsErFs", 0x10054, 0, 0 }, | |
1804 | + { "zfs", "zfs", 8, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 }, | |
1805 | + { "zfs", "zfs", 8, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 }, | |
1806 | + { "ufs", "ufs", 8, 0x55c, 4, "T\031\001\000", 0, 0, 0 }, | |
1807 | + { "swap", "swap", 0, 0x1ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1808 | + { "swap", "swap", 0, 0x1ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1809 | + { "swap", "swsuspend", 0, 0x1ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1810 | + { "swap", "swsuspend", 0, 0x1ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1811 | + { "swap", "swsuspend", 0, 0x1ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1812 | + { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr2Fs", 0x10054, 0, 0 }, | |
1813 | + { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr3Fs", 0x10054, 0, 0 }, | |
1814 | + { "reiserfs", "reiserfs", 64, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 }, | |
1815 | + { "reiser4", "reiser4", 64, 0, 7, "ReIsEr4", 0x100544, 0, 0 }, | |
1816 | + { "gfs2", "gfs2", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 }, | |
1817 | + { "gfs", "gfs", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 }, | |
1818 | + { "btrfs", "btrfs", 64, 0x40, 8, "_BHRfS_M", 0x0, 0, 0 }, | |
1819 | + { "swap", "swap", 0, 0x3ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1820 | + { "swap", "swap", 0, 0x3ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1821 | + { "swap", "swsuspend", 0, 0x3ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1822 | + { "swap", "swsuspend", 0, 0x3ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1823 | + { "swap", "swsuspend", 0, 0x3ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1824 | + { "udf", "udf", 32, 1, 5, "BEA01", 0x0, 0, 0 }, | |
1825 | + { "udf", "udf", 32, 1, 5, "BOOT2", 0x0, 0, 0 }, | |
1826 | + { "udf", "udf", 32, 1, 5, "CD001", 0x0, 0, 0 }, | |
1827 | + { "udf", "udf", 32, 1, 5, "CDW02", 0x0, 0, 0 }, | |
1828 | + { "udf", "udf", 32, 1, 5, "NSR02", 0x0, 0, 0 }, | |
1829 | + { "udf", "udf", 32, 1, 5, "NSR03", 0x0, 0, 0 }, | |
1830 | + { "udf", "udf", 32, 1, 5, "TEA01", 0x0, 0, 0 }, | |
1831 | + { "iso9660", "iso9660", 32, 1, 5, "CD001", 0x0, 0, 0 }, | |
1832 | + { "iso9660", "iso9660", 32, 9, 5, "CDROM", 0x0, 0, 0 }, | |
1833 | + { "jfs", "jfs", 32, 0, 4, "JFS1", 0x88, 0, 0 }, | |
1834 | + { "swap", "swap", 0, 0x7ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1835 | + { "swap", "swap", 0, 0x7ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1836 | + { "swap", "swsuspend", 0, 0x7ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1837 | + { "swap", "swsuspend", 0, 0x7ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1838 | + { "swap", "swsuspend", 0, 0x7ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1839 | + { "swap", "swap", 0, 0xfff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1840 | + { "swap", "swap", 0, 0xfff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1841 | + { "swap", "swsuspend", 0, 0xfff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1842 | + { "swap", "swsuspend", 0, 0xfff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1843 | + { "swap", "swsuspend", 0, 0xfff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1844 | + { "zfs", "zfs", 264, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 }, | |
1845 | + { "zfs", "zfs", 264, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 }, | |
1846 | + { NULL, NULL, 0, 0, 0, NULL, 0x0, 0, 0 } | |
7e46296a AM |
1847 | +}; |
1848 | + | |
5dd10c98 AM |
1849 | +static int null_uuid(const char *uuid) |
1850 | +{ | |
1851 | + int i; | |
1852 | + | |
1853 | + for (i = 0; i < 16 && !uuid[i]; i++); | |
1854 | + | |
1855 | + return (i == 16); | |
1856 | +} | |
1857 | + | |
1858 | + | |
7e46296a AM |
1859 | +static void uuid_end_bio(struct bio *bio, int err) |
1860 | +{ | |
1861 | + struct page *page = bio->bi_io_vec[0].bv_page; | |
1862 | + | |
cacc47f8 AM |
1863 | + if(!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
1864 | + SetPageError(page); | |
7e46296a AM |
1865 | + |
1866 | + unlock_page(page); | |
1867 | + bio_put(bio); | |
1868 | +} | |
1869 | + | |
1870 | + | |
1871 | +/** | |
1872 | + * submit - submit BIO request | |
7e46296a | 1873 | + * @dev: The block device we're using. |
5dd10c98 | 1874 | + * @page_num: The page we're reading. |
7e46296a AM |
1875 | + * |
1876 | + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the | |
1877 | + * textbook - allocate and initialize the bio. If we're writing, make sure | |
1878 | + * the page is marked as dirty. Then submit it and carry on." | |
7e46296a | 1879 | + **/ |
5dd10c98 | 1880 | +static struct page *read_bdev_page(struct block_device *dev, int page_num) |
7e46296a AM |
1881 | +{ |
1882 | + struct bio *bio = NULL; | |
5dd10c98 | 1883 | + struct page *page = alloc_page(GFP_NOFS); |
7e46296a | 1884 | + |
5dd10c98 AM |
1885 | + if (!page) { |
1886 | + printk(KERN_ERR "Failed to allocate a page for reading data " | |
1887 | + "in UUID checks."); | |
7e46296a | 1888 | + return NULL; |
5dd10c98 | 1889 | + } |
7e46296a | 1890 | + |
5dd10c98 | 1891 | + bio = bio_alloc(GFP_NOFS, 1); |
7e46296a | 1892 | + bio->bi_bdev = dev; |
5dd10c98 | 1893 | + bio->bi_sector = page_num << 3; |
7e46296a AM |
1894 | + bio->bi_end_io = uuid_end_bio; |
1895 | + | |
5dd10c98 AM |
1896 | + PRINTK("Submitting bio on device %lx, page %d.\n", |
1897 | + (unsigned long) dev->bd_dev, page_num); | |
1898 | + | |
7e46296a | 1899 | + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { |
5dd10c98 AM |
1900 | + printk(KERN_DEBUG "ERROR: adding page to bio at %d\n", |
1901 | + page_num); | |
7e46296a AM |
1902 | + bio_put(bio); |
1903 | + __free_page(page); | |
5dd10c98 | 1904 | + printk(KERN_DEBUG "read_bdev_page freed page %p (in error " |
7e46296a | 1905 | + "path).\n", page); |
cacc47f8 | 1906 | + return NULL; |
7e46296a AM |
1907 | + } |
1908 | + | |
5dd10c98 | 1909 | + lock_page(page); |
7e46296a AM |
1910 | + submit_bio(READ | (1 << BIO_RW_SYNCIO) | |
1911 | + (1 << BIO_RW_UNPLUG), bio); | |
1912 | + | |
5dd10c98 | 1913 | + wait_on_page_locked(page); |
cacc47f8 AM |
1914 | + if (PageError(page)) { |
1915 | + __free_page(page); | |
1916 | + page = NULL; | |
1917 | + } | |
7e46296a AM |
1918 | + return page; |
1919 | +} | |
1920 | + | |
5dd10c98 AM |
1921 | +int bdev_matches_key(struct block_device *bdev, const char *key) |
1922 | +{ | |
1923 | + unsigned char *data = NULL; | |
1924 | + struct page *data_page = NULL; | |
1925 | + | |
1926 | + int dev_offset, pg_num, pg_off, i; | |
1927 | + int last_pg_num = -1; | |
1928 | + int result = 0; | |
1929 | + char buf[50]; | |
1930 | + | |
1931 | + if (null_uuid(key)) { | |
1932 | + PRINTK("Refusing to find a NULL key.\n"); | |
1933 | + return 0; | |
1934 | + } | |
1935 | + | |
1936 | + if (!bdev->bd_disk) { | |
1937 | + bdevname(bdev, buf); | |
1938 | + PRINTK("bdev %s has no bd_disk.\n", buf); | |
1939 | + return 0; | |
1940 | + } | |
1941 | + | |
1942 | + if (!bdev->bd_disk->queue) { | |
1943 | + bdevname(bdev, buf); | |
1944 | + PRINTK("bdev %s has no queue.\n", buf); | |
1945 | + return 0; | |
1946 | + } | |
1947 | + | |
1948 | + for (i = 0; uuid_list[i].name; i++) { | |
1949 | + struct uuid_info *dat = &uuid_list[i]; | |
1950 | + | |
1951 | + if (!dat->key || strcmp(dat->key, key)) | |
1952 | + continue; | |
1953 | + | |
1954 | + dev_offset = (dat->bkoff << 10) + dat->sboff; | |
1955 | + pg_num = dev_offset >> 12; | |
1956 | + pg_off = dev_offset & 0xfff; | |
1957 | + | |
1958 | + if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1) | |
1959 | + continue; | |
1960 | + | |
1961 | + if (pg_num != last_pg_num) { | |
1962 | + if (data_page) | |
1963 | + __free_page(data_page); | |
1964 | + data_page = read_bdev_page(bdev, pg_num); | |
cacc47f8 AM |
1965 | + if (!data_page) |
1966 | + continue; | |
5dd10c98 AM |
1967 | + data = page_address(data_page); |
1968 | + } | |
1969 | + | |
1970 | + last_pg_num = pg_num; | |
1971 | + | |
1972 | + if (strncmp(&data[pg_off], dat->magic, dat->sig_len)) | |
1973 | + continue; | |
1974 | + | |
1975 | + result = 1; | |
1976 | + break; | |
1977 | + } | |
1978 | + | |
1979 | + if (data_page) | |
1980 | + __free_page(data_page); | |
1981 | + | |
1982 | + return result; | |
1983 | +} | |
7e46296a | 1984 | + |
cacc47f8 AM |
1985 | +/* |
1986 | + * part_matches_fs_info - Does the given partition match the details given? | |
1987 | + * | |
1988 | + * Returns a score saying how good the match is. | |
1989 | + * 0 = no UUID match. | |
1990 | + * 1 = UUID but last mount time differs. | |
1991 | + * 2 = UUID, last mount time but not dev_t | |
1992 | + * 3 = perfect match | |
1993 | + * | |
1994 | + * This lets us cope elegantly with probing resulting in dev_ts changing | |
1995 | + * from boot to boot, and with the case where a user copies a partition | |
1996 | + * (UUID is non unique), and we need to check the last mount time of the | |
1997 | + * correct partition. | |
1998 | + */ | |
1999 | +int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek) | |
7e46296a AM |
2000 | +{ |
2001 | + struct block_device *bdev; | |
cacc47f8 | 2002 | + struct fs_info *got; |
7e46296a AM |
2003 | + int result = 0; |
2004 | + char buf[50]; | |
2005 | + | |
cacc47f8 | 2006 | + if (null_uuid((char *) &seek->uuid)) { |
5dd10c98 AM |
2007 | + PRINTK("Refusing to find a NULL uuid.\n"); |
2008 | + return 0; | |
2009 | + } | |
2010 | + | |
7e46296a AM |
2011 | + bdev = bdget(part_devt(part)); |
2012 | + | |
cacc47f8 | 2013 | + PRINTK("part_matches fs info considering %x.\n", part_devt(part)); |
7e46296a AM |
2014 | + |
2015 | + if (blkdev_get(bdev, FMODE_READ)) { | |
2016 | + PRINTK("blkdev_get failed.\n"); | |
2017 | + return 0; | |
2018 | + } | |
2019 | + | |
2020 | + if (!bdev->bd_disk) { | |
2021 | + bdevname(bdev, buf); | |
2022 | + PRINTK("bdev %s has no bd_disk.\n", buf); | |
2023 | + goto out; | |
2024 | + } | |
2025 | + | |
2026 | + if (!bdev->bd_disk->queue) { | |
2027 | + bdevname(bdev, buf); | |
2028 | + PRINTK("bdev %s has no queue.\n", buf); | |
2029 | + goto out; | |
2030 | + } | |
2031 | + | |
cacc47f8 | 2032 | + got = fs_info_from_block_dev(bdev); |
7e46296a | 2033 | + |
cacc47f8 AM |
2034 | + if (got && !memcmp(got->uuid, seek->uuid, 16)) { |
2035 | + PRINTK(" Having matching UUID.\n"); | |
2036 | + PRINTK(" Got: LMS %d, LM %p.\n", got->last_mount_size, got->last_mount); | |
2037 | + PRINTK(" Seek: LMS %d, LM %p.\n", seek->last_mount_size, seek->last_mount); | |
2038 | + result = 1; | |
7e46296a | 2039 | + |
cacc47f8 AM |
2040 | + if (got->last_mount_size == seek->last_mount_size && |
2041 | + got->last_mount && seek->last_mount && | |
2042 | + !memcmp(got->last_mount, seek->last_mount, | |
2043 | + got->last_mount_size)) { | |
2044 | + result = 2; | |
7e46296a | 2045 | + |
cacc47f8 | 2046 | + PRINTK(" Matching last mount time.\n"); |
7e46296a | 2047 | + |
cacc47f8 AM |
2048 | + if (part_devt(part) == seek->dev_t) { |
2049 | + result = 3; | |
2050 | + PRINTK(" Matching dev_t.\n"); | |
2051 | + } else | |
2052 | + PRINTK("Dev_ts differ (%x vs %x).\n", part_devt(part), seek->dev_t); | |
7e46296a AM |
2053 | + } |
2054 | + } | |
2055 | + | |
cacc47f8 AM |
2056 | + PRINTK(" Score for %x is %d.\n", part_devt(part), result); |
2057 | + free_fs_info(got); | |
7e46296a AM |
2058 | +out: |
2059 | + blkdev_put(bdev, FMODE_READ); | |
2060 | + return result; | |
2061 | +} | |
2062 | + | |
5dd10c98 AM |
2063 | +void free_fs_info(struct fs_info *fs_info) |
2064 | +{ | |
2065 | + if (!fs_info || IS_ERR(fs_info)) | |
2066 | + return; | |
2067 | + | |
2068 | + if (fs_info->last_mount) | |
2069 | + kfree(fs_info->last_mount); | |
2070 | + | |
2071 | + kfree(fs_info); | |
2072 | +} | |
2073 | +EXPORT_SYMBOL_GPL(free_fs_info); | |
2074 | + | |
2075 | +struct fs_info *fs_info_from_block_dev(struct block_device *bdev) | |
7e46296a AM |
2076 | +{ |
2077 | + unsigned char *data = NULL; | |
2078 | + struct page *data_page = NULL; | |
2079 | + | |
2080 | + int dev_offset, pg_num, pg_off; | |
2081 | + int uuid_pg_num, uuid_pg_off, i; | |
2082 | + unsigned char *uuid_data = NULL; | |
2083 | + struct page *uuid_data_page = NULL; | |
2084 | + | |
5dd10c98 | 2085 | + int last_pg_num = -1, last_uuid_pg_num = 0; |
7e46296a | 2086 | + char buf[50]; |
5dd10c98 | 2087 | + struct fs_info *fs_info = NULL; |
7e46296a AM |
2088 | + |
2089 | + bdevname(bdev, buf); | |
2090 | + | |
cacc47f8 | 2091 | + PRINTK("uuid_from_block_dev looking for partition type of %s.\n", buf); |
7e46296a AM |
2092 | + |
2093 | + for (i = 0; uuid_list[i].name; i++) { | |
2094 | + struct uuid_info *dat = &uuid_list[i]; | |
2095 | + dev_offset = (dat->bkoff << 10) + dat->sboff; | |
2096 | + pg_num = dev_offset >> 12; | |
2097 | + pg_off = dev_offset & 0xfff; | |
2098 | + uuid_pg_num = dat->uuid_offset >> 12; | |
2099 | + uuid_pg_off = dat->uuid_offset & 0xfff; | |
2100 | + | |
2101 | + if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1) | |
2102 | + continue; | |
2103 | + | |
5dd10c98 AM |
2104 | + /* Ignore partition types with no UUID offset */ |
2105 | + if (!dat->uuid_offset) | |
2106 | + continue; | |
2107 | + | |
2108 | + if (pg_num != last_pg_num) { | |
7e46296a AM |
2109 | + if (data_page) |
2110 | + __free_page(data_page); | |
5dd10c98 | 2111 | + data_page = read_bdev_page(bdev, pg_num); |
cacc47f8 AM |
2112 | + if (!data_page) |
2113 | + continue; | |
7e46296a AM |
2114 | + data = page_address(data_page); |
2115 | + } | |
2116 | + | |
2117 | + last_pg_num = pg_num; | |
2118 | + | |
2119 | + if (strncmp(&data[pg_off], dat->magic, dat->sig_len)) | |
2120 | + continue; | |
2121 | + | |
7e46296a AM |
2122 | + PRINTK("This partition looks like %s.\n", dat->name); |
2123 | + | |
5dd10c98 AM |
2124 | + fs_info = kzalloc(sizeof(struct fs_info), GFP_KERNEL); |
2125 | + | |
2126 | + if (!fs_info) { | |
2127 | + PRINTK("Failed to allocate fs_info struct."); | |
2128 | + fs_info = ERR_PTR(-ENOMEM); | |
2129 | + break; | |
2130 | + } | |
2131 | + | |
2132 | + /* UUID can't be off the end of the disk */ | |
2133 | + if ((uuid_pg_num > bdev->bd_part->nr_sects >> 3) || | |
2134 | + !dat->uuid_offset) | |
2135 | + goto no_uuid; | |
2136 | + | |
7e46296a AM |
2137 | + if (!uuid_data || uuid_pg_num != last_uuid_pg_num) { |
2138 | + if (uuid_data_page) | |
2139 | + __free_page(uuid_data_page); | |
5dd10c98 | 2140 | + uuid_data_page = read_bdev_page(bdev, uuid_pg_num); |
cacc47f8 AM |
2141 | + if (!uuid_data_page) |
2142 | + continue; | |
7e46296a AM |
2143 | + uuid_data = page_address(uuid_data_page); |
2144 | + } | |
2145 | + | |
2146 | + last_uuid_pg_num = uuid_pg_num; | |
5dd10c98 | 2147 | + memcpy(&fs_info->uuid, &uuid_data[uuid_pg_off], 16); |
cacc47f8 | 2148 | + fs_info->dev_t = bdev->bd_dev; |
5dd10c98 AM |
2149 | + |
2150 | +no_uuid: | |
2151 | + PRINT_HEX_DUMP(KERN_EMERG, "fs_info_from_block_dev " | |
2152 | + "returning uuid ", DUMP_PREFIX_NONE, 16, 1, | |
2153 | + fs_info->uuid, 16, 0); | |
2154 | + | |
2155 | + if (dat->last_mount_size) { | |
2156 | + int pg = dat->last_mount_offset >> 12, sz; | |
2157 | + int off = dat->last_mount_offset & 0xfff; | |
2158 | + struct page *last_mount = read_bdev_page(bdev, pg); | |
2159 | + unsigned char *last_mount_data; | |
2160 | + char *ptr; | |
2161 | + | |
2162 | + if (!last_mount) { | |
2163 | + fs_info = ERR_PTR(-ENOMEM); | |
2164 | + break; | |
2165 | + } | |
2166 | + last_mount_data = page_address(last_mount); | |
2167 | + sz = dat->last_mount_size; | |
2168 | + ptr = kmalloc(sz, GFP_KERNEL); | |
2169 | + | |
2170 | + if (!ptr) { | |
2171 | + printk(KERN_EMERG "fs_info_from_block_dev " | |
2172 | + "failed to get memory for last mount " | |
2173 | + "timestamp."); | |
2174 | + free_fs_info(fs_info); | |
2175 | + fs_info = ERR_PTR(-ENOMEM); | |
2176 | + } else { | |
2177 | + fs_info->last_mount = ptr; | |
2178 | + fs_info->last_mount_size = sz; | |
2179 | + memcpy(ptr, &last_mount_data[off], sz); | |
2180 | + } | |
7e46296a | 2181 | + |
5dd10c98 | 2182 | + __free_page(last_mount); |
7e46296a | 2183 | + } |
5dd10c98 | 2184 | + break; |
7e46296a AM |
2185 | + } |
2186 | + | |
2187 | + if (data_page) | |
2188 | + __free_page(data_page); | |
2189 | + | |
2190 | + if (uuid_data_page) | |
2191 | + __free_page(uuid_data_page); | |
2192 | + | |
5dd10c98 | 2193 | + return fs_info; |
7e46296a | 2194 | +} |
5dd10c98 AM |
2195 | +EXPORT_SYMBOL_GPL(fs_info_from_block_dev); |
2196 | + | |
2197 | +static int __init uuid_debug_setup(char *str) | |
2198 | +{ | |
2199 | + int value; | |
2200 | + | |
2201 | + if (sscanf(str, "=%d", &value)) | |
2202 | + debug_enabled = value; | |
2203 | + | |
2204 | + return 1; | |
2205 | +} | |
2206 | + | |
2207 | +__setup("uuid_debug", uuid_debug_setup); | |
2208 | diff --git a/crypto/Kconfig b/crypto/Kconfig | |
5bd2511a | 2209 | index 9d9434f..b5911be 100644 |
5dd10c98 AM |
2210 | --- a/crypto/Kconfig |
2211 | +++ b/crypto/Kconfig | |
de6743ae | 2212 | @@ -816,6 +816,13 @@ config CRYPTO_LZO |
5dd10c98 AM |
2213 | help |
2214 | This is the LZO algorithm. | |
2215 | ||
2216 | +config CRYPTO_LZF | |
2217 | + tristate "LZF compression algorithm" | |
2218 | + select CRYPTO_ALGAPI | |
2219 | + help | |
2220 | + This is the LZF algorithm. It is especially useful for TuxOnIce, | |
2221 | + because it achieves good compression quickly. | |
2222 | + | |
2223 | comment "Random Number Generation" | |
2224 | ||
2225 | config CRYPTO_ANSI_CPRNG | |
2226 | diff --git a/crypto/Makefile b/crypto/Makefile | |
de6743ae | 2227 | index d7e6441..76b9a9e 100644 |
5dd10c98 AM |
2228 | --- a/crypto/Makefile |
2229 | +++ b/crypto/Makefile | |
de6743ae | 2230 | @@ -78,6 +78,7 @@ obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o |
5dd10c98 AM |
2231 | obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o |
2232 | obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o | |
2233 | obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o | |
2234 | +obj-$(CONFIG_CRYPTO_LZF) += lzf.o | |
2235 | obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o | |
2236 | obj-$(CONFIG_CRYPTO_LZO) += lzo.o | |
2237 | obj-$(CONFIG_CRYPTO_RNG2) += rng.o | |
2238 | diff --git a/crypto/lzf.c b/crypto/lzf.c | |
2239 | new file mode 100644 | |
2240 | index 0000000..ccaf83a | |
2241 | --- /dev/null | |
2242 | +++ b/crypto/lzf.c | |
2243 | @@ -0,0 +1,326 @@ | |
2244 | +/* | |
2245 | + * Cryptoapi LZF compression module. | |
2246 | + * | |
2247 | + * Copyright (c) 2004-2008 Nigel Cunningham <nigel at tuxonice net> | |
2248 | + * | |
2249 | + * based on the deflate.c file: | |
2250 | + * | |
2251 | + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> | |
2252 | + * | |
2253 | + * and upon the LZF compression module donated to the TuxOnIce project with | |
2254 | + * the following copyright: | |
2255 | + * | |
2256 | + * This program is free software; you can redistribute it and/or modify it | |
2257 | + * under the terms of the GNU General Public License as published by the Free | |
2258 | + * Software Foundation; either version 2 of the License, or (at your option) | |
2259 | + * any later version. | |
2260 | + * Copyright (c) 2000-2003 Marc Alexander Lehmann <pcg@goof.com> | |
2261 | + * | |
2262 | + * Redistribution and use in source and binary forms, with or without modifica- | |
2263 | + * tion, are permitted provided that the following conditions are met: | |
2264 | + * | |
2265 | + * 1. Redistributions of source code must retain the above copyright notice, | |
2266 | + * this list of conditions and the following disclaimer. | |
2267 | + * | |
2268 | + * 2. Redistributions in binary form must reproduce the above copyright | |
2269 | + * notice, this list of conditions and the following disclaimer in the | |
2270 | + * documentation and/or other materials provided with the distribution. | |
2271 | + * | |
2272 | + * 3. The name of the author may not be used to endorse or promote products | |
2273 | + * derived from this software without specific prior written permission. | |
2274 | + * | |
2275 | + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
2276 | + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- | |
2277 | + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO | |
2278 | + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- | |
2279 | + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
2280 | + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
2281 | + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
2282 | + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- | |
2283 | + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2284 | + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2285 | + * | |
2286 | + * Alternatively, the contents of this file may be used under the terms of | |
2287 | + * the GNU General Public License version 2 (the "GPL"), in which case the | |
2288 | + * provisions of the GPL are applicable instead of the above. If you wish to | |
2289 | + * allow the use of your version of this file only under the terms of the | |
2290 | + * GPL and not to allow others to use your version of this file under the | |
2291 | + * BSD license, indicate your decision by deleting the provisions above and | |
2292 | + * replace them with the notice and other provisions required by the GPL. If | |
2293 | + * you do not delete the provisions above, a recipient may use your version | |
2294 | + * of this file under either the BSD or the GPL. | |
2295 | + */ | |
2296 | + | |
2297 | +#include <linux/kernel.h> | |
2298 | +#include <linux/module.h> | |
2299 | +#include <linux/init.h> | |
2300 | +#include <linux/module.h> | |
2301 | +#include <linux/crypto.h> | |
2302 | +#include <linux/err.h> | |
2303 | +#include <linux/vmalloc.h> | |
2304 | +#include <linux/string.h> | |
2305 | + | |
2306 | +struct lzf_ctx { | |
2307 | + void *hbuf; | |
2308 | + unsigned int bufofs; | |
2309 | +}; | |
2310 | + | |
2311 | +/* | |
2312 | + * size of hashtable is (1 << hlog) * sizeof (char *) | |
2313 | + * decompression is independent of the hash table size | |
2314 | + * the difference between 15 and 14 is very small | |
2315 | + * for small blocks (and 14 is also faster). | |
2316 | + * For a low-memory configuration, use hlog == 13; | |
2317 | + * For best compression, use 15 or 16. | |
2318 | + */ | |
2319 | +static const int hlog = 13; | |
2320 | + | |
2321 | +/* | |
2322 | + * don't play with this unless you benchmark! | |
2323 | + * decompression is not dependent on the hash function | |
2324 | + * the hashing function might seem strange, just believe me | |
2325 | + * it works ;) | |
2326 | + */ | |
2327 | +static inline u16 first(const u8 *p) | |
2328 | +{ | |
2329 | + return ((p[0]) << 8) + p[1]; | |
2330 | +} | |
2331 | + | |
2332 | +static inline u16 next(u8 v, const u8 *p) | |
2333 | +{ | |
2334 | + return ((v) << 8) + p[2]; | |
2335 | +} | |
2336 | + | |
2337 | +static inline u32 idx(unsigned int h) | |
2338 | +{ | |
2339 | + return (((h ^ (h << 5)) >> (3*8 - hlog)) + h*3) & ((1 << hlog) - 1); | |
2340 | +} | |
2341 | + | |
2342 | +/* | |
2343 | + * IDX works because it is very similar to a multiplicative hash, e.g. | |
2344 | + * (h * 57321 >> (3*8 - hlog)) | |
2345 | + * the next one is also quite good, albeit slow ;) | |
2346 | + * (int)(cos(h & 0xffffff) * 1e6) | |
2347 | + */ | |
2348 | + | |
2349 | +static const int max_lit = (1 << 5); | |
2350 | +static const int max_off = (1 << 13); | |
2351 | +static const int max_ref = ((1 << 8) + (1 << 3)); | |
2352 | + | |
2353 | +/* | |
2354 | + * compressed format | |
2355 | + * | |
2356 | + * 000LLLLL <L+1> ; literal | |
2357 | + * LLLOOOOO oooooooo ; backref L | |
2358 | + * 111OOOOO LLLLLLLL oooooooo ; backref L+7 | |
2359 | + * | |
2360 | + */ | |
2361 | + | |
2362 | +static void lzf_compress_exit(struct crypto_tfm *tfm) | |
2363 | +{ | |
2364 | + struct lzf_ctx *ctx = crypto_tfm_ctx(tfm); | |
2365 | + | |
2366 | + if (!ctx->hbuf) | |
2367 | + return; | |
2368 | + | |
2369 | + vfree(ctx->hbuf); | |
2370 | + ctx->hbuf = NULL; | |
2371 | +} | |
2372 | + | |
2373 | +static int lzf_compress_init(struct crypto_tfm *tfm) | |
2374 | +{ | |
2375 | + struct lzf_ctx *ctx = crypto_tfm_ctx(tfm); | |
2376 | + | |
2377 | + /* Get LZF ready to go */ | |
2378 | + ctx->hbuf = vmalloc_32((1 << hlog) * sizeof(char *)); | |
2379 | + if (ctx->hbuf) | |
2380 | + return 0; | |
2381 | + | |
2382 | + printk(KERN_WARNING "Failed to allocate %ld bytes for lzf workspace\n", | |
2383 | + (long) ((1 << hlog) * sizeof(char *))); | |
2384 | + return -ENOMEM; | |
2385 | +} | |
2386 | + | |
2387 | +static int lzf_compress(struct crypto_tfm *tfm, const u8 *in_data, | |
2388 | + unsigned int in_len, u8 *out_data, unsigned int *out_len) | |
2389 | +{ | |
2390 | + struct lzf_ctx *ctx = crypto_tfm_ctx(tfm); | |
2391 | + const u8 **htab = ctx->hbuf; | |
2392 | + const u8 **hslot; | |
2393 | + const u8 *ip = in_data; | |
2394 | + u8 *op = out_data; | |
2395 | + const u8 *in_end = ip + in_len; | |
2396 | + u8 *out_end = op + *out_len - 3; | |
2397 | + const u8 *ref; | |
2398 | + | |
2399 | + unsigned int hval = first(ip); | |
2400 | + unsigned long off; | |
2401 | + int lit = 0; | |
2402 | + | |
2403 | + memset(htab, 0, sizeof(htab)); | |
2404 | + | |
2405 | + for (;;) { | |
2406 | + if (ip < in_end - 2) { | |
2407 | + hval = next(hval, ip); | |
2408 | + hslot = htab + idx(hval); | |
2409 | + ref = *hslot; | |
2410 | + *hslot = ip; | |
2411 | + | |
2412 | + off = ip - ref - 1; | |
2413 | + if (off < max_off | |
2414 | + && ip + 4 < in_end && ref > in_data | |
2415 | + && *(u16 *) ref == *(u16 *) ip && ref[2] == ip[2] | |
2416 | + ) { | |
2417 | + /* match found at *ref++ */ | |
2418 | + unsigned int len = 2; | |
2419 | + unsigned int maxlen = in_end - ip - len; | |
2420 | + maxlen = maxlen > max_ref ? max_ref : maxlen; | |
2421 | + | |
2422 | + do { | |
2423 | + len++; | |
2424 | + } while (len < maxlen && ref[len] == ip[len]); | |
2425 | + | |
2426 | + if (op + lit + 1 + 3 >= out_end) { | |
2427 | + *out_len = PAGE_SIZE; | |
2428 | + return 0; | |
2429 | + } | |
2430 | + | |
2431 | + if (lit) { | |
2432 | + *op++ = lit - 1; | |
2433 | + lit = -lit; | |
2434 | + do { | |
2435 | + *op++ = ip[lit]; | |
2436 | + } while (++lit); | |
2437 | + } | |
2438 | + | |
2439 | + len -= 2; | |
2440 | + ip++; | |
2441 | + | |
2442 | + if (len < 7) { | |
2443 | + *op++ = (off >> 8) + (len << 5); | |
2444 | + } else { | |
2445 | + *op++ = (off >> 8) + (7 << 5); | |
2446 | + *op++ = len - 7; | |
2447 | + } | |
2448 | + | |
2449 | + *op++ = off; | |
2450 | + | |
2451 | + ip += len; | |
2452 | + hval = first(ip); | |
2453 | + hval = next(hval, ip); | |
2454 | + htab[idx(hval)] = ip; | |
2455 | + ip++; | |
2456 | + continue; | |
2457 | + } | |
2458 | + } else if (ip == in_end) | |
2459 | + break; | |
2460 | + | |
2461 | + /* one more literal byte we must copy */ | |
2462 | + lit++; | |
2463 | + ip++; | |
2464 | + | |
2465 | + if (lit == max_lit) { | |
2466 | + if (op + 1 + max_lit >= out_end) { | |
2467 | + *out_len = PAGE_SIZE; | |
2468 | + return 0; | |
2469 | + } | |
2470 | + | |
2471 | + *op++ = max_lit - 1; | |
2472 | + memcpy(op, ip - max_lit, max_lit); | |
2473 | + op += max_lit; | |
2474 | + lit = 0; | |
2475 | + } | |
2476 | + } | |
2477 | + | |
2478 | + if (lit) { | |
2479 | + if (op + lit + 1 >= out_end) { | |
2480 | + *out_len = PAGE_SIZE; | |
2481 | + return 0; | |
2482 | + } | |
2483 | + | |
2484 | + *op++ = lit - 1; | |
2485 | + lit = -lit; | |
2486 | + do { | |
2487 | + *op++ = ip[lit]; | |
2488 | + } while (++lit); | |
2489 | + } | |
2490 | + | |
2491 | + *out_len = op - out_data; | |
2492 | + return 0; | |
2493 | +} | |
2494 | + | |
2495 | +static int lzf_decompress(struct crypto_tfm *tfm, const u8 *src, | |
2496 | + unsigned int slen, u8 *dst, unsigned int *dlen) | |
2497 | +{ | |
2498 | + u8 const *ip = src; | |
2499 | + u8 *op = dst; | |
2500 | + u8 const *const in_end = ip + slen; | |
2501 | + u8 *const out_end = op + *dlen; | |
2502 | + | |
2503 | + *dlen = PAGE_SIZE; | |
2504 | + do { | |
2505 | + unsigned int ctrl = *ip++; | |
2506 | + | |
2507 | + if (ctrl < (1 << 5)) { | |
2508 | + /* literal run */ | |
2509 | + ctrl++; | |
2510 | + | |
2511 | + if (op + ctrl > out_end) | |
2512 | + return 0; | |
2513 | + memcpy(op, ip, ctrl); | |
2514 | + op += ctrl; | |
2515 | + ip += ctrl; | |
2516 | + } else { /* back reference */ | |
2517 | + | |
2518 | + unsigned int len = ctrl >> 5; | |
2519 | + | |
2520 | + u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; | |
2521 | + | |
2522 | + if (len == 7) | |
2523 | + len += *ip++; | |
2524 | + | |
2525 | + ref -= *ip++; | |
2526 | + len += 2; | |
2527 | + | |
2528 | + if (op + len > out_end || ref < (u8 *) dst) | |
2529 | + return 0; | |
2530 | + | |
2531 | + do { | |
2532 | + *op++ = *ref++; | |
2533 | + } while (--len); | |
2534 | + } | |
2535 | + } while (op < out_end && ip < in_end); | |
2536 | + | |
2537 | + *dlen = op - (u8 *) dst; | |
2538 | + return 0; | |
2539 | +} | |
2540 | + | |
2541 | +static struct crypto_alg alg = { | |
2542 | + .cra_name = "lzf", | |
2543 | + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, | |
2544 | + .cra_ctxsize = sizeof(struct lzf_ctx), | |
2545 | + .cra_module = THIS_MODULE, | |
2546 | + .cra_list = LIST_HEAD_INIT(alg.cra_list), | |
2547 | + .cra_init = lzf_compress_init, | |
2548 | + .cra_exit = lzf_compress_exit, | |
2549 | + .cra_u = { .compress = { | |
2550 | + .coa_compress = lzf_compress, | |
2551 | + .coa_decompress = lzf_decompress } } | |
2552 | +}; | |
2553 | + | |
2554 | +static int __init init(void) | |
2555 | +{ | |
2556 | + return crypto_register_alg(&alg); | |
2557 | +} | |
2558 | + | |
2559 | +static void __exit fini(void) | |
2560 | +{ | |
2561 | + crypto_unregister_alg(&alg); | |
2562 | +} | |
2563 | + | |
2564 | +module_init(init); | |
2565 | +module_exit(fini); | |
2566 | + | |
2567 | +MODULE_LICENSE("GPL"); | |
2568 | +MODULE_DESCRIPTION("LZF Compression Algorithm"); | |
2569 | +MODULE_AUTHOR("Marc Alexander Lehmann & Nigel Cunningham"); | |
2380c486 | 2570 | diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c |
de6743ae | 2571 | index 941fcb8..1a2a2e7 100644 |
2380c486 JR |
2572 | --- a/drivers/base/power/main.c |
2573 | +++ b/drivers/base/power/main.c | |
de6743ae | 2574 | @@ -69,6 +69,7 @@ void device_pm_lock(void) |
2380c486 JR |
2575 | { |
2576 | mutex_lock(&dpm_list_mtx); | |
2577 | } | |
2578 | +EXPORT_SYMBOL_GPL(device_pm_lock); | |
2579 | ||
2580 | /** | |
7e46296a | 2581 | * device_pm_unlock - Unlock the list of active devices used by the PM core. |
de6743ae | 2582 | @@ -77,6 +78,7 @@ void device_pm_unlock(void) |
2380c486 JR |
2583 | { |
2584 | mutex_unlock(&dpm_list_mtx); | |
2585 | } | |
2586 | +EXPORT_SYMBOL_GPL(device_pm_unlock); | |
2587 | ||
2588 | /** | |
7e46296a | 2589 | * device_pm_add - Add a device to the PM core's list of active devices. |
2380c486 | 2590 | diff --git a/drivers/char/vt.c b/drivers/char/vt.c |
5bd2511a | 2591 | index 7cdb6ee..f114914 100644 |
2380c486 JR |
2592 | --- a/drivers/char/vt.c |
2593 | +++ b/drivers/char/vt.c | |
de6743ae | 2594 | @@ -2461,6 +2461,7 @@ int vt_kmsg_redirect(int new) |
5dd10c98 AM |
2595 | else |
2596 | return kmsg_con; | |
2597 | } | |
2598 | +EXPORT_SYMBOL_GPL(vt_kmsg_redirect); | |
2380c486 JR |
2599 | |
2600 | /* | |
5dd10c98 | 2601 | * Console on virtual terminal |
e999739a | 2602 | diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c |
5bd2511a | 2603 | index 33dad3f..47fb186 100644 |
e999739a | 2604 | --- a/drivers/gpu/drm/drm_gem.c |
2605 | +++ b/drivers/gpu/drm/drm_gem.c | |
5bd2511a AM |
2606 | @@ -133,7 +133,8 @@ int drm_gem_object_init(struct drm_device *dev, |
2607 | BUG_ON((size & (PAGE_SIZE - 1)) != 0); | |
e999739a | 2608 | |
2609 | obj->dev = dev; | |
2610 | - obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); | |
2611 | + obj->filp = shmem_file_setup("drm mm object", size, | |
2612 | + VM_NORESERVE | VM_ATOMIC_COPY); | |
92bca44c | 2613 | if (IS_ERR(obj->filp)) |
5bd2511a | 2614 | return -ENOMEM; |
92bca44c | 2615 | |
2380c486 | 2616 | diff --git a/drivers/md/md.c b/drivers/md/md.c |
5bd2511a | 2617 | index 46b3a04..883513f 100644 |
2380c486 JR |
2618 | --- a/drivers/md/md.c |
2619 | +++ b/drivers/md/md.c | |
5bd2511a | 2620 | @@ -6602,6 +6602,9 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2621 | mddev->curr_resync = 2; |
2622 | ||
2623 | try_again: | |
2624 | + while (freezer_is_on()) | |
2625 | + yield(); | |
2626 | + | |
5dd10c98 | 2627 | if (kthread_should_stop()) |
2380c486 | 2628 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5dd10c98 | 2629 | |
5bd2511a | 2630 | @@ -6624,6 +6627,10 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2631 | * time 'round when curr_resync == 2 |
2632 | */ | |
2633 | continue; | |
2634 | + | |
2635 | + while (freezer_is_on()) | |
2636 | + yield(); | |
2637 | + | |
2638 | /* We need to wait 'interruptible' so as not to | |
2639 | * contribute to the load average, and not to | |
2640 | * be caught by 'softlockup' | |
5bd2511a | 2641 | @@ -6636,6 +6643,7 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2642 | " share one or more physical units)\n", |
2643 | desc, mdname(mddev), mdname(mddev2)); | |
2644 | mddev_put(mddev2); | |
2645 | + try_to_freeze(); | |
2646 | if (signal_pending(current)) | |
2647 | flush_signals(current); | |
2648 | schedule(); | |
5bd2511a | 2649 | @@ -6745,6 +6753,9 @@ void md_do_sync(mddev_t *mddev) |
92bca44c AM |
2650 | || kthread_should_stop()); |
2651 | } | |
9474138d | 2652 | |
2380c486 JR |
2653 | + while (freezer_is_on()) |
2654 | + yield(); | |
2655 | + | |
2656 | if (kthread_should_stop()) | |
2657 | goto interrupted; | |
9474138d | 2658 | |
5bd2511a | 2659 | @@ -6789,6 +6800,9 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2660 | last_mark = next; |
2661 | } | |
2662 | ||
2663 | + while (freezer_is_on()) | |
2664 | + yield(); | |
2665 | + | |
2666 | ||
2667 | if (kthread_should_stop()) | |
2668 | goto interrupted; | |
9474138d | 2669 | diff --git a/fs/block_dev.c b/fs/block_dev.c |
5bd2511a | 2670 | index 99d6af8..f270494 100644 |
9474138d AM |
2671 | --- a/fs/block_dev.c |
2672 | +++ b/fs/block_dev.c | |
5bd2511a | 2673 | @@ -295,6 +295,93 @@ out: |
2380c486 JR |
2674 | } |
2675 | EXPORT_SYMBOL(thaw_bdev); | |
2676 | ||
2677 | +#ifdef CONFIG_FS_FREEZER_DEBUG | |
2678 | +#define FS_PRINTK(fmt, args...) printk(fmt, ## args) | |
2679 | +#else | |
2680 | +#define FS_PRINTK(fmt, args...) | |
2681 | +#endif | |
2682 | + | |
2683 | +/* #define DEBUG_FS_FREEZING */ | |
2684 | + | |
2685 | +/** | |
2686 | + * freeze_filesystems - lock all filesystems and force them into a consistent | |
2687 | + * state | |
2688 | + * @which: What combination of fuse & non-fuse to freeze. | |
2689 | + */ | |
2690 | +void freeze_filesystems(int which) | |
2691 | +{ | |
2692 | + struct super_block *sb; | |
2693 | + | |
2694 | + lockdep_off(); | |
2695 | + | |
2696 | + /* | |
2697 | + * Freeze in reverse order so filesystems dependant upon others are | |
2698 | + * frozen in the right order (eg. loopback on ext3). | |
2699 | + */ | |
2700 | + list_for_each_entry_reverse(sb, &super_blocks, s_list) { | |
2701 | + FS_PRINTK(KERN_INFO "Considering %s.%s: (root %p, bdev %x)", | |
2702 | + sb->s_type->name ? sb->s_type->name : "?", | |
2703 | + sb->s_subtype ? sb->s_subtype : "", sb->s_root, | |
2704 | + sb->s_bdev ? sb->s_bdev->bd_dev : 0); | |
2705 | + | |
2706 | + if (sb->s_type->fs_flags & FS_IS_FUSE && | |
2707 | + sb->s_frozen == SB_UNFROZEN && | |
2708 | + which & FS_FREEZER_FUSE) { | |
2709 | + sb->s_frozen = SB_FREEZE_TRANS; | |
2710 | + sb->s_flags |= MS_FROZEN; | |
2711 | + FS_PRINTK("Fuse filesystem done.\n"); | |
2712 | + continue; | |
2713 | + } | |
2714 | + | |
2715 | + if (!sb->s_root || !sb->s_bdev || | |
2716 | + (sb->s_frozen == SB_FREEZE_TRANS) || | |
2717 | + (sb->s_flags & MS_RDONLY) || | |
2718 | + (sb->s_flags & MS_FROZEN) || | |
2719 | + !(which & FS_FREEZER_NORMAL)) { | |
2720 | + FS_PRINTK(KERN_INFO "Nope.\n"); | |
2721 | + continue; | |
2722 | + } | |
2723 | + | |
2724 | + FS_PRINTK(KERN_INFO "Freezing %x... ", sb->s_bdev->bd_dev); | |
2725 | + freeze_bdev(sb->s_bdev); | |
2726 | + sb->s_flags |= MS_FROZEN; | |
2727 | + FS_PRINTK(KERN_INFO "Done.\n"); | |
2728 | + } | |
2729 | + | |
2730 | + lockdep_on(); | |
2731 | +} | |
2732 | + | |
2733 | +/** | |
2734 | + * thaw_filesystems - unlock all filesystems | |
2735 | + * @which: What combination of fuse & non-fuse to thaw. | |
2736 | + */ | |
2737 | +void thaw_filesystems(int which) | |
2738 | +{ | |
2739 | + struct super_block *sb; | |
2740 | + | |
2741 | + lockdep_off(); | |
2742 | + | |
2743 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
2744 | + if (!(sb->s_flags & MS_FROZEN)) | |
2745 | + continue; | |
2746 | + | |
2747 | + if (sb->s_type->fs_flags & FS_IS_FUSE) { | |
2748 | + if (!(which & FS_FREEZER_FUSE)) | |
2749 | + continue; | |
2750 | + | |
2751 | + sb->s_frozen = SB_UNFROZEN; | |
2752 | + } else { | |
2753 | + if (!(which & FS_FREEZER_NORMAL)) | |
2754 | + continue; | |
2755 | + | |
2756 | + thaw_bdev(sb->s_bdev, sb); | |
2757 | + } | |
2758 | + sb->s_flags &= ~MS_FROZEN; | |
2759 | + } | |
2760 | + | |
2761 | + lockdep_on(); | |
2762 | +} | |
2763 | + | |
9474138d AM |
2764 | static int blkdev_writepage(struct page *page, struct writeback_control *wbc) |
2765 | { | |
2766 | return block_write_full_page(page, blkdev_get_block, wbc); | |
2380c486 | 2767 | diff --git a/fs/drop_caches.c b/fs/drop_caches.c |
5bd2511a | 2768 | index 83c4f60..8f7ec03 100644 |
2380c486 JR |
2769 | --- a/fs/drop_caches.c |
2770 | +++ b/fs/drop_caches.c | |
2771 | @@ -8,6 +8,7 @@ | |
2772 | #include <linux/writeback.h> | |
2773 | #include <linux/sysctl.h> | |
2774 | #include <linux/gfp.h> | |
2775 | +#include <linux/module.h> | |
2776 | ||
2777 | /* A global variable is a bit ugly, but it keeps the code simple */ | |
2778 | int sysctl_drop_caches; | |
5bd2511a AM |
2779 | @@ -42,6 +43,13 @@ static void drop_slab(void) |
2780 | } while (nr_objects > 10); | |
2380c486 JR |
2781 | } |
2782 | ||
5bd2511a | 2783 | +/* For TuxOnIce */ |
2380c486 | 2784 | +void drop_pagecache(void) |
5bd2511a AM |
2785 | +{ |
2786 | + iterate_supers(drop_pagecache_sb, NULL); | |
2787 | +} | |
2380c486 | 2788 | +EXPORT_SYMBOL_GPL(drop_pagecache); |
5bd2511a | 2789 | + |
2380c486 | 2790 | int drop_caches_sysctl_handler(ctl_table *table, int write, |
7e46296a | 2791 | void __user *buffer, size_t *length, loff_t *ppos) |
5bd2511a | 2792 | { |
2380c486 | 2793 | diff --git a/fs/fuse/control.c b/fs/fuse/control.c |
7e46296a | 2794 | index 3773fd6..6272b60 100644 |
2380c486 JR |
2795 | --- a/fs/fuse/control.c |
2796 | +++ b/fs/fuse/control.c | |
7e46296a | 2797 | @@ -341,6 +341,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) |
2380c486 JR |
2798 | static struct file_system_type fuse_ctl_fs_type = { |
2799 | .owner = THIS_MODULE, | |
2800 | .name = "fusectl", | |
2801 | + .fs_flags = FS_IS_FUSE, | |
2802 | .get_sb = fuse_ctl_get_sb, | |
2803 | .kill_sb = fuse_ctl_kill_sb, | |
2804 | }; | |
2805 | diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c | |
5bd2511a | 2806 | index 9424796..25c6277 100644 |
2380c486 JR |
2807 | --- a/fs/fuse/dev.c |
2808 | +++ b/fs/fuse/dev.c | |
2809 | @@ -7,6 +7,7 @@ | |
2810 | */ | |
2811 | ||
2812 | #include "fuse_i.h" | |
2813 | +#include "fuse.h" | |
2814 | ||
2815 | #include <linux/init.h> | |
2816 | #include <linux/module.h> | |
2817 | @@ -16,6 +17,7 @@ | |
2818 | #include <linux/pagemap.h> | |
2819 | #include <linux/file.h> | |
2820 | #include <linux/slab.h> | |
2821 | +#include <linux/freezer.h> | |
5bd2511a AM |
2822 | #include <linux/pipe_fs_i.h> |
2823 | #include <linux/swap.h> | |
2824 | #include <linux/splice.h> | |
2825 | @@ -961,6 +963,8 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, | |
2826 | struct fuse_in *in; | |
2827 | unsigned reqsize; | |
2380c486 JR |
2828 | |
2829 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_dev_read"); | |
2830 | + | |
2831 | restart: | |
2832 | spin_lock(&fc->lock); | |
2833 | err = -EAGAIN; | |
5bd2511a | 2834 | @@ -1395,6 +1399,9 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, |
2380c486 JR |
2835 | if (!fc) |
2836 | return -EPERM; | |
2837 | ||
2838 | + FUSE_MIGHT_FREEZE(iocb->ki_filp->f_mapping->host->i_sb, | |
2839 | + "fuse_dev_write"); | |
2840 | + | |
5bd2511a AM |
2841 | fuse_copy_init(&cs, fc, 0, iov, nr_segs); |
2842 | ||
2843 | return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs)); | |
2380c486 | 2844 | diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c |
5bd2511a | 2845 | index 3cdc5f7..725cb5a 100644 |
2380c486 JR |
2846 | --- a/fs/fuse/dir.c |
2847 | +++ b/fs/fuse/dir.c | |
2848 | @@ -7,12 +7,14 @@ | |
2849 | */ | |
2850 | ||
2851 | #include "fuse_i.h" | |
2852 | +#include "fuse.h" | |
2853 | ||
2854 | #include <linux/pagemap.h> | |
2855 | #include <linux/file.h> | |
2856 | #include <linux/gfp.h> | |
2857 | #include <linux/sched.h> | |
2858 | #include <linux/namei.h> | |
2859 | +#include <linux/freezer.h> | |
2860 | ||
2861 | #if BITS_PER_LONG >= 64 | |
2862 | static inline void fuse_dentry_settime(struct dentry *entry, u64 time) | |
2863 | @@ -174,6 +176,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) | |
2864 | return 0; | |
2865 | ||
2866 | fc = get_fuse_conn(inode); | |
2867 | + | |
2868 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_dentry_revalidate"); | |
2869 | + | |
2870 | req = fuse_get_req(fc); | |
2871 | if (IS_ERR(req)) | |
2872 | return 0; | |
2873 | @@ -268,6 +273,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, | |
2874 | if (name->len > FUSE_NAME_MAX) | |
2875 | goto out; | |
2876 | ||
2877 | + FUSE_MIGHT_FREEZE(sb, "fuse_lookup_name"); | |
2878 | + | |
2879 | req = fuse_get_req(fc); | |
2880 | err = PTR_ERR(req); | |
2881 | if (IS_ERR(req)) | |
2882 | @@ -331,6 +338,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, | |
2883 | if (err) | |
2884 | goto out_err; | |
2885 | ||
2886 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_lookup"); | |
2887 | + | |
2888 | err = -EIO; | |
2889 | if (inode && get_node_id(inode) == FUSE_ROOT_ID) | |
2890 | goto out_iput; | |
7e46296a | 2891 | @@ -392,6 +401,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, |
2380c486 JR |
2892 | if (IS_ERR(forget_req)) |
2893 | return PTR_ERR(forget_req); | |
2894 | ||
2895 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_create_open"); | |
2896 | + | |
2897 | req = fuse_get_req(fc); | |
2898 | err = PTR_ERR(req); | |
2899 | if (IS_ERR(req)) | |
7e46296a | 2900 | @@ -485,6 +496,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, |
2380c486 JR |
2901 | int err; |
2902 | struct fuse_req *forget_req; | |
2903 | ||
2904 | + FUSE_MIGHT_FREEZE(dir->i_sb, "create_new_entry"); | |
2905 | + | |
2906 | forget_req = fuse_get_req(fc); | |
2907 | if (IS_ERR(forget_req)) { | |
2908 | fuse_put_request(fc, req); | |
7e46296a | 2909 | @@ -587,7 +600,11 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) |
2380c486 JR |
2910 | { |
2911 | struct fuse_mkdir_in inarg; | |
2912 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2913 | - struct fuse_req *req = fuse_get_req(fc); | |
2914 | + struct fuse_req *req; | |
2915 | + | |
2916 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_mkdir"); | |
2917 | + | |
2918 | + req = fuse_get_req(fc); | |
2919 | if (IS_ERR(req)) | |
2920 | return PTR_ERR(req); | |
2921 | ||
7e46296a | 2922 | @@ -611,7 +628,11 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, |
2380c486 JR |
2923 | { |
2924 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2925 | unsigned len = strlen(link) + 1; | |
2926 | - struct fuse_req *req = fuse_get_req(fc); | |
2927 | + struct fuse_req *req; | |
2928 | + | |
2929 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_symlink"); | |
2930 | + | |
2931 | + req = fuse_get_req(fc); | |
2932 | if (IS_ERR(req)) | |
2933 | return PTR_ERR(req); | |
2934 | ||
7e46296a | 2935 | @@ -628,7 +649,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) |
2380c486 JR |
2936 | { |
2937 | int err; | |
2938 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2939 | - struct fuse_req *req = fuse_get_req(fc); | |
2940 | + struct fuse_req *req; | |
2941 | + | |
2942 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_unlink"); | |
2943 | + | |
2944 | + req = fuse_get_req(fc); | |
2945 | if (IS_ERR(req)) | |
2946 | return PTR_ERR(req); | |
2947 | ||
7e46296a | 2948 | @@ -661,7 +686,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) |
2380c486 JR |
2949 | { |
2950 | int err; | |
2951 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2952 | - struct fuse_req *req = fuse_get_req(fc); | |
2953 | + struct fuse_req *req; | |
2954 | + | |
2955 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_rmdir"); | |
2956 | + | |
2957 | + req = fuse_get_req(fc); | |
2958 | if (IS_ERR(req)) | |
2959 | return PTR_ERR(req); | |
2960 | ||
2961 | diff --git a/fs/fuse/file.c b/fs/fuse/file.c | |
5bd2511a | 2962 | index ada0ade..ca89e06 100644 |
2380c486 JR |
2963 | --- a/fs/fuse/file.c |
2964 | +++ b/fs/fuse/file.c | |
2965 | @@ -7,11 +7,13 @@ | |
2966 | */ | |
2967 | ||
2968 | #include "fuse_i.h" | |
2969 | +#include "fuse.h" | |
2970 | ||
2971 | #include <linux/pagemap.h> | |
2972 | #include <linux/slab.h> | |
2973 | #include <linux/kernel.h> | |
2974 | #include <linux/sched.h> | |
2975 | +#include <linux/freezer.h> | |
92bca44c | 2976 | #include <linux/module.h> |
2380c486 JR |
2977 | |
2978 | static const struct file_operations fuse_direct_io_file_operations; | |
92bca44c | 2979 | @@ -109,6 +111,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
2380c486 | 2980 | int err; |
92bca44c | 2981 | int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; |
2380c486 | 2982 | |
92bca44c | 2983 | + FUSE_MIGHT_FREEZE(file->f_path.dentry->d_inode->i_sb, "fuse_send_open"); |
2380c486 | 2984 | + |
92bca44c AM |
2985 | ff = fuse_file_alloc(fc); |
2986 | if (!ff) | |
2987 | return -ENOMEM; | |
2988 | @@ -316,6 +320,8 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |
2380c486 JR |
2989 | if (fc->no_flush) |
2990 | return 0; | |
2991 | ||
2992 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_flush"); | |
2993 | + | |
2994 | req = fuse_get_req_nofail(fc, file); | |
2995 | memset(&inarg, 0, sizeof(inarg)); | |
2996 | inarg.fh = ff->fh; | |
5bd2511a | 2997 | @@ -366,6 +372,8 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir) |
2380c486 JR |
2998 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) |
2999 | return 0; | |
3000 | ||
3001 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_fsync_common"); | |
3002 | + | |
3003 | /* | |
3004 | * Start writeback against all dirty pages of the inode, then | |
3005 | * wait for all outstanding writes, before sending the FSYNC | |
5bd2511a | 3006 | @@ -473,6 +481,8 @@ static int fuse_readpage(struct file *file, struct page *page) |
2380c486 JR |
3007 | if (is_bad_inode(inode)) |
3008 | goto out; | |
3009 | ||
3010 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_readpage"); | |
3011 | + | |
3012 | /* | |
3013 | * Page writeback can extend beyond the liftime of the | |
3014 | * page-cache page, so make sure we read a properly synced | |
5bd2511a | 3015 | @@ -586,6 +596,9 @@ static int fuse_readpages_fill(void *_data, struct page *page) |
2380c486 JR |
3016 | struct inode *inode = data->inode; |
3017 | struct fuse_conn *fc = get_fuse_conn(inode); | |
3018 | ||
3019 | + FUSE_MIGHT_FREEZE(data->file->f_mapping->host->i_sb, | |
3020 | + "fuse_readpages_fill"); | |
3021 | + | |
3022 | fuse_wait_on_page_writeback(inode, page->index); | |
3023 | ||
3024 | if (req->num_pages && | |
5bd2511a | 3025 | @@ -617,6 +630,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, |
2380c486 JR |
3026 | if (is_bad_inode(inode)) |
3027 | goto out; | |
3028 | ||
3029 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_readpages"); | |
3030 | + | |
3031 | data.file = file; | |
3032 | data.inode = inode; | |
3033 | data.req = fuse_get_req(fc); | |
5bd2511a | 3034 | @@ -730,6 +745,8 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, |
2380c486 JR |
3035 | if (is_bad_inode(inode)) |
3036 | return -EIO; | |
3037 | ||
3038 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_buffered_write"); | |
3039 | + | |
3040 | /* | |
3041 | * Make sure writepages on the same page are not mixed up with | |
3042 | * plain writes. | |
5bd2511a | 3043 | @@ -889,6 +906,8 @@ static ssize_t fuse_perform_write(struct file *file, |
2380c486 JR |
3044 | struct fuse_req *req; |
3045 | ssize_t count; | |
3046 | ||
3047 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_perform_write"); | |
3048 | + | |
3049 | req = fuse_get_req(fc); | |
3050 | if (IS_ERR(req)) { | |
3051 | err = PTR_ERR(req); | |
5bd2511a | 3052 | @@ -1033,6 +1052,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, |
92bca44c AM |
3053 | ssize_t res = 0; |
3054 | struct fuse_req *req; | |
2380c486 JR |
3055 | |
3056 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_direct_io"); | |
3057 | + | |
3058 | req = fuse_get_req(fc); | |
3059 | if (IS_ERR(req)) | |
3060 | return PTR_ERR(req); | |
5bd2511a | 3061 | @@ -1420,6 +1441,8 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) |
2380c486 JR |
3062 | struct fuse_lk_out outarg; |
3063 | int err; | |
3064 | ||
3065 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_getlk"); | |
3066 | + | |
3067 | req = fuse_get_req(fc); | |
3068 | if (IS_ERR(req)) | |
3069 | return PTR_ERR(req); | |
5bd2511a | 3070 | @@ -1455,6 +1478,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) |
2380c486 JR |
3071 | if (fl->fl_flags & FL_CLOSE) |
3072 | return 0; | |
3073 | ||
3074 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_setlk"); | |
3075 | + | |
3076 | req = fuse_get_req(fc); | |
3077 | if (IS_ERR(req)) | |
3078 | return PTR_ERR(req); | |
5bd2511a | 3079 | @@ -1521,6 +1546,8 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) |
2380c486 JR |
3080 | if (!inode->i_sb->s_bdev || fc->no_bmap) |
3081 | return 0; | |
3082 | ||
3083 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_bmap"); | |
3084 | + | |
3085 | req = fuse_get_req(fc); | |
3086 | if (IS_ERR(req)) | |
3087 | return 0; | |
3088 | diff --git a/fs/fuse/fuse.h b/fs/fuse/fuse.h | |
3089 | new file mode 100644 | |
3090 | index 0000000..170e49a | |
3091 | --- /dev/null | |
3092 | +++ b/fs/fuse/fuse.h | |
3093 | @@ -0,0 +1,13 @@ | |
3094 | +#define FUSE_MIGHT_FREEZE(superblock, desc) \ | |
3095 | +do { \ | |
3096 | + int printed = 0; \ | |
3097 | + while (superblock->s_frozen != SB_UNFROZEN) { \ | |
3098 | + if (!printed) { \ | |
3099 | + printk(KERN_INFO "%d frozen in " desc ".\n", \ | |
3100 | + current->pid); \ | |
3101 | + printed = 1; \ | |
3102 | + } \ | |
3103 | + try_to_freeze(); \ | |
3104 | + yield(); \ | |
3105 | + } \ | |
3106 | +} while (0) | |
3107 | diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c | |
de6743ae | 3108 | index ec14d19..2a82a08 100644 |
2380c486 JR |
3109 | --- a/fs/fuse/inode.c |
3110 | +++ b/fs/fuse/inode.c | |
7e46296a | 3111 | @@ -1062,7 +1062,7 @@ static void fuse_kill_sb_anon(struct super_block *sb) |
2380c486 JR |
3112 | static struct file_system_type fuse_fs_type = { |
3113 | .owner = THIS_MODULE, | |
3114 | .name = "fuse", | |
3115 | - .fs_flags = FS_HAS_SUBTYPE, | |
3116 | + .fs_flags = FS_HAS_SUBTYPE | FS_IS_FUSE, | |
3117 | .get_sb = fuse_get_sb, | |
92bca44c | 3118 | .kill_sb = fuse_kill_sb_anon, |
2380c486 | 3119 | }; |
7e46296a | 3120 | @@ -1094,7 +1094,7 @@ static struct file_system_type fuseblk_fs_type = { |
2380c486 JR |
3121 | .name = "fuseblk", |
3122 | .get_sb = fuse_get_sb_blk, | |
92bca44c | 3123 | .kill_sb = fuse_kill_sb_blk, |
2380c486 JR |
3124 | - .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, |
3125 | + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_IS_FUSE, | |
3126 | }; | |
3127 | ||
3128 | static inline int register_fuseblk(void) | |
3129 | diff --git a/fs/namei.c b/fs/namei.c | |
5bd2511a | 3130 | index 868d0cb..325b6cf 100644 |
2380c486 JR |
3131 | --- a/fs/namei.c |
3132 | +++ b/fs/namei.c | |
5bd2511a | 3133 | @@ -2256,6 +2256,8 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) |
2380c486 JR |
3134 | if (!dir->i_op->unlink) |
3135 | return -EPERM; | |
3136 | ||
3137 | + vfs_check_frozen(dir->i_sb, SB_FREEZE_WRITE); | |
3138 | + | |
2380c486 | 3139 | mutex_lock(&dentry->d_inode->i_mutex); |
d031c9d6 | 3140 | if (d_mountpoint(dentry)) |
3141 | error = -EBUSY; | |
2380c486 | 3142 | diff --git a/fs/super.c b/fs/super.c |
5bd2511a | 3143 | index 5c35bc7..7c2e6e8 100644 |
2380c486 JR |
3144 | --- a/fs/super.c |
3145 | +++ b/fs/super.c | |
5bd2511a | 3146 | @@ -34,6 +34,8 @@ |
2380c486 JR |
3147 | |
3148 | ||
3149 | LIST_HEAD(super_blocks); | |
3150 | +EXPORT_SYMBOL_GPL(super_blocks); | |
3151 | + | |
3152 | DEFINE_SPINLOCK(sb_lock); | |
3153 | ||
3154 | /** | |
3155 | diff --git a/include/linux/Kbuild b/include/linux/Kbuild | |
5bd2511a | 3156 | index 2fc8e14..709a571 100644 |
2380c486 JR |
3157 | --- a/include/linux/Kbuild |
3158 | +++ b/include/linux/Kbuild | |
5bd2511a | 3159 | @@ -216,6 +216,7 @@ unifdef-y += filter.h |
2380c486 JR |
3160 | unifdef-y += flat.h |
3161 | unifdef-y += futex.h | |
3162 | unifdef-y += fs.h | |
3163 | +unifdef-y += freezer.h | |
3164 | unifdef-y += gameport.h | |
3165 | unifdef-y += generic_serial.h | |
5dd10c98 | 3166 | unifdef-y += hdlcdrv.h |
7e46296a | 3167 | diff --git a/include/linux/bio.h b/include/linux/bio.h |
5dd10c98 | 3168 | index 7fc5606..07e9b97 100644 |
7e46296a AM |
3169 | --- a/include/linux/bio.h |
3170 | +++ b/include/linux/bio.h | |
3171 | @@ -175,8 +175,11 @@ enum bio_rw_flags { | |
3172 | BIO_RW_META, | |
3173 | BIO_RW_DISCARD, | |
3174 | BIO_RW_NOIDLE, | |
3175 | + BIO_RW_TUXONICE, | |
3176 | }; | |
3177 | ||
3178 | +extern int trap_non_toi_io; | |
3179 | + | |
3180 | /* | |
3181 | * First four bits must match between bio->bi_rw and rq->cmd_flags, make | |
3182 | * that explicit here. | |
2380c486 | 3183 | diff --git a/include/linux/freezer.h b/include/linux/freezer.h |
de6743ae | 3184 | index da7e52b..a45b332 100644 |
2380c486 JR |
3185 | --- a/include/linux/freezer.h |
3186 | +++ b/include/linux/freezer.h | |
de6743ae | 3187 | @@ -124,6 +124,19 @@ static inline void set_freezable(void) |
2380c486 JR |
3188 | current->flags &= ~PF_NOFREEZE; |
3189 | } | |
3190 | ||
7e46296a | 3191 | +extern int freezer_state; |
2380c486 JR |
3192 | +#define FREEZER_OFF 0 |
3193 | +#define FREEZER_FILESYSTEMS_FROZEN 1 | |
3194 | +#define FREEZER_USERSPACE_FROZEN 2 | |
3195 | +#define FREEZER_FULLY_ON 3 | |
3196 | + | |
3197 | +static inline int freezer_is_on(void) | |
3198 | +{ | |
3199 | + return freezer_state == FREEZER_FULLY_ON; | |
3200 | +} | |
2380c486 JR |
3201 | + |
3202 | +extern void thaw_kernel_threads(void); | |
3203 | + | |
3204 | /* | |
3205 | * Tell the freezer that the current task should be frozen by it and that it | |
3206 | * should send a fake signal to the task to freeze it. | |
de6743ae | 3207 | @@ -175,6 +188,8 @@ static inline int freeze_processes(void) { BUG(); return 0; } |
2380c486 JR |
3208 | static inline void thaw_processes(void) {} |
3209 | ||
3210 | static inline int try_to_freeze(void) { return 0; } | |
3211 | +static inline int freezer_is_on(void) { return 0; } | |
3212 | +static inline void thaw_kernel_threads(void) { } | |
3213 | ||
3214 | static inline void freezer_do_not_count(void) {} | |
3215 | static inline void freezer_count(void) {} | |
3216 | diff --git a/include/linux/fs.h b/include/linux/fs.h | |
5bd2511a | 3217 | index 471e1ff..63da27b 100644 |
2380c486 JR |
3218 | --- a/include/linux/fs.h |
3219 | +++ b/include/linux/fs.h | |
de6743ae | 3220 | @@ -176,6 +176,7 @@ struct inodes_stat_t { |
2380c486 JR |
3221 | #define FS_REQUIRES_DEV 1 |
3222 | #define FS_BINARY_MOUNTDATA 2 | |
3223 | #define FS_HAS_SUBTYPE 4 | |
3224 | +#define FS_IS_FUSE 8 /* Fuse filesystem - bdev freeze these too */ | |
3225 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | |
3226 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() | |
3227 | * during rename() internally. | |
de6743ae | 3228 | @@ -209,6 +210,7 @@ struct inodes_stat_t { |
2380c486 JR |
3229 | #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ |
3230 | #define MS_I_VERSION (1<<23) /* Update inode I_version field */ | |
9474138d AM |
3231 | #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ |
3232 | +#define MS_FROZEN (1<<25) /* Frozen by freeze_filesystems() */ | |
28757c75 | 3233 | #define MS_BORN (1<<29) |
2380c486 JR |
3234 | #define MS_ACTIVE (1<<30) |
3235 | #define MS_NOUSER (1<<31) | |
de6743ae | 3236 | @@ -235,6 +237,8 @@ struct inodes_stat_t { |
e999739a | 3237 | #define S_NOCMTIME 128 /* Do not update file c/mtime */ |
3238 | #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ | |
3239 | #define S_PRIVATE 512 /* Inode is fs-internal */ | |
3240 | +#define S_ATOMIC_COPY 1024 /* Pages mapped with this inode need to be | |
3241 | + atomically copied (gem) */ | |
3242 | ||
3243 | /* | |
3244 | * Note that nosuid etc flags are inode-specific: setting some file-system | |
de6743ae | 3245 | @@ -382,6 +386,7 @@ struct inodes_stat_t { |
92bca44c AM |
3246 | #include <linux/capability.h> |
3247 | #include <linux/semaphore.h> | |
3248 | #include <linux/fiemap.h> | |
3249 | +#include <linux/freezer.h> | |
3250 | ||
3251 | #include <asm/atomic.h> | |
3252 | #include <asm/byteorder.h> | |
5bd2511a | 3253 | @@ -1395,8 +1400,11 @@ enum { |
2380c486 JR |
3254 | SB_FREEZE_TRANS = 2, |
3255 | }; | |
3256 | ||
3257 | -#define vfs_check_frozen(sb, level) \ | |
3258 | - wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) | |
3259 | +#define vfs_check_frozen(sb, level) do { \ | |
3260 | + freezer_do_not_count(); \ | |
3261 | + wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))); \ | |
3262 | + freezer_count(); \ | |
3263 | +} while (0) | |
3264 | ||
3265 | #define get_fs_excl() atomic_inc(¤t->fs_excl) | |
3266 | #define put_fs_excl() atomic_dec(¤t->fs_excl) | |
5bd2511a | 3267 | @@ -1954,6 +1962,13 @@ extern struct super_block *freeze_bdev(struct block_device *); |
92bca44c AM |
3268 | extern void emergency_thaw_all(void); |
3269 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); | |
9474138d | 3270 | extern int fsync_bdev(struct block_device *); |
92bca44c AM |
3271 | +extern int fsync_super(struct super_block *); |
3272 | +extern int fsync_no_super(struct block_device *); | |
9474138d AM |
3273 | +#define FS_FREEZER_FUSE 1 |
3274 | +#define FS_FREEZER_NORMAL 2 | |
3275 | +#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL) | |
3276 | +void freeze_filesystems(int which); | |
3277 | +void thaw_filesystems(int which); | |
3278 | #else | |
3279 | static inline void bd_forget(struct inode *inode) {} | |
3280 | static inline int sync_blockdev(struct block_device *bdev) { return 0; } | |
cacc47f8 AM |
3281 | diff --git a/include/linux/fs_uuid.h b/include/linux/fs_uuid.h |
3282 | new file mode 100644 | |
3283 | index 0000000..3234135 | |
3284 | --- /dev/null | |
3285 | +++ b/include/linux/fs_uuid.h | |
3286 | @@ -0,0 +1,19 @@ | |
3287 | +#include <linux/device.h> | |
3288 | + | |
3289 | +struct hd_struct; | |
3290 | +struct block_device; | |
3291 | + | |
3292 | +struct fs_info { | |
3293 | + char uuid[16]; | |
3294 | + dev_t dev_t; | |
3295 | + char *last_mount; | |
3296 | + int last_mount_size; | |
3297 | +}; | |
3298 | + | |
3299 | +int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek); | |
3300 | +dev_t blk_lookup_fs_info(struct fs_info *seek); | |
3301 | +struct fs_info *fs_info_from_block_dev(struct block_device *bdev); | |
3302 | +void free_fs_info(struct fs_info *fs_info); | |
3303 | +int bdev_matches_key(struct block_device *bdev, const char *key); | |
3304 | +struct block_device *next_bdev_of_type(struct block_device *last, | |
3305 | + const char *key); | |
2380c486 | 3306 | diff --git a/include/linux/mm.h b/include/linux/mm.h |
5bd2511a | 3307 | index b969efb..1e63042 100644 |
2380c486 JR |
3308 | --- a/include/linux/mm.h |
3309 | +++ b/include/linux/mm.h | |
de6743ae | 3310 | @@ -98,6 +98,7 @@ extern unsigned int kobjsize(const void *objp); |
7e46296a AM |
3311 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ |
3312 | #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ | |
3313 | #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ | |
5dd10c98 | 3314 | +#define VM_ATOMIC_COPY 0x01000000 /* TOI should do atomic copy (mmu) */ |
7e46296a AM |
3315 | #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ |
3316 | #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ | |
e999739a | 3317 | |
5bd2511a | 3318 | @@ -1424,6 +1425,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, |
2380c486 JR |
3319 | void __user *, size_t *, loff_t *); |
3320 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |
3321 | unsigned long lru_pages); | |
3322 | +void drop_pagecache(void); | |
3323 | ||
3324 | #ifndef CONFIG_MMU | |
3325 | #define randomize_va_space 0 | |
3326 | diff --git a/include/linux/netlink.h b/include/linux/netlink.h | |
5bd2511a | 3327 | index 59d0669..5efa8e0 100644 |
2380c486 JR |
3328 | --- a/include/linux/netlink.h |
3329 | +++ b/include/linux/netlink.h | |
3330 | @@ -24,6 +24,8 @@ | |
3331 | /* leave room for NETLINK_DM (DM Events) */ | |
3332 | #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ | |
3333 | #define NETLINK_ECRYPTFS 19 | |
3334 | +#define NETLINK_TOI_USERUI 20 /* TuxOnIce's userui */ | |
3335 | +#define NETLINK_TOI_USM 21 /* Userspace storage manager */ | |
3336 | ||
3337 | #define MAX_LINKS 32 | |
3338 | ||
3339 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
7e46296a | 3340 | index 5e781d8..a1c07f3 100644 |
2380c486 JR |
3341 | --- a/include/linux/suspend.h |
3342 | +++ b/include/linux/suspend.h | |
7e46296a AM |
3343 | @@ -329,4 +329,70 @@ static inline void unlock_system_sleep(void) |
3344 | } | |
3345 | #endif | |
2380c486 JR |
3346 | |
3347 | +enum { | |
3348 | + TOI_CAN_HIBERNATE, | |
3349 | + TOI_CAN_RESUME, | |
3350 | + TOI_RESUME_DEVICE_OK, | |
3351 | + TOI_NORESUME_SPECIFIED, | |
3352 | + TOI_SANITY_CHECK_PROMPT, | |
3353 | + TOI_CONTINUE_REQ, | |
3354 | + TOI_RESUMED_BEFORE, | |
3355 | + TOI_BOOT_TIME, | |
3356 | + TOI_NOW_RESUMING, | |
3357 | + TOI_IGNORE_LOGLEVEL, | |
3358 | + TOI_TRYING_TO_RESUME, | |
3359 | + TOI_LOADING_ALT_IMAGE, | |
3360 | + TOI_STOP_RESUME, | |
3361 | + TOI_IO_STOPPED, | |
3362 | + TOI_NOTIFIERS_PREPARE, | |
3363 | + TOI_CLUSTER_MODE, | |
3364 | + TOI_BOOT_KERNEL, | |
3365 | +}; | |
3366 | + | |
3367 | +#ifdef CONFIG_TOI | |
3368 | + | |
3369 | +/* Used in init dir files */ | |
3370 | +extern unsigned long toi_state; | |
3371 | +#define set_toi_state(bit) (set_bit(bit, &toi_state)) | |
3372 | +#define clear_toi_state(bit) (clear_bit(bit, &toi_state)) | |
3373 | +#define test_toi_state(bit) (test_bit(bit, &toi_state)) | |
3374 | +extern int toi_running; | |
3375 | + | |
3376 | +#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action)) | |
9474138d | 3377 | +extern int try_tuxonice_hibernate(void); |
2380c486 JR |
3378 | + |
3379 | +#else /* !CONFIG_TOI */ | |
3380 | + | |
3381 | +#define toi_state (0) | |
3382 | +#define set_toi_state(bit) do { } while (0) | |
3383 | +#define clear_toi_state(bit) do { } while (0) | |
3384 | +#define test_toi_state(bit) (0) | |
3385 | +#define toi_running (0) | |
3386 | + | |
9474138d | 3387 | +static inline int try_tuxonice_hibernate(void) { return 0; } |
2380c486 JR |
3388 | +#define test_action_state(bit) (0) |
3389 | + | |
3390 | +#endif /* CONFIG_TOI */ | |
3391 | + | |
3392 | +#ifdef CONFIG_HIBERNATION | |
3393 | +#ifdef CONFIG_TOI | |
9474138d | 3394 | +extern void try_tuxonice_resume(void); |
2380c486 | 3395 | +#else |
9474138d | 3396 | +#define try_tuxonice_resume() do { } while (0) |
2380c486 JR |
3397 | +#endif |
3398 | + | |
3399 | +extern int resume_attempted; | |
3400 | +extern int software_resume(void); | |
3401 | + | |
3402 | +static inline void check_resume_attempted(void) | |
3403 | +{ | |
3404 | + if (resume_attempted) | |
3405 | + return; | |
3406 | + | |
3407 | + software_resume(); | |
3408 | +} | |
3409 | +#else | |
3410 | +#define check_resume_attempted() do { } while (0) | |
3411 | +#define resume_attempted (0) | |
3412 | +#endif | |
3413 | #endif /* _LINUX_SUSPEND_H */ | |
3414 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
5bd2511a | 3415 | index ff4acea..5aa8559 100644 |
2380c486 JR |
3416 | --- a/include/linux/swap.h |
3417 | +++ b/include/linux/swap.h | |
5bd2511a | 3418 | @@ -198,6 +198,7 @@ struct swap_list_t { |
2380c486 JR |
3419 | extern unsigned long totalram_pages; |
3420 | extern unsigned long totalreserve_pages; | |
3421 | extern unsigned int nr_free_buffer_pages(void); | |
3422 | +extern unsigned int nr_unallocated_buffer_pages(void); | |
3423 | extern unsigned int nr_free_pagecache_pages(void); | |
3424 | ||
3425 | /* Definition of global_page_state not available yet */ | |
5bd2511a | 3426 | @@ -248,6 +249,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
de6743ae AM |
3427 | int nid); |
3428 | extern int __isolate_lru_page(struct page *page, int mode, int file); | |
3429 | extern unsigned long shrink_all_memory(unsigned long nr_pages); | |
3430 | +extern unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, | |
3431 | + gfp_t mask); | |
3432 | extern int vm_swappiness; | |
3433 | extern int remove_mapping(struct address_space *mapping, struct page *page); | |
3434 | extern long vm_total_pages; | |
5bd2511a | 3435 | @@ -327,8 +330,10 @@ extern void swapcache_free(swp_entry_t, struct page *page); |
5dd10c98 AM |
3436 | extern int free_swap_and_cache(swp_entry_t); |
3437 | extern int swap_type_of(dev_t, sector_t, struct block_device **); | |
3438 | extern unsigned int count_swap_pages(int, int); | |
3439 | +extern sector_t map_swap_entry(swp_entry_t entry, struct block_device **); | |
3440 | extern sector_t map_swap_page(struct page *, struct block_device **); | |
3441 | extern sector_t swapdev_block(int, pgoff_t); | |
3442 | +extern struct swap_info_struct *get_swap_info_struct(unsigned); | |
3443 | extern int reuse_swap_page(struct page *); | |
3444 | extern int try_to_free_swap(struct page *); | |
3445 | struct backing_dev_info; | |
2380c486 | 3446 | diff --git a/init/do_mounts.c b/init/do_mounts.c |
de6743ae | 3447 | index 02e3ca4..5af8c3e 100644 |
2380c486 JR |
3448 | --- a/init/do_mounts.c |
3449 | +++ b/init/do_mounts.c | |
de6743ae | 3450 | @@ -144,6 +144,7 @@ fail: |
2380c486 JR |
3451 | done: |
3452 | return res; | |
3453 | } | |
3454 | +EXPORT_SYMBOL_GPL(name_to_dev_t); | |
3455 | ||
3456 | static int __init root_dev_setup(char *line) | |
3457 | { | |
de6743ae | 3458 | @@ -414,6 +415,8 @@ void __init prepare_namespace(void) |
2380c486 JR |
3459 | if (is_floppy && rd_doload && rd_load_disk(0)) |
3460 | ROOT_DEV = Root_RAM0; | |
3461 | ||
3462 | + check_resume_attempted(); | |
3463 | + | |
3464 | mount_root(); | |
3465 | out: | |
7e46296a | 3466 | devtmpfs_mount("dev"); |
2380c486 | 3467 | diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c |
de6743ae | 3468 | index 2b10853..ec3e087 100644 |
2380c486 JR |
3469 | --- a/init/do_mounts_initrd.c |
3470 | +++ b/init/do_mounts_initrd.c | |
3471 | @@ -6,6 +6,7 @@ | |
3472 | #include <linux/romfs_fs.h> | |
3473 | #include <linux/initrd.h> | |
3474 | #include <linux/sched.h> | |
3475 | +#include <linux/suspend.h> | |
3476 | #include <linux/freezer.h> | |
3477 | ||
3478 | #include "do_mounts.h" | |
de6743ae | 3479 | @@ -64,6 +65,11 @@ static void __init handle_initrd(void) |
2380c486 JR |
3480 | |
3481 | current->flags &= ~PF_FREEZER_SKIP; | |
3482 | ||
3483 | + if (!resume_attempted) | |
3484 | + printk(KERN_ERR "TuxOnIce: No attempt was made to resume from " | |
3485 | + "any image that might exist.\n"); | |
3486 | + clear_toi_state(TOI_BOOT_TIME); | |
3487 | + | |
3488 | /* move initrd to rootfs' /old */ | |
3489 | sys_fchdir(old_fd); | |
3490 | sys_mount("/", ".", NULL, MS_MOVE, NULL); | |
3491 | diff --git a/init/main.c b/init/main.c | |
5bd2511a | 3492 | index 3bdb152..f74eb5b 100644 |
2380c486 JR |
3493 | --- a/init/main.c |
3494 | +++ b/init/main.c | |
5bd2511a | 3495 | @@ -117,6 +117,7 @@ extern void softirq_init(void); |
2380c486 JR |
3496 | char __initdata boot_command_line[COMMAND_LINE_SIZE]; |
3497 | /* Untouched saved command line (eg. for /proc) */ | |
3498 | char *saved_command_line; | |
3499 | +EXPORT_SYMBOL_GPL(saved_command_line); | |
3500 | /* Command line for parameter parsing */ | |
3501 | static char *static_command_line; | |
3502 | ||
3503 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
5bd2511a | 3504 | index 97d1b42..b6e21bb 100644 |
2380c486 JR |
3505 | --- a/kernel/cpu.c |
3506 | +++ b/kernel/cpu.c | |
5bd2511a AM |
3507 | @@ -428,6 +428,7 @@ int disable_nonboot_cpus(void) |
3508 | cpu_maps_update_done(); | |
2380c486 JR |
3509 | return error; |
3510 | } | |
3511 | +EXPORT_SYMBOL_GPL(disable_nonboot_cpus); | |
3512 | ||
7e46296a | 3513 | void __weak arch_enable_nonboot_cpus_begin(void) |
2380c486 | 3514 | { |
5bd2511a | 3515 | @@ -466,6 +467,7 @@ void __ref enable_nonboot_cpus(void) |
2380c486 JR |
3516 | out: |
3517 | cpu_maps_update_done(); | |
3518 | } | |
3519 | +EXPORT_SYMBOL_GPL(enable_nonboot_cpus); | |
3520 | ||
3521 | static int alloc_frozen_cpus(void) | |
3522 | { | |
2380c486 | 3523 | diff --git a/kernel/kmod.c b/kernel/kmod.c |
5bd2511a | 3524 | index 6e9b196..19247e0 100644 |
2380c486 JR |
3525 | --- a/kernel/kmod.c |
3526 | +++ b/kernel/kmod.c | |
5bd2511a | 3527 | @@ -290,6 +290,7 @@ int usermodehelper_disable(void) |
2380c486 JR |
3528 | usermodehelper_disabled = 0; |
3529 | return -EAGAIN; | |
3530 | } | |
3531 | +EXPORT_SYMBOL_GPL(usermodehelper_disable); | |
3532 | ||
3533 | /** | |
3534 | * usermodehelper_enable - allow new helpers to be started again | |
5bd2511a | 3535 | @@ -298,6 +299,7 @@ void usermodehelper_enable(void) |
2380c486 JR |
3536 | { |
3537 | usermodehelper_disabled = 0; | |
3538 | } | |
3539 | +EXPORT_SYMBOL_GPL(usermodehelper_enable); | |
3540 | ||
3541 | static void helper_lock(void) | |
3542 | { | |
92bca44c | 3543 | diff --git a/kernel/pid.c b/kernel/pid.c |
5bd2511a | 3544 | index e9fd8c1..32d2697 100644 |
92bca44c AM |
3545 | --- a/kernel/pid.c |
3546 | +++ b/kernel/pid.c | |
de6743ae | 3547 | @@ -384,6 +384,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
92bca44c AM |
3548 | { |
3549 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | |
3550 | } | |
3551 | +EXPORT_SYMBOL_GPL(find_task_by_pid_ns); | |
3552 | ||
3553 | struct task_struct *find_task_by_vpid(pid_t vnr) | |
3554 | { | |
2380c486 | 3555 | diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig |
de6743ae | 3556 | index 5c36ea9..cd32677 100644 |
2380c486 JR |
3557 | --- a/kernel/power/Kconfig |
3558 | +++ b/kernel/power/Kconfig | |
de6743ae | 3559 | @@ -47,6 +47,13 @@ config CAN_PM_TRACE |
2380c486 JR |
3560 | def_bool y |
3561 | depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL | |
3562 | ||
3563 | +config FS_FREEZER_DEBUG | |
3564 | + bool "Filesystem freezer debugging" | |
3565 | + depends on PM_DEBUG | |
3566 | + default n | |
3567 | + ---help--- | |
3568 | + This option enables debugging of the filesystem freezing code. | |
3569 | + | |
3570 | config PM_TRACE | |
3571 | bool | |
3572 | help | |
de6743ae | 3573 | @@ -197,6 +204,238 @@ config PM_STD_PARTITION |
2380c486 JR |
3574 | suspended image to. It will simply pick the first available swap |
3575 | device. | |
3576 | ||
3577 | +menuconfig TOI_CORE | |
3578 | + tristate "Enhanced Hibernation (TuxOnIce)" | |
3579 | + depends on HIBERNATION | |
3580 | + default y | |
3581 | + ---help--- | |
3582 | + TuxOnIce is the 'new and improved' suspend support. | |
3583 | + | |
3584 | + See the TuxOnIce home page (tuxonice.net) | |
3585 | + for FAQs, HOWTOs and other documentation. | |
3586 | + | |
3587 | + comment "Image Storage (you need at least one allocator)" | |
3588 | + depends on TOI_CORE | |
3589 | + | |
3590 | + config TOI_FILE | |
3591 | + tristate "File Allocator" | |
3592 | + depends on TOI_CORE | |
3593 | + default y | |
3594 | + ---help--- | |
3595 | + This option enables support for storing an image in a | |
5dd10c98 AM |
3596 | + simple file. You might want this if your swap is |
3597 | + sometimes full enough that you don't have enough spare | |
3598 | + space to store an image. | |
2380c486 JR |
3599 | + |
3600 | + config TOI_SWAP | |
3601 | + tristate "Swap Allocator" | |
3602 | + depends on TOI_CORE && SWAP | |
3603 | + default y | |
3604 | + ---help--- | |
3605 | + This option enables support for storing an image in your | |
3606 | + swap space. | |
3607 | + | |
3608 | + comment "General Options" | |
3609 | + depends on TOI_CORE | |
3610 | + | |
2380c486 JR |
3611 | + config TOI_CRYPTO |
3612 | + tristate "Compression support" | |
3613 | + depends on TOI_CORE && CRYPTO | |
3614 | + default y | |
3615 | + ---help--- | |
3616 | + This option adds support for using cryptoapi compression | |
9474138d AM |
3617 | + algorithms. Compression is particularly useful as it can |
3618 | + more than double your suspend and resume speed (depending | |
3619 | + upon how well your image compresses). | |
2380c486 JR |
3620 | + |
3621 | + You probably want this, so say Y here. | |
3622 | + | |
3623 | + comment "No compression support available without Cryptoapi support." | |
3624 | + depends on TOI_CORE && !CRYPTO | |
3625 | + | |
3626 | + config TOI_USERUI | |
3627 | + tristate "Userspace User Interface support" | |
3628 | + depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE) | |
3629 | + default y | |
3630 | + ---help--- | |
3631 | + This option enabled support for a userspace based user interface | |
3632 | + to TuxOnIce, which allows you to have a nice display while suspending | |
3633 | + and resuming, and also enables features such as pressing escape to | |
3634 | + cancel a cycle or interactive debugging. | |
3635 | + | |
3636 | + config TOI_USERUI_DEFAULT_PATH | |
3637 | + string "Default userui program location" | |
e999739a | 3638 | + default "/usr/local/sbin/tuxoniceui_text" |
2380c486 JR |
3639 | + depends on TOI_USERUI |
3640 | + ---help--- | |
3641 | + This entry allows you to specify a default path to the userui binary. | |
3642 | + | |
3643 | + config TOI_KEEP_IMAGE | |
3644 | + bool "Allow Keep Image Mode" | |
3645 | + depends on TOI_CORE | |
3646 | + ---help--- | |
3647 | + This option allows you to keep and image and reuse it. It is intended | |
3648 | + __ONLY__ for use with systems where all filesystems are mounted read- | |
3649 | + only (kiosks, for example). To use it, compile this option in and boot | |
3650 | + normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend. | |
3651 | + When you resume, the image will not be removed. You will be unable to turn | |
3652 | + off swap partitions (assuming you are using the swap allocator), but future | |
3653 | + suspends simply do a power-down. The image can be updated using the | |
3654 | + kernel command line parameter suspend_act= to turn off the keep image | |
3655 | + bit. Keep image mode is a little less user friendly on purpose - it | |
3656 | + should not be used without thought! | |
3657 | + | |
3658 | + config TOI_REPLACE_SWSUSP | |
3659 | + bool "Replace swsusp by default" | |
3660 | + default y | |
3661 | + depends on TOI_CORE | |
3662 | + ---help--- | |
3663 | + TuxOnIce can replace swsusp. This option makes that the default state, | |
3664 | + requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want | |
3665 | + to use the vanilla kernel functionality. Note that your initrd/ramfs will | |
3666 | + need to do this before trying to resume, too. | |
3667 | + With overriding swsusp enabled, echoing disk to /sys/power/state will | |
3668 | + start a TuxOnIce cycle. If resume= doesn't specify an allocator and both | |
3669 | + the swap and file allocators are compiled in, the swap allocator will be | |
3670 | + used by default. | |
3671 | + | |
3672 | + config TOI_IGNORE_LATE_INITCALL | |
3673 | + bool "Wait for initrd/ramfs to run, by default" | |
3674 | + default n | |
3675 | + depends on TOI_CORE | |
3676 | + ---help--- | |
3677 | + When booting, TuxOnIce can check for an image and start to resume prior | |
3678 | + to any initrd/ramfs running (via a late initcall). | |
3679 | + | |
3680 | + If you don't have an initrd/ramfs, this is what you want to happen - | |
3681 | + otherwise you won't be able to safely resume. You should set this option | |
3682 | + to 'No'. | |
3683 | + | |
3684 | + If, however, you want your initrd/ramfs to run anyway before resuming, | |
3685 | + you need to tell TuxOnIce to ignore that earlier opportunity to resume. | |
3686 | + This can be done either by using this compile time option, or by | |
3687 | + overriding this option with the boot-time parameter toi_initramfs_resume_only=1. | |
3688 | + | |
3689 | + Note that if TuxOnIce can't resume at the earlier opportunity, the | |
3690 | + value of this option won't matter - the initramfs/initrd (if any) will | |
3691 | + run anyway. | |
3692 | + | |
3693 | + menuconfig TOI_CLUSTER | |
3694 | + tristate "Cluster support" | |
3695 | + default n | |
3696 | + depends on TOI_CORE && NET && BROKEN | |
3697 | + ---help--- | |
3698 | + Support for linking multiple machines in a cluster so that they suspend | |
3699 | + and resume together. | |
3700 | + | |
3701 | + config TOI_DEFAULT_CLUSTER_INTERFACE | |
3702 | + string "Default cluster interface" | |
3703 | + depends on TOI_CLUSTER | |
3704 | + ---help--- | |
3705 | + The default interface on which to communicate with other nodes in | |
3706 | + the cluster. | |
3707 | + | |
3708 | + If no value is set here, cluster support will be disabled by default. | |
3709 | + | |
3710 | + config TOI_DEFAULT_CLUSTER_KEY | |
3711 | + string "Default cluster key" | |
3712 | + default "Default" | |
3713 | + depends on TOI_CLUSTER | |
3714 | + ---help--- | |
3715 | + The default key used by this node. All nodes in the same cluster | |
3716 | + have the same key. Multiple clusters may coexist on the same lan | |
3717 | + by using different values for this key. | |
3718 | + | |
3719 | + config TOI_CLUSTER_IMAGE_TIMEOUT | |
3720 | + int "Timeout when checking for image" | |
3721 | + default 15 | |
3722 | + depends on TOI_CLUSTER | |
3723 | + ---help--- | |
3724 | + Timeout (seconds) before continuing to boot when waiting to see | |
3725 | + whether other nodes might have an image. Set to -1 to wait | |
3726 | + indefinitely. In WAIT_UNTIL_NODES is non zero, we might continue | |
3727 | + booting sooner than this timeout. | |
3728 | + | |
3729 | + config TOI_CLUSTER_WAIT_UNTIL_NODES | |
3730 | + int "Nodes without image before continuing" | |
3731 | + default 0 | |
3732 | + depends on TOI_CLUSTER | |
3733 | + ---help--- | |
3734 | + When booting and no image is found, we wait to see if other nodes | |
3735 | + have an image before continuing to boot. This value lets us | |
3736 | + continue after seeing a certain number of nodes without an image, | |
3737 | + instead of continuing to wait for the timeout. Set to 0 to only | |
3738 | + use the timeout. | |
3739 | + | |
3740 | + config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE | |
3741 | + string "Default pre-hibernate script" | |
3742 | + depends on TOI_CLUSTER | |
3743 | + ---help--- | |
3744 | + The default script to be called when starting to hibernate. | |
3745 | + | |
3746 | + config TOI_DEFAULT_CLUSTER_POST_HIBERNATE | |
3747 | + string "Default post-hibernate script" | |
3748 | + depends on TOI_CLUSTER | |
3749 | + ---help--- | |
3750 | + The default script to be called after resuming from hibernation. | |
3751 | + | |
3752 | + config TOI_DEFAULT_WAIT | |
3753 | + int "Default waiting time for emergency boot messages" | |
3754 | + default "25" | |
3755 | + range -1 32768 | |
3756 | + depends on TOI_CORE | |
3757 | + help | |
3758 | + TuxOnIce can display warnings very early in the process of resuming, | |
3759 | + if (for example) it appears that you have booted a kernel that doesn't | |
3760 | + match an image on disk. It can then give you the opportunity to either | |
3761 | + continue booting that kernel, or reboot the machine. This option can be | |
3762 | + used to control how long to wait in such circumstances. -1 means wait | |
3763 | + forever. 0 means don't wait at all (do the default action, which will | |
3764 | + generally be to continue booting and remove the image). Values of 1 or | |
3765 | + more indicate a number of seconds (up to 255) to wait before doing the | |
3766 | + default. | |
3767 | + | |
3768 | + config TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE | |
3769 | + int "Default extra pages allowance" | |
3770 | + default "2000" | |
3771 | + range 500 32768 | |
3772 | + depends on TOI_CORE | |
3773 | + help | |
3774 | + This value controls the default for the allowance TuxOnIce makes for | |
3775 | + drivers to allocate extra memory during the atomic copy. The default | |
3776 | + value of 2000 will be okay in most cases. If you are using | |
3777 | + DRI, the easiest way to find what value to use is to try to hibernate | |
3778 | + and look at how many pages were actually needed in the sysfs entry | |
3779 | + /sys/power/tuxonice/debug_info (first number on the last line), adding | |
3780 | + a little extra because the value is not always the same. | |
3781 | + | |
3782 | + config TOI_CHECKSUM | |
3783 | + bool "Checksum pageset2" | |
3784 | + default n | |
3785 | + depends on TOI_CORE | |
3786 | + select CRYPTO | |
3787 | + select CRYPTO_ALGAPI | |
3788 | + select CRYPTO_MD4 | |
3789 | + ---help--- | |
3790 | + Adds support for checksumming pageset2 pages, to ensure you really get an | |
3791 | + atomic copy. Since some filesystems (XFS especially) change metadata even | |
3792 | + when there's no other activity, we need this to check for pages that have | |
3793 | + been changed while we were saving the page cache. If your debugging output | |
3794 | + always says no pages were resaved, you may be able to safely disable this | |
3795 | + option. | |
3796 | + | |
3797 | +config TOI | |
3798 | + bool | |
3799 | + depends on TOI_CORE!=n | |
3800 | + default y | |
3801 | + | |
3802 | +config TOI_EXPORTS | |
3803 | + bool | |
3804 | + depends on TOI_SWAP=m || TOI_FILE=m || \ | |
3805 | + TOI_CRYPTO=m || TOI_CLUSTER=m || \ | |
3806 | + TOI_USERUI=m || TOI_CORE=m | |
3807 | + default y | |
3808 | + | |
3809 | config APM_EMULATION | |
3810 | tristate "Advanced Power Management Emulation" | |
3811 | depends on PM && SYS_SUPPORTS_APM_EMULATION | |
3812 | diff --git a/kernel/power/Makefile b/kernel/power/Makefile | |
5bd2511a | 3813 | index 524e058..3d736f4 100644 |
2380c486 JR |
3814 | --- a/kernel/power/Makefile |
3815 | +++ b/kernel/power/Makefile | |
7e46296a | 3816 | @@ -3,6 +3,35 @@ ifeq ($(CONFIG_PM_DEBUG),y) |
2380c486 JR |
3817 | EXTRA_CFLAGS += -DDEBUG |
3818 | endif | |
3819 | ||
7e46296a | 3820 | +tuxonice_core-y := tuxonice_modules.o |
2380c486 JR |
3821 | + |
3822 | +obj-$(CONFIG_TOI) += tuxonice_builtin.o | |
3823 | + | |
92bca44c | 3824 | +tuxonice_core-$(CONFIG_PM_DEBUG) += tuxonice_alloc.o |
2380c486 | 3825 | + |
7e46296a AM |
3826 | +# Compile these in after allocation debugging, if used. |
3827 | + | |
3828 | +tuxonice_core-y += tuxonice_sysfs.o tuxonice_highlevel.o \ | |
3829 | + tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \ | |
3830 | + tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \ | |
3831 | + tuxonice_power_off.o tuxonice_atomic_copy.o | |
3832 | + | |
92bca44c | 3833 | +tuxonice_core-$(CONFIG_TOI_CHECKSUM) += tuxonice_checksum.o |
2380c486 | 3834 | + |
92bca44c | 3835 | +tuxonice_core-$(CONFIG_NET) += tuxonice_storage.o tuxonice_netlink.o |
2380c486 JR |
3836 | + |
3837 | +obj-$(CONFIG_TOI_CORE) += tuxonice_core.o | |
3838 | +obj-$(CONFIG_TOI_CRYPTO) += tuxonice_compress.o | |
3839 | + | |
7e46296a AM |
3840 | +tuxonice_bio-y := tuxonice_bio_core.o tuxonice_bio_chains.o \ |
3841 | + tuxonice_bio_signature.o | |
3842 | + | |
3843 | +obj-$(CONFIG_TOI_SWAP) += tuxonice_bio.o tuxonice_swap.o | |
3844 | +obj-$(CONFIG_TOI_FILE) += tuxonice_bio.o tuxonice_file.o | |
2380c486 JR |
3845 | +obj-$(CONFIG_TOI_CLUSTER) += tuxonice_cluster.o |
3846 | + | |
3847 | +obj-$(CONFIG_TOI_USERUI) += tuxonice_userui.o | |
3848 | + | |
3849 | obj-$(CONFIG_PM) += main.o | |
3850 | obj-$(CONFIG_PM_SLEEP) += console.o | |
3851 | obj-$(CONFIG_FREEZER) += process.o | |
7e46296a | 3852 | diff --git a/kernel/power/console.c b/kernel/power/console.c |
5dd10c98 | 3853 | index 218e5af..95a6bdc 100644 |
7e46296a AM |
3854 | --- a/kernel/power/console.c |
3855 | +++ b/kernel/power/console.c | |
5dd10c98 AM |
3856 | @@ -24,6 +24,7 @@ int pm_prepare_console(void) |
3857 | orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); | |
7e46296a AM |
3858 | return 0; |
3859 | } | |
3860 | +EXPORT_SYMBOL_GPL(pm_prepare_console); | |
3861 | ||
3862 | void pm_restore_console(void) | |
3863 | { | |
5dd10c98 AM |
3864 | @@ -32,4 +33,5 @@ void pm_restore_console(void) |
3865 | vt_kmsg_redirect(orig_kmsg); | |
7e46296a AM |
3866 | } |
3867 | } | |
3868 | +EXPORT_SYMBOL_GPL(pm_restore_console); | |
3869 | #endif | |
92bca44c | 3870 | diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c |
de6743ae | 3871 | index aa9e916..4a836b4 100644 |
92bca44c AM |
3872 | --- a/kernel/power/hibernate.c |
3873 | +++ b/kernel/power/hibernate.c | |
de6743ae | 3874 | @@ -26,11 +26,12 @@ |
9474138d AM |
3875 | #include <scsi/scsi_scan.h> |
3876 | #include <asm/suspend.h> | |
2380c486 | 3877 | |
9474138d | 3878 | -#include "power.h" |
2380c486 JR |
3879 | +#include "tuxonice.h" |
3880 | ||
92bca44c | 3881 | |
2380c486 JR |
3882 | static int noresume = 0; |
3883 | -static char resume_file[256] = CONFIG_PM_STD_PARTITION; | |
3884 | +char resume_file[256] = CONFIG_PM_STD_PARTITION; | |
3885 | +EXPORT_SYMBOL_GPL(resume_file); | |
2380c486 JR |
3886 | dev_t swsusp_resume_device; |
3887 | sector_t swsusp_resume_block; | |
5dd10c98 | 3888 | int in_suspend __nosavedata = 0; |
de6743ae | 3889 | @@ -117,55 +118,60 @@ static int hibernation_test(int level) { return 0; } |
2380c486 JR |
3890 | * hibernation |
3891 | */ | |
3892 | ||
3893 | -static int platform_begin(int platform_mode) | |
3894 | +int platform_begin(int platform_mode) | |
3895 | { | |
3896 | return (platform_mode && hibernation_ops) ? | |
3897 | hibernation_ops->begin() : 0; | |
3898 | } | |
3899 | +EXPORT_SYMBOL_GPL(platform_begin); | |
3900 | ||
3901 | /** | |
3902 | * platform_end - tell the platform driver that we've entered the | |
3903 | * working state | |
3904 | */ | |
3905 | ||
3906 | -static void platform_end(int platform_mode) | |
3907 | +void platform_end(int platform_mode) | |
3908 | { | |
3909 | if (platform_mode && hibernation_ops) | |
3910 | hibernation_ops->end(); | |
3911 | } | |
3912 | +EXPORT_SYMBOL_GPL(platform_end); | |
3913 | ||
3914 | /** | |
3915 | * platform_pre_snapshot - prepare the machine for hibernation using the | |
3916 | * platform driver if so configured and return an error code if it fails | |
3917 | */ | |
3918 | ||
3919 | -static int platform_pre_snapshot(int platform_mode) | |
3920 | +int platform_pre_snapshot(int platform_mode) | |
3921 | { | |
3922 | return (platform_mode && hibernation_ops) ? | |
3923 | hibernation_ops->pre_snapshot() : 0; | |
3924 | } | |
3925 | +EXPORT_SYMBOL_GPL(platform_pre_snapshot); | |
3926 | ||
3927 | /** | |
3928 | * platform_leave - prepare the machine for switching to the normal mode | |
3929 | * of operation using the platform driver (called with interrupts disabled) | |
3930 | */ | |
3931 | ||
3932 | -static void platform_leave(int platform_mode) | |
3933 | +void platform_leave(int platform_mode) | |
3934 | { | |
3935 | if (platform_mode && hibernation_ops) | |
3936 | hibernation_ops->leave(); | |
3937 | } | |
3938 | +EXPORT_SYMBOL_GPL(platform_leave); | |
3939 | ||
3940 | /** | |
3941 | * platform_finish - switch the machine to the normal mode of operation | |
3942 | * using the platform driver (must be called after platform_prepare()) | |
3943 | */ | |
3944 | ||
3945 | -static void platform_finish(int platform_mode) | |
3946 | +void platform_finish(int platform_mode) | |
3947 | { | |
3948 | if (platform_mode && hibernation_ops) | |
3949 | hibernation_ops->finish(); | |
3950 | } | |
3951 | +EXPORT_SYMBOL_GPL(platform_finish); | |
3952 | ||
3953 | /** | |
3954 | * platform_pre_restore - prepare the platform for the restoration from a | |
de6743ae | 3955 | @@ -173,11 +179,12 @@ static void platform_finish(int platform_mode) |
2380c486 JR |
3956 | * called, platform_restore_cleanup() must be called. |
3957 | */ | |
3958 | ||
3959 | -static int platform_pre_restore(int platform_mode) | |
3960 | +int platform_pre_restore(int platform_mode) | |
3961 | { | |
3962 | return (platform_mode && hibernation_ops) ? | |
3963 | hibernation_ops->pre_restore() : 0; | |
3964 | } | |
3965 | +EXPORT_SYMBOL_GPL(platform_pre_restore); | |
3966 | ||
3967 | /** | |
3968 | * platform_restore_cleanup - switch the platform to the normal mode of | |
de6743ae | 3969 | @@ -186,22 +193,24 @@ static int platform_pre_restore(int platform_mode) |
2380c486 JR |
3970 | * regardless of the result of platform_pre_restore(). |
3971 | */ | |
3972 | ||
3973 | -static void platform_restore_cleanup(int platform_mode) | |
3974 | +void platform_restore_cleanup(int platform_mode) | |
3975 | { | |
3976 | if (platform_mode && hibernation_ops) | |
3977 | hibernation_ops->restore_cleanup(); | |
3978 | } | |
3979 | +EXPORT_SYMBOL_GPL(platform_restore_cleanup); | |
3980 | ||
3981 | /** | |
3982 | * platform_recover - recover the platform from a failure to suspend | |
3983 | * devices. | |
3984 | */ | |
3985 | ||
3986 | -static void platform_recover(int platform_mode) | |
3987 | +void platform_recover(int platform_mode) | |
3988 | { | |
3989 | if (platform_mode && hibernation_ops && hibernation_ops->recover) | |
3990 | hibernation_ops->recover(); | |
3991 | } | |
3992 | +EXPORT_SYMBOL_GPL(platform_recover); | |
3993 | ||
3994 | /** | |
5dd10c98 | 3995 | * swsusp_show_speed - print the time elapsed between two events. |
de6743ae | 3996 | @@ -535,6 +544,7 @@ int hibernation_platform_enter(void) |
92bca44c | 3997 | |
2380c486 JR |
3998 | return error; |
3999 | } | |
4000 | +EXPORT_SYMBOL_GPL(hibernation_platform_enter); | |
4001 | ||
4002 | /** | |
92bca44c | 4003 | * power_down - Shut the machine down for hibernation. |
de6743ae | 4004 | @@ -586,6 +596,9 @@ int hibernate(void) |
2380c486 JR |
4005 | { |
4006 | int error; | |
4007 | ||
4008 | + if (test_action_state(TOI_REPLACE_SWSUSP)) | |
9474138d | 4009 | + return try_tuxonice_hibernate(); |
2380c486 JR |
4010 | + |
4011 | mutex_lock(&pm_mutex); | |
4012 | /* The snapshot device should not be opened while we're running */ | |
4013 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
de6743ae | 4014 | @@ -666,11 +679,19 @@ int hibernate(void) |
2380c486 JR |
4015 | * |
4016 | */ | |
4017 | ||
4018 | -static int software_resume(void) | |
4019 | +int software_resume(void) | |
4020 | { | |
4021 | int error; | |
4022 | unsigned int flags; | |
92bca44c | 4023 | |
2380c486 JR |
4024 | + resume_attempted = 1; |
4025 | + | |
4026 | + /* | |
4027 | + * We can't know (until an image header - if any - is loaded), whether | |
4028 | + * we did override swsusp. We therefore ensure that both are tried. | |
4029 | + */ | |
9474138d | 4030 | + try_tuxonice_resume(); |
92bca44c | 4031 | + |
2380c486 JR |
4032 | /* |
4033 | * If the user said "noresume".. bail out early. | |
92bca44c | 4034 | */ |
de6743ae | 4035 | @@ -999,6 +1020,7 @@ static int __init resume_offset_setup(char *str) |
2380c486 JR |
4036 | static int __init noresume_setup(char *str) |
4037 | { | |
4038 | noresume = 1; | |
4039 | + set_toi_state(TOI_NORESUME_SPECIFIED); | |
4040 | return 1; | |
4041 | } | |
4042 | ||
4043 | diff --git a/kernel/power/main.c b/kernel/power/main.c | |
de6743ae | 4044 | index b58800b..d23adf9 100644 |
2380c486 JR |
4045 | --- a/kernel/power/main.c |
4046 | +++ b/kernel/power/main.c | |
7e46296a | 4047 | @@ -16,6 +16,7 @@ |
2380c486 JR |
4048 | #include "power.h" |
4049 | ||
4050 | DEFINE_MUTEX(pm_mutex); | |
4051 | +EXPORT_SYMBOL_GPL(pm_mutex); | |
4052 | ||
4053 | unsigned int pm_flags; | |
4054 | EXPORT_SYMBOL(pm_flags); | |
7e46296a | 4055 | @@ -24,7 +25,8 @@ EXPORT_SYMBOL(pm_flags); |
2380c486 JR |
4056 | |
4057 | /* Routines for PM-transition notifications */ | |
4058 | ||
4059 | -static BLOCKING_NOTIFIER_HEAD(pm_chain_head); | |
4060 | +BLOCKING_NOTIFIER_HEAD(pm_chain_head); | |
4061 | +EXPORT_SYMBOL_GPL(pm_chain_head); | |
4062 | ||
4063 | int register_pm_notifier(struct notifier_block *nb) | |
4064 | { | |
7e46296a | 4065 | @@ -43,6 +45,7 @@ int pm_notifier_call_chain(unsigned long val) |
92bca44c AM |
4066 | return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) |
4067 | == NOTIFY_BAD) ? -EINVAL : 0; | |
2380c486 JR |
4068 | } |
4069 | +EXPORT_SYMBOL_GPL(pm_notifier_call_chain); | |
4070 | ||
de6743ae AM |
4071 | /* If set, devices may be suspended and resumed asynchronously. */ |
4072 | int pm_async_enabled = 1; | |
4073 | @@ -136,6 +139,7 @@ power_attr(pm_test); | |
92bca44c | 4074 | #endif /* CONFIG_PM_SLEEP */ |
2380c486 JR |
4075 | |
4076 | struct kobject *power_kobj; | |
4077 | +EXPORT_SYMBOL_GPL(power_kobj); | |
4078 | ||
4079 | /** | |
4080 | * state - control system power state. | |
4081 | diff --git a/kernel/power/power.h b/kernel/power/power.h | |
5bd2511a | 4082 | index 006270f..a5e538f 100644 |
2380c486 JR |
4083 | --- a/kernel/power/power.h |
4084 | +++ b/kernel/power/power.h | |
9474138d | 4085 | @@ -31,8 +31,12 @@ static inline char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4086 | return arch_hibernation_header_restore(info) ? |
4087 | "architecture specific data" : NULL; | |
4088 | } | |
4089 | +#else | |
e999739a | 4090 | +extern char *check_image_kernel(struct swsusp_info *info); |
2380c486 | 4091 | #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ |
e999739a | 4092 | +extern int init_header(struct swsusp_info *info); |
2380c486 JR |
4093 | |
4094 | +extern char resume_file[256]; | |
4095 | /* | |
4096 | * Keep some memory free so that I/O operations can succeed without paging | |
4097 | * [Might this be more than 4 MB?] | |
9474138d | 4098 | @@ -49,6 +53,7 @@ static inline char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4099 | extern int hibernation_snapshot(int platform_mode); |
4100 | extern int hibernation_restore(int platform_mode); | |
4101 | extern int hibernation_platform_enter(void); | |
4102 | +extern void platform_recover(int platform_mode); | |
4103 | #endif | |
4104 | ||
4105 | extern int pfn_is_nosave(unsigned long); | |
9474138d | 4106 | @@ -63,6 +68,8 @@ static struct kobj_attribute _name##_attr = { \ |
2380c486 JR |
4107 | .store = _name##_store, \ |
4108 | } | |
4109 | ||
4110 | +extern struct pbe *restore_pblist; | |
4111 | + | |
4112 | /* Preferred image size in bytes (default 500 MB) */ | |
4113 | extern unsigned long image_size; | |
4114 | extern int in_suspend; | |
5bd2511a | 4115 | @@ -233,3 +240,86 @@ static inline void suspend_thaw_processes(void) |
2380c486 JR |
4116 | { |
4117 | } | |
4118 | #endif | |
4119 | + | |
4120 | +extern struct page *saveable_page(struct zone *z, unsigned long p); | |
4121 | +#ifdef CONFIG_HIGHMEM | |
4122 | +extern struct page *saveable_highmem_page(struct zone *z, unsigned long p); | |
4123 | +#else | |
4124 | +static | |
4125 | +inline struct page *saveable_highmem_page(struct zone *z, unsigned long p) | |
4126 | +{ | |
4127 | + return NULL; | |
4128 | +} | |
4129 | +#endif | |
4130 | + | |
4131 | +#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe)) | |
4132 | +extern struct list_head nosave_regions; | |
4133 | + | |
4134 | +/** | |
4135 | + * This structure represents a range of page frames the contents of which | |
4136 | + * should not be saved during the suspend. | |
4137 | + */ | |
4138 | + | |
4139 | +struct nosave_region { | |
4140 | + struct list_head list; | |
4141 | + unsigned long start_pfn; | |
4142 | + unsigned long end_pfn; | |
4143 | +}; | |
4144 | + | |
4145 | +#ifndef PHYS_PFN_OFFSET | |
4146 | +#define PHYS_PFN_OFFSET 0 | |
4147 | +#endif | |
4148 | + | |
4149 | +#define ZONE_START(thiszone) ((thiszone)->zone_start_pfn - PHYS_PFN_OFFSET) | |
4150 | + | |
4151 | +#define BM_END_OF_MAP (~0UL) | |
4152 | + | |
7e46296a | 4153 | +#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) |
2380c486 JR |
4154 | + |
4155 | +struct bm_block { | |
4156 | + struct list_head hook; /* hook into a list of bitmap blocks */ | |
4157 | + unsigned long start_pfn; /* pfn represented by the first bit */ | |
4158 | + unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | |
4159 | + unsigned long *data; /* bitmap representing pages */ | |
4160 | +}; | |
4161 | + | |
4162 | +/* struct bm_position is used for browsing memory bitmaps */ | |
4163 | + | |
4164 | +struct bm_position { | |
4165 | + struct bm_block *block; | |
4166 | + int bit; | |
4167 | +}; | |
4168 | + | |
4169 | +struct memory_bitmap { | |
4170 | + struct list_head blocks; /* list of bitmap blocks */ | |
4171 | + struct linked_page *p_list; /* list of pages used to store zone | |
4172 | + * bitmap objects and bitmap block | |
4173 | + * objects | |
4174 | + */ | |
4175 | + struct bm_position cur; /* most recently used bit position */ | |
4176 | + struct bm_position iter; /* most recently used bit position | |
4177 | + * when iterating over a bitmap. | |
4178 | + */ | |
4179 | +}; | |
4180 | + | |
2380c486 JR |
4181 | +extern int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, |
4182 | + int safe_needed); | |
4183 | +extern void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |
4184 | +extern void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn); | |
4185 | +extern void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn); | |
4186 | +extern int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn); | |
4187 | +extern unsigned long memory_bm_next_pfn(struct memory_bitmap *bm); | |
4188 | +extern void memory_bm_position_reset(struct memory_bitmap *bm); | |
4189 | +extern void memory_bm_clear(struct memory_bitmap *bm); | |
4190 | +extern void memory_bm_copy(struct memory_bitmap *source, | |
4191 | + struct memory_bitmap *dest); | |
4192 | +extern void memory_bm_dup(struct memory_bitmap *source, | |
4193 | + struct memory_bitmap *dest); | |
4194 | + | |
4195 | +#ifdef CONFIG_TOI | |
4196 | +struct toi_module_ops; | |
4197 | +extern int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) | |
4198 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); | |
4199 | +extern int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) | |
4200 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); | |
4201 | +#endif | |
2380c486 | 4202 | diff --git a/kernel/power/process.c b/kernel/power/process.c |
de6743ae | 4203 | index 71ae290..8733143 100644 |
2380c486 JR |
4204 | --- a/kernel/power/process.c |
4205 | +++ b/kernel/power/process.c | |
5dd10c98 | 4206 | @@ -15,6 +15,13 @@ |
2380c486 JR |
4207 | #include <linux/syscalls.h> |
4208 | #include <linux/freezer.h> | |
5dd10c98 | 4209 | #include <linux/delay.h> |
2380c486 JR |
4210 | +#include <linux/buffer_head.h> |
4211 | + | |
4212 | +int freezer_state; | |
4213 | +EXPORT_SYMBOL_GPL(freezer_state); | |
92bca44c | 4214 | + |
de6743ae | 4215 | +int freezer_sync = 1; |
92bca44c | 4216 | +EXPORT_SYMBOL_GPL(freezer_sync); |
2380c486 JR |
4217 | |
4218 | /* | |
4219 | * Timeout for stopping processes | |
de6743ae | 4220 | @@ -112,17 +119,26 @@ int freeze_processes(void) |
2380c486 JR |
4221 | { |
4222 | int error; | |
4223 | ||
4224 | - printk("Freezing user space processes ... "); | |
4225 | + printk(KERN_INFO "Stopping fuse filesystems.\n"); | |
4226 | + freeze_filesystems(FS_FREEZER_FUSE); | |
4227 | + freezer_state = FREEZER_FILESYSTEMS_FROZEN; | |
4228 | + printk(KERN_INFO "Freezing user space processes ... "); | |
4229 | error = try_to_freeze_tasks(true); | |
4230 | if (error) | |
4231 | goto Exit; | |
9474138d | 4232 | printk("done.\n"); |
2380c486 JR |
4233 | |
4234 | - printk("Freezing remaining freezable tasks ... "); | |
92bca44c AM |
4235 | + if (freezer_sync) |
4236 | + sys_sync(); | |
2380c486 JR |
4237 | + printk(KERN_INFO "Stopping normal filesystems.\n"); |
4238 | + freeze_filesystems(FS_FREEZER_NORMAL); | |
4239 | + freezer_state = FREEZER_USERSPACE_FROZEN; | |
4240 | + printk(KERN_INFO "Freezing remaining freezable tasks ... "); | |
4241 | error = try_to_freeze_tasks(false); | |
4242 | if (error) | |
4243 | goto Exit; | |
4244 | printk("done."); | |
4245 | + freezer_state = FREEZER_FULLY_ON; | |
92bca44c AM |
4246 | |
4247 | oom_killer_disable(); | |
2380c486 | 4248 | Exit: |
de6743ae | 4249 | @@ -131,6 +147,7 @@ int freeze_processes(void) |
92bca44c | 4250 | |
2380c486 JR |
4251 | return error; |
4252 | } | |
4253 | +EXPORT_SYMBOL_GPL(freeze_processes); | |
4254 | ||
4255 | static void thaw_tasks(bool nosig_only) | |
4256 | { | |
de6743ae | 4257 | @@ -154,12 +171,39 @@ static void thaw_tasks(bool nosig_only) |
2380c486 JR |
4258 | |
4259 | void thaw_processes(void) | |
4260 | { | |
2380c486 JR |
4261 | + int old_state = freezer_state; |
4262 | + | |
4263 | + if (old_state == FREEZER_OFF) | |
4264 | + return; | |
4265 | + | |
2380c486 JR |
4266 | + freezer_state = FREEZER_OFF; |
4267 | + | |
92bca44c AM |
4268 | oom_killer_enable(); |
4269 | ||
2380c486 JR |
4270 | + printk(KERN_INFO "Restarting all filesystems ...\n"); |
4271 | + thaw_filesystems(FS_FREEZER_ALL); | |
4272 | + | |
4273 | + printk(KERN_INFO "Restarting tasks ... "); | |
2380c486 JR |
4274 | + if (old_state == FREEZER_FULLY_ON) |
4275 | + thaw_tasks(true); | |
92bca44c AM |
4276 | + |
4277 | printk("Restarting tasks ... "); | |
4278 | - thaw_tasks(true); | |
2380c486 JR |
4279 | thaw_tasks(false); |
4280 | schedule(); | |
4281 | printk("done.\n"); | |
4282 | } | |
4283 | +EXPORT_SYMBOL_GPL(thaw_processes); | |
4284 | ||
4285 | +void thaw_kernel_threads(void) | |
4286 | +{ | |
4287 | + freezer_state = FREEZER_USERSPACE_FROZEN; | |
4288 | + printk(KERN_INFO "Restarting normal filesystems.\n"); | |
4289 | + thaw_filesystems(FS_FREEZER_NORMAL); | |
4290 | + thaw_tasks(true); | |
4291 | +} | |
4292 | + | |
4293 | +/* | |
4294 | + * It's ugly putting this EXPORT down here, but it's necessary so that it | |
4295 | + * doesn't matter whether the fs-freezing patch is applied or not. | |
4296 | + */ | |
4297 | +EXPORT_SYMBOL_GPL(thaw_kernel_threads); | |
4298 | diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c | |
5bd2511a | 4299 | index 25ce010..4fde437 100644 |
2380c486 JR |
4300 | --- a/kernel/power/snapshot.c |
4301 | +++ b/kernel/power/snapshot.c | |
de6743ae | 4302 | @@ -35,6 +35,8 @@ |
2380c486 JR |
4303 | #include <asm/io.h> |
4304 | ||
4305 | #include "power.h" | |
4306 | +#include "tuxonice_builtin.h" | |
4307 | +#include "tuxonice_pagedir.h" | |
4308 | ||
4309 | static int swsusp_page_is_free(struct page *); | |
4310 | static void swsusp_set_page_forbidden(struct page *); | |
de6743ae | 4311 | @@ -54,6 +56,10 @@ unsigned long image_size = 500 * 1024 * 1024; |
2380c486 JR |
4312 | * directly to their "original" page frames. |
4313 | */ | |
4314 | struct pbe *restore_pblist; | |
4315 | +EXPORT_SYMBOL_GPL(restore_pblist); | |
4316 | + | |
4317 | +int resume_attempted; | |
4318 | +EXPORT_SYMBOL_GPL(resume_attempted); | |
4319 | ||
4320 | /* Pointer to an auxiliary buffer (1 page) */ | |
4321 | static void *buffer; | |
de6743ae | 4322 | @@ -96,6 +102,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) |
2380c486 JR |
4323 | |
4324 | unsigned long get_safe_page(gfp_t gfp_mask) | |
4325 | { | |
4326 | + if (toi_running) | |
4327 | + return toi_get_nonconflicting_page(); | |
4328 | + | |
4329 | return (unsigned long)get_image_page(gfp_mask, PG_SAFE); | |
4330 | } | |
4331 | ||
de6743ae | 4332 | @@ -232,47 +241,22 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) |
2380c486 JR |
4333 | * the represented memory area. |
4334 | */ | |
4335 | ||
4336 | -#define BM_END_OF_MAP (~0UL) | |
4337 | - | |
7e46296a | 4338 | -#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) |
2380c486 JR |
4339 | - |
4340 | -struct bm_block { | |
4341 | - struct list_head hook; /* hook into a list of bitmap blocks */ | |
4342 | - unsigned long start_pfn; /* pfn represented by the first bit */ | |
4343 | - unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | |
4344 | - unsigned long *data; /* bitmap representing pages */ | |
4345 | -}; | |
4346 | - | |
4347 | static inline unsigned long bm_block_bits(struct bm_block *bb) | |
4348 | { | |
4349 | return bb->end_pfn - bb->start_pfn; | |
4350 | } | |
4351 | ||
4352 | -/* strcut bm_position is used for browsing memory bitmaps */ | |
4353 | - | |
4354 | -struct bm_position { | |
4355 | - struct bm_block *block; | |
4356 | - int bit; | |
4357 | -}; | |
4358 | - | |
4359 | -struct memory_bitmap { | |
4360 | - struct list_head blocks; /* list of bitmap blocks */ | |
4361 | - struct linked_page *p_list; /* list of pages used to store zone | |
4362 | - * bitmap objects and bitmap block | |
4363 | - * objects | |
4364 | - */ | |
4365 | - struct bm_position cur; /* most recently used bit position */ | |
4366 | -}; | |
4367 | - | |
4368 | /* Functions that operate on memory bitmaps */ | |
4369 | ||
4370 | -static void memory_bm_position_reset(struct memory_bitmap *bm) | |
4371 | +void memory_bm_position_reset(struct memory_bitmap *bm) | |
4372 | { | |
4373 | bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); | |
4374 | bm->cur.bit = 0; | |
9474138d AM |
4375 | -} |
4376 | ||
4377 | -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |
2380c486 JR |
4378 | + bm->iter.block = list_entry(bm->blocks.next, struct bm_block, hook); |
4379 | + bm->iter.bit = 0; | |
9474138d | 4380 | +} |
2380c486 JR |
4381 | +EXPORT_SYMBOL_GPL(memory_bm_position_reset); |
4382 | ||
2380c486 JR |
4383 | /** |
4384 | * create_bm_block_list - create a list of block bitmap objects | |
de6743ae | 4385 | @@ -380,7 +364,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) |
2380c486 JR |
4386 | /** |
4387 | * memory_bm_create - allocate memory for a memory bitmap | |
4388 | */ | |
4389 | -static int | |
4390 | +int | |
4391 | memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |
4392 | { | |
4393 | struct chain_allocator ca; | |
de6743ae | 4394 | @@ -436,11 +420,12 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) |
2380c486 JR |
4395 | memory_bm_free(bm, PG_UNSAFE_CLEAR); |
4396 | goto Exit; | |
4397 | } | |
4398 | +EXPORT_SYMBOL_GPL(memory_bm_create); | |
4399 | ||
4400 | /** | |
4401 | * memory_bm_free - free memory occupied by the memory bitmap @bm | |
4402 | */ | |
4403 | -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |
4404 | +void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |
4405 | { | |
4406 | struct bm_block *bb; | |
4407 | ||
de6743ae | 4408 | @@ -452,6 +437,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) |
2380c486 JR |
4409 | |
4410 | INIT_LIST_HEAD(&bm->blocks); | |
4411 | } | |
4412 | +EXPORT_SYMBOL_GPL(memory_bm_free); | |
4413 | ||
4414 | /** | |
4415 | * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds | |
de6743ae | 4416 | @@ -490,7 +476,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, |
2380c486 JR |
4417 | return 0; |
4418 | } | |
4419 | ||
4420 | -static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4421 | +void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4422 | { | |
4423 | void *addr; | |
4424 | unsigned int bit; | |
de6743ae | 4425 | @@ -500,6 +486,7 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4426 | BUG_ON(error); |
4427 | set_bit(bit, addr); | |
4428 | } | |
4429 | +EXPORT_SYMBOL_GPL(memory_bm_set_bit); | |
4430 | ||
9474138d | 4431 | static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 | 4432 | { |
de6743ae | 4433 | @@ -513,7 +500,7 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4434 | return error; |
4435 | } | |
4436 | ||
4437 | -static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4438 | +void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4439 | { | |
4440 | void *addr; | |
4441 | unsigned int bit; | |
de6743ae | 4442 | @@ -523,8 +510,9 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4443 | BUG_ON(error); |
4444 | clear_bit(bit, addr); | |
4445 | } | |
4446 | +EXPORT_SYMBOL_GPL(memory_bm_clear_bit); | |
4447 | ||
4448 | -static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4449 | +int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4450 | { | |
4451 | void *addr; | |
4452 | unsigned int bit; | |
de6743ae | 4453 | @@ -534,6 +522,7 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4454 | BUG_ON(error); |
4455 | return test_bit(bit, addr); | |
4456 | } | |
4457 | +EXPORT_SYMBOL_GPL(memory_bm_test_bit); | |
4458 | ||
4459 | static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) | |
4460 | { | |
de6743ae | 4461 | @@ -552,43 +541,178 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4462 | * this function. |
4463 | */ | |
4464 | ||
4465 | -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | |
4466 | +unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | |
4467 | { | |
4468 | struct bm_block *bb; | |
4469 | int bit; | |
4470 | ||
4471 | - bb = bm->cur.block; | |
4472 | + bb = bm->iter.block; | |
4473 | do { | |
4474 | - bit = bm->cur.bit; | |
4475 | + bit = bm->iter.bit; | |
4476 | bit = find_next_bit(bb->data, bm_block_bits(bb), bit); | |
4477 | if (bit < bm_block_bits(bb)) | |
4478 | goto Return_pfn; | |
4479 | ||
4480 | bb = list_entry(bb->hook.next, struct bm_block, hook); | |
4481 | - bm->cur.block = bb; | |
4482 | - bm->cur.bit = 0; | |
4483 | + bm->iter.block = bb; | |
4484 | + bm->iter.bit = 0; | |
4485 | } while (&bb->hook != &bm->blocks); | |
4486 | ||
4487 | memory_bm_position_reset(bm); | |
4488 | return BM_END_OF_MAP; | |
4489 | ||
4490 | Return_pfn: | |
4491 | - bm->cur.bit = bit + 1; | |
4492 | + bm->iter.bit = bit + 1; | |
4493 | return bb->start_pfn + bit; | |
4494 | } | |
4495 | +EXPORT_SYMBOL_GPL(memory_bm_next_pfn); | |
4496 | ||
4497 | -/** | |
4498 | - * This structure represents a range of page frames the contents of which | |
4499 | - * should not be saved during the suspend. | |
4500 | - */ | |
4501 | +void memory_bm_clear(struct memory_bitmap *bm) | |
4502 | +{ | |
4503 | + unsigned long pfn; | |
4504 | ||
4505 | -struct nosave_region { | |
4506 | - struct list_head list; | |
4507 | - unsigned long start_pfn; | |
4508 | - unsigned long end_pfn; | |
4509 | -}; | |
4510 | + memory_bm_position_reset(bm); | |
4511 | + pfn = memory_bm_next_pfn(bm); | |
4512 | + while (pfn != BM_END_OF_MAP) { | |
4513 | + memory_bm_clear_bit(bm, pfn); | |
4514 | + pfn = memory_bm_next_pfn(bm); | |
4515 | + } | |
4516 | +} | |
4517 | +EXPORT_SYMBOL_GPL(memory_bm_clear); | |
4518 | + | |
4519 | +void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest) | |
4520 | +{ | |
4521 | + unsigned long pfn; | |
4522 | + | |
4523 | + memory_bm_position_reset(source); | |
4524 | + pfn = memory_bm_next_pfn(source); | |
4525 | + while (pfn != BM_END_OF_MAP) { | |
4526 | + memory_bm_set_bit(dest, pfn); | |
4527 | + pfn = memory_bm_next_pfn(source); | |
4528 | + } | |
4529 | +} | |
4530 | +EXPORT_SYMBOL_GPL(memory_bm_copy); | |
4531 | + | |
4532 | +void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest) | |
4533 | +{ | |
4534 | + memory_bm_clear(dest); | |
4535 | + memory_bm_copy(source, dest); | |
4536 | +} | |
4537 | +EXPORT_SYMBOL_GPL(memory_bm_dup); | |
4538 | + | |
4539 | +#ifdef CONFIG_TOI | |
4540 | +#define DEFINE_MEMORY_BITMAP(name) \ | |
4541 | +struct memory_bitmap *name; \ | |
4542 | +EXPORT_SYMBOL_GPL(name) | |
4543 | + | |
4544 | +DEFINE_MEMORY_BITMAP(pageset1_map); | |
4545 | +DEFINE_MEMORY_BITMAP(pageset1_copy_map); | |
4546 | +DEFINE_MEMORY_BITMAP(pageset2_map); | |
4547 | +DEFINE_MEMORY_BITMAP(page_resave_map); | |
4548 | +DEFINE_MEMORY_BITMAP(io_map); | |
4549 | +DEFINE_MEMORY_BITMAP(nosave_map); | |
4550 | +DEFINE_MEMORY_BITMAP(free_map); | |
9474138d | 4551 | + |
2380c486 JR |
4552 | +int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) |
4553 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) | |
4554 | +{ | |
4555 | + int result = 0; | |
4556 | + unsigned int nr = 0; | |
4557 | + struct bm_block *bb; | |
4558 | + | |
4559 | + if (!bm) | |
4560 | + return result; | |
9474138d AM |
4561 | |
4562 | -static LIST_HEAD(nosave_regions); | |
2380c486 JR |
4563 | + list_for_each_entry(bb, &bm->blocks, hook) |
4564 | + nr++; | |
4565 | + | |
4566 | + result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int)); | |
4567 | + if (result) | |
4568 | + return result; | |
4569 | + | |
4570 | + list_for_each_entry(bb, &bm->blocks, hook) { | |
4571 | + result = (*rw_chunk)(WRITE, NULL, (char *) &bb->start_pfn, | |
4572 | + 2 * sizeof(unsigned long)); | |
4573 | + if (result) | |
4574 | + return result; | |
4575 | + | |
4576 | + result = (*rw_chunk)(WRITE, NULL, (char *) bb->data, PAGE_SIZE); | |
4577 | + if (result) | |
4578 | + return result; | |
4579 | + } | |
4580 | + | |
4581 | + return 0; | |
4582 | +} | |
4583 | +EXPORT_SYMBOL_GPL(memory_bm_write); | |
4584 | + | |
4585 | +int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) | |
4586 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) | |
4587 | +{ | |
4588 | + int result = 0; | |
4589 | + unsigned int nr, i; | |
4590 | + struct bm_block *bb; | |
4591 | + | |
4592 | + if (!bm) | |
4593 | + return result; | |
4594 | + | |
4595 | + result = memory_bm_create(bm, GFP_KERNEL, 0); | |
4596 | + | |
4597 | + if (result) | |
4598 | + return result; | |
4599 | + | |
4600 | + result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int)); | |
4601 | + if (result) | |
4602 | + goto Free; | |
4603 | + | |
4604 | + for (i = 0; i < nr; i++) { | |
4605 | + unsigned long pfn; | |
4606 | + | |
4607 | + result = (*rw_chunk)(READ, NULL, (char *) &pfn, | |
4608 | + sizeof(unsigned long)); | |
4609 | + if (result) | |
4610 | + goto Free; | |
4611 | + | |
4612 | + list_for_each_entry(bb, &bm->blocks, hook) | |
4613 | + if (bb->start_pfn == pfn) | |
4614 | + break; | |
4615 | + | |
4616 | + if (&bb->hook == &bm->blocks) { | |
4617 | + printk(KERN_ERR | |
4618 | + "TuxOnIce: Failed to load memory bitmap.\n"); | |
4619 | + result = -EINVAL; | |
4620 | + goto Free; | |
4621 | + } | |
4622 | + | |
4623 | + result = (*rw_chunk)(READ, NULL, (char *) &pfn, | |
4624 | + sizeof(unsigned long)); | |
4625 | + if (result) | |
4626 | + goto Free; | |
4627 | + | |
4628 | + if (pfn != bb->end_pfn) { | |
4629 | + printk(KERN_ERR | |
4630 | + "TuxOnIce: Failed to load memory bitmap. " | |
4631 | + "End PFN doesn't match what was saved.\n"); | |
4632 | + result = -EINVAL; | |
4633 | + goto Free; | |
4634 | + } | |
4635 | + | |
4636 | + result = (*rw_chunk)(READ, NULL, (char *) bb->data, PAGE_SIZE); | |
4637 | + | |
4638 | + if (result) | |
4639 | + goto Free; | |
4640 | + } | |
4641 | + | |
4642 | + return 0; | |
4643 | + | |
4644 | +Free: | |
4645 | + memory_bm_free(bm, PG_ANY); | |
4646 | + return result; | |
4647 | +} | |
4648 | +EXPORT_SYMBOL_GPL(memory_bm_read); | |
4649 | +#endif | |
4650 | + | |
4651 | +LIST_HEAD(nosave_regions); | |
4652 | +EXPORT_SYMBOL_GPL(nosave_regions); | |
4653 | ||
4654 | /** | |
4655 | * register_nosave_region - register a range of page frames the contents | |
de6743ae | 4656 | @@ -824,7 +948,7 @@ static unsigned int count_free_highmem_pages(void) |
2380c486 JR |
4657 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, |
4658 | * and it isn't a part of a free chunk of pages. | |
4659 | */ | |
4660 | -static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
4661 | +struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
4662 | { | |
4663 | struct page *page; | |
4664 | ||
de6743ae | 4665 | @@ -843,6 +967,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) |
2380c486 JR |
4666 | |
4667 | return page; | |
4668 | } | |
4669 | +EXPORT_SYMBOL_GPL(saveable_highmem_page); | |
4670 | ||
4671 | /** | |
4672 | * count_highmem_pages - compute the total number of saveable highmem | |
de6743ae | 4673 | @@ -868,11 +993,6 @@ static unsigned int count_highmem_pages(void) |
2380c486 JR |
4674 | } |
4675 | return n; | |
4676 | } | |
4677 | -#else | |
4678 | -static inline void *saveable_highmem_page(struct zone *z, unsigned long p) | |
4679 | -{ | |
4680 | - return NULL; | |
4681 | -} | |
4682 | #endif /* CONFIG_HIGHMEM */ | |
4683 | ||
4684 | /** | |
de6743ae | 4685 | @@ -883,7 +1003,7 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p) |
2380c486 JR |
4686 | * of pages statically defined as 'unsaveable', and it isn't a part of |
4687 | * a free chunk of pages. | |
4688 | */ | |
4689 | -static struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
4690 | +struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
4691 | { | |
4692 | struct page *page; | |
4693 | ||
de6743ae | 4694 | @@ -905,6 +1025,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) |
2380c486 JR |
4695 | |
4696 | return page; | |
4697 | } | |
4698 | +EXPORT_SYMBOL_GPL(saveable_page); | |
4699 | ||
4700 | /** | |
4701 | * count_data_pages - compute the total number of saveable non-highmem | |
de6743ae | 4702 | @@ -1501,6 +1622,9 @@ asmlinkage int swsusp_save(void) |
2380c486 JR |
4703 | { |
4704 | unsigned int nr_pages, nr_highmem; | |
4705 | ||
4706 | + if (toi_running) | |
4707 | + return toi_post_context_save(); | |
4708 | + | |
d031c9d6 | 4709 | printk(KERN_INFO "PM: Creating hibernation image:\n"); |
2380c486 JR |
4710 | |
4711 | drain_local_pages(NULL); | |
de6743ae | 4712 | @@ -1541,14 +1665,14 @@ asmlinkage int swsusp_save(void) |
2380c486 JR |
4713 | } |
4714 | ||
4715 | #ifndef CONFIG_ARCH_HIBERNATION_HEADER | |
4716 | -static int init_header_complete(struct swsusp_info *info) | |
e999739a | 4717 | +int init_header_complete(struct swsusp_info *info) |
2380c486 JR |
4718 | { |
4719 | memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); | |
4720 | info->version_code = LINUX_VERSION_CODE; | |
4721 | return 0; | |
4722 | } | |
4723 | ||
4724 | -static char *check_image_kernel(struct swsusp_info *info) | |
e999739a | 4725 | +char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4726 | { |
4727 | if (info->version_code != LINUX_VERSION_CODE) | |
4728 | return "kernel version"; | |
de6743ae | 4729 | @@ -1562,6 +1686,7 @@ static char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4730 | return "machine"; |
4731 | return NULL; | |
4732 | } | |
e999739a | 4733 | +EXPORT_SYMBOL_GPL(check_image_kernel); |
2380c486 JR |
4734 | #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ |
4735 | ||
4736 | unsigned long snapshot_get_image_size(void) | |
de6743ae | 4737 | @@ -1569,7 +1694,7 @@ unsigned long snapshot_get_image_size(void) |
2380c486 JR |
4738 | return nr_copy_pages + nr_meta_pages + 1; |
4739 | } | |
4740 | ||
4741 | -static int init_header(struct swsusp_info *info) | |
e999739a | 4742 | +int init_header(struct swsusp_info *info) |
2380c486 JR |
4743 | { |
4744 | memset(info, 0, sizeof(struct swsusp_info)); | |
4745 | info->num_physpages = num_physpages; | |
de6743ae | 4746 | @@ -1579,6 +1704,7 @@ static int init_header(struct swsusp_info *info) |
2380c486 | 4747 | info->size <<= PAGE_SHIFT; |
e999739a | 4748 | return init_header_complete(info); |
2380c486 | 4749 | } |
e999739a | 4750 | +EXPORT_SYMBOL_GPL(init_header); |
2380c486 JR |
4751 | |
4752 | /** | |
4753 | * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm | |
92bca44c | 4754 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c |
de6743ae | 4755 | index 56e7dbb..9618d42 100644 |
92bca44c AM |
4756 | --- a/kernel/power/suspend.c |
4757 | +++ b/kernel/power/suspend.c | |
de6743ae | 4758 | @@ -230,6 +230,7 @@ int suspend_devices_and_enter(suspend_state_t state) |
92bca44c AM |
4759 | suspend_ops->recover(); |
4760 | goto Resume_devices; | |
4761 | } | |
4762 | +EXPORT_SYMBOL_GPL(suspend_devices_and_enter); | |
4763 | ||
4764 | /** | |
4765 | * suspend_finish - Do final work before exiting suspend sequence. | |
2380c486 JR |
4766 | diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h |
4767 | new file mode 100644 | |
cacc47f8 | 4768 | index 0000000..537291e |
2380c486 JR |
4769 | --- /dev/null |
4770 | +++ b/kernel/power/tuxonice.h | |
5dd10c98 | 4771 | @@ -0,0 +1,211 @@ |
2380c486 JR |
4772 | +/* |
4773 | + * kernel/power/tuxonice.h | |
4774 | + * | |
5dd10c98 | 4775 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
4776 | + * |
4777 | + * This file is released under the GPLv2. | |
4778 | + * | |
4779 | + * It contains declarations used throughout swsusp. | |
4780 | + * | |
4781 | + */ | |
4782 | + | |
4783 | +#ifndef KERNEL_POWER_TOI_H | |
4784 | +#define KERNEL_POWER_TOI_H | |
4785 | + | |
4786 | +#include <linux/delay.h> | |
4787 | +#include <linux/bootmem.h> | |
4788 | +#include <linux/suspend.h> | |
4789 | +#include <linux/fs.h> | |
4790 | +#include <linux/kmod.h> | |
4791 | +#include <asm/setup.h> | |
4792 | +#include "tuxonice_pageflags.h" | |
9474138d | 4793 | +#include "power.h" |
2380c486 | 4794 | + |
cacc47f8 AM |
4795 | +#define TOI_CORE_VERSION "3.1.1.1" |
4796 | +#define TOI_HEADER_VERSION 3 | |
5dd10c98 | 4797 | +#define MY_BOOT_KERNEL_DATA_VERSION 3 |
2380c486 JR |
4798 | + |
4799 | +struct toi_boot_kernel_data { | |
4800 | + int version; | |
4801 | + int size; | |
4802 | + unsigned long toi_action; | |
4803 | + unsigned long toi_debug_state; | |
4804 | + u32 toi_default_console_level; | |
4805 | + int toi_io_time[2][2]; | |
4806 | + char toi_nosave_commandline[COMMAND_LINE_SIZE]; | |
5dd10c98 AM |
4807 | + unsigned long pages_used[33]; |
4808 | + unsigned long compress_bytes_in; | |
4809 | + unsigned long compress_bytes_out; | |
2380c486 JR |
4810 | +}; |
4811 | + | |
4812 | +extern struct toi_boot_kernel_data toi_bkd; | |
4813 | + | |
4814 | +/* Location of book kernel data struct in kernel being resumed */ | |
4815 | +extern unsigned long boot_kernel_data_buffer; | |
4816 | + | |
4817 | +/* == Action states == */ | |
4818 | + | |
4819 | +enum { | |
4820 | + TOI_REBOOT, | |
4821 | + TOI_PAUSE, | |
4822 | + TOI_LOGALL, | |
4823 | + TOI_CAN_CANCEL, | |
4824 | + TOI_KEEP_IMAGE, | |
4825 | + TOI_FREEZER_TEST, | |
4826 | + TOI_SINGLESTEP, | |
4827 | + TOI_PAUSE_NEAR_PAGESET_END, | |
4828 | + TOI_TEST_FILTER_SPEED, | |
4829 | + TOI_TEST_BIO, | |
4830 | + TOI_NO_PAGESET2, | |
2380c486 JR |
4831 | + TOI_IGNORE_ROOTFS, |
4832 | + TOI_REPLACE_SWSUSP, | |
4833 | + TOI_PAGESET2_FULL, | |
4834 | + TOI_ABORT_ON_RESAVE_NEEDED, | |
4835 | + TOI_NO_MULTITHREADED_IO, | |
5dd10c98 | 4836 | + TOI_NO_DIRECT_LOAD, /* Obsolete */ |
2380c486 JR |
4837 | + TOI_LATE_CPU_HOTPLUG, |
4838 | + TOI_GET_MAX_MEM_ALLOCD, | |
4839 | + TOI_NO_FLUSHER_THREAD, | |
4840 | + TOI_NO_PS2_IF_UNNEEDED | |
4841 | +}; | |
4842 | + | |
4843 | +#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action)) | |
4844 | + | |
4845 | +/* == Result states == */ | |
4846 | + | |
4847 | +enum { | |
4848 | + TOI_ABORTED, | |
4849 | + TOI_ABORT_REQUESTED, | |
4850 | + TOI_NOSTORAGE_AVAILABLE, | |
4851 | + TOI_INSUFFICIENT_STORAGE, | |
4852 | + TOI_FREEZING_FAILED, | |
4853 | + TOI_KEPT_IMAGE, | |
4854 | + TOI_WOULD_EAT_MEMORY, | |
4855 | + TOI_UNABLE_TO_FREE_ENOUGH_MEMORY, | |
4856 | + TOI_PM_SEM, | |
4857 | + TOI_DEVICE_REFUSED, | |
4858 | + TOI_SYSDEV_REFUSED, | |
4859 | + TOI_EXTRA_PAGES_ALLOW_TOO_SMALL, | |
4860 | + TOI_UNABLE_TO_PREPARE_IMAGE, | |
4861 | + TOI_FAILED_MODULE_INIT, | |
4862 | + TOI_FAILED_MODULE_CLEANUP, | |
4863 | + TOI_FAILED_IO, | |
4864 | + TOI_OUT_OF_MEMORY, | |
4865 | + TOI_IMAGE_ERROR, | |
4866 | + TOI_PLATFORM_PREP_FAILED, | |
4867 | + TOI_CPU_HOTPLUG_FAILED, | |
4868 | + TOI_ARCH_PREPARE_FAILED, | |
4869 | + TOI_RESAVE_NEEDED, | |
4870 | + TOI_CANT_SUSPEND, | |
4871 | + TOI_NOTIFIERS_PREPARE_FAILED, | |
4872 | + TOI_PRE_SNAPSHOT_FAILED, | |
4873 | + TOI_PRE_RESTORE_FAILED, | |
4874 | + TOI_USERMODE_HELPERS_ERR, | |
4875 | + TOI_CANT_USE_ALT_RESUME, | |
0ada99ac | 4876 | + TOI_HEADER_TOO_BIG, |
2380c486 JR |
4877 | + TOI_NUM_RESULT_STATES /* Used in printing debug info only */ |
4878 | +}; | |
4879 | + | |
4880 | +extern unsigned long toi_result; | |
4881 | + | |
4882 | +#define set_result_state(bit) (test_and_set_bit(bit, &toi_result)) | |
4883 | +#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \ | |
4884 | + test_and_set_bit(bit, &toi_result)) | |
4885 | +#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result)) | |
4886 | +#define test_result_state(bit) (test_bit(bit, &toi_result)) | |
4887 | + | |
4888 | +/* == Debug sections and levels == */ | |
4889 | + | |
4890 | +/* debugging levels. */ | |
4891 | +enum { | |
4892 | + TOI_STATUS = 0, | |
4893 | + TOI_ERROR = 2, | |
4894 | + TOI_LOW, | |
4895 | + TOI_MEDIUM, | |
4896 | + TOI_HIGH, | |
4897 | + TOI_VERBOSE, | |
4898 | +}; | |
4899 | + | |
4900 | +enum { | |
4901 | + TOI_ANY_SECTION, | |
4902 | + TOI_EAT_MEMORY, | |
4903 | + TOI_IO, | |
4904 | + TOI_HEADER, | |
4905 | + TOI_WRITER, | |
4906 | + TOI_MEMORY, | |
4907 | +}; | |
4908 | + | |
4909 | +#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state)) | |
4910 | +#define clear_debug_state(bit) \ | |
4911 | + (test_and_clear_bit(bit, &toi_bkd.toi_debug_state)) | |
4912 | +#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state)) | |
4913 | + | |
4914 | +/* == Steps in hibernating == */ | |
4915 | + | |
4916 | +enum { | |
4917 | + STEP_HIBERNATE_PREPARE_IMAGE, | |
4918 | + STEP_HIBERNATE_SAVE_IMAGE, | |
4919 | + STEP_HIBERNATE_POWERDOWN, | |
4920 | + STEP_RESUME_CAN_RESUME, | |
4921 | + STEP_RESUME_LOAD_PS1, | |
4922 | + STEP_RESUME_DO_RESTORE, | |
4923 | + STEP_RESUME_READ_PS2, | |
4924 | + STEP_RESUME_GO, | |
4925 | + STEP_RESUME_ALT_IMAGE, | |
4926 | + STEP_CLEANUP, | |
4927 | + STEP_QUIET_CLEANUP | |
4928 | +}; | |
4929 | + | |
4930 | +/* == TuxOnIce states == | |
4931 | + (see also include/linux/suspend.h) */ | |
4932 | + | |
4933 | +#define get_toi_state() (toi_state) | |
4934 | +#define restore_toi_state(saved_state) \ | |
4935 | + do { toi_state = saved_state; } while (0) | |
4936 | + | |
4937 | +/* == Module support == */ | |
4938 | + | |
4939 | +struct toi_core_fns { | |
4940 | + int (*post_context_save)(void); | |
4941 | + unsigned long (*get_nonconflicting_page)(void); | |
4942 | + int (*try_hibernate)(void); | |
4943 | + void (*try_resume)(void); | |
4944 | +}; | |
4945 | + | |
4946 | +extern struct toi_core_fns *toi_core_fns; | |
4947 | + | |
4948 | +/* == All else == */ | |
4949 | +#define KB(x) ((x) << (PAGE_SHIFT - 10)) | |
4950 | +#define MB(x) ((x) >> (20 - PAGE_SHIFT)) | |
4951 | + | |
4952 | +extern int toi_start_anything(int toi_or_resume); | |
4953 | +extern void toi_finish_anything(int toi_or_resume); | |
4954 | + | |
4955 | +extern int save_image_part1(void); | |
4956 | +extern int toi_atomic_restore(void); | |
4957 | + | |
9474138d AM |
4958 | +extern int toi_try_hibernate(void); |
4959 | +extern void toi_try_resume(void); | |
2380c486 JR |
4960 | + |
4961 | +extern int __toi_post_context_save(void); | |
4962 | + | |
4963 | +extern unsigned int nr_hibernates; | |
4964 | +extern char alt_resume_param[256]; | |
4965 | + | |
4966 | +extern void copyback_post(void); | |
4967 | +extern int toi_hibernate(void); | |
92bca44c | 4968 | +extern unsigned long extra_pd1_pages_used; |
2380c486 JR |
4969 | + |
4970 | +#define SECTOR_SIZE 512 | |
4971 | + | |
4972 | +extern void toi_early_boot_message(int can_erase_image, int default_answer, | |
4973 | + char *warning_reason, ...); | |
4974 | + | |
2380c486 JR |
4975 | +extern int do_check_can_resume(void); |
4976 | +extern int do_toi_step(int step); | |
4977 | +extern int toi_launch_userspace_program(char *command, int channel_no, | |
4978 | + enum umh_wait wait, int debug); | |
4979 | + | |
7e46296a AM |
4980 | +extern char tuxonice_signature[9]; |
4981 | +extern int freezer_sync; | |
2380c486 JR |
4982 | +#endif |
4983 | diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c | |
4984 | new file mode 100644 | |
5dd10c98 | 4985 | index 0000000..891c5b2 |
2380c486 JR |
4986 | --- /dev/null |
4987 | +++ b/kernel/power/tuxonice_alloc.c | |
7e46296a | 4988 | @@ -0,0 +1,313 @@ |
2380c486 JR |
4989 | +/* |
4990 | + * kernel/power/tuxonice_alloc.c | |
4991 | + * | |
5dd10c98 | 4992 | + * Copyright (C) 2008-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
4993 | + * |
4994 | + * This file is released under the GPLv2. | |
4995 | + * | |
4996 | + */ | |
4997 | + | |
4998 | +#ifdef CONFIG_PM_DEBUG | |
4999 | +#include <linux/module.h> | |
5000 | +#include <linux/slab.h> | |
5001 | +#include "tuxonice_modules.h" | |
5002 | +#include "tuxonice_alloc.h" | |
5003 | +#include "tuxonice_sysfs.h" | |
5004 | +#include "tuxonice.h" | |
5005 | + | |
7e46296a | 5006 | +#define TOI_ALLOC_PATHS 40 |
2380c486 JR |
5007 | + |
5008 | +static DEFINE_MUTEX(toi_alloc_mutex); | |
5009 | + | |
5010 | +static struct toi_module_ops toi_alloc_ops; | |
5011 | + | |
5012 | +static int toi_fail_num; | |
7e46296a | 5013 | +static int trace_allocs; |
2380c486 JR |
5014 | +static atomic_t toi_alloc_count[TOI_ALLOC_PATHS], |
5015 | + toi_free_count[TOI_ALLOC_PATHS], | |
5016 | + toi_test_count[TOI_ALLOC_PATHS], | |
5017 | + toi_fail_count[TOI_ALLOC_PATHS]; | |
5018 | +static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS]; | |
5019 | +static int cur_allocd, max_allocd; | |
5020 | + | |
5021 | +static char *toi_alloc_desc[TOI_ALLOC_PATHS] = { | |
5022 | + "", /* 0 */ | |
5023 | + "get_io_info_struct", | |
5024 | + "extent", | |
5025 | + "extent (loading chain)", | |
5026 | + "userui channel", | |
5027 | + "userui arg", /* 5 */ | |
5028 | + "attention list metadata", | |
5029 | + "extra pagedir memory metadata", | |
5030 | + "bdev metadata", | |
5031 | + "extra pagedir memory", | |
5032 | + "header_locations_read", /* 10 */ | |
5033 | + "bio queue", | |
5034 | + "prepare_readahead", | |
5035 | + "i/o buffer", | |
5036 | + "writer buffer in bio_init", | |
5037 | + "checksum buffer", /* 15 */ | |
5038 | + "compression buffer", | |
5039 | + "filewriter signature op", | |
5040 | + "set resume param alloc1", | |
5041 | + "set resume param alloc2", | |
5042 | + "debugging info buffer", /* 20 */ | |
5043 | + "check can resume buffer", | |
5044 | + "write module config buffer", | |
5045 | + "read module config buffer", | |
5046 | + "write image header buffer", | |
5047 | + "read pageset1 buffer", /* 25 */ | |
5048 | + "get_have_image_data buffer", | |
5049 | + "checksum page", | |
5050 | + "worker rw loop", | |
5051 | + "get nonconflicting page", | |
5052 | + "ps1 load addresses", /* 30 */ | |
5053 | + "remove swap image", | |
5054 | + "swap image exists", | |
5055 | + "swap parse sig location", | |
5056 | + "sysfs kobj", | |
5057 | + "swap mark resume attempted buffer", /* 35 */ | |
5058 | + "cluster member", | |
5059 | + "boot kernel data buffer", | |
7e46296a AM |
5060 | + "setting swap signature", |
5061 | + "block i/o bdev struct" | |
2380c486 JR |
5062 | +}; |
5063 | + | |
5064 | +#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \ | |
5065 | + do { \ | |
5066 | + BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \ | |
5067 | + \ | |
5068 | + if (FAIL_NUM == toi_fail_num) { \ | |
5069 | + atomic_inc(&toi_test_count[FAIL_NUM]); \ | |
5070 | + toi_fail_num = 0; \ | |
5071 | + return FAIL_VAL; \ | |
5072 | + } \ | |
5073 | + } while (0) | |
5074 | + | |
9474138d | 5075 | +static void alloc_update_stats(int fail_num, void *result, int size) |
2380c486 JR |
5076 | +{ |
5077 | + if (!result) { | |
5078 | + atomic_inc(&toi_fail_count[fail_num]); | |
5079 | + return; | |
5080 | + } | |
5081 | + | |
5082 | + atomic_inc(&toi_alloc_count[fail_num]); | |
5083 | + if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { | |
5084 | + mutex_lock(&toi_alloc_mutex); | |
5085 | + toi_cur_allocd[fail_num]++; | |
7e46296a | 5086 | + cur_allocd += size; |
2380c486 JR |
5087 | + if (unlikely(cur_allocd > max_allocd)) { |
5088 | + int i; | |
5089 | + | |
5090 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) | |
5091 | + toi_max_allocd[i] = toi_cur_allocd[i]; | |
5092 | + max_allocd = cur_allocd; | |
5093 | + } | |
5094 | + mutex_unlock(&toi_alloc_mutex); | |
5095 | + } | |
5096 | +} | |
5097 | + | |
9474138d | 5098 | +static void free_update_stats(int fail_num, int size) |
2380c486 JR |
5099 | +{ |
5100 | + BUG_ON(fail_num >= TOI_ALLOC_PATHS); | |
5101 | + atomic_inc(&toi_free_count[fail_num]); | |
7e46296a AM |
5102 | + if (unlikely(atomic_read(&toi_free_count[fail_num]) > |
5103 | + atomic_read(&toi_alloc_count[fail_num]))) | |
5104 | + dump_stack(); | |
2380c486 JR |
5105 | + if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { |
5106 | + mutex_lock(&toi_alloc_mutex); | |
7e46296a | 5107 | + cur_allocd -= size; |
2380c486 JR |
5108 | + toi_cur_allocd[fail_num]--; |
5109 | + mutex_unlock(&toi_alloc_mutex); | |
5110 | + } | |
5111 | +} | |
5112 | + | |
5113 | +void *toi_kzalloc(int fail_num, size_t size, gfp_t flags) | |
5114 | +{ | |
5115 | + void *result; | |
5116 | + | |
5117 | + if (toi_alloc_ops.enabled) | |
5118 | + MIGHT_FAIL(fail_num, NULL); | |
5119 | + result = kzalloc(size, flags); | |
5120 | + if (toi_alloc_ops.enabled) | |
9474138d | 5121 | + alloc_update_stats(fail_num, result, size); |
7e46296a AM |
5122 | + if (fail_num == trace_allocs) |
5123 | + dump_stack(); | |
2380c486 JR |
5124 | + return result; |
5125 | +} | |
5126 | +EXPORT_SYMBOL_GPL(toi_kzalloc); | |
5127 | + | |
5128 | +unsigned long toi_get_free_pages(int fail_num, gfp_t mask, | |
5129 | + unsigned int order) | |
5130 | +{ | |
5131 | + unsigned long result; | |
5132 | + | |
5133 | + if (toi_alloc_ops.enabled) | |
5134 | + MIGHT_FAIL(fail_num, 0); | |
5135 | + result = __get_free_pages(mask, order); | |
5136 | + if (toi_alloc_ops.enabled) | |
9474138d AM |
5137 | + alloc_update_stats(fail_num, (void *) result, |
5138 | + PAGE_SIZE << order); | |
7e46296a AM |
5139 | + if (fail_num == trace_allocs) |
5140 | + dump_stack(); | |
2380c486 JR |
5141 | + return result; |
5142 | +} | |
5143 | +EXPORT_SYMBOL_GPL(toi_get_free_pages); | |
5144 | + | |
5145 | +struct page *toi_alloc_page(int fail_num, gfp_t mask) | |
5146 | +{ | |
5147 | + struct page *result; | |
5148 | + | |
5149 | + if (toi_alloc_ops.enabled) | |
5150 | + MIGHT_FAIL(fail_num, NULL); | |
5151 | + result = alloc_page(mask); | |
5152 | + if (toi_alloc_ops.enabled) | |
9474138d | 5153 | + alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); |
7e46296a AM |
5154 | + if (fail_num == trace_allocs) |
5155 | + dump_stack(); | |
2380c486 JR |
5156 | + return result; |
5157 | +} | |
5158 | +EXPORT_SYMBOL_GPL(toi_alloc_page); | |
5159 | + | |
5160 | +unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask) | |
5161 | +{ | |
5162 | + unsigned long result; | |
5163 | + | |
7e46296a AM |
5164 | + if (fail_num == trace_allocs) |
5165 | + dump_stack(); | |
2380c486 JR |
5166 | + if (toi_alloc_ops.enabled) |
5167 | + MIGHT_FAIL(fail_num, 0); | |
5168 | + result = get_zeroed_page(mask); | |
5169 | + if (toi_alloc_ops.enabled) | |
9474138d | 5170 | + alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); |
7e46296a AM |
5171 | + if (fail_num == trace_allocs) |
5172 | + dump_stack(); | |
2380c486 JR |
5173 | + return result; |
5174 | +} | |
5175 | +EXPORT_SYMBOL_GPL(toi_get_zeroed_page); | |
5176 | + | |
9474138d | 5177 | +void toi_kfree(int fail_num, const void *arg, int size) |
2380c486 JR |
5178 | +{ |
5179 | + if (arg && toi_alloc_ops.enabled) | |
9474138d | 5180 | + free_update_stats(fail_num, size); |
2380c486 | 5181 | + |
7e46296a AM |
5182 | + if (fail_num == trace_allocs) |
5183 | + dump_stack(); | |
2380c486 JR |
5184 | + kfree(arg); |
5185 | +} | |
5186 | +EXPORT_SYMBOL_GPL(toi_kfree); | |
5187 | + | |
5188 | +void toi_free_page(int fail_num, unsigned long virt) | |
5189 | +{ | |
5190 | + if (virt && toi_alloc_ops.enabled) | |
9474138d | 5191 | + free_update_stats(fail_num, PAGE_SIZE); |
2380c486 | 5192 | + |
7e46296a AM |
5193 | + if (fail_num == trace_allocs) |
5194 | + dump_stack(); | |
2380c486 JR |
5195 | + free_page(virt); |
5196 | +} | |
5197 | +EXPORT_SYMBOL_GPL(toi_free_page); | |
5198 | + | |
5199 | +void toi__free_page(int fail_num, struct page *page) | |
5200 | +{ | |
5201 | + if (page && toi_alloc_ops.enabled) | |
9474138d | 5202 | + free_update_stats(fail_num, PAGE_SIZE); |
2380c486 | 5203 | + |
7e46296a AM |
5204 | + if (fail_num == trace_allocs) |
5205 | + dump_stack(); | |
2380c486 JR |
5206 | + __free_page(page); |
5207 | +} | |
5208 | +EXPORT_SYMBOL_GPL(toi__free_page); | |
5209 | + | |
5210 | +void toi_free_pages(int fail_num, struct page *page, int order) | |
5211 | +{ | |
5212 | + if (page && toi_alloc_ops.enabled) | |
9474138d | 5213 | + free_update_stats(fail_num, PAGE_SIZE << order); |
2380c486 | 5214 | + |
7e46296a AM |
5215 | + if (fail_num == trace_allocs) |
5216 | + dump_stack(); | |
2380c486 JR |
5217 | + __free_pages(page, order); |
5218 | +} | |
5219 | + | |
5220 | +void toi_alloc_print_debug_stats(void) | |
5221 | +{ | |
5222 | + int i, header_done = 0; | |
5223 | + | |
5224 | + if (!toi_alloc_ops.enabled) | |
5225 | + return; | |
5226 | + | |
5227 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) | |
5228 | + if (atomic_read(&toi_alloc_count[i]) != | |
5229 | + atomic_read(&toi_free_count[i])) { | |
5230 | + if (!header_done) { | |
5231 | + printk(KERN_INFO "Idx Allocs Frees Tests " | |
7e46296a | 5232 | + " Fails Max Description\n"); |
2380c486 JR |
5233 | + header_done = 1; |
5234 | + } | |
5235 | + | |
5236 | + printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i, | |
5237 | + atomic_read(&toi_alloc_count[i]), | |
5238 | + atomic_read(&toi_free_count[i]), | |
5239 | + atomic_read(&toi_test_count[i]), | |
5240 | + atomic_read(&toi_fail_count[i]), | |
5241 | + toi_max_allocd[i], | |
5242 | + toi_alloc_desc[i]); | |
5243 | + } | |
5244 | +} | |
5245 | +EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats); | |
5246 | + | |
5247 | +static int toi_alloc_initialise(int starting_cycle) | |
5248 | +{ | |
5249 | + int i; | |
5250 | + | |
7e46296a AM |
5251 | + if (!starting_cycle) |
5252 | + return 0; | |
5253 | + | |
5254 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) { | |
5255 | + atomic_set(&toi_alloc_count[i], 0); | |
5256 | + atomic_set(&toi_free_count[i], 0); | |
5257 | + atomic_set(&toi_test_count[i], 0); | |
5258 | + atomic_set(&toi_fail_count[i], 0); | |
5259 | + toi_cur_allocd[i] = 0; | |
5260 | + toi_max_allocd[i] = 0; | |
5261 | + }; | |
2380c486 | 5262 | + |
7e46296a AM |
5263 | + max_allocd = 0; |
5264 | + cur_allocd = 0; | |
2380c486 JR |
5265 | + return 0; |
5266 | +} | |
5267 | + | |
5268 | +static struct toi_sysfs_data sysfs_params[] = { | |
5269 | + SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL), | |
7e46296a AM |
5270 | + SYSFS_INT("trace", SYSFS_RW, &trace_allocs, 0, TOI_ALLOC_PATHS, 0, |
5271 | + NULL), | |
2380c486 JR |
5272 | + SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action, |
5273 | + TOI_GET_MAX_MEM_ALLOCD, 0), | |
5274 | + SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0, | |
5275 | + NULL) | |
5276 | +}; | |
5277 | + | |
5278 | +static struct toi_module_ops toi_alloc_ops = { | |
5279 | + .type = MISC_HIDDEN_MODULE, | |
5280 | + .name = "allocation debugging", | |
5281 | + .directory = "alloc", | |
5282 | + .module = THIS_MODULE, | |
5283 | + .early = 1, | |
5284 | + .initialise = toi_alloc_initialise, | |
5285 | + | |
5286 | + .sysfs_data = sysfs_params, | |
5287 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
5288 | + sizeof(struct toi_sysfs_data), | |
5289 | +}; | |
5290 | + | |
5291 | +int toi_alloc_init(void) | |
5292 | +{ | |
5293 | + int result = toi_register_module(&toi_alloc_ops); | |
2380c486 JR |
5294 | + return result; |
5295 | +} | |
5296 | + | |
5297 | +void toi_alloc_exit(void) | |
5298 | +{ | |
5299 | + toi_unregister_module(&toi_alloc_ops); | |
5300 | +} | |
5301 | +#endif | |
5302 | diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h | |
5303 | new file mode 100644 | |
de6743ae | 5304 | index 0000000..77e0f0d |
2380c486 JR |
5305 | --- /dev/null |
5306 | +++ b/kernel/power/tuxonice_alloc.h | |
de6743ae | 5307 | @@ -0,0 +1,52 @@ |
2380c486 JR |
5308 | +/* |
5309 | + * kernel/power/tuxonice_alloc.h | |
5310 | + * | |
5dd10c98 | 5311 | + * Copyright (C) 2008-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5312 | + * |
5313 | + * This file is released under the GPLv2. | |
5314 | + * | |
5315 | + */ | |
5316 | + | |
de6743ae | 5317 | +#include <linux/slab.h> |
5dd10c98 | 5318 | +#define TOI_WAIT_GFP (GFP_NOFS | __GFP_NOWARN) |
2380c486 JR |
5319 | +#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN) |
5320 | + | |
5321 | +#ifdef CONFIG_PM_DEBUG | |
5322 | +extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags); | |
9474138d | 5323 | +extern void toi_kfree(int fail_num, const void *arg, int size); |
2380c486 JR |
5324 | + |
5325 | +extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask, | |
5326 | + unsigned int order); | |
5327 | +#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0) | |
5328 | +extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask); | |
5329 | +extern void toi_free_page(int fail_num, unsigned long buf); | |
5330 | +extern void toi__free_page(int fail_num, struct page *page); | |
5331 | +extern void toi_free_pages(int fail_num, struct page *page, int order); | |
5332 | +extern struct page *toi_alloc_page(int fail_num, gfp_t mask); | |
5333 | +extern int toi_alloc_init(void); | |
5334 | +extern void toi_alloc_exit(void); | |
5335 | + | |
5336 | +extern void toi_alloc_print_debug_stats(void); | |
5337 | + | |
5338 | +#else /* CONFIG_PM_DEBUG */ | |
5339 | + | |
5340 | +#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS)) | |
9474138d | 5341 | +#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN)) |
2380c486 JR |
5342 | + |
5343 | +#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER) | |
5344 | +#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS) | |
5345 | +#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS) | |
5346 | +#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0) | |
5347 | +#define toi__free_page(FAIL, PAGE) __free_page(PAGE) | |
5348 | +#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER) | |
5349 | +#define toi_alloc_page(FAIL, MASK) alloc_page(MASK) | |
5350 | +static inline int toi_alloc_init(void) | |
5351 | +{ | |
5352 | + return 0; | |
5353 | +} | |
5354 | + | |
5355 | +static inline void toi_alloc_exit(void) { } | |
5356 | + | |
5357 | +static inline void toi_alloc_print_debug_stats(void) { } | |
5358 | + | |
5359 | +#endif | |
5360 | diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c | |
5361 | new file mode 100644 | |
5dd10c98 | 5362 | index 0000000..1807f8b |
2380c486 JR |
5363 | --- /dev/null |
5364 | +++ b/kernel/power/tuxonice_atomic_copy.c | |
5dd10c98 | 5365 | @@ -0,0 +1,418 @@ |
2380c486 JR |
5366 | +/* |
5367 | + * kernel/power/tuxonice_atomic_copy.c | |
5368 | + * | |
5dd10c98 | 5369 | + * Copyright 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5370 | + * |
5371 | + * Distributed under GPLv2. | |
5372 | + * | |
5373 | + * Routines for doing the atomic save/restore. | |
5374 | + */ | |
5375 | + | |
5376 | +#include <linux/suspend.h> | |
5377 | +#include <linux/highmem.h> | |
5378 | +#include <linux/cpu.h> | |
5379 | +#include <linux/freezer.h> | |
5380 | +#include <linux/console.h> | |
9474138d | 5381 | +#include <asm/suspend.h> |
2380c486 JR |
5382 | +#include "tuxonice.h" |
5383 | +#include "tuxonice_storage.h" | |
5384 | +#include "tuxonice_power_off.h" | |
5385 | +#include "tuxonice_ui.h" | |
2380c486 JR |
5386 | +#include "tuxonice_io.h" |
5387 | +#include "tuxonice_prepare_image.h" | |
5388 | +#include "tuxonice_pageflags.h" | |
5389 | +#include "tuxonice_checksum.h" | |
5390 | +#include "tuxonice_builtin.h" | |
5391 | +#include "tuxonice_atomic_copy.h" | |
5392 | +#include "tuxonice_alloc.h" | |
5dd10c98 | 5393 | +#include "tuxonice_modules.h" |
2380c486 | 5394 | + |
92bca44c | 5395 | +unsigned long extra_pd1_pages_used; |
2380c486 JR |
5396 | + |
5397 | +/** | |
5398 | + * free_pbe_list - free page backup entries used by the atomic copy code. | |
5399 | + * @list: List to free. | |
5400 | + * @highmem: Whether the list is in highmem. | |
5401 | + * | |
5402 | + * Normally, this function isn't used. If, however, we need to abort before | |
5403 | + * doing the atomic copy, we use this to free the pbes previously allocated. | |
5404 | + **/ | |
5405 | +static void free_pbe_list(struct pbe **list, int highmem) | |
5406 | +{ | |
5407 | + while (*list) { | |
5408 | + int i; | |
5409 | + struct pbe *free_pbe, *next_page = NULL; | |
5410 | + struct page *page; | |
5411 | + | |
5412 | + if (highmem) { | |
5413 | + page = (struct page *) *list; | |
5414 | + free_pbe = (struct pbe *) kmap(page); | |
5415 | + } else { | |
5416 | + page = virt_to_page(*list); | |
5417 | + free_pbe = *list; | |
5418 | + } | |
5419 | + | |
5420 | + for (i = 0; i < PBES_PER_PAGE; i++) { | |
5421 | + if (!free_pbe) | |
5422 | + break; | |
5423 | + if (highmem) | |
5424 | + toi__free_page(29, free_pbe->address); | |
5425 | + else | |
5426 | + toi_free_page(29, | |
5427 | + (unsigned long) free_pbe->address); | |
5428 | + free_pbe = free_pbe->next; | |
5429 | + } | |
5430 | + | |
5431 | + if (highmem) { | |
5432 | + if (free_pbe) | |
5433 | + next_page = free_pbe; | |
5434 | + kunmap(page); | |
5435 | + } else { | |
5436 | + if (free_pbe) | |
5437 | + next_page = free_pbe; | |
5438 | + } | |
5439 | + | |
5440 | + toi__free_page(29, page); | |
5441 | + *list = (struct pbe *) next_page; | |
5442 | + }; | |
5443 | +} | |
5444 | + | |
5445 | +/** | |
5446 | + * copyback_post - post atomic-restore actions | |
5447 | + * | |
5448 | + * After doing the atomic restore, we have a few more things to do: | |
5449 | + * 1) We want to retain some values across the restore, so we now copy | |
5450 | + * these from the nosave variables to the normal ones. | |
5451 | + * 2) Set the status flags. | |
5452 | + * 3) Resume devices. | |
5453 | + * 4) Tell userui so it can redraw & restore settings. | |
5454 | + * 5) Reread the page cache. | |
5455 | + **/ | |
5456 | +void copyback_post(void) | |
5457 | +{ | |
5458 | + struct toi_boot_kernel_data *bkd = | |
5459 | + (struct toi_boot_kernel_data *) boot_kernel_data_buffer; | |
5460 | + | |
5461 | + /* | |
5462 | + * The boot kernel's data may be larger (newer version) or | |
5463 | + * smaller (older version) than ours. Copy the minimum | |
5464 | + * of the two sizes, so that we don't overwrite valid values | |
5465 | + * from pre-atomic copy. | |
5466 | + */ | |
5467 | + | |
5468 | + memcpy(&toi_bkd, (char *) boot_kernel_data_buffer, | |
5469 | + min_t(int, sizeof(struct toi_boot_kernel_data), | |
5470 | + bkd->size)); | |
5471 | + | |
5472 | + if (toi_activate_storage(1)) | |
5473 | + panic("Failed to reactivate our storage."); | |
5474 | + | |
5dd10c98 | 5475 | + toi_post_atomic_restore_modules(bkd); |
2380c486 JR |
5476 | + |
5477 | + toi_cond_pause(1, "About to reload secondary pagedir."); | |
5478 | + | |
5479 | + if (read_pageset2(0)) | |
5480 | + panic("Unable to successfully reread the page cache."); | |
5481 | + | |
5482 | + /* | |
5483 | + * If the user wants to sleep again after resuming from full-off, | |
5484 | + * it's most likely to be in order to suspend to ram, so we'll | |
5485 | + * do this check after loading pageset2, to give them the fastest | |
5486 | + * wakeup when they are ready to use the computer again. | |
5487 | + */ | |
5488 | + toi_check_resleep(); | |
5489 | +} | |
5490 | + | |
5491 | +/** | |
5492 | + * toi_copy_pageset1 - do the atomic copy of pageset1 | |
5493 | + * | |
5494 | + * Make the atomic copy of pageset1. We can't use copy_page (as we once did) | |
5495 | + * because we can't be sure what side effects it has. On my old Duron, with | |
5496 | + * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt | |
5497 | + * count at resume time 4 instead of 3. | |
5498 | + * | |
5499 | + * We don't want to call kmap_atomic unconditionally because it has the side | |
5500 | + * effect of incrementing the preempt count, which will leave it one too high | |
5501 | + * post resume (the page containing the preempt count will be copied after | |
5502 | + * its incremented. This is essentially the same problem. | |
5503 | + **/ | |
5504 | +void toi_copy_pageset1(void) | |
5505 | +{ | |
5506 | + int i; | |
5507 | + unsigned long source_index, dest_index; | |
5508 | + | |
5509 | + memory_bm_position_reset(pageset1_map); | |
5510 | + memory_bm_position_reset(pageset1_copy_map); | |
5511 | + | |
5512 | + source_index = memory_bm_next_pfn(pageset1_map); | |
5513 | + dest_index = memory_bm_next_pfn(pageset1_copy_map); | |
5514 | + | |
5515 | + for (i = 0; i < pagedir1.size; i++) { | |
5516 | + unsigned long *origvirt, *copyvirt; | |
5517 | + struct page *origpage, *copypage; | |
5518 | + int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1, | |
5519 | + was_present1, was_present2; | |
5520 | + | |
5521 | + origpage = pfn_to_page(source_index); | |
5522 | + copypage = pfn_to_page(dest_index); | |
5523 | + | |
5524 | + origvirt = PageHighMem(origpage) ? | |
5525 | + kmap_atomic(origpage, KM_USER0) : | |
5526 | + page_address(origpage); | |
5527 | + | |
5528 | + copyvirt = PageHighMem(copypage) ? | |
5529 | + kmap_atomic(copypage, KM_USER1) : | |
5530 | + page_address(copypage); | |
5531 | + | |
5532 | + was_present1 = kernel_page_present(origpage); | |
5533 | + if (!was_present1) | |
5534 | + kernel_map_pages(origpage, 1, 1); | |
5535 | + | |
5536 | + was_present2 = kernel_page_present(copypage); | |
5537 | + if (!was_present2) | |
5538 | + kernel_map_pages(copypage, 1, 1); | |
5539 | + | |
5540 | + while (loop >= 0) { | |
5541 | + *(copyvirt + loop) = *(origvirt + loop); | |
5542 | + loop--; | |
5543 | + } | |
5544 | + | |
5545 | + if (!was_present1) | |
5546 | + kernel_map_pages(origpage, 1, 0); | |
5547 | + | |
5548 | + if (!was_present2) | |
5549 | + kernel_map_pages(copypage, 1, 0); | |
5550 | + | |
5551 | + if (PageHighMem(origpage)) | |
5552 | + kunmap_atomic(origvirt, KM_USER0); | |
5553 | + | |
5554 | + if (PageHighMem(copypage)) | |
5555 | + kunmap_atomic(copyvirt, KM_USER1); | |
5556 | + | |
5557 | + source_index = memory_bm_next_pfn(pageset1_map); | |
5558 | + dest_index = memory_bm_next_pfn(pageset1_copy_map); | |
5559 | + } | |
5560 | +} | |
5561 | + | |
5562 | +/** | |
5563 | + * __toi_post_context_save - steps after saving the cpu context | |
5564 | + * | |
5565 | + * Steps taken after saving the CPU state to make the actual | |
5566 | + * atomic copy. | |
5567 | + * | |
5568 | + * Called from swsusp_save in snapshot.c via toi_post_context_save. | |
5569 | + **/ | |
5570 | +int __toi_post_context_save(void) | |
5571 | +{ | |
92bca44c | 5572 | + unsigned long old_ps1_size = pagedir1.size; |
2380c486 JR |
5573 | + |
5574 | + check_checksums(); | |
5575 | + | |
5576 | + free_checksum_pages(); | |
5577 | + | |
5578 | + toi_recalculate_image_contents(1); | |
5579 | + | |
92bca44c AM |
5580 | + extra_pd1_pages_used = pagedir1.size > old_ps1_size ? |
5581 | + pagedir1.size - old_ps1_size : 0; | |
2380c486 JR |
5582 | + |
5583 | + if (extra_pd1_pages_used > extra_pd1_pages_allowance) { | |
92bca44c | 5584 | + printk(KERN_INFO "Pageset1 has grown by %lu pages. " |
2380c486 JR |
5585 | + "extra_pages_allowance is currently only %lu.\n", |
5586 | + pagedir1.size - old_ps1_size, | |
5587 | + extra_pd1_pages_allowance); | |
5588 | + | |
5589 | + /* | |
5590 | + * Highlevel code will see this, clear the state and | |
5591 | + * retry if we haven't already done so twice. | |
5592 | + */ | |
5593 | + set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); | |
5594 | + return 1; | |
5595 | + } | |
5596 | + | |
5597 | + if (!test_action_state(TOI_TEST_FILTER_SPEED) && | |
5598 | + !test_action_state(TOI_TEST_BIO)) | |
5599 | + toi_copy_pageset1(); | |
5600 | + | |
5601 | + return 0; | |
5602 | +} | |
5603 | + | |
5604 | +/** | |
5605 | + * toi_hibernate - high level code for doing the atomic copy | |
5606 | + * | |
5607 | + * High-level code which prepares to do the atomic copy. Loosely based | |
5608 | + * on the swsusp version, but with the following twists: | |
5609 | + * - We set toi_running so the swsusp code uses our code paths. | |
5610 | + * - We give better feedback regarding what goes wrong if there is a | |
5611 | + * problem. | |
5612 | + * - We use an extra function to call the assembly, just in case this code | |
5613 | + * is in a module (return address). | |
5614 | + **/ | |
5615 | +int toi_hibernate(void) | |
5616 | +{ | |
5617 | + int error; | |
5618 | + | |
5619 | + toi_running = 1; /* For the swsusp code we use :< */ | |
5620 | + | |
5621 | + error = toi_lowlevel_builtin(); | |
5622 | + | |
5623 | + toi_running = 0; | |
5624 | + return error; | |
5625 | +} | |
5626 | + | |
5627 | +/** | |
5628 | + * toi_atomic_restore - prepare to do the atomic restore | |
5629 | + * | |
5630 | + * Get ready to do the atomic restore. This part gets us into the same | |
5631 | + * state we are in prior to do calling do_toi_lowlevel while | |
5632 | + * hibernating: hot-unplugging secondary cpus and freeze processes, | |
5633 | + * before starting the thread that will do the restore. | |
5634 | + **/ | |
5635 | +int toi_atomic_restore(void) | |
5636 | +{ | |
5637 | + int error; | |
5638 | + | |
5639 | + toi_running = 1; | |
5640 | + | |
5641 | + toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore."); | |
5642 | + | |
5643 | + memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line, | |
5644 | + strlen(saved_command_line)); | |
5645 | + | |
5dd10c98 AM |
5646 | + toi_pre_atomic_restore_modules(&toi_bkd); |
5647 | + | |
2380c486 JR |
5648 | + if (add_boot_kernel_data_pbe()) |
5649 | + goto Failed; | |
5650 | + | |
5651 | + toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); | |
5652 | + | |
5653 | + if (toi_go_atomic(PMSG_QUIESCE, 0)) | |
5654 | + goto Failed; | |
5655 | + | |
5656 | + /* We'll ignore saved state, but this gets preempt count (etc) right */ | |
5657 | + save_processor_state(); | |
5658 | + | |
5659 | + error = swsusp_arch_resume(); | |
5660 | + /* | |
5661 | + * Code below is only ever reached in case of failure. Otherwise | |
5662 | + * execution continues at place where swsusp_arch_suspend was called. | |
5663 | + * | |
5664 | + * We don't know whether it's safe to continue (this shouldn't happen), | |
5665 | + * so lets err on the side of caution. | |
5666 | + */ | |
5667 | + BUG(); | |
5668 | + | |
5669 | +Failed: | |
5670 | + free_pbe_list(&restore_pblist, 0); | |
5671 | +#ifdef CONFIG_HIGHMEM | |
5672 | + free_pbe_list(&restore_highmem_pblist, 1); | |
5673 | +#endif | |
2380c486 JR |
5674 | + toi_running = 0; |
5675 | + return 1; | |
5676 | +} | |
5677 | + | |
5678 | +/** | |
5679 | + * toi_go_atomic - do the actual atomic copy/restore | |
92bca44c AM |
5680 | + * @state: The state to use for dpm_suspend_start & power_down calls. |
5681 | + * @suspend_time: Whether we're suspending or resuming. | |
2380c486 JR |
5682 | + **/ |
5683 | +int toi_go_atomic(pm_message_t state, int suspend_time) | |
5684 | +{ | |
5685 | + if (suspend_time && platform_begin(1)) { | |
5686 | + set_abort_result(TOI_PLATFORM_PREP_FAILED); | |
2380c486 JR |
5687 | + return 1; |
5688 | + } | |
5689 | + | |
5690 | + suspend_console(); | |
5691 | + | |
92bca44c | 5692 | + if (dpm_suspend_start(state)) { |
2380c486 JR |
5693 | + set_abort_result(TOI_DEVICE_REFUSED); |
5694 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3); | |
5695 | + return 1; | |
5696 | + } | |
5697 | + | |
9474138d AM |
5698 | + if (suspend_time && arch_prepare_suspend()) { |
5699 | + set_abort_result(TOI_ARCH_PREPARE_FAILED); | |
5700 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1); | |
5701 | + return 1; | |
5702 | + } | |
5703 | + | |
92bca44c AM |
5704 | + /* At this point, dpm_suspend_start() has been called, but *not* |
5705 | + * dpm_suspend_noirq(). We *must* dpm_suspend_noirq() now. | |
9474138d AM |
5706 | + * Otherwise, drivers for some devices (e.g. interrupt controllers) |
5707 | + * become desynchronized with the actual state of the hardware | |
5708 | + * at resume time, and evil weirdness ensues. | |
5709 | + */ | |
5710 | + | |
92bca44c | 5711 | + if (dpm_suspend_noirq(state)) { |
9474138d AM |
5712 | + set_abort_result(TOI_DEVICE_REFUSED); |
5713 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1); | |
5714 | + return 1; | |
5715 | + } | |
5716 | + | |
2380c486 JR |
5717 | + if (suspend_time && platform_pre_snapshot(1)) { |
5718 | + set_abort_result(TOI_PRE_SNAPSHOT_FAILED); | |
9474138d | 5719 | + toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1); |
2380c486 JR |
5720 | + return 1; |
5721 | + } | |
5722 | + | |
5723 | + if (!suspend_time && platform_pre_restore(1)) { | |
5724 | + set_abort_result(TOI_PRE_RESTORE_FAILED); | |
9474138d | 5725 | + toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1); |
2380c486 JR |
5726 | + return 1; |
5727 | + } | |
5728 | + | |
5729 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG)) { | |
5730 | + if (disable_nonboot_cpus()) { | |
5731 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
5732 | + toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG, | |
9474138d | 5733 | + suspend_time, 1); |
2380c486 JR |
5734 | + return 1; |
5735 | + } | |
5736 | + } | |
5737 | + | |
2380c486 JR |
5738 | + local_irq_disable(); |
5739 | + | |
2380c486 JR |
5740 | + if (sysdev_suspend(state)) { |
5741 | + set_abort_result(TOI_SYSDEV_REFUSED); | |
9474138d | 5742 | + toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1); |
2380c486 JR |
5743 | + return 1; |
5744 | + } | |
5745 | + | |
5746 | + return 0; | |
5747 | +} | |
5748 | + | |
5749 | +/** | |
5750 | + * toi_end_atomic - post atomic copy/restore routines | |
5751 | + * @stage: What step to start at. | |
5752 | + * @suspend_time: Whether we're suspending or resuming. | |
5753 | + * @error: Whether we're recovering from an error. | |
5754 | + **/ | |
5755 | +void toi_end_atomic(int stage, int suspend_time, int error) | |
5756 | +{ | |
5757 | + switch (stage) { | |
5758 | + case ATOMIC_ALL_STEPS: | |
5759 | + if (!suspend_time) | |
5760 | + platform_leave(1); | |
5761 | + sysdev_resume(); | |
2380c486 JR |
5762 | + case ATOMIC_STEP_IRQS: |
5763 | + local_irq_enable(); | |
2380c486 JR |
5764 | + case ATOMIC_STEP_CPU_HOTPLUG: |
5765 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG)) | |
5766 | + enable_nonboot_cpus(); | |
9474138d | 5767 | + platform_restore_cleanup(1); |
2380c486 JR |
5768 | + case ATOMIC_STEP_PLATFORM_FINISH: |
5769 | + platform_finish(1); | |
92bca44c | 5770 | + dpm_resume_noirq(suspend_time ? |
9474138d | 5771 | + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); |
2380c486 JR |
5772 | + case ATOMIC_STEP_DEVICE_RESUME: |
5773 | + if (suspend_time && (error & 2)) | |
5774 | + platform_recover(1); | |
92bca44c | 5775 | + dpm_resume_end(suspend_time ? |
2380c486 JR |
5776 | + ((error & 1) ? PMSG_RECOVER : PMSG_THAW) : |
5777 | + PMSG_RESTORE); | |
2380c486 | 5778 | + resume_console(); |
2380c486 JR |
5779 | + platform_end(1); |
5780 | + | |
5781 | + toi_prepare_status(DONT_CLEAR_BAR, "Post atomic."); | |
5782 | + } | |
5783 | +} | |
5784 | diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h | |
5785 | new file mode 100644 | |
5dd10c98 | 5786 | index 0000000..e61b27b |
2380c486 JR |
5787 | --- /dev/null |
5788 | +++ b/kernel/power/tuxonice_atomic_copy.h | |
9474138d | 5789 | @@ -0,0 +1,20 @@ |
2380c486 JR |
5790 | +/* |
5791 | + * kernel/power/tuxonice_atomic_copy.h | |
5792 | + * | |
5dd10c98 | 5793 | + * Copyright 2008-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5794 | + * |
5795 | + * Distributed under GPLv2. | |
5796 | + * | |
5797 | + * Routines for doing the atomic save/restore. | |
5798 | + */ | |
5799 | + | |
5800 | +enum { | |
5801 | + ATOMIC_ALL_STEPS, | |
2380c486 JR |
5802 | + ATOMIC_STEP_IRQS, |
5803 | + ATOMIC_STEP_CPU_HOTPLUG, | |
5804 | + ATOMIC_STEP_PLATFORM_FINISH, | |
5805 | + ATOMIC_STEP_DEVICE_RESUME, | |
2380c486 JR |
5806 | +}; |
5807 | + | |
5808 | +int toi_go_atomic(pm_message_t state, int toi_time); | |
5809 | +void toi_end_atomic(int stage, int toi_time, int error); | |
7e46296a | 5810 | diff --git a/kernel/power/tuxonice_bio.h b/kernel/power/tuxonice_bio.h |
2380c486 | 5811 | new file mode 100644 |
5dd10c98 | 5812 | index 0000000..9627ccc |
2380c486 | 5813 | --- /dev/null |
7e46296a | 5814 | +++ b/kernel/power/tuxonice_bio.h |
5dd10c98 | 5815 | @@ -0,0 +1,77 @@ |
2380c486 | 5816 | +/* |
7e46296a | 5817 | + * kernel/power/tuxonice_bio.h |
2380c486 | 5818 | + * |
5dd10c98 | 5819 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5820 | + * |
5821 | + * Distributed under GPLv2. | |
5822 | + * | |
7e46296a AM |
5823 | + * This file contains declarations for functions exported from |
5824 | + * tuxonice_bio.c, which contains low level io functions. | |
2380c486 JR |
5825 | + */ |
5826 | + | |
7e46296a AM |
5827 | +#include <linux/buffer_head.h> |
5828 | +#include "tuxonice_extent.h" | |
2380c486 | 5829 | + |
7e46296a AM |
5830 | +void toi_put_extent_chain(struct hibernate_extent_chain *chain); |
5831 | +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, | |
5832 | + unsigned long start, unsigned long end); | |
2380c486 | 5833 | + |
7e46296a AM |
5834 | +struct hibernate_extent_saved_state { |
5835 | + int extent_num; | |
5836 | + struct hibernate_extent *extent_ptr; | |
5837 | + unsigned long offset; | |
5838 | +}; | |
2380c486 | 5839 | + |
7e46296a AM |
5840 | +struct toi_bdev_info { |
5841 | + struct toi_bdev_info *next; | |
5842 | + struct hibernate_extent_chain blocks; | |
5843 | + struct block_device *bdev; | |
5844 | + struct toi_module_ops *allocator; | |
5845 | + int allocator_index; | |
5846 | + struct hibernate_extent_chain allocations; | |
5dd10c98 | 5847 | + char name[266]; /* "swap on " or "file " + up to 256 chars */ |
2380c486 | 5848 | + |
7e46296a AM |
5849 | + /* Saved in header */ |
5850 | + char uuid[17]; | |
5851 | + dev_t dev_t; | |
5852 | + int prio; | |
5853 | + int bmap_shift; | |
5854 | + int blocks_per_page; | |
5dd10c98 | 5855 | + unsigned long pages_used; |
7e46296a AM |
5856 | + struct hibernate_extent_saved_state saved_state[4]; |
5857 | +}; | |
2380c486 | 5858 | + |
7e46296a AM |
5859 | +struct toi_extent_iterate_state { |
5860 | + struct toi_bdev_info *current_chain; | |
5861 | + int num_chains; | |
5862 | + int saved_chain_number[4]; | |
5863 | + struct toi_bdev_info *saved_chain_ptr[4]; | |
5864 | +}; | |
2380c486 | 5865 | + |
7e46296a AM |
5866 | +/* |
5867 | + * Our exported interface so the swapwriter and filewriter don't | |
5868 | + * need these functions duplicated. | |
5869 | + */ | |
5870 | +struct toi_bio_ops { | |
5871 | + int (*bdev_page_io) (int rw, struct block_device *bdev, long pos, | |
5872 | + struct page *page); | |
5873 | + int (*register_storage)(struct toi_bdev_info *new); | |
5874 | + void (*free_storage)(void); | |
5875 | +}; | |
2380c486 | 5876 | + |
7e46296a AM |
5877 | +struct toi_allocator_ops { |
5878 | + unsigned long (*toi_swap_storage_available) (void); | |
5879 | +}; | |
2380c486 | 5880 | + |
7e46296a | 5881 | +extern struct toi_bio_ops toi_bio_ops; |
2380c486 | 5882 | + |
7e46296a AM |
5883 | +extern char *toi_writer_buffer; |
5884 | +extern int toi_writer_buffer_posn; | |
5885 | + | |
5886 | +struct toi_bio_allocator_ops { | |
5887 | + int (*register_storage) (void); | |
5888 | + unsigned long (*storage_available)(void); | |
5889 | + int (*allocate_storage) (struct toi_bdev_info *, unsigned long); | |
5890 | + int (*bmap) (struct toi_bdev_info *); | |
5891 | + void (*free_storage) (struct toi_bdev_info *); | |
5892 | +}; | |
5893 | diff --git a/kernel/power/tuxonice_bio_chains.c b/kernel/power/tuxonice_bio_chains.c | |
5894 | new file mode 100644 | |
e876a0dd | 5895 | index 0000000..2ac2042 |
7e46296a AM |
5896 | --- /dev/null |
5897 | +++ b/kernel/power/tuxonice_bio_chains.c | |
5dd10c98 | 5898 | @@ -0,0 +1,1044 @@ |
7e46296a AM |
5899 | +/* |
5900 | + * kernel/power/tuxonice_bio_devinfo.c | |
5901 | + * | |
5dd10c98 | 5902 | + * Copyright (C) 2009-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
5903 | + * |
5904 | + * Distributed under GPLv2. | |
5905 | + * | |
5906 | + */ | |
2380c486 | 5907 | + |
7e46296a AM |
5908 | +#include <linux/mm_types.h> |
5909 | +#include "tuxonice_bio.h" | |
5910 | +#include "tuxonice_bio_internal.h" | |
5911 | +#include "tuxonice_alloc.h" | |
5912 | +#include "tuxonice_ui.h" | |
5913 | +#include "tuxonice.h" | |
5914 | +#include "tuxonice_io.h" | |
2380c486 | 5915 | + |
7e46296a AM |
5916 | +static struct toi_bdev_info *prio_chain_head; |
5917 | +static int num_chains; | |
2380c486 JR |
5918 | + |
5919 | +/* Pointer to current entry being loaded/saved. */ | |
5920 | +struct toi_extent_iterate_state toi_writer_posn; | |
2380c486 | 5921 | + |
7e46296a AM |
5922 | +#define metadata_size (sizeof(struct toi_bdev_info) - \ |
5923 | + offsetof(struct toi_bdev_info, uuid)) | |
2380c486 | 5924 | + |
7e46296a AM |
5925 | +/* |
5926 | + * After section 0 (header) comes 2 => next_section[0] = 2 | |
5927 | + */ | |
5928 | +static int next_section[3] = { 2, 3, 1 }; | |
5929 | + | |
5930 | +/** | |
5931 | + * dump_block_chains - print the contents of the bdev info array. | |
5932 | + **/ | |
5933 | +void dump_block_chains(void) | |
5934 | +{ | |
5935 | + int i = 0; | |
5936 | + int j; | |
5937 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
2380c486 | 5938 | + |
7e46296a AM |
5939 | + while (cur_chain) { |
5940 | + struct hibernate_extent *this = cur_chain->blocks.first; | |
2380c486 | 5941 | + |
7e46296a | 5942 | + printk(KERN_DEBUG "Chain %d (prio %d):", i, cur_chain->prio); |
2380c486 | 5943 | + |
7e46296a AM |
5944 | + while (this) { |
5945 | + printk(KERN_CONT " [%lu-%lu]%s", this->start, | |
5946 | + this->end, this->next ? "," : ""); | |
5947 | + this = this->next; | |
5948 | + } | |
2380c486 | 5949 | + |
7e46296a AM |
5950 | + printk("\n"); |
5951 | + cur_chain = cur_chain->next; | |
5dd10c98 | 5952 | + i++; |
7e46296a AM |
5953 | + } |
5954 | + | |
5955 | + printk(KERN_DEBUG "Saved states:\n"); | |
5956 | + for (i = 0; i < 4; i++) { | |
5957 | + printk(KERN_DEBUG "Slot %d: Chain %d.\n", | |
5958 | + i, toi_writer_posn.saved_chain_number[i]); | |
5959 | + | |
5960 | + cur_chain = prio_chain_head; | |
5961 | + j = 0; | |
5962 | + while (cur_chain) { | |
5963 | + printk(KERN_DEBUG " Chain %d: Extent %d. Offset %lu.\n", | |
5964 | + j, cur_chain->saved_state[i].extent_num, | |
5965 | + cur_chain->saved_state[i].offset); | |
5966 | + cur_chain = cur_chain->next; | |
5967 | + j++; | |
5968 | + } | |
5969 | + printk(KERN_CONT "\n"); | |
5970 | + } | |
5971 | +} | |
2380c486 JR |
5972 | + |
5973 | +/** | |
2380c486 | 5974 | + * |
2380c486 | 5975 | + **/ |
7e46296a | 5976 | +static void toi_extent_chain_next(void) |
2380c486 | 5977 | +{ |
7e46296a | 5978 | + struct toi_bdev_info *this = toi_writer_posn.current_chain; |
2380c486 | 5979 | + |
7e46296a AM |
5980 | + if (!this->blocks.current_extent) |
5981 | + return; | |
2380c486 | 5982 | + |
7e46296a AM |
5983 | + if (this->blocks.current_offset == this->blocks.current_extent->end) { |
5984 | + if (this->blocks.current_extent->next) { | |
5985 | + this->blocks.current_extent = | |
5986 | + this->blocks.current_extent->next; | |
5987 | + this->blocks.current_offset = | |
5988 | + this->blocks.current_extent->start; | |
5989 | + } else { | |
5990 | + this->blocks.current_extent = NULL; | |
5991 | + this->blocks.current_offset = 0; | |
5992 | + } | |
5993 | + } else | |
5994 | + this->blocks.current_offset++; | |
5995 | +} | |
2380c486 JR |
5996 | + |
5997 | +/** | |
2380c486 | 5998 | + * |
7e46296a AM |
5999 | + */ |
6000 | + | |
6001 | +static struct toi_bdev_info *__find_next_chain_same_prio(void) | |
2380c486 | 6002 | +{ |
7e46296a AM |
6003 | + struct toi_bdev_info *start_chain = toi_writer_posn.current_chain; |
6004 | + struct toi_bdev_info *this = start_chain; | |
6005 | + int orig_prio = this->prio; | |
2380c486 | 6006 | + |
7e46296a AM |
6007 | + do { |
6008 | + this = this->next; | |
2380c486 | 6009 | + |
7e46296a AM |
6010 | + if (!this) |
6011 | + this = prio_chain_head; | |
6012 | + | |
6013 | + /* Back on original chain? Use it again. */ | |
6014 | + if (this == start_chain) | |
6015 | + return start_chain; | |
6016 | + | |
6017 | + } while (!this->blocks.current_extent || this->prio != orig_prio); | |
6018 | + | |
6019 | + return this; | |
2380c486 JR |
6020 | +} |
6021 | + | |
7e46296a | 6022 | +static void find_next_chain(void) |
2380c486 | 6023 | +{ |
7e46296a | 6024 | + struct toi_bdev_info *this; |
2380c486 | 6025 | + |
7e46296a | 6026 | + this = __find_next_chain_same_prio(); |
2380c486 | 6027 | + |
7e46296a AM |
6028 | + /* |
6029 | + * If we didn't get another chain of the same priority that we | |
6030 | + * can use, look for the next priority. | |
6031 | + */ | |
6032 | + while (this && !this->blocks.current_extent) | |
6033 | + this = this->next; | |
2380c486 | 6034 | + |
7e46296a | 6035 | + toi_writer_posn.current_chain = this; |
2380c486 JR |
6036 | +} |
6037 | + | |
6038 | +/** | |
7e46296a AM |
6039 | + * toi_extent_state_next - go to the next extent |
6040 | + * @blocks: The number of values to progress. | |
6041 | + * @stripe_mode: Whether to spread usage across all chains. | |
2380c486 | 6042 | + * |
7e46296a AM |
6043 | + * Given a state, progress to the next valid entry. We may begin in an |
6044 | + * invalid state, as we do when invoked after extent_state_goto_start below. | |
2380c486 | 6045 | + * |
7e46296a AM |
6046 | + * When using compression and expected_compression > 0, we let the image size |
6047 | + * be larger than storage, so we can validly run out of data to return. | |
2380c486 | 6048 | + **/ |
7e46296a | 6049 | +static unsigned long toi_extent_state_next(int blocks, int current_stream) |
2380c486 | 6050 | +{ |
7e46296a AM |
6051 | + int i; |
6052 | + | |
6053 | + if (!toi_writer_posn.current_chain) | |
e876a0dd | 6054 | + return -ENOSPC; |
7e46296a AM |
6055 | + |
6056 | + /* Assume chains always have lengths that are multiples of @blocks */ | |
6057 | + for (i = 0; i < blocks; i++) | |
6058 | + toi_extent_chain_next(); | |
6059 | + | |
6060 | + /* The header stream is not striped */ | |
6061 | + if (current_stream || | |
6062 | + !toi_writer_posn.current_chain->blocks.current_extent) | |
6063 | + find_next_chain(); | |
6064 | + | |
e876a0dd | 6065 | + return toi_writer_posn.current_chain ? 0 : -ENOSPC; |
7e46296a AM |
6066 | +} |
6067 | + | |
6068 | +static void toi_insert_chain_in_prio_list(struct toi_bdev_info *this) | |
6069 | +{ | |
6070 | + struct toi_bdev_info **prev_ptr; | |
6071 | + struct toi_bdev_info *cur; | |
6072 | + | |
6073 | + /* Loop through the existing chain, finding where to insert it */ | |
6074 | + prev_ptr = &prio_chain_head; | |
6075 | + cur = prio_chain_head; | |
6076 | + | |
6077 | + while (cur && cur->prio >= this->prio) { | |
6078 | + prev_ptr = &cur->next; | |
6079 | + cur = cur->next; | |
6080 | + } | |
6081 | + | |
6082 | + this->next = *prev_ptr; | |
6083 | + *prev_ptr = this; | |
6084 | + | |
6085 | + this = prio_chain_head; | |
6086 | + while (this) | |
6087 | + this = this->next; | |
6088 | + num_chains++; | |
2380c486 JR |
6089 | +} |
6090 | + | |
6091 | +/** | |
7e46296a AM |
6092 | + * toi_extent_state_goto_start - reinitialize an extent chain iterator |
6093 | + * @state: Iterator to reinitialize | |
2380c486 | 6094 | + **/ |
7e46296a | 6095 | +void toi_extent_state_goto_start(void) |
2380c486 | 6096 | +{ |
7e46296a AM |
6097 | + struct toi_bdev_info *this = prio_chain_head; |
6098 | + | |
6099 | + while (this) { | |
6100 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6101 | + "Setting current extent to %p.", this->blocks.first); | |
6102 | + this->blocks.current_extent = this->blocks.first; | |
6103 | + if (this->blocks.current_extent) { | |
6104 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6105 | + "Setting current offset to %lu.", | |
6106 | + this->blocks.current_extent->start); | |
6107 | + this->blocks.current_offset = | |
6108 | + this->blocks.current_extent->start; | |
6109 | + } | |
6110 | + | |
6111 | + this = this->next; | |
6112 | + } | |
6113 | + | |
6114 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Setting current chain to %p.", | |
6115 | + prio_chain_head); | |
6116 | + toi_writer_posn.current_chain = prio_chain_head; | |
6117 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Leaving extent state goto start."); | |
2380c486 JR |
6118 | +} |
6119 | + | |
6120 | +/** | |
7e46296a AM |
6121 | + * toi_extent_state_save - save state of the iterator |
6122 | + * @state: Current state of the chain | |
6123 | + * @saved_state: Iterator to populate | |
2380c486 | 6124 | + * |
7e46296a AM |
6125 | + * Given a state and a struct hibernate_extent_state_store, save the current |
6126 | + * position in a format that can be used with relocated chains (at | |
6127 | + * resume time). | |
2380c486 | 6128 | + **/ |
7e46296a | 6129 | +void toi_extent_state_save(int slot) |
2380c486 | 6130 | +{ |
7e46296a AM |
6131 | + struct toi_bdev_info *cur_chain = prio_chain_head; |
6132 | + struct hibernate_extent *extent; | |
6133 | + struct hibernate_extent_saved_state *chain_state; | |
6134 | + int i = 0; | |
2380c486 | 6135 | + |
7e46296a AM |
6136 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_extent_state_save, slot %d.", |
6137 | + slot); | |
2380c486 | 6138 | + |
7e46296a AM |
6139 | + if (!toi_writer_posn.current_chain) { |
6140 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "No current chain => " | |
6141 | + "chain_num = -1."); | |
6142 | + toi_writer_posn.saved_chain_number[slot] = -1; | |
6143 | + return; | |
6144 | + } | |
2380c486 | 6145 | + |
7e46296a AM |
6146 | + while (cur_chain) { |
6147 | + i++; | |
6148 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Saving chain %d (%p) " | |
6149 | + "state, slot %d.", i, cur_chain, slot); | |
2380c486 | 6150 | + |
7e46296a | 6151 | + chain_state = &cur_chain->saved_state[slot]; |
2380c486 | 6152 | + |
7e46296a | 6153 | + chain_state->offset = cur_chain->blocks.current_offset; |
2380c486 | 6154 | + |
7e46296a AM |
6155 | + if (toi_writer_posn.current_chain == cur_chain) { |
6156 | + toi_writer_posn.saved_chain_number[slot] = i; | |
6157 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "This is the chain " | |
6158 | + "we were on => chain_num is %d.", i); | |
6159 | + } | |
2380c486 | 6160 | + |
7e46296a AM |
6161 | + if (!cur_chain->blocks.current_extent) { |
6162 | + chain_state->extent_num = 0; | |
6163 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "No current extent " | |
6164 | + "for this chain => extent_num %d is 0.", | |
6165 | + i); | |
6166 | + cur_chain = cur_chain->next; | |
6167 | + continue; | |
6168 | + } | |
2380c486 | 6169 | + |
7e46296a AM |
6170 | + extent = cur_chain->blocks.first; |
6171 | + chain_state->extent_num = 1; | |
6172 | + | |
6173 | + while (extent != cur_chain->blocks.current_extent) { | |
6174 | + chain_state->extent_num++; | |
6175 | + extent = extent->next; | |
6176 | + } | |
6177 | + | |
6178 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "extent num %d is %d.", i, | |
6179 | + chain_state->extent_num); | |
6180 | + | |
6181 | + cur_chain = cur_chain->next; | |
6182 | + } | |
6183 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6184 | + "Completed saving extent state slot %d.", slot); | |
2380c486 JR |
6185 | +} |
6186 | + | |
6187 | +/** | |
7e46296a AM |
6188 | + * toi_extent_state_restore - restore the position saved by extent_state_save |
6189 | + * @state: State to populate | |
6190 | + * @saved_state: Iterator saved to restore | |
2380c486 | 6191 | + **/ |
7e46296a | 6192 | +void toi_extent_state_restore(int slot) |
2380c486 | 6193 | +{ |
7e46296a AM |
6194 | + int i = 0; |
6195 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
6196 | + struct hibernate_extent_saved_state *chain_state; | |
2380c486 | 6197 | + |
7e46296a AM |
6198 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6199 | + "toi_extent_state_restore - slot %d.", slot); | |
2380c486 | 6200 | + |
7e46296a AM |
6201 | + if (toi_writer_posn.saved_chain_number[slot] == -1) { |
6202 | + toi_writer_posn.current_chain = NULL; | |
6203 | + return; | |
2380c486 JR |
6204 | + } |
6205 | + | |
7e46296a AM |
6206 | + while (cur_chain) { |
6207 | + int posn; | |
6208 | + int j; | |
6209 | + i++; | |
6210 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Restoring chain %d (%p) " | |
6211 | + "state, slot %d.", i, cur_chain, slot); | |
2380c486 | 6212 | + |
7e46296a | 6213 | + chain_state = &cur_chain->saved_state[slot]; |
2380c486 | 6214 | + |
7e46296a | 6215 | + posn = chain_state->extent_num; |
2380c486 | 6216 | + |
7e46296a AM |
6217 | + cur_chain->blocks.current_extent = cur_chain->blocks.first; |
6218 | + cur_chain->blocks.current_offset = chain_state->offset; | |
2380c486 | 6219 | + |
7e46296a AM |
6220 | + if (i == toi_writer_posn.saved_chain_number[slot]) { |
6221 | + toi_writer_posn.current_chain = cur_chain; | |
6222 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6223 | + "Found current chain."); | |
6224 | + } | |
2380c486 | 6225 | + |
7e46296a AM |
6226 | + for (j = 0; j < 4; j++) |
6227 | + if (i == toi_writer_posn.saved_chain_number[j]) { | |
6228 | + toi_writer_posn.saved_chain_ptr[j] = cur_chain; | |
6229 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6230 | + "Found saved chain ptr %d (%p) (offset" | |
6231 | + " %d).", j, cur_chain, | |
6232 | + cur_chain->saved_state[j].offset); | |
6233 | + } | |
2380c486 | 6234 | + |
7e46296a AM |
6235 | + if (posn) { |
6236 | + while (--posn) | |
6237 | + cur_chain->blocks.current_extent = | |
6238 | + cur_chain->blocks.current_extent->next; | |
6239 | + } else | |
6240 | + cur_chain->blocks.current_extent = NULL; | |
6241 | + | |
6242 | + cur_chain = cur_chain->next; | |
6243 | + } | |
6244 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Done."); | |
6245 | + if (test_action_state(TOI_LOGALL)) | |
6246 | + dump_block_chains(); | |
2380c486 JR |
6247 | +} |
6248 | + | |
7e46296a AM |
6249 | +/* |
6250 | + * Storage needed | |
2380c486 | 6251 | + * |
7e46296a AM |
6252 | + * Returns amount of space in the image header required |
6253 | + * for the chain data. This ignores the links between | |
6254 | + * pages, which we factor in when allocating the space. | |
6255 | + */ | |
6256 | +int toi_bio_devinfo_storage_needed(void) | |
2380c486 | 6257 | +{ |
7e46296a AM |
6258 | + int result = sizeof(num_chains); |
6259 | + struct toi_bdev_info *chain = prio_chain_head; | |
2380c486 | 6260 | + |
7e46296a AM |
6261 | + while (chain) { |
6262 | + result += metadata_size; | |
2380c486 | 6263 | + |
7e46296a AM |
6264 | + /* Chain size */ |
6265 | + result += sizeof(int); | |
2380c486 | 6266 | + |
7e46296a AM |
6267 | + /* Extents */ |
6268 | + result += (2 * sizeof(unsigned long) * | |
6269 | + chain->blocks.num_extents); | |
2380c486 | 6270 | + |
7e46296a AM |
6271 | + chain = chain->next; |
6272 | + } | |
2380c486 | 6273 | + |
7e46296a AM |
6274 | + result += 4 * sizeof(int); |
6275 | + return result; | |
2380c486 JR |
6276 | +} |
6277 | + | |
5dd10c98 AM |
6278 | +static unsigned long chain_pages_used(struct toi_bdev_info *chain) |
6279 | +{ | |
6280 | + struct hibernate_extent *this = chain->blocks.first; | |
6281 | + struct hibernate_extent_saved_state *state = &chain->saved_state[3]; | |
6282 | + unsigned long size = 0; | |
6283 | + int extent_idx = 1; | |
6284 | + | |
6285 | + if (!state->extent_num) { | |
6286 | + if (!this) | |
6287 | + return 0; | |
6288 | + else | |
6289 | + return chain->blocks.size; | |
6290 | + } | |
6291 | + | |
6292 | + while (extent_idx < state->extent_num) { | |
6293 | + size += (this->end - this->start + 1); | |
6294 | + this = this->next; | |
6295 | + extent_idx++; | |
6296 | + } | |
6297 | + | |
6298 | + /* We didn't use the one we're sitting on, so don't count it */ | |
6299 | + return size + state->offset - this->start; | |
6300 | +} | |
6301 | + | |
2380c486 | 6302 | +/** |
7e46296a AM |
6303 | + * toi_serialise_extent_chain - write a chain in the image |
6304 | + * @chain: Chain to write. | |
2380c486 | 6305 | + **/ |
7e46296a | 6306 | +static int toi_serialise_extent_chain(struct toi_bdev_info *chain) |
2380c486 | 6307 | +{ |
7e46296a AM |
6308 | + struct hibernate_extent *this; |
6309 | + int ret; | |
6310 | + int i = 1; | |
2380c486 | 6311 | + |
5dd10c98 AM |
6312 | + chain->pages_used = chain_pages_used(chain); |
6313 | + | |
7e46296a AM |
6314 | + if (test_action_state(TOI_LOGALL)) |
6315 | + dump_block_chains(); | |
6316 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Serialising chain (dev_t %lx).", | |
6317 | + chain->dev_t); | |
6318 | + /* Device info - dev_t, prio, bmap_shift, blocks per page, positions */ | |
6319 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops, | |
6320 | + (char *) &chain->uuid, metadata_size); | |
6321 | + if (ret) | |
6322 | + return ret; | |
2380c486 | 6323 | + |
7e46296a AM |
6324 | + /* Num extents */ |
6325 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops, | |
6326 | + (char *) &chain->blocks.num_extents, sizeof(int)); | |
6327 | + if (ret) | |
6328 | + return ret; | |
2380c486 | 6329 | + |
7e46296a AM |
6330 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d extents.", |
6331 | + chain->blocks.num_extents); | |
2380c486 | 6332 | + |
7e46296a AM |
6333 | + this = chain->blocks.first; |
6334 | + while (this) { | |
6335 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent %d.", i); | |
6336 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, | |
6337 | + &toi_blockwriter_ops, | |
6338 | + (char *) this, 2 * sizeof(this->start)); | |
6339 | + if (ret) | |
6340 | + return ret; | |
6341 | + this = this->next; | |
6342 | + i++; | |
6343 | + } | |
2380c486 | 6344 | + |
7e46296a AM |
6345 | + return ret; |
6346 | +} | |
2380c486 | 6347 | + |
7e46296a AM |
6348 | +int toi_serialise_extent_chains(void) |
6349 | +{ | |
6350 | + struct toi_bdev_info *this = prio_chain_head; | |
6351 | + int result; | |
2380c486 | 6352 | + |
7e46296a AM |
6353 | + /* Write the number of chains */ |
6354 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Write number of chains (%d)", | |
6355 | + num_chains); | |
6356 | + result = toiActiveAllocator->rw_header_chunk(WRITE, | |
6357 | + &toi_blockwriter_ops, (char *) &num_chains, | |
6358 | + sizeof(int)); | |
6359 | + if (result) | |
6360 | + return result; | |
2380c486 | 6361 | + |
7e46296a AM |
6362 | + /* Then the chains themselves */ |
6363 | + while (this) { | |
6364 | + result = toi_serialise_extent_chain(this); | |
6365 | + if (result) | |
6366 | + return result; | |
6367 | + this = this->next; | |
2380c486 | 6368 | + } |
2380c486 | 6369 | + |
7e46296a AM |
6370 | + /* |
6371 | + * Finally, the chain we should be on at the start of each | |
6372 | + * section. | |
6373 | + */ | |
6374 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Saved chain numbers."); | |
6375 | + result = toiActiveAllocator->rw_header_chunk(WRITE, | |
6376 | + &toi_blockwriter_ops, | |
6377 | + (char *) &toi_writer_posn.saved_chain_number[0], | |
6378 | + 4 * sizeof(int)); | |
6379 | + | |
6380 | + return result; | |
2380c486 JR |
6381 | +} |
6382 | + | |
7e46296a | 6383 | +int toi_register_storage_chain(struct toi_bdev_info *new) |
2380c486 | 6384 | +{ |
7e46296a AM |
6385 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Inserting chain %p into list.", |
6386 | + new); | |
6387 | + toi_insert_chain_in_prio_list(new); | |
6388 | + return 0; | |
2380c486 JR |
6389 | +} |
6390 | + | |
7e46296a | 6391 | +static void free_bdev_info(struct toi_bdev_info *chain) |
2380c486 | 6392 | +{ |
7e46296a | 6393 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Free chain %p.", chain); |
2380c486 | 6394 | + |
7e46296a AM |
6395 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Block extents."); |
6396 | + toi_put_extent_chain(&chain->blocks); | |
2380c486 | 6397 | + |
7e46296a AM |
6398 | + /* |
6399 | + * The allocator may need to do more than just free the chains | |
5dd10c98 | 6400 | + * (swap_free, for example). Don't call from boot kernel. |
7e46296a AM |
6401 | + */ |
6402 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Allocator extents."); | |
5dd10c98 AM |
6403 | + if (chain->allocator) |
6404 | + chain->allocator->bio_allocator_ops->free_storage(chain); | |
2380c486 | 6405 | + |
7e46296a AM |
6406 | + /* |
6407 | + * Dropping out of reading atomic copy? Need to undo | |
6408 | + * toi_open_by_devnum. | |
6409 | + */ | |
6410 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Bdev."); | |
6411 | + if (chain->bdev && !IS_ERR(chain->bdev) && | |
6412 | + chain->bdev != resume_block_device && | |
6413 | + chain->bdev != header_block_device && | |
6414 | + test_toi_state(TOI_TRYING_TO_RESUME)) | |
6415 | + toi_close_bdev(chain->bdev); | |
2380c486 | 6416 | + |
7e46296a AM |
6417 | + /* Poison */ |
6418 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Struct."); | |
6419 | + toi_kfree(39, chain, sizeof(*chain)); | |
2380c486 | 6420 | + |
7e46296a AM |
6421 | + if (prio_chain_head == chain) |
6422 | + prio_chain_head = NULL; | |
2380c486 | 6423 | + |
7e46296a | 6424 | + num_chains--; |
2380c486 JR |
6425 | +} |
6426 | + | |
7e46296a | 6427 | +void free_all_bdev_info(void) |
0ada99ac | 6428 | +{ |
7e46296a AM |
6429 | + struct toi_bdev_info *this = prio_chain_head; |
6430 | + | |
6431 | + while (this) { | |
6432 | + struct toi_bdev_info *next = this->next; | |
6433 | + free_bdev_info(this); | |
6434 | + this = next; | |
6435 | + } | |
6436 | + | |
6437 | + memset((char *) &toi_writer_posn, 0, sizeof(toi_writer_posn)); | |
6438 | + prio_chain_head = NULL; | |
0ada99ac | 6439 | +} |
6440 | + | |
5dd10c98 AM |
6441 | +static void set_up_start_position(void) |
6442 | +{ | |
6443 | + toi_writer_posn.current_chain = prio_chain_head; | |
6444 | + go_next_page(0, 0); | |
6445 | +} | |
6446 | + | |
2380c486 | 6447 | +/** |
7e46296a AM |
6448 | + * toi_load_extent_chain - read back a chain saved in the image |
6449 | + * @chain: Chain to load | |
2380c486 | 6450 | + * |
7e46296a AM |
6451 | + * The linked list of extents is reconstructed from the disk. chain will point |
6452 | + * to the first entry. | |
2380c486 | 6453 | + **/ |
5dd10c98 | 6454 | +int toi_load_extent_chain(int index, int *num_loaded) |
2380c486 | 6455 | +{ |
7e46296a AM |
6456 | + struct toi_bdev_info *chain = toi_kzalloc(39, |
6457 | + sizeof(struct toi_bdev_info), GFP_ATOMIC); | |
6458 | + struct hibernate_extent *this, *last = NULL; | |
6459 | + int i, ret; | |
2380c486 | 6460 | + |
7e46296a AM |
6461 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Loading extent chain %d.", index); |
6462 | + /* Get dev_t, prio, bmap_shift, blocks per page, positions */ | |
6463 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, | |
6464 | + (char *) &chain->uuid, metadata_size); | |
9474138d | 6465 | + |
7e46296a AM |
6466 | + if (ret) { |
6467 | + printk(KERN_ERR "Failed to read the size of extent chain.\n"); | |
6468 | + toi_kfree(39, chain, sizeof(*chain)); | |
6469 | + return 1; | |
6470 | + } | |
6471 | + | |
5dd10c98 AM |
6472 | + toi_bkd.pages_used[index] = chain->pages_used; |
6473 | + | |
7e46296a AM |
6474 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, |
6475 | + (char *) &chain->blocks.num_extents, sizeof(int)); | |
6476 | + if (ret) { | |
6477 | + printk(KERN_ERR "Failed to read the size of extent chain.\n"); | |
6478 | + toi_kfree(39, chain, sizeof(*chain)); | |
6479 | + return 1; | |
6480 | + } | |
6481 | + | |
6482 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d extents.", | |
6483 | + chain->blocks.num_extents); | |
6484 | + | |
6485 | + for (i = 0; i < chain->blocks.num_extents; i++) { | |
6486 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent %d.", i + 1); | |
6487 | + | |
6488 | + this = toi_kzalloc(2, sizeof(struct hibernate_extent), | |
6489 | + TOI_ATOMIC_GFP); | |
6490 | + if (!this) { | |
6491 | + printk(KERN_INFO "Failed to allocate a new extent.\n"); | |
6492 | + free_bdev_info(chain); | |
6493 | + return -ENOMEM; | |
6494 | + } | |
6495 | + this->next = NULL; | |
6496 | + /* Get the next page */ | |
6497 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, | |
6498 | + NULL, (char *) this, 2 * sizeof(this->start)); | |
6499 | + if (ret) { | |
6500 | + printk(KERN_INFO "Failed to read an extent.\n"); | |
6501 | + toi_kfree(2, this, sizeof(struct hibernate_extent)); | |
6502 | + free_bdev_info(chain); | |
6503 | + return 1; | |
6504 | + } | |
6505 | + | |
6506 | + if (last) | |
6507 | + last->next = this; | |
6508 | + else { | |
6509 | + char b1[32], b2[32], b3[32]; | |
6510 | + /* | |
6511 | + * Open the bdev | |
6512 | + */ | |
6513 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6514 | + "Chain dev_t is %s. Resume dev t is %s. Header" | |
6515 | + " bdev_t is %s.\n", | |
6516 | + format_dev_t(b1, chain->dev_t), | |
6517 | + format_dev_t(b2, resume_dev_t), | |
6518 | + format_dev_t(b3, toi_sig_data->header_dev_t)); | |
6519 | + | |
6520 | + if (chain->dev_t == resume_dev_t) | |
6521 | + chain->bdev = resume_block_device; | |
6522 | + else if (chain->dev_t == toi_sig_data->header_dev_t) | |
6523 | + chain->bdev = header_block_device; | |
6524 | + else { | |
6525 | + chain->bdev = toi_open_bdev(chain->uuid, | |
6526 | + chain->dev_t, 1); | |
6527 | + if (IS_ERR(chain->bdev)) { | |
6528 | + free_bdev_info(chain); | |
6529 | + return -ENODEV; | |
6530 | + } | |
6531 | + } | |
6532 | + | |
6533 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Chain bmap shift " | |
6534 | + "is %d and blocks per page is %d.", | |
6535 | + chain->bmap_shift, | |
6536 | + chain->blocks_per_page); | |
6537 | + | |
6538 | + chain->blocks.first = this; | |
6539 | + | |
6540 | + /* | |
6541 | + * Couldn't do this earlier, but can't do | |
6542 | + * goto_start now - we may have already used blocks | |
6543 | + * in the first chain. | |
6544 | + */ | |
6545 | + chain->blocks.current_extent = this; | |
6546 | + chain->blocks.current_offset = this->start; | |
6547 | + | |
6548 | + /* | |
6549 | + * Can't wait until we've read the whole chain | |
6550 | + * before we insert it in the list. We might need | |
6551 | + * this chain to read the next page in the header | |
6552 | + */ | |
6553 | + toi_insert_chain_in_prio_list(chain); | |
7e46296a | 6554 | + } |
5dd10c98 AM |
6555 | + |
6556 | + /* | |
6557 | + * We have to wait until 2 extents are loaded before setting up | |
6558 | + * properly because if the first extent has only one page, we | |
6559 | + * will need to put the position on the second extent. Sounds | |
6560 | + * obvious, but it wasn't! | |
6561 | + */ | |
6562 | + (*num_loaded)++; | |
6563 | + if ((*num_loaded) == 2) | |
6564 | + set_up_start_position(); | |
7e46296a AM |
6565 | + last = this; |
6566 | + } | |
6567 | + | |
6568 | + /* | |
6569 | + * Shouldn't get empty chains, but it's not impossible. Link them in so | |
6570 | + * they get freed properly later. | |
6571 | + */ | |
6572 | + if (!chain->blocks.num_extents) | |
6573 | + toi_insert_chain_in_prio_list(chain); | |
6574 | + | |
6575 | + if (!chain->blocks.current_extent) { | |
6576 | + chain->blocks.current_extent = chain->blocks.first; | |
6577 | + if (chain->blocks.current_extent) | |
6578 | + chain->blocks.current_offset = | |
6579 | + chain->blocks.current_extent->start; | |
6580 | + } | |
6581 | + return 0; | |
6582 | +} | |
6583 | + | |
6584 | +int toi_load_extent_chains(void) | |
6585 | +{ | |
6586 | + int result; | |
6587 | + int to_load; | |
6588 | + int i; | |
5dd10c98 | 6589 | + int extents_loaded = 0; |
7e46296a AM |
6590 | + |
6591 | + result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, | |
6592 | + (char *) &to_load, | |
6593 | + sizeof(int)); | |
6594 | + if (result) | |
6595 | + return result; | |
6596 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d chains to read.", to_load); | |
6597 | + | |
6598 | + for (i = 0; i < to_load; i++) { | |
6599 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " >> Loading chain %d/%d.", | |
6600 | + i, to_load); | |
5dd10c98 | 6601 | + result = toi_load_extent_chain(i, &extents_loaded); |
7e46296a AM |
6602 | + if (result) |
6603 | + return result; | |
6604 | + } | |
6605 | + | |
5dd10c98 AM |
6606 | + /* If we never got to a second extent, we still need to do this. */ |
6607 | + if (extents_loaded == 1) | |
6608 | + set_up_start_position(); | |
6609 | + | |
7e46296a AM |
6610 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Save chain numbers."); |
6611 | + result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, | |
6612 | + &toi_blockwriter_ops, | |
6613 | + (char *) &toi_writer_posn.saved_chain_number[0], | |
6614 | + 4 * sizeof(int)); | |
6615 | + | |
6616 | + return result; | |
6617 | +} | |
6618 | + | |
6619 | +static int toi_end_of_stream(int writing, int section_barrier) | |
6620 | +{ | |
6621 | + struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain; | |
6622 | + int compare_to = next_section[current_stream]; | |
6623 | + struct toi_bdev_info *compare_chain = | |
6624 | + toi_writer_posn.saved_chain_ptr[compare_to]; | |
6625 | + int compare_offset = compare_chain ? | |
6626 | + compare_chain->saved_state[compare_to].offset : 0; | |
6627 | + | |
6628 | + if (!section_barrier) | |
6629 | + return 0; | |
6630 | + | |
6631 | + if (!cur_chain) | |
6632 | + return 1; | |
6633 | + | |
6634 | + if (cur_chain == compare_chain && | |
6635 | + cur_chain->blocks.current_offset == compare_offset) { | |
6636 | + if (writing) { | |
6637 | + if (!current_stream) { | |
6638 | + debug_broken_header(); | |
6639 | + return 1; | |
6640 | + } | |
0ada99ac | 6641 | + } else { |
e999739a | 6642 | + more_readahead = 0; |
7e46296a AM |
6643 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6644 | + "Reached the end of stream %d " | |
6645 | + "(not an error).", current_stream); | |
6646 | + return 1; | |
e999739a | 6647 | + } |
6648 | + } | |
6649 | + | |
7e46296a AM |
6650 | + return 0; |
6651 | +} | |
6652 | + | |
6653 | +/** | |
6654 | + * go_next_page - skip blocks to the start of the next page | |
6655 | + * @writing: Whether we're reading or writing the image. | |
6656 | + * | |
6657 | + * Go forward one page. | |
6658 | + **/ | |
6659 | +int go_next_page(int writing, int section_barrier) | |
6660 | +{ | |
6661 | + struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain; | |
6662 | + int max = cur_chain ? cur_chain->blocks_per_page : 1; | |
2380c486 | 6663 | + |
7e46296a AM |
6664 | + /* Nope. Go foward a page - or maybe two. Don't stripe the header, |
6665 | + * so that bad fragmentation doesn't put the extent data containing | |
6666 | + * the location of the second page out of the first header page. | |
6667 | + */ | |
6668 | + if (toi_extent_state_next(max, current_stream)) { | |
2380c486 | 6669 | + /* Don't complain if readahead falls off the end */ |
0ada99ac | 6670 | + if (writing && section_barrier) { |
7e46296a AM |
6671 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent state eof. " |
6672 | + "Expected compression ratio too optimistic?"); | |
6673 | + if (test_action_state(TOI_LOGALL)) | |
6674 | + dump_block_chains(); | |
2380c486 | 6675 | + } |
7e46296a AM |
6676 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Ran out of extents to " |
6677 | + "read/write. (Not necessarily a fatal error."); | |
e876a0dd | 6678 | + return -ENOSPC; |
2380c486 JR |
6679 | + } |
6680 | + | |
2380c486 JR |
6681 | + return 0; |
6682 | +} | |
6683 | + | |
7e46296a | 6684 | +int devices_of_same_priority(struct toi_bdev_info *this) |
2380c486 | 6685 | +{ |
7e46296a AM |
6686 | + struct toi_bdev_info *check = prio_chain_head; |
6687 | + int i = 0; | |
6688 | + | |
6689 | + while (check) { | |
6690 | + if (check->prio == this->prio) | |
6691 | + i++; | |
6692 | + check = check->next; | |
6693 | + } | |
6694 | + | |
6695 | + return i; | |
2380c486 JR |
6696 | +} |
6697 | + | |
6698 | +/** | |
6699 | + * toi_bio_rw_page - do i/o on the next disk page in the image | |
6700 | + * @writing: Whether reading or writing. | |
6701 | + * @page: Page to do i/o on. | |
6702 | + * @is_readahead: Whether we're doing readahead | |
6703 | + * @free_group: The group used in allocating the page | |
6704 | + * | |
6705 | + * Submit a page for reading or writing, possibly readahead. | |
6706 | + * Pass the group used in allocating the page as well, as it should | |
6707 | + * be freed on completion of the bio if we're writing the page. | |
6708 | + **/ | |
7e46296a | 6709 | +int toi_bio_rw_page(int writing, struct page *page, |
2380c486 JR |
6710 | + int is_readahead, int free_group) |
6711 | +{ | |
7e46296a AM |
6712 | + int result = toi_end_of_stream(writing, 1); |
6713 | + struct toi_bdev_info *dev_info = toi_writer_posn.current_chain; | |
2380c486 | 6714 | + |
7e46296a AM |
6715 | + if (result) { |
6716 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Seeking to read/write " | |
6717 | + "another page when stream has ended."); | |
e876a0dd | 6718 | + return -ENOSPC; |
7e46296a | 6719 | + } |
2380c486 | 6720 | + |
7e46296a AM |
6721 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6722 | + "%sing device %lx, sector %d << %d.", | |
6723 | + writing ? "Writ" : "Read", | |
6724 | + dev_info->bdev, dev_info->blocks.current_offset, | |
6725 | + dev_info->bmap_shift); | |
2380c486 | 6726 | + |
7e46296a AM |
6727 | + result = toi_do_io(writing, dev_info->bdev, |
6728 | + dev_info->blocks.current_offset << dev_info->bmap_shift, | |
2380c486 | 6729 | + page, is_readahead, 0, free_group); |
2380c486 | 6730 | + |
7e46296a AM |
6731 | + /* Ignore the result here - will check end of stream if come in again */ |
6732 | + go_next_page(writing, 1); | |
2380c486 | 6733 | + |
7e46296a AM |
6734 | + if (result) |
6735 | + printk(KERN_ERR "toi_do_io returned %d.\n", result); | |
6736 | + return result; | |
2380c486 JR |
6737 | +} |
6738 | + | |
7e46296a | 6739 | +dev_t get_header_dev_t(void) |
2380c486 | 6740 | +{ |
7e46296a | 6741 | + return prio_chain_head->dev_t; |
2380c486 JR |
6742 | +} |
6743 | + | |
7e46296a | 6744 | +struct block_device *get_header_bdev(void) |
2380c486 | 6745 | +{ |
7e46296a AM |
6746 | + return prio_chain_head->bdev; |
6747 | +} | |
2380c486 | 6748 | + |
7e46296a AM |
6749 | +unsigned long get_headerblock(void) |
6750 | +{ | |
6751 | + return prio_chain_head->blocks.first->start << | |
6752 | + prio_chain_head->bmap_shift; | |
6753 | +} | |
2380c486 | 6754 | + |
7e46296a AM |
6755 | +int get_main_pool_phys_params(void) |
6756 | +{ | |
6757 | + struct toi_bdev_info *this = prio_chain_head; | |
6758 | + int result; | |
2380c486 | 6759 | + |
7e46296a AM |
6760 | + while (this) { |
6761 | + result = this->allocator->bio_allocator_ops->bmap(this); | |
6762 | + if (result) | |
6763 | + return result; | |
6764 | + this = this->next; | |
6765 | + } | |
2380c486 | 6766 | + |
7e46296a | 6767 | + return 0; |
2380c486 JR |
6768 | +} |
6769 | + | |
7e46296a | 6770 | +static int apply_header_reservation(void) |
2380c486 | 6771 | +{ |
7e46296a | 6772 | + int i; |
2380c486 | 6773 | + |
7e46296a AM |
6774 | + if (!header_pages_reserved) { |
6775 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6776 | + "No header pages reserved at the moment."); | |
6777 | + return 0; | |
6778 | + } | |
2380c486 | 6779 | + |
7e46296a | 6780 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Applying header reservation."); |
2380c486 | 6781 | + |
7e46296a AM |
6782 | + /* Apply header space reservation */ |
6783 | + toi_extent_state_goto_start(); | |
2380c486 | 6784 | + |
7e46296a AM |
6785 | + for (i = 0; i < header_pages_reserved; i++) |
6786 | + if (go_next_page(1, 0)) | |
e876a0dd | 6787 | + return -ENOSPC; |
2380c486 | 6788 | + |
7e46296a AM |
6789 | + /* The end of header pages will be the start of pageset 2 */ |
6790 | + toi_extent_state_save(2); | |
2380c486 | 6791 | + |
7e46296a AM |
6792 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6793 | + "Finished applying header reservation."); | |
6794 | + return 0; | |
6795 | +} | |
2380c486 | 6796 | + |
7e46296a AM |
6797 | +static int toi_bio_register_storage(void) |
6798 | +{ | |
6799 | + int result = 0; | |
6800 | + struct toi_module_ops *this_module; | |
2380c486 | 6801 | + |
7e46296a AM |
6802 | + list_for_each_entry(this_module, &toi_modules, module_list) { |
6803 | + if (!this_module->enabled || | |
6804 | + this_module->type != BIO_ALLOCATOR_MODULE) | |
2380c486 | 6805 | + continue; |
7e46296a AM |
6806 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6807 | + "Registering storage from %s.", | |
6808 | + this_module->name); | |
6809 | + result = this_module->bio_allocator_ops->register_storage(); | |
6810 | + if (result) | |
6811 | + break; | |
2380c486 JR |
6812 | + } |
6813 | + | |
0ada99ac | 6814 | + return result; |
2380c486 JR |
6815 | +} |
6816 | + | |
7e46296a | 6817 | +int toi_bio_allocate_storage(unsigned long request) |
2380c486 | 6818 | +{ |
7e46296a AM |
6819 | + struct toi_bdev_info *chain = prio_chain_head; |
6820 | + unsigned long to_get = request; | |
6821 | + unsigned long extra_pages, needed; | |
6822 | + int no_free = 0; | |
2380c486 | 6823 | + |
7e46296a AM |
6824 | + if (!chain) { |
6825 | + int result = toi_bio_register_storage(); | |
5dd10c98 AM |
6826 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: " |
6827 | + "Registering storage."); | |
7e46296a AM |
6828 | + if (result) |
6829 | + return 0; | |
6830 | + chain = prio_chain_head; | |
5dd10c98 AM |
6831 | + if (!chain) { |
6832 | + printk("TuxOnIce: No storage was registered.\n"); | |
6833 | + return 0; | |
6834 | + } | |
7e46296a | 6835 | + } |
5dd10c98 | 6836 | + |
7e46296a AM |
6837 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: " |
6838 | + "Request is %lu pages.", request); | |
6839 | + extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long) | |
6840 | + + sizeof(int)), PAGE_SIZE); | |
6841 | + needed = request + extra_pages + header_pages_reserved; | |
6842 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding %lu extra pages and %lu " | |
6843 | + "for header => %lu.", | |
6844 | + extra_pages, header_pages_reserved, needed); | |
6845 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Already allocated %lu pages.", | |
6846 | + raw_pages_allocd); | |
2380c486 | 6847 | + |
7e46296a AM |
6848 | + to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : 0; |
6849 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Need to get %lu pages.", to_get); | |
2380c486 | 6850 | + |
7e46296a AM |
6851 | + if (!to_get) |
6852 | + return apply_header_reservation(); | |
2380c486 | 6853 | + |
7e46296a | 6854 | + while (to_get && chain) { |
5dd10c98 AM |
6855 | + int num_group = devices_of_same_priority(chain); |
6856 | + int divisor = num_group - no_free; | |
7e46296a AM |
6857 | + int i; |
6858 | + unsigned long portion = DIV_ROUND_UP(to_get, divisor); | |
6859 | + unsigned long got = 0; | |
6860 | + unsigned long got_this_round = 0; | |
6861 | + struct toi_bdev_info *top = chain; | |
2380c486 | 6862 | + |
7e46296a AM |
6863 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6864 | + " Start of loop. To get is %lu. Divisor is %d.", | |
6865 | + to_get, divisor); | |
6866 | + no_free = 0; | |
2380c486 | 6867 | + |
7e46296a AM |
6868 | + /* |
6869 | + * We're aiming to spread the allocated storage as evenly | |
6870 | + * as possible, but we also want to get all the storage we | |
6871 | + * can off this priority. | |
6872 | + */ | |
5dd10c98 | 6873 | + for (i = 0; i < num_group; i++) { |
7e46296a AM |
6874 | + struct toi_bio_allocator_ops *ops = |
6875 | + chain->allocator->bio_allocator_ops; | |
6876 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6877 | + " Asking for %lu pages from chain %p.", | |
6878 | + portion, chain); | |
6879 | + got = ops->allocate_storage(chain, portion); | |
6880 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6881 | + " Got %lu pages from allocator %p.", | |
6882 | + got, chain); | |
6883 | + if (!got) | |
6884 | + no_free++; | |
6885 | + got_this_round += got; | |
6886 | + chain = chain->next; | |
6887 | + } | |
6888 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " Loop finished. Got a " | |
6889 | + "total of %lu pages from %d allocators.", | |
6890 | + got_this_round, divisor - no_free); | |
6891 | + | |
6892 | + raw_pages_allocd += got_this_round; | |
6893 | + to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : | |
6894 | + 0; | |
6895 | + | |
6896 | + /* | |
6897 | + * If we got anything from chains of this priority and we | |
6898 | + * still have storage to allocate, go over this priority | |
6899 | + * again. | |
6900 | + */ | |
6901 | + if (got_this_round && to_get) | |
6902 | + chain = top; | |
6903 | + else | |
6904 | + no_free = 0; | |
6905 | + } | |
6906 | + | |
6907 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Finished allocating. Calling " | |
6908 | + "get_main_pool_phys_params"); | |
6909 | + /* Now let swap allocator bmap the pages */ | |
6910 | + get_main_pool_phys_params(); | |
6911 | + | |
6912 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Done. Reserving header."); | |
6913 | + return apply_header_reservation(); | |
6914 | +} | |
5dd10c98 AM |
6915 | + |
6916 | +void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd) | |
6917 | +{ | |
6918 | + int i = 0; | |
6919 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
6920 | + | |
6921 | + while (cur_chain) { | |
6922 | + cur_chain->pages_used = bkd->pages_used[i]; | |
6923 | + cur_chain = cur_chain->next; | |
6924 | + i++; | |
6925 | + } | |
6926 | +} | |
6927 | + | |
6928 | +int toi_bio_chains_debug_info(char *buffer, int size) | |
6929 | +{ | |
6930 | + /* Show what we actually used */ | |
6931 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
6932 | + int len = 0; | |
6933 | + | |
6934 | + while (cur_chain) { | |
6935 | + len += scnprintf(buffer + len, size - len, " Used %lu pages " | |
6936 | + "from %s.\n", cur_chain->pages_used, | |
6937 | + cur_chain->name); | |
6938 | + cur_chain = cur_chain->next; | |
6939 | + } | |
6940 | + | |
6941 | + return len; | |
6942 | +} | |
7e46296a AM |
6943 | diff --git a/kernel/power/tuxonice_bio_core.c b/kernel/power/tuxonice_bio_core.c |
6944 | new file mode 100644 | |
cacc47f8 | 6945 | index 0000000..414d249 |
7e46296a AM |
6946 | --- /dev/null |
6947 | +++ b/kernel/power/tuxonice_bio_core.c | |
cacc47f8 | 6948 | @@ -0,0 +1,1822 @@ |
7e46296a AM |
6949 | +/* |
6950 | + * kernel/power/tuxonice_bio.c | |
2380c486 | 6951 | + * |
5dd10c98 | 6952 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 6953 | + * |
7e46296a | 6954 | + * Distributed under GPLv2. |
2380c486 | 6955 | + * |
7e46296a AM |
6956 | + * This file contains block io functions for TuxOnIce. These are |
6957 | + * used by the swapwriter and it is planned that they will also | |
6958 | + * be used by the NFSwriter. | |
2380c486 | 6959 | + * |
7e46296a | 6960 | + */ |
2380c486 | 6961 | + |
7e46296a AM |
6962 | +#include <linux/blkdev.h> |
6963 | +#include <linux/syscalls.h> | |
6964 | +#include <linux/suspend.h> | |
6965 | +#include <linux/ctype.h> | |
cacc47f8 | 6966 | +#include <linux/fs_uuid.h> |
7e46296a | 6967 | +#include <scsi/scsi_scan.h> |
2380c486 | 6968 | + |
7e46296a AM |
6969 | +#include "tuxonice.h" |
6970 | +#include "tuxonice_sysfs.h" | |
6971 | +#include "tuxonice_modules.h" | |
6972 | +#include "tuxonice_prepare_image.h" | |
6973 | +#include "tuxonice_bio.h" | |
6974 | +#include "tuxonice_ui.h" | |
6975 | +#include "tuxonice_alloc.h" | |
6976 | +#include "tuxonice_io.h" | |
6977 | +#include "tuxonice_builtin.h" | |
6978 | +#include "tuxonice_bio_internal.h" | |
2380c486 | 6979 | + |
7e46296a AM |
6980 | +#define MEMORY_ONLY 1 |
6981 | +#define THROTTLE_WAIT 2 | |
2380c486 | 6982 | + |
7e46296a AM |
6983 | +/* #define MEASURE_MUTEX_CONTENTION */ |
6984 | +#ifndef MEASURE_MUTEX_CONTENTION | |
6985 | +#define my_mutex_lock(index, the_lock) mutex_lock(the_lock) | |
6986 | +#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock) | |
6987 | +#else | |
6988 | +unsigned long mutex_times[2][2][NR_CPUS]; | |
6989 | +#define my_mutex_lock(index, the_lock) do { \ | |
6990 | + int have_mutex; \ | |
6991 | + have_mutex = mutex_trylock(the_lock); \ | |
6992 | + if (!have_mutex) { \ | |
6993 | + mutex_lock(the_lock); \ | |
6994 | + mutex_times[index][0][smp_processor_id()]++; \ | |
6995 | + } else { \ | |
6996 | + mutex_times[index][1][smp_processor_id()]++; \ | |
6997 | + } | |
2380c486 | 6998 | + |
7e46296a AM |
6999 | +#define my_mutex_unlock(index, the_lock) \ |
7000 | + mutex_unlock(the_lock); \ | |
7001 | +} while (0) | |
7002 | +#endif | |
2380c486 | 7003 | + |
7e46296a AM |
7004 | +static int page_idx, reset_idx; |
7005 | + | |
7006 | +static int target_outstanding_io = 1024; | |
7007 | +static int max_outstanding_writes, max_outstanding_reads; | |
7008 | + | |
7009 | +static struct page *bio_queue_head, *bio_queue_tail; | |
7010 | +static atomic_t toi_bio_queue_size; | |
7011 | +static DEFINE_SPINLOCK(bio_queue_lock); | |
7012 | + | |
7013 | +static int free_mem_throttle, throughput_throttle; | |
7014 | +int more_readahead = 1; | |
7015 | +static struct page *readahead_list_head, *readahead_list_tail; | |
7016 | + | |
7017 | +static struct page *waiting_on; | |
7018 | + | |
7019 | +static atomic_t toi_io_in_progress, toi_io_done; | |
7020 | +static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait); | |
7021 | + | |
7022 | +int current_stream; | |
7023 | +/* Not static, so that the allocators can setup and complete | |
7024 | + * writing the header */ | |
7025 | +char *toi_writer_buffer; | |
7026 | +int toi_writer_buffer_posn; | |
7027 | + | |
7028 | +static DEFINE_MUTEX(toi_bio_mutex); | |
7029 | +static DEFINE_MUTEX(toi_bio_readahead_mutex); | |
7030 | + | |
7031 | +static struct task_struct *toi_queue_flusher; | |
7032 | +static int toi_bio_queue_flush_pages(int dedicated_thread); | |
7033 | + | |
7034 | +struct toi_module_ops toi_blockwriter_ops; | |
7035 | + | |
7036 | +#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \ | |
7037 | + atomic_read(&toi_bio_queue_size)) | |
7038 | + | |
7039 | +unsigned long raw_pages_allocd, header_pages_reserved; | |
2380c486 JR |
7040 | + |
7041 | +/** | |
7e46296a AM |
7042 | + * set_free_mem_throttle - set the point where we pause to avoid oom. |
7043 | + * | |
7044 | + * Initially, this value is zero, but when we first fail to allocate memory, | |
7045 | + * we set it (plus a buffer) and thereafter throttle i/o once that limit is | |
7046 | + * reached. | |
2380c486 | 7047 | + **/ |
7e46296a | 7048 | +static void set_free_mem_throttle(void) |
2380c486 | 7049 | +{ |
7e46296a | 7050 | + int new_throttle = nr_unallocated_buffer_pages() + 256; |
2380c486 | 7051 | + |
7e46296a AM |
7052 | + if (new_throttle > free_mem_throttle) |
7053 | + free_mem_throttle = new_throttle; | |
2380c486 JR |
7054 | +} |
7055 | + | |
7e46296a AM |
7056 | +#define NUM_REASONS 7 |
7057 | +static atomic_t reasons[NUM_REASONS]; | |
7058 | +static char *reason_name[NUM_REASONS] = { | |
7059 | + "readahead not ready", | |
7060 | + "bio allocation", | |
7061 | + "synchronous I/O", | |
7062 | + "toi_bio_get_new_page", | |
7063 | + "memory low", | |
7064 | + "readahead buffer allocation", | |
7065 | + "throughput_throttle", | |
7066 | +}; | |
7067 | + | |
7068 | +/* User Specified Parameters. */ | |
7069 | +unsigned long resume_firstblock; | |
7070 | +dev_t resume_dev_t; | |
7071 | +struct block_device *resume_block_device; | |
7072 | +static atomic_t resume_bdev_open_count; | |
7073 | + | |
7074 | +struct block_device *header_block_device; | |
7075 | + | |
2380c486 | 7076 | +/** |
7e46296a AM |
7077 | + * toi_open_bdev: Open a bdev at resume time. |
7078 | + * | |
7079 | + * index: The swap index. May be MAX_SWAPFILES for the resume_dev_t | |
7080 | + * (the user can have resume= pointing at a swap partition/file that isn't | |
7081 | + * swapon'd when they hibernate. MAX_SWAPFILES+1 for the first page of the | |
7082 | + * header. It will be from a swap partition that was enabled when we hibernated, | |
7083 | + * but we don't know it's real index until we read that first page. | |
7084 | + * dev_t: The device major/minor. | |
7085 | + * display_errs: Whether to try to do this quietly. | |
7086 | + * | |
7087 | + * We stored a dev_t in the image header. Open the matching device without | |
7088 | + * requiring /dev/<whatever> in most cases and record the details needed | |
7089 | + * to close it later and avoid duplicating work. | |
7090 | + */ | |
7091 | +struct block_device *toi_open_bdev(char *uuid, dev_t default_device, | |
7092 | + int display_errs) | |
7093 | +{ | |
7094 | + struct block_device *bdev; | |
7095 | + dev_t device = default_device; | |
7096 | + char buf[32]; | |
7097 | + | |
7098 | + if (uuid) { | |
cacc47f8 AM |
7099 | + struct fs_info seek; |
7100 | + strncpy((char *) &seek.uuid, uuid, 16); | |
7101 | + seek.dev_t = 0; | |
7102 | + seek.last_mount_size = 0; | |
7103 | + device = blk_lookup_fs_info(&seek); | |
7e46296a AM |
7104 | + if (!device) { |
7105 | + device = default_device; | |
7106 | + printk(KERN_DEBUG "Unable to resolve uuid. Falling back" | |
7107 | + " to dev_t.\n"); | |
7108 | + } else | |
7109 | + printk(KERN_DEBUG "Resolved uuid to device %s.\n", | |
7110 | + format_dev_t(buf, device)); | |
2380c486 JR |
7111 | + } |
7112 | + | |
7e46296a AM |
7113 | + if (!device) { |
7114 | + printk(KERN_ERR "TuxOnIce attempting to open a " | |
7115 | + "blank dev_t!\n"); | |
7116 | + dump_stack(); | |
7117 | + return NULL; | |
2380c486 | 7118 | + } |
5dd10c98 | 7119 | + bdev = toi_open_by_devnum(device); |
2380c486 | 7120 | + |
7e46296a AM |
7121 | + if (IS_ERR(bdev) || !bdev) { |
7122 | + if (display_errs) | |
7123 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
7124 | + "Failed to get access to block device " | |
7125 | + "\"%x\" (error %d).\n Maybe you need " | |
7126 | + "to run mknod and/or lvmsetup in an " | |
7127 | + "initrd/ramfs?", device, bdev); | |
7128 | + return ERR_PTR(-EINVAL); | |
2380c486 | 7129 | + } |
7e46296a AM |
7130 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
7131 | + "TuxOnIce got bdev %p for dev_t %x.", | |
7132 | + bdev, device); | |
2380c486 | 7133 | + |
7e46296a AM |
7134 | + return bdev; |
7135 | +} | |
2380c486 | 7136 | + |
7e46296a AM |
7137 | +static void toi_bio_reserve_header_space(unsigned long request) |
7138 | +{ | |
7139 | + header_pages_reserved = request; | |
2380c486 JR |
7140 | +} |
7141 | + | |
7142 | +/** | |
7e46296a AM |
7143 | + * do_bio_wait - wait for some TuxOnIce I/O to complete |
7144 | + * @reason: The array index of the reason we're waiting. | |
2380c486 | 7145 | + * |
7e46296a AM |
7146 | + * Wait for a particular page of I/O if we're after a particular page. |
7147 | + * If we're not after a particular page, wait instead for all in flight | |
7148 | + * I/O to be completed or for us to have enough free memory to be able | |
7149 | + * to submit more I/O. | |
0ada99ac | 7150 | + * |
7e46296a | 7151 | + * If we wait, we also update our statistics regarding why we waited. |
2380c486 | 7152 | + **/ |
7e46296a | 7153 | +static void do_bio_wait(int reason) |
2380c486 | 7154 | +{ |
7e46296a | 7155 | + struct page *was_waiting_on = waiting_on; |
2380c486 | 7156 | + |
7e46296a AM |
7157 | + /* On SMP, waiting_on can be reset, so we make a copy */ |
7158 | + if (was_waiting_on) { | |
5dd10c98 AM |
7159 | + wait_on_page_locked(was_waiting_on); |
7160 | + atomic_inc(&reasons[reason]); | |
7e46296a AM |
7161 | + } else { |
7162 | + atomic_inc(&reasons[reason]); | |
2380c486 | 7163 | + |
7e46296a AM |
7164 | + wait_event(num_in_progress_wait, |
7165 | + !atomic_read(&toi_io_in_progress) || | |
7166 | + nr_unallocated_buffer_pages() > free_mem_throttle); | |
2380c486 | 7167 | + } |
2380c486 JR |
7168 | +} |
7169 | + | |
7170 | +/** | |
7e46296a AM |
7171 | + * throttle_if_needed - wait for I/O completion if throttle points are reached |
7172 | + * @flags: What to check and how to act. | |
7173 | + * | |
7174 | + * Check whether we need to wait for some I/O to complete. We always check | |
7175 | + * whether we have enough memory available, but may also (depending upon | |
7176 | + * @reason) check if the throughput throttle limit has been reached. | |
2380c486 | 7177 | + **/ |
7e46296a | 7178 | +static int throttle_if_needed(int flags) |
2380c486 | 7179 | +{ |
7e46296a | 7180 | + int free_pages = nr_unallocated_buffer_pages(); |
2380c486 | 7181 | + |
7e46296a AM |
7182 | + /* Getting low on memory and I/O is in progress? */ |
7183 | + while (unlikely(free_pages < free_mem_throttle) && | |
e876a0dd AM |
7184 | + atomic_read(&toi_io_in_progress) && |
7185 | + !test_result_state(TOI_ABORTED)) { | |
7e46296a AM |
7186 | + if (!(flags & THROTTLE_WAIT)) |
7187 | + return -ENOMEM; | |
7188 | + do_bio_wait(4); | |
7189 | + free_pages = nr_unallocated_buffer_pages(); | |
7190 | + } | |
7191 | + | |
7192 | + while (!(flags & MEMORY_ONLY) && throughput_throttle && | |
e876a0dd AM |
7193 | + TOTAL_OUTSTANDING_IO >= throughput_throttle && |
7194 | + !test_result_state(TOI_ABORTED)) { | |
7e46296a AM |
7195 | + int result = toi_bio_queue_flush_pages(0); |
7196 | + if (result) | |
7197 | + return result; | |
7198 | + atomic_inc(&reasons[6]); | |
7199 | + wait_event(num_in_progress_wait, | |
7200 | + !atomic_read(&toi_io_in_progress) || | |
7201 | + TOTAL_OUTSTANDING_IO < throughput_throttle); | |
2380c486 JR |
7202 | + } |
7203 | + | |
7204 | + return 0; | |
7205 | +} | |
7206 | + | |
7207 | +/** | |
7e46296a AM |
7208 | + * update_throughput_throttle - update the raw throughput throttle |
7209 | + * @jif_index: The number of times this function has been called. | |
7210 | + * | |
5dd10c98 AM |
7211 | + * This function is called four times per second by the core, and used to limit |
7212 | + * the amount of I/O we submit at once, spreading out our waiting through the | |
7e46296a AM |
7213 | + * whole job and letting userui get an opportunity to do its work. |
7214 | + * | |
5dd10c98 | 7215 | + * We don't start limiting I/O until 1/4s has gone so that we get a |
7e46296a AM |
7216 | + * decent sample for our initial limit, and keep updating it because |
7217 | + * throughput may vary (on rotating media, eg) with our block number. | |
7218 | + * | |
7219 | + * We throttle to 1/10s worth of I/O. | |
2380c486 | 7220 | + **/ |
7e46296a | 7221 | +static void update_throughput_throttle(int jif_index) |
2380c486 | 7222 | +{ |
7e46296a | 7223 | + int done = atomic_read(&toi_io_done); |
5dd10c98 | 7224 | + throughput_throttle = done * 2 / 5 / jif_index; |
2380c486 JR |
7225 | +} |
7226 | + | |
7227 | +/** | |
7e46296a | 7228 | + * toi_finish_all_io - wait for all outstanding i/o to complete |
2380c486 | 7229 | + * |
7e46296a | 7230 | + * Flush any queued but unsubmitted I/O and wait for it all to complete. |
2380c486 | 7231 | + **/ |
7e46296a | 7232 | +static int toi_finish_all_io(void) |
2380c486 | 7233 | +{ |
7e46296a AM |
7234 | + int result = toi_bio_queue_flush_pages(0); |
7235 | + wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO); | |
2380c486 JR |
7236 | + return result; |
7237 | +} | |
7238 | + | |
7239 | +/** | |
7e46296a AM |
7240 | + * toi_end_bio - bio completion function. |
7241 | + * @bio: bio that has completed. | |
7242 | + * @err: Error value. Yes, like end_swap_bio_read, we ignore it. | |
2380c486 | 7243 | + * |
7e46296a AM |
7244 | + * Function called by the block driver from interrupt context when I/O is |
7245 | + * completed. If we were writing the page, we want to free it and will have | |
7246 | + * set bio->bi_private to the parameter we should use in telling the page | |
7247 | + * allocation accounting code what the page was allocated for. If we're | |
7248 | + * reading the page, it will be in the singly linked list made from | |
7249 | + * page->private pointers. | |
2380c486 | 7250 | + **/ |
7e46296a | 7251 | +static void toi_end_bio(struct bio *bio, int err) |
2380c486 | 7252 | +{ |
7e46296a | 7253 | + struct page *page = bio->bi_io_vec[0].bv_page; |
2380c486 | 7254 | + |
7e46296a | 7255 | + BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); |
2380c486 | 7256 | + |
7e46296a AM |
7257 | + unlock_page(page); |
7258 | + bio_put(bio); | |
2380c486 | 7259 | + |
7e46296a AM |
7260 | + if (waiting_on == page) |
7261 | + waiting_on = NULL; | |
2380c486 | 7262 | + |
7e46296a | 7263 | + put_page(page); |
2380c486 | 7264 | + |
7e46296a AM |
7265 | + if (bio->bi_private) |
7266 | + toi__free_page((int) ((unsigned long) bio->bi_private) , page); | |
2380c486 | 7267 | + |
7e46296a | 7268 | + bio_put(bio); |
2380c486 | 7269 | + |
7e46296a AM |
7270 | + atomic_dec(&toi_io_in_progress); |
7271 | + atomic_inc(&toi_io_done); | |
2380c486 | 7272 | + |
7e46296a | 7273 | + wake_up(&num_in_progress_wait); |
2380c486 JR |
7274 | +} |
7275 | + | |
7276 | +/** | |
7e46296a AM |
7277 | + * submit - submit BIO request |
7278 | + * @writing: READ or WRITE. | |
7279 | + * @dev: The block device we're using. | |
7280 | + * @first_block: The first sector we're using. | |
7281 | + * @page: The page being used for I/O. | |
7282 | + * @free_group: If writing, the group that was used in allocating the page | |
7283 | + * and which will be used in freeing the page from the completion | |
7284 | + * routine. | |
2380c486 | 7285 | + * |
7e46296a AM |
7286 | + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the |
7287 | + * textbook - allocate and initialize the bio. If we're writing, make sure | |
7288 | + * the page is marked as dirty. Then submit it and carry on." | |
7289 | + * | |
7290 | + * If we're just testing the speed of our own code, we fake having done all | |
7291 | + * the hard work and all toi_end_bio immediately. | |
7292 | + **/ | |
7293 | +static int submit(int writing, struct block_device *dev, sector_t first_block, | |
7294 | + struct page *page, int free_group) | |
2380c486 | 7295 | +{ |
7e46296a AM |
7296 | + struct bio *bio = NULL; |
7297 | + int cur_outstanding_io, result; | |
2380c486 | 7298 | + |
7e46296a AM |
7299 | + /* |
7300 | + * Shouldn't throttle if reading - can deadlock in the single | |
7301 | + * threaded case as pages are only freed when we use the | |
7302 | + * readahead. | |
7303 | + */ | |
7304 | + if (writing) { | |
7305 | + result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT); | |
7306 | + if (result) | |
7307 | + return result; | |
7308 | + } | |
7309 | + | |
7310 | + while (!bio) { | |
7311 | + bio = bio_alloc(TOI_ATOMIC_GFP, 1); | |
7312 | + if (!bio) { | |
7313 | + set_free_mem_throttle(); | |
7314 | + do_bio_wait(1); | |
2380c486 | 7315 | + } |
0ada99ac | 7316 | + } |
2380c486 | 7317 | + |
7e46296a AM |
7318 | + bio->bi_bdev = dev; |
7319 | + bio->bi_sector = first_block; | |
7320 | + bio->bi_private = (void *) ((unsigned long) free_group); | |
7321 | + bio->bi_end_io = toi_end_bio; | |
2380c486 | 7322 | + |
7e46296a AM |
7323 | + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { |
7324 | + printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n", | |
7325 | + (unsigned long long) first_block); | |
7326 | + bio_put(bio); | |
7327 | + return -EFAULT; | |
7328 | + } | |
2380c486 | 7329 | + |
7e46296a | 7330 | + bio_get(bio); |
2380c486 | 7331 | + |
7e46296a AM |
7332 | + cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress); |
7333 | + if (writing) { | |
7334 | + if (cur_outstanding_io > max_outstanding_writes) | |
7335 | + max_outstanding_writes = cur_outstanding_io; | |
7336 | + } else { | |
7337 | + if (cur_outstanding_io > max_outstanding_reads) | |
7338 | + max_outstanding_reads = cur_outstanding_io; | |
7339 | + } | |
2380c486 | 7340 | + |
7e46296a AM |
7341 | + |
7342 | + if (unlikely(test_action_state(TOI_TEST_BIO))) { | |
7343 | + /* Fake having done the hard work */ | |
7344 | + set_bit(BIO_UPTODATE, &bio->bi_flags); | |
7345 | + toi_end_bio(bio, 0); | |
7346 | + } else | |
7347 | + submit_bio(writing | (1 << BIO_RW_SYNCIO) | | |
7348 | + (1 << BIO_RW_TUXONICE) | | |
7349 | + (1 << BIO_RW_UNPLUG), bio); | |
7350 | + | |
7351 | + return 0; | |
2380c486 JR |
7352 | +} |
7353 | + | |
7354 | +/** | |
7e46296a AM |
7355 | + * toi_do_io: Prepare to do some i/o on a page and submit or batch it. |
7356 | + * | |
7357 | + * @writing: Whether reading or writing. | |
7358 | + * @bdev: The block device which we're using. | |
7359 | + * @block0: The first sector we're reading or writing. | |
7360 | + * @page: The page on which I/O is being done. | |
7361 | + * @readahead_index: If doing readahead, the index (reset this flag when done). | |
7362 | + * @syncio: Whether the i/o is being done synchronously. | |
7363 | + * | |
7364 | + * Prepare and start a read or write operation. | |
7365 | + * | |
7366 | + * Note that we always work with our own page. If writing, we might be given a | |
7367 | + * compression buffer that will immediately be used to start compressing the | |
7368 | + * next page. For reading, we do readahead and therefore don't know the final | |
7369 | + * address where the data needs to go. | |
2380c486 | 7370 | + **/ |
7e46296a AM |
7371 | +int toi_do_io(int writing, struct block_device *bdev, long block0, |
7372 | + struct page *page, int is_readahead, int syncio, int free_group) | |
2380c486 | 7373 | +{ |
7e46296a | 7374 | + page->private = 0; |
2380c486 | 7375 | + |
7e46296a AM |
7376 | + /* Do here so we don't race against toi_bio_get_next_page_read */ |
7377 | + lock_page(page); | |
2380c486 | 7378 | + |
7e46296a AM |
7379 | + if (is_readahead) { |
7380 | + if (readahead_list_head) | |
7381 | + readahead_list_tail->private = (unsigned long) page; | |
7382 | + else | |
7383 | + readahead_list_head = page; | |
2380c486 | 7384 | + |
7e46296a AM |
7385 | + readahead_list_tail = page; |
7386 | + } | |
2380c486 | 7387 | + |
7e46296a AM |
7388 | + /* Done before submitting to avoid races. */ |
7389 | + if (syncio) | |
7390 | + waiting_on = page; | |
7391 | + | |
7392 | + /* Submit the page */ | |
7393 | + get_page(page); | |
7394 | + | |
7395 | + if (submit(writing, bdev, block0, page, free_group)) | |
7396 | + return -EFAULT; | |
7397 | + | |
7398 | + if (syncio) | |
7399 | + do_bio_wait(2); | |
7400 | + | |
7401 | + return 0; | |
2380c486 JR |
7402 | +} |
7403 | + | |
7404 | +/** | |
7e46296a AM |
7405 | + * toi_bdev_page_io - simpler interface to do directly i/o on a single page |
7406 | + * @writing: Whether reading or writing. | |
7407 | + * @bdev: Block device on which we're operating. | |
7408 | + * @pos: Sector at which page to read or write starts. | |
7409 | + * @page: Page to be read/written. | |
7410 | + * | |
7411 | + * A simple interface to submit a page of I/O and wait for its completion. | |
7412 | + * The caller must free the page used. | |
2380c486 | 7413 | + **/ |
7e46296a AM |
7414 | +static int toi_bdev_page_io(int writing, struct block_device *bdev, |
7415 | + long pos, struct page *page) | |
2380c486 | 7416 | +{ |
7e46296a | 7417 | + return toi_do_io(writing, bdev, pos, page, 0, 1, 0); |
2380c486 JR |
7418 | +} |
7419 | + | |
7420 | +/** | |
7e46296a AM |
7421 | + * toi_bio_memory_needed - report the amount of memory needed for block i/o |
7422 | + * | |
7423 | + * We want to have at least enough memory so as to have target_outstanding_io | |
7424 | + * or more transactions on the fly at once. If we can do more, fine. | |
2380c486 | 7425 | + **/ |
7e46296a | 7426 | +static int toi_bio_memory_needed(void) |
2380c486 | 7427 | +{ |
7e46296a AM |
7428 | + return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) + |
7429 | + sizeof(struct bio)); | |
2380c486 JR |
7430 | +} |
7431 | + | |
7432 | +/** | |
7e46296a AM |
7433 | + * toi_bio_print_debug_stats - put out debugging info in the buffer provided |
7434 | + * @buffer: A buffer of size @size into which text should be placed. | |
7435 | + * @size: The size of @buffer. | |
7436 | + * | |
7437 | + * Fill a buffer with debugging info. This is used for both our debug_info sysfs | |
7438 | + * entry and for recording the same info in dmesg. | |
2380c486 | 7439 | + **/ |
7e46296a | 7440 | +static int toi_bio_print_debug_stats(char *buffer, int size) |
2380c486 | 7441 | +{ |
7e46296a | 7442 | + int len = 0; |
2380c486 | 7443 | + |
7e46296a AM |
7444 | + if (toiActiveAllocator != &toi_blockwriter_ops) { |
7445 | + len = scnprintf(buffer, size, | |
7446 | + "- Block I/O inactive.\n"); | |
7447 | + return len; | |
2380c486 JR |
7448 | + } |
7449 | + | |
7e46296a | 7450 | + len = scnprintf(buffer, size, "- Block I/O active.\n"); |
2380c486 | 7451 | + |
5dd10c98 AM |
7452 | + len += toi_bio_chains_debug_info(buffer + len, size - len); |
7453 | + | |
7e46296a AM |
7454 | + len += scnprintf(buffer + len, size - len, |
7455 | + "- Max outstanding reads %d. Max writes %d.\n", | |
7456 | + max_outstanding_reads, max_outstanding_writes); | |
2380c486 | 7457 | + |
7e46296a AM |
7458 | + len += scnprintf(buffer + len, size - len, |
7459 | + " Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n", | |
7460 | + target_outstanding_io, | |
7461 | + PAGE_SIZE, (unsigned int) sizeof(struct request), | |
7462 | + (unsigned int) sizeof(struct bio), toi_bio_memory_needed()); | |
2380c486 | 7463 | + |
7e46296a AM |
7464 | +#ifdef MEASURE_MUTEX_CONTENTION |
7465 | + { | |
7466 | + int i; | |
2380c486 | 7467 | + |
7e46296a AM |
7468 | + len += scnprintf(buffer + len, size - len, |
7469 | + " Mutex contention while reading:\n Contended Free\n"); | |
2380c486 | 7470 | + |
7e46296a AM |
7471 | + for_each_online_cpu(i) |
7472 | + len += scnprintf(buffer + len, size - len, | |
7473 | + " %9lu %9lu\n", | |
7474 | + mutex_times[0][0][i], mutex_times[0][1][i]); | |
2380c486 | 7475 | + |
7e46296a AM |
7476 | + len += scnprintf(buffer + len, size - len, |
7477 | + " Mutex contention while writing:\n Contended Free\n"); | |
2380c486 | 7478 | + |
7e46296a AM |
7479 | + for_each_online_cpu(i) |
7480 | + len += scnprintf(buffer + len, size - len, | |
7481 | + " %9lu %9lu\n", | |
7482 | + mutex_times[1][0][i], mutex_times[1][1][i]); | |
2380c486 | 7483 | + |
7e46296a | 7484 | + } |
2380c486 | 7485 | +#endif |
2380c486 | 7486 | + |
7e46296a AM |
7487 | + return len + scnprintf(buffer + len, size - len, |
7488 | + " Free mem throttle point reached %d.\n", free_mem_throttle); | |
7489 | +} | |
2380c486 | 7490 | + |
7e46296a AM |
7491 | +static int total_header_bytes; |
7492 | +static int unowned; | |
2380c486 | 7493 | + |
7e46296a AM |
7494 | +void debug_broken_header(void) |
7495 | +{ | |
7496 | + printk(KERN_DEBUG "Image header too big for size allocated!\n"); | |
7497 | + print_toi_header_storage_for_modules(); | |
7498 | + printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed()); | |
5dd10c98 | 7499 | + printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header)); |
7e46296a AM |
7500 | + printk(KERN_DEBUG "Total unowned : %d.\n", unowned); |
7501 | + printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes, | |
7502 | + DIV_ROUND_UP(total_header_bytes, PAGE_SIZE)); | |
7503 | + printk(KERN_DEBUG "Space needed now : %ld.\n", | |
7504 | + get_header_storage_needed()); | |
7505 | + dump_block_chains(); | |
7506 | + abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small."); | |
7507 | +} | |
2380c486 | 7508 | + |
7e46296a AM |
7509 | +/** |
7510 | + * toi_rw_init - prepare to read or write a stream in the image | |
7511 | + * @writing: Whether reading or writing. | |
7512 | + * @stream number: Section of the image being processed. | |
2380c486 | 7513 | + * |
7e46296a AM |
7514 | + * Prepare to read or write a section ('stream') in the image. |
7515 | + **/ | |
7516 | +static int toi_rw_init(int writing, int stream_number) | |
7517 | +{ | |
7518 | + if (stream_number) | |
7519 | + toi_extent_state_restore(stream_number); | |
7520 | + else | |
7521 | + toi_extent_state_goto_start(); | |
2380c486 | 7522 | + |
7e46296a AM |
7523 | + if (writing) { |
7524 | + reset_idx = 0; | |
7525 | + if (!current_stream) | |
7526 | + page_idx = 0; | |
7527 | + } else { | |
7528 | + reset_idx = 1; | |
7529 | + } | |
2380c486 | 7530 | + |
7e46296a AM |
7531 | + atomic_set(&toi_io_done, 0); |
7532 | + if (!toi_writer_buffer) | |
7533 | + toi_writer_buffer = (char *) toi_get_zeroed_page(11, | |
7534 | + TOI_ATOMIC_GFP); | |
7535 | + toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE; | |
7536 | + | |
7537 | + current_stream = stream_number; | |
7538 | + | |
7539 | + more_readahead = 1; | |
7540 | + | |
7541 | + return toi_writer_buffer ? 0 : -ENOMEM; | |
7542 | +} | |
2380c486 JR |
7543 | + |
7544 | +/** | |
7e46296a AM |
7545 | + * toi_bio_queue_write - queue a page for writing |
7546 | + * @full_buffer: Pointer to a page to be queued | |
2380c486 | 7547 | + * |
7e46296a AM |
7548 | + * Add a page to the queue to be submitted. If we're the queue flusher, |
7549 | + * we'll do this once we've dropped toi_bio_mutex, so other threads can | |
7550 | + * continue to submit I/O while we're on the slow path doing the actual | |
7551 | + * submission. | |
2380c486 | 7552 | + **/ |
7e46296a | 7553 | +static void toi_bio_queue_write(char **full_buffer) |
2380c486 | 7554 | +{ |
7e46296a AM |
7555 | + struct page *page = virt_to_page(*full_buffer); |
7556 | + unsigned long flags; | |
2380c486 | 7557 | + |
7e46296a AM |
7558 | + *full_buffer = NULL; |
7559 | + page->private = 0; | |
2380c486 | 7560 | + |
7e46296a AM |
7561 | + spin_lock_irqsave(&bio_queue_lock, flags); |
7562 | + if (!bio_queue_head) | |
7563 | + bio_queue_head = page; | |
7564 | + else | |
7565 | + bio_queue_tail->private = (unsigned long) page; | |
2380c486 | 7566 | + |
7e46296a AM |
7567 | + bio_queue_tail = page; |
7568 | + atomic_inc(&toi_bio_queue_size); | |
2380c486 | 7569 | + |
7e46296a AM |
7570 | + spin_unlock_irqrestore(&bio_queue_lock, flags); |
7571 | + wake_up(&toi_io_queue_flusher); | |
7572 | +} | |
2380c486 | 7573 | + |
7e46296a AM |
7574 | +/** |
7575 | + * toi_rw_cleanup - Cleanup after i/o. | |
7576 | + * @writing: Whether we were reading or writing. | |
7577 | + * | |
7578 | + * Flush all I/O and clean everything up after reading or writing a | |
7579 | + * section of the image. | |
7580 | + **/ | |
7581 | +static int toi_rw_cleanup(int writing) | |
7582 | +{ | |
e876a0dd | 7583 | + int i, result = 0; |
2380c486 | 7584 | + |
7e46296a AM |
7585 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_rw_cleanup."); |
7586 | + if (writing) { | |
7e46296a AM |
7587 | + if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED)) |
7588 | + toi_bio_queue_write(&toi_writer_buffer); | |
2380c486 | 7589 | + |
e876a0dd AM |
7590 | + while (bio_queue_head && !result) |
7591 | + result = toi_bio_queue_flush_pages(0); | |
2380c486 | 7592 | + |
7e46296a AM |
7593 | + if (result) |
7594 | + return result; | |
2380c486 | 7595 | + |
7e46296a AM |
7596 | + if (current_stream == 2) |
7597 | + toi_extent_state_save(1); | |
7598 | + else if (current_stream == 1) | |
7599 | + toi_extent_state_save(3); | |
2380c486 JR |
7600 | + } |
7601 | + | |
7e46296a | 7602 | + result = toi_finish_all_io(); |
2380c486 | 7603 | + |
7e46296a AM |
7604 | + while (readahead_list_head) { |
7605 | + void *next = (void *) readahead_list_head->private; | |
7606 | + toi__free_page(12, readahead_list_head); | |
7607 | + readahead_list_head = next; | |
2380c486 JR |
7608 | + } |
7609 | + | |
7e46296a | 7610 | + readahead_list_tail = NULL; |
2380c486 | 7611 | + |
7e46296a AM |
7612 | + if (!current_stream) |
7613 | + return result; | |
2380c486 | 7614 | + |
7e46296a AM |
7615 | + for (i = 0; i < NUM_REASONS; i++) { |
7616 | + if (!atomic_read(&reasons[i])) | |
7617 | + continue; | |
7618 | + printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n", | |
7619 | + reason_name[i], atomic_read(&reasons[i])); | |
7620 | + atomic_set(&reasons[i], 0); | |
2380c486 JR |
7621 | + } |
7622 | + | |
7e46296a AM |
7623 | + current_stream = 0; |
7624 | + return result; | |
2380c486 | 7625 | +} |
2380c486 | 7626 | + |
7e46296a AM |
7627 | +/** |
7628 | + * toi_start_one_readahead - start one page of readahead | |
7629 | + * @dedicated_thread: Is this a thread dedicated to doing readahead? | |
7630 | + * | |
7631 | + * Start one new page of readahead. If this is being called by a thread | |
7632 | + * whose only just is to submit readahead, don't quit because we failed | |
7633 | + * to allocate a page. | |
7634 | + **/ | |
7635 | +static int toi_start_one_readahead(int dedicated_thread) | |
2380c486 | 7636 | +{ |
7e46296a AM |
7637 | + char *buffer = NULL; |
7638 | + int oom = 0, result; | |
2380c486 | 7639 | + |
7e46296a | 7640 | + result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0); |
5dd10c98 | 7641 | + if (result) |
7e46296a | 7642 | + return result; |
2380c486 | 7643 | + |
7e46296a | 7644 | + mutex_lock(&toi_bio_readahead_mutex); |
2380c486 | 7645 | + |
7e46296a AM |
7646 | + while (!buffer) { |
7647 | + buffer = (char *) toi_get_zeroed_page(12, | |
7648 | + TOI_ATOMIC_GFP); | |
7649 | + if (!buffer) { | |
7650 | + if (oom && !dedicated_thread) { | |
7651 | + mutex_unlock(&toi_bio_readahead_mutex); | |
7e46296a AM |
7652 | + return -ENOMEM; |
7653 | + } | |
2380c486 | 7654 | + |
7e46296a AM |
7655 | + oom = 1; |
7656 | + set_free_mem_throttle(); | |
7657 | + do_bio_wait(5); | |
7658 | + } | |
7659 | + } | |
2380c486 | 7660 | + |
7e46296a | 7661 | + result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0); |
e876a0dd | 7662 | + if (result == -ENOSPC) |
7e46296a AM |
7663 | + toi__free_page(12, virt_to_page(buffer)); |
7664 | + mutex_unlock(&toi_bio_readahead_mutex); | |
5dd10c98 | 7665 | + if (result) { |
e876a0dd | 7666 | + if (result == -ENOSPC) |
5dd10c98 AM |
7667 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
7668 | + "Last readahead page submitted."); | |
7669 | + else | |
7670 | + printk(KERN_DEBUG "toi_bio_rw_page returned %d.\n", | |
7671 | + result); | |
7672 | + } | |
7e46296a | 7673 | + return result; |
2380c486 JR |
7674 | +} |
7675 | + | |
7e46296a AM |
7676 | +/** |
7677 | + * toi_start_new_readahead - start new readahead | |
7678 | + * @dedicated_thread: Are we dedicated to this task? | |
7679 | + * | |
7680 | + * Start readahead of image pages. | |
7681 | + * | |
7682 | + * We can be called as a thread dedicated to this task (may be helpful on | |
7683 | + * systems with lots of CPUs), in which case we don't exit until there's no | |
7684 | + * more readahead. | |
7685 | + * | |
7686 | + * If this is not called by a dedicated thread, we top up our queue until | |
7687 | + * there's no more readahead to submit, we've submitted the number given | |
7688 | + * in target_outstanding_io or the number in progress exceeds the target | |
7689 | + * outstanding I/O value. | |
7690 | + * | |
7691 | + * No mutex needed because this is only ever called by the first cpu. | |
7692 | + **/ | |
7693 | +static int toi_start_new_readahead(int dedicated_thread) | |
2380c486 | 7694 | +{ |
7e46296a | 7695 | + int last_result, num_submitted = 0; |
2380c486 | 7696 | + |
7e46296a AM |
7697 | + /* Start a new readahead? */ |
7698 | + if (!more_readahead) | |
7699 | + return 0; | |
2380c486 | 7700 | + |
7e46296a AM |
7701 | + do { |
7702 | + last_result = toi_start_one_readahead(dedicated_thread); | |
2380c486 | 7703 | + |
7e46296a | 7704 | + if (last_result) { |
e876a0dd | 7705 | + if (last_result == -ENOMEM || last_result == -ENOSPC) |
7e46296a | 7706 | + return 0; |
2380c486 | 7707 | + |
7e46296a AM |
7708 | + printk(KERN_DEBUG |
7709 | + "Begin read chunk returned %d.\n", | |
7710 | + last_result); | |
7711 | + } else | |
7712 | + num_submitted++; | |
92bca44c | 7713 | + |
7e46296a AM |
7714 | + } while (more_readahead && !last_result && |
7715 | + (dedicated_thread || | |
7716 | + (num_submitted < target_outstanding_io && | |
7717 | + atomic_read(&toi_io_in_progress) < target_outstanding_io))); | |
2380c486 | 7718 | + |
7e46296a AM |
7719 | + return last_result; |
7720 | +} | |
2380c486 | 7721 | + |
7e46296a AM |
7722 | +/** |
7723 | + * bio_io_flusher - start the dedicated I/O flushing routine | |
7724 | + * @writing: Whether we're writing the image. | |
7725 | + **/ | |
7726 | +static int bio_io_flusher(int writing) | |
7727 | +{ | |
7728 | + | |
7729 | + if (writing) | |
7730 | + return toi_bio_queue_flush_pages(1); | |
2380c486 | 7731 | + else |
7e46296a | 7732 | + return toi_start_new_readahead(1); |
2380c486 JR |
7733 | +} |
7734 | + | |
7e46296a AM |
7735 | +/** |
7736 | + * toi_bio_get_next_page_read - read a disk page, perhaps with readahead | |
7737 | + * @no_readahead: Whether we can use readahead | |
7738 | + * | |
7739 | + * Read a page from disk, submitting readahead and cleaning up finished i/o | |
7740 | + * while we wait for the page we're after. | |
7741 | + **/ | |
7742 | +static int toi_bio_get_next_page_read(int no_readahead) | |
2380c486 | 7743 | +{ |
7e46296a AM |
7744 | + unsigned long *virt; |
7745 | + struct page *next; | |
2380c486 | 7746 | + |
7e46296a AM |
7747 | + /* |
7748 | + * When reading the second page of the header, we have to | |
7749 | + * delay submitting the read until after we've gotten the | |
7750 | + * extents out of the first page. | |
7751 | + */ | |
7752 | + if (unlikely(no_readahead && toi_start_one_readahead(0))) { | |
7753 | + printk(KERN_EMERG "No readahead and toi_start_one_readahead " | |
7754 | + "returned non-zero.\n"); | |
7755 | + return -EIO; | |
7756 | + } | |
2380c486 | 7757 | + |
7e46296a AM |
7758 | + if (unlikely(!readahead_list_head)) { |
7759 | + /* | |
7760 | + * If the last page finishes exactly on the page | |
7761 | + * boundary, we will be called one extra time and | |
7762 | + * have no data to return. In this case, we should | |
7763 | + * not BUG(), like we used to! | |
7764 | + */ | |
7765 | + if (!more_readahead) { | |
7766 | + printk(KERN_EMERG "No more readahead.\n"); | |
e876a0dd | 7767 | + return -ENOSPC; |
7e46296a AM |
7768 | + } |
7769 | + if (unlikely(toi_start_one_readahead(0))) { | |
7770 | + printk(KERN_EMERG "No readahead and " | |
7771 | + "toi_start_one_readahead returned non-zero.\n"); | |
7772 | + return -EIO; | |
7773 | + } | |
2380c486 JR |
7774 | + } |
7775 | + | |
7e46296a AM |
7776 | + if (PageLocked(readahead_list_head)) { |
7777 | + waiting_on = readahead_list_head; | |
7778 | + do_bio_wait(0); | |
7779 | + } | |
2380c486 | 7780 | + |
7e46296a AM |
7781 | + virt = page_address(readahead_list_head); |
7782 | + memcpy(toi_writer_buffer, virt, PAGE_SIZE); | |
7783 | + | |
7784 | + next = (struct page *) readahead_list_head->private; | |
7785 | + toi__free_page(12, readahead_list_head); | |
7786 | + readahead_list_head = next; | |
7787 | + return 0; | |
2380c486 | 7788 | +} |
2380c486 | 7789 | + |
7e46296a AM |
7790 | +/** |
7791 | + * toi_bio_queue_flush_pages - flush the queue of pages queued for writing | |
7792 | + * @dedicated_thread: Whether we're a dedicated thread | |
7793 | + * | |
7794 | + * Flush the queue of pages ready to be written to disk. | |
7795 | + * | |
7796 | + * If we're a dedicated thread, stay in here until told to leave, | |
7797 | + * sleeping in wait_event. | |
7798 | + * | |
7799 | + * The first thread is normally the only one to come in here. Another | |
7800 | + * thread can enter this routine too, though, via throttle_if_needed. | |
7801 | + * Since that's the case, we must be careful to only have one thread | |
7802 | + * doing this work at a time. Otherwise we have a race and could save | |
7803 | + * pages out of order. | |
7804 | + * | |
7805 | + * If an error occurs, free all remaining pages without submitting them | |
7806 | + * for I/O. | |
7807 | + **/ | |
2380c486 | 7808 | + |
7e46296a AM |
7809 | +int toi_bio_queue_flush_pages(int dedicated_thread) |
7810 | +{ | |
7811 | + unsigned long flags; | |
7812 | + int result = 0; | |
7813 | + static DEFINE_MUTEX(busy); | |
2380c486 | 7814 | + |
7e46296a AM |
7815 | + if (!mutex_trylock(&busy)) |
7816 | + return 0; | |
2380c486 | 7817 | + |
7e46296a AM |
7818 | +top: |
7819 | + spin_lock_irqsave(&bio_queue_lock, flags); | |
7820 | + while (bio_queue_head) { | |
7821 | + struct page *page = bio_queue_head; | |
7822 | + bio_queue_head = (struct page *) page->private; | |
7823 | + if (bio_queue_tail == page) | |
7824 | + bio_queue_tail = NULL; | |
7825 | + atomic_dec(&toi_bio_queue_size); | |
7826 | + spin_unlock_irqrestore(&bio_queue_lock, flags); | |
e876a0dd AM |
7827 | + |
7828 | + /* Don't generate more error messages if already had one */ | |
7829 | + if (!result) | |
7830 | + result = toi_bio_rw_page(WRITE, page, 0, 11); | |
7e46296a AM |
7831 | + /* |
7832 | + * If writing the page failed, don't drop out. | |
7833 | + * Flush the rest of the queue too. | |
7834 | + */ | |
7835 | + if (result) | |
7836 | + toi__free_page(11 , page); | |
7837 | + spin_lock_irqsave(&bio_queue_lock, flags); | |
7838 | + } | |
7839 | + spin_unlock_irqrestore(&bio_queue_lock, flags); | |
2380c486 | 7840 | + |
7e46296a AM |
7841 | + if (dedicated_thread) { |
7842 | + wait_event(toi_io_queue_flusher, bio_queue_head || | |
7843 | + toi_bio_queue_flusher_should_finish); | |
7844 | + if (likely(!toi_bio_queue_flusher_should_finish)) | |
7845 | + goto top; | |
7846 | + toi_bio_queue_flusher_should_finish = 0; | |
7847 | + } | |
2380c486 | 7848 | + |
7e46296a AM |
7849 | + mutex_unlock(&busy); |
7850 | + return result; | |
7851 | +} | |
2380c486 | 7852 | + |
7e46296a AM |
7853 | +/** |
7854 | + * toi_bio_get_new_page - get a new page for I/O | |
7855 | + * @full_buffer: Pointer to a page to allocate. | |
7856 | + **/ | |
7857 | +static int toi_bio_get_new_page(char **full_buffer) | |
2380c486 | 7858 | +{ |
7e46296a AM |
7859 | + int result = throttle_if_needed(THROTTLE_WAIT); |
7860 | + if (result) | |
7861 | + return result; | |
2380c486 | 7862 | + |
7e46296a AM |
7863 | + while (!*full_buffer) { |
7864 | + *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP); | |
7865 | + if (!*full_buffer) { | |
7866 | + set_free_mem_throttle(); | |
7867 | + do_bio_wait(3); | |
7868 | + } | |
2380c486 JR |
7869 | + } |
7870 | + | |
7e46296a | 7871 | + return 0; |
2380c486 JR |
7872 | +} |
7873 | + | |
7e46296a AM |
7874 | +/** |
7875 | + * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O | |
7876 | + * @writing: Bool - whether writing (or reading). | |
7877 | + * @buffer: The start of the buffer to write or fill. | |
7878 | + * @buffer_size: The size of the buffer to write or fill. | |
7879 | + * @no_readahead: Don't try to start readhead (when getting extents). | |
7880 | + **/ | |
7881 | +static int toi_rw_buffer(int writing, char *buffer, int buffer_size, | |
7882 | + int no_readahead) | |
92bca44c | 7883 | +{ |
7e46296a | 7884 | + int bytes_left = buffer_size, result = 0; |
92bca44c | 7885 | + |
7e46296a AM |
7886 | + while (bytes_left) { |
7887 | + char *source_start = buffer + buffer_size - bytes_left; | |
7888 | + char *dest_start = toi_writer_buffer + toi_writer_buffer_posn; | |
7889 | + int capacity = PAGE_SIZE - toi_writer_buffer_posn; | |
7890 | + char *to = writing ? dest_start : source_start; | |
7891 | + char *from = writing ? source_start : dest_start; | |
92bca44c | 7892 | + |
7e46296a AM |
7893 | + if (bytes_left <= capacity) { |
7894 | + memcpy(to, from, bytes_left); | |
7895 | + toi_writer_buffer_posn += bytes_left; | |
7896 | + return 0; | |
7897 | + } | |
2380c486 | 7898 | + |
7e46296a AM |
7899 | + /* Complete this page and start a new one */ |
7900 | + memcpy(to, from, capacity); | |
7901 | + bytes_left -= capacity; | |
2380c486 | 7902 | + |
7e46296a AM |
7903 | + if (!writing) { |
7904 | + /* | |
7905 | + * Perform actual I/O: | |
7906 | + * read readahead_list_head into toi_writer_buffer | |
7907 | + */ | |
7908 | + int result = toi_bio_get_next_page_read(no_readahead); | |
7909 | + if (result) { | |
7910 | + printk("toi_bio_get_next_page_read " | |
7911 | + "returned %d.\n", result); | |
7912 | + return result; | |
7913 | + } | |
7914 | + } else { | |
7915 | + toi_bio_queue_write(&toi_writer_buffer); | |
7916 | + result = toi_bio_get_new_page(&toi_writer_buffer); | |
7917 | + if (result) { | |
7918 | + printk(KERN_ERR "toi_bio_get_new_page returned " | |
7919 | + "%d.\n", result); | |
7920 | + return result; | |
7921 | + } | |
7922 | + } | |
7923 | + | |
7924 | + toi_writer_buffer_posn = 0; | |
7925 | + toi_cond_pause(0, NULL); | |
7926 | + } | |
7927 | + | |
7928 | + return 0; | |
2380c486 JR |
7929 | +} |
7930 | + | |
7e46296a AM |
7931 | +/** |
7932 | + * toi_bio_read_page - read a page of the image | |
7933 | + * @pfn: The pfn where the data belongs. | |
7934 | + * @buffer_page: The page containing the (possibly compressed) data. | |
7935 | + * @buf_size: The number of bytes on @buffer_page used (PAGE_SIZE). | |
2380c486 | 7936 | + * |
7e46296a AM |
7937 | + * Read a (possibly compressed) page from the image, into buffer_page, |
7938 | + * returning its pfn and the buffer size. | |
7939 | + **/ | |
7940 | +static int toi_bio_read_page(unsigned long *pfn, struct page *buffer_page, | |
7941 | + unsigned int *buf_size) | |
7942 | +{ | |
7943 | + int result = 0; | |
7944 | + int this_idx; | |
7945 | + char *buffer_virt = kmap(buffer_page); | |
2380c486 | 7946 | + |
7e46296a AM |
7947 | + /* |
7948 | + * Only call start_new_readahead if we don't have a dedicated thread | |
7949 | + * and we're the queue flusher. | |
7950 | + */ | |
7951 | + if (current == toi_queue_flusher && more_readahead) { | |
7952 | + int result2 = toi_start_new_readahead(0); | |
7953 | + if (result2) { | |
7954 | + printk(KERN_DEBUG "Queue flusher and " | |
7955 | + "toi_start_one_readahead returned non-zero.\n"); | |
7956 | + result = -EIO; | |
7957 | + goto out; | |
7958 | + } | |
7959 | + } | |
2380c486 | 7960 | + |
7e46296a | 7961 | + my_mutex_lock(0, &toi_bio_mutex); |
2380c486 | 7962 | + |
7e46296a AM |
7963 | + /* |
7964 | + * Structure in the image: | |
7965 | + * [destination pfn|page size|page data] | |
7966 | + * buf_size is PAGE_SIZE | |
7967 | + */ | |
7968 | + if (toi_rw_buffer(READ, (char *) &this_idx, sizeof(int), 0) || | |
7969 | + toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) || | |
7970 | + toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) || | |
7971 | + toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) { | |
7972 | + abort_hibernate(TOI_FAILED_IO, "Read of data failed."); | |
7973 | + result = 1; | |
7974 | + } | |
2380c486 | 7975 | + |
7e46296a AM |
7976 | + if (reset_idx) { |
7977 | + page_idx = this_idx; | |
7978 | + reset_idx = 0; | |
7979 | + } else { | |
7980 | + page_idx++; | |
7981 | + if (page_idx != this_idx) | |
7982 | + printk(KERN_ERR "Got page index %d, expected %d.\n", | |
7983 | + this_idx, page_idx); | |
7984 | + } | |
2380c486 | 7985 | + |
7e46296a AM |
7986 | + my_mutex_unlock(0, &toi_bio_mutex); |
7987 | +out: | |
7988 | + kunmap(buffer_page); | |
7989 | + return result; | |
7990 | +} | |
2380c486 | 7991 | + |
7e46296a AM |
7992 | +/** |
7993 | + * toi_bio_write_page - write a page of the image | |
7994 | + * @pfn: The pfn where the data belongs. | |
7995 | + * @buffer_page: The page containing the (possibly compressed) data. | |
7996 | + * @buf_size: The number of bytes on @buffer_page used. | |
2380c486 | 7997 | + * |
7e46296a AM |
7998 | + * Write a (possibly compressed) page to the image from the buffer, together |
7999 | + * with it's index and buffer size. | |
8000 | + **/ | |
8001 | +static int toi_bio_write_page(unsigned long pfn, struct page *buffer_page, | |
8002 | + unsigned int buf_size) | |
8003 | +{ | |
8004 | + char *buffer_virt; | |
8005 | + int result = 0, result2 = 0; | |
2380c486 | 8006 | + |
7e46296a AM |
8007 | + if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED))) |
8008 | + return 0; | |
2380c486 | 8009 | + |
7e46296a | 8010 | + my_mutex_lock(1, &toi_bio_mutex); |
2380c486 | 8011 | + |
7e46296a AM |
8012 | + if (test_result_state(TOI_ABORTED)) { |
8013 | + my_mutex_unlock(1, &toi_bio_mutex); | |
8014 | + return -EIO; | |
8015 | + } | |
2380c486 | 8016 | + |
7e46296a AM |
8017 | + buffer_virt = kmap(buffer_page); |
8018 | + page_idx++; | |
2380c486 | 8019 | + |
7e46296a AM |
8020 | + /* |
8021 | + * Structure in the image: | |
8022 | + * [destination pfn|page size|page data] | |
8023 | + * buf_size is PAGE_SIZE | |
8024 | + */ | |
8025 | + if (toi_rw_buffer(WRITE, (char *) &page_idx, sizeof(int), 0) || | |
8026 | + toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) || | |
8027 | + toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) || | |
8028 | + toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) { | |
8029 | + printk(KERN_DEBUG "toi_rw_buffer returned non-zero to " | |
8030 | + "toi_bio_write_page.\n"); | |
8031 | + result = -EIO; | |
8032 | + } | |
2380c486 | 8033 | + |
7e46296a AM |
8034 | + kunmap(buffer_page); |
8035 | + my_mutex_unlock(1, &toi_bio_mutex); | |
2380c486 | 8036 | + |
7e46296a AM |
8037 | + if (current == toi_queue_flusher) |
8038 | + result2 = toi_bio_queue_flush_pages(0); | |
2380c486 | 8039 | + |
7e46296a | 8040 | + return result ? result : result2; |
2380c486 JR |
8041 | +} |
8042 | + | |
7e46296a AM |
8043 | +/** |
8044 | + * _toi_rw_header_chunk - read or write a portion of the image header | |
8045 | + * @writing: Whether reading or writing. | |
8046 | + * @owner: The module for which we're writing. | |
8047 | + * Used for confirming that modules | |
8048 | + * don't use more header space than they asked for. | |
8049 | + * @buffer: Address of the data to write. | |
8050 | + * @buffer_size: Size of the data buffer. | |
8051 | + * @no_readahead: Don't try to start readhead (when getting extents). | |
2380c486 | 8052 | + * |
7e46296a AM |
8053 | + * Perform PAGE_SIZE I/O. Start readahead if needed. |
8054 | + **/ | |
8055 | +static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner, | |
8056 | + char *buffer, int buffer_size, int no_readahead) | |
2380c486 | 8057 | +{ |
7e46296a | 8058 | + int result = 0; |
2380c486 | 8059 | + |
7e46296a AM |
8060 | + if (owner) { |
8061 | + owner->header_used += buffer_size; | |
8062 | + toi_message(TOI_HEADER, TOI_LOW, 1, | |
8063 | + "Header: %s : %d bytes (%d/%d) from offset %d.", | |
8064 | + owner->name, | |
8065 | + buffer_size, owner->header_used, | |
8066 | + owner->header_requested, | |
8067 | + toi_writer_buffer_posn); | |
8068 | + if (owner->header_used > owner->header_requested && writing) { | |
8069 | + printk(KERN_EMERG "TuxOnIce module %s is using more " | |
8070 | + "header space (%u) than it requested (%u).\n", | |
8071 | + owner->name, | |
8072 | + owner->header_used, | |
8073 | + owner->header_requested); | |
8074 | + return buffer_size; | |
2380c486 | 8075 | + } |
7e46296a AM |
8076 | + } else { |
8077 | + unowned += buffer_size; | |
8078 | + toi_message(TOI_HEADER, TOI_LOW, 1, | |
8079 | + "Header: (No owner): %d bytes (%d total so far) from " | |
8080 | + "offset %d.", buffer_size, unowned, | |
8081 | + toi_writer_buffer_posn); | |
2380c486 | 8082 | + } |
2380c486 | 8083 | + |
7e46296a AM |
8084 | + if (!writing && !no_readahead && more_readahead) { |
8085 | + result = toi_start_new_readahead(0); | |
8086 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Start new readahead " | |
8087 | + "returned %d.", result); | |
2380c486 JR |
8088 | + } |
8089 | + | |
7e46296a AM |
8090 | + if (!result) { |
8091 | + result = toi_rw_buffer(writing, buffer, buffer_size, | |
8092 | + no_readahead); | |
8093 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "rw_buffer returned " | |
8094 | + "%d.", result); | |
2380c486 | 8095 | + } |
2380c486 | 8096 | + |
7e46296a AM |
8097 | + total_header_bytes += buffer_size; |
8098 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning " | |
8099 | + "%d.", result); | |
8100 | + return result; | |
8101 | +} | |
2380c486 | 8102 | + |
7e46296a AM |
8103 | +static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner, |
8104 | + char *buffer, int size) | |
2380c486 | 8105 | +{ |
7e46296a | 8106 | + return _toi_rw_header_chunk(writing, owner, buffer, size, 1); |
2380c486 JR |
8107 | +} |
8108 | + | |
7e46296a AM |
8109 | +static int toi_rw_header_chunk_noreadahead(int writing, |
8110 | + struct toi_module_ops *owner, char *buffer, int size) | |
2380c486 | 8111 | +{ |
7e46296a | 8112 | + return _toi_rw_header_chunk(writing, owner, buffer, size, 1); |
2380c486 JR |
8113 | +} |
8114 | + | |
7e46296a AM |
8115 | +/** |
8116 | + * toi_bio_storage_needed - get the amount of storage needed for my fns | |
8117 | + **/ | |
8118 | +static int toi_bio_storage_needed(void) | |
2380c486 | 8119 | +{ |
7e46296a | 8120 | + return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed(); |
2380c486 JR |
8121 | +} |
8122 | + | |
7e46296a AM |
8123 | +/** |
8124 | + * toi_bio_save_config_info - save block I/O config to image header | |
8125 | + * @buf: PAGE_SIZE'd buffer into which data should be saved. | |
8126 | + **/ | |
8127 | +static int toi_bio_save_config_info(char *buf) | |
2380c486 | 8128 | +{ |
7e46296a AM |
8129 | + int *ints = (int *) buf; |
8130 | + ints[0] = target_outstanding_io; | |
8131 | + return sizeof(int); | |
2380c486 JR |
8132 | +} |
8133 | + | |
7e46296a AM |
8134 | +/** |
8135 | + * toi_bio_load_config_info - restore block I/O config | |
8136 | + * @buf: Data to be reloaded. | |
8137 | + * @size: Size of the buffer saved. | |
8138 | + **/ | |
8139 | +static void toi_bio_load_config_info(char *buf, int size) | |
2380c486 | 8140 | +{ |
7e46296a AM |
8141 | + int *ints = (int *) buf; |
8142 | + target_outstanding_io = ints[0]; | |
2380c486 JR |
8143 | +} |
8144 | + | |
5dd10c98 | 8145 | +void close_resume_dev_t(int force) |
2380c486 | 8146 | +{ |
5dd10c98 AM |
8147 | + if (!resume_block_device) |
8148 | + return; | |
8149 | + | |
8150 | + if (force) | |
8151 | + atomic_set(&resume_bdev_open_count, 0); | |
8152 | + else | |
8153 | + atomic_dec(&resume_bdev_open_count); | |
8154 | + | |
8155 | + if (!atomic_read(&resume_bdev_open_count)) { | |
7e46296a AM |
8156 | + toi_close_bdev(resume_block_device); |
8157 | + resume_block_device = NULL; | |
2380c486 JR |
8158 | + } |
8159 | +} | |
8160 | + | |
5dd10c98 | 8161 | +int open_resume_dev_t(int force, int quiet) |
2380c486 | 8162 | +{ |
5dd10c98 | 8163 | + if (force) { |
7e46296a | 8164 | + close_resume_dev_t(1); |
5dd10c98 AM |
8165 | + atomic_set(&resume_bdev_open_count, 1); |
8166 | + } else | |
7e46296a | 8167 | + atomic_inc(&resume_bdev_open_count); |
2380c486 | 8168 | + |
7e46296a | 8169 | + if (resume_block_device) |
2380c486 JR |
8170 | + return 0; |
8171 | + | |
7e46296a AM |
8172 | + resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0); |
8173 | + if (IS_ERR(resume_block_device)) { | |
8174 | + if (!quiet) | |
8175 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
8176 | + "Failed to open device %x, where" | |
8177 | + " the header should be found.", | |
8178 | + resume_dev_t); | |
5dd10c98 AM |
8179 | + resume_block_device = NULL; |
8180 | + atomic_set(&resume_bdev_open_count, 0); | |
7e46296a | 8181 | + return 1; |
2380c486 JR |
8182 | + } |
8183 | + | |
2380c486 JR |
8184 | + return 0; |
8185 | +} | |
8186 | + | |
7e46296a AM |
8187 | +/** |
8188 | + * toi_bio_initialise - initialise bio code at start of some action | |
8189 | + * @starting_cycle: Whether starting a hibernation cycle, or just reading or | |
8190 | + * writing a sysfs value. | |
8191 | + **/ | |
8192 | +static int toi_bio_initialise(int starting_cycle) | |
2380c486 | 8193 | +{ |
7e46296a | 8194 | + int result; |
2380c486 | 8195 | + |
7e46296a AM |
8196 | + if (!starting_cycle || !resume_dev_t) |
8197 | + return 0; | |
2380c486 | 8198 | + |
7e46296a AM |
8199 | + max_outstanding_writes = 0; |
8200 | + max_outstanding_reads = 0; | |
8201 | + current_stream = 0; | |
8202 | + toi_queue_flusher = current; | |
8203 | +#ifdef MEASURE_MUTEX_CONTENTION | |
8204 | + { | |
8205 | + int i, j, k; | |
8206 | + | |
8207 | + for (i = 0; i < 2; i++) | |
8208 | + for (j = 0; j < 2; j++) | |
8209 | + for_each_online_cpu(k) | |
8210 | + mutex_times[i][j][k] = 0; | |
8211 | + } | |
8212 | +#endif | |
8213 | + result = open_resume_dev_t(0, 1); | |
8214 | + | |
8215 | + if (result) | |
8216 | + return result; | |
8217 | + | |
8218 | + return get_signature_page(); | |
2380c486 JR |
8219 | +} |
8220 | + | |
7e46296a | 8221 | +static unsigned long raw_to_real(unsigned long raw) |
2380c486 | 8222 | +{ |
7e46296a | 8223 | + unsigned long result; |
2380c486 | 8224 | + |
7e46296a AM |
8225 | + result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) + |
8226 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) / | |
8227 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int)); | |
2380c486 | 8228 | + |
7e46296a AM |
8229 | + return result < 0 ? 0 : result; |
8230 | +} | |
8231 | + | |
8232 | +static unsigned long toi_bio_storage_available(void) | |
8233 | +{ | |
8234 | + unsigned long sum = 0; | |
8235 | + struct toi_module_ops *this_module; | |
8236 | + | |
8237 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
8238 | + if (!this_module->enabled || | |
8239 | + this_module->type != BIO_ALLOCATOR_MODULE) | |
8240 | + continue; | |
8241 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Seeking storage " | |
8242 | + "available from %s.", this_module->name); | |
8243 | + sum += this_module->bio_allocator_ops->storage_available(); | |
8244 | + } | |
8245 | + | |
8246 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Total storage available is %lu " | |
8247 | + "pages.", sum); | |
8248 | + return raw_to_real(sum - header_pages_reserved); | |
8249 | + | |
8250 | +} | |
8251 | + | |
8252 | +static unsigned long toi_bio_storage_allocated(void) | |
8253 | +{ | |
8254 | + return raw_pages_allocd > header_pages_reserved ? | |
8255 | + raw_to_real(raw_pages_allocd - header_pages_reserved) : 0; | |
2380c486 | 8256 | +} |
7e46296a | 8257 | + |
2380c486 | 8258 | +/* |
7e46296a AM |
8259 | + * If we have read part of the image, we might have filled memory with |
8260 | + * data that should be zeroed out. | |
2380c486 | 8261 | + */ |
7e46296a | 8262 | +static void toi_bio_noresume_reset(void) |
2380c486 | 8263 | +{ |
7e46296a AM |
8264 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_noresume_reset."); |
8265 | + toi_rw_cleanup(READ); | |
8266 | + free_all_bdev_info(); | |
8267 | +} | |
2380c486 | 8268 | + |
7e46296a AM |
8269 | +/** |
8270 | + * toi_bio_cleanup - cleanup after some action | |
8271 | + * @finishing_cycle: Whether completing a cycle. | |
8272 | + **/ | |
8273 | +static void toi_bio_cleanup(int finishing_cycle) | |
8274 | +{ | |
8275 | + if (!finishing_cycle) | |
2380c486 JR |
8276 | + return; |
8277 | + | |
7e46296a AM |
8278 | + if (toi_writer_buffer) { |
8279 | + toi_free_page(11, (unsigned long) toi_writer_buffer); | |
8280 | + toi_writer_buffer = NULL; | |
8281 | + } | |
2380c486 | 8282 | + |
7e46296a | 8283 | + forget_signature_page(); |
2380c486 | 8284 | + |
7e46296a AM |
8285 | + if (header_block_device && toi_sig_data && |
8286 | + toi_sig_data->header_dev_t != resume_dev_t) | |
8287 | + toi_close_bdev(header_block_device); | |
2380c486 | 8288 | + |
7e46296a | 8289 | + header_block_device = NULL; |
5dd10c98 AM |
8290 | + |
8291 | + close_resume_dev_t(0); | |
7e46296a | 8292 | +} |
2380c486 | 8293 | + |
7e46296a AM |
8294 | +static int toi_bio_write_header_init(void) |
8295 | +{ | |
8296 | + int result; | |
2380c486 | 8297 | + |
7e46296a AM |
8298 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_write_header_init"); |
8299 | + toi_rw_init(WRITE, 0); | |
8300 | + toi_writer_buffer_posn = 0; | |
2380c486 | 8301 | + |
7e46296a AM |
8302 | + /* Info needed to bootstrap goes at the start of the header. |
8303 | + * First we save the positions and devinfo, including the number | |
8304 | + * of header pages. Then we save the structs containing data needed | |
8305 | + * for reading the header pages back. | |
8306 | + * Note that even if header pages take more than one page, when we | |
8307 | + * read back the info, we will have restored the location of the | |
8308 | + * next header page by the time we go to use it. | |
8309 | + */ | |
2380c486 | 8310 | + |
7e46296a AM |
8311 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "serialise extent chains."); |
8312 | + result = toi_serialise_extent_chains(); | |
8313 | + | |
8314 | + if (result) | |
8315 | + return result; | |
8316 | + | |
8317 | + /* | |
8318 | + * Signature page hasn't been modified at this point. Write it in | |
8319 | + * the header so we can restore it later. | |
8320 | + */ | |
8321 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "serialise signature page."); | |
8322 | + return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops, | |
8323 | + (char *) toi_cur_sig_page, | |
8324 | + PAGE_SIZE); | |
2380c486 JR |
8325 | +} |
8326 | + | |
7e46296a AM |
8327 | +static int toi_bio_write_header_cleanup(void) |
8328 | +{ | |
8329 | + int result = 0; | |
8330 | + | |
8331 | + if (toi_writer_buffer_posn) | |
8332 | + toi_bio_queue_write(&toi_writer_buffer); | |
8333 | + | |
8334 | + result = toi_finish_all_io(); | |
8335 | + | |
8336 | + unowned = 0; | |
8337 | + total_header_bytes = 0; | |
8338 | + | |
8339 | + /* Set signature to save we have an image */ | |
8340 | + if (!result) | |
8341 | + result = toi_bio_mark_have_image(); | |
8342 | + | |
8343 | + return result; | |
8344 | +} | |
2380c486 JR |
8345 | + |
8346 | +/* | |
7e46296a AM |
8347 | + * toi_bio_read_header_init() |
8348 | + * | |
8349 | + * Description: | |
8350 | + * 1. Attempt to read the device specified with resume=. | |
8351 | + * 2. Check the contents of the swap header for our signature. | |
8352 | + * 3. Warn, ignore, reset and/or continue as appropriate. | |
8353 | + * 4. If continuing, read the toi_swap configuration section | |
8354 | + * of the header and set up block device info so we can read | |
8355 | + * the rest of the header & image. | |
8356 | + * | |
8357 | + * Returns: | |
8358 | + * May not return if user choose to reboot at a warning. | |
8359 | + * -EINVAL if cannot resume at this time. Booting should continue | |
8360 | + * normally. | |
2380c486 | 8361 | + */ |
2380c486 | 8362 | + |
7e46296a | 8363 | +static int toi_bio_read_header_init(void) |
2380c486 | 8364 | +{ |
7e46296a AM |
8365 | + int result = 0; |
8366 | + char buf[32]; | |
8367 | + | |
8368 | + toi_writer_buffer_posn = 0; | |
8369 | + | |
8370 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_read_header_init"); | |
8371 | + | |
8372 | + if (!toi_sig_data) { | |
8373 | + printk(KERN_INFO "toi_bio_read_header_init called when we " | |
8374 | + "haven't verified there is an image!\n"); | |
8375 | + return -EINVAL; | |
8376 | + } | |
8377 | + | |
8378 | + /* | |
8379 | + * If the header is not on the resume_swap_dev_t, get the resume device | |
8380 | + * first. | |
8381 | + */ | |
8382 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Header dev_t is %lx.", | |
8383 | + toi_sig_data->header_dev_t); | |
8384 | + if (toi_sig_data->have_uuid) { | |
cacc47f8 | 8385 | + struct fs_info seek; |
7e46296a | 8386 | + dev_t device; |
cacc47f8 AM |
8387 | + |
8388 | + strncpy((char *) seek.uuid, toi_sig_data->header_uuid, 16); | |
8389 | + seek.dev_t = toi_sig_data->header_dev_t; | |
8390 | + seek.last_mount_size = 0; | |
8391 | + device = blk_lookup_fs_info(&seek); | |
7e46296a | 8392 | + if (device) { |
cacc47f8 | 8393 | + printk("Using dev_t %s, returned by blk_lookup_fs_info.\n", |
7e46296a AM |
8394 | + format_dev_t(buf, device)); |
8395 | + toi_sig_data->header_dev_t = device; | |
8396 | + } | |
8397 | + } | |
8398 | + if (toi_sig_data->header_dev_t != resume_dev_t) { | |
8399 | + header_block_device = toi_open_bdev(NULL, | |
8400 | + toi_sig_data->header_dev_t, 1); | |
8401 | + | |
8402 | + if (IS_ERR(header_block_device)) | |
8403 | + return PTR_ERR(header_block_device); | |
8404 | + } else | |
8405 | + header_block_device = resume_block_device; | |
8406 | + | |
8407 | + if (!toi_writer_buffer) | |
8408 | + toi_writer_buffer = (char *) toi_get_zeroed_page(11, | |
8409 | + TOI_ATOMIC_GFP); | |
8410 | + more_readahead = 1; | |
8411 | + | |
8412 | + /* | |
8413 | + * Read toi_swap configuration. | |
8414 | + * Headerblock size taken into account already. | |
8415 | + */ | |
8416 | + result = toi_bio_ops.bdev_page_io(READ, header_block_device, | |
8417 | + toi_sig_data->first_header_block, | |
8418 | + virt_to_page((unsigned long) toi_writer_buffer)); | |
8419 | + if (result) | |
8420 | + return result; | |
8421 | + | |
8422 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "load extent chains."); | |
8423 | + result = toi_load_extent_chains(); | |
8424 | + | |
8425 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "load original signature page."); | |
8426 | + toi_orig_sig_page = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP); | |
8427 | + if (!toi_orig_sig_page) { | |
8428 | + printk(KERN_ERR "Failed to allocate memory for the current" | |
8429 | + " image signature.\n"); | |
8430 | + return -ENOMEM; | |
8431 | + } | |
8432 | + | |
8433 | + return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops, | |
8434 | + (char *) toi_orig_sig_page, | |
8435 | + PAGE_SIZE); | |
2380c486 JR |
8436 | +} |
8437 | + | |
7e46296a | 8438 | +static int toi_bio_read_header_cleanup(void) |
2380c486 | 8439 | +{ |
7e46296a AM |
8440 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup."); |
8441 | + return toi_rw_cleanup(READ); | |
2380c486 | 8442 | +} |
7e46296a AM |
8443 | + |
8444 | +/* Works only for digits and letters, but small and fast */ | |
8445 | +#define TOLOWER(x) ((x) | 0x20) | |
8446 | + | |
2380c486 | 8447 | +/* |
7e46296a AM |
8448 | + * UUID must be 32 chars long. It may have dashes, but nothing |
8449 | + * else. | |
2380c486 | 8450 | + */ |
7e46296a AM |
8451 | +char *uuid_from_commandline(char *commandline) |
8452 | +{ | |
8453 | + int low = 0; | |
8454 | + char *result = NULL, *output, *ptr; | |
2380c486 | 8455 | + |
7e46296a AM |
8456 | + if (strncmp(commandline, "UUID=", 5)) |
8457 | + return NULL; | |
2380c486 | 8458 | + |
7e46296a AM |
8459 | + result = kzalloc(17, GFP_KERNEL); |
8460 | + if (!result) { | |
8461 | + printk("Failed to kzalloc UUID text memory.\n"); | |
8462 | + return NULL; | |
8463 | + } | |
8464 | + | |
8465 | + ptr = commandline + 5; | |
8466 | + output = result; | |
8467 | + | |
8468 | + while (*ptr && (output - result) < 16) { | |
8469 | + if (isxdigit(*ptr)) { | |
8470 | + int value = isdigit(*ptr) ? *ptr - '0' : | |
8471 | + TOLOWER(*ptr) - 'a' + 10; | |
8472 | + if (low) { | |
8473 | + *output += value; | |
8474 | + output++; | |
8475 | + } else { | |
8476 | + *output = value << 4; | |
8477 | + } | |
8478 | + low = !low; | |
8479 | + } else if (*ptr != '-') | |
8480 | + break; | |
8481 | + ptr++; | |
8482 | + } | |
8483 | + | |
8484 | + if ((output - result) < 16 || *ptr) { | |
8485 | + printk(KERN_DEBUG "Found resume=UUID=, but the value looks " | |
8486 | + "invalid.\n"); | |
8487 | + kfree(result); | |
8488 | + result = NULL; | |
8489 | + } | |
8490 | + | |
8491 | + return result; | |
8492 | +} | |
8493 | + | |
5dd10c98 AM |
8494 | +#define retry_if_fails(command) \ |
8495 | +do { \ | |
8496 | + command; \ | |
8497 | + if (!resume_dev_t && !waited_for_device_probe) { \ | |
8498 | + wait_for_device_probe(); \ | |
8499 | + scsi_complete_async_scans(); \ | |
8500 | + command; \ | |
8501 | + waited_for_device_probe = 1; \ | |
8502 | + } \ | |
8503 | +} while(0) | |
8504 | + | |
7e46296a AM |
8505 | +/** |
8506 | + * try_to_open_resume_device: Try to parse and open resume= | |
2380c486 | 8507 | + * |
7e46296a AM |
8508 | + * Any "swap:" has been stripped away and we just have the path to deal with. |
8509 | + * We attempt to do name_to_dev_t, open and stat the file. Having opened the | |
8510 | + * file, get the struct block_device * to match. | |
8511 | + */ | |
8512 | +static int try_to_open_resume_device(char *commandline, int quiet) | |
8513 | +{ | |
8514 | + struct kstat stat; | |
8515 | + int error = 0; | |
8516 | + char *uuid = uuid_from_commandline(commandline); | |
5dd10c98 | 8517 | + int waited_for_device_probe = 0; |
7e46296a AM |
8518 | + |
8519 | + resume_dev_t = MKDEV(0, 0); | |
8520 | + | |
5dd10c98 AM |
8521 | + if (!strlen(commandline)) |
8522 | + retry_if_fails(toi_bio_scan_for_image(quiet)); | |
8523 | + | |
7e46296a | 8524 | + if (uuid) { |
cacc47f8 AM |
8525 | + struct fs_info seek; |
8526 | + strncpy((char *) &seek.uuid, uuid, 16); | |
8527 | + seek.dev_t = resume_dev_t; | |
8528 | + seek.last_mount_size = 0; | |
8529 | + retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek)); | |
7e46296a AM |
8530 | + kfree(uuid); |
8531 | + } | |
8532 | + | |
8533 | + if (!resume_dev_t) | |
5dd10c98 | 8534 | + retry_if_fails(resume_dev_t = name_to_dev_t(commandline)); |
7e46296a AM |
8535 | + |
8536 | + if (!resume_dev_t) { | |
8537 | + struct file *file = filp_open(commandline, | |
8538 | + O_RDONLY|O_LARGEFILE, 0); | |
8539 | + | |
8540 | + if (!IS_ERR(file) && file) { | |
8541 | + vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); | |
8542 | + filp_close(file, NULL); | |
8543 | + } else | |
8544 | + error = vfs_stat(commandline, &stat); | |
8545 | + if (!error) | |
8546 | + resume_dev_t = stat.rdev; | |
8547 | + } | |
8548 | + | |
8549 | + if (!resume_dev_t) { | |
8550 | + if (quiet) | |
8551 | + return 1; | |
8552 | + | |
8553 | + if (test_toi_state(TOI_TRYING_TO_RESUME)) | |
8554 | + toi_early_boot_message(1, toi_translate_err_default, | |
8555 | + "Failed to translate \"%s\" into a device id.\n", | |
8556 | + commandline); | |
8557 | + else | |
8558 | + printk("TuxOnIce: Can't translate \"%s\" into a device " | |
8559 | + "id yet.\n", commandline); | |
8560 | + return 1; | |
8561 | + } | |
8562 | + | |
8563 | + return open_resume_dev_t(1, quiet); | |
8564 | +} | |
8565 | + | |
8566 | +/* | |
8567 | + * Parse Image Location | |
2380c486 | 8568 | + * |
7e46296a AM |
8569 | + * Attempt to parse a resume= parameter. |
8570 | + * Swap Writer accepts: | |
8571 | + * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE] | |
2380c486 | 8572 | + * |
7e46296a AM |
8573 | + * Where: |
8574 | + * DEVNAME is convertable to a dev_t by name_to_dev_t | |
8575 | + * FIRSTBLOCK is the location of the first block in the swap file | |
8576 | + * (specifying for a swap partition is nonsensical but not prohibited). | |
8577 | + * Data is validated by attempting to read a swap header from the | |
8578 | + * location given. Failure will result in toi_swap refusing to | |
8579 | + * save an image, and a reboot with correct parameters will be | |
8580 | + * necessary. | |
8581 | + */ | |
8582 | +static int toi_bio_parse_sig_location(char *commandline, | |
8583 | + int only_allocator, int quiet) | |
8584 | +{ | |
8585 | + char *thischar, *devstart, *colon = NULL; | |
8586 | + int signature_found, result = -EINVAL, temp_result = 0; | |
8587 | + | |
8588 | + if (strncmp(commandline, "swap:", 5) && | |
8589 | + strncmp(commandline, "file:", 5)) { | |
8590 | + /* | |
5dd10c98 AM |
8591 | + * Failing swap:, we'll take a simple resume=/dev/hda2, or a |
8592 | + * blank value (scan) but fall through to other allocators | |
8593 | + * if /dev/ or UUID= isn't matched. | |
7e46296a AM |
8594 | + */ |
8595 | + if (strncmp(commandline, "/dev/", 5) && | |
5dd10c98 AM |
8596 | + strncmp(commandline, "UUID=", 5) && |
8597 | + strlen(commandline)) | |
7e46296a AM |
8598 | + return 1; |
8599 | + } else | |
8600 | + commandline += 5; | |
8601 | + | |
8602 | + devstart = commandline; | |
8603 | + thischar = commandline; | |
8604 | + while ((*thischar != ':') && (*thischar != '@') && | |
8605 | + ((thischar - commandline) < 250) && (*thischar)) | |
8606 | + thischar++; | |
8607 | + | |
8608 | + if (*thischar == ':') { | |
8609 | + colon = thischar; | |
8610 | + *colon = 0; | |
8611 | + thischar++; | |
8612 | + } | |
8613 | + | |
8614 | + while ((thischar - commandline) < 250 && *thischar) | |
8615 | + thischar++; | |
8616 | + | |
8617 | + if (colon) { | |
8618 | + unsigned long block; | |
8619 | + temp_result = strict_strtoul(colon + 1, 0, &block); | |
8620 | + if (!temp_result) | |
8621 | + resume_firstblock = (int) block; | |
8622 | + } else | |
8623 | + resume_firstblock = 0; | |
8624 | + | |
8625 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
8626 | + clear_toi_state(TOI_CAN_RESUME); | |
8627 | + | |
8628 | + if (!temp_result) | |
8629 | + temp_result = try_to_open_resume_device(devstart, quiet); | |
8630 | + | |
8631 | + if (colon) | |
8632 | + *colon = ':'; | |
8633 | + | |
5dd10c98 | 8634 | + /* No error if we only scanned */ |
7e46296a | 8635 | + if (temp_result) |
5dd10c98 | 8636 | + return strlen(commandline) ? -EINVAL : 1; |
7e46296a AM |
8637 | + |
8638 | + signature_found = toi_bio_image_exists(quiet); | |
8639 | + | |
8640 | + if (signature_found != -1) { | |
8641 | + result = 0; | |
8642 | + /* | |
8643 | + * TODO: If only file storage, CAN_HIBERNATE should only be | |
8644 | + * set if file allocator's target is valid. | |
8645 | + */ | |
8646 | + set_toi_state(TOI_CAN_HIBERNATE); | |
8647 | + set_toi_state(TOI_CAN_RESUME); | |
8648 | + } else | |
8649 | + if (!quiet) | |
8650 | + printk(KERN_ERR "TuxOnIce: Block I/O: No " | |
8651 | + "signature found at %s.\n", devstart); | |
8652 | + | |
7e46296a AM |
8653 | + return result; |
8654 | +} | |
8655 | + | |
8656 | +static void toi_bio_release_storage(void) | |
8657 | +{ | |
8658 | + header_pages_reserved = 0; | |
8659 | + raw_pages_allocd = 0; | |
8660 | + | |
8661 | + free_all_bdev_info(); | |
8662 | +} | |
8663 | + | |
8664 | +/* toi_swap_remove_image | |
2380c486 | 8665 | + * |
7e46296a AM |
8666 | + */ |
8667 | +static int toi_bio_remove_image(void) | |
8668 | +{ | |
8669 | + int result; | |
8670 | + | |
8671 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_remove_image."); | |
8672 | + | |
8673 | + result = toi_bio_restore_original_signature(); | |
8674 | + | |
8675 | + /* | |
8676 | + * We don't do a sanity check here: we want to restore the swap | |
8677 | + * whatever version of kernel made the hibernate image. | |
8678 | + * | |
8679 | + * We need to write swap, but swap may not be enabled so | |
8680 | + * we write the device directly | |
8681 | + * | |
8682 | + * If we don't have an current_signature_page, we didn't | |
8683 | + * read an image header, so don't change anything. | |
8684 | + */ | |
8685 | + | |
8686 | + toi_bio_release_storage(); | |
8687 | + | |
8688 | + return result; | |
8689 | +} | |
8690 | + | |
8691 | +struct toi_bio_ops toi_bio_ops = { | |
8692 | + .bdev_page_io = toi_bdev_page_io, | |
8693 | + .register_storage = toi_register_storage_chain, | |
8694 | + .free_storage = toi_bio_release_storage, | |
8695 | +}; | |
8696 | +EXPORT_SYMBOL_GPL(toi_bio_ops); | |
8697 | + | |
8698 | +static struct toi_sysfs_data sysfs_params[] = { | |
8699 | + SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io, | |
8700 | + 0, 16384, 0, NULL), | |
8701 | +}; | |
8702 | + | |
8703 | +struct toi_module_ops toi_blockwriter_ops = { | |
8704 | + .type = WRITER_MODULE, | |
8705 | + .name = "block i/o", | |
8706 | + .directory = "block_io", | |
8707 | + .module = THIS_MODULE, | |
8708 | + .memory_needed = toi_bio_memory_needed, | |
8709 | + .print_debug_info = toi_bio_print_debug_stats, | |
8710 | + .storage_needed = toi_bio_storage_needed, | |
8711 | + .save_config_info = toi_bio_save_config_info, | |
8712 | + .load_config_info = toi_bio_load_config_info, | |
8713 | + .initialise = toi_bio_initialise, | |
8714 | + .cleanup = toi_bio_cleanup, | |
5dd10c98 | 8715 | + .post_atomic_restore = toi_bio_chains_post_atomic, |
7e46296a AM |
8716 | + |
8717 | + .rw_init = toi_rw_init, | |
8718 | + .rw_cleanup = toi_rw_cleanup, | |
8719 | + .read_page = toi_bio_read_page, | |
8720 | + .write_page = toi_bio_write_page, | |
8721 | + .rw_header_chunk = toi_rw_header_chunk, | |
8722 | + .rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead, | |
8723 | + .io_flusher = bio_io_flusher, | |
8724 | + .update_throughput_throttle = update_throughput_throttle, | |
8725 | + .finish_all_io = toi_finish_all_io, | |
8726 | + | |
8727 | + .noresume_reset = toi_bio_noresume_reset, | |
8728 | + .storage_available = toi_bio_storage_available, | |
8729 | + .storage_allocated = toi_bio_storage_allocated, | |
8730 | + .reserve_header_space = toi_bio_reserve_header_space, | |
8731 | + .allocate_storage = toi_bio_allocate_storage, | |
8732 | + .image_exists = toi_bio_image_exists, | |
8733 | + .mark_resume_attempted = toi_bio_mark_resume_attempted, | |
8734 | + .write_header_init = toi_bio_write_header_init, | |
8735 | + .write_header_cleanup = toi_bio_write_header_cleanup, | |
8736 | + .read_header_init = toi_bio_read_header_init, | |
8737 | + .read_header_cleanup = toi_bio_read_header_cleanup, | |
5dd10c98 | 8738 | + .get_header_version = toi_bio_get_header_version, |
7e46296a AM |
8739 | + .remove_image = toi_bio_remove_image, |
8740 | + .parse_sig_location = toi_bio_parse_sig_location, | |
8741 | + | |
8742 | + .sysfs_data = sysfs_params, | |
8743 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
8744 | + sizeof(struct toi_sysfs_data), | |
8745 | +}; | |
8746 | + | |
8747 | +/** | |
8748 | + * toi_block_io_load - load time routine for block I/O module | |
2380c486 | 8749 | + * |
7e46296a AM |
8750 | + * Register block i/o ops and sysfs entries. |
8751 | + **/ | |
8752 | +static __init int toi_block_io_load(void) | |
8753 | +{ | |
8754 | + return toi_register_module(&toi_blockwriter_ops); | |
8755 | +} | |
8756 | + | |
8757 | +#ifdef MODULE | |
8758 | +static __exit void toi_block_io_unload(void) | |
8759 | +{ | |
8760 | + toi_unregister_module(&toi_blockwriter_ops); | |
8761 | +} | |
8762 | + | |
8763 | +module_init(toi_block_io_load); | |
8764 | +module_exit(toi_block_io_unload); | |
8765 | +MODULE_LICENSE("GPL"); | |
8766 | +MODULE_AUTHOR("Nigel Cunningham"); | |
8767 | +MODULE_DESCRIPTION("TuxOnIce block io functions"); | |
8768 | +#else | |
8769 | +late_initcall(toi_block_io_load); | |
8770 | +#endif | |
8771 | diff --git a/kernel/power/tuxonice_bio_internal.h b/kernel/power/tuxonice_bio_internal.h | |
8772 | new file mode 100644 | |
5dd10c98 | 8773 | index 0000000..58c2481 |
7e46296a AM |
8774 | --- /dev/null |
8775 | +++ b/kernel/power/tuxonice_bio_internal.h | |
5dd10c98 AM |
8776 | @@ -0,0 +1,86 @@ |
8777 | +/* | |
8778 | + * kernel/power/tuxonice_bio_internal.h | |
8779 | + * | |
8780 | + * Copyright (C) 2009-2010 Nigel Cunningham (nigel at tuxonice net) | |
8781 | + * | |
8782 | + * Distributed under GPLv2. | |
8783 | + * | |
8784 | + * This file contains declarations for functions exported from | |
8785 | + * tuxonice_bio.c, which contains low level io functions. | |
8786 | + */ | |
8787 | + | |
7e46296a AM |
8788 | +/* Extent chains */ |
8789 | +void toi_extent_state_goto_start(void); | |
8790 | +void toi_extent_state_save(int slot); | |
8791 | +int go_next_page(int writing, int section_barrier); | |
8792 | +void toi_extent_state_restore(int slot); | |
8793 | +void free_all_bdev_info(void); | |
8794 | +int devices_of_same_priority(struct toi_bdev_info *this); | |
8795 | +int toi_register_storage_chain(struct toi_bdev_info *new); | |
8796 | +int toi_serialise_extent_chains(void); | |
8797 | +int toi_load_extent_chains(void); | |
8798 | +int toi_bio_rw_page(int writing, struct page *page, int is_readahead, | |
8799 | + int free_group); | |
8800 | +int toi_bio_restore_original_signature(void); | |
8801 | +int toi_bio_devinfo_storage_needed(void); | |
8802 | +unsigned long get_headerblock(void); | |
8803 | +dev_t get_header_dev_t(void); | |
8804 | +struct block_device *get_header_bdev(void); | |
8805 | +int toi_bio_allocate_storage(unsigned long request); | |
8806 | + | |
8807 | +/* Signature functions */ | |
8808 | +#define HaveImage "HaveImage" | |
8809 | +#define NoImage "TuxOnIce" | |
8810 | +#define sig_size (sizeof(HaveImage)) | |
8811 | + | |
8812 | +struct sig_data { | |
8813 | + char sig[sig_size]; | |
8814 | + int have_image; | |
8815 | + int resumed_before; | |
8816 | + | |
8817 | + char have_uuid; | |
8818 | + char header_uuid[17]; | |
8819 | + dev_t header_dev_t; | |
8820 | + unsigned long first_header_block; | |
5dd10c98 AM |
8821 | + |
8822 | + /* Repeat the signature to be sure we have a header version */ | |
8823 | + char sig2[sig_size]; | |
8824 | + int header_version; | |
7e46296a AM |
8825 | +}; |
8826 | + | |
8827 | +void forget_signature_page(void); | |
8828 | +int toi_check_for_signature(void); | |
8829 | +int toi_bio_image_exists(int quiet); | |
8830 | +int get_signature_page(void); | |
8831 | +int toi_bio_mark_resume_attempted(int); | |
8832 | +extern char *toi_cur_sig_page; | |
8833 | +extern char *toi_orig_sig_page; | |
8834 | +int toi_bio_mark_have_image(void); | |
8835 | +extern struct sig_data *toi_sig_data; | |
8836 | +extern dev_t resume_dev_t; | |
8837 | +extern struct block_device *resume_block_device; | |
8838 | +extern struct block_device *header_block_device; | |
8839 | +extern unsigned long resume_firstblock; | |
8840 | + | |
8841 | +struct block_device *open_bdev(dev_t device, int display_errs); | |
8842 | +extern int current_stream; | |
8843 | +extern int more_readahead; | |
8844 | +int toi_do_io(int writing, struct block_device *bdev, long block0, | |
8845 | + struct page *page, int is_readahead, int syncio, int free_group); | |
8846 | +int get_main_pool_phys_params(void); | |
8847 | + | |
8848 | +void toi_close_bdev(struct block_device *bdev); | |
8849 | +struct block_device *toi_open_bdev(char *uuid, dev_t default_device, | |
8850 | + int display_errs); | |
8851 | + | |
8852 | +extern struct toi_module_ops toi_blockwriter_ops; | |
8853 | +void dump_block_chains(void); | |
8854 | +void debug_broken_header(void); | |
8855 | +extern unsigned long raw_pages_allocd, header_pages_reserved; | |
5dd10c98 AM |
8856 | +int toi_bio_chains_debug_info(char *buffer, int size); |
8857 | +void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd); | |
8858 | +int toi_bio_scan_for_image(int quiet); | |
8859 | +int toi_bio_get_header_version(void); | |
8860 | + | |
8861 | +void close_resume_dev_t(int force); | |
8862 | +int open_resume_dev_t(int force, int quiet); | |
7e46296a AM |
8863 | diff --git a/kernel/power/tuxonice_bio_signature.c b/kernel/power/tuxonice_bio_signature.c |
8864 | new file mode 100644 | |
cacc47f8 | 8865 | index 0000000..2ebee7e |
7e46296a AM |
8866 | --- /dev/null |
8867 | +++ b/kernel/power/tuxonice_bio_signature.c | |
de6743ae | 8868 | @@ -0,0 +1,404 @@ |
7e46296a AM |
8869 | +/* |
8870 | + * kernel/power/tuxonice_bio_signature.c | |
2380c486 | 8871 | + * |
5dd10c98 | 8872 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 8873 | + * |
7e46296a | 8874 | + * Distributed under GPLv2. |
2380c486 | 8875 | + * |
7e46296a AM |
8876 | + */ |
8877 | + | |
cacc47f8 | 8878 | +#include <linux/fs_uuid.h> |
7e46296a AM |
8879 | + |
8880 | +#include "tuxonice.h" | |
8881 | +#include "tuxonice_sysfs.h" | |
8882 | +#include "tuxonice_modules.h" | |
8883 | +#include "tuxonice_prepare_image.h" | |
8884 | +#include "tuxonice_bio.h" | |
8885 | +#include "tuxonice_ui.h" | |
8886 | +#include "tuxonice_alloc.h" | |
8887 | +#include "tuxonice_io.h" | |
8888 | +#include "tuxonice_builtin.h" | |
8889 | +#include "tuxonice_bio_internal.h" | |
8890 | + | |
8891 | +struct sig_data *toi_sig_data; | |
8892 | + | |
8893 | +/* Struct of swap header pages */ | |
8894 | + | |
5dd10c98 AM |
8895 | +struct old_sig_data { |
8896 | + dev_t device; | |
8897 | + unsigned long sector; | |
8898 | + int resume_attempted; | |
8899 | + int orig_sig_type; | |
8900 | +}; | |
8901 | + | |
7e46296a AM |
8902 | +union diskpage { |
8903 | + union swap_header swh; /* swh.magic is the only member used */ | |
8904 | + struct sig_data sig_data; | |
5dd10c98 | 8905 | + struct old_sig_data old_sig_data; |
7e46296a AM |
8906 | +}; |
8907 | + | |
8908 | +union p_diskpage { | |
8909 | + union diskpage *pointer; | |
8910 | + char *ptr; | |
8911 | + unsigned long address; | |
8912 | +}; | |
8913 | + | |
8914 | +char *toi_cur_sig_page; | |
8915 | +char *toi_orig_sig_page; | |
8916 | +int have_image; | |
5dd10c98 | 8917 | +int have_old_image; |
7e46296a AM |
8918 | + |
8919 | +int get_signature_page(void) | |
8920 | +{ | |
8921 | + if (!toi_cur_sig_page) { | |
8922 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
8923 | + "Allocating current signature page."); | |
8924 | + toi_cur_sig_page = (char *) toi_get_zeroed_page(38, | |
8925 | + TOI_ATOMIC_GFP); | |
8926 | + if (!toi_cur_sig_page) { | |
8927 | + printk(KERN_ERR "Failed to allocate memory for the " | |
8928 | + "current image signature.\n"); | |
8929 | + return -ENOMEM; | |
8930 | + } | |
8931 | + | |
8932 | + toi_sig_data = (struct sig_data *) toi_cur_sig_page; | |
8933 | + } | |
8934 | + | |
8935 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading signature from dev %lx," | |
8936 | + " sector %d.", | |
8937 | + resume_block_device->bd_dev, resume_firstblock); | |
8938 | + | |
8939 | + return toi_bio_ops.bdev_page_io(READ, resume_block_device, | |
8940 | + resume_firstblock, virt_to_page(toi_cur_sig_page)); | |
8941 | +} | |
8942 | + | |
8943 | +void forget_signature_page(void) | |
8944 | +{ | |
8945 | + if (toi_cur_sig_page) { | |
8946 | + toi_sig_data = NULL; | |
8947 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_cur_sig_page" | |
8948 | + " (%p).", toi_cur_sig_page); | |
8949 | + toi_free_page(38, (unsigned long) toi_cur_sig_page); | |
8950 | + toi_cur_sig_page = NULL; | |
8951 | + } | |
8952 | + | |
8953 | + if (toi_orig_sig_page) { | |
8954 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_orig_sig_page" | |
8955 | + " (%p).", toi_orig_sig_page); | |
8956 | + toi_free_page(38, (unsigned long) toi_orig_sig_page); | |
8957 | + toi_orig_sig_page = NULL; | |
8958 | + } | |
8959 | +} | |
8960 | + | |
5dd10c98 AM |
8961 | +/* |
8962 | + * We need to ensure we use the signature page that's currently on disk, | |
8963 | + * so as to not remove the image header. Post-atomic-restore, the orig sig | |
8964 | + * page will be empty, so we can use that as our method of knowing that we | |
8965 | + * need to load the on-disk signature and not use the non-image sig in | |
8966 | + * memory. (We're going to powerdown after writing the change, so it's safe. | |
8967 | + */ | |
7e46296a AM |
8968 | +int toi_bio_mark_resume_attempted(int flag) |
8969 | +{ | |
8970 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Make resume attempted = %d.", | |
8971 | + flag); | |
5dd10c98 AM |
8972 | + if (!toi_orig_sig_page) { |
8973 | + forget_signature_page(); | |
8974 | + get_signature_page(); | |
8975 | + } | |
7e46296a AM |
8976 | + toi_sig_data->resumed_before = flag; |
8977 | + return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, | |
8978 | + resume_firstblock, virt_to_page(toi_cur_sig_page)); | |
8979 | +} | |
8980 | + | |
8981 | +int toi_bio_mark_have_image(void) | |
8982 | +{ | |
5dd10c98 | 8983 | + int result = 0; |
7e46296a | 8984 | + char buf[32]; |
5dd10c98 | 8985 | + struct fs_info *fs_info; |
7e46296a AM |
8986 | + |
8987 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that an image exists."); | |
8988 | + memcpy(toi_sig_data->sig, tuxonice_signature, | |
8989 | + sizeof(tuxonice_signature)); | |
8990 | + toi_sig_data->have_image = 1; | |
8991 | + toi_sig_data->resumed_before = 0; | |
8992 | + toi_sig_data->header_dev_t = get_header_dev_t(); | |
8993 | + toi_sig_data->have_uuid = 0; | |
8994 | + | |
5dd10c98 AM |
8995 | + fs_info = fs_info_from_block_dev(get_header_bdev()); |
8996 | + if (fs_info && !IS_ERR(fs_info)) { | |
8997 | + memcpy(toi_sig_data->header_uuid, &fs_info->uuid, 16); | |
8998 | + free_fs_info(fs_info); | |
8999 | + } else | |
9000 | + result = (int) PTR_ERR(fs_info); | |
9001 | + | |
7e46296a AM |
9002 | + if (!result) { |
9003 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Got uuid for dev_t %s.", | |
9004 | + format_dev_t(buf, get_header_dev_t())); | |
9005 | + toi_sig_data->have_uuid = 1; | |
9006 | + } else | |
9007 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Could not get uuid for " | |
9008 | + "dev_t %s.", | |
9009 | + format_dev_t(buf, get_header_dev_t())); | |
9010 | + | |
9011 | + toi_sig_data->first_header_block = get_headerblock(); | |
9012 | + have_image = 1; | |
9013 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is %x. First block " | |
9014 | + "is %d.", toi_sig_data->header_dev_t, | |
9015 | + toi_sig_data->first_header_block); | |
9016 | + | |
5dd10c98 AM |
9017 | + memcpy(toi_sig_data->sig2, tuxonice_signature, |
9018 | + sizeof(tuxonice_signature)); | |
9019 | + toi_sig_data->header_version = TOI_HEADER_VERSION; | |
9020 | + | |
7e46296a AM |
9021 | + return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, |
9022 | + resume_firstblock, virt_to_page(toi_cur_sig_page)); | |
9023 | +} | |
9024 | + | |
5dd10c98 AM |
9025 | +int remove_old_signature(void) |
9026 | +{ | |
9027 | + union p_diskpage swap_header_page = (union p_diskpage) toi_cur_sig_page; | |
9028 | + char *orig_sig, *no_image_signature_contents; | |
9029 | + char *header_start = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP); | |
9030 | + int result; | |
9031 | + struct block_device *header_bdev; | |
9032 | + struct old_sig_data *old_sig_data = | |
9033 | + &swap_header_page.pointer->old_sig_data; | |
9034 | + | |
9035 | + header_bdev = toi_open_bdev(NULL, old_sig_data->device, 1); | |
9036 | + result = toi_bio_ops.bdev_page_io(READ, header_bdev, | |
9037 | + old_sig_data->sector, virt_to_page(header_start)); | |
9038 | + | |
9039 | + if (result) | |
9040 | + goto out; | |
9041 | + | |
9042 | + /* | |
9043 | + * TODO: Get the original contents of the first bytes of the swap | |
9044 | + * header page. | |
9045 | + */ | |
9046 | + if (!old_sig_data->orig_sig_type) | |
9047 | + orig_sig = "SWAP-SPACE"; | |
9048 | + else | |
9049 | + orig_sig = "SWAPSPACE2"; | |
9050 | + | |
9051 | + memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10); | |
9052 | + memcpy(swap_header_page.ptr, header_start, | |
9053 | + sizeof(no_image_signature_contents)); | |
9054 | + | |
9055 | + result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device, | |
9056 | + resume_firstblock, virt_to_page(swap_header_page.ptr)); | |
9057 | + | |
9058 | +out: | |
9059 | + toi_close_bdev(header_bdev); | |
9060 | + have_old_image = 0; | |
9061 | + toi_free_page(38, (unsigned long) header_start); | |
9062 | + return result; | |
9063 | +} | |
9064 | + | |
7e46296a AM |
9065 | +/* |
9066 | + * toi_bio_restore_original_signature - restore the original signature | |
2380c486 | 9067 | + * |
7e46296a AM |
9068 | + * At boot time (aborting pre atomic-restore), toi_orig_sig_page gets used. |
9069 | + * It will have the original signature page contents, stored in the image | |
9070 | + * header. Post atomic-restore, we use :toi_cur_sig_page, which will contain | |
9071 | + * the contents that were loaded when we started the cycle. | |
9072 | + */ | |
9073 | +int toi_bio_restore_original_signature(void) | |
9074 | +{ | |
9075 | + char *use = toi_orig_sig_page ? toi_orig_sig_page : toi_cur_sig_page; | |
9076 | + | |
5dd10c98 AM |
9077 | + if (have_old_image) |
9078 | + return remove_old_signature(); | |
9079 | + | |
7e46296a AM |
9080 | + if (!use) { |
9081 | + printk("toi_bio_restore_original_signature: No signature " | |
9082 | + "page loaded.\n"); | |
9083 | + return 0; | |
9084 | + } | |
9085 | + | |
9086 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that no image exists."); | |
9087 | + have_image = 0; | |
9088 | + toi_sig_data->have_image = 0; | |
9089 | + return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, | |
9090 | + resume_firstblock, virt_to_page(use)); | |
9091 | +} | |
9092 | + | |
9093 | +/* | |
9094 | + * check_for_signature - See whether we have an image. | |
2380c486 | 9095 | + * |
7e46296a AM |
9096 | + * Returns 0 if no image, 1 if there is one, -1 if indeterminate. |
9097 | + */ | |
9098 | +int toi_check_for_signature(void) | |
9099 | +{ | |
9100 | + union p_diskpage swap_header_page; | |
9101 | + int type; | |
9102 | + const char *normal_sigs[] = {"SWAP-SPACE", "SWAPSPACE2" }; | |
9103 | + const char *swsusp_sigs[] = {"S1SUSP", "S2SUSP", "S1SUSPEND" }; | |
9104 | + char *swap_header; | |
9105 | + | |
9106 | + if (!toi_cur_sig_page) { | |
9107 | + int result = get_signature_page(); | |
9108 | + | |
9109 | + if (result) | |
9110 | + return result; | |
9111 | + } | |
9112 | + | |
9113 | + /* | |
9114 | + * Start by looking for the binary header. | |
9115 | + */ | |
9116 | + if (!memcmp(tuxonice_signature, toi_cur_sig_page, | |
9117 | + sizeof(tuxonice_signature))) { | |
9118 | + have_image = toi_sig_data->have_image; | |
9119 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Have binary signature. " | |
9120 | + "Have image is %d.", have_image); | |
9121 | + if (have_image) | |
9122 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is " | |
9123 | + "%x. First block is %d.", | |
9124 | + toi_sig_data->header_dev_t, | |
9125 | + toi_sig_data->first_header_block); | |
9126 | + return toi_sig_data->have_image; | |
9127 | + } | |
9128 | + | |
9129 | + /* | |
9130 | + * Failing that, try old file allocator headers. | |
9131 | + */ | |
9132 | + | |
9133 | + if (!memcmp(HaveImage, toi_cur_sig_page, strlen(HaveImage))) { | |
9134 | + have_image = 1; | |
9135 | + return 1; | |
9136 | + } | |
9137 | + | |
9138 | + have_image = 0; | |
9139 | + | |
9140 | + if (!memcmp(NoImage, toi_cur_sig_page, strlen(NoImage))) | |
9141 | + return 0; | |
9142 | + | |
9143 | + /* | |
9144 | + * Nope? How about swap? | |
9145 | + */ | |
9146 | + swap_header_page = (union p_diskpage) toi_cur_sig_page; | |
9147 | + swap_header = swap_header_page.pointer->swh.magic.magic; | |
9148 | + | |
9149 | + /* Normal swapspace? */ | |
9150 | + for (type = 0; type < 2; type++) | |
9151 | + if (!memcmp(normal_sigs[type], swap_header, | |
9152 | + strlen(normal_sigs[type]))) | |
9153 | + return 0; | |
9154 | + | |
9155 | + /* Swsusp or uswsusp? */ | |
9156 | + for (type = 0; type < 3; type++) | |
9157 | + if (!memcmp(swsusp_sigs[type], swap_header, | |
9158 | + strlen(swsusp_sigs[type]))) | |
9159 | + return 2; | |
9160 | + | |
5dd10c98 AM |
9161 | + /* Old TuxOnIce version? */ |
9162 | + if (!memcmp(tuxonice_signature, swap_header, | |
9163 | + sizeof(tuxonice_signature) - 1)) { | |
9164 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Found old TuxOnIce " | |
9165 | + "signature."); | |
9166 | + have_old_image = 1; | |
9167 | + return 3; | |
9168 | + } | |
9169 | + | |
7e46296a AM |
9170 | + return -1; |
9171 | +} | |
9172 | + | |
9173 | +/* | |
9174 | + * Image_exists | |
2380c486 | 9175 | + * |
7e46296a | 9176 | + * Returns -1 if don't know, otherwise 0 (no) or 1 (yes). |
2380c486 | 9177 | + */ |
7e46296a AM |
9178 | +int toi_bio_image_exists(int quiet) |
9179 | +{ | |
9180 | + int result; | |
7e46296a | 9181 | + char *msg = NULL; |
2380c486 | 9182 | + |
7e46296a AM |
9183 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_image_exists."); |
9184 | + | |
9185 | + if (!resume_dev_t) { | |
9186 | + if (!quiet) | |
9187 | + printk(KERN_INFO "Not even trying to read header " | |
9188 | + "because resume_dev_t is not set.\n"); | |
9189 | + return -1; | |
9190 | + } | |
9191 | + | |
5dd10c98 AM |
9192 | + if (open_resume_dev_t(0, quiet)) |
9193 | + return -1; | |
7e46296a AM |
9194 | + |
9195 | + result = toi_check_for_signature(); | |
9196 | + | |
9197 | + clear_toi_state(TOI_RESUMED_BEFORE); | |
9198 | + if (toi_sig_data->resumed_before) | |
9199 | + set_toi_state(TOI_RESUMED_BEFORE); | |
9200 | + | |
9201 | + if (quiet || result == -ENOMEM) | |
de6743ae | 9202 | + return result; |
7e46296a AM |
9203 | + |
9204 | + if (result == -1) | |
9205 | + msg = "TuxOnIce: Unable to find a signature." | |
9206 | + " Could you have moved a swap file?\n"; | |
9207 | + else if (!result) | |
9208 | + msg = "TuxOnIce: No image found.\n"; | |
9209 | + else if (result == 1) | |
9210 | + msg = "TuxOnIce: Image found.\n"; | |
9211 | + else if (result == 2) | |
9212 | + msg = "TuxOnIce: uswsusp or swsusp image found.\n"; | |
5dd10c98 AM |
9213 | + else if (result == 3) |
9214 | + msg = "TuxOnIce: Old implementation's signature found.\n"; | |
2380c486 | 9215 | + |
7e46296a AM |
9216 | + printk(KERN_INFO "%s", msg); |
9217 | + | |
7e46296a AM |
9218 | + return result; |
9219 | +} | |
5dd10c98 AM |
9220 | + |
9221 | +int toi_bio_scan_for_image(int quiet) | |
9222 | +{ | |
9223 | + struct block_device *bdev; | |
9224 | + char default_name[255] = ""; | |
9225 | + | |
9226 | + if (!quiet) | |
9227 | + printk(KERN_DEBUG "Scanning swap devices for TuxOnIce " | |
9228 | + "signature...\n"); | |
9229 | + for (bdev = next_bdev_of_type(NULL, "swap"); bdev; | |
9230 | + bdev = next_bdev_of_type(bdev, "swap")) { | |
9231 | + int result; | |
9232 | + char name[255] = ""; | |
9233 | + sprintf(name, "%u:%u", MAJOR(bdev->bd_dev), | |
9234 | + MINOR(bdev->bd_dev)); | |
9235 | + if (!quiet) | |
9236 | + printk(KERN_DEBUG "- Trying %s.\n", name); | |
9237 | + resume_block_device = bdev; | |
9238 | + resume_dev_t = bdev->bd_dev; | |
9239 | + | |
9240 | + result = toi_check_for_signature(); | |
9241 | + | |
9242 | + resume_block_device = NULL; | |
9243 | + resume_dev_t = MKDEV(0, 0); | |
9244 | + | |
9245 | + if (!default_name[0]) | |
9246 | + strcpy(default_name, name); | |
9247 | + | |
9248 | + if (result == 1) { | |
9249 | + /* Got one! */ | |
9250 | + strcpy(resume_file, name); | |
9251 | + next_bdev_of_type(bdev, NULL); | |
9252 | + if (!quiet) | |
9253 | + printk(KERN_DEBUG " ==> Image found on %s.\n", | |
9254 | + resume_file); | |
9255 | + return 1; | |
9256 | + } | |
9257 | + forget_signature_page(); | |
9258 | + } | |
9259 | + | |
9260 | + if (!quiet) | |
9261 | + printk(KERN_DEBUG "TuxOnIce scan: No image found.\n"); | |
9262 | + strcpy(resume_file, default_name); | |
9263 | + return 0; | |
9264 | +} | |
9265 | + | |
9266 | +int toi_bio_get_header_version(void) | |
9267 | +{ | |
9268 | + return (memcmp(toi_sig_data->sig2, tuxonice_signature, | |
9269 | + sizeof(tuxonice_signature))) ? | |
9270 | + 0 : toi_sig_data->header_version; | |
9271 | + | |
9272 | +} | |
7e46296a AM |
9273 | diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c |
9274 | new file mode 100644 | |
cacc47f8 | 9275 | index 0000000..bc967d6 |
7e46296a AM |
9276 | --- /dev/null |
9277 | +++ b/kernel/power/tuxonice_builtin.c | |
cacc47f8 | 9278 | @@ -0,0 +1,372 @@ |
7e46296a | 9279 | +/* |
5dd10c98 | 9280 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
9281 | + * |
9282 | + * This file is released under the GPLv2. | |
9283 | + */ | |
9284 | +#include <linux/resume-trace.h> | |
9285 | +#include <linux/kernel.h> | |
9286 | +#include <linux/swap.h> | |
9287 | +#include <linux/syscalls.h> | |
9288 | +#include <linux/bio.h> | |
9289 | +#include <linux/root_dev.h> | |
9290 | +#include <linux/freezer.h> | |
9291 | +#include <linux/reboot.h> | |
9292 | +#include <linux/writeback.h> | |
9293 | +#include <linux/tty.h> | |
9294 | +#include <linux/crypto.h> | |
9295 | +#include <linux/cpu.h> | |
9296 | +#include <linux/ctype.h> | |
9297 | +#include "tuxonice_io.h" | |
2380c486 | 9298 | +#include "tuxonice.h" |
7e46296a AM |
9299 | +#include "tuxonice_extent.h" |
9300 | +#include "tuxonice_netlink.h" | |
9301 | +#include "tuxonice_prepare_image.h" | |
9302 | +#include "tuxonice_ui.h" | |
2380c486 | 9303 | +#include "tuxonice_sysfs.h" |
7e46296a AM |
9304 | +#include "tuxonice_pagedir.h" |
9305 | +#include "tuxonice_modules.h" | |
9306 | +#include "tuxonice_builtin.h" | |
9307 | +#include "tuxonice_power_off.h" | |
2380c486 | 9308 | + |
7e46296a AM |
9309 | +/* |
9310 | + * Highmem related functions (x86 only). | |
9311 | + */ | |
2380c486 | 9312 | + |
7e46296a | 9313 | +#ifdef CONFIG_HIGHMEM |
2380c486 | 9314 | + |
7e46296a AM |
9315 | +/** |
9316 | + * copyback_high: Restore highmem pages. | |
9317 | + * | |
9318 | + * Highmem data and pbe lists are/can be stored in highmem. | |
9319 | + * The format is slightly different to the lowmem pbe lists | |
9320 | + * used for the assembly code: the last pbe in each page is | |
9321 | + * a struct page * instead of struct pbe *, pointing to the | |
9322 | + * next page where pbes are stored (or NULL if happens to be | |
9323 | + * the end of the list). Since we don't want to generate | |
9324 | + * unnecessary deltas against swsusp code, we use a cast | |
9325 | + * instead of a union. | |
9326 | + **/ | |
2380c486 | 9327 | + |
7e46296a | 9328 | +static void copyback_high(void) |
2380c486 | 9329 | +{ |
7e46296a AM |
9330 | + struct page *pbe_page = (struct page *) restore_highmem_pblist; |
9331 | + struct pbe *this_pbe, *first_pbe; | |
9332 | + unsigned long *origpage, *copypage; | |
9333 | + int pbe_index = 1; | |
9334 | + | |
9335 | + if (!pbe_page) | |
9336 | + return; | |
9337 | + | |
9338 | + this_pbe = (struct pbe *) kmap_atomic(pbe_page, KM_BOUNCE_READ); | |
9339 | + first_pbe = this_pbe; | |
9340 | + | |
9341 | + while (this_pbe) { | |
9342 | + int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1; | |
9343 | + | |
9344 | + origpage = kmap_atomic((struct page *) this_pbe->orig_address, | |
9345 | + KM_BIO_DST_IRQ); | |
9346 | + copypage = kmap_atomic((struct page *) this_pbe->address, | |
9347 | + KM_BIO_SRC_IRQ); | |
9348 | + | |
9349 | + while (loop >= 0) { | |
9350 | + *(origpage + loop) = *(copypage + loop); | |
9351 | + loop--; | |
9352 | + } | |
9353 | + | |
9354 | + kunmap_atomic(origpage, KM_BIO_DST_IRQ); | |
9355 | + kunmap_atomic(copypage, KM_BIO_SRC_IRQ); | |
9356 | + | |
9357 | + if (!this_pbe->next) | |
9358 | + break; | |
9359 | + | |
9360 | + if (pbe_index < PBES_PER_PAGE) { | |
9361 | + this_pbe++; | |
9362 | + pbe_index++; | |
9363 | + } else { | |
9364 | + pbe_page = (struct page *) this_pbe->next; | |
9365 | + kunmap_atomic(first_pbe, KM_BOUNCE_READ); | |
9366 | + if (!pbe_page) | |
9367 | + return; | |
9368 | + this_pbe = (struct pbe *) kmap_atomic(pbe_page, | |
9369 | + KM_BOUNCE_READ); | |
9370 | + first_pbe = this_pbe; | |
9371 | + pbe_index = 1; | |
9372 | + } | |
2380c486 | 9373 | + } |
7e46296a | 9374 | + kunmap_atomic(first_pbe, KM_BOUNCE_READ); |
2380c486 JR |
9375 | +} |
9376 | + | |
7e46296a AM |
9377 | +#else /* CONFIG_HIGHMEM */ |
9378 | +static void copyback_high(void) { } | |
9379 | +#endif | |
2380c486 | 9380 | + |
7e46296a AM |
9381 | +char toi_wait_for_keypress_dev_console(int timeout) |
9382 | +{ | |
9383 | + int fd, this_timeout = 255; | |
9384 | + char key = '\0'; | |
9385 | + struct termios t, t_backup; | |
9386 | + | |
9387 | + /* We should be guaranteed /dev/console exists after populate_rootfs() | |
9388 | + * in init/main.c. | |
9389 | + */ | |
9390 | + fd = sys_open("/dev/console", O_RDONLY, 0); | |
9391 | + if (fd < 0) { | |
9392 | + printk(KERN_INFO "Couldn't open /dev/console.\n"); | |
9393 | + return key; | |
9394 | + } | |
9395 | + | |
9396 | + if (sys_ioctl(fd, TCGETS, (long)&t) < 0) | |
9397 | + goto out_close; | |
9398 | + | |
9399 | + memcpy(&t_backup, &t, sizeof(t)); | |
9400 | + | |
9401 | + t.c_lflag &= ~(ISIG|ICANON|ECHO); | |
9402 | + t.c_cc[VMIN] = 0; | |
9403 | + | |
9404 | +new_timeout: | |
9405 | + if (timeout > 0) { | |
9406 | + this_timeout = timeout < 26 ? timeout : 25; | |
9407 | + timeout -= this_timeout; | |
9408 | + this_timeout *= 10; | |
9409 | + } | |
9410 | + | |
9411 | + t.c_cc[VTIME] = this_timeout; | |
9412 | + | |
9413 | + if (sys_ioctl(fd, TCSETS, (long)&t) < 0) | |
9414 | + goto out_restore; | |
9415 | + | |
9416 | + while (1) { | |
9417 | + if (sys_read(fd, &key, 1) <= 0) { | |
9418 | + if (timeout) | |
9419 | + goto new_timeout; | |
9420 | + key = '\0'; | |
9421 | + break; | |
9422 | + } | |
9423 | + key = tolower(key); | |
9424 | + if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) { | |
9425 | + if (key == 'c') { | |
9426 | + set_toi_state(TOI_CONTINUE_REQ); | |
9427 | + break; | |
9428 | + } else if (key == ' ') | |
9429 | + break; | |
9430 | + } else | |
9431 | + break; | |
9432 | + } | |
2380c486 | 9433 | + |
7e46296a AM |
9434 | +out_restore: |
9435 | + sys_ioctl(fd, TCSETS, (long)&t_backup); | |
9436 | +out_close: | |
9437 | + sys_close(fd); | |
2380c486 | 9438 | + |
7e46296a AM |
9439 | + return key; |
9440 | +} | |
9441 | +EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console); | |
9442 | + | |
9443 | +struct toi_boot_kernel_data toi_bkd __nosavedata | |
9444 | + __attribute__((aligned(PAGE_SIZE))) = { | |
9445 | + MY_BOOT_KERNEL_DATA_VERSION, | |
9446 | + 0, | |
9447 | +#ifdef CONFIG_TOI_REPLACE_SWSUSP | |
9448 | + (1 << TOI_REPLACE_SWSUSP) | | |
9449 | +#endif | |
9450 | + (1 << TOI_NO_FLUSHER_THREAD) | | |
9451 | + (1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG), | |
2380c486 | 9452 | +}; |
7e46296a | 9453 | +EXPORT_SYMBOL_GPL(toi_bkd); |
2380c486 | 9454 | + |
5dd10c98 | 9455 | +struct block_device *toi_open_by_devnum(dev_t dev) |
7e46296a AM |
9456 | +{ |
9457 | + struct block_device *bdev = bdget(dev); | |
9458 | + int err = -ENOMEM; | |
9459 | + if (bdev) | |
5dd10c98 | 9460 | + err = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); |
7e46296a AM |
9461 | + return err ? ERR_PTR(err) : bdev; |
9462 | +} | |
9463 | +EXPORT_SYMBOL_GPL(toi_open_by_devnum); | |
2380c486 | 9464 | + |
5dd10c98 AM |
9465 | +/** |
9466 | + * toi_close_bdev: Close a swap bdev. | |
9467 | + * | |
9468 | + * int: The swap entry number to close. | |
9469 | + */ | |
9470 | +void toi_close_bdev(struct block_device *bdev) | |
9471 | +{ | |
9472 | + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); | |
9473 | +} | |
9474 | +EXPORT_SYMBOL_GPL(toi_close_bdev); | |
9475 | + | |
7e46296a AM |
9476 | +int toi_wait = CONFIG_TOI_DEFAULT_WAIT; |
9477 | +EXPORT_SYMBOL_GPL(toi_wait); | |
2380c486 | 9478 | + |
7e46296a AM |
9479 | +struct toi_core_fns *toi_core_fns; |
9480 | +EXPORT_SYMBOL_GPL(toi_core_fns); | |
2380c486 | 9481 | + |
7e46296a AM |
9482 | +unsigned long toi_result; |
9483 | +EXPORT_SYMBOL_GPL(toi_result); | |
2380c486 | 9484 | + |
7e46296a AM |
9485 | +struct pagedir pagedir1 = {1}; |
9486 | +EXPORT_SYMBOL_GPL(pagedir1); | |
2380c486 | 9487 | + |
7e46296a AM |
9488 | +unsigned long toi_get_nonconflicting_page(void) |
9489 | +{ | |
9490 | + return toi_core_fns->get_nonconflicting_page(); | |
9491 | +} | |
2380c486 | 9492 | + |
7e46296a AM |
9493 | +int toi_post_context_save(void) |
9494 | +{ | |
9495 | + return toi_core_fns->post_context_save(); | |
9496 | +} | |
2380c486 | 9497 | + |
7e46296a AM |
9498 | +int try_tuxonice_hibernate(void) |
9499 | +{ | |
9500 | + if (!toi_core_fns) | |
9501 | + return -ENODEV; | |
2380c486 | 9502 | + |
7e46296a AM |
9503 | + return toi_core_fns->try_hibernate(); |
9504 | +} | |
2380c486 | 9505 | + |
7e46296a AM |
9506 | +static int num_resume_calls; |
9507 | +#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL | |
9508 | +static int ignore_late_initcall = 1; | |
9509 | +#else | |
9510 | +static int ignore_late_initcall; | |
9511 | +#endif | |
2380c486 | 9512 | + |
7e46296a AM |
9513 | +int toi_translate_err_default = TOI_CONTINUE_REQ; |
9514 | +EXPORT_SYMBOL_GPL(toi_translate_err_default); | |
2380c486 | 9515 | + |
7e46296a | 9516 | +void try_tuxonice_resume(void) |
2380c486 | 9517 | +{ |
7e46296a AM |
9518 | + /* Don't let it wrap around eventually */ |
9519 | + if (num_resume_calls < 2) | |
9520 | + num_resume_calls++; | |
2380c486 | 9521 | + |
7e46296a AM |
9522 | + if (num_resume_calls == 1 && ignore_late_initcall) { |
9523 | + printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n"); | |
9524 | + return; | |
9525 | + } | |
2380c486 | 9526 | + |
7e46296a AM |
9527 | + if (toi_core_fns) |
9528 | + toi_core_fns->try_resume(); | |
9529 | + else | |
9530 | + printk(KERN_INFO "TuxOnIce core not loaded yet.\n"); | |
2380c486 JR |
9531 | +} |
9532 | + | |
7e46296a | 9533 | +int toi_lowlevel_builtin(void) |
2380c486 | 9534 | +{ |
7e46296a | 9535 | + int error = 0; |
2380c486 | 9536 | + |
7e46296a AM |
9537 | + save_processor_state(); |
9538 | + error = swsusp_arch_suspend(); | |
9539 | + if (error) | |
9540 | + printk(KERN_ERR "Error %d hibernating\n", error); | |
2380c486 | 9541 | + |
7e46296a AM |
9542 | + /* Restore control flow appears here */ |
9543 | + if (!toi_in_hibernate) { | |
9544 | + copyback_high(); | |
9545 | + set_toi_state(TOI_NOW_RESUMING); | |
2380c486 JR |
9546 | + } |
9547 | + | |
7e46296a AM |
9548 | + restore_processor_state(); |
9549 | + | |
9550 | + return error; | |
2380c486 | 9551 | +} |
7e46296a | 9552 | +EXPORT_SYMBOL_GPL(toi_lowlevel_builtin); |
2380c486 | 9553 | + |
7e46296a AM |
9554 | +unsigned long toi_compress_bytes_in; |
9555 | +EXPORT_SYMBOL_GPL(toi_compress_bytes_in); | |
2380c486 | 9556 | + |
7e46296a AM |
9557 | +unsigned long toi_compress_bytes_out; |
9558 | +EXPORT_SYMBOL_GPL(toi_compress_bytes_out); | |
2380c486 | 9559 | + |
7e46296a AM |
9560 | +unsigned long toi_state = ((1 << TOI_BOOT_TIME) | |
9561 | + (1 << TOI_IGNORE_LOGLEVEL) | | |
9562 | + (1 << TOI_IO_STOPPED)); | |
9563 | +EXPORT_SYMBOL_GPL(toi_state); | |
2380c486 | 9564 | + |
7e46296a AM |
9565 | +/* The number of hibernates we have started (some may have been cancelled) */ |
9566 | +unsigned int nr_hibernates; | |
9567 | +EXPORT_SYMBOL_GPL(nr_hibernates); | |
2380c486 | 9568 | + |
7e46296a AM |
9569 | +int toi_running; |
9570 | +EXPORT_SYMBOL_GPL(toi_running); | |
2380c486 | 9571 | + |
7e46296a AM |
9572 | +__nosavedata int toi_in_hibernate; |
9573 | +EXPORT_SYMBOL_GPL(toi_in_hibernate); | |
2380c486 | 9574 | + |
7e46296a AM |
9575 | +__nosavedata struct pbe *restore_highmem_pblist; |
9576 | +EXPORT_SYMBOL_GPL(restore_highmem_pblist); | |
2380c486 | 9577 | + |
cacc47f8 AM |
9578 | +void toi_read_lock_tasklist(void) |
9579 | +{ | |
9580 | + read_lock(&tasklist_lock); | |
9581 | +} | |
9582 | +EXPORT_SYMBOL_GPL(toi_read_lock_tasklist); | |
9583 | + | |
9584 | +void toi_read_unlock_tasklist(void) | |
9585 | +{ | |
9586 | + read_unlock(&tasklist_lock); | |
9587 | +} | |
9588 | +EXPORT_SYMBOL_GPL(toi_read_unlock_tasklist); | |
9589 | + | |
7e46296a AM |
9590 | +static int __init toi_wait_setup(char *str) |
9591 | +{ | |
9592 | + int value; | |
2380c486 | 9593 | + |
7e46296a AM |
9594 | + if (sscanf(str, "=%d", &value)) { |
9595 | + if (value < -1 || value > 255) | |
9596 | + printk(KERN_INFO "TuxOnIce_wait outside range -1 to " | |
9597 | + "255.\n"); | |
9598 | + else | |
9599 | + toi_wait = value; | |
9600 | + } | |
2380c486 | 9601 | + |
2380c486 JR |
9602 | + return 1; |
9603 | +} | |
9604 | + | |
7e46296a | 9605 | +__setup("toi_wait", toi_wait_setup); |
2380c486 | 9606 | + |
7e46296a AM |
9607 | +static int __init toi_translate_retry_setup(char *str) |
9608 | +{ | |
9609 | + toi_translate_err_default = 0; | |
9610 | + return 1; | |
9611 | +} | |
2380c486 | 9612 | + |
7e46296a | 9613 | +__setup("toi_translate_retry", toi_translate_retry_setup); |
2380c486 | 9614 | + |
7e46296a AM |
9615 | +static int __init toi_debug_setup(char *str) |
9616 | +{ | |
9617 | + toi_bkd.toi_action |= (1 << TOI_LOGALL) | (1 << TOI_PAUSE); | |
9618 | + toi_bkd.toi_debug_state = 255; | |
9619 | + toi_bkd.toi_default_console_level = 7; | |
9620 | + return 1; | |
2380c486 JR |
9621 | +} |
9622 | + | |
7e46296a AM |
9623 | +__setup("toi_debug_setup", toi_debug_setup); |
9624 | + | |
9625 | +static int __init toi_ignore_late_initcall_setup(char *str) | |
2380c486 | 9626 | +{ |
7e46296a | 9627 | + int value; |
2380c486 | 9628 | + |
7e46296a AM |
9629 | + if (sscanf(str, "=%d", &value)) |
9630 | + ignore_late_initcall = value; | |
2380c486 | 9631 | + |
7e46296a AM |
9632 | + return 1; |
9633 | +} | |
9634 | + | |
9635 | +__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup); | |
2380c486 | 9636 | + |
7e46296a AM |
9637 | +int toi_force_no_multithreaded; |
9638 | +EXPORT_SYMBOL_GPL(toi_force_no_multithreaded); | |
2380c486 | 9639 | + |
7e46296a AM |
9640 | +static int __init toi_force_no_multithreaded_setup(char *str) |
9641 | +{ | |
9642 | + int value; | |
2380c486 | 9643 | + |
7e46296a AM |
9644 | + if (sscanf(str, "=%d", &value)) |
9645 | + toi_force_no_multithreaded = value; | |
9646 | + | |
9647 | + return 1; | |
9648 | +} | |
2380c486 | 9649 | + |
7e46296a AM |
9650 | +__setup("toi_no_multithreaded", toi_force_no_multithreaded_setup); |
9651 | diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h | |
9652 | new file mode 100644 | |
cacc47f8 | 9653 | index 0000000..ab67d31 |
7e46296a AM |
9654 | --- /dev/null |
9655 | +++ b/kernel/power/tuxonice_builtin.h | |
cacc47f8 | 9656 | @@ -0,0 +1,32 @@ |
2380c486 | 9657 | +/* |
5dd10c98 | 9658 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
9659 | + * |
9660 | + * This file is released under the GPLv2. | |
2380c486 | 9661 | + */ |
7e46296a | 9662 | +#include <asm/setup.h> |
2380c486 | 9663 | + |
7e46296a AM |
9664 | +extern struct toi_core_fns *toi_core_fns; |
9665 | +extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out; | |
9666 | +extern unsigned int nr_hibernates; | |
9667 | +extern int toi_in_hibernate; | |
2380c486 | 9668 | + |
7e46296a | 9669 | +extern __nosavedata struct pbe *restore_highmem_pblist; |
2380c486 | 9670 | + |
7e46296a | 9671 | +int toi_lowlevel_builtin(void); |
2380c486 | 9672 | + |
7e46296a AM |
9673 | +#ifdef CONFIG_HIGHMEM |
9674 | +extern __nosavedata struct zone_data *toi_nosave_zone_list; | |
9675 | +extern __nosavedata unsigned long toi_nosave_max_pfn; | |
9676 | +#endif | |
2380c486 | 9677 | + |
7e46296a AM |
9678 | +extern unsigned long toi_get_nonconflicting_page(void); |
9679 | +extern int toi_post_context_save(void); | |
2380c486 | 9680 | + |
7e46296a | 9681 | +extern char toi_wait_for_keypress_dev_console(int timeout); |
5dd10c98 AM |
9682 | +extern struct block_device *toi_open_by_devnum(dev_t dev); |
9683 | +extern void toi_close_bdev(struct block_device *bdev); | |
7e46296a AM |
9684 | +extern int toi_wait; |
9685 | +extern int toi_translate_err_default; | |
9686 | +extern int toi_force_no_multithreaded; | |
cacc47f8 AM |
9687 | +extern void toi_read_lock_tasklist(void); |
9688 | +extern void toi_read_unlock_tasklist(void); | |
7e46296a AM |
9689 | diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c |
9690 | new file mode 100644 | |
5dd10c98 | 9691 | index 0000000..3ec2c76 |
7e46296a AM |
9692 | --- /dev/null |
9693 | +++ b/kernel/power/tuxonice_checksum.c | |
5dd10c98 | 9694 | @@ -0,0 +1,377 @@ |
7e46296a AM |
9695 | +/* |
9696 | + * kernel/power/tuxonice_checksum.c | |
9697 | + * | |
5dd10c98 | 9698 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
9699 | + * |
9700 | + * This file is released under the GPLv2. | |
9701 | + * | |
9702 | + * This file contains data checksum routines for TuxOnIce, | |
9703 | + * using cryptoapi. They are used to locate any modifications | |
9704 | + * made to pageset 2 while we're saving it. | |
9705 | + */ | |
2380c486 | 9706 | + |
7e46296a AM |
9707 | +#include <linux/suspend.h> |
9708 | +#include <linux/highmem.h> | |
9709 | +#include <linux/vmalloc.h> | |
9710 | +#include <linux/crypto.h> | |
9711 | +#include <linux/scatterlist.h> | |
2380c486 | 9712 | + |
7e46296a AM |
9713 | +#include "tuxonice.h" |
9714 | +#include "tuxonice_modules.h" | |
9715 | +#include "tuxonice_sysfs.h" | |
9716 | +#include "tuxonice_io.h" | |
9717 | +#include "tuxonice_pageflags.h" | |
9718 | +#include "tuxonice_checksum.h" | |
9719 | +#include "tuxonice_pagedir.h" | |
9720 | +#include "tuxonice_alloc.h" | |
2380c486 | 9721 | + |
7e46296a | 9722 | +static struct toi_module_ops toi_checksum_ops; |
2380c486 | 9723 | + |
7e46296a AM |
9724 | +/* Constant at the mo, but I might allow tuning later */ |
9725 | +static char toi_checksum_name[32] = "md4"; | |
9726 | +/* Bytes per checksum */ | |
9727 | +#define CHECKSUM_SIZE (16) | |
2380c486 | 9728 | + |
7e46296a | 9729 | +#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE) |
2380c486 | 9730 | + |
7e46296a AM |
9731 | +struct cpu_context { |
9732 | + struct crypto_hash *transform; | |
9733 | + struct hash_desc desc; | |
9734 | + struct scatterlist sg[2]; | |
9735 | + char *buf; | |
9736 | +}; | |
2380c486 | 9737 | + |
7e46296a AM |
9738 | +static DEFINE_PER_CPU(struct cpu_context, contexts); |
9739 | +static int pages_allocated; | |
9740 | +static unsigned long page_list; | |
2380c486 | 9741 | + |
7e46296a | 9742 | +static int toi_num_resaved; |
2380c486 | 9743 | + |
7e46296a AM |
9744 | +static unsigned long this_checksum, next_page; |
9745 | +static int checksum_index; | |
2380c486 | 9746 | + |
7e46296a AM |
9747 | +static inline int checksum_pages_needed(void) |
9748 | +{ | |
9749 | + return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE); | |
9750 | +} | |
2380c486 | 9751 | + |
7e46296a | 9752 | +/* ---- Local buffer management ---- */ |
2380c486 | 9753 | + |
7e46296a AM |
9754 | +/* |
9755 | + * toi_checksum_cleanup | |
9756 | + * | |
9757 | + * Frees memory allocated for our labours. | |
9758 | + */ | |
9759 | +static void toi_checksum_cleanup(int ending_cycle) | |
9760 | +{ | |
9761 | + int cpu; | |
2380c486 | 9762 | + |
7e46296a AM |
9763 | + if (ending_cycle) { |
9764 | + for_each_online_cpu(cpu) { | |
9765 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
9766 | + if (this->transform) { | |
9767 | + crypto_free_hash(this->transform); | |
9768 | + this->transform = NULL; | |
9769 | + this->desc.tfm = NULL; | |
9770 | + } | |
2380c486 | 9771 | + |
7e46296a AM |
9772 | + if (this->buf) { |
9773 | + toi_free_page(27, (unsigned long) this->buf); | |
9774 | + this->buf = NULL; | |
9775 | + } | |
2380c486 | 9776 | + } |
7e46296a AM |
9777 | + } |
9778 | +} | |
2380c486 | 9779 | + |
7e46296a AM |
9780 | +/* |
9781 | + * toi_crypto_initialise | |
9782 | + * | |
9783 | + * Prepare to do some work by allocating buffers and transforms. | |
9784 | + * Returns: Int: Zero. Even if we can't set up checksum, we still | |
9785 | + * seek to hibernate. | |
9786 | + */ | |
9787 | +static int toi_checksum_initialise(int starting_cycle) | |
9788 | +{ | |
9789 | + int cpu; | |
2380c486 | 9790 | + |
7e46296a AM |
9791 | + if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled) |
9792 | + return 0; | |
9793 | + | |
9794 | + if (!*toi_checksum_name) { | |
9795 | + printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n"); | |
9796 | + return 1; | |
9797 | + } | |
9798 | + | |
9799 | + for_each_online_cpu(cpu) { | |
9800 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
9801 | + struct page *page; | |
9802 | + | |
9803 | + this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0); | |
9804 | + if (IS_ERR(this->transform)) { | |
9805 | + printk(KERN_INFO "TuxOnIce: Failed to initialise the " | |
9806 | + "%s checksum algorithm: %ld.\n", | |
9807 | + toi_checksum_name, (long) this->transform); | |
9808 | + this->transform = NULL; | |
9809 | + return 1; | |
2380c486 | 9810 | + } |
2380c486 | 9811 | + |
7e46296a AM |
9812 | + this->desc.tfm = this->transform; |
9813 | + this->desc.flags = 0; | |
2380c486 | 9814 | + |
7e46296a AM |
9815 | + page = toi_alloc_page(27, GFP_KERNEL); |
9816 | + if (!page) | |
9817 | + return 1; | |
9818 | + this->buf = page_address(page); | |
9819 | + sg_init_one(&this->sg[0], this->buf, PAGE_SIZE); | |
9820 | + } | |
2380c486 JR |
9821 | + return 0; |
9822 | +} | |
9823 | + | |
9824 | +/* | |
7e46296a AM |
9825 | + * toi_checksum_print_debug_stats |
9826 | + * @buffer: Pointer to a buffer into which the debug info will be printed. | |
9827 | + * @size: Size of the buffer. | |
9828 | + * | |
9829 | + * Print information to be recorded for debugging purposes into a buffer. | |
9830 | + * Returns: Number of characters written to the buffer. | |
2380c486 | 9831 | + */ |
2380c486 | 9832 | + |
7e46296a AM |
9833 | +static int toi_checksum_print_debug_stats(char *buffer, int size) |
9834 | +{ | |
9835 | + int len; | |
2380c486 | 9836 | + |
7e46296a AM |
9837 | + if (!toi_checksum_ops.enabled) |
9838 | + return scnprintf(buffer, size, | |
9839 | + "- Checksumming disabled.\n"); | |
2380c486 | 9840 | + |
7e46296a AM |
9841 | + len = scnprintf(buffer, size, "- Checksum method is '%s'.\n", |
9842 | + toi_checksum_name); | |
9843 | + len += scnprintf(buffer + len, size - len, | |
9844 | + " %d pages resaved in atomic copy.\n", toi_num_resaved); | |
9845 | + return len; | |
9846 | +} | |
2380c486 | 9847 | + |
7e46296a AM |
9848 | +static int toi_checksum_memory_needed(void) |
9849 | +{ | |
9850 | + return toi_checksum_ops.enabled ? | |
9851 | + checksum_pages_needed() << PAGE_SHIFT : 0; | |
2380c486 JR |
9852 | +} |
9853 | + | |
7e46296a AM |
9854 | +static int toi_checksum_storage_needed(void) |
9855 | +{ | |
9856 | + if (toi_checksum_ops.enabled) | |
9857 | + return strlen(toi_checksum_name) + sizeof(int) + 1; | |
9858 | + else | |
9859 | + return 0; | |
9860 | +} | |
2380c486 | 9861 | + |
7e46296a AM |
9862 | +/* |
9863 | + * toi_checksum_save_config_info | |
9864 | + * @buffer: Pointer to a buffer of size PAGE_SIZE. | |
9865 | + * | |
9866 | + * Save informaton needed when reloading the image at resume time. | |
9867 | + * Returns: Number of bytes used for saving our data. | |
9868 | + */ | |
9869 | +static int toi_checksum_save_config_info(char *buffer) | |
9870 | +{ | |
9871 | + int namelen = strlen(toi_checksum_name) + 1; | |
9872 | + int total_len; | |
2380c486 | 9873 | + |
7e46296a AM |
9874 | + *((unsigned int *) buffer) = namelen; |
9875 | + strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen); | |
9876 | + total_len = sizeof(unsigned int) + namelen; | |
9877 | + return total_len; | |
9878 | +} | |
2380c486 | 9879 | + |
7e46296a AM |
9880 | +/* toi_checksum_load_config_info |
9881 | + * @buffer: Pointer to the start of the data. | |
9882 | + * @size: Number of bytes that were saved. | |
9883 | + * | |
9884 | + * Description: Reload information needed for dechecksuming the image at | |
9885 | + * resume time. | |
9886 | + */ | |
9887 | +static void toi_checksum_load_config_info(char *buffer, int size) | |
2380c486 | 9888 | +{ |
7e46296a | 9889 | + int namelen; |
2380c486 | 9890 | + |
7e46296a AM |
9891 | + namelen = *((unsigned int *) (buffer)); |
9892 | + strncpy(toi_checksum_name, buffer + sizeof(unsigned int), | |
9893 | + namelen); | |
9894 | + return; | |
9895 | +} | |
2380c486 | 9896 | + |
7e46296a AM |
9897 | +/* |
9898 | + * Free Checksum Memory | |
9899 | + */ | |
2380c486 | 9900 | + |
7e46296a AM |
9901 | +void free_checksum_pages(void) |
9902 | +{ | |
9903 | + while (pages_allocated) { | |
9904 | + unsigned long next = *((unsigned long *) page_list); | |
9905 | + ClearPageNosave(virt_to_page(page_list)); | |
9906 | + toi_free_page(15, (unsigned long) page_list); | |
9907 | + page_list = next; | |
9908 | + pages_allocated--; | |
2380c486 | 9909 | + } |
2380c486 JR |
9910 | +} |
9911 | + | |
7e46296a AM |
9912 | +/* |
9913 | + * Allocate Checksum Memory | |
9914 | + */ | |
9915 | + | |
9916 | +int allocate_checksum_pages(void) | |
2380c486 | 9917 | +{ |
7e46296a | 9918 | + int pages_needed = checksum_pages_needed(); |
2380c486 | 9919 | + |
7e46296a AM |
9920 | + if (!toi_checksum_ops.enabled) |
9921 | + return 0; | |
9922 | + | |
9923 | + while (pages_allocated < pages_needed) { | |
9924 | + unsigned long *new_page = | |
9925 | + (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP); | |
9926 | + if (!new_page) { | |
9927 | + printk(KERN_ERR "Unable to allocate checksum pages.\n"); | |
9928 | + return -ENOMEM; | |
2380c486 | 9929 | + } |
7e46296a AM |
9930 | + SetPageNosave(virt_to_page(new_page)); |
9931 | + (*new_page) = page_list; | |
9932 | + page_list = (unsigned long) new_page; | |
9933 | + pages_allocated++; | |
2380c486 | 9934 | + } |
2380c486 | 9935 | + |
7e46296a AM |
9936 | + next_page = (unsigned long) page_list; |
9937 | + checksum_index = 0; | |
2380c486 | 9938 | + |
7e46296a | 9939 | + return 0; |
2380c486 JR |
9940 | +} |
9941 | + | |
7e46296a | 9942 | +char *tuxonice_get_next_checksum(void) |
2380c486 | 9943 | +{ |
7e46296a AM |
9944 | + if (!toi_checksum_ops.enabled) |
9945 | + return NULL; | |
2380c486 | 9946 | + |
7e46296a AM |
9947 | + if (checksum_index % CHECKSUMS_PER_PAGE) |
9948 | + this_checksum += CHECKSUM_SIZE; | |
9949 | + else { | |
9950 | + this_checksum = next_page + sizeof(void *); | |
9951 | + next_page = *((unsigned long *) next_page); | |
9952 | + } | |
2380c486 | 9953 | + |
7e46296a AM |
9954 | + checksum_index++; |
9955 | + return (char *) this_checksum; | |
2380c486 JR |
9956 | +} |
9957 | + | |
7e46296a | 9958 | +int tuxonice_calc_checksum(struct page *page, char *checksum_locn) |
2380c486 | 9959 | +{ |
7e46296a AM |
9960 | + char *pa; |
9961 | + int result, cpu = smp_processor_id(); | |
9962 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 9963 | + |
7e46296a AM |
9964 | + if (!toi_checksum_ops.enabled) |
9965 | + return 0; | |
2380c486 | 9966 | + |
7e46296a AM |
9967 | + pa = kmap(page); |
9968 | + memcpy(ctx->buf, pa, PAGE_SIZE); | |
9969 | + kunmap(page); | |
9970 | + result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, | |
9971 | + checksum_locn); | |
9972 | + if (result) | |
9973 | + printk(KERN_ERR "TuxOnIce checksumming: crypto_hash_digest " | |
9974 | + "returned %d.\n", result); | |
9975 | + return result; | |
2380c486 | 9976 | +} |
7e46296a AM |
9977 | +/* |
9978 | + * Calculate checksums | |
9979 | + */ | |
2380c486 | 9980 | + |
7e46296a | 9981 | +void check_checksums(void) |
2380c486 | 9982 | +{ |
7e46296a AM |
9983 | + int pfn, index = 0, cpu = smp_processor_id(); |
9984 | + char current_checksum[CHECKSUM_SIZE]; | |
9985 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 9986 | + |
7e46296a | 9987 | + if (!toi_checksum_ops.enabled) |
2380c486 JR |
9988 | + return; |
9989 | + | |
7e46296a | 9990 | + next_page = (unsigned long) page_list; |
2380c486 | 9991 | + |
7e46296a AM |
9992 | + toi_num_resaved = 0; |
9993 | + this_checksum = 0; | |
2380c486 | 9994 | + |
7e46296a AM |
9995 | + memory_bm_position_reset(pageset2_map); |
9996 | + for (pfn = memory_bm_next_pfn(pageset2_map); pfn != BM_END_OF_MAP; | |
9997 | + pfn = memory_bm_next_pfn(pageset2_map)) { | |
9998 | + int ret; | |
9999 | + char *pa; | |
10000 | + struct page *page = pfn_to_page(pfn); | |
2380c486 | 10001 | + |
7e46296a AM |
10002 | + if (index % CHECKSUMS_PER_PAGE) { |
10003 | + this_checksum += CHECKSUM_SIZE; | |
10004 | + } else { | |
10005 | + this_checksum = next_page + sizeof(void *); | |
10006 | + next_page = *((unsigned long *) next_page); | |
10007 | + } | |
2380c486 | 10008 | + |
7e46296a AM |
10009 | + /* Done when IRQs disabled so must be atomic */ |
10010 | + pa = kmap_atomic(page, KM_USER1); | |
10011 | + memcpy(ctx->buf, pa, PAGE_SIZE); | |
10012 | + kunmap_atomic(pa, KM_USER1); | |
10013 | + ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, | |
10014 | + current_checksum); | |
2380c486 | 10015 | + |
7e46296a AM |
10016 | + if (ret) { |
10017 | + printk(KERN_INFO "Digest failed. Returned %d.\n", ret); | |
10018 | + return; | |
10019 | + } | |
2380c486 | 10020 | + |
7e46296a AM |
10021 | + if (memcmp(current_checksum, (char *) this_checksum, |
10022 | + CHECKSUM_SIZE)) { | |
10023 | + SetPageResave(pfn_to_page(pfn)); | |
10024 | + toi_num_resaved++; | |
10025 | + if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED)) | |
10026 | + set_abort_result(TOI_RESAVE_NEEDED); | |
10027 | + } | |
2380c486 | 10028 | + |
7e46296a AM |
10029 | + index++; |
10030 | + } | |
2380c486 | 10031 | +} |
2380c486 | 10032 | + |
7e46296a AM |
10033 | +static struct toi_sysfs_data sysfs_params[] = { |
10034 | + SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0, | |
10035 | + NULL), | |
10036 | + SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action, | |
10037 | + TOI_ABORT_ON_RESAVE_NEEDED, 0) | |
10038 | +}; | |
10039 | + | |
10040 | +/* | |
10041 | + * Ops structure. | |
2380c486 | 10042 | + */ |
7e46296a AM |
10043 | +static struct toi_module_ops toi_checksum_ops = { |
10044 | + .type = MISC_MODULE, | |
10045 | + .name = "checksumming", | |
10046 | + .directory = "checksum", | |
10047 | + .module = THIS_MODULE, | |
10048 | + .initialise = toi_checksum_initialise, | |
10049 | + .cleanup = toi_checksum_cleanup, | |
10050 | + .print_debug_info = toi_checksum_print_debug_stats, | |
10051 | + .save_config_info = toi_checksum_save_config_info, | |
10052 | + .load_config_info = toi_checksum_load_config_info, | |
10053 | + .memory_needed = toi_checksum_memory_needed, | |
10054 | + .storage_needed = toi_checksum_storage_needed, | |
2380c486 | 10055 | + |
7e46296a AM |
10056 | + .sysfs_data = sysfs_params, |
10057 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
10058 | + sizeof(struct toi_sysfs_data), | |
10059 | +}; | |
2380c486 | 10060 | + |
7e46296a AM |
10061 | +/* ---- Registration ---- */ |
10062 | +int toi_checksum_init(void) | |
2380c486 | 10063 | +{ |
7e46296a AM |
10064 | + int result = toi_register_module(&toi_checksum_ops); |
10065 | + return result; | |
2380c486 JR |
10066 | +} |
10067 | + | |
7e46296a | 10068 | +void toi_checksum_exit(void) |
2380c486 | 10069 | +{ |
7e46296a | 10070 | + toi_unregister_module(&toi_checksum_ops); |
2380c486 | 10071 | +} |
7e46296a AM |
10072 | diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h |
10073 | new file mode 100644 | |
5dd10c98 | 10074 | index 0000000..0f2812e |
7e46296a AM |
10075 | --- /dev/null |
10076 | +++ b/kernel/power/tuxonice_checksum.h | |
5dd10c98 | 10077 | @@ -0,0 +1,31 @@ |
7e46296a AM |
10078 | +/* |
10079 | + * kernel/power/tuxonice_checksum.h | |
2380c486 | 10080 | + * |
5dd10c98 | 10081 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
10082 | + * |
10083 | + * This file is released under the GPLv2. | |
10084 | + * | |
10085 | + * This file contains data checksum routines for TuxOnIce, | |
10086 | + * using cryptoapi. They are used to locate any modifications | |
10087 | + * made to pageset 2 while we're saving it. | |
2380c486 | 10088 | + */ |
2380c486 | 10089 | + |
7e46296a AM |
10090 | +#if defined(CONFIG_TOI_CHECKSUM) |
10091 | +extern int toi_checksum_init(void); | |
10092 | +extern void toi_checksum_exit(void); | |
10093 | +void check_checksums(void); | |
10094 | +int allocate_checksum_pages(void); | |
10095 | +void free_checksum_pages(void); | |
10096 | +char *tuxonice_get_next_checksum(void); | |
10097 | +int tuxonice_calc_checksum(struct page *page, char *checksum_locn); | |
10098 | +#else | |
10099 | +static inline int toi_checksum_init(void) { return 0; } | |
10100 | +static inline void toi_checksum_exit(void) { } | |
10101 | +static inline void check_checksums(void) { }; | |
10102 | +static inline int allocate_checksum_pages(void) { return 0; }; | |
10103 | +static inline void free_checksum_pages(void) { }; | |
10104 | +static inline char *tuxonice_get_next_checksum(void) { return NULL; }; | |
10105 | +static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn) | |
10106 | + { return 0; } | |
10107 | +#endif | |
10108 | + | |
10109 | diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c | |
10110 | new file mode 100644 | |
5dd10c98 | 10111 | index 0000000..0e5a262 |
7e46296a AM |
10112 | --- /dev/null |
10113 | +++ b/kernel/power/tuxonice_cluster.c | |
10114 | @@ -0,0 +1,1069 @@ | |
10115 | +/* | |
10116 | + * kernel/power/tuxonice_cluster.c | |
10117 | + * | |
5dd10c98 | 10118 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
10119 | + * |
10120 | + * This file is released under the GPLv2. | |
10121 | + * | |
10122 | + * This file contains routines for cluster hibernation support. | |
10123 | + * | |
10124 | + * Based on ip autoconfiguration code in net/ipv4/ipconfig.c. | |
10125 | + * | |
10126 | + * How does it work? | |
10127 | + * | |
10128 | + * There is no 'master' node that tells everyone else what to do. All nodes | |
10129 | + * send messages to the broadcast address/port, maintain a list of peers | |
10130 | + * and figure out when to progress to the next step in hibernating or resuming. | |
10131 | + * This makes us more fault tolerant when it comes to nodes coming and going | |
10132 | + * (which may be more of an issue if we're hibernating when power supplies | |
10133 | + * are being unreliable). | |
10134 | + * | |
10135 | + * At boot time, we start a ktuxonice thread that handles communication with | |
10136 | + * other nodes. This node maintains a state machine that controls our progress | |
10137 | + * through hibernating and resuming, keeping us in step with other nodes. Nodes | |
10138 | + * are identified by their hw address. | |
10139 | + * | |
10140 | + * On startup, the node sends CLUSTER_PING on the configured interface's | |
10141 | + * broadcast address, port $toi_cluster_port (see below) and begins to listen | |
10142 | + * for other broadcast messages. CLUSTER_PING messages are repeated at | |
10143 | + * intervals of 5 minutes, with a random offset to spread traffic out. | |
10144 | + * | |
10145 | + * A hibernation cycle is initiated from any node via | |
10146 | + * | |
10147 | + * echo > /sys/power/tuxonice/do_hibernate | |
10148 | + * | |
10149 | + * and (possibily) the hibernate script. At each step of the process, the node | |
10150 | + * completes its work, and waits for all other nodes to signal completion of | |
10151 | + * their work (or timeout) before progressing to the next step. | |
10152 | + * | |
10153 | + * Request/state Action before reply Possible reply Next state | |
10154 | + * HIBERNATE capable, pre-script HIBERNATE|ACK NODE_PREP | |
10155 | + * HIBERNATE|NACK INIT_0 | |
10156 | + * | |
10157 | + * PREP prepare_image PREP|ACK IMAGE_WRITE | |
10158 | + * PREP|NACK INIT_0 | |
10159 | + * ABORT RUNNING | |
10160 | + * | |
10161 | + * IO write image IO|ACK power off | |
10162 | + * ABORT POST_RESUME | |
10163 | + * | |
10164 | + * (Boot time) check for image IMAGE|ACK RESUME_PREP | |
10165 | + * (Note 1) | |
10166 | + * IMAGE|NACK (Note 2) | |
10167 | + * | |
10168 | + * PREP prepare read image PREP|ACK IMAGE_READ | |
10169 | + * PREP|NACK (As NACK_IMAGE) | |
10170 | + * | |
10171 | + * IO read image IO|ACK POST_RESUME | |
10172 | + * | |
10173 | + * POST_RESUME thaw, post-script RUNNING | |
10174 | + * | |
10175 | + * INIT_0 init 0 | |
10176 | + * | |
10177 | + * Other messages: | |
10178 | + * | |
10179 | + * - PING: Request for all other live nodes to send a PONG. Used at startup to | |
10180 | + * announce presence, when a node is suspected dead and periodically, in case | |
10181 | + * segments of the network are [un]plugged. | |
10182 | + * | |
10183 | + * - PONG: Response to a PING. | |
10184 | + * | |
10185 | + * - ABORT: Request to cancel writing an image. | |
10186 | + * | |
10187 | + * - BYE: Notification that this node is shutting down. | |
10188 | + * | |
10189 | + * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that | |
10190 | + * nodes which are slower to start up can get state synchronised. If a node | |
10191 | + * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send | |
10192 | + * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it | |
10193 | + * must invalidate its image (if any) and boot normally. | |
10194 | + * | |
10195 | + * Note 2: May occur when one node lost power or powered off while others | |
10196 | + * hibernated. This node waits for others to complete resuming (ACK_READ) | |
10197 | + * before completing its boot, so that it appears as a fail node restarting. | |
2380c486 | 10198 | + * |
7e46296a AM |
10199 | + * If any node has an image, then it also has a list of nodes that hibernated |
10200 | + * in synchronisation with it. The node will wait for other nodes to appear | |
10201 | + * or timeout before beginning its restoration. | |
10202 | + * | |
10203 | + * If a node has no image, it needs to wait, in case other nodes which do have | |
10204 | + * an image are going to resume, but are taking longer to announce their | |
10205 | + * presence. For this reason, the user can specify a timeout value and a number | |
10206 | + * of nodes detected before we just continue. (We might want to assume in a | |
10207 | + * cluster of, say, 15 nodes, if 8 others have booted without finding an image, | |
10208 | + * the remaining nodes will too. This might help in situations where some nodes | |
10209 | + * are much slower to boot, or more subject to hardware failures or such like). | |
2380c486 | 10210 | + */ |
2380c486 | 10211 | + |
7e46296a AM |
10212 | +#include <linux/suspend.h> |
10213 | +#include <linux/module.h> | |
10214 | +#include <linux/moduleparam.h> | |
10215 | +#include <linux/if.h> | |
10216 | +#include <linux/rtnetlink.h> | |
10217 | +#include <linux/ip.h> | |
10218 | +#include <linux/udp.h> | |
10219 | +#include <linux/in.h> | |
10220 | +#include <linux/if_arp.h> | |
10221 | +#include <linux/kthread.h> | |
10222 | +#include <linux/wait.h> | |
10223 | +#include <linux/netdevice.h> | |
10224 | +#include <net/ip.h> | |
2380c486 | 10225 | + |
7e46296a AM |
10226 | +#include "tuxonice.h" |
10227 | +#include "tuxonice_modules.h" | |
10228 | +#include "tuxonice_sysfs.h" | |
10229 | +#include "tuxonice_alloc.h" | |
10230 | +#include "tuxonice_io.h" | |
2380c486 | 10231 | + |
7e46296a AM |
10232 | +#if 1 |
10233 | +#define PRINTK(a, b...) do { printk(a, ##b); } while (0) | |
10234 | +#else | |
10235 | +#define PRINTK(a, b...) do { } while (0) | |
10236 | +#endif | |
2380c486 | 10237 | + |
7e46296a AM |
10238 | +static int loopback_mode; |
10239 | +static int num_local_nodes = 1; | |
10240 | +#define MAX_LOCAL_NODES 8 | |
10241 | +#define SADDR (loopback_mode ? b->sid : h->saddr) | |
2380c486 | 10242 | + |
7e46296a | 10243 | +#define MYNAME "TuxOnIce Clustering" |
2380c486 | 10244 | + |
7e46296a AM |
10245 | +enum cluster_message { |
10246 | + MSG_ACK = 1, | |
10247 | + MSG_NACK = 2, | |
10248 | + MSG_PING = 4, | |
10249 | + MSG_ABORT = 8, | |
10250 | + MSG_BYE = 16, | |
10251 | + MSG_HIBERNATE = 32, | |
10252 | + MSG_IMAGE = 64, | |
10253 | + MSG_IO = 128, | |
10254 | + MSG_RUNNING = 256 | |
10255 | +}; | |
2380c486 | 10256 | + |
7e46296a AM |
10257 | +static char *str_message(int message) |
10258 | +{ | |
10259 | + switch (message) { | |
10260 | + case 4: | |
10261 | + return "Ping"; | |
10262 | + case 8: | |
10263 | + return "Abort"; | |
10264 | + case 9: | |
10265 | + return "Abort acked"; | |
10266 | + case 10: | |
10267 | + return "Abort nacked"; | |
10268 | + case 16: | |
10269 | + return "Bye"; | |
10270 | + case 17: | |
10271 | + return "Bye acked"; | |
10272 | + case 18: | |
10273 | + return "Bye nacked"; | |
10274 | + case 32: | |
10275 | + return "Hibernate request"; | |
10276 | + case 33: | |
10277 | + return "Hibernate ack"; | |
10278 | + case 34: | |
10279 | + return "Hibernate nack"; | |
10280 | + case 64: | |
10281 | + return "Image exists?"; | |
10282 | + case 65: | |
10283 | + return "Image does exist"; | |
10284 | + case 66: | |
10285 | + return "No image here"; | |
10286 | + case 128: | |
10287 | + return "I/O"; | |
10288 | + case 129: | |
10289 | + return "I/O okay"; | |
10290 | + case 130: | |
10291 | + return "I/O failed"; | |
10292 | + case 256: | |
10293 | + return "Running"; | |
10294 | + default: | |
10295 | + printk(KERN_ERR "Unrecognised message %d.\n", message); | |
10296 | + return "Unrecognised message (see dmesg)"; | |
10297 | + } | |
10298 | +} | |
2380c486 | 10299 | + |
7e46296a AM |
10300 | +#define MSG_ACK_MASK (MSG_ACK | MSG_NACK) |
10301 | +#define MSG_STATE_MASK (~MSG_ACK_MASK) | |
2380c486 | 10302 | + |
7e46296a AM |
10303 | +struct node_info { |
10304 | + struct list_head member_list; | |
10305 | + wait_queue_head_t member_events; | |
10306 | + spinlock_t member_list_lock; | |
10307 | + spinlock_t receive_lock; | |
10308 | + int peer_count, ignored_peer_count; | |
10309 | + struct toi_sysfs_data sysfs_data; | |
10310 | + enum cluster_message current_message; | |
10311 | +}; | |
2380c486 | 10312 | + |
7e46296a | 10313 | +struct node_info node_array[MAX_LOCAL_NODES]; |
2380c486 | 10314 | + |
7e46296a AM |
10315 | +struct cluster_member { |
10316 | + __be32 addr; | |
10317 | + enum cluster_message message; | |
10318 | + struct list_head list; | |
10319 | + int ignore; | |
10320 | +}; | |
2380c486 | 10321 | + |
7e46296a AM |
10322 | +#define toi_cluster_port_send 3501 |
10323 | +#define toi_cluster_port_recv 3502 | |
2380c486 | 10324 | + |
7e46296a AM |
10325 | +static struct net_device *net_dev; |
10326 | +static struct toi_module_ops toi_cluster_ops; | |
2380c486 | 10327 | + |
7e46296a AM |
10328 | +static int toi_recv(struct sk_buff *skb, struct net_device *dev, |
10329 | + struct packet_type *pt, struct net_device *orig_dev); | |
2380c486 | 10330 | + |
7e46296a AM |
10331 | +static struct packet_type toi_cluster_packet_type = { |
10332 | + .type = __constant_htons(ETH_P_IP), | |
10333 | + .func = toi_recv, | |
10334 | +}; | |
2380c486 | 10335 | + |
7e46296a AM |
10336 | +struct toi_pkt { /* BOOTP packet format */ |
10337 | + struct iphdr iph; /* IP header */ | |
10338 | + struct udphdr udph; /* UDP header */ | |
10339 | + u8 htype; /* HW address type */ | |
10340 | + u8 hlen; /* HW address length */ | |
10341 | + __be32 xid; /* Transaction ID */ | |
10342 | + __be16 secs; /* Seconds since we started */ | |
10343 | + __be16 flags; /* Just what it says */ | |
10344 | + u8 hw_addr[16]; /* Sender's HW address */ | |
10345 | + u16 message; /* Message */ | |
10346 | + unsigned long sid; /* Source ID for loopback testing */ | |
10347 | +}; | |
2380c486 | 10348 | + |
7e46296a | 10349 | +static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE; |
2380c486 | 10350 | + |
7e46296a | 10351 | +static int added_pack; |
2380c486 | 10352 | + |
7e46296a | 10353 | +static int others_have_image; |
2380c486 | 10354 | + |
7e46296a AM |
10355 | +/* Key used to allow multiple clusters on the same lan */ |
10356 | +static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY; | |
10357 | +static char pre_hibernate_script[255] = | |
10358 | + CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE; | |
10359 | +static char post_hibernate_script[255] = | |
10360 | + CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE; | |
2380c486 | 10361 | + |
7e46296a AM |
10362 | +/* List of cluster members */ |
10363 | +static unsigned long continue_delay = 5 * HZ; | |
10364 | +static unsigned long cluster_message_timeout = 3 * HZ; | |
2380c486 | 10365 | + |
7e46296a | 10366 | +/* === Membership list === */ |
2380c486 | 10367 | + |
7e46296a AM |
10368 | +static void print_member_info(int index) |
10369 | +{ | |
10370 | + struct cluster_member *this; | |
2380c486 | 10371 | + |
7e46296a | 10372 | + printk(KERN_INFO "==> Dumping node %d.\n", index); |
2380c486 | 10373 | + |
7e46296a AM |
10374 | + list_for_each_entry(this, &node_array[index].member_list, list) |
10375 | + printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n", | |
10376 | + NIPQUAD(this->addr), | |
10377 | + str_message(this->message), | |
10378 | + this->ignore ? "(Ignored)" : ""); | |
10379 | + printk(KERN_INFO "== Done ==\n"); | |
2380c486 JR |
10380 | +} |
10381 | + | |
7e46296a | 10382 | +static struct cluster_member *__find_member(int index, __be32 addr) |
2380c486 | 10383 | +{ |
7e46296a AM |
10384 | + struct cluster_member *this; |
10385 | + | |
10386 | + list_for_each_entry(this, &node_array[index].member_list, list) { | |
10387 | + if (this->addr != addr) | |
10388 | + continue; | |
10389 | + | |
10390 | + return this; | |
2380c486 | 10391 | + } |
7e46296a AM |
10392 | + |
10393 | + return NULL; | |
2380c486 JR |
10394 | +} |
10395 | + | |
7e46296a | 10396 | +static void set_ignore(int index, __be32 addr, struct cluster_member *this) |
2380c486 | 10397 | +{ |
7e46296a AM |
10398 | + if (this->ignore) { |
10399 | + PRINTK("Node %d already ignoring %d.%d.%d.%d.\n", | |
10400 | + index, NIPQUAD(addr)); | |
10401 | + return; | |
2380c486 | 10402 | + } |
2380c486 | 10403 | + |
7e46296a AM |
10404 | + PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n", |
10405 | + index, NIPQUAD(addr)); | |
10406 | + this->ignore = 1; | |
10407 | + node_array[index].ignored_peer_count++; | |
2380c486 JR |
10408 | +} |
10409 | + | |
7e46296a AM |
10410 | +static int __add_update_member(int index, __be32 addr, int message) |
10411 | +{ | |
10412 | + struct cluster_member *this; | |
2380c486 | 10413 | + |
7e46296a AM |
10414 | + this = __find_member(index, addr); |
10415 | + if (this) { | |
10416 | + if (this->message != message) { | |
10417 | + this->message = message; | |
10418 | + if ((message & MSG_NACK) && | |
10419 | + (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) | |
10420 | + set_ignore(index, addr, this); | |
10421 | + PRINTK("Node %d sees node %d.%d.%d.%d now sending " | |
10422 | + "%s.\n", index, NIPQUAD(addr), | |
10423 | + str_message(message)); | |
10424 | + wake_up(&node_array[index].member_events); | |
10425 | + } | |
10426 | + return 0; | |
10427 | + } | |
2380c486 | 10428 | + |
7e46296a AM |
10429 | + this = (struct cluster_member *) toi_kzalloc(36, |
10430 | + sizeof(struct cluster_member), GFP_KERNEL); | |
2380c486 | 10431 | + |
7e46296a AM |
10432 | + if (!this) |
10433 | + return -1; | |
2380c486 | 10434 | + |
7e46296a AM |
10435 | + this->addr = addr; |
10436 | + this->message = message; | |
10437 | + this->ignore = 0; | |
10438 | + INIT_LIST_HEAD(&this->list); | |
2380c486 | 10439 | + |
7e46296a | 10440 | + node_array[index].peer_count++; |
2380c486 | 10441 | + |
7e46296a AM |
10442 | + PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index, |
10443 | + NIPQUAD(addr), str_message(message)); | |
2380c486 | 10444 | + |
7e46296a AM |
10445 | + if ((message & MSG_NACK) && |
10446 | + (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) | |
10447 | + set_ignore(index, addr, this); | |
10448 | + list_add_tail(&this->list, &node_array[index].member_list); | |
10449 | + return 1; | |
10450 | +} | |
2380c486 | 10451 | + |
7e46296a AM |
10452 | +static int add_update_member(int index, __be32 addr, int message) |
10453 | +{ | |
10454 | + int result; | |
10455 | + unsigned long flags; | |
10456 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
10457 | + result = __add_update_member(index, addr, message); | |
10458 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
2380c486 | 10459 | + |
7e46296a | 10460 | + print_member_info(index); |
2380c486 | 10461 | + |
7e46296a | 10462 | + wake_up(&node_array[index].member_events); |
2380c486 | 10463 | + |
7e46296a | 10464 | + return result; |
2380c486 JR |
10465 | +} |
10466 | + | |
7e46296a | 10467 | +static void del_member(int index, __be32 addr) |
2380c486 | 10468 | +{ |
7e46296a AM |
10469 | + struct cluster_member *this; |
10470 | + unsigned long flags; | |
2380c486 | 10471 | + |
7e46296a AM |
10472 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); |
10473 | + this = __find_member(index, addr); | |
2380c486 | 10474 | + |
7e46296a AM |
10475 | + if (this) { |
10476 | + list_del_init(&this->list); | |
10477 | + toi_kfree(36, this, sizeof(*this)); | |
10478 | + node_array[index].peer_count--; | |
10479 | + } | |
2380c486 | 10480 | + |
7e46296a | 10481 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); |
2380c486 JR |
10482 | +} |
10483 | + | |
7e46296a | 10484 | +/* === Message transmission === */ |
2380c486 | 10485 | + |
7e46296a | 10486 | +static void toi_send_if(int message, unsigned long my_id); |
2380c486 | 10487 | + |
2380c486 | 10488 | +/* |
7e46296a | 10489 | + * Process received TOI packet. |
2380c486 | 10490 | + */ |
7e46296a AM |
10491 | +static int toi_recv(struct sk_buff *skb, struct net_device *dev, |
10492 | + struct packet_type *pt, struct net_device *orig_dev) | |
10493 | +{ | |
10494 | + struct toi_pkt *b; | |
10495 | + struct iphdr *h; | |
10496 | + int len, result, index; | |
10497 | + unsigned long addr, message, ack; | |
2380c486 | 10498 | + |
7e46296a AM |
10499 | + /* Perform verifications before taking the lock. */ |
10500 | + if (skb->pkt_type == PACKET_OTHERHOST) | |
10501 | + goto drop; | |
2380c486 | 10502 | + |
7e46296a AM |
10503 | + if (dev != net_dev) |
10504 | + goto drop; | |
2380c486 | 10505 | + |
7e46296a AM |
10506 | + skb = skb_share_check(skb, GFP_ATOMIC); |
10507 | + if (!skb) | |
10508 | + return NET_RX_DROP; | |
2380c486 | 10509 | + |
7e46296a AM |
10510 | + if (!pskb_may_pull(skb, |
10511 | + sizeof(struct iphdr) + | |
10512 | + sizeof(struct udphdr))) | |
10513 | + goto drop; | |
2380c486 | 10514 | + |
7e46296a AM |
10515 | + b = (struct toi_pkt *)skb_network_header(skb); |
10516 | + h = &b->iph; | |
2380c486 | 10517 | + |
7e46296a AM |
10518 | + if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP) |
10519 | + goto drop; | |
2380c486 | 10520 | + |
7e46296a AM |
10521 | + /* Fragments are not supported */ |
10522 | + if (h->frag_off & htons(IP_OFFSET | IP_MF)) { | |
10523 | + if (net_ratelimit()) | |
10524 | + printk(KERN_ERR "TuxOnIce: Ignoring fragmented " | |
10525 | + "cluster message.\n"); | |
10526 | + goto drop; | |
10527 | + } | |
2380c486 | 10528 | + |
7e46296a AM |
10529 | + if (skb->len < ntohs(h->tot_len)) |
10530 | + goto drop; | |
2380c486 | 10531 | + |
7e46296a AM |
10532 | + if (ip_fast_csum((char *) h, h->ihl)) |
10533 | + goto drop; | |
2380c486 | 10534 | + |
7e46296a AM |
10535 | + if (b->udph.source != htons(toi_cluster_port_send) || |
10536 | + b->udph.dest != htons(toi_cluster_port_recv)) | |
10537 | + goto drop; | |
2380c486 | 10538 | + |
7e46296a AM |
10539 | + if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr)) |
10540 | + goto drop; | |
2380c486 | 10541 | + |
7e46296a | 10542 | + len = ntohs(b->udph.len) - sizeof(struct udphdr); |
2380c486 | 10543 | + |
7e46296a AM |
10544 | + /* Ok the front looks good, make sure we can get at the rest. */ |
10545 | + if (!pskb_may_pull(skb, skb->len)) | |
10546 | + goto drop; | |
0ada99ac | 10547 | + |
7e46296a AM |
10548 | + b = (struct toi_pkt *)skb_network_header(skb); |
10549 | + h = &b->iph; | |
0ada99ac | 10550 | + |
7e46296a AM |
10551 | + addr = SADDR; |
10552 | + PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n", | |
10553 | + str_message(b->message), NIPQUAD(addr)); | |
2380c486 | 10554 | + |
7e46296a AM |
10555 | + message = b->message & MSG_STATE_MASK; |
10556 | + ack = b->message & MSG_ACK_MASK; | |
2380c486 | 10557 | + |
7e46296a AM |
10558 | + for (index = 0; index < num_local_nodes; index++) { |
10559 | + int new_message = node_array[index].current_message, | |
10560 | + old_message = new_message; | |
2380c486 | 10561 | + |
7e46296a AM |
10562 | + if (index == SADDR || !old_message) { |
10563 | + PRINTK("Ignoring node %d (offline or self).\n", index); | |
10564 | + continue; | |
2380c486 JR |
10565 | + } |
10566 | + | |
7e46296a AM |
10567 | + /* One message at a time, please. */ |
10568 | + spin_lock(&node_array[index].receive_lock); | |
2380c486 | 10569 | + |
7e46296a AM |
10570 | + result = add_update_member(index, SADDR, b->message); |
10571 | + if (result == -1) { | |
10572 | + printk(KERN_INFO "Failed to add new cluster member " | |
10573 | + NIPQUAD_FMT ".\n", | |
10574 | + NIPQUAD(addr)); | |
10575 | + goto drop_unlock; | |
2380c486 | 10576 | + } |
0ada99ac | 10577 | + |
7e46296a AM |
10578 | + switch (b->message & MSG_STATE_MASK) { |
10579 | + case MSG_PING: | |
10580 | + break; | |
10581 | + case MSG_ABORT: | |
10582 | + break; | |
10583 | + case MSG_BYE: | |
10584 | + break; | |
10585 | + case MSG_HIBERNATE: | |
10586 | + /* Can I hibernate? */ | |
10587 | + new_message = MSG_HIBERNATE | | |
10588 | + ((index & 1) ? MSG_NACK : MSG_ACK); | |
10589 | + break; | |
10590 | + case MSG_IMAGE: | |
10591 | + /* Can I resume? */ | |
10592 | + new_message = MSG_IMAGE | | |
10593 | + ((index & 1) ? MSG_NACK : MSG_ACK); | |
10594 | + if (new_message != old_message) | |
10595 | + printk(KERN_ERR "Setting whether I can resume " | |
10596 | + "to %d.\n", new_message); | |
10597 | + break; | |
10598 | + case MSG_IO: | |
10599 | + new_message = MSG_IO | MSG_ACK; | |
10600 | + break; | |
10601 | + case MSG_RUNNING: | |
10602 | + break; | |
10603 | + default: | |
10604 | + if (net_ratelimit()) | |
10605 | + printk(KERN_ERR "Unrecognised TuxOnIce cluster" | |
10606 | + " message %d from " NIPQUAD_FMT ".\n", | |
10607 | + b->message, NIPQUAD(addr)); | |
10608 | + }; | |
0ada99ac | 10609 | + |
7e46296a AM |
10610 | + if (old_message != new_message) { |
10611 | + node_array[index].current_message = new_message; | |
10612 | + printk(KERN_INFO ">>> Sending new message for node " | |
10613 | + "%d.\n", index); | |
10614 | + toi_send_if(new_message, index); | |
10615 | + } else if (!ack) { | |
10616 | + printk(KERN_INFO ">>> Resending message for node %d.\n", | |
10617 | + index); | |
10618 | + toi_send_if(new_message, index); | |
0ada99ac | 10619 | + } |
7e46296a AM |
10620 | +drop_unlock: |
10621 | + spin_unlock(&node_array[index].receive_lock); | |
10622 | + }; | |
0ada99ac | 10623 | + |
7e46296a AM |
10624 | +drop: |
10625 | + /* Throw the packet out. */ | |
10626 | + kfree_skb(skb); | |
2380c486 JR |
10627 | + |
10628 | + return 0; | |
10629 | +} | |
10630 | + | |
10631 | +/* | |
7e46296a | 10632 | + * Send cluster message to single interface. |
2380c486 | 10633 | + */ |
7e46296a AM |
10634 | +static void toi_send_if(int message, unsigned long my_id) |
10635 | +{ | |
10636 | + struct sk_buff *skb; | |
10637 | + struct toi_pkt *b; | |
10638 | + int hh_len = LL_RESERVED_SPACE(net_dev); | |
10639 | + struct iphdr *h; | |
2380c486 | 10640 | + |
7e46296a AM |
10641 | + /* Allocate packet */ |
10642 | + skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL); | |
10643 | + if (!skb) | |
10644 | + return; | |
10645 | + skb_reserve(skb, hh_len); | |
10646 | + b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt)); | |
10647 | + memset(b, 0, sizeof(struct toi_pkt)); | |
10648 | + | |
10649 | + /* Construct IP header */ | |
10650 | + skb_reset_network_header(skb); | |
10651 | + h = ip_hdr(skb); | |
10652 | + h->version = 4; | |
10653 | + h->ihl = 5; | |
10654 | + h->tot_len = htons(sizeof(struct toi_pkt)); | |
10655 | + h->frag_off = htons(IP_DF); | |
10656 | + h->ttl = 64; | |
10657 | + h->protocol = IPPROTO_UDP; | |
10658 | + h->daddr = htonl(INADDR_BROADCAST); | |
10659 | + h->check = ip_fast_csum((unsigned char *) h, h->ihl); | |
10660 | + | |
10661 | + /* Construct UDP header */ | |
10662 | + b->udph.source = htons(toi_cluster_port_send); | |
10663 | + b->udph.dest = htons(toi_cluster_port_recv); | |
10664 | + b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr)); | |
10665 | + /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */ | |
10666 | + | |
10667 | + /* Construct message */ | |
10668 | + b->message = message; | |
10669 | + b->sid = my_id; | |
10670 | + b->htype = net_dev->type; /* can cause undefined behavior */ | |
10671 | + b->hlen = net_dev->addr_len; | |
10672 | + memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len); | |
10673 | + b->secs = htons(3); /* 3 seconds */ | |
10674 | + | |
10675 | + /* Chain packet down the line... */ | |
10676 | + skb->dev = net_dev; | |
10677 | + skb->protocol = htons(ETH_P_IP); | |
10678 | + if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol), | |
10679 | + net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) || | |
10680 | + dev_queue_xmit(skb) < 0) | |
10681 | + printk(KERN_INFO "E"); | |
10682 | +} | |
10683 | + | |
10684 | +/* ========================================= */ | |
10685 | + | |
10686 | +/* kTOICluster */ | |
10687 | + | |
10688 | +static atomic_t num_cluster_threads; | |
10689 | +static DECLARE_WAIT_QUEUE_HEAD(clusterd_events); | |
10690 | + | |
10691 | +static int kTOICluster(void *data) | |
2380c486 | 10692 | +{ |
7e46296a AM |
10693 | + unsigned long my_id; |
10694 | + | |
10695 | + my_id = atomic_add_return(1, &num_cluster_threads) - 1; | |
10696 | + node_array[my_id].current_message = (unsigned long) data; | |
2380c486 | 10697 | + |
7e46296a | 10698 | + PRINTK("kTOICluster daemon %lu starting.\n", my_id); |
2380c486 | 10699 | + |
7e46296a | 10700 | + current->flags |= PF_NOFREEZE; |
2380c486 | 10701 | + |
7e46296a AM |
10702 | + while (node_array[my_id].current_message) { |
10703 | + toi_send_if(node_array[my_id].current_message, my_id); | |
10704 | + sleep_on_timeout(&clusterd_events, | |
10705 | + cluster_message_timeout); | |
10706 | + PRINTK("Link state %lu is %d.\n", my_id, | |
10707 | + node_array[my_id].current_message); | |
10708 | + } | |
2380c486 | 10709 | + |
7e46296a AM |
10710 | + toi_send_if(MSG_BYE, my_id); |
10711 | + atomic_dec(&num_cluster_threads); | |
10712 | + wake_up(&clusterd_events); | |
2380c486 | 10713 | + |
7e46296a AM |
10714 | + PRINTK("kTOICluster daemon %lu exiting.\n", my_id); |
10715 | + __set_current_state(TASK_RUNNING); | |
2380c486 JR |
10716 | + return 0; |
10717 | +} | |
10718 | + | |
7e46296a | 10719 | +static void kill_clusterd(void) |
2380c486 | 10720 | +{ |
7e46296a AM |
10721 | + int i; |
10722 | + | |
10723 | + for (i = 0; i < num_local_nodes; i++) { | |
10724 | + if (node_array[i].current_message) { | |
10725 | + PRINTK("Seeking to kill clusterd %d.\n", i); | |
10726 | + node_array[i].current_message = 0; | |
2380c486 JR |
10727 | + } |
10728 | + } | |
7e46296a AM |
10729 | + wait_event(clusterd_events, |
10730 | + !atomic_read(&num_cluster_threads)); | |
10731 | + PRINTK("All cluster daemons have exited.\n"); | |
10732 | +} | |
2380c486 | 10733 | + |
7e46296a AM |
10734 | +static int peers_not_in_message(int index, int message, int precise) |
10735 | +{ | |
10736 | + struct cluster_member *this; | |
10737 | + unsigned long flags; | |
10738 | + int result = 0; | |
10739 | + | |
10740 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
10741 | + list_for_each_entry(this, &node_array[index].member_list, list) { | |
10742 | + if (this->ignore) | |
10743 | + continue; | |
10744 | + | |
10745 | + PRINTK("Peer %d.%d.%d.%d sending %s. " | |
10746 | + "Seeking %s.\n", | |
10747 | + NIPQUAD(this->addr), | |
10748 | + str_message(this->message), str_message(message)); | |
10749 | + if ((precise ? this->message : | |
10750 | + this->message & MSG_STATE_MASK) != | |
10751 | + message) | |
10752 | + result++; | |
10753 | + } | |
10754 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
10755 | + PRINTK("%d peers in sought message.\n", result); | |
10756 | + return result; | |
2380c486 JR |
10757 | +} |
10758 | + | |
7e46296a | 10759 | +static void reset_ignored(int index) |
2380c486 | 10760 | +{ |
7e46296a AM |
10761 | + struct cluster_member *this; |
10762 | + unsigned long flags; | |
2380c486 | 10763 | + |
7e46296a AM |
10764 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); |
10765 | + list_for_each_entry(this, &node_array[index].member_list, list) | |
10766 | + this->ignore = 0; | |
10767 | + node_array[index].ignored_peer_count = 0; | |
10768 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
10769 | +} | |
2380c486 | 10770 | + |
7e46296a AM |
10771 | +static int peers_in_message(int index, int message, int precise) |
10772 | +{ | |
10773 | + return node_array[index].peer_count - | |
10774 | + node_array[index].ignored_peer_count - | |
10775 | + peers_not_in_message(index, message, precise); | |
10776 | +} | |
2380c486 | 10777 | + |
7e46296a AM |
10778 | +static int time_to_continue(int index, unsigned long start, int message) |
10779 | +{ | |
10780 | + int first = peers_not_in_message(index, message, 0); | |
10781 | + int second = peers_in_message(index, message, 1); | |
2380c486 | 10782 | + |
7e46296a | 10783 | + PRINTK("First part returns %d, second returns %d.\n", first, second); |
2380c486 | 10784 | + |
7e46296a AM |
10785 | + if (!first && !second) { |
10786 | + PRINTK("All peers answered message %d.\n", | |
10787 | + message); | |
10788 | + return 1; | |
10789 | + } | |
2380c486 | 10790 | + |
7e46296a AM |
10791 | + if (time_after(jiffies, start + continue_delay)) { |
10792 | + PRINTK("Timeout reached.\n"); | |
10793 | + return 1; | |
10794 | + } | |
2380c486 | 10795 | + |
7e46296a AM |
10796 | + PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies, |
10797 | + start + continue_delay); | |
10798 | + return 0; | |
2380c486 JR |
10799 | +} |
10800 | + | |
7e46296a | 10801 | +void toi_initiate_cluster_hibernate(void) |
2380c486 | 10802 | +{ |
7e46296a AM |
10803 | + int result; |
10804 | + unsigned long start; | |
2380c486 | 10805 | + |
7e46296a AM |
10806 | + result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); |
10807 | + if (result) | |
10808 | + return; | |
2380c486 | 10809 | + |
7e46296a | 10810 | + toi_send_if(MSG_HIBERNATE, 0); |
2380c486 | 10811 | + |
7e46296a AM |
10812 | + start = jiffies; |
10813 | + wait_event(node_array[0].member_events, | |
10814 | + time_to_continue(0, start, MSG_HIBERNATE)); | |
2380c486 | 10815 | + |
7e46296a AM |
10816 | + if (test_action_state(TOI_FREEZER_TEST)) { |
10817 | + toi_send_if(MSG_ABORT, 0); | |
2380c486 | 10818 | + |
7e46296a AM |
10819 | + start = jiffies; |
10820 | + wait_event(node_array[0].member_events, | |
10821 | + time_to_continue(0, start, MSG_RUNNING)); | |
10822 | + | |
10823 | + do_toi_step(STEP_QUIET_CLEANUP); | |
10824 | + return; | |
2380c486 | 10825 | + } |
7e46296a AM |
10826 | + |
10827 | + toi_send_if(MSG_IO, 0); | |
10828 | + | |
10829 | + result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); | |
10830 | + if (result) | |
10831 | + return; | |
10832 | + | |
10833 | + /* This code runs at resume time too! */ | |
10834 | + if (toi_in_hibernate) | |
10835 | + result = do_toi_step(STEP_HIBERNATE_POWERDOWN); | |
2380c486 | 10836 | +} |
7e46296a | 10837 | +EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate); |
2380c486 | 10838 | + |
7e46296a | 10839 | +/* toi_cluster_print_debug_stats |
2380c486 | 10840 | + * |
7e46296a AM |
10841 | + * Description: Print information to be recorded for debugging purposes into a |
10842 | + * buffer. | |
10843 | + * Arguments: buffer: Pointer to a buffer into which the debug info will be | |
10844 | + * printed. | |
10845 | + * size: Size of the buffer. | |
10846 | + * Returns: Number of characters written to the buffer. | |
2380c486 | 10847 | + */ |
7e46296a | 10848 | +static int toi_cluster_print_debug_stats(char *buffer, int size) |
2380c486 | 10849 | +{ |
2380c486 JR |
10850 | + int len; |
10851 | + | |
7e46296a AM |
10852 | + if (strlen(toi_cluster_iface)) |
10853 | + len = scnprintf(buffer, size, | |
10854 | + "- Cluster interface is '%s'.\n", | |
10855 | + toi_cluster_iface); | |
2380c486 | 10856 | + else |
7e46296a AM |
10857 | + len = scnprintf(buffer, size, |
10858 | + "- Cluster support is disabled.\n"); | |
2380c486 JR |
10859 | + return len; |
10860 | +} | |
10861 | + | |
7e46296a AM |
10862 | +/* cluster_memory_needed |
10863 | + * | |
10864 | + * Description: Tell the caller how much memory we need to operate during | |
10865 | + * hibernate/resume. | |
10866 | + * Returns: Unsigned long. Maximum number of bytes of memory required for | |
10867 | + * operation. | |
2380c486 | 10868 | + */ |
7e46296a | 10869 | +static int toi_cluster_memory_needed(void) |
2380c486 | 10870 | +{ |
7e46296a | 10871 | + return 0; |
2380c486 JR |
10872 | +} |
10873 | + | |
7e46296a | 10874 | +static int toi_cluster_storage_needed(void) |
2380c486 | 10875 | +{ |
7e46296a | 10876 | + return 1 + strlen(toi_cluster_iface); |
2380c486 JR |
10877 | +} |
10878 | + | |
7e46296a | 10879 | +/* toi_cluster_save_config_info |
2380c486 | 10880 | + * |
7e46296a AM |
10881 | + * Description: Save informaton needed when reloading the image at resume time. |
10882 | + * Arguments: Buffer: Pointer to a buffer of size PAGE_SIZE. | |
10883 | + * Returns: Number of bytes used for saving our data. | |
2380c486 | 10884 | + */ |
7e46296a | 10885 | +static int toi_cluster_save_config_info(char *buffer) |
2380c486 | 10886 | +{ |
7e46296a AM |
10887 | + strcpy(buffer, toi_cluster_iface); |
10888 | + return strlen(toi_cluster_iface + 1); | |
2380c486 JR |
10889 | +} |
10890 | + | |
7e46296a | 10891 | +/* toi_cluster_load_config_info |
2380c486 | 10892 | + * |
7e46296a AM |
10893 | + * Description: Reload information needed for declustering the image at |
10894 | + * resume time. | |
10895 | + * Arguments: Buffer: Pointer to the start of the data. | |
10896 | + * Size: Number of bytes that were saved. | |
2380c486 | 10897 | + */ |
7e46296a | 10898 | +static void toi_cluster_load_config_info(char *buffer, int size) |
2380c486 | 10899 | +{ |
7e46296a | 10900 | + strncpy(toi_cluster_iface, buffer, size); |
2380c486 JR |
10901 | + return; |
10902 | +} | |
10903 | + | |
7e46296a | 10904 | +static void cluster_startup(void) |
2380c486 | 10905 | +{ |
7e46296a AM |
10906 | + int have_image = do_check_can_resume(), i; |
10907 | + unsigned long start = jiffies, initial_message; | |
10908 | + struct task_struct *p; | |
2380c486 | 10909 | + |
7e46296a | 10910 | + initial_message = MSG_IMAGE; |
2380c486 | 10911 | + |
7e46296a | 10912 | + have_image = 1; |
2380c486 | 10913 | + |
7e46296a AM |
10914 | + for (i = 0; i < num_local_nodes; i++) { |
10915 | + PRINTK("Starting ktoiclusterd %d.\n", i); | |
10916 | + p = kthread_create(kTOICluster, (void *) initial_message, | |
10917 | + "ktoiclusterd/%d", i); | |
10918 | + if (IS_ERR(p)) { | |
10919 | + printk(KERN_ERR "Failed to start ktoiclusterd.\n"); | |
10920 | + return; | |
10921 | + } | |
2380c486 | 10922 | + |
7e46296a AM |
10923 | + wake_up_process(p); |
10924 | + } | |
2380c486 | 10925 | + |
7e46296a AM |
10926 | + /* Wait for delay or someone else sending first message */ |
10927 | + wait_event(node_array[0].member_events, time_to_continue(0, start, | |
10928 | + MSG_IMAGE)); | |
2380c486 | 10929 | + |
7e46296a | 10930 | + others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1); |
2380c486 | 10931 | + |
7e46296a AM |
10932 | + printk(KERN_INFO "Continuing. I %shave an image. Peers with image:" |
10933 | + " %d.\n", have_image ? "" : "don't ", others_have_image); | |
2380c486 | 10934 | + |
7e46296a AM |
10935 | + if (have_image) { |
10936 | + int result; | |
2380c486 | 10937 | + |
7e46296a AM |
10938 | + /* Start to resume */ |
10939 | + printk(KERN_INFO " === Starting to resume === \n"); | |
10940 | + node_array[0].current_message = MSG_IO; | |
10941 | + toi_send_if(MSG_IO, 0); | |
2380c486 | 10942 | + |
7e46296a AM |
10943 | + /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */ |
10944 | + result = 0; | |
2380c486 | 10945 | + |
7e46296a AM |
10946 | + if (!result) { |
10947 | + /* | |
10948 | + * Atomic restore - we'll come back in the hibernation | |
10949 | + * path. | |
10950 | + */ | |
2380c486 | 10951 | + |
7e46296a AM |
10952 | + /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */ |
10953 | + result = 0; | |
2380c486 | 10954 | + |
7e46296a AM |
10955 | + /* do_toi_step(STEP_QUIET_CLEANUP); */ |
10956 | + } | |
2380c486 | 10957 | + |
7e46296a AM |
10958 | + node_array[0].current_message |= MSG_NACK; |
10959 | + | |
10960 | + /* For debugging - disable for real life? */ | |
10961 | + wait_event(node_array[0].member_events, | |
10962 | + time_to_continue(0, start, MSG_IO)); | |
2380c486 JR |
10963 | + } |
10964 | + | |
7e46296a AM |
10965 | + if (others_have_image) { |
10966 | + /* Wait for them to resume */ | |
10967 | + printk(KERN_INFO "Waiting for other nodes to resume.\n"); | |
10968 | + start = jiffies; | |
10969 | + wait_event(node_array[0].member_events, | |
10970 | + time_to_continue(0, start, MSG_RUNNING)); | |
10971 | + if (peers_not_in_message(0, MSG_RUNNING, 0)) | |
10972 | + printk(KERN_INFO "Timed out while waiting for other " | |
10973 | + "nodes to resume.\n"); | |
10974 | + } | |
2380c486 | 10975 | + |
7e46296a AM |
10976 | + /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE |
10977 | + * as appropriate. | |
10978 | + * | |
10979 | + * If we don't have an image: | |
10980 | + * - Wait until someone else says they have one, or conditions are met | |
10981 | + * for continuing to boot (n machines or t seconds). | |
10982 | + * - If anyone has an image, wait for them to resume before continuing | |
10983 | + * to boot. | |
10984 | + * | |
10985 | + * If we have an image: | |
10986 | + * - Wait until conditions are met before continuing to resume (n | |
10987 | + * machines or t seconds). Send RESUME_PREP and freeze processes. | |
10988 | + * NACK_PREP if freezing fails (shouldn't) and follow logic for | |
10989 | + * us having no image above. On success, wait for [N]ACK_PREP from | |
10990 | + * other machines. Read image (including atomic restore) until done. | |
10991 | + * Wait for ACK_READ from others (should never fail). Thaw processes | |
10992 | + * and do post-resume. (The section after the atomic restore is done | |
10993 | + * via the code for hibernating). | |
10994 | + */ | |
2380c486 | 10995 | + |
7e46296a AM |
10996 | + node_array[0].current_message = MSG_RUNNING; |
10997 | +} | |
2380c486 | 10998 | + |
7e46296a AM |
10999 | +/* toi_cluster_open_iface |
11000 | + * | |
11001 | + * Description: Prepare to use an interface. | |
11002 | + */ | |
2380c486 | 11003 | + |
7e46296a AM |
11004 | +static int toi_cluster_open_iface(void) |
11005 | +{ | |
11006 | + struct net_device *dev; | |
2380c486 | 11007 | + |
7e46296a | 11008 | + rtnl_lock(); |
2380c486 | 11009 | + |
7e46296a AM |
11010 | + for_each_netdev(&init_net, dev) { |
11011 | + if (/* dev == &init_net.loopback_dev || */ | |
11012 | + strcmp(dev->name, toi_cluster_iface)) | |
11013 | + continue; | |
2380c486 | 11014 | + |
7e46296a AM |
11015 | + net_dev = dev; |
11016 | + break; | |
2380c486 JR |
11017 | + } |
11018 | + | |
7e46296a AM |
11019 | + rtnl_unlock(); |
11020 | + | |
11021 | + if (!net_dev) { | |
11022 | + printk(KERN_ERR MYNAME ": Device %s not found.\n", | |
11023 | + toi_cluster_iface); | |
11024 | + return -ENODEV; | |
2380c486 JR |
11025 | + } |
11026 | + | |
7e46296a AM |
11027 | + dev_add_pack(&toi_cluster_packet_type); |
11028 | + added_pack = 1; | |
2380c486 | 11029 | + |
7e46296a AM |
11030 | + loopback_mode = (net_dev == init_net.loopback_dev); |
11031 | + num_local_nodes = loopback_mode ? 8 : 1; | |
2380c486 | 11032 | + |
7e46296a AM |
11033 | + PRINTK("Loopback mode is %s. Number of local nodes is %d.\n", |
11034 | + loopback_mode ? "on" : "off", num_local_nodes); | |
2380c486 | 11035 | + |
7e46296a | 11036 | + cluster_startup(); |
2380c486 JR |
11037 | + return 0; |
11038 | +} | |
2380c486 | 11039 | + |
7e46296a AM |
11040 | +/* toi_cluster_close_iface |
11041 | + * | |
11042 | + * Description: Stop using an interface. | |
11043 | + */ | |
2380c486 | 11044 | + |
7e46296a AM |
11045 | +static int toi_cluster_close_iface(void) |
11046 | +{ | |
11047 | + kill_clusterd(); | |
11048 | + if (added_pack) { | |
11049 | + dev_remove_pack(&toi_cluster_packet_type); | |
11050 | + added_pack = 0; | |
2380c486 | 11051 | + } |
7e46296a | 11052 | + return 0; |
2380c486 | 11053 | +} |
2380c486 | 11054 | + |
7e46296a | 11055 | +static void write_side_effect(void) |
2380c486 | 11056 | +{ |
7e46296a AM |
11057 | + if (toi_cluster_ops.enabled) { |
11058 | + toi_cluster_open_iface(); | |
11059 | + set_toi_state(TOI_CLUSTER_MODE); | |
11060 | + } else { | |
11061 | + toi_cluster_close_iface(); | |
11062 | + clear_toi_state(TOI_CLUSTER_MODE); | |
2380c486 | 11063 | + } |
2380c486 | 11064 | +} |
2380c486 | 11065 | + |
7e46296a | 11066 | +static void node_write_side_effect(void) |
2380c486 | 11067 | +{ |
7e46296a | 11068 | +} |
2380c486 | 11069 | + |
7e46296a AM |
11070 | +/* |
11071 | + * data for our sysfs entries. | |
11072 | + */ | |
11073 | +static struct toi_sysfs_data sysfs_params[] = { | |
11074 | + SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0, | |
11075 | + NULL), | |
11076 | + SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0, | |
11077 | + write_side_effect), | |
11078 | + SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL), | |
11079 | + SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script, | |
11080 | + 256, 0, NULL), | |
11081 | + SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script, | |
11082 | + 256, 0, STRING), | |
11083 | + SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ, | |
11084 | + 0) | |
11085 | +}; | |
2380c486 | 11086 | + |
7e46296a AM |
11087 | +/* |
11088 | + * Ops structure. | |
11089 | + */ | |
2380c486 | 11090 | + |
7e46296a AM |
11091 | +static struct toi_module_ops toi_cluster_ops = { |
11092 | + .type = FILTER_MODULE, | |
11093 | + .name = "Cluster", | |
11094 | + .directory = "cluster", | |
11095 | + .module = THIS_MODULE, | |
11096 | + .memory_needed = toi_cluster_memory_needed, | |
11097 | + .print_debug_info = toi_cluster_print_debug_stats, | |
11098 | + .save_config_info = toi_cluster_save_config_info, | |
11099 | + .load_config_info = toi_cluster_load_config_info, | |
11100 | + .storage_needed = toi_cluster_storage_needed, | |
2380c486 | 11101 | + |
7e46296a AM |
11102 | + .sysfs_data = sysfs_params, |
11103 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
11104 | + sizeof(struct toi_sysfs_data), | |
11105 | +}; | |
2380c486 | 11106 | + |
7e46296a | 11107 | +/* ---- Registration ---- */ |
2380c486 | 11108 | + |
7e46296a AM |
11109 | +#ifdef MODULE |
11110 | +#define INIT static __init | |
11111 | +#define EXIT static __exit | |
11112 | +#else | |
11113 | +#define INIT | |
11114 | +#define EXIT | |
11115 | +#endif | |
2380c486 | 11116 | + |
7e46296a | 11117 | +INIT int toi_cluster_init(void) |
2380c486 | 11118 | +{ |
7e46296a AM |
11119 | + int temp = toi_register_module(&toi_cluster_ops), i; |
11120 | + struct kobject *kobj = toi_cluster_ops.dir_kobj; | |
2380c486 | 11121 | + |
7e46296a AM |
11122 | + for (i = 0; i < MAX_LOCAL_NODES; i++) { |
11123 | + node_array[i].current_message = 0; | |
11124 | + INIT_LIST_HEAD(&node_array[i].member_list); | |
11125 | + init_waitqueue_head(&node_array[i].member_events); | |
11126 | + spin_lock_init(&node_array[i].member_list_lock); | |
11127 | + spin_lock_init(&node_array[i].receive_lock); | |
2380c486 | 11128 | + |
7e46296a AM |
11129 | + /* Set up sysfs entry */ |
11130 | + node_array[i].sysfs_data.attr.name = toi_kzalloc(8, | |
11131 | + sizeof(node_array[i].sysfs_data.attr.name), | |
11132 | + GFP_KERNEL); | |
11133 | + sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d", | |
11134 | + i); | |
11135 | + node_array[i].sysfs_data.attr.mode = SYSFS_RW; | |
11136 | + node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER; | |
11137 | + node_array[i].sysfs_data.flags = 0; | |
11138 | + node_array[i].sysfs_data.data.integer.variable = | |
11139 | + (int *) &node_array[i].current_message; | |
11140 | + node_array[i].sysfs_data.data.integer.minimum = 0; | |
11141 | + node_array[i].sysfs_data.data.integer.maximum = INT_MAX; | |
11142 | + node_array[i].sysfs_data.write_side_effect = | |
11143 | + node_write_side_effect; | |
11144 | + toi_register_sysfs_file(kobj, &node_array[i].sysfs_data); | |
11145 | + } | |
2380c486 | 11146 | + |
7e46296a | 11147 | + toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0); |
2380c486 | 11148 | + |
7e46296a AM |
11149 | + if (toi_cluster_ops.enabled) |
11150 | + toi_cluster_open_iface(); | |
2380c486 | 11151 | + |
7e46296a | 11152 | + return temp; |
2380c486 | 11153 | +} |
2380c486 | 11154 | + |
7e46296a | 11155 | +EXIT void toi_cluster_exit(void) |
2380c486 | 11156 | +{ |
7e46296a AM |
11157 | + int i; |
11158 | + toi_cluster_close_iface(); | |
2380c486 | 11159 | + |
7e46296a AM |
11160 | + for (i = 0; i < MAX_LOCAL_NODES; i++) |
11161 | + toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj, | |
11162 | + &node_array[i].sysfs_data); | |
11163 | + toi_unregister_module(&toi_cluster_ops); | |
11164 | +} | |
2380c486 | 11165 | + |
7e46296a AM |
11166 | +static int __init toi_cluster_iface_setup(char *iface) |
11167 | +{ | |
11168 | + toi_cluster_ops.enabled = (*iface && | |
11169 | + strcmp(iface, "off")); | |
2380c486 | 11170 | + |
7e46296a AM |
11171 | + if (toi_cluster_ops.enabled) |
11172 | + strncpy(toi_cluster_iface, iface, strlen(iface)); | |
2380c486 | 11173 | +} |
7e46296a AM |
11174 | + |
11175 | +__setup("toi_cluster=", toi_cluster_iface_setup); | |
11176 | + | |
11177 | +#ifdef MODULE | |
11178 | +MODULE_LICENSE("GPL"); | |
11179 | +module_init(toi_cluster_init); | |
11180 | +module_exit(toi_cluster_exit); | |
11181 | +MODULE_AUTHOR("Nigel Cunningham"); | |
11182 | +MODULE_DESCRIPTION("Cluster Support for TuxOnIce"); | |
11183 | +#endif | |
11184 | diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h | |
2380c486 | 11185 | new file mode 100644 |
5dd10c98 | 11186 | index 0000000..051feb3 |
2380c486 | 11187 | --- /dev/null |
7e46296a | 11188 | +++ b/kernel/power/tuxonice_cluster.h |
5dd10c98 | 11189 | @@ -0,0 +1,18 @@ |
2380c486 | 11190 | +/* |
7e46296a | 11191 | + * kernel/power/tuxonice_cluster.h |
2380c486 | 11192 | + * |
5dd10c98 | 11193 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
11194 | + * |
11195 | + * This file is released under the GPLv2. | |
2380c486 JR |
11196 | + */ |
11197 | + | |
7e46296a AM |
11198 | +#ifdef CONFIG_TOI_CLUSTER |
11199 | +extern int toi_cluster_init(void); | |
11200 | +extern void toi_cluster_exit(void); | |
11201 | +extern void toi_initiate_cluster_hibernate(void); | |
11202 | +#else | |
11203 | +static inline int toi_cluster_init(void) { return 0; } | |
11204 | +static inline void toi_cluster_exit(void) { } | |
11205 | +static inline void toi_initiate_cluster_hibernate(void) { } | |
2380c486 | 11206 | +#endif |
7e46296a AM |
11207 | + |
11208 | diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c | |
2380c486 | 11209 | new file mode 100644 |
e876a0dd | 11210 | index 0000000..6bbc446 |
2380c486 | 11211 | --- /dev/null |
7e46296a | 11212 | +++ b/kernel/power/tuxonice_compress.c |
5dd10c98 | 11213 | @@ -0,0 +1,497 @@ |
2380c486 | 11214 | +/* |
7e46296a | 11215 | + * kernel/power/compression.c |
2380c486 | 11216 | + * |
5dd10c98 | 11217 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 11218 | + * |
7e46296a | 11219 | + * This file is released under the GPLv2. |
2380c486 | 11220 | + * |
7e46296a AM |
11221 | + * This file contains data compression routines for TuxOnIce, |
11222 | + * using cryptoapi. | |
2380c486 JR |
11223 | + */ |
11224 | + | |
11225 | +#include <linux/suspend.h> | |
7e46296a AM |
11226 | +#include <linux/highmem.h> |
11227 | +#include <linux/vmalloc.h> | |
11228 | +#include <linux/crypto.h> | |
2380c486 | 11229 | + |
7e46296a | 11230 | +#include "tuxonice_builtin.h" |
2380c486 | 11231 | +#include "tuxonice.h" |
2380c486 | 11232 | +#include "tuxonice_modules.h" |
7e46296a | 11233 | +#include "tuxonice_sysfs.h" |
2380c486 | 11234 | +#include "tuxonice_io.h" |
7e46296a | 11235 | +#include "tuxonice_ui.h" |
2380c486 | 11236 | +#include "tuxonice_alloc.h" |
2380c486 | 11237 | + |
7e46296a | 11238 | +static int toi_expected_compression; |
2380c486 | 11239 | + |
7e46296a AM |
11240 | +static struct toi_module_ops toi_compression_ops; |
11241 | +static struct toi_module_ops *next_driver; | |
2380c486 | 11242 | + |
7e46296a | 11243 | +static char toi_compressor_name[32] = "lzo"; |
2380c486 | 11244 | + |
7e46296a | 11245 | +static DEFINE_MUTEX(stats_lock); |
2380c486 | 11246 | + |
7e46296a AM |
11247 | +struct cpu_context { |
11248 | + u8 *page_buffer; | |
11249 | + struct crypto_comp *transform; | |
11250 | + unsigned int len; | |
11251 | + char *buffer_start; | |
11252 | + char *output_buffer; | |
5dd10c98 | 11253 | + char *check_buffer; |
7e46296a | 11254 | +}; |
2380c486 | 11255 | + |
7e46296a | 11256 | +static DEFINE_PER_CPU(struct cpu_context, contexts); |
5dd10c98 | 11257 | +static int toi_check_compression; |
2380c486 | 11258 | + |
7e46296a AM |
11259 | +/* |
11260 | + * toi_crypto_prepare | |
11261 | + * | |
11262 | + * Prepare to do some work by allocating buffers and transforms. | |
11263 | + */ | |
11264 | +static int toi_compress_crypto_prepare(void) | |
2380c486 | 11265 | +{ |
7e46296a | 11266 | + int cpu; |
2380c486 | 11267 | + |
7e46296a AM |
11268 | + if (!*toi_compressor_name) { |
11269 | + printk(KERN_INFO "TuxOnIce: Compression enabled but no " | |
11270 | + "compressor name set.\n"); | |
11271 | + return 1; | |
2380c486 JR |
11272 | + } |
11273 | + | |
7e46296a AM |
11274 | + for_each_online_cpu(cpu) { |
11275 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
11276 | + this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0); | |
11277 | + if (IS_ERR(this->transform)) { | |
11278 | + printk(KERN_INFO "TuxOnIce: Failed to initialise the " | |
11279 | + "%s compression transform.\n", | |
11280 | + toi_compressor_name); | |
11281 | + this->transform = NULL; | |
11282 | + return 1; | |
11283 | + } | |
2380c486 | 11284 | + |
7e46296a AM |
11285 | + this->page_buffer = |
11286 | + (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP); | |
2380c486 | 11287 | + |
7e46296a AM |
11288 | + if (!this->page_buffer) { |
11289 | + printk(KERN_ERR | |
11290 | + "Failed to allocate a page buffer for TuxOnIce " | |
11291 | + "compression driver.\n"); | |
11292 | + return -ENOMEM; | |
2380c486 JR |
11293 | + } |
11294 | + | |
7e46296a AM |
11295 | + this->output_buffer = |
11296 | + (char *) vmalloc_32(2 * PAGE_SIZE); | |
2380c486 | 11297 | + |
7e46296a AM |
11298 | + if (!this->output_buffer) { |
11299 | + printk(KERN_ERR | |
11300 | + "Failed to allocate a output buffer for TuxOnIce " | |
11301 | + "compression driver.\n"); | |
11302 | + return -ENOMEM; | |
2380c486 | 11303 | + } |
2380c486 | 11304 | + |
5dd10c98 AM |
11305 | + this->check_buffer = |
11306 | + (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP); | |
11307 | + | |
11308 | + if (!this->check_buffer) { | |
11309 | + printk(KERN_ERR | |
11310 | + "Failed to allocate a check buffer for TuxOnIce " | |
11311 | + "compression driver.\n"); | |
11312 | + return -ENOMEM; | |
11313 | + } | |
11314 | + | |
11315 | + } | |
11316 | + | |
11317 | + return 0; | |
11318 | +} | |
11319 | + | |
11320 | +static int toi_compress_rw_cleanup(int writing) | |
11321 | +{ | |
11322 | + int cpu; | |
11323 | + | |
11324 | + for_each_online_cpu(cpu) { | |
11325 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
11326 | + if (this->transform) { | |
11327 | + crypto_free_comp(this->transform); | |
11328 | + this->transform = NULL; | |
11329 | + } | |
11330 | + | |
11331 | + if (this->page_buffer) | |
11332 | + toi_free_page(16, (unsigned long) this->page_buffer); | |
11333 | + | |
11334 | + this->page_buffer = NULL; | |
11335 | + | |
11336 | + if (this->output_buffer) | |
11337 | + vfree(this->output_buffer); | |
11338 | + | |
11339 | + this->output_buffer = NULL; | |
11340 | + | |
11341 | + if (this->check_buffer) | |
11342 | + toi_free_page(16, (unsigned long) this->check_buffer); | |
11343 | + | |
11344 | + this->check_buffer = NULL; | |
2380c486 JR |
11345 | + } |
11346 | + | |
7e46296a | 11347 | + return 0; |
2380c486 JR |
11348 | +} |
11349 | + | |
7e46296a AM |
11350 | +/* |
11351 | + * toi_compress_init | |
11352 | + */ | |
11353 | + | |
11354 | +static int toi_compress_init(int toi_or_resume) | |
2380c486 | 11355 | +{ |
7e46296a AM |
11356 | + if (!toi_or_resume) |
11357 | + return 0; | |
2380c486 | 11358 | + |
7e46296a AM |
11359 | + toi_compress_bytes_in = 0; |
11360 | + toi_compress_bytes_out = 0; | |
2380c486 | 11361 | + |
7e46296a | 11362 | + next_driver = toi_get_next_filter(&toi_compression_ops); |
2380c486 | 11363 | + |
5dd10c98 | 11364 | + return next_driver ? 0 : -ECHILD; |
2380c486 JR |
11365 | +} |
11366 | + | |
7e46296a AM |
11367 | +/* |
11368 | + * toi_compress_rw_init() | |
11369 | + */ | |
11370 | + | |
11371 | +static int toi_compress_rw_init(int rw, int stream_number) | |
2380c486 | 11372 | +{ |
5dd10c98 | 11373 | + if (toi_compress_crypto_prepare()) { |
7e46296a AM |
11374 | + printk(KERN_ERR "Failed to initialise compression " |
11375 | + "algorithm.\n"); | |
11376 | + if (rw == READ) { | |
11377 | + printk(KERN_INFO "Unable to read the image.\n"); | |
11378 | + return -ENODEV; | |
11379 | + } else { | |
11380 | + printk(KERN_INFO "Continuing without " | |
11381 | + "compressing the image.\n"); | |
11382 | + toi_compression_ops.enabled = 0; | |
11383 | + } | |
2380c486 | 11384 | + } |
7e46296a | 11385 | + |
5dd10c98 AM |
11386 | + return 0; |
11387 | +} | |
11388 | + | |
11389 | +static int check_compression(struct cpu_context *ctx, struct page *buffer_page, | |
11390 | + int buf_size) | |
11391 | +{ | |
11392 | + char *original = kmap(buffer_page); | |
11393 | + int output_size = PAGE_SIZE, okay, ret; | |
11394 | + | |
11395 | + ret = crypto_comp_decompress(ctx->transform, ctx->output_buffer, | |
11396 | + ctx->len, ctx->check_buffer, &output_size); | |
11397 | + okay = (!ret && output_size == PAGE_SIZE && | |
11398 | + !memcmp(ctx->check_buffer, original, PAGE_SIZE)); | |
11399 | + | |
11400 | + if (!okay) { | |
11401 | + printk("Compression test failed.\n"); | |
11402 | + print_hex_dump(KERN_ERR, "Original page: ", DUMP_PREFIX_NONE, | |
11403 | + 16, 1, original, PAGE_SIZE, 0); | |
11404 | + printk(KERN_ERR "\nOutput %d bytes. Result %d.", ctx->len, ret); | |
11405 | + print_hex_dump(KERN_ERR, "Compressed to: ", DUMP_PREFIX_NONE, | |
11406 | + 16, 1, ctx->output_buffer, ctx->len, 0); | |
11407 | + printk(KERN_ERR "\nRestored to %d bytes.\n", output_size); | |
11408 | + print_hex_dump(KERN_ERR, "Decompressed : ", DUMP_PREFIX_NONE, | |
11409 | + 16, 1, ctx->check_buffer, output_size, 0); | |
11410 | + } | |
11411 | + kunmap(buffer_page); | |
11412 | + | |
11413 | + return okay; | |
2380c486 JR |
11414 | +} |
11415 | + | |
7e46296a AM |
11416 | +/* |
11417 | + * toi_compress_write_page() | |
11418 | + * | |
11419 | + * Compress a page of data, buffering output and passing on filled | |
11420 | + * pages to the next module in the pipeline. | |
11421 | + * | |
11422 | + * Buffer_page: Pointer to a buffer of size PAGE_SIZE, containing | |
11423 | + * data to be compressed. | |
11424 | + * | |
11425 | + * Returns: 0 on success. Otherwise the error is that returned by later | |
11426 | + * modules, -ECHILD if we have a broken pipeline or -EIO if | |
11427 | + * zlib errs. | |
11428 | + */ | |
11429 | +static int toi_compress_write_page(unsigned long index, | |
11430 | + struct page *buffer_page, unsigned int buf_size) | |
2380c486 | 11431 | +{ |
7e46296a AM |
11432 | + int ret, cpu = smp_processor_id(); |
11433 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 11434 | + |
7e46296a AM |
11435 | + if (!ctx->transform) |
11436 | + return next_driver->write_page(index, buffer_page, buf_size); | |
2380c486 | 11437 | + |
7e46296a | 11438 | + ctx->buffer_start = kmap(buffer_page); |
2380c486 | 11439 | + |
e876a0dd | 11440 | + ctx->len = PAGE_SIZE; |
92bca44c | 11441 | + |
7e46296a AM |
11442 | + ret = crypto_comp_compress(ctx->transform, |
11443 | + ctx->buffer_start, buf_size, | |
11444 | + ctx->output_buffer, &ctx->len); | |
2380c486 | 11445 | + |
7e46296a | 11446 | + kunmap(buffer_page); |
2380c486 | 11447 | + |
7e46296a AM |
11448 | + mutex_lock(&stats_lock); |
11449 | + toi_compress_bytes_in += buf_size; | |
11450 | + toi_compress_bytes_out += ctx->len; | |
11451 | + mutex_unlock(&stats_lock); | |
2380c486 | 11452 | + |
7e46296a | 11453 | + if (!ret && ctx->len < buf_size) { /* some compression */ |
5dd10c98 AM |
11454 | + if (unlikely(toi_check_compression)) { |
11455 | + ret = check_compression(ctx, buffer_page, buf_size); | |
11456 | + if (!ret) | |
11457 | + return next_driver->write_page(index, | |
11458 | + buffer_page, buf_size); | |
11459 | + } | |
11460 | + | |
7e46296a AM |
11461 | + memcpy(ctx->page_buffer, ctx->output_buffer, ctx->len); |
11462 | + return next_driver->write_page(index, | |
11463 | + virt_to_page(ctx->page_buffer), | |
11464 | + ctx->len); | |
2380c486 | 11465 | + } else |
7e46296a AM |
11466 | + return next_driver->write_page(index, buffer_page, buf_size); |
11467 | +} | |
2380c486 | 11468 | + |
7e46296a AM |
11469 | +/* |
11470 | + * toi_compress_read_page() | |
11471 | + * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE. | |
11472 | + * | |
11473 | + * Retrieve data from later modules and decompress it until the input buffer | |
11474 | + * is filled. | |
11475 | + * Zero if successful. Error condition from me or from downstream on failure. | |
11476 | + */ | |
11477 | +static int toi_compress_read_page(unsigned long *index, | |
11478 | + struct page *buffer_page, unsigned int *buf_size) | |
11479 | +{ | |
11480 | + int ret, cpu = smp_processor_id(); | |
11481 | + unsigned int len; | |
11482 | + unsigned int outlen = PAGE_SIZE; | |
11483 | + char *buffer_start; | |
11484 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 11485 | + |
7e46296a AM |
11486 | + if (!ctx->transform) |
11487 | + return next_driver->read_page(index, buffer_page, buf_size); | |
2380c486 | 11488 | + |
7e46296a AM |
11489 | + /* |
11490 | + * All our reads must be synchronous - we can't decompress | |
11491 | + * data that hasn't been read yet. | |
11492 | + */ | |
2380c486 | 11493 | + |
7e46296a | 11494 | + ret = next_driver->read_page(index, buffer_page, &len); |
2380c486 | 11495 | + |
7e46296a AM |
11496 | + /* Error or uncompressed data */ |
11497 | + if (ret || len == PAGE_SIZE) | |
11498 | + return ret; | |
2380c486 | 11499 | + |
7e46296a AM |
11500 | + buffer_start = kmap(buffer_page); |
11501 | + memcpy(ctx->page_buffer, buffer_start, len); | |
11502 | + ret = crypto_comp_decompress( | |
11503 | + ctx->transform, | |
11504 | + ctx->page_buffer, | |
11505 | + len, buffer_start, &outlen); | |
11506 | + if (ret) | |
11507 | + abort_hibernate(TOI_FAILED_IO, | |
11508 | + "Compress_read returned %d.\n", ret); | |
11509 | + else if (outlen != PAGE_SIZE) { | |
11510 | + abort_hibernate(TOI_FAILED_IO, | |
11511 | + "Decompression yielded %d bytes instead of %ld.\n", | |
11512 | + outlen, PAGE_SIZE); | |
11513 | + printk(KERN_ERR "Decompression yielded %d bytes instead of " | |
11514 | + "%ld.\n", outlen, PAGE_SIZE); | |
11515 | + ret = -EIO; | |
11516 | + *buf_size = outlen; | |
11517 | + } | |
11518 | + kunmap(buffer_page); | |
11519 | + return ret; | |
2380c486 JR |
11520 | +} |
11521 | + | |
7e46296a AM |
11522 | +/* |
11523 | + * toi_compress_print_debug_stats | |
11524 | + * @buffer: Pointer to a buffer into which the debug info will be printed. | |
11525 | + * @size: Size of the buffer. | |
2380c486 | 11526 | + * |
7e46296a AM |
11527 | + * Print information to be recorded for debugging purposes into a buffer. |
11528 | + * Returns: Number of characters written to the buffer. | |
11529 | + */ | |
2380c486 | 11530 | + |
7e46296a AM |
11531 | +static int toi_compress_print_debug_stats(char *buffer, int size) |
11532 | +{ | |
11533 | + unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT, | |
11534 | + pages_out = toi_compress_bytes_out >> PAGE_SHIFT; | |
11535 | + int len; | |
2380c486 | 11536 | + |
7e46296a AM |
11537 | + /* Output the compression ratio achieved. */ |
11538 | + if (*toi_compressor_name) | |
11539 | + len = scnprintf(buffer, size, "- Compressor is '%s'.\n", | |
11540 | + toi_compressor_name); | |
2380c486 | 11541 | + else |
7e46296a | 11542 | + len = scnprintf(buffer, size, "- Compressor is not set.\n"); |
2380c486 | 11543 | + |
7e46296a AM |
11544 | + if (pages_in) |
11545 | + len += scnprintf(buffer+len, size - len, " Compressed " | |
11546 | + "%lu bytes into %lu (%ld percent compression).\n", | |
11547 | + toi_compress_bytes_in, | |
11548 | + toi_compress_bytes_out, | |
11549 | + (pages_in - pages_out) * 100 / pages_in); | |
11550 | + return len; | |
2380c486 JR |
11551 | +} |
11552 | + | |
7e46296a AM |
11553 | +/* |
11554 | + * toi_compress_compression_memory_needed | |
11555 | + * | |
11556 | + * Tell the caller how much memory we need to operate during hibernate/resume. | |
11557 | + * Returns: Unsigned long. Maximum number of bytes of memory required for | |
11558 | + * operation. | |
11559 | + */ | |
11560 | +static int toi_compress_memory_needed(void) | |
2380c486 | 11561 | +{ |
7e46296a | 11562 | + return 2 * PAGE_SIZE; |
2380c486 JR |
11563 | +} |
11564 | + | |
7e46296a | 11565 | +static int toi_compress_storage_needed(void) |
2380c486 | 11566 | +{ |
7e46296a | 11567 | + return 4 * sizeof(unsigned long) + strlen(toi_compressor_name) + 1; |
2380c486 JR |
11568 | +} |
11569 | + | |
7e46296a AM |
11570 | +/* |
11571 | + * toi_compress_save_config_info | |
11572 | + * @buffer: Pointer to a buffer of size PAGE_SIZE. | |
11573 | + * | |
11574 | + * Save informaton needed when reloading the image at resume time. | |
11575 | + * Returns: Number of bytes used for saving our data. | |
11576 | + */ | |
11577 | +static int toi_compress_save_config_info(char *buffer) | |
2380c486 | 11578 | +{ |
7e46296a AM |
11579 | + int namelen = strlen(toi_compressor_name) + 1; |
11580 | + int total_len; | |
2380c486 | 11581 | + |
7e46296a AM |
11582 | + *((unsigned long *) buffer) = toi_compress_bytes_in; |
11583 | + *((unsigned long *) (buffer + 1 * sizeof(unsigned long))) = | |
11584 | + toi_compress_bytes_out; | |
11585 | + *((unsigned long *) (buffer + 2 * sizeof(unsigned long))) = | |
11586 | + toi_expected_compression; | |
11587 | + *((unsigned long *) (buffer + 3 * sizeof(unsigned long))) = namelen; | |
11588 | + strncpy(buffer + 4 * sizeof(unsigned long), toi_compressor_name, | |
11589 | + namelen); | |
11590 | + total_len = 4 * sizeof(unsigned long) + namelen; | |
11591 | + return total_len; | |
2380c486 JR |
11592 | +} |
11593 | + | |
7e46296a AM |
11594 | +/* toi_compress_load_config_info |
11595 | + * @buffer: Pointer to the start of the data. | |
11596 | + * @size: Number of bytes that were saved. | |
11597 | + * | |
11598 | + * Description: Reload information needed for decompressing the image at | |
11599 | + * resume time. | |
11600 | + */ | |
11601 | +static void toi_compress_load_config_info(char *buffer, int size) | |
2380c486 | 11602 | +{ |
7e46296a | 11603 | + int namelen; |
2380c486 | 11604 | + |
7e46296a AM |
11605 | + toi_compress_bytes_in = *((unsigned long *) buffer); |
11606 | + toi_compress_bytes_out = *((unsigned long *) (buffer + 1 * | |
11607 | + sizeof(unsigned long))); | |
11608 | + toi_expected_compression = *((unsigned long *) (buffer + 2 * | |
11609 | + sizeof(unsigned long))); | |
11610 | + namelen = *((unsigned long *) (buffer + 3 * sizeof(unsigned long))); | |
11611 | + if (strncmp(toi_compressor_name, buffer + 4 * sizeof(unsigned long), | |
5dd10c98 | 11612 | + namelen)) |
7e46296a AM |
11613 | + strncpy(toi_compressor_name, buffer + 4 * sizeof(unsigned long), |
11614 | + namelen); | |
7e46296a | 11615 | + return; |
2380c486 JR |
11616 | +} |
11617 | + | |
5dd10c98 AM |
11618 | +static void toi_compress_pre_atomic_restore(struct toi_boot_kernel_data *bkd) |
11619 | +{ | |
11620 | + bkd->compress_bytes_in = toi_compress_bytes_in; | |
11621 | + bkd->compress_bytes_out = toi_compress_bytes_out; | |
11622 | +} | |
11623 | + | |
11624 | +static void toi_compress_post_atomic_restore(struct toi_boot_kernel_data *bkd) | |
11625 | +{ | |
11626 | + toi_compress_bytes_in = bkd->compress_bytes_in; | |
11627 | + toi_compress_bytes_out = bkd->compress_bytes_out; | |
11628 | +} | |
11629 | + | |
7e46296a AM |
11630 | +/* |
11631 | + * toi_expected_compression_ratio | |
11632 | + * | |
11633 | + * Description: Returns the expected ratio between data passed into this module | |
11634 | + * and the amount of data output when writing. | |
11635 | + * Returns: 100 if the module is disabled. Otherwise the value set by the | |
11636 | + * user via our sysfs entry. | |
11637 | + */ | |
2380c486 | 11638 | + |
7e46296a AM |
11639 | +static int toi_compress_expected_ratio(void) |
11640 | +{ | |
11641 | + if (!toi_compression_ops.enabled) | |
11642 | + return 100; | |
11643 | + else | |
11644 | + return 100 - toi_expected_compression; | |
11645 | +} | |
2380c486 | 11646 | + |
7e46296a AM |
11647 | +/* |
11648 | + * data for our sysfs entries. | |
11649 | + */ | |
11650 | +static struct toi_sysfs_data sysfs_params[] = { | |
11651 | + SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression, | |
11652 | + 0, 99, 0, NULL), | |
11653 | + SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0, | |
11654 | + NULL), | |
5dd10c98 AM |
11655 | + SYSFS_INT("check", SYSFS_RW, &toi_check_compression, 0, 1, 0, |
11656 | + NULL), | |
7e46296a AM |
11657 | + SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL), |
11658 | +}; | |
2380c486 | 11659 | + |
7e46296a AM |
11660 | +/* |
11661 | + * Ops structure. | |
11662 | + */ | |
11663 | +static struct toi_module_ops toi_compression_ops = { | |
11664 | + .type = FILTER_MODULE, | |
11665 | + .name = "compression", | |
11666 | + .directory = "compression", | |
11667 | + .module = THIS_MODULE, | |
11668 | + .initialise = toi_compress_init, | |
7e46296a AM |
11669 | + .memory_needed = toi_compress_memory_needed, |
11670 | + .print_debug_info = toi_compress_print_debug_stats, | |
11671 | + .save_config_info = toi_compress_save_config_info, | |
11672 | + .load_config_info = toi_compress_load_config_info, | |
11673 | + .storage_needed = toi_compress_storage_needed, | |
11674 | + .expected_compression = toi_compress_expected_ratio, | |
2380c486 | 11675 | + |
5dd10c98 AM |
11676 | + .pre_atomic_restore = toi_compress_pre_atomic_restore, |
11677 | + .post_atomic_restore = toi_compress_post_atomic_restore, | |
11678 | + | |
7e46296a | 11679 | + .rw_init = toi_compress_rw_init, |
5dd10c98 | 11680 | + .rw_cleanup = toi_compress_rw_cleanup, |
2380c486 | 11681 | + |
7e46296a AM |
11682 | + .write_page = toi_compress_write_page, |
11683 | + .read_page = toi_compress_read_page, | |
2380c486 | 11684 | + |
7e46296a AM |
11685 | + .sysfs_data = sysfs_params, |
11686 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
11687 | + sizeof(struct toi_sysfs_data), | |
11688 | +}; | |
2380c486 | 11689 | + |
7e46296a | 11690 | +/* ---- Registration ---- */ |
2380c486 | 11691 | + |
7e46296a AM |
11692 | +static __init int toi_compress_load(void) |
11693 | +{ | |
11694 | + return toi_register_module(&toi_compression_ops); | |
2380c486 JR |
11695 | +} |
11696 | + | |
7e46296a AM |
11697 | +#ifdef MODULE |
11698 | +static __exit void toi_compress_unload(void) | |
2380c486 | 11699 | +{ |
7e46296a | 11700 | + toi_unregister_module(&toi_compression_ops); |
2380c486 JR |
11701 | +} |
11702 | + | |
7e46296a AM |
11703 | +module_init(toi_compress_load); |
11704 | +module_exit(toi_compress_unload); | |
11705 | +MODULE_LICENSE("GPL"); | |
11706 | +MODULE_AUTHOR("Nigel Cunningham"); | |
11707 | +MODULE_DESCRIPTION("Compression Support for TuxOnIce"); | |
11708 | +#else | |
11709 | +late_initcall(toi_compress_load); | |
11710 | +#endif | |
11711 | diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c | |
11712 | new file mode 100644 | |
5dd10c98 | 11713 | index 0000000..e84572c |
7e46296a AM |
11714 | --- /dev/null |
11715 | +++ b/kernel/power/tuxonice_extent.c | |
11716 | @@ -0,0 +1,123 @@ | |
11717 | +/* | |
11718 | + * kernel/power/tuxonice_extent.c | |
11719 | + * | |
5dd10c98 | 11720 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
11721 | + * |
11722 | + * Distributed under GPLv2. | |
11723 | + * | |
11724 | + * These functions encapsulate the manipulation of storage metadata. | |
11725 | + */ | |
11726 | + | |
11727 | +#include <linux/suspend.h> | |
11728 | +#include "tuxonice_modules.h" | |
11729 | +#include "tuxonice_extent.h" | |
11730 | +#include "tuxonice_alloc.h" | |
11731 | +#include "tuxonice_ui.h" | |
11732 | +#include "tuxonice.h" | |
2380c486 JR |
11733 | + |
11734 | +/** | |
7e46296a | 11735 | + * toi_get_extent - return a free extent |
2380c486 | 11736 | + * |
7e46296a | 11737 | + * May fail, returning NULL instead. |
2380c486 | 11738 | + **/ |
7e46296a | 11739 | +static struct hibernate_extent *toi_get_extent(void) |
2380c486 | 11740 | +{ |
7e46296a AM |
11741 | + return (struct hibernate_extent *) toi_kzalloc(2, |
11742 | + sizeof(struct hibernate_extent), TOI_ATOMIC_GFP); | |
11743 | +} | |
2380c486 | 11744 | + |
7e46296a AM |
11745 | +/** |
11746 | + * toi_put_extent_chain - free a whole chain of extents | |
11747 | + * @chain: Chain to free. | |
11748 | + **/ | |
11749 | +void toi_put_extent_chain(struct hibernate_extent_chain *chain) | |
11750 | +{ | |
11751 | + struct hibernate_extent *this; | |
2380c486 | 11752 | + |
7e46296a | 11753 | + this = chain->first; |
2380c486 | 11754 | + |
7e46296a AM |
11755 | + while (this) { |
11756 | + struct hibernate_extent *next = this->next; | |
11757 | + toi_kfree(2, this, sizeof(*this)); | |
11758 | + chain->num_extents--; | |
11759 | + this = next; | |
2380c486 JR |
11760 | + } |
11761 | + | |
7e46296a AM |
11762 | + chain->first = NULL; |
11763 | + chain->last_touched = NULL; | |
11764 | + chain->current_extent = NULL; | |
11765 | + chain->size = 0; | |
2380c486 | 11766 | +} |
7e46296a | 11767 | +EXPORT_SYMBOL_GPL(toi_put_extent_chain); |
2380c486 JR |
11768 | + |
11769 | +/** | |
7e46296a AM |
11770 | + * toi_add_to_extent_chain - add an extent to an existing chain |
11771 | + * @chain: Chain to which the extend should be added | |
11772 | + * @start: Start of the extent (first physical block) | |
11773 | + * @end: End of the extent (last physical block) | |
2380c486 | 11774 | + * |
7e46296a | 11775 | + * The chain information is updated if the insertion is successful. |
2380c486 | 11776 | + **/ |
7e46296a AM |
11777 | +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, |
11778 | + unsigned long start, unsigned long end) | |
2380c486 | 11779 | +{ |
7e46296a | 11780 | + struct hibernate_extent *new_ext = NULL, *cur_ext = NULL; |
2380c486 | 11781 | + |
7e46296a AM |
11782 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
11783 | + "Adding extent %lu-%lu to chain %p.\n", start, end, chain); | |
2380c486 | 11784 | + |
7e46296a AM |
11785 | + /* Find the right place in the chain */ |
11786 | + if (chain->last_touched && chain->last_touched->start < start) | |
11787 | + cur_ext = chain->last_touched; | |
11788 | + else if (chain->first && chain->first->start < start) | |
11789 | + cur_ext = chain->first; | |
2380c486 | 11790 | + |
7e46296a AM |
11791 | + if (cur_ext) { |
11792 | + while (cur_ext->next && cur_ext->next->start < start) | |
11793 | + cur_ext = cur_ext->next; | |
2380c486 | 11794 | + |
7e46296a AM |
11795 | + if (cur_ext->end == (start - 1)) { |
11796 | + struct hibernate_extent *next_ext = cur_ext->next; | |
11797 | + cur_ext->end = end; | |
2380c486 | 11798 | + |
7e46296a AM |
11799 | + /* Merge with the following one? */ |
11800 | + if (next_ext && cur_ext->end + 1 == next_ext->start) { | |
11801 | + cur_ext->end = next_ext->end; | |
11802 | + cur_ext->next = next_ext->next; | |
11803 | + toi_kfree(2, next_ext, sizeof(*next_ext)); | |
11804 | + chain->num_extents--; | |
11805 | + } | |
2380c486 | 11806 | + |
7e46296a AM |
11807 | + chain->last_touched = cur_ext; |
11808 | + chain->size += (end - start + 1); | |
2380c486 | 11809 | + |
7e46296a | 11810 | + return 0; |
2380c486 | 11811 | + } |
2380c486 JR |
11812 | + } |
11813 | + | |
7e46296a AM |
11814 | + new_ext = toi_get_extent(); |
11815 | + if (!new_ext) { | |
11816 | + printk(KERN_INFO "Error unable to append a new extent to the " | |
11817 | + "chain.\n"); | |
11818 | + return -ENOMEM; | |
2380c486 JR |
11819 | + } |
11820 | + | |
7e46296a AM |
11821 | + chain->num_extents++; |
11822 | + chain->size += (end - start + 1); | |
11823 | + new_ext->start = start; | |
11824 | + new_ext->end = end; | |
2380c486 | 11825 | + |
7e46296a | 11826 | + chain->last_touched = new_ext; |
2380c486 | 11827 | + |
7e46296a AM |
11828 | + if (cur_ext) { |
11829 | + new_ext->next = cur_ext->next; | |
11830 | + cur_ext->next = new_ext; | |
11831 | + } else { | |
11832 | + if (chain->first) | |
11833 | + new_ext->next = chain->first; | |
11834 | + chain->first = new_ext; | |
2380c486 JR |
11835 | + } |
11836 | + | |
7e46296a AM |
11837 | + return 0; |
11838 | +} | |
11839 | +EXPORT_SYMBOL_GPL(toi_add_to_extent_chain); | |
11840 | diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h | |
11841 | new file mode 100644 | |
5dd10c98 | 11842 | index 0000000..157446c |
7e46296a AM |
11843 | --- /dev/null |
11844 | +++ b/kernel/power/tuxonice_extent.h | |
11845 | @@ -0,0 +1,44 @@ | |
11846 | +/* | |
11847 | + * kernel/power/tuxonice_extent.h | |
11848 | + * | |
5dd10c98 | 11849 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
11850 | + * |
11851 | + * This file is released under the GPLv2. | |
11852 | + * | |
11853 | + * It contains declarations related to extents. Extents are | |
11854 | + * TuxOnIce's method of storing some of the metadata for the image. | |
11855 | + * See tuxonice_extent.c for more info. | |
11856 | + * | |
11857 | + */ | |
11858 | + | |
11859 | +#include "tuxonice_modules.h" | |
2380c486 | 11860 | + |
7e46296a AM |
11861 | +#ifndef EXTENT_H |
11862 | +#define EXTENT_H | |
2380c486 | 11863 | + |
7e46296a AM |
11864 | +struct hibernate_extent { |
11865 | + unsigned long start, end; | |
11866 | + struct hibernate_extent *next; | |
11867 | +}; | |
2380c486 | 11868 | + |
7e46296a AM |
11869 | +struct hibernate_extent_chain { |
11870 | + unsigned long size; /* size of the chain ie sum (max-min+1) */ | |
11871 | + int num_extents; | |
11872 | + struct hibernate_extent *first, *last_touched; | |
11873 | + struct hibernate_extent *current_extent; | |
11874 | + unsigned long current_offset; | |
11875 | +}; | |
11876 | + | |
11877 | +/* Simplify iterating through all the values in an extent chain */ | |
11878 | +#define toi_extent_for_each(extent_chain, extentpointer, value) \ | |
11879 | +if ((extent_chain)->first) \ | |
11880 | + for ((extentpointer) = (extent_chain)->first, (value) = \ | |
11881 | + (extentpointer)->start; \ | |
11882 | + ((extentpointer) && ((extentpointer)->next || (value) <= \ | |
11883 | + (extentpointer)->end)); \ | |
11884 | + (((value) == (extentpointer)->end) ? \ | |
11885 | + ((extentpointer) = (extentpointer)->next, (value) = \ | |
11886 | + ((extentpointer) ? (extentpointer)->start : 0)) : \ | |
11887 | + (value)++)) | |
11888 | + | |
11889 | +#endif | |
11890 | diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c | |
11891 | new file mode 100644 | |
cacc47f8 | 11892 | index 0000000..7a4614a |
7e46296a AM |
11893 | --- /dev/null |
11894 | +++ b/kernel/power/tuxonice_file.c | |
5dd10c98 | 11895 | @@ -0,0 +1,496 @@ |
7e46296a AM |
11896 | +/* |
11897 | + * kernel/power/tuxonice_file.c | |
2380c486 | 11898 | + * |
5dd10c98 | 11899 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 11900 | + * |
7e46296a AM |
11901 | + * Distributed under GPLv2. |
11902 | + * | |
11903 | + * This file encapsulates functions for usage of a simple file as a | |
11904 | + * backing store. It is based upon the swapallocator, and shares the | |
11905 | + * same basic working. Here, though, we have nothing to do with | |
11906 | + * swapspace, and only one device to worry about. | |
11907 | + * | |
11908 | + * The user can just | |
11909 | + * | |
11910 | + * echo TuxOnIce > /path/to/my_file | |
11911 | + * | |
11912 | + * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file | |
11913 | + * | |
11914 | + * and | |
11915 | + * | |
11916 | + * echo /path/to/my_file > /sys/power/tuxonice/file/target | |
11917 | + * | |
11918 | + * then put what they find in /sys/power/tuxonice/resume | |
11919 | + * as their resume= parameter in lilo.conf (and rerun lilo if using it). | |
11920 | + * | |
11921 | + * Having done this, they're ready to hibernate and resume. | |
11922 | + * | |
11923 | + * TODO: | |
11924 | + * - File resizing. | |
11925 | + */ | |
2380c486 | 11926 | + |
7e46296a AM |
11927 | +#include <linux/blkdev.h> |
11928 | +#include <linux/mount.h> | |
11929 | +#include <linux/fs.h> | |
cacc47f8 | 11930 | +#include <linux/fs_uuid.h> |
2380c486 | 11931 | + |
7e46296a AM |
11932 | +#include "tuxonice.h" |
11933 | +#include "tuxonice_modules.h" | |
11934 | +#include "tuxonice_bio.h" | |
11935 | +#include "tuxonice_alloc.h" | |
11936 | +#include "tuxonice_builtin.h" | |
11937 | +#include "tuxonice_sysfs.h" | |
11938 | +#include "tuxonice_ui.h" | |
11939 | +#include "tuxonice_io.h" | |
2380c486 | 11940 | + |
7e46296a AM |
11941 | +#define target_is_normal_file() (S_ISREG(target_inode->i_mode)) |
11942 | + | |
11943 | +static struct toi_module_ops toi_fileops; | |
11944 | + | |
11945 | +static struct file *target_file; | |
11946 | +static struct block_device *toi_file_target_bdev; | |
11947 | +static unsigned long pages_available, pages_allocated; | |
11948 | +static char toi_file_target[256]; | |
11949 | +static struct inode *target_inode; | |
11950 | +static int file_target_priority; | |
11951 | +static int used_devt; | |
11952 | +static int target_claim; | |
11953 | +static dev_t toi_file_dev_t; | |
11954 | +static int sig_page_index; | |
11955 | + | |
11956 | +/* For test_toi_file_target */ | |
11957 | +static struct toi_bdev_info *file_chain; | |
11958 | + | |
11959 | +static int has_contiguous_blocks(struct toi_bdev_info *dev_info, int page_num) | |
2380c486 | 11960 | +{ |
7e46296a AM |
11961 | + int j; |
11962 | + sector_t last = 0; | |
11963 | + | |
11964 | + for (j = 0; j < dev_info->blocks_per_page; j++) { | |
11965 | + sector_t this = bmap(target_inode, | |
11966 | + page_num * dev_info->blocks_per_page + j); | |
11967 | + | |
11968 | + if (!this || (last && (last + 1) != this)) | |
11969 | + break; | |
2380c486 | 11970 | + |
7e46296a | 11971 | + last = this; |
2380c486 JR |
11972 | + } |
11973 | + | |
7e46296a AM |
11974 | + return j == dev_info->blocks_per_page; |
11975 | +} | |
2380c486 | 11976 | + |
7e46296a AM |
11977 | +static unsigned long get_usable_pages(struct toi_bdev_info *dev_info) |
11978 | +{ | |
11979 | + unsigned long result = 0; | |
11980 | + struct block_device *bdev = dev_info->bdev; | |
11981 | + int i; | |
2380c486 | 11982 | + |
7e46296a AM |
11983 | + switch (target_inode->i_mode & S_IFMT) { |
11984 | + case S_IFSOCK: | |
11985 | + case S_IFCHR: | |
11986 | + case S_IFIFO: /* Socket, Char, Fifo */ | |
11987 | + return -1; | |
11988 | + case S_IFREG: /* Regular file: current size - holes + free | |
11989 | + space on part */ | |
11990 | + for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) { | |
11991 | + if (has_contiguous_blocks(dev_info, i)) | |
11992 | + result++; | |
11993 | + } | |
11994 | + break; | |
11995 | + case S_IFBLK: /* Block device */ | |
11996 | + if (!bdev->bd_disk) { | |
11997 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
11998 | + "bdev->bd_disk null."); | |
11999 | + return 0; | |
12000 | + } | |
2380c486 | 12001 | + |
7e46296a AM |
12002 | + result = (bdev->bd_part ? |
12003 | + bdev->bd_part->nr_sects : | |
12004 | + get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9); | |
12005 | + } | |
2380c486 | 12006 | + |
2380c486 | 12007 | + |
7e46296a | 12008 | + return result; |
2380c486 JR |
12009 | +} |
12010 | + | |
7e46296a | 12011 | +static int toi_file_register_storage(void) |
2380c486 | 12012 | +{ |
7e46296a | 12013 | + struct toi_bdev_info *devinfo; |
5dd10c98 AM |
12014 | + int result = 0; |
12015 | + struct fs_info *fs_info; | |
7e46296a AM |
12016 | + |
12017 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_file_register_storage."); | |
12018 | + if (!strlen(toi_file_target)) { | |
12019 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Register file storage: " | |
12020 | + "No target filename set."); | |
12021 | + return 0; | |
12022 | + } | |
12023 | + | |
12024 | + target_file = filp_open(toi_file_target, O_RDONLY|O_LARGEFILE, 0); | |
12025 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "filp_open %s returned %p.", | |
12026 | + toi_file_target, target_file); | |
12027 | + | |
12028 | + if (IS_ERR(target_file) || !target_file) { | |
12029 | + target_file = NULL; | |
12030 | + toi_file_dev_t = name_to_dev_t(toi_file_target); | |
12031 | + if (!toi_file_dev_t) { | |
12032 | + struct kstat stat; | |
12033 | + int error = vfs_stat(toi_file_target, &stat); | |
12034 | + printk(KERN_INFO "Open file %s returned %p and " | |
12035 | + "name_to_devt failed.\n", | |
12036 | + toi_file_target, target_file); | |
12037 | + if (error) { | |
12038 | + printk(KERN_INFO "Stating the file also failed." | |
12039 | + " Nothing more we can do.\n"); | |
12040 | + return 0; | |
12041 | + } else | |
12042 | + toi_file_dev_t = stat.rdev; | |
12043 | + } | |
2380c486 | 12044 | + |
5dd10c98 | 12045 | + toi_file_target_bdev = toi_open_by_devnum(toi_file_dev_t); |
7e46296a AM |
12046 | + if (IS_ERR(toi_file_target_bdev)) { |
12047 | + printk(KERN_INFO "Got a dev_num (%lx) but failed to " | |
12048 | + "open it.\n", | |
12049 | + (unsigned long) toi_file_dev_t); | |
12050 | + toi_file_target_bdev = NULL; | |
12051 | + return 0; | |
12052 | + } | |
12053 | + used_devt = 1; | |
12054 | + target_inode = toi_file_target_bdev->bd_inode; | |
12055 | + } else | |
12056 | + target_inode = target_file->f_mapping->host; | |
2380c486 | 12057 | + |
7e46296a AM |
12058 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Succeeded in opening the target."); |
12059 | + if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) || | |
12060 | + S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) { | |
12061 | + printk(KERN_INFO "File support works with regular files," | |
12062 | + " character files and block devices.\n"); | |
12063 | + /* Cleanup routine will undo the above */ | |
2380c486 JR |
12064 | + return 0; |
12065 | + } | |
12066 | + | |
7e46296a AM |
12067 | + if (!used_devt) { |
12068 | + if (S_ISBLK(target_inode->i_mode)) { | |
12069 | + toi_file_target_bdev = I_BDEV(target_inode); | |
12070 | + if (!bd_claim(toi_file_target_bdev, &toi_fileops)) | |
12071 | + target_claim = 1; | |
12072 | + } else | |
12073 | + toi_file_target_bdev = target_inode->i_sb->s_bdev; | |
5dd10c98 AM |
12074 | + if (!toi_file_target_bdev) { |
12075 | + printk(KERN_INFO "%s is not a valid file allocator " | |
12076 | + "target.\n", toi_file_target); | |
12077 | + return 0; | |
12078 | + } | |
7e46296a AM |
12079 | + toi_file_dev_t = toi_file_target_bdev->bd_dev; |
12080 | + } | |
2380c486 | 12081 | + |
7e46296a AM |
12082 | + devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), GFP_ATOMIC); |
12083 | + if (!devinfo) { | |
12084 | + printk("Failed to allocate a toi_bdev_info struct for the file allocator.\n"); | |
12085 | + return -ENOMEM; | |
12086 | + } | |
2380c486 | 12087 | + |
7e46296a AM |
12088 | + devinfo->bdev = toi_file_target_bdev; |
12089 | + devinfo->allocator = &toi_fileops; | |
12090 | + devinfo->allocator_index = 0; | |
2380c486 | 12091 | + |
5dd10c98 AM |
12092 | + fs_info = fs_info_from_block_dev(toi_file_target_bdev); |
12093 | + if (fs_info && !IS_ERR(fs_info)) { | |
12094 | + memcpy(devinfo->uuid, &fs_info->uuid, 16); | |
12095 | + free_fs_info(fs_info); | |
12096 | + } else | |
12097 | + result = (int) PTR_ERR(fs_info); | |
12098 | + | |
12099 | + /* Unlike swap code, only complain if fs_info_from_block_dev returned | |
12100 | + * -ENOMEM. The 'file' might be a full partition, so might validly not | |
12101 | + * have an identifiable type, UUID etc. | |
12102 | + */ | |
7e46296a | 12103 | + if (result) |
5dd10c98 | 12104 | + printk(KERN_DEBUG "Failed to get fs_info for file device (%d).\n", |
7e46296a AM |
12105 | + result); |
12106 | + devinfo->dev_t = toi_file_dev_t; | |
12107 | + devinfo->prio = file_target_priority; | |
12108 | + devinfo->bmap_shift = target_inode->i_blkbits - 9; | |
12109 | + devinfo->blocks_per_page = | |
12110 | + (1 << (PAGE_SHIFT - target_inode->i_blkbits)); | |
5dd10c98 | 12111 | + sprintf(devinfo->name, "file %s", toi_file_target); |
7e46296a AM |
12112 | + file_chain = devinfo; |
12113 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Dev_t is %lx. Prio is %d. Bmap " | |
12114 | + "shift is %d. Blocks per page %d.", | |
12115 | + devinfo->dev_t, devinfo->prio, devinfo->bmap_shift, | |
12116 | + devinfo->blocks_per_page); | |
12117 | + | |
12118 | + /* Keep one aside for the signature */ | |
12119 | + pages_available = get_usable_pages(devinfo) - 1; | |
12120 | + | |
12121 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering file storage, %lu " | |
12122 | + "pages.", pages_available); | |
12123 | + | |
12124 | + toi_bio_ops.register_storage(devinfo); | |
12125 | + return 0; | |
12126 | +} | |
2380c486 | 12127 | + |
7e46296a AM |
12128 | +static unsigned long toi_file_storage_available(void) |
12129 | +{ | |
12130 | + return pages_available; | |
2380c486 JR |
12131 | +} |
12132 | + | |
7e46296a AM |
12133 | +static int toi_file_allocate_storage(struct toi_bdev_info *chain, |
12134 | + unsigned long request) | |
2380c486 | 12135 | +{ |
7e46296a AM |
12136 | + unsigned long available = pages_available - pages_allocated; |
12137 | + unsigned long to_add = min(available, request); | |
2380c486 | 12138 | + |
7e46296a AM |
12139 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Pages available is %lu. Allocated " |
12140 | + "is %lu. Allocating %lu pages from file.", | |
12141 | + pages_available, pages_allocated, to_add); | |
12142 | + pages_allocated += to_add; | |
2380c486 | 12143 | + |
7e46296a | 12144 | + return to_add; |
2380c486 JR |
12145 | +} |
12146 | + | |
12147 | +/** | |
7e46296a AM |
12148 | + * __populate_block_list - add an extent to the chain |
12149 | + * @min: Start of the extent (first physical block = sector) | |
12150 | + * @max: End of the extent (last physical block = sector) | |
2380c486 | 12151 | + * |
7e46296a AM |
12152 | + * If TOI_TEST_BIO is set, print a debug message, outputting the min and max |
12153 | + * fs block numbers. | |
2380c486 | 12154 | + **/ |
7e46296a | 12155 | +static int __populate_block_list(struct toi_bdev_info *chain, int min, int max) |
2380c486 | 12156 | +{ |
7e46296a AM |
12157 | + if (test_action_state(TOI_TEST_BIO)) |
12158 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %d-%d.", | |
12159 | + min << chain->bmap_shift, | |
12160 | + ((max + 1) << chain->bmap_shift) - 1); | |
2380c486 | 12161 | + |
7e46296a AM |
12162 | + return toi_add_to_extent_chain(&chain->blocks, min, max); |
12163 | +} | |
2380c486 | 12164 | + |
7e46296a AM |
12165 | +static int get_main_pool_phys_params(struct toi_bdev_info *chain) |
12166 | +{ | |
12167 | + int i, extent_min = -1, extent_max = -1, result = 0, have_sig_page = 0; | |
12168 | + unsigned long pages_mapped = 0; | |
2380c486 | 12169 | + |
7e46296a | 12170 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Getting file allocator blocks."); |
2380c486 | 12171 | + |
7e46296a AM |
12172 | + if (chain->blocks.first) |
12173 | + toi_put_extent_chain(&chain->blocks); | |
2380c486 | 12174 | + |
7e46296a AM |
12175 | + if (!target_is_normal_file()) { |
12176 | + result = (pages_available > 0) ? | |
12177 | + __populate_block_list(chain, chain->blocks_per_page, | |
12178 | + (pages_allocated + 1) * | |
12179 | + chain->blocks_per_page - 1) : 0; | |
12180 | + return result; | |
2380c486 JR |
12181 | + } |
12182 | + | |
12183 | + /* | |
7e46296a AM |
12184 | + * FIXME: We are assuming the first page is contiguous. Is that |
12185 | + * assumption always right? | |
2380c486 JR |
12186 | + */ |
12187 | + | |
7e46296a AM |
12188 | + for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) { |
12189 | + sector_t new_sector; | |
2380c486 | 12190 | + |
7e46296a AM |
12191 | + if (!has_contiguous_blocks(chain, i)) |
12192 | + continue; | |
2380c486 | 12193 | + |
7e46296a AM |
12194 | + if (!have_sig_page) { |
12195 | + have_sig_page = 1; | |
12196 | + sig_page_index = i; | |
12197 | + continue; | |
2380c486 | 12198 | + } |
2380c486 | 12199 | + |
7e46296a | 12200 | + pages_mapped++; |
2380c486 | 12201 | + |
7e46296a AM |
12202 | + /* Ignore first page - it has the header */ |
12203 | + if (pages_mapped == 1) | |
12204 | + continue; | |
2380c486 | 12205 | + |
7e46296a | 12206 | + new_sector = bmap(target_inode, (i * chain->blocks_per_page)); |
2380c486 | 12207 | + |
7e46296a AM |
12208 | + /* |
12209 | + * I'd love to be able to fill in holes and resize | |
12210 | + * files, but not yet... | |
12211 | + */ | |
2380c486 | 12212 | + |
7e46296a AM |
12213 | + if (new_sector == extent_max + 1) |
12214 | + extent_max += chain->blocks_per_page; | |
12215 | + else { | |
12216 | + if (extent_min > -1) { | |
12217 | + result = __populate_block_list(chain, | |
12218 | + extent_min, extent_max); | |
12219 | + if (result) | |
12220 | + return result; | |
12221 | + } | |
2380c486 | 12222 | + |
7e46296a AM |
12223 | + extent_min = new_sector; |
12224 | + extent_max = extent_min + | |
12225 | + chain->blocks_per_page - 1; | |
12226 | + } | |
2380c486 | 12227 | + |
7e46296a AM |
12228 | + if (pages_mapped == pages_allocated) |
12229 | + break; | |
12230 | + } | |
2380c486 | 12231 | + |
7e46296a AM |
12232 | + if (extent_min > -1) { |
12233 | + result = __populate_block_list(chain, extent_min, extent_max); | |
12234 | + if (result) | |
12235 | + return result; | |
12236 | + } | |
2380c486 | 12237 | + |
7e46296a | 12238 | + return 0; |
2380c486 JR |
12239 | +} |
12240 | + | |
7e46296a | 12241 | +static void toi_file_free_storage(struct toi_bdev_info *chain) |
2380c486 | 12242 | +{ |
7e46296a AM |
12243 | + pages_allocated = 0; |
12244 | + file_chain = NULL; | |
2380c486 JR |
12245 | +} |
12246 | + | |
12247 | +/** | |
7e46296a AM |
12248 | + * toi_file_print_debug_stats - print debug info |
12249 | + * @buffer: Buffer to data to populate | |
12250 | + * @size: Size of the buffer | |
2380c486 | 12251 | + **/ |
7e46296a | 12252 | +static int toi_file_print_debug_stats(char *buffer, int size) |
2380c486 | 12253 | +{ |
7e46296a AM |
12254 | + int len = scnprintf(buffer, size, "- File Allocator active.\n"); |
12255 | + | |
12256 | + len += scnprintf(buffer+len, size-len, " Storage available for " | |
12257 | + "image: %lu pages.\n", pages_available); | |
12258 | + | |
12259 | + return len; | |
2380c486 JR |
12260 | +} |
12261 | + | |
7e46296a | 12262 | +static void toi_file_cleanup(int finishing_cycle) |
2380c486 | 12263 | +{ |
7e46296a AM |
12264 | + if (toi_file_target_bdev) { |
12265 | + if (target_claim) { | |
12266 | + bd_release(toi_file_target_bdev); | |
12267 | + target_claim = 0; | |
12268 | + } | |
2380c486 | 12269 | + |
7e46296a AM |
12270 | + if (used_devt) { |
12271 | + blkdev_put(toi_file_target_bdev, | |
12272 | + FMODE_READ | FMODE_NDELAY); | |
12273 | + used_devt = 0; | |
2380c486 | 12274 | + } |
7e46296a AM |
12275 | + toi_file_target_bdev = NULL; |
12276 | + target_inode = NULL; | |
12277 | + } | |
12278 | + | |
12279 | + if (target_file) { | |
12280 | + filp_close(target_file, NULL); | |
12281 | + target_file = NULL; | |
2380c486 JR |
12282 | + } |
12283 | + | |
7e46296a AM |
12284 | + pages_available = 0; |
12285 | +} | |
2380c486 | 12286 | + |
7e46296a AM |
12287 | +/** |
12288 | + * test_toi_file_target - sysfs callback for /sys/power/tuxonince/file/target | |
12289 | + * | |
12290 | + * Test wheter the target file is valid for hibernating. | |
12291 | + **/ | |
12292 | +static void test_toi_file_target(void) | |
12293 | +{ | |
12294 | + int result = toi_file_register_storage(); | |
12295 | + sector_t sector; | |
de6743ae | 12296 | + char buf[50]; |
5dd10c98 | 12297 | + struct fs_info *fs_info; |
7e46296a | 12298 | + |
5dd10c98 | 12299 | + if (result || !file_chain) |
7e46296a AM |
12300 | + return; |
12301 | + | |
12302 | + /* This doesn't mean we're in business. Is any storage available? */ | |
12303 | + if (!pages_available) | |
12304 | + goto out; | |
12305 | + | |
12306 | + toi_file_allocate_storage(file_chain, 1); | |
12307 | + result = get_main_pool_phys_params(file_chain); | |
12308 | + if (result) | |
12309 | + goto out; | |
12310 | + | |
12311 | + | |
12312 | + sector = bmap(target_inode, sig_page_index * | |
12313 | + file_chain->blocks_per_page) << file_chain->bmap_shift; | |
12314 | + | |
12315 | + /* Use the uuid, or the dev_t if that fails */ | |
5dd10c98 AM |
12316 | + fs_info = fs_info_from_block_dev(toi_file_target_bdev); |
12317 | + if (!fs_info || IS_ERR(fs_info)) { | |
7e46296a AM |
12318 | + bdevname(toi_file_target_bdev, buf); |
12319 | + sprintf(resume_file, "/dev/%s:%llu", buf, | |
12320 | + (unsigned long long) sector); | |
12321 | + } else { | |
12322 | + int i; | |
5dd10c98 | 12323 | + hex_dump_to_buffer(fs_info->uuid, 16, 32, 1, buf, 50, 0); |
7e46296a AM |
12324 | + |
12325 | + /* Remove the spaces */ | |
12326 | + for (i = 1; i < 16; i++) { | |
12327 | + buf[2 * i] = buf[3 * i]; | |
12328 | + buf[2 * i + 1] = buf[3 * i + 1]; | |
12329 | + } | |
12330 | + buf[32] = 0; | |
5dd10c98 | 12331 | + sprintf(resume_file, "UUID=%s:0x%llx", buf, |
7e46296a | 12332 | + (unsigned long long) sector); |
5dd10c98 | 12333 | + free_fs_info(fs_info); |
2380c486 JR |
12334 | + } |
12335 | + | |
7e46296a AM |
12336 | + toi_attempt_to_parse_resume_device(0); |
12337 | +out: | |
12338 | + toi_file_free_storage(file_chain); | |
12339 | + toi_bio_ops.free_storage(); | |
2380c486 JR |
12340 | +} |
12341 | + | |
12342 | +static struct toi_sysfs_data sysfs_params[] = { | |
2380c486 JR |
12343 | + SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256, |
12344 | + SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target), | |
7e46296a AM |
12345 | + SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, NULL), |
12346 | + SYSFS_INT("priority", SYSFS_RW, &file_target_priority, -4095, | |
12347 | + 4096, 0, NULL), | |
12348 | +}; | |
12349 | + | |
12350 | +static struct toi_bio_allocator_ops toi_bio_fileops = { | |
12351 | + .register_storage = toi_file_register_storage, | |
12352 | + .storage_available = toi_file_storage_available, | |
12353 | + .allocate_storage = toi_file_allocate_storage, | |
12354 | + .bmap = get_main_pool_phys_params, | |
12355 | + .free_storage = toi_file_free_storage, | |
2380c486 JR |
12356 | +}; |
12357 | + | |
12358 | +static struct toi_module_ops toi_fileops = { | |
7e46296a | 12359 | + .type = BIO_ALLOCATOR_MODULE, |
2380c486 JR |
12360 | + .name = "file storage", |
12361 | + .directory = "file", | |
12362 | + .module = THIS_MODULE, | |
12363 | + .print_debug_info = toi_file_print_debug_stats, | |
2380c486 | 12364 | + .cleanup = toi_file_cleanup, |
7e46296a | 12365 | + .bio_allocator_ops = &toi_bio_fileops, |
2380c486 JR |
12366 | + |
12367 | + .sysfs_data = sysfs_params, | |
12368 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
12369 | + sizeof(struct toi_sysfs_data), | |
12370 | +}; | |
12371 | + | |
12372 | +/* ---- Registration ---- */ | |
12373 | +static __init int toi_file_load(void) | |
12374 | +{ | |
2380c486 JR |
12375 | + return toi_register_module(&toi_fileops); |
12376 | +} | |
12377 | + | |
12378 | +#ifdef MODULE | |
12379 | +static __exit void toi_file_unload(void) | |
12380 | +{ | |
12381 | + toi_unregister_module(&toi_fileops); | |
12382 | +} | |
12383 | + | |
12384 | +module_init(toi_file_load); | |
12385 | +module_exit(toi_file_unload); | |
12386 | +MODULE_LICENSE("GPL"); | |
12387 | +MODULE_AUTHOR("Nigel Cunningham"); | |
12388 | +MODULE_DESCRIPTION("TuxOnIce FileAllocator"); | |
12389 | +#else | |
12390 | +late_initcall(toi_file_load); | |
12391 | +#endif | |
12392 | diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c | |
12393 | new file mode 100644 | |
5dd10c98 | 12394 | index 0000000..c4bbb49 |
2380c486 JR |
12395 | --- /dev/null |
12396 | +++ b/kernel/power/tuxonice_highlevel.c | |
7e46296a | 12397 | @@ -0,0 +1,1313 @@ |
2380c486 JR |
12398 | +/* |
12399 | + * kernel/power/tuxonice_highlevel.c | |
12400 | + */ | |
12401 | +/** \mainpage TuxOnIce. | |
12402 | + * | |
12403 | + * TuxOnIce provides support for saving and restoring an image of | |
12404 | + * system memory to an arbitrary storage device, either on the local computer, | |
12405 | + * or across some network. The support is entirely OS based, so TuxOnIce | |
12406 | + * works without requiring BIOS, APM or ACPI support. The vast majority of the | |
12407 | + * code is also architecture independant, so it should be very easy to port | |
12408 | + * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem | |
12409 | + * and preemption. Initramfses and initrds are also supported. | |
12410 | + * | |
12411 | + * TuxOnIce uses a modular design, in which the method of storing the image is | |
12412 | + * completely abstracted from the core code, as are transformations on the data | |
12413 | + * such as compression and/or encryption (multiple 'modules' can be used to | |
12414 | + * provide arbitrary combinations of functionality). The user interface is also | |
12415 | + * modular, so that arbitrarily simple or complex interfaces can be used to | |
12416 | + * provide anything from debugging information through to eye candy. | |
12417 | + * | |
12418 | + * \section Copyright | |
12419 | + * | |
12420 | + * TuxOnIce is released under the GPLv2. | |
12421 | + * | |
12422 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR> | |
12423 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR> | |
12424 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR> | |
5dd10c98 | 12425 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net)<BR> |
2380c486 JR |
12426 | + * |
12427 | + * \section Credits | |
12428 | + * | |
12429 | + * Nigel would like to thank the following people for their work: | |
12430 | + * | |
12431 | + * Bernard Blackham <bernard@blackham.com.au><BR> | |
12432 | + * Web page & Wiki administration, some coding. A person without whom | |
12433 | + * TuxOnIce would not be where it is. | |
12434 | + * | |
12435 | + * Michael Frank <mhf@linuxmail.org><BR> | |
12436 | + * Extensive testing and help with improving stability. I was constantly | |
12437 | + * amazed by the quality and quantity of Michael's help. | |
12438 | + * | |
12439 | + * Pavel Machek <pavel@ucw.cz><BR> | |
12440 | + * Modifications, defectiveness pointing, being with Gabor at the very | |
12441 | + * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and | |
12442 | + * 2.5.17. Even though Pavel and I disagree on the direction suspend to | |
12443 | + * disk should take, I appreciate the valuable work he did in helping Gabor | |
12444 | + * get the concept working. | |
12445 | + * | |
12446 | + * ..and of course the myriads of TuxOnIce users who have helped diagnose | |
12447 | + * and fix bugs, made suggestions on how to improve the code, proofread | |
12448 | + * documentation, and donated time and money. | |
12449 | + * | |
12450 | + * Thanks also to corporate sponsors: | |
12451 | + * | |
12452 | + * <B>Redhat.</B>Sometime employer from May 2006 (my fault, not Redhat's!). | |
12453 | + * | |
12454 | + * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who | |
12455 | + * allowed him to work on TuxOnIce and PM related issues on company time. | |
12456 | + * | |
12457 | + * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct | |
12458 | + * 2003 to Jan 2004. | |
12459 | + * | |
12460 | + * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing | |
12461 | + * maintenance of SMP and Highmem support. | |
12462 | + * | |
12463 | + * <B>OSDL.</B> Provided access to various hardware configurations, make | |
12464 | + * occasional small donations to the project. | |
12465 | + */ | |
12466 | + | |
12467 | +#include <linux/suspend.h> | |
2380c486 | 12468 | +#include <linux/freezer.h> |
5dd10c98 | 12469 | +#include <generated/utsrelease.h> |
2380c486 JR |
12470 | +#include <linux/cpu.h> |
12471 | +#include <linux/console.h> | |
12472 | +#include <linux/writeback.h> | |
12473 | +#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */ | |
7e46296a | 12474 | +#include <linux/bio.h> |
2380c486 JR |
12475 | + |
12476 | +#include "tuxonice.h" | |
12477 | +#include "tuxonice_modules.h" | |
12478 | +#include "tuxonice_sysfs.h" | |
12479 | +#include "tuxonice_prepare_image.h" | |
12480 | +#include "tuxonice_io.h" | |
12481 | +#include "tuxonice_ui.h" | |
12482 | +#include "tuxonice_power_off.h" | |
12483 | +#include "tuxonice_storage.h" | |
12484 | +#include "tuxonice_checksum.h" | |
12485 | +#include "tuxonice_builtin.h" | |
12486 | +#include "tuxonice_atomic_copy.h" | |
12487 | +#include "tuxonice_alloc.h" | |
12488 | +#include "tuxonice_cluster.h" | |
12489 | + | |
12490 | +/*! Pageset metadata. */ | |
12491 | +struct pagedir pagedir2 = {2}; | |
12492 | +EXPORT_SYMBOL_GPL(pagedir2); | |
12493 | + | |
12494 | +static mm_segment_t oldfs; | |
12495 | +static DEFINE_MUTEX(tuxonice_in_use); | |
12496 | +static int block_dump_save; | |
2380c486 JR |
12497 | + |
12498 | +/* Binary signature if an image is present */ | |
7e46296a | 12499 | +char tuxonice_signature[9] = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c"; |
2380c486 JR |
12500 | +EXPORT_SYMBOL_GPL(tuxonice_signature); |
12501 | + | |
2380c486 JR |
12502 | +unsigned long boot_kernel_data_buffer; |
12503 | + | |
12504 | +static char *result_strings[] = { | |
5dd10c98 | 12505 | + "Hibernation was aborted", |
2380c486 JR |
12506 | + "The user requested that we cancel the hibernation", |
12507 | + "No storage was available", | |
12508 | + "Insufficient storage was available", | |
12509 | + "Freezing filesystems and/or tasks failed", | |
12510 | + "A pre-existing image was used", | |
12511 | + "We would free memory, but image size limit doesn't allow this", | |
12512 | + "Unable to free enough memory to hibernate", | |
12513 | + "Unable to obtain the Power Management Semaphore", | |
12514 | + "A device suspend/resume returned an error", | |
12515 | + "A system device suspend/resume returned an error", | |
12516 | + "The extra pages allowance is too small", | |
12517 | + "We were unable to successfully prepare an image", | |
12518 | + "TuxOnIce module initialisation failed", | |
12519 | + "TuxOnIce module cleanup failed", | |
12520 | + "I/O errors were encountered", | |
12521 | + "Ran out of memory", | |
12522 | + "An error was encountered while reading the image", | |
12523 | + "Platform preparation failed", | |
12524 | + "CPU Hotplugging failed", | |
12525 | + "Architecture specific preparation failed", | |
12526 | + "Pages needed resaving, but we were told to abort if this happens", | |
12527 | + "We can't hibernate at the moment (invalid resume= or filewriter " | |
12528 | + "target?)", | |
12529 | + "A hibernation preparation notifier chain member cancelled the " | |
12530 | + "hibernation", | |
12531 | + "Pre-snapshot preparation failed", | |
12532 | + "Pre-restore preparation failed", | |
12533 | + "Failed to disable usermode helpers", | |
12534 | + "Can't resume from alternate image", | |
0ada99ac | 12535 | + "Header reservation too small", |
2380c486 JR |
12536 | +}; |
12537 | + | |
12538 | +/** | |
12539 | + * toi_finish_anything - cleanup after doing anything | |
12540 | + * @hibernate_or_resume: Whether finishing a cycle or attempt at | |
12541 | + * resuming. | |
12542 | + * | |
12543 | + * This is our basic clean-up routine, matching start_anything below. We | |
12544 | + * call cleanup routines, drop module references and restore process fs and | |
12545 | + * cpus allowed masks, together with the global block_dump variable's value. | |
12546 | + **/ | |
12547 | +void toi_finish_anything(int hibernate_or_resume) | |
12548 | +{ | |
12549 | + toi_cleanup_modules(hibernate_or_resume); | |
12550 | + toi_put_modules(); | |
12551 | + if (hibernate_or_resume) { | |
12552 | + block_dump = block_dump_save; | |
7e46296a | 12553 | + set_cpus_allowed_ptr(current, cpu_all_mask); |
2380c486 | 12554 | + toi_alloc_print_debug_stats(); |
2380c486 JR |
12555 | + atomic_inc(&snapshot_device_available); |
12556 | + mutex_unlock(&pm_mutex); | |
12557 | + } | |
12558 | + | |
12559 | + set_fs(oldfs); | |
12560 | + mutex_unlock(&tuxonice_in_use); | |
12561 | +} | |
12562 | + | |
12563 | +/** | |
12564 | + * toi_start_anything - basic initialisation for TuxOnIce | |
12565 | + * @toi_or_resume: Whether starting a cycle or attempt at resuming. | |
12566 | + * | |
12567 | + * Our basic initialisation routine. Take references on modules, use the | |
12568 | + * kernel segment, recheck resume= if no active allocator is set, initialise | |
12569 | + * modules, save and reset block_dump and ensure we're running on CPU0. | |
12570 | + **/ | |
12571 | +int toi_start_anything(int hibernate_or_resume) | |
12572 | +{ | |
2380c486 JR |
12573 | + mutex_lock(&tuxonice_in_use); |
12574 | + | |
12575 | + oldfs = get_fs(); | |
12576 | + set_fs(KERNEL_DS); | |
12577 | + | |
12578 | + if (hibernate_or_resume) { | |
12579 | + mutex_lock(&pm_mutex); | |
12580 | + | |
12581 | + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) | |
12582 | + goto snapshotdevice_unavailable; | |
12583 | + } | |
12584 | + | |
2380c486 JR |
12585 | + if (hibernate_or_resume == SYSFS_HIBERNATE) |
12586 | + toi_print_modules(); | |
12587 | + | |
12588 | + if (toi_get_modules()) { | |
12589 | + printk(KERN_INFO "TuxOnIce: Get modules failed!\n"); | |
12590 | + goto prehibernate_err; | |
12591 | + } | |
12592 | + | |
12593 | + if (hibernate_or_resume) { | |
12594 | + block_dump_save = block_dump; | |
12595 | + block_dump = 0; | |
7e46296a AM |
12596 | + set_cpus_allowed_ptr(current, |
12597 | + &cpumask_of_cpu(first_cpu(cpu_online_map))); | |
2380c486 JR |
12598 | + } |
12599 | + | |
12600 | + if (toi_initialise_modules_early(hibernate_or_resume)) | |
12601 | + goto early_init_err; | |
12602 | + | |
12603 | + if (!toiActiveAllocator) | |
12604 | + toi_attempt_to_parse_resume_device(!hibernate_or_resume); | |
12605 | + | |
12606 | + if (!toi_initialise_modules_late(hibernate_or_resume)) | |
12607 | + return 0; | |
12608 | + | |
12609 | + toi_cleanup_modules(hibernate_or_resume); | |
12610 | +early_init_err: | |
12611 | + if (hibernate_or_resume) { | |
12612 | + block_dump_save = block_dump; | |
7e46296a | 12613 | + set_cpus_allowed_ptr(current, cpu_all_mask); |
2380c486 | 12614 | + } |
7e46296a | 12615 | + toi_put_modules(); |
2380c486 JR |
12616 | +prehibernate_err: |
12617 | + if (hibernate_or_resume) | |
12618 | + atomic_inc(&snapshot_device_available); | |
12619 | +snapshotdevice_unavailable: | |
12620 | + if (hibernate_or_resume) | |
12621 | + mutex_unlock(&pm_mutex); | |
12622 | + set_fs(oldfs); | |
12623 | + mutex_unlock(&tuxonice_in_use); | |
12624 | + return -EBUSY; | |
12625 | +} | |
12626 | + | |
12627 | +/* | |
12628 | + * Nosave page tracking. | |
12629 | + * | |
12630 | + * Here rather than in prepare_image because we want to do it once only at the | |
12631 | + * start of a cycle. | |
12632 | + */ | |
12633 | + | |
12634 | +/** | |
12635 | + * mark_nosave_pages - set up our Nosave bitmap | |
12636 | + * | |
12637 | + * Build a bitmap of Nosave pages from the list. The bitmap allows faster | |
12638 | + * use when preparing the image. | |
12639 | + **/ | |
12640 | +static void mark_nosave_pages(void) | |
12641 | +{ | |
12642 | + struct nosave_region *region; | |
12643 | + | |
12644 | + list_for_each_entry(region, &nosave_regions, list) { | |
12645 | + unsigned long pfn; | |
12646 | + | |
12647 | + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) | |
12648 | + if (pfn_valid(pfn)) | |
12649 | + SetPageNosave(pfn_to_page(pfn)); | |
12650 | + } | |
12651 | +} | |
12652 | + | |
12653 | +static int alloc_a_bitmap(struct memory_bitmap **bm) | |
12654 | +{ | |
12655 | + int result = 0; | |
12656 | + | |
12657 | + *bm = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); | |
12658 | + if (!*bm) { | |
12659 | + printk(KERN_ERR "Failed to kzalloc memory for a bitmap.\n"); | |
12660 | + return -ENOMEM; | |
12661 | + } | |
12662 | + | |
12663 | + result = memory_bm_create(*bm, GFP_KERNEL, 0); | |
12664 | + | |
12665 | + if (result) { | |
12666 | + printk(KERN_ERR "Failed to create a bitmap.\n"); | |
12667 | + kfree(*bm); | |
12668 | + } | |
12669 | + | |
12670 | + return result; | |
12671 | +} | |
12672 | + | |
12673 | +/** | |
12674 | + * allocate_bitmaps - allocate bitmaps used to record page states | |
12675 | + * | |
12676 | + * Allocate the bitmaps we use to record the various TuxOnIce related | |
12677 | + * page states. | |
12678 | + **/ | |
12679 | +static int allocate_bitmaps(void) | |
12680 | +{ | |
12681 | + if (alloc_a_bitmap(&pageset1_map) || | |
12682 | + alloc_a_bitmap(&pageset1_copy_map) || | |
12683 | + alloc_a_bitmap(&pageset2_map) || | |
12684 | + alloc_a_bitmap(&io_map) || | |
12685 | + alloc_a_bitmap(&nosave_map) || | |
12686 | + alloc_a_bitmap(&free_map) || | |
12687 | + alloc_a_bitmap(&page_resave_map)) | |
12688 | + return 1; | |
12689 | + | |
12690 | + return 0; | |
12691 | +} | |
12692 | + | |
12693 | +static void free_a_bitmap(struct memory_bitmap **bm) | |
12694 | +{ | |
12695 | + if (!*bm) | |
12696 | + return; | |
12697 | + | |
12698 | + memory_bm_free(*bm, 0); | |
12699 | + kfree(*bm); | |
12700 | + *bm = NULL; | |
12701 | +} | |
12702 | + | |
12703 | +/** | |
12704 | + * free_bitmaps - free the bitmaps used to record page states | |
12705 | + * | |
12706 | + * Free the bitmaps allocated above. It is not an error to call | |
12707 | + * memory_bm_free on a bitmap that isn't currently allocated. | |
12708 | + **/ | |
12709 | +static void free_bitmaps(void) | |
12710 | +{ | |
12711 | + free_a_bitmap(&pageset1_map); | |
12712 | + free_a_bitmap(&pageset1_copy_map); | |
12713 | + free_a_bitmap(&pageset2_map); | |
12714 | + free_a_bitmap(&io_map); | |
12715 | + free_a_bitmap(&nosave_map); | |
12716 | + free_a_bitmap(&free_map); | |
12717 | + free_a_bitmap(&page_resave_map); | |
12718 | +} | |
12719 | + | |
12720 | +/** | |
12721 | + * io_MB_per_second - return the number of MB/s read or written | |
12722 | + * @write: Whether to return the speed at which we wrote. | |
12723 | + * | |
12724 | + * Calculate the number of megabytes per second that were read or written. | |
12725 | + **/ | |
12726 | +static int io_MB_per_second(int write) | |
12727 | +{ | |
12728 | + return (toi_bkd.toi_io_time[write][1]) ? | |
12729 | + MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ / | |
12730 | + toi_bkd.toi_io_time[write][1] : 0; | |
12731 | +} | |
12732 | + | |
12733 | +#define SNPRINTF(a...) do { len += scnprintf(((char *) buffer) + len, \ | |
12734 | + count - len - 1, ## a); } while (0) | |
12735 | + | |
12736 | +/** | |
12737 | + * get_debug_info - fill a buffer with debugging information | |
12738 | + * @buffer: The buffer to be filled. | |
12739 | + * @count: The size of the buffer, in bytes. | |
12740 | + * | |
12741 | + * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will | |
12742 | + * either printk or return via sysfs. | |
12743 | + **/ | |
12744 | +static int get_toi_debug_info(const char *buffer, int count) | |
12745 | +{ | |
12746 | + int len = 0, i, first_result = 1; | |
12747 | + | |
12748 | + SNPRINTF("TuxOnIce debugging info:\n"); | |
12749 | + SNPRINTF("- TuxOnIce core : " TOI_CORE_VERSION "\n"); | |
12750 | + SNPRINTF("- Kernel Version : " UTS_RELEASE "\n"); | |
12751 | + SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__); | |
12752 | + SNPRINTF("- Attempt number : %d\n", nr_hibernates); | |
7e46296a | 12753 | + SNPRINTF("- Parameters : %ld %ld %ld %d %ld %ld\n", |
2380c486 JR |
12754 | + toi_result, |
12755 | + toi_bkd.toi_action, | |
12756 | + toi_bkd.toi_debug_state, | |
12757 | + toi_bkd.toi_default_console_level, | |
12758 | + image_size_limit, | |
12759 | + toi_poweroff_method); | |
12760 | + SNPRINTF("- Overall expected compression percentage: %d.\n", | |
12761 | + 100 - toi_expected_compression_ratio()); | |
12762 | + len += toi_print_module_debug_info(((char *) buffer) + len, | |
12763 | + count - len - 1); | |
12764 | + if (toi_bkd.toi_io_time[0][1]) { | |
12765 | + if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) { | |
12766 | + SNPRINTF("- I/O speed: Write %ld KB/s", | |
12767 | + (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / | |
12768 | + toi_bkd.toi_io_time[0][1])); | |
12769 | + if (toi_bkd.toi_io_time[1][1]) | |
12770 | + SNPRINTF(", Read %ld KB/s", | |
12771 | + (KB((unsigned long) | |
12772 | + toi_bkd.toi_io_time[1][0]) * HZ / | |
12773 | + toi_bkd.toi_io_time[1][1])); | |
12774 | + } else { | |
12775 | + SNPRINTF("- I/O speed: Write %ld MB/s", | |
12776 | + (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / | |
12777 | + toi_bkd.toi_io_time[0][1])); | |
12778 | + if (toi_bkd.toi_io_time[1][1]) | |
12779 | + SNPRINTF(", Read %ld MB/s", | |
12780 | + (MB((unsigned long) | |
12781 | + toi_bkd.toi_io_time[1][0]) * HZ / | |
12782 | + toi_bkd.toi_io_time[1][1])); | |
12783 | + } | |
12784 | + SNPRINTF(".\n"); | |
12785 | + } else | |
12786 | + SNPRINTF("- No I/O speed stats available.\n"); | |
92bca44c | 12787 | + SNPRINTF("- Extra pages : %lu used/%lu.\n", |
2380c486 JR |
12788 | + extra_pd1_pages_used, extra_pd1_pages_allowance); |
12789 | + | |
12790 | + for (i = 0; i < TOI_NUM_RESULT_STATES; i++) | |
12791 | + if (test_result_state(i)) { | |
12792 | + SNPRINTF("%s: %s.\n", first_result ? | |
12793 | + "- Result " : | |
12794 | + " ", | |
12795 | + result_strings[i]); | |
12796 | + first_result = 0; | |
12797 | + } | |
12798 | + if (first_result) | |
12799 | + SNPRINTF("- Result : %s.\n", nr_hibernates ? | |
12800 | + "Succeeded" : | |
12801 | + "No hibernation attempts so far"); | |
12802 | + return len; | |
12803 | +} | |
12804 | + | |
12805 | +/** | |
12806 | + * do_cleanup - cleanup after attempting to hibernate or resume | |
12807 | + * @get_debug_info: Whether to allocate and return debugging info. | |
12808 | + * | |
12809 | + * Cleanup after attempting to hibernate or resume, possibly getting | |
12810 | + * debugging info as we do so. | |
12811 | + **/ | |
e999739a | 12812 | +static void do_cleanup(int get_debug_info, int restarting) |
2380c486 JR |
12813 | +{ |
12814 | + int i = 0; | |
12815 | + char *buffer = NULL; | |
12816 | + | |
7e46296a AM |
12817 | + trap_non_toi_io = 0; |
12818 | + | |
2380c486 JR |
12819 | + if (get_debug_info) |
12820 | + toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up..."); | |
12821 | + | |
12822 | + free_checksum_pages(); | |
12823 | + | |
12824 | + if (get_debug_info) | |
12825 | + buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP); | |
12826 | + | |
12827 | + if (buffer) | |
12828 | + i = get_toi_debug_info(buffer, PAGE_SIZE); | |
12829 | + | |
12830 | + toi_free_extra_pagedir_memory(); | |
12831 | + | |
12832 | + pagedir1.size = 0; | |
12833 | + pagedir2.size = 0; | |
12834 | + set_highmem_size(pagedir1, 0); | |
12835 | + set_highmem_size(pagedir2, 0); | |
12836 | + | |
12837 | + if (boot_kernel_data_buffer) { | |
12838 | + if (!test_toi_state(TOI_BOOT_KERNEL)) | |
12839 | + toi_free_page(37, boot_kernel_data_buffer); | |
12840 | + boot_kernel_data_buffer = 0; | |
12841 | + } | |
12842 | + | |
12843 | + clear_toi_state(TOI_BOOT_KERNEL); | |
12844 | + thaw_processes(); | |
12845 | + | |
2380c486 JR |
12846 | + if (test_action_state(TOI_KEEP_IMAGE) && |
12847 | + !test_result_state(TOI_ABORTED)) { | |
12848 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, | |
12849 | + "TuxOnIce: Not invalidating the image due " | |
7e46296a | 12850 | + "to Keep Image being enabled."); |
2380c486 JR |
12851 | + set_result_state(TOI_KEPT_IMAGE); |
12852 | + } else | |
2380c486 JR |
12853 | + if (toiActiveAllocator) |
12854 | + toiActiveAllocator->remove_image(); | |
12855 | + | |
12856 | + free_bitmaps(); | |
12857 | + usermodehelper_enable(); | |
12858 | + | |
12859 | + if (test_toi_state(TOI_NOTIFIERS_PREPARE)) { | |
12860 | + pm_notifier_call_chain(PM_POST_HIBERNATION); | |
12861 | + clear_toi_state(TOI_NOTIFIERS_PREPARE); | |
12862 | + } | |
12863 | + | |
12864 | + if (buffer && i) { | |
12865 | + /* Printk can only handle 1023 bytes, including | |
12866 | + * its level mangling. */ | |
12867 | + for (i = 0; i < 3; i++) | |
9474138d | 12868 | + printk(KERN_ERR "%s", buffer + (1023 * i)); |
2380c486 JR |
12869 | + toi_free_page(20, (unsigned long) buffer); |
12870 | + } | |
12871 | + | |
12872 | + if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) | |
12873 | + enable_nonboot_cpus(); | |
e999739a | 12874 | + |
12875 | + if (!restarting) | |
12876 | + toi_cleanup_console(); | |
2380c486 JR |
12877 | + |
12878 | + free_attention_list(); | |
12879 | + | |
e999739a | 12880 | + if (!restarting) |
12881 | + toi_deactivate_storage(0); | |
2380c486 JR |
12882 | + |
12883 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); | |
12884 | + clear_toi_state(TOI_TRYING_TO_RESUME); | |
12885 | + clear_toi_state(TOI_NOW_RESUMING); | |
12886 | +} | |
12887 | + | |
12888 | +/** | |
12889 | + * check_still_keeping_image - we kept an image; check whether to reuse it. | |
12890 | + * | |
12891 | + * We enter this routine when we have kept an image. If the user has said they | |
12892 | + * want to still keep it, all we need to do is powerdown. If powering down | |
12893 | + * means hibernating to ram and the power doesn't run out, we'll return 1. | |
12894 | + * If we do power off properly or the battery runs out, we'll resume via the | |
12895 | + * normal paths. | |
12896 | + * | |
12897 | + * If the user has said they want to remove the previously kept image, we | |
12898 | + * remove it, and return 0. We'll then store a new image. | |
12899 | + **/ | |
12900 | +static int check_still_keeping_image(void) | |
12901 | +{ | |
12902 | + if (test_action_state(TOI_KEEP_IMAGE)) { | |
e999739a | 12903 | + printk(KERN_INFO "Image already stored: powering down " |
12904 | + "immediately."); | |
2380c486 JR |
12905 | + do_toi_step(STEP_HIBERNATE_POWERDOWN); |
12906 | + return 1; /* Just in case we're using S3 */ | |
12907 | + } | |
12908 | + | |
e999739a | 12909 | + printk(KERN_INFO "Invalidating previous image.\n"); |
2380c486 JR |
12910 | + toiActiveAllocator->remove_image(); |
12911 | + | |
12912 | + return 0; | |
12913 | +} | |
12914 | + | |
12915 | +/** | |
12916 | + * toi_init - prepare to hibernate to disk | |
12917 | + * | |
12918 | + * Initialise variables & data structures, in preparation for | |
12919 | + * hibernating to disk. | |
12920 | + **/ | |
e999739a | 12921 | +static int toi_init(int restarting) |
2380c486 JR |
12922 | +{ |
12923 | + int result, i, j; | |
12924 | + | |
12925 | + toi_result = 0; | |
12926 | + | |
12927 | + printk(KERN_INFO "Initiating a hibernation cycle.\n"); | |
12928 | + | |
12929 | + nr_hibernates++; | |
12930 | + | |
12931 | + for (i = 0; i < 2; i++) | |
12932 | + for (j = 0; j < 2; j++) | |
12933 | + toi_bkd.toi_io_time[i][j] = 0; | |
12934 | + | |
12935 | + if (!test_toi_state(TOI_CAN_HIBERNATE) || | |
12936 | + allocate_bitmaps()) | |
12937 | + return 1; | |
12938 | + | |
12939 | + mark_nosave_pages(); | |
12940 | + | |
e999739a | 12941 | + if (!restarting) |
12942 | + toi_prepare_console(); | |
2380c486 JR |
12943 | + |
12944 | + result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); | |
12945 | + if (result) { | |
12946 | + set_result_state(TOI_NOTIFIERS_PREPARE_FAILED); | |
12947 | + return 1; | |
12948 | + } | |
12949 | + set_toi_state(TOI_NOTIFIERS_PREPARE); | |
12950 | + | |
12951 | + result = usermodehelper_disable(); | |
12952 | + if (result) { | |
12953 | + printk(KERN_ERR "TuxOnIce: Failed to disable usermode " | |
12954 | + "helpers\n"); | |
12955 | + set_result_state(TOI_USERMODE_HELPERS_ERR); | |
12956 | + return 1; | |
12957 | + } | |
12958 | + | |
12959 | + boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP); | |
12960 | + if (!boot_kernel_data_buffer) { | |
12961 | + printk(KERN_ERR "TuxOnIce: Failed to allocate " | |
12962 | + "boot_kernel_data_buffer.\n"); | |
12963 | + set_result_state(TOI_OUT_OF_MEMORY); | |
12964 | + return 1; | |
12965 | + } | |
12966 | + | |
12967 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG) || | |
12968 | + !disable_nonboot_cpus()) | |
12969 | + return 1; | |
12970 | + | |
12971 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
12972 | + return 0; | |
12973 | +} | |
12974 | + | |
12975 | +/** | |
12976 | + * can_hibernate - perform basic 'Can we hibernate?' tests | |
12977 | + * | |
12978 | + * Perform basic tests that must pass if we're going to be able to hibernate: | |
12979 | + * Can we get the pm_mutex? Is resume= valid (we need to know where to write | |
12980 | + * the image header). | |
12981 | + **/ | |
12982 | +static int can_hibernate(void) | |
12983 | +{ | |
12984 | + if (!test_toi_state(TOI_CAN_HIBERNATE)) | |
12985 | + toi_attempt_to_parse_resume_device(0); | |
12986 | + | |
12987 | + if (!test_toi_state(TOI_CAN_HIBERNATE)) { | |
12988 | + printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n" | |
12989 | + "This may be because you haven't put something along " | |
12990 | + "the lines of\n\nresume=swap:/dev/hda1\n\n" | |
12991 | + "in lilo.conf or equivalent. (Where /dev/hda1 is your " | |
12992 | + "swap partition).\n"); | |
12993 | + set_abort_result(TOI_CANT_SUSPEND); | |
12994 | + return 0; | |
12995 | + } | |
12996 | + | |
12997 | + if (strlen(alt_resume_param)) { | |
12998 | + attempt_to_parse_alt_resume_param(); | |
12999 | + | |
13000 | + if (!strlen(alt_resume_param)) { | |
13001 | + printk(KERN_INFO "Alternate resume parameter now " | |
13002 | + "invalid. Aborting.\n"); | |
13003 | + set_abort_result(TOI_CANT_USE_ALT_RESUME); | |
13004 | + return 0; | |
13005 | + } | |
13006 | + } | |
13007 | + | |
13008 | + return 1; | |
13009 | +} | |
13010 | + | |
13011 | +/** | |
13012 | + * do_post_image_write - having written an image, figure out what to do next | |
13013 | + * | |
13014 | + * After writing an image, we might load an alternate image or power down. | |
13015 | + * Powering down might involve hibernating to ram, in which case we also | |
13016 | + * need to handle reloading pageset2. | |
13017 | + **/ | |
13018 | +static int do_post_image_write(void) | |
13019 | +{ | |
13020 | + /* If switching images fails, do normal powerdown */ | |
13021 | + if (alt_resume_param[0]) | |
13022 | + do_toi_step(STEP_RESUME_ALT_IMAGE); | |
13023 | + | |
13024 | + toi_power_down(); | |
13025 | + | |
13026 | + barrier(); | |
13027 | + mb(); | |
13028 | + return 0; | |
13029 | +} | |
13030 | + | |
13031 | +/** | |
13032 | + * __save_image - do the hard work of saving the image | |
13033 | + * | |
13034 | + * High level routine for getting the image saved. The key assumptions made | |
13035 | + * are that processes have been frozen and sufficient memory is available. | |
13036 | + * | |
13037 | + * We also exit through here at resume time, coming back from toi_hibernate | |
13038 | + * after the atomic restore. This is the reason for the toi_in_hibernate | |
13039 | + * test. | |
13040 | + **/ | |
13041 | +static int __save_image(void) | |
13042 | +{ | |
13043 | + int temp_result, did_copy = 0; | |
13044 | + | |
13045 | + toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image.."); | |
13046 | + | |
13047 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, | |
7e46296a | 13048 | + " - Final values: %d and %d.", |
2380c486 JR |
13049 | + pagedir1.size, pagedir2.size); |
13050 | + | |
13051 | + toi_cond_pause(1, "About to write pagedir2."); | |
13052 | + | |
13053 | + temp_result = write_pageset(&pagedir2); | |
13054 | + | |
13055 | + if (temp_result == -1 || test_result_state(TOI_ABORTED)) | |
13056 | + return 1; | |
13057 | + | |
13058 | + toi_cond_pause(1, "About to copy pageset 1."); | |
13059 | + | |
13060 | + if (test_result_state(TOI_ABORTED)) | |
13061 | + return 1; | |
13062 | + | |
13063 | + toi_deactivate_storage(1); | |
13064 | + | |
13065 | + toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); | |
13066 | + | |
13067 | + toi_in_hibernate = 1; | |
13068 | + | |
13069 | + if (toi_go_atomic(PMSG_FREEZE, 1)) | |
13070 | + goto Failed; | |
13071 | + | |
13072 | + temp_result = toi_hibernate(); | |
13073 | + if (!temp_result) | |
13074 | + did_copy = 1; | |
13075 | + | |
13076 | + /* We return here at resume time too! */ | |
13077 | + toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result); | |
13078 | + | |
13079 | +Failed: | |
13080 | + if (toi_activate_storage(1)) | |
13081 | + panic("Failed to reactivate our storage."); | |
13082 | + | |
13083 | + /* Resume time? */ | |
13084 | + if (!toi_in_hibernate) { | |
13085 | + copyback_post(); | |
13086 | + return 0; | |
13087 | + } | |
13088 | + | |
13089 | + /* Nope. Hibernating. So, see if we can save the image... */ | |
13090 | + | |
13091 | + if (temp_result || test_result_state(TOI_ABORTED)) { | |
13092 | + if (did_copy) | |
13093 | + goto abort_reloading_pagedir_two; | |
13094 | + else | |
13095 | + return 1; | |
13096 | + } | |
13097 | + | |
13098 | + toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size, | |
13099 | + NULL); | |
13100 | + | |
13101 | + if (test_result_state(TOI_ABORTED)) | |
13102 | + goto abort_reloading_pagedir_two; | |
13103 | + | |
13104 | + toi_cond_pause(1, "About to write pageset1."); | |
13105 | + | |
7e46296a | 13106 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1"); |
2380c486 JR |
13107 | + |
13108 | + temp_result = write_pageset(&pagedir1); | |
13109 | + | |
13110 | + /* We didn't overwrite any memory, so no reread needs to be done. */ | |
13111 | + if (test_action_state(TOI_TEST_FILTER_SPEED)) | |
13112 | + return 1; | |
13113 | + | |
13114 | + if (temp_result == 1 || test_result_state(TOI_ABORTED)) | |
13115 | + goto abort_reloading_pagedir_two; | |
13116 | + | |
13117 | + toi_cond_pause(1, "About to write header."); | |
13118 | + | |
13119 | + if (test_result_state(TOI_ABORTED)) | |
13120 | + goto abort_reloading_pagedir_two; | |
13121 | + | |
13122 | + temp_result = write_image_header(); | |
13123 | + | |
13124 | + if (test_action_state(TOI_TEST_BIO)) | |
13125 | + return 1; | |
13126 | + | |
13127 | + if (!temp_result && !test_result_state(TOI_ABORTED)) | |
13128 | + return 0; | |
13129 | + | |
13130 | +abort_reloading_pagedir_two: | |
13131 | + temp_result = read_pageset2(1); | |
13132 | + | |
13133 | + /* If that failed, we're sunk. Panic! */ | |
13134 | + if (temp_result) | |
13135 | + panic("Attempt to reload pagedir 2 while aborting " | |
13136 | + "a hibernate failed."); | |
13137 | + | |
13138 | + return 1; | |
13139 | +} | |
13140 | + | |
13141 | +static void map_ps2_pages(int enable) | |
13142 | +{ | |
13143 | + unsigned long pfn = 0; | |
13144 | + | |
13145 | + pfn = memory_bm_next_pfn(pageset2_map); | |
13146 | + | |
13147 | + while (pfn != BM_END_OF_MAP) { | |
13148 | + struct page *page = pfn_to_page(pfn); | |
13149 | + kernel_map_pages(page, 1, enable); | |
13150 | + pfn = memory_bm_next_pfn(pageset2_map); | |
13151 | + } | |
13152 | +} | |
13153 | + | |
13154 | +/** | |
13155 | + * do_save_image - save the image and handle the result | |
13156 | + * | |
13157 | + * Save the prepared image. If we fail or we're in the path returning | |
13158 | + * from the atomic restore, cleanup. | |
13159 | + **/ | |
13160 | +static int do_save_image(void) | |
13161 | +{ | |
13162 | + int result; | |
13163 | + map_ps2_pages(0); | |
13164 | + result = __save_image(); | |
13165 | + map_ps2_pages(1); | |
13166 | + return result; | |
13167 | +} | |
13168 | + | |
13169 | +/** | |
13170 | + * do_prepare_image - try to prepare an image | |
13171 | + * | |
13172 | + * Seek to initialise and prepare an image to be saved. On failure, | |
13173 | + * cleanup. | |
13174 | + **/ | |
13175 | +static int do_prepare_image(void) | |
13176 | +{ | |
e999739a | 13177 | + int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); |
13178 | + | |
13179 | + if (!restarting && toi_activate_storage(0)) | |
2380c486 JR |
13180 | + return 1; |
13181 | + | |
13182 | + /* | |
13183 | + * If kept image and still keeping image and hibernating to RAM, we will | |
13184 | + * return 1 after hibernating and resuming (provided the power doesn't | |
13185 | + * run out. In that case, we skip directly to cleaning up and exiting. | |
13186 | + */ | |
13187 | + | |
13188 | + if (!can_hibernate() || | |
13189 | + (test_result_state(TOI_KEPT_IMAGE) && | |
13190 | + check_still_keeping_image())) | |
13191 | + return 1; | |
13192 | + | |
e999739a | 13193 | + if (toi_init(restarting) && !toi_prepare_image() && |
2380c486 JR |
13194 | + !test_result_state(TOI_ABORTED)) |
13195 | + return 0; | |
13196 | + | |
7e46296a AM |
13197 | + trap_non_toi_io = 1; |
13198 | + | |
2380c486 JR |
13199 | + return 1; |
13200 | +} | |
13201 | + | |
13202 | +/** | |
13203 | + * do_check_can_resume - find out whether an image has been stored | |
13204 | + * | |
13205 | + * Read whether an image exists. We use the same routine as the | |
13206 | + * image_exists sysfs entry, and just look to see whether the | |
13207 | + * first character in the resulting buffer is a '1'. | |
13208 | + **/ | |
13209 | +int do_check_can_resume(void) | |
13210 | +{ | |
7e46296a | 13211 | + int result = -1; |
2380c486 | 13212 | + |
7e46296a AM |
13213 | + if (toi_activate_storage(0)) |
13214 | + return -1; | |
2380c486 | 13215 | + |
7e46296a AM |
13216 | + if (!test_toi_state(TOI_RESUME_DEVICE_OK)) |
13217 | + toi_attempt_to_parse_resume_device(1); | |
2380c486 | 13218 | + |
7e46296a AM |
13219 | + if (toiActiveAllocator) |
13220 | + result = toiActiveAllocator->image_exists(1); | |
2380c486 | 13221 | + |
7e46296a | 13222 | + toi_deactivate_storage(0); |
2380c486 JR |
13223 | + return result; |
13224 | +} | |
13225 | +EXPORT_SYMBOL_GPL(do_check_can_resume); | |
13226 | + | |
13227 | +/** | |
13228 | + * do_load_atomic_copy - load the first part of an image, if it exists | |
13229 | + * | |
13230 | + * Check whether we have an image. If one exists, do sanity checking | |
13231 | + * (possibly invalidating the image or even rebooting if the user | |
13232 | + * requests that) before loading it into memory in preparation for the | |
13233 | + * atomic restore. | |
13234 | + * | |
13235 | + * If and only if we have an image loaded and ready to restore, we return 1. | |
13236 | + **/ | |
13237 | +static int do_load_atomic_copy(void) | |
13238 | +{ | |
13239 | + int read_image_result = 0; | |
13240 | + | |
13241 | + if (sizeof(swp_entry_t) != sizeof(long)) { | |
13242 | + printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size" | |
13243 | + " of long. Please report this!\n"); | |
13244 | + return 1; | |
13245 | + } | |
13246 | + | |
13247 | + if (!resume_file[0]) | |
13248 | + printk(KERN_WARNING "TuxOnIce: " | |
13249 | + "You need to use a resume= command line parameter to " | |
13250 | + "tell TuxOnIce where to look for an image.\n"); | |
13251 | + | |
13252 | + toi_activate_storage(0); | |
13253 | + | |
13254 | + if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) && | |
13255 | + !toi_attempt_to_parse_resume_device(0)) { | |
13256 | + /* | |
13257 | + * Without a usable storage device we can do nothing - | |
13258 | + * even if noresume is given | |
13259 | + */ | |
13260 | + | |
13261 | + if (!toiNumAllocators) | |
13262 | + printk(KERN_ALERT "TuxOnIce: " | |
13263 | + "No storage allocators have been registered.\n"); | |
13264 | + else | |
13265 | + printk(KERN_ALERT "TuxOnIce: " | |
13266 | + "Missing or invalid storage location " | |
13267 | + "(resume= parameter). Please correct and " | |
13268 | + "rerun lilo (or equivalent) before " | |
13269 | + "hibernating.\n"); | |
13270 | + toi_deactivate_storage(0); | |
13271 | + return 1; | |
13272 | + } | |
13273 | + | |
13274 | + if (allocate_bitmaps()) | |
13275 | + return 1; | |
13276 | + | |
13277 | + read_image_result = read_pageset1(); /* non fatal error ignored */ | |
13278 | + | |
13279 | + if (test_toi_state(TOI_NORESUME_SPECIFIED)) | |
13280 | + clear_toi_state(TOI_NORESUME_SPECIFIED); | |
13281 | + | |
13282 | + toi_deactivate_storage(0); | |
13283 | + | |
13284 | + if (read_image_result) | |
13285 | + return 1; | |
13286 | + | |
13287 | + return 0; | |
13288 | +} | |
13289 | + | |
13290 | +/** | |
13291 | + * prepare_restore_load_alt_image - save & restore alt image variables | |
13292 | + * | |
13293 | + * Save and restore the pageset1 maps, when loading an alternate image. | |
13294 | + **/ | |
13295 | +static void prepare_restore_load_alt_image(int prepare) | |
13296 | +{ | |
13297 | + static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save; | |
13298 | + | |
13299 | + if (prepare) { | |
13300 | + pageset1_map_save = pageset1_map; | |
13301 | + pageset1_map = NULL; | |
13302 | + pageset1_copy_map_save = pageset1_copy_map; | |
13303 | + pageset1_copy_map = NULL; | |
13304 | + set_toi_state(TOI_LOADING_ALT_IMAGE); | |
13305 | + toi_reset_alt_image_pageset2_pfn(); | |
13306 | + } else { | |
13307 | + memory_bm_free(pageset1_map, 0); | |
13308 | + pageset1_map = pageset1_map_save; | |
13309 | + memory_bm_free(pageset1_copy_map, 0); | |
13310 | + pageset1_copy_map = pageset1_copy_map_save; | |
13311 | + clear_toi_state(TOI_NOW_RESUMING); | |
13312 | + clear_toi_state(TOI_LOADING_ALT_IMAGE); | |
13313 | + } | |
13314 | +} | |
13315 | + | |
13316 | +/** | |
13317 | + * do_toi_step - perform a step in hibernating or resuming | |
13318 | + * | |
13319 | + * Perform a step in hibernating or resuming an image. This abstraction | |
13320 | + * is in preparation for implementing cluster support, and perhaps replacing | |
13321 | + * uswsusp too (haven't looked whether that's possible yet). | |
13322 | + **/ | |
13323 | +int do_toi_step(int step) | |
13324 | +{ | |
13325 | + switch (step) { | |
13326 | + case STEP_HIBERNATE_PREPARE_IMAGE: | |
13327 | + return do_prepare_image(); | |
13328 | + case STEP_HIBERNATE_SAVE_IMAGE: | |
13329 | + return do_save_image(); | |
13330 | + case STEP_HIBERNATE_POWERDOWN: | |
13331 | + return do_post_image_write(); | |
13332 | + case STEP_RESUME_CAN_RESUME: | |
13333 | + return do_check_can_resume(); | |
13334 | + case STEP_RESUME_LOAD_PS1: | |
13335 | + return do_load_atomic_copy(); | |
13336 | + case STEP_RESUME_DO_RESTORE: | |
13337 | + /* | |
13338 | + * If we succeed, this doesn't return. | |
13339 | + * Instead, we return from do_save_image() in the | |
13340 | + * hibernated kernel. | |
13341 | + */ | |
13342 | + return toi_atomic_restore(); | |
13343 | + case STEP_RESUME_ALT_IMAGE: | |
13344 | + printk(KERN_INFO "Trying to resume alternate image.\n"); | |
13345 | + toi_in_hibernate = 0; | |
13346 | + save_restore_alt_param(SAVE, NOQUIET); | |
13347 | + prepare_restore_load_alt_image(1); | |
13348 | + if (!do_check_can_resume()) { | |
13349 | + printk(KERN_INFO "Nothing to resume from.\n"); | |
13350 | + goto out; | |
13351 | + } | |
13352 | + if (!do_load_atomic_copy()) | |
13353 | + toi_atomic_restore(); | |
13354 | + | |
13355 | + printk(KERN_INFO "Failed to load image.\n"); | |
13356 | +out: | |
13357 | + prepare_restore_load_alt_image(0); | |
13358 | + save_restore_alt_param(RESTORE, NOQUIET); | |
13359 | + break; | |
13360 | + case STEP_CLEANUP: | |
e999739a | 13361 | + do_cleanup(1, 0); |
2380c486 JR |
13362 | + break; |
13363 | + case STEP_QUIET_CLEANUP: | |
e999739a | 13364 | + do_cleanup(0, 0); |
2380c486 JR |
13365 | + break; |
13366 | + } | |
13367 | + | |
13368 | + return 0; | |
13369 | +} | |
13370 | +EXPORT_SYMBOL_GPL(do_toi_step); | |
13371 | + | |
13372 | +/* -- Functions for kickstarting a hibernate or resume --- */ | |
13373 | + | |
13374 | +/** | |
9474138d | 13375 | + * toi_try_resume - try to do the steps in resuming |
2380c486 JR |
13376 | + * |
13377 | + * Check if we have an image and if so try to resume. Clear the status | |
13378 | + * flags too. | |
13379 | + **/ | |
9474138d | 13380 | +void toi_try_resume(void) |
2380c486 JR |
13381 | +{ |
13382 | + set_toi_state(TOI_TRYING_TO_RESUME); | |
13383 | + resume_attempted = 1; | |
13384 | + | |
13385 | + current->flags |= PF_MEMALLOC; | |
13386 | + | |
13387 | + if (do_toi_step(STEP_RESUME_CAN_RESUME) && | |
13388 | + !do_toi_step(STEP_RESUME_LOAD_PS1)) | |
13389 | + do_toi_step(STEP_RESUME_DO_RESTORE); | |
13390 | + | |
e999739a | 13391 | + do_cleanup(0, 0); |
2380c486 JR |
13392 | + |
13393 | + current->flags &= ~PF_MEMALLOC; | |
13394 | + | |
13395 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); | |
13396 | + clear_toi_state(TOI_TRYING_TO_RESUME); | |
13397 | + clear_toi_state(TOI_NOW_RESUMING); | |
13398 | +} | |
13399 | + | |
13400 | +/** | |
9474138d | 13401 | + * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume |
2380c486 | 13402 | + * |
9474138d | 13403 | + * Wrapper for when __toi_try_resume is called from swsusp resume path, |
2380c486 JR |
13404 | + * rather than from echo > /sys/power/tuxonice/do_resume. |
13405 | + **/ | |
9474138d | 13406 | +static void toi_sys_power_disk_try_resume(void) |
2380c486 JR |
13407 | +{ |
13408 | + resume_attempted = 1; | |
13409 | + | |
13410 | + /* | |
13411 | + * There's a comment in kernel/power/disk.c that indicates | |
13412 | + * we should be able to use mutex_lock_nested below. That | |
13413 | + * doesn't seem to cut it, though, so let's just turn lockdep | |
13414 | + * off for now. | |
13415 | + */ | |
13416 | + lockdep_off(); | |
13417 | + | |
13418 | + if (toi_start_anything(SYSFS_RESUMING)) | |
13419 | + goto out; | |
13420 | + | |
9474138d | 13421 | + toi_try_resume(); |
2380c486 JR |
13422 | + |
13423 | + /* | |
13424 | + * For initramfs, we have to clear the boot time | |
13425 | + * flag after trying to resume | |
13426 | + */ | |
13427 | + clear_toi_state(TOI_BOOT_TIME); | |
13428 | + | |
13429 | + toi_finish_anything(SYSFS_RESUMING); | |
13430 | +out: | |
13431 | + lockdep_on(); | |
13432 | +} | |
13433 | + | |
13434 | +/** | |
9474138d | 13435 | + * toi_try_hibernate - try to start a hibernation cycle |
2380c486 JR |
13436 | + * |
13437 | + * Start a hibernation cycle, coming in from either | |
13438 | + * echo > /sys/power/tuxonice/do_suspend | |
13439 | + * | |
13440 | + * or | |
13441 | + * | |
13442 | + * echo disk > /sys/power/state | |
13443 | + * | |
13444 | + * In the later case, we come in without pm_sem taken; in the | |
13445 | + * former, it has been taken. | |
13446 | + **/ | |
9474138d | 13447 | +int toi_try_hibernate(void) |
2380c486 JR |
13448 | +{ |
13449 | + int result = 0, sys_power_disk = 0, retries = 0; | |
13450 | + | |
13451 | + if (!mutex_is_locked(&tuxonice_in_use)) { | |
13452 | + /* Came in via /sys/power/disk */ | |
13453 | + if (toi_start_anything(SYSFS_HIBERNATING)) | |
13454 | + return -EBUSY; | |
13455 | + sys_power_disk = 1; | |
13456 | + } | |
13457 | + | |
13458 | + current->flags |= PF_MEMALLOC; | |
13459 | + | |
13460 | + if (test_toi_state(TOI_CLUSTER_MODE)) { | |
13461 | + toi_initiate_cluster_hibernate(); | |
13462 | + goto out; | |
13463 | + } | |
13464 | + | |
13465 | +prepare: | |
13466 | + result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); | |
13467 | + | |
13468 | + if (result || test_action_state(TOI_FREEZER_TEST)) | |
13469 | + goto out; | |
13470 | + | |
13471 | + result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); | |
13472 | + | |
13473 | + if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) { | |
13474 | + if (retries < 2) { | |
e999739a | 13475 | + do_cleanup(0, 1); |
2380c486 | 13476 | + retries++; |
e999739a | 13477 | + clear_result_state(TOI_ABORTED); |
2380c486 JR |
13478 | + extra_pd1_pages_allowance = extra_pd1_pages_used + 500; |
13479 | + printk(KERN_INFO "Automatically adjusting the extra" | |
13480 | + " pages allowance to %ld and restarting.\n", | |
13481 | + extra_pd1_pages_allowance); | |
13482 | + goto prepare; | |
13483 | + } | |
13484 | + | |
13485 | + printk(KERN_INFO "Adjusted extra pages allowance twice and " | |
13486 | + "still couldn't hibernate successfully. Giving up."); | |
13487 | + } | |
13488 | + | |
13489 | + /* This code runs at resume time too! */ | |
13490 | + if (!result && toi_in_hibernate) | |
13491 | + result = do_toi_step(STEP_HIBERNATE_POWERDOWN); | |
13492 | +out: | |
e999739a | 13493 | + do_cleanup(1, 0); |
2380c486 JR |
13494 | + current->flags &= ~PF_MEMALLOC; |
13495 | + | |
13496 | + if (sys_power_disk) | |
13497 | + toi_finish_anything(SYSFS_HIBERNATING); | |
13498 | + | |
13499 | + return result; | |
13500 | +} | |
13501 | + | |
13502 | +/* | |
13503 | + * channel_no: If !0, -c <channel_no> is added to args (userui). | |
13504 | + */ | |
13505 | +int toi_launch_userspace_program(char *command, int channel_no, | |
13506 | + enum umh_wait wait, int debug) | |
13507 | +{ | |
13508 | + int retval; | |
13509 | + static char *envp[] = { | |
13510 | + "HOME=/", | |
13511 | + "TERM=linux", | |
13512 | + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | |
13513 | + NULL }; | |
5dd10c98 AM |
13514 | + static char *argv[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL |
13515 | + }; | |
2380c486 JR |
13516 | + char *channel = NULL; |
13517 | + int arg = 0, size; | |
13518 | + char test_read[255]; | |
13519 | + char *orig_posn = command; | |
13520 | + | |
13521 | + if (!strlen(orig_posn)) | |
13522 | + return 1; | |
13523 | + | |
13524 | + if (channel_no) { | |
13525 | + channel = toi_kzalloc(4, 6, GFP_KERNEL); | |
13526 | + if (!channel) { | |
13527 | + printk(KERN_INFO "Failed to allocate memory in " | |
13528 | + "preparing to launch userspace program.\n"); | |
13529 | + return 1; | |
13530 | + } | |
13531 | + } | |
13532 | + | |
13533 | + /* Up to 6 args supported */ | |
13534 | + while (arg < 6) { | |
13535 | + sscanf(orig_posn, "%s", test_read); | |
13536 | + size = strlen(test_read); | |
13537 | + if (!(size)) | |
13538 | + break; | |
13539 | + argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP); | |
13540 | + strcpy(argv[arg], test_read); | |
13541 | + orig_posn += size + 1; | |
13542 | + *test_read = 0; | |
13543 | + arg++; | |
13544 | + } | |
13545 | + | |
13546 | + if (channel_no) { | |
13547 | + sprintf(channel, "-c%d", channel_no); | |
13548 | + argv[arg] = channel; | |
13549 | + } else | |
13550 | + arg--; | |
13551 | + | |
13552 | + if (debug) { | |
13553 | + argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP); | |
13554 | + strcpy(argv[arg], "--debug"); | |
13555 | + } | |
13556 | + | |
13557 | + retval = call_usermodehelper(argv[0], argv, envp, wait); | |
13558 | + | |
13559 | + /* | |
13560 | + * If the program reports an error, retval = 256. Don't complain | |
13561 | + * about that here. | |
13562 | + */ | |
13563 | + if (retval && retval != 256) | |
e999739a | 13564 | + printk(KERN_ERR "Failed to launch userspace program '%s': " |
13565 | + "Error %d\n", command, retval); | |
2380c486 JR |
13566 | + |
13567 | + { | |
13568 | + int i; | |
13569 | + for (i = 0; i < arg; i++) | |
13570 | + if (argv[i] && argv[i] != channel) | |
7e46296a | 13571 | + toi_kfree(5, argv[i], sizeof(*argv[i])); |
2380c486 JR |
13572 | + } |
13573 | + | |
9474138d | 13574 | + toi_kfree(4, channel, sizeof(*channel)); |
2380c486 JR |
13575 | + |
13576 | + return retval; | |
13577 | +} | |
13578 | + | |
13579 | +/* | |
13580 | + * This array contains entries that are automatically registered at | |
13581 | + * boot. Modules and the console code register their own entries separately. | |
13582 | + */ | |
13583 | +static struct toi_sysfs_data sysfs_params[] = { | |
92bca44c | 13584 | + SYSFS_INT("freezer_sync", SYSFS_RW, &freezer_sync, 0, 1, 0, NULL), |
2380c486 JR |
13585 | + SYSFS_LONG("extra_pages_allowance", SYSFS_RW, |
13586 | + &extra_pd1_pages_allowance, 0, LONG_MAX, 0), | |
13587 | + SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read, | |
13588 | + image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL), | |
13589 | + SYSFS_STRING("resume", SYSFS_RW, resume_file, 255, | |
13590 | + SYSFS_NEEDS_SM_FOR_WRITE, | |
13591 | + attempt_to_parse_resume_device2), | |
13592 | + SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255, | |
13593 | + SYSFS_NEEDS_SM_FOR_WRITE, | |
13594 | + attempt_to_parse_alt_resume_param), | |
13595 | + SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0, | |
13596 | + NULL), | |
13597 | + SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action, | |
13598 | + TOI_IGNORE_ROOTFS, 0), | |
7e46296a AM |
13599 | + SYSFS_LONG("image_size_limit", SYSFS_RW, &image_size_limit, -2, |
13600 | + INT_MAX, 0), | |
2380c486 JR |
13601 | + SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0), |
13602 | + SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action, | |
13603 | + TOI_NO_MULTITHREADED_IO, 0), | |
13604 | + SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action, | |
13605 | + TOI_NO_FLUSHER_THREAD, 0), | |
13606 | + SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action, | |
13607 | + TOI_PAGESET2_FULL, 0), | |
13608 | + SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0), | |
13609 | + SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action, | |
13610 | + TOI_REPLACE_SWSUSP, 0), | |
13611 | + SYSFS_STRING("resume_commandline", SYSFS_RW, | |
13612 | + toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0, | |
13613 | + NULL), | |
13614 | + SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL), | |
2380c486 JR |
13615 | + SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action, |
13616 | + TOI_FREEZER_TEST, 0), | |
13617 | + SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0), | |
13618 | + SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action, | |
13619 | + TOI_TEST_FILTER_SPEED, 0), | |
13620 | + SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action, | |
13621 | + TOI_NO_PAGESET2, 0), | |
13622 | + SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action, | |
13623 | + TOI_NO_PS2_IF_UNNEEDED, 0), | |
13624 | + SYSFS_BIT("late_cpu_hotplug", SYSFS_RW, &toi_bkd.toi_action, | |
13625 | + TOI_LATE_CPU_HOTPLUG, 0), | |
7e46296a AM |
13626 | + SYSFS_STRING("binary_signature", SYSFS_READONLY, |
13627 | + tuxonice_signature, 9, 0, NULL), | |
5dd10c98 AM |
13628 | + SYSFS_INT("max_workers", SYSFS_RW, &toi_max_workers, 0, NR_CPUS, 0, |
13629 | + NULL), | |
2380c486 JR |
13630 | +#ifdef CONFIG_TOI_KEEP_IMAGE |
13631 | + SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE, | |
13632 | + 0), | |
13633 | +#endif | |
13634 | +}; | |
13635 | + | |
13636 | +static struct toi_core_fns my_fns = { | |
13637 | + .get_nonconflicting_page = __toi_get_nonconflicting_page, | |
13638 | + .post_context_save = __toi_post_context_save, | |
9474138d AM |
13639 | + .try_hibernate = toi_try_hibernate, |
13640 | + .try_resume = toi_sys_power_disk_try_resume, | |
2380c486 JR |
13641 | +}; |
13642 | + | |
13643 | +/** | |
13644 | + * core_load - initialisation of TuxOnIce core | |
13645 | + * | |
13646 | + * Initialise the core, beginning with sysfs. Checksum and so on are part of | |
13647 | + * the core, but have their own initialisation routines because they either | |
13648 | + * aren't compiled in all the time or have their own subdirectories. | |
13649 | + **/ | |
13650 | +static __init int core_load(void) | |
13651 | +{ | |
13652 | + int i, | |
13653 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
13654 | + | |
13655 | + printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION | |
13656 | + " (http://tuxonice.net)\n"); | |
2380c486 JR |
13657 | + |
13658 | + if (toi_sysfs_init()) | |
13659 | + return 1; | |
13660 | + | |
13661 | + for (i = 0; i < numfiles; i++) | |
13662 | + toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
13663 | + | |
13664 | + toi_core_fns = &my_fns; | |
13665 | + | |
13666 | + if (toi_alloc_init()) | |
13667 | + return 1; | |
13668 | + if (toi_checksum_init()) | |
13669 | + return 1; | |
13670 | + if (toi_usm_init()) | |
13671 | + return 1; | |
13672 | + if (toi_ui_init()) | |
13673 | + return 1; | |
13674 | + if (toi_poweroff_init()) | |
13675 | + return 1; | |
13676 | + if (toi_cluster_init()) | |
13677 | + return 1; | |
13678 | + | |
13679 | + return 0; | |
13680 | +} | |
13681 | + | |
13682 | +#ifdef MODULE | |
13683 | +/** | |
13684 | + * core_unload: Prepare to unload the core code. | |
13685 | + **/ | |
13686 | +static __exit void core_unload(void) | |
13687 | +{ | |
13688 | + int i, | |
13689 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
13690 | + | |
13691 | + toi_alloc_exit(); | |
13692 | + toi_checksum_exit(); | |
13693 | + toi_poweroff_exit(); | |
13694 | + toi_ui_exit(); | |
13695 | + toi_usm_exit(); | |
13696 | + toi_cluster_exit(); | |
13697 | + | |
13698 | + for (i = 0; i < numfiles; i++) | |
13699 | + toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
13700 | + | |
13701 | + toi_core_fns = NULL; | |
13702 | + | |
13703 | + toi_sysfs_exit(); | |
13704 | +} | |
13705 | +MODULE_LICENSE("GPL"); | |
13706 | +module_init(core_load); | |
13707 | +module_exit(core_unload); | |
13708 | +#else | |
13709 | +late_initcall(core_load); | |
13710 | +#endif | |
13711 | diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c | |
13712 | new file mode 100644 | |
cacc47f8 | 13713 | index 0000000..6030dc6 |
2380c486 JR |
13714 | --- /dev/null |
13715 | +++ b/kernel/power/tuxonice_io.c | |
cacc47f8 | 13716 | @@ -0,0 +1,1836 @@ |
2380c486 JR |
13717 | +/* |
13718 | + * kernel/power/tuxonice_io.c | |
13719 | + * | |
13720 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
13721 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
13722 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
5dd10c98 | 13723 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
13724 | + * |
13725 | + * This file is released under the GPLv2. | |
13726 | + * | |
13727 | + * It contains high level IO routines for hibernating. | |
13728 | + * | |
13729 | + */ | |
13730 | + | |
13731 | +#include <linux/suspend.h> | |
13732 | +#include <linux/version.h> | |
13733 | +#include <linux/utsname.h> | |
13734 | +#include <linux/mount.h> | |
13735 | +#include <linux/highmem.h> | |
2380c486 JR |
13736 | +#include <linux/kthread.h> |
13737 | +#include <linux/cpu.h> | |
9474138d | 13738 | +#include <linux/fs_struct.h> |
7e46296a | 13739 | +#include <linux/bio.h> |
cacc47f8 | 13740 | +#include <linux/fs_uuid.h> |
2380c486 JR |
13741 | +#include <asm/tlbflush.h> |
13742 | + | |
13743 | +#include "tuxonice.h" | |
13744 | +#include "tuxonice_modules.h" | |
13745 | +#include "tuxonice_pageflags.h" | |
13746 | +#include "tuxonice_io.h" | |
13747 | +#include "tuxonice_ui.h" | |
13748 | +#include "tuxonice_storage.h" | |
13749 | +#include "tuxonice_prepare_image.h" | |
13750 | +#include "tuxonice_extent.h" | |
13751 | +#include "tuxonice_sysfs.h" | |
13752 | +#include "tuxonice_builtin.h" | |
13753 | +#include "tuxonice_checksum.h" | |
13754 | +#include "tuxonice_alloc.h" | |
13755 | +char alt_resume_param[256]; | |
13756 | + | |
5dd10c98 AM |
13757 | +/* Version read from image header at resume */ |
13758 | +static int toi_image_header_version; | |
13759 | + | |
cacc47f8 | 13760 | +#define read_if_version(VERS, VAR, DESC, ERR_ACT) do { \ |
5dd10c98 AM |
13761 | + if (likely(toi_image_header_version >= VERS)) \ |
13762 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, \ | |
13763 | + (char *) &VAR, sizeof(VAR))) { \ | |
13764 | + abort_hibernate(TOI_FAILED_IO, "Failed to read DESC."); \ | |
cacc47f8 | 13765 | + ERR_ACT; \ |
5dd10c98 AM |
13766 | + } \ |
13767 | +} while(0) \ | |
13768 | + | |
2380c486 JR |
13769 | +/* Variables shared between threads and updated under the mutex */ |
13770 | +static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result; | |
13771 | +static int io_index, io_nextupdate, io_pc, io_pc_step; | |
13772 | +static DEFINE_MUTEX(io_mutex); | |
13773 | +static DEFINE_PER_CPU(struct page *, last_sought); | |
13774 | +static DEFINE_PER_CPU(struct page *, last_high_page); | |
13775 | +static DEFINE_PER_CPU(char *, checksum_locn); | |
13776 | +static DEFINE_PER_CPU(struct pbe *, last_low_page); | |
13777 | +static atomic_t io_count; | |
13778 | +atomic_t toi_io_workers; | |
13779 | +EXPORT_SYMBOL_GPL(toi_io_workers); | |
13780 | + | |
13781 | +DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher); | |
13782 | +EXPORT_SYMBOL_GPL(toi_io_queue_flusher); | |
13783 | + | |
13784 | +int toi_bio_queue_flusher_should_finish; | |
13785 | +EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish); | |
13786 | + | |
13787 | +/* Indicates that this thread should be used for checking throughput */ | |
13788 | +#define MONITOR ((void *) 1) | |
13789 | + | |
5dd10c98 AM |
13790 | +int toi_max_workers; |
13791 | + | |
13792 | +static char *image_version_error = "The image header version is newer than " \ | |
13793 | + "this kernel supports."; | |
13794 | + | |
2380c486 JR |
13795 | +/** |
13796 | + * toi_attempt_to_parse_resume_device - determine if we can hibernate | |
13797 | + * | |
13798 | + * Can we hibernate, using the current resume= parameter? | |
13799 | + **/ | |
13800 | +int toi_attempt_to_parse_resume_device(int quiet) | |
13801 | +{ | |
13802 | + struct list_head *Allocator; | |
13803 | + struct toi_module_ops *thisAllocator; | |
13804 | + int result, returning = 0; | |
13805 | + | |
13806 | + if (toi_activate_storage(0)) | |
13807 | + return 0; | |
13808 | + | |
13809 | + toiActiveAllocator = NULL; | |
13810 | + clear_toi_state(TOI_RESUME_DEVICE_OK); | |
13811 | + clear_toi_state(TOI_CAN_RESUME); | |
13812 | + clear_result_state(TOI_ABORTED); | |
13813 | + | |
13814 | + if (!toiNumAllocators) { | |
13815 | + if (!quiet) | |
13816 | + printk(KERN_INFO "TuxOnIce: No storage allocators have " | |
13817 | + "been registered. Hibernating will be " | |
13818 | + "disabled.\n"); | |
13819 | + goto cleanup; | |
13820 | + } | |
13821 | + | |
2380c486 JR |
13822 | + list_for_each(Allocator, &toiAllocators) { |
13823 | + thisAllocator = list_entry(Allocator, struct toi_module_ops, | |
13824 | + type_list); | |
13825 | + | |
13826 | + /* | |
13827 | + * Not sure why you'd want to disable an allocator, but | |
13828 | + * we should honour the flag if we're providing it | |
13829 | + */ | |
13830 | + if (!thisAllocator->enabled) | |
13831 | + continue; | |
13832 | + | |
13833 | + result = thisAllocator->parse_sig_location( | |
13834 | + resume_file, (toiNumAllocators == 1), | |
13835 | + quiet); | |
13836 | + | |
13837 | + switch (result) { | |
13838 | + case -EINVAL: | |
13839 | + /* For this allocator, but not a valid | |
13840 | + * configuration. Error already printed. */ | |
13841 | + goto cleanup; | |
13842 | + | |
13843 | + case 0: | |
13844 | + /* For this allocator and valid. */ | |
13845 | + toiActiveAllocator = thisAllocator; | |
13846 | + | |
13847 | + set_toi_state(TOI_RESUME_DEVICE_OK); | |
13848 | + set_toi_state(TOI_CAN_RESUME); | |
13849 | + returning = 1; | |
13850 | + goto cleanup; | |
13851 | + } | |
13852 | + } | |
13853 | + if (!quiet) | |
e999739a | 13854 | + printk(KERN_INFO "TuxOnIce: No matching enabled allocator " |
13855 | + "found. Resuming disabled.\n"); | |
2380c486 JR |
13856 | +cleanup: |
13857 | + toi_deactivate_storage(0); | |
13858 | + return returning; | |
13859 | +} | |
13860 | +EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device); | |
13861 | + | |
13862 | +void attempt_to_parse_resume_device2(void) | |
13863 | +{ | |
13864 | + toi_prepare_usm(); | |
13865 | + toi_attempt_to_parse_resume_device(0); | |
13866 | + toi_cleanup_usm(); | |
13867 | +} | |
13868 | +EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2); | |
13869 | + | |
13870 | +void save_restore_alt_param(int replace, int quiet) | |
13871 | +{ | |
13872 | + static char resume_param_save[255]; | |
13873 | + static unsigned long toi_state_save; | |
13874 | + | |
13875 | + if (replace) { | |
13876 | + toi_state_save = toi_state; | |
13877 | + strcpy(resume_param_save, resume_file); | |
13878 | + strcpy(resume_file, alt_resume_param); | |
13879 | + } else { | |
13880 | + strcpy(resume_file, resume_param_save); | |
13881 | + toi_state = toi_state_save; | |
13882 | + } | |
13883 | + toi_attempt_to_parse_resume_device(quiet); | |
13884 | +} | |
13885 | + | |
13886 | +void attempt_to_parse_alt_resume_param(void) | |
13887 | +{ | |
13888 | + int ok = 0; | |
13889 | + | |
13890 | + /* Temporarily set resume_param to the poweroff value */ | |
13891 | + if (!strlen(alt_resume_param)) | |
13892 | + return; | |
13893 | + | |
e999739a | 13894 | + printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n"); |
2380c486 JR |
13895 | + save_restore_alt_param(SAVE, NOQUIET); |
13896 | + if (test_toi_state(TOI_CAN_RESUME)) | |
13897 | + ok = 1; | |
13898 | + | |
13899 | + printk(KERN_INFO "=== Done ===\n"); | |
13900 | + save_restore_alt_param(RESTORE, QUIET); | |
13901 | + | |
13902 | + /* If not ok, clear the string */ | |
13903 | + if (ok) | |
13904 | + return; | |
13905 | + | |
13906 | + printk(KERN_INFO "Can't resume from that location; clearing " | |
13907 | + "alt_resume_param.\n"); | |
13908 | + alt_resume_param[0] = '\0'; | |
13909 | +} | |
13910 | + | |
13911 | +/** | |
13912 | + * noresume_reset_modules - reset data structures in case of non resuming | |
13913 | + * | |
13914 | + * When we read the start of an image, modules (and especially the | |
13915 | + * active allocator) might need to reset data structures if we | |
13916 | + * decide to remove the image rather than resuming from it. | |
13917 | + **/ | |
13918 | +static void noresume_reset_modules(void) | |
13919 | +{ | |
13920 | + struct toi_module_ops *this_filter; | |
13921 | + | |
13922 | + list_for_each_entry(this_filter, &toi_filters, type_list) | |
13923 | + if (this_filter->noresume_reset) | |
13924 | + this_filter->noresume_reset(); | |
13925 | + | |
13926 | + if (toiActiveAllocator && toiActiveAllocator->noresume_reset) | |
13927 | + toiActiveAllocator->noresume_reset(); | |
13928 | +} | |
13929 | + | |
13930 | +/** | |
13931 | + * fill_toi_header - fill the hibernate header structure | |
13932 | + * @struct toi_header: Header data structure to be filled. | |
13933 | + **/ | |
13934 | +static int fill_toi_header(struct toi_header *sh) | |
13935 | +{ | |
13936 | + int i, error; | |
13937 | + | |
e999739a | 13938 | + error = init_header((struct swsusp_info *) sh); |
2380c486 JR |
13939 | + if (error) |
13940 | + return error; | |
13941 | + | |
13942 | + sh->pagedir = pagedir1; | |
13943 | + sh->pageset_2_size = pagedir2.size; | |
13944 | + sh->param0 = toi_result; | |
13945 | + sh->param1 = toi_bkd.toi_action; | |
13946 | + sh->param2 = toi_bkd.toi_debug_state; | |
13947 | + sh->param3 = toi_bkd.toi_default_console_level; | |
13948 | + sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev; | |
13949 | + for (i = 0; i < 4; i++) | |
13950 | + sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2]; | |
13951 | + sh->bkd = boot_kernel_data_buffer; | |
13952 | + return 0; | |
13953 | +} | |
13954 | + | |
13955 | +/** | |
13956 | + * rw_init_modules - initialize modules | |
13957 | + * @rw: Whether we are reading of writing an image. | |
13958 | + * @which: Section of the image being processed. | |
13959 | + * | |
13960 | + * Iterate over modules, preparing the ones that will be used to read or write | |
13961 | + * data. | |
13962 | + **/ | |
13963 | +static int rw_init_modules(int rw, int which) | |
13964 | +{ | |
13965 | + struct toi_module_ops *this_module; | |
13966 | + /* Initialise page transformers */ | |
13967 | + list_for_each_entry(this_module, &toi_filters, type_list) { | |
13968 | + if (!this_module->enabled) | |
13969 | + continue; | |
13970 | + if (this_module->rw_init && this_module->rw_init(rw, which)) { | |
13971 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
13972 | + "Failed to initialize the %s filter.", | |
13973 | + this_module->name); | |
13974 | + return 1; | |
13975 | + } | |
13976 | + } | |
13977 | + | |
13978 | + /* Initialise allocator */ | |
13979 | + if (toiActiveAllocator->rw_init(rw, which)) { | |
13980 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
13981 | + "Failed to initialise the allocator."); | |
13982 | + return 1; | |
13983 | + } | |
13984 | + | |
13985 | + /* Initialise other modules */ | |
13986 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13987 | + if (!this_module->enabled || | |
13988 | + this_module->type == FILTER_MODULE || | |
13989 | + this_module->type == WRITER_MODULE) | |
13990 | + continue; | |
13991 | + if (this_module->rw_init && this_module->rw_init(rw, which)) { | |
13992 | + set_abort_result(TOI_FAILED_MODULE_INIT); | |
13993 | + printk(KERN_INFO "Setting aborted flag due to module " | |
13994 | + "init failure.\n"); | |
13995 | + return 1; | |
13996 | + } | |
13997 | + } | |
13998 | + | |
13999 | + return 0; | |
14000 | +} | |
14001 | + | |
14002 | +/** | |
14003 | + * rw_cleanup_modules - cleanup modules | |
14004 | + * @rw: Whether we are reading of writing an image. | |
14005 | + * | |
14006 | + * Cleanup components after reading or writing a set of pages. | |
14007 | + * Only the allocator may fail. | |
14008 | + **/ | |
14009 | +static int rw_cleanup_modules(int rw) | |
14010 | +{ | |
14011 | + struct toi_module_ops *this_module; | |
14012 | + int result = 0; | |
14013 | + | |
14014 | + /* Cleanup other modules */ | |
14015 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
14016 | + if (!this_module->enabled || | |
14017 | + this_module->type == FILTER_MODULE || | |
14018 | + this_module->type == WRITER_MODULE) | |
14019 | + continue; | |
14020 | + if (this_module->rw_cleanup) | |
14021 | + result |= this_module->rw_cleanup(rw); | |
14022 | + } | |
14023 | + | |
14024 | + /* Flush data and cleanup */ | |
14025 | + list_for_each_entry(this_module, &toi_filters, type_list) { | |
14026 | + if (!this_module->enabled) | |
14027 | + continue; | |
14028 | + if (this_module->rw_cleanup) | |
14029 | + result |= this_module->rw_cleanup(rw); | |
14030 | + } | |
14031 | + | |
14032 | + result |= toiActiveAllocator->rw_cleanup(rw); | |
14033 | + | |
14034 | + return result; | |
14035 | +} | |
14036 | + | |
14037 | +static struct page *copy_page_from_orig_page(struct page *orig_page) | |
14038 | +{ | |
14039 | + int is_high = PageHighMem(orig_page), index, min, max; | |
14040 | + struct page *high_page = NULL, | |
14041 | + **my_last_high_page = &__get_cpu_var(last_high_page), | |
14042 | + **my_last_sought = &__get_cpu_var(last_sought); | |
14043 | + struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page); | |
14044 | + void *compare; | |
14045 | + | |
14046 | + if (is_high) { | |
14047 | + if (*my_last_sought && *my_last_high_page && | |
14048 | + *my_last_sought < orig_page) | |
14049 | + high_page = *my_last_high_page; | |
14050 | + else | |
14051 | + high_page = (struct page *) restore_highmem_pblist; | |
14052 | + this = (struct pbe *) kmap(high_page); | |
14053 | + compare = orig_page; | |
14054 | + } else { | |
14055 | + if (*my_last_sought && *my_last_low_page && | |
14056 | + *my_last_sought < orig_page) | |
14057 | + this = *my_last_low_page; | |
14058 | + else | |
14059 | + this = restore_pblist; | |
14060 | + compare = page_address(orig_page); | |
14061 | + } | |
14062 | + | |
14063 | + *my_last_sought = orig_page; | |
14064 | + | |
14065 | + /* Locate page containing pbe */ | |
14066 | + while (this[PBES_PER_PAGE - 1].next && | |
14067 | + this[PBES_PER_PAGE - 1].orig_address < compare) { | |
14068 | + if (is_high) { | |
14069 | + struct page *next_high_page = (struct page *) | |
14070 | + this[PBES_PER_PAGE - 1].next; | |
14071 | + kunmap(high_page); | |
14072 | + this = kmap(next_high_page); | |
14073 | + high_page = next_high_page; | |
14074 | + } else | |
14075 | + this = this[PBES_PER_PAGE - 1].next; | |
14076 | + } | |
14077 | + | |
14078 | + /* Do a binary search within the page */ | |
14079 | + min = 0; | |
14080 | + max = PBES_PER_PAGE; | |
14081 | + index = PBES_PER_PAGE / 2; | |
14082 | + while (max - min) { | |
14083 | + if (!this[index].orig_address || | |
14084 | + this[index].orig_address > compare) | |
14085 | + max = index; | |
14086 | + else if (this[index].orig_address == compare) { | |
14087 | + if (is_high) { | |
14088 | + struct page *page = this[index].address; | |
14089 | + *my_last_high_page = high_page; | |
14090 | + kunmap(high_page); | |
14091 | + return page; | |
14092 | + } | |
14093 | + *my_last_low_page = this; | |
14094 | + return virt_to_page(this[index].address); | |
14095 | + } else | |
14096 | + min = index; | |
14097 | + index = ((max + min) / 2); | |
14098 | + }; | |
14099 | + | |
14100 | + if (is_high) | |
14101 | + kunmap(high_page); | |
14102 | + | |
14103 | + abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for" | |
14104 | + " orig page %p. This[min].orig_address=%p.\n", orig_page, | |
14105 | + this[index].orig_address); | |
14106 | + return NULL; | |
14107 | +} | |
14108 | + | |
14109 | +/** | |
9474138d AM |
14110 | + * write_next_page - write the next page in a pageset |
14111 | + * @data_pfn: The pfn where the next data to write is located. | |
14112 | + * @my_io_index: The index of the page in the pageset. | |
14113 | + * @write_pfn: The pfn number to write in the image (where the data belongs). | |
14114 | + * @first_filter: Where to send the page (optimisation). | |
14115 | + * | |
14116 | + * Get the pfn of the next page to write, map the page if necessary and do the | |
14117 | + * write. | |
14118 | + **/ | |
14119 | +static int write_next_page(unsigned long *data_pfn, int *my_io_index, | |
14120 | + unsigned long *write_pfn, struct toi_module_ops *first_filter) | |
14121 | +{ | |
14122 | + struct page *page; | |
14123 | + char **my_checksum_locn = &__get_cpu_var(checksum_locn); | |
14124 | + int result = 0, was_present; | |
14125 | + | |
14126 | + *data_pfn = memory_bm_next_pfn(io_map); | |
14127 | + | |
14128 | + /* Another thread could have beaten us to it. */ | |
14129 | + if (*data_pfn == BM_END_OF_MAP) { | |
14130 | + if (atomic_read(&io_count)) { | |
14131 | + printk(KERN_INFO "Ran out of pfns but io_count is " | |
14132 | + "still %d.\n", atomic_read(&io_count)); | |
14133 | + BUG(); | |
14134 | + } | |
e876a0dd | 14135 | + mutex_unlock(&io_mutex); |
9474138d AM |
14136 | + return -ENODATA; |
14137 | + } | |
14138 | + | |
14139 | + *my_io_index = io_finish_at - atomic_sub_return(1, &io_count); | |
14140 | + | |
14141 | + memory_bm_clear_bit(io_map, *data_pfn); | |
14142 | + page = pfn_to_page(*data_pfn); | |
14143 | + | |
14144 | + was_present = kernel_page_present(page); | |
14145 | + if (!was_present) | |
14146 | + kernel_map_pages(page, 1, 1); | |
14147 | + | |
14148 | + if (io_pageset == 1) | |
14149 | + *write_pfn = memory_bm_next_pfn(pageset1_map); | |
14150 | + else { | |
14151 | + *write_pfn = *data_pfn; | |
14152 | + *my_checksum_locn = tuxonice_get_next_checksum(); | |
14153 | + } | |
14154 | + | |
14155 | + mutex_unlock(&io_mutex); | |
14156 | + | |
14157 | + if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn)) | |
14158 | + return 1; | |
14159 | + | |
14160 | + result = first_filter->write_page(*write_pfn, page, PAGE_SIZE); | |
14161 | + | |
14162 | + if (!was_present) | |
14163 | + kernel_map_pages(page, 1, 0); | |
14164 | + | |
14165 | + return result; | |
14166 | +} | |
14167 | + | |
14168 | +/** | |
14169 | + * read_next_page - read the next page in a pageset | |
14170 | + * @my_io_index: The index of the page in the pageset. | |
14171 | + * @write_pfn: The pfn in which the data belongs. | |
14172 | + * | |
e876a0dd AM |
14173 | + * Read a page of the image into our buffer. It can happen (here and in the |
14174 | + * write routine) that threads don't get run until after other CPUs have done | |
14175 | + * all the work. This was the cause of the long standing issue with | |
14176 | + * occasionally getting -ENODATA errors at the end of reading the image. We | |
14177 | + * therefore need to check there's actually a page to read before trying to | |
14178 | + * retrieve one. | |
9474138d AM |
14179 | + **/ |
14180 | + | |
14181 | +static int read_next_page(int *my_io_index, unsigned long *write_pfn, | |
14182 | + struct page *buffer, struct toi_module_ops *first_filter) | |
14183 | +{ | |
92bca44c | 14184 | + unsigned int buf_size = PAGE_SIZE; |
e876a0dd AM |
14185 | + unsigned long left = atomic_read(&io_count); |
14186 | + | |
14187 | + if (left) | |
14188 | + *my_io_index = io_finish_at - atomic_sub_return(1, &io_count); | |
9474138d | 14189 | + |
9474138d AM |
14190 | + mutex_unlock(&io_mutex); |
14191 | + | |
14192 | + /* | |
14193 | + * Are we aborting? If so, don't submit any more I/O as | |
14194 | + * resetting the resume_attempted flag (from ui.c) will | |
14195 | + * clear the bdev flags, making this thread oops. | |
14196 | + */ | |
14197 | + if (unlikely(test_toi_state(TOI_STOP_RESUME))) { | |
14198 | + atomic_dec(&toi_io_workers); | |
5dd10c98 AM |
14199 | + if (!atomic_read(&toi_io_workers)) { |
14200 | + /* | |
14201 | + * So we can be sure we'll have memory for | |
14202 | + * marking that we haven't resumed. | |
14203 | + */ | |
14204 | + rw_cleanup_modules(READ); | |
9474138d | 14205 | + set_toi_state(TOI_IO_STOPPED); |
5dd10c98 | 14206 | + } |
9474138d AM |
14207 | + while (1) |
14208 | + schedule(); | |
14209 | + } | |
14210 | + | |
e876a0dd AM |
14211 | + if (!left) |
14212 | + return -ENODATA; | |
14213 | + | |
92bca44c | 14214 | + /* |
7e46296a | 14215 | + * See toi_bio_read_page in tuxonice_bio.c: |
9474138d AM |
14216 | + * read the next page in the image. |
14217 | + */ | |
92bca44c | 14218 | + return first_filter->read_page(write_pfn, buffer, &buf_size); |
9474138d AM |
14219 | +} |
14220 | + | |
9474138d AM |
14221 | +static void use_read_page(unsigned long write_pfn, struct page *buffer) |
14222 | +{ | |
14223 | + struct page *final_page = pfn_to_page(write_pfn), | |
14224 | + *copy_page = final_page; | |
14225 | + char *virt, *buffer_virt; | |
14226 | + | |
5dd10c98 | 14227 | + if (io_pageset == 1 && !PagePageset1Copy(final_page)) { |
9474138d AM |
14228 | + copy_page = copy_page_from_orig_page(final_page); |
14229 | + BUG_ON(!copy_page); | |
14230 | + } | |
14231 | + | |
14232 | + if (memory_bm_test_bit(io_map, write_pfn)) { | |
14233 | + int was_present; | |
14234 | + | |
14235 | + virt = kmap(copy_page); | |
14236 | + buffer_virt = kmap(buffer); | |
14237 | + was_present = kernel_page_present(copy_page); | |
14238 | + if (!was_present) | |
14239 | + kernel_map_pages(copy_page, 1, 1); | |
14240 | + memcpy(virt, buffer_virt, PAGE_SIZE); | |
14241 | + if (!was_present) | |
14242 | + kernel_map_pages(copy_page, 1, 0); | |
14243 | + kunmap(copy_page); | |
14244 | + kunmap(buffer); | |
14245 | + memory_bm_clear_bit(io_map, write_pfn); | |
14246 | + } else { | |
14247 | + mutex_lock(&io_mutex); | |
14248 | + atomic_inc(&io_count); | |
14249 | + mutex_unlock(&io_mutex); | |
14250 | + } | |
14251 | +} | |
14252 | + | |
5dd10c98 AM |
14253 | +static unsigned long status_update(int writing, unsigned long done, |
14254 | + unsigned long ticks) | |
14255 | +{ | |
14256 | + int cs_index = writing ? 0 : 1; | |
14257 | + unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks; | |
14258 | + unsigned long msec = jiffies_to_msecs(abs(ticks_so_far)); | |
14259 | + unsigned long pgs_per_s, estimate = 0, pages_left; | |
14260 | + | |
14261 | + if (msec) { | |
14262 | + pages_left = io_barmax - done; | |
14263 | + pgs_per_s = 1000 * done / msec; | |
14264 | + if (pgs_per_s) | |
14265 | + estimate = pages_left / pgs_per_s; | |
14266 | + } | |
14267 | + | |
14268 | + if (estimate && ticks > HZ / 2) | |
14269 | + return toi_update_status(done, io_barmax, | |
14270 | + " %d/%d MB (%lu sec left)", | |
14271 | + MB(done+1), MB(io_barmax), estimate); | |
14272 | + | |
14273 | + return toi_update_status(done, io_barmax, " %d/%d MB", | |
14274 | + MB(done+1), MB(io_barmax)); | |
14275 | +} | |
14276 | + | |
9474138d | 14277 | +/** |
2380c486 JR |
14278 | + * worker_rw_loop - main loop to read/write pages |
14279 | + * | |
14280 | + * The main I/O loop for reading or writing pages. The io_map bitmap is used to | |
14281 | + * track the pages to read/write. | |
14282 | + * If we are reading, the pages are loaded to their final (mapped) pfn. | |
14283 | + **/ | |
14284 | +static int worker_rw_loop(void *data) | |
14285 | +{ | |
5dd10c98 AM |
14286 | + unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4, |
14287 | + jif_index = 1, start_time = jiffies; | |
0ada99ac | 14288 | + int result = 0, my_io_index = 0, last_worker; |
2380c486 JR |
14289 | + struct toi_module_ops *first_filter = toi_get_next_filter(NULL); |
14290 | + struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP); | |
14291 | + | |
14292 | + current->flags |= PF_NOFREEZE; | |
14293 | + | |
2380c486 JR |
14294 | + mutex_lock(&io_mutex); |
14295 | + | |
14296 | + do { | |
2380c486 | 14297 | + if (data && jiffies > next_jiffies) { |
5dd10c98 | 14298 | + next_jiffies += HZ / 4; |
2380c486 JR |
14299 | + if (toiActiveAllocator->update_throughput_throttle) |
14300 | + toiActiveAllocator->update_throughput_throttle( | |
14301 | + jif_index); | |
14302 | + jif_index++; | |
14303 | + } | |
14304 | + | |
14305 | + /* | |
14306 | + * What page to use? If reading, don't know yet which page's | |
14307 | + * data will be read, so always use the buffer. If writing, | |
14308 | + * use the copy (Pageset1) or original page (Pageset2), but | |
14309 | + * always write the pfn of the original page. | |
14310 | + */ | |
9474138d AM |
14311 | + if (io_write) |
14312 | + result = write_next_page(&data_pfn, &my_io_index, | |
14313 | + &write_pfn, first_filter); | |
14314 | + else /* Reading */ | |
14315 | + result = read_next_page(&my_io_index, &write_pfn, | |
14316 | + buffer, first_filter); | |
14317 | + | |
2380c486 | 14318 | + if (result) { |
92bca44c | 14319 | + mutex_lock(&io_mutex); |
e876a0dd AM |
14320 | + /* Nothing to do? */ |
14321 | + if (result == -ENODATA) | |
14322 | + break; | |
14323 | + | |
14324 | + io_result = result; | |
92bca44c | 14325 | + |
2380c486 JR |
14326 | + if (io_write) { |
14327 | + printk(KERN_INFO "Write chunk returned %d.\n", | |
14328 | + result); | |
14329 | + abort_hibernate(TOI_FAILED_IO, | |
14330 | + "Failed to write a chunk of the " | |
14331 | + "image."); | |
92bca44c AM |
14332 | + break; |
14333 | + } | |
14334 | + | |
14335 | + if (io_pageset == 1) { | |
14336 | + printk(KERN_ERR "\nBreaking out of I/O loop " | |
14337 | + "because of result code %d.\n", result); | |
2380c486 JR |
14338 | + break; |
14339 | + } | |
14340 | + panic("Read chunk returned (%d)", result); | |
14341 | + } | |
14342 | + | |
14343 | + /* | |
14344 | + * Discard reads of resaved pages while reading ps2 | |
14345 | + * and unwanted pages while rereading ps2 when aborting. | |
14346 | + */ | |
9474138d AM |
14347 | + if (!io_write && !PageResave(pfn_to_page(write_pfn))) |
14348 | + use_read_page(write_pfn, buffer); | |
2380c486 | 14349 | + |
2380c486 | 14350 | + if (my_io_index + io_base == io_nextupdate) |
5dd10c98 AM |
14351 | + io_nextupdate = status_update(io_write, my_io_index + |
14352 | + io_base, jiffies - start_time); | |
2380c486 JR |
14353 | + |
14354 | + if (my_io_index == io_pc) { | |
92bca44c | 14355 | + printk(KERN_CONT "...%d%%", 20 * io_pc_step); |
2380c486 JR |
14356 | + io_pc_step++; |
14357 | + io_pc = io_finish_at * io_pc_step / 5; | |
14358 | + } | |
14359 | + | |
14360 | + toi_cond_pause(0, NULL); | |
14361 | + | |
14362 | + /* | |
14363 | + * Subtle: If there's less I/O still to be done than threads | |
14364 | + * running, quit. This stops us doing I/O beyond the end of | |
14365 | + * the image when reading. | |
14366 | + * | |
14367 | + * Possible race condition. Two threads could do the test at | |
14368 | + * the same time; one should exit and one should continue. | |
14369 | + * Therefore we take the mutex before comparing and exiting. | |
14370 | + */ | |
14371 | + | |
14372 | + mutex_lock(&io_mutex); | |
14373 | + | |
14374 | + } while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) && | |
14375 | + !(io_write && test_result_state(TOI_ABORTED))); | |
14376 | + | |
14377 | + last_worker = atomic_dec_and_test(&toi_io_workers); | |
14378 | + mutex_unlock(&io_mutex); | |
14379 | + | |
14380 | + if (last_worker) { | |
14381 | + toi_bio_queue_flusher_should_finish = 1; | |
14382 | + wake_up(&toi_io_queue_flusher); | |
0ada99ac | 14383 | + result = toiActiveAllocator->finish_all_io(); |
92bca44c | 14384 | + printk(KERN_CONT "\n"); |
2380c486 JR |
14385 | + } |
14386 | + | |
14387 | + toi__free_page(28, buffer); | |
14388 | + | |
0ada99ac | 14389 | + return result; |
2380c486 JR |
14390 | +} |
14391 | + | |
14392 | +static int start_other_threads(void) | |
14393 | +{ | |
14394 | + int cpu, num_started = 0; | |
14395 | + struct task_struct *p; | |
e876a0dd AM |
14396 | + int to_start = (toi_max_workers ? toi_max_workers : num_online_cpus()) - 1; |
14397 | + | |
14398 | + atomic_set(&toi_io_workers, to_start); | |
2380c486 JR |
14399 | + |
14400 | + for_each_online_cpu(cpu) { | |
e876a0dd | 14401 | + if (num_started == to_start) |
5dd10c98 AM |
14402 | + break; |
14403 | + | |
2380c486 JR |
14404 | + if (cpu == smp_processor_id()) |
14405 | + continue; | |
14406 | + | |
14407 | + p = kthread_create(worker_rw_loop, num_started ? NULL : MONITOR, | |
14408 | + "ktoi_io/%d", cpu); | |
14409 | + if (IS_ERR(p)) { | |
e999739a | 14410 | + printk(KERN_ERR "ktoi_io for %i failed\n", cpu); |
e876a0dd | 14411 | + atomic_dec(&toi_io_workers); |
2380c486 JR |
14412 | + continue; |
14413 | + } | |
14414 | + kthread_bind(p, cpu); | |
14415 | + p->flags |= PF_MEMALLOC; | |
14416 | + wake_up_process(p); | |
14417 | + num_started++; | |
14418 | + } | |
14419 | + | |
14420 | + return num_started; | |
14421 | +} | |
14422 | + | |
14423 | +/** | |
14424 | + * do_rw_loop - main highlevel function for reading or writing pages | |
14425 | + * | |
14426 | + * Create the io_map bitmap and call worker_rw_loop to perform I/O operations. | |
14427 | + **/ | |
14428 | +static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags, | |
14429 | + int base, int barmax, int pageset) | |
14430 | +{ | |
0ada99ac | 14431 | + int index = 0, cpu, num_other_threads = 0, result = 0; |
2380c486 JR |
14432 | + unsigned long pfn; |
14433 | + | |
14434 | + if (!finish_at) | |
14435 | + return 0; | |
14436 | + | |
14437 | + io_write = write; | |
14438 | + io_finish_at = finish_at; | |
14439 | + io_base = base; | |
14440 | + io_barmax = barmax; | |
14441 | + io_pageset = pageset; | |
14442 | + io_index = 0; | |
14443 | + io_pc = io_finish_at / 5; | |
14444 | + io_pc_step = 1; | |
14445 | + io_result = 0; | |
14446 | + io_nextupdate = base + 1; | |
14447 | + toi_bio_queue_flusher_should_finish = 0; | |
14448 | + | |
14449 | + for_each_online_cpu(cpu) { | |
14450 | + per_cpu(last_sought, cpu) = NULL; | |
14451 | + per_cpu(last_low_page, cpu) = NULL; | |
14452 | + per_cpu(last_high_page, cpu) = NULL; | |
14453 | + } | |
14454 | + | |
14455 | + /* Ensure all bits clear */ | |
14456 | + memory_bm_clear(io_map); | |
14457 | + | |
14458 | + /* Set the bits for the pages to write */ | |
14459 | + memory_bm_position_reset(pageflags); | |
14460 | + | |
14461 | + pfn = memory_bm_next_pfn(pageflags); | |
14462 | + | |
14463 | + while (pfn != BM_END_OF_MAP && index < finish_at) { | |
14464 | + memory_bm_set_bit(io_map, pfn); | |
14465 | + pfn = memory_bm_next_pfn(pageflags); | |
14466 | + index++; | |
14467 | + } | |
14468 | + | |
14469 | + BUG_ON(index < finish_at); | |
14470 | + | |
14471 | + atomic_set(&io_count, finish_at); | |
14472 | + | |
14473 | + memory_bm_position_reset(pageset1_map); | |
14474 | + | |
14475 | + clear_toi_state(TOI_IO_STOPPED); | |
14476 | + memory_bm_position_reset(io_map); | |
14477 | + | |
7e46296a AM |
14478 | + if (!test_action_state(TOI_NO_MULTITHREADED_IO) && |
14479 | + (write || !toi_force_no_multithreaded)) | |
2380c486 JR |
14480 | + num_other_threads = start_other_threads(); |
14481 | + | |
14482 | + if (!num_other_threads || !toiActiveAllocator->io_flusher || | |
e876a0dd AM |
14483 | + test_action_state(TOI_NO_FLUSHER_THREAD)) { |
14484 | + atomic_inc(&toi_io_workers); | |
2380c486 | 14485 | + worker_rw_loop(num_other_threads ? NULL : MONITOR); |
e876a0dd | 14486 | + } else |
0ada99ac | 14487 | + result = toiActiveAllocator->io_flusher(write); |
2380c486 JR |
14488 | + |
14489 | + while (atomic_read(&toi_io_workers)) | |
14490 | + schedule(); | |
14491 | + | |
2380c486 | 14492 | + if (unlikely(test_toi_state(TOI_STOP_RESUME))) { |
5dd10c98 AM |
14493 | + if (!atomic_read(&toi_io_workers)) { |
14494 | + rw_cleanup_modules(READ); | |
14495 | + set_toi_state(TOI_IO_STOPPED); | |
14496 | + } | |
2380c486 JR |
14497 | + while (1) |
14498 | + schedule(); | |
14499 | + } | |
5dd10c98 | 14500 | + set_toi_state(TOI_IO_STOPPED); |
2380c486 | 14501 | + |
0ada99ac | 14502 | + if (!io_result && !result && !test_result_state(TOI_ABORTED)) { |
2380c486 JR |
14503 | + unsigned long next; |
14504 | + | |
2380c486 JR |
14505 | + toi_update_status(io_base + io_finish_at, io_barmax, |
14506 | + " %d/%d MB ", | |
14507 | + MB(io_base + io_finish_at), MB(io_barmax)); | |
14508 | + | |
14509 | + memory_bm_position_reset(io_map); | |
14510 | + next = memory_bm_next_pfn(io_map); | |
14511 | + if (next != BM_END_OF_MAP) { | |
14512 | + printk(KERN_INFO "Finished I/O loop but still work to " | |
14513 | + "do?\nFinish at = %d. io_count = %d.\n", | |
14514 | + finish_at, atomic_read(&io_count)); | |
14515 | + printk(KERN_INFO "I/O bitmap still records work to do." | |
14516 | + "%ld.\n", next); | |
92bca44c AM |
14517 | + do { |
14518 | + cpu_relax(); | |
7e46296a | 14519 | + } while (0); |
2380c486 JR |
14520 | + } |
14521 | + } | |
14522 | + | |
0ada99ac | 14523 | + return io_result ? io_result : result; |
2380c486 JR |
14524 | +} |
14525 | + | |
14526 | +/** | |
14527 | + * write_pageset - write a pageset to disk. | |
14528 | + * @pagedir: Which pagedir to write. | |
14529 | + * | |
14530 | + * Returns: | |
14531 | + * Zero on success or -1 on failure. | |
14532 | + **/ | |
14533 | +int write_pageset(struct pagedir *pagedir) | |
14534 | +{ | |
5dd10c98 | 14535 | + int finish_at, base = 0; |
2380c486 JR |
14536 | + int barmax = pagedir1.size + pagedir2.size; |
14537 | + long error = 0; | |
14538 | + struct memory_bitmap *pageflags; | |
5dd10c98 | 14539 | + unsigned long start_time, end_time; |
2380c486 JR |
14540 | + |
14541 | + /* | |
14542 | + * Even if there is nothing to read or write, the allocator | |
14543 | + * may need the init/cleanup for it's housekeeping. (eg: | |
14544 | + * Pageset1 may start where pageset2 ends when writing). | |
14545 | + */ | |
14546 | + finish_at = pagedir->size; | |
14547 | + | |
14548 | + if (pagedir->id == 1) { | |
14549 | + toi_prepare_status(DONT_CLEAR_BAR, | |
14550 | + "Writing kernel & process data..."); | |
14551 | + base = pagedir2.size; | |
14552 | + if (test_action_state(TOI_TEST_FILTER_SPEED) || | |
14553 | + test_action_state(TOI_TEST_BIO)) | |
14554 | + pageflags = pageset1_map; | |
14555 | + else | |
14556 | + pageflags = pageset1_copy_map; | |
14557 | + } else { | |
14558 | + toi_prepare_status(DONT_CLEAR_BAR, "Writing caches..."); | |
14559 | + pageflags = pageset2_map; | |
14560 | + } | |
14561 | + | |
14562 | + start_time = jiffies; | |
14563 | + | |
14564 | + if (rw_init_modules(1, pagedir->id)) { | |
14565 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
14566 | + "Failed to initialise modules for writing."); | |
14567 | + error = 1; | |
14568 | + } | |
14569 | + | |
14570 | + if (!error) | |
14571 | + error = do_rw_loop(1, finish_at, pageflags, base, barmax, | |
14572 | + pagedir->id); | |
14573 | + | |
14574 | + if (rw_cleanup_modules(WRITE) && !error) { | |
14575 | + abort_hibernate(TOI_FAILED_MODULE_CLEANUP, | |
14576 | + "Failed to cleanup after writing."); | |
14577 | + error = 1; | |
14578 | + } | |
14579 | + | |
14580 | + end_time = jiffies; | |
14581 | + | |
14582 | + if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { | |
14583 | + toi_bkd.toi_io_time[0][0] += finish_at, | |
14584 | + toi_bkd.toi_io_time[0][1] += (end_time - start_time); | |
14585 | + } | |
14586 | + | |
14587 | + return error; | |
14588 | +} | |
14589 | + | |
14590 | +/** | |
14591 | + * read_pageset - highlevel function to read a pageset from disk | |
14592 | + * @pagedir: pageset to read | |
14593 | + * @overwrittenpagesonly: Whether to read the whole pageset or | |
14594 | + * only part of it. | |
14595 | + * | |
14596 | + * Returns: | |
14597 | + * Zero on success or -1 on failure. | |
14598 | + **/ | |
14599 | +static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly) | |
14600 | +{ | |
5dd10c98 | 14601 | + int result = 0, base = 0; |
2380c486 JR |
14602 | + int finish_at = pagedir->size; |
14603 | + int barmax = pagedir1.size + pagedir2.size; | |
14604 | + struct memory_bitmap *pageflags; | |
5dd10c98 | 14605 | + unsigned long start_time, end_time; |
2380c486 JR |
14606 | + |
14607 | + if (pagedir->id == 1) { | |
14608 | + toi_prepare_status(DONT_CLEAR_BAR, | |
14609 | + "Reading kernel & process data..."); | |
14610 | + pageflags = pageset1_map; | |
14611 | + } else { | |
14612 | + toi_prepare_status(DONT_CLEAR_BAR, "Reading caches..."); | |
14613 | + if (overwrittenpagesonly) { | |
14614 | + barmax = min(pagedir1.size, pagedir2.size); | |
14615 | + finish_at = min(pagedir1.size, pagedir2.size); | |
14616 | + } else | |
14617 | + base = pagedir1.size; | |
14618 | + pageflags = pageset2_map; | |
14619 | + } | |
14620 | + | |
14621 | + start_time = jiffies; | |
14622 | + | |
14623 | + if (rw_init_modules(0, pagedir->id)) { | |
14624 | + toiActiveAllocator->remove_image(); | |
14625 | + result = 1; | |
14626 | + } else | |
14627 | + result = do_rw_loop(0, finish_at, pageflags, base, barmax, | |
14628 | + pagedir->id); | |
14629 | + | |
14630 | + if (rw_cleanup_modules(READ) && !result) { | |
14631 | + abort_hibernate(TOI_FAILED_MODULE_CLEANUP, | |
14632 | + "Failed to cleanup after reading."); | |
14633 | + result = 1; | |
14634 | + } | |
14635 | + | |
14636 | + /* Statistics */ | |
14637 | + end_time = jiffies; | |
14638 | + | |
14639 | + if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { | |
14640 | + toi_bkd.toi_io_time[1][0] += finish_at, | |
14641 | + toi_bkd.toi_io_time[1][1] += (end_time - start_time); | |
14642 | + } | |
14643 | + | |
14644 | + return result; | |
14645 | +} | |
14646 | + | |
14647 | +/** | |
14648 | + * write_module_configs - store the modules configuration | |
14649 | + * | |
14650 | + * The configuration for each module is stored in the image header. | |
14651 | + * Returns: Int | |
14652 | + * Zero on success, Error value otherwise. | |
14653 | + **/ | |
14654 | +static int write_module_configs(void) | |
14655 | +{ | |
14656 | + struct toi_module_ops *this_module; | |
14657 | + char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP); | |
14658 | + int len, index = 1; | |
14659 | + struct toi_module_header toi_module_header; | |
14660 | + | |
14661 | + if (!buffer) { | |
14662 | + printk(KERN_INFO "Failed to allocate a buffer for saving " | |
14663 | + "module configuration info.\n"); | |
14664 | + return -ENOMEM; | |
14665 | + } | |
14666 | + | |
14667 | + /* | |
14668 | + * We have to know which data goes with which module, so we at | |
14669 | + * least write a length of zero for a module. Note that we are | |
14670 | + * also assuming every module's config data takes <= PAGE_SIZE. | |
14671 | + */ | |
14672 | + | |
14673 | + /* For each module (in registration order) */ | |
14674 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
14675 | + if (!this_module->enabled || !this_module->storage_needed || | |
14676 | + (this_module->type == WRITER_MODULE && | |
14677 | + toiActiveAllocator != this_module)) | |
14678 | + continue; | |
14679 | + | |
14680 | + /* Get the data from the module */ | |
14681 | + len = 0; | |
14682 | + if (this_module->save_config_info) | |
14683 | + len = this_module->save_config_info(buffer); | |
14684 | + | |
14685 | + /* Save the details of the module */ | |
14686 | + toi_module_header.enabled = this_module->enabled; | |
14687 | + toi_module_header.type = this_module->type; | |
14688 | + toi_module_header.index = index++; | |
14689 | + strncpy(toi_module_header.name, this_module->name, | |
14690 | + sizeof(toi_module_header.name)); | |
14691 | + toiActiveAllocator->rw_header_chunk(WRITE, | |
14692 | + this_module, | |
14693 | + (char *) &toi_module_header, | |
14694 | + sizeof(toi_module_header)); | |
14695 | + | |
14696 | + /* Save the size of the data and any data returned */ | |
14697 | + toiActiveAllocator->rw_header_chunk(WRITE, | |
14698 | + this_module, | |
14699 | + (char *) &len, sizeof(int)); | |
14700 | + if (len) | |
14701 | + toiActiveAllocator->rw_header_chunk( | |
14702 | + WRITE, this_module, buffer, len); | |
14703 | + } | |
14704 | + | |
14705 | + /* Write a blank header to terminate the list */ | |
14706 | + toi_module_header.name[0] = '\0'; | |
14707 | + toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
14708 | + (char *) &toi_module_header, sizeof(toi_module_header)); | |
14709 | + | |
14710 | + toi_free_page(22, (unsigned long) buffer); | |
14711 | + return 0; | |
14712 | +} | |
14713 | + | |
14714 | +/** | |
14715 | + * read_one_module_config - read and configure one module | |
14716 | + * | |
14717 | + * Read the configuration for one module, and configure the module | |
14718 | + * to match if it is loaded. | |
14719 | + * | |
14720 | + * Returns: Int | |
14721 | + * Zero on success, Error value otherwise. | |
14722 | + **/ | |
14723 | +static int read_one_module_config(struct toi_module_header *header) | |
14724 | +{ | |
14725 | + struct toi_module_ops *this_module; | |
14726 | + int result, len; | |
14727 | + char *buffer; | |
14728 | + | |
14729 | + /* Find the module */ | |
14730 | + this_module = toi_find_module_given_name(header->name); | |
14731 | + | |
14732 | + if (!this_module) { | |
14733 | + if (header->enabled) { | |
14734 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
14735 | + "It looks like we need module %s for reading " | |
14736 | + "the image but it hasn't been registered.\n", | |
14737 | + header->name); | |
14738 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) | |
14739 | + return -EINVAL; | |
14740 | + } else | |
14741 | + printk(KERN_INFO "Module %s configuration data found, " | |
14742 | + "but the module hasn't registered. Looks like " | |
14743 | + "it was disabled, so we're ignoring its data.", | |
14744 | + header->name); | |
14745 | + } | |
14746 | + | |
14747 | + /* Get the length of the data (if any) */ | |
14748 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len, | |
14749 | + sizeof(int)); | |
14750 | + if (result) { | |
e999739a | 14751 | + printk(KERN_ERR "Failed to read the length of the module %s's" |
2380c486 JR |
14752 | + " configuration data.\n", |
14753 | + header->name); | |
14754 | + return -EINVAL; | |
14755 | + } | |
14756 | + | |
14757 | + /* Read any data and pass to the module (if we found one) */ | |
14758 | + if (!len) | |
14759 | + return 0; | |
14760 | + | |
14761 | + buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP); | |
14762 | + | |
14763 | + if (!buffer) { | |
e999739a | 14764 | + printk(KERN_ERR "Failed to allocate a buffer for reloading " |
14765 | + "module configuration info.\n"); | |
2380c486 JR |
14766 | + return -ENOMEM; |
14767 | + } | |
14768 | + | |
14769 | + toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len); | |
14770 | + | |
14771 | + if (!this_module) | |
14772 | + goto out; | |
14773 | + | |
14774 | + if (!this_module->save_config_info) | |
e999739a | 14775 | + printk(KERN_ERR "Huh? Module %s appears to have a " |
14776 | + "save_config_info, but not a load_config_info " | |
14777 | + "function!\n", this_module->name); | |
2380c486 JR |
14778 | + else |
14779 | + this_module->load_config_info(buffer, len); | |
14780 | + | |
14781 | + /* | |
14782 | + * Now move this module to the tail of its lists. This will put it in | |
14783 | + * order. Any new modules will end up at the top of the lists. They | |
14784 | + * should have been set to disabled when loaded (people will | |
14785 | + * normally not edit an initrd to load a new module and then hibernate | |
14786 | + * without using it!). | |
14787 | + */ | |
14788 | + | |
14789 | + toi_move_module_tail(this_module); | |
14790 | + | |
14791 | + this_module->enabled = header->enabled; | |
14792 | + | |
14793 | +out: | |
14794 | + toi_free_page(23, (unsigned long) buffer); | |
14795 | + return 0; | |
14796 | +} | |
14797 | + | |
14798 | +/** | |
14799 | + * read_module_configs - reload module configurations from the image header. | |
14800 | + * | |
14801 | + * Returns: Int | |
14802 | + * Zero on success or an error code. | |
14803 | + **/ | |
14804 | +static int read_module_configs(void) | |
14805 | +{ | |
14806 | + int result = 0; | |
14807 | + struct toi_module_header toi_module_header; | |
14808 | + struct toi_module_ops *this_module; | |
14809 | + | |
14810 | + /* All modules are initially disabled. That way, if we have a module | |
14811 | + * loaded now that wasn't loaded when we hibernated, it won't be used | |
14812 | + * in trying to read the data. | |
14813 | + */ | |
14814 | + list_for_each_entry(this_module, &toi_modules, module_list) | |
14815 | + this_module->enabled = 0; | |
14816 | + | |
14817 | + /* Get the first module header */ | |
14818 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
14819 | + (char *) &toi_module_header, | |
14820 | + sizeof(toi_module_header)); | |
14821 | + if (result) { | |
14822 | + printk(KERN_ERR "Failed to read the next module header.\n"); | |
14823 | + return -EINVAL; | |
14824 | + } | |
14825 | + | |
14826 | + /* For each module (in registration order) */ | |
14827 | + while (toi_module_header.name[0]) { | |
14828 | + result = read_one_module_config(&toi_module_header); | |
14829 | + | |
14830 | + if (result) | |
14831 | + return -EINVAL; | |
14832 | + | |
14833 | + /* Get the next module header */ | |
14834 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
14835 | + (char *) &toi_module_header, | |
14836 | + sizeof(toi_module_header)); | |
14837 | + | |
14838 | + if (result) { | |
14839 | + printk(KERN_ERR "Failed to read the next module " | |
14840 | + "header.\n"); | |
14841 | + return -EINVAL; | |
14842 | + } | |
14843 | + } | |
14844 | + | |
14845 | + return 0; | |
14846 | +} | |
14847 | + | |
5dd10c98 AM |
14848 | +static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev) |
14849 | +{ | |
14850 | + return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 0 : 1; | |
14851 | +} | |
14852 | + | |
14853 | +int fs_info_space_needed(void) | |
14854 | +{ | |
14855 | + const struct super_block *sb; | |
14856 | + int result = sizeof(int); | |
14857 | + | |
14858 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
14859 | + struct fs_info *fs; | |
14860 | + | |
14861 | + if (!sb->s_bdev) | |
14862 | + continue; | |
14863 | + | |
14864 | + fs = fs_info_from_block_dev(sb->s_bdev); | |
14865 | + if (save_fs_info(fs, sb->s_bdev)) | |
cacc47f8 AM |
14866 | + result += 16 + sizeof(dev_t) + sizeof(int) + |
14867 | + fs->last_mount_size; | |
5dd10c98 AM |
14868 | + free_fs_info(fs); |
14869 | + } | |
14870 | + return result; | |
14871 | +} | |
14872 | + | |
14873 | +static int fs_info_num_to_save(void) | |
14874 | +{ | |
14875 | + const struct super_block *sb; | |
14876 | + int to_save = 0; | |
14877 | + | |
14878 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
14879 | + struct fs_info *fs; | |
14880 | + | |
14881 | + if (!sb->s_bdev) | |
14882 | + continue; | |
14883 | + | |
14884 | + fs = fs_info_from_block_dev(sb->s_bdev); | |
14885 | + if (save_fs_info(fs, sb->s_bdev)) | |
14886 | + to_save++; | |
14887 | + free_fs_info(fs); | |
14888 | + } | |
14889 | + | |
14890 | + return to_save; | |
14891 | +} | |
14892 | + | |
14893 | +static int fs_info_save(void) | |
14894 | +{ | |
14895 | + const struct super_block *sb; | |
14896 | + int to_save = fs_info_num_to_save(); | |
14897 | + | |
14898 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *) &to_save, | |
14899 | + sizeof(int))) { | |
14900 | + abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info" | |
14901 | + " to save."); | |
14902 | + return -EIO; | |
14903 | + } | |
14904 | + | |
14905 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
14906 | + struct fs_info *fs; | |
14907 | + | |
14908 | + if (!sb->s_bdev) | |
14909 | + continue; | |
14910 | + | |
14911 | + fs = fs_info_from_block_dev(sb->s_bdev); | |
14912 | + if (save_fs_info(fs, sb->s_bdev)) { | |
14913 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
14914 | + &fs->uuid[0], 16)) { | |
14915 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
14916 | + "write uuid."); | |
14917 | + return -EIO; | |
14918 | + } | |
14919 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
cacc47f8 AM |
14920 | + (char *) &fs->dev_t, sizeof(dev_t))) { |
14921 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
14922 | + "write dev_t."); | |
14923 | + return -EIO; | |
14924 | + } | |
14925 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
5dd10c98 AM |
14926 | + (char *) &fs->last_mount_size, sizeof(int))) { |
14927 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
14928 | + "write last mount length."); | |
14929 | + return -EIO; | |
14930 | + } | |
14931 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
14932 | + fs->last_mount, fs->last_mount_size)) { | |
14933 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
14934 | + "write uuid."); | |
14935 | + return -EIO; | |
14936 | + } | |
14937 | + } | |
14938 | + free_fs_info(fs); | |
14939 | + } | |
14940 | + return 0; | |
14941 | +} | |
14942 | + | |
14943 | +static int fs_info_load_and_check_one(void) | |
14944 | +{ | |
14945 | + char uuid[16], *last_mount; | |
14946 | + int result = 0, ln; | |
14947 | + dev_t dev_t; | |
14948 | + struct block_device *dev; | |
cacc47f8 | 14949 | + struct fs_info *fs_info, seek; |
5dd10c98 AM |
14950 | + |
14951 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) { | |
14952 | + abort_hibernate(TOI_FAILED_IO, "Failed to read uuid."); | |
14953 | + return -EIO; | |
14954 | + } | |
14955 | + | |
cacc47f8 AM |
14956 | + read_if_version(3, dev_t, "uuid dev_t field", return -EIO); |
14957 | + | |
5dd10c98 AM |
14958 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &ln, |
14959 | + sizeof(int))) { | |
14960 | + abort_hibernate(TOI_FAILED_IO, | |
14961 | + "Failed to read last mount size."); | |
14962 | + return -EIO; | |
14963 | + } | |
14964 | + | |
14965 | + last_mount = kzalloc(ln, GFP_KERNEL); | |
14966 | + | |
14967 | + if (!last_mount) | |
14968 | + return -ENOMEM; | |
14969 | + | |
14970 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount, ln)) { | |
14971 | + abort_hibernate(TOI_FAILED_IO, | |
14972 | + "Failed to read last mount timestamp."); | |
14973 | + result = -EIO; | |
14974 | + goto out_lmt; | |
14975 | + } | |
14976 | + | |
cacc47f8 AM |
14977 | + strncpy((char *) &seek.uuid, uuid, 16); |
14978 | + seek.dev_t = dev_t; | |
14979 | + seek.last_mount_size = ln; | |
14980 | + seek.last_mount = last_mount; | |
14981 | + dev_t = blk_lookup_fs_info(&seek); | |
5dd10c98 AM |
14982 | + if (!dev_t) |
14983 | + goto out_lmt; | |
14984 | + | |
14985 | + dev = toi_open_by_devnum(dev_t); | |
14986 | + | |
14987 | + fs_info = fs_info_from_block_dev(dev); | |
14988 | + if (fs_info && !IS_ERR(fs_info)) { | |
14989 | + if (ln != fs_info->last_mount_size) { | |
14990 | + printk(KERN_EMERG "Found matching uuid but last mount " | |
14991 | + "time lengths differ?! " | |
14992 | + "(%d vs %d).\n", ln, | |
14993 | + fs_info->last_mount_size); | |
14994 | + result = -EINVAL; | |
14995 | + } else { | |
14996 | + char buf[BDEVNAME_SIZE]; | |
14997 | + result = !!memcmp(fs_info->last_mount, last_mount, ln); | |
14998 | + if (result) | |
14999 | + printk(KERN_EMERG "Last mount time for %s has " | |
15000 | + "changed!\n", bdevname(dev, buf)); | |
15001 | + } | |
15002 | + } | |
15003 | + toi_close_bdev(dev); | |
15004 | + free_fs_info(fs_info); | |
15005 | +out_lmt: | |
15006 | + kfree(last_mount); | |
15007 | + return result; | |
15008 | +} | |
15009 | + | |
15010 | +static int fs_info_load_and_check(void) | |
15011 | +{ | |
de6743ae | 15012 | + int to_do, result = 0; |
5dd10c98 AM |
15013 | + |
15014 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &to_do, | |
15015 | + sizeof(int))) { | |
15016 | + abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info " | |
15017 | + "to load."); | |
15018 | + return -EIO; | |
15019 | + } | |
15020 | + | |
15021 | + while(to_do--) | |
15022 | + result |= fs_info_load_and_check_one(); | |
15023 | + | |
15024 | + return result; | |
15025 | +} | |
15026 | + | |
2380c486 JR |
15027 | +/** |
15028 | + * write_image_header - write the image header after write the image proper | |
15029 | + * | |
15030 | + * Returns: Int | |
15031 | + * Zero on success, error value otherwise. | |
15032 | + **/ | |
15033 | +int write_image_header(void) | |
15034 | +{ | |
15035 | + int ret; | |
15036 | + int total = pagedir1.size + pagedir2.size+2; | |
15037 | + char *header_buffer = NULL; | |
15038 | + | |
15039 | + /* Now prepare to write the header */ | |
15040 | + ret = toiActiveAllocator->write_header_init(); | |
15041 | + if (ret) { | |
15042 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
15043 | + "Active allocator's write_header_init" | |
15044 | + " function failed."); | |
15045 | + goto write_image_header_abort; | |
15046 | + } | |
15047 | + | |
15048 | + /* Get a buffer */ | |
15049 | + header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP); | |
15050 | + if (!header_buffer) { | |
15051 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15052 | + "Out of memory when trying to get page for header!"); | |
15053 | + goto write_image_header_abort; | |
15054 | + } | |
15055 | + | |
15056 | + /* Write hibernate header */ | |
15057 | + if (fill_toi_header((struct toi_header *) header_buffer)) { | |
15058 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15059 | + "Failure to fill header information!"); | |
15060 | + goto write_image_header_abort; | |
15061 | + } | |
2380c486 | 15062 | + |
5dd10c98 AM |
15063 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, |
15064 | + header_buffer, sizeof(struct toi_header))) { | |
15065 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15066 | + "Failure to write header info."); | |
15067 | + goto write_image_header_abort; | |
15068 | + } | |
15069 | + | |
15070 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
15071 | + (char *) &toi_max_workers, sizeof(toi_max_workers))) { | |
15072 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15073 | + "Failure to number of workers to use."); | |
15074 | + goto write_image_header_abort; | |
15075 | + } | |
15076 | + | |
15077 | + /* Write filesystem info */ | |
15078 | + if (fs_info_save()) | |
15079 | + goto write_image_header_abort; | |
2380c486 JR |
15080 | + |
15081 | + /* Write module configurations */ | |
15082 | + ret = write_module_configs(); | |
15083 | + if (ret) { | |
15084 | + abort_hibernate(TOI_FAILED_IO, | |
15085 | + "Failed to write module configs."); | |
15086 | + goto write_image_header_abort; | |
15087 | + } | |
15088 | + | |
5dd10c98 AM |
15089 | + if (memory_bm_write(pageset1_map, |
15090 | + toiActiveAllocator->rw_header_chunk)) { | |
15091 | + abort_hibernate(TOI_FAILED_IO, | |
15092 | + "Failed to write bitmaps."); | |
15093 | + goto write_image_header_abort; | |
15094 | + } | |
2380c486 JR |
15095 | + |
15096 | + /* Flush data and let allocator cleanup */ | |
15097 | + if (toiActiveAllocator->write_header_cleanup()) { | |
15098 | + abort_hibernate(TOI_FAILED_IO, | |
15099 | + "Failed to cleanup writing header."); | |
15100 | + goto write_image_header_abort_no_cleanup; | |
15101 | + } | |
15102 | + | |
15103 | + if (test_result_state(TOI_ABORTED)) | |
15104 | + goto write_image_header_abort_no_cleanup; | |
15105 | + | |
15106 | + toi_update_status(total, total, NULL); | |
15107 | + | |
5dd10c98 AM |
15108 | +out: |
15109 | + if (header_buffer) | |
15110 | + toi_free_page(24, (unsigned long) header_buffer); | |
15111 | + return ret; | |
2380c486 JR |
15112 | + |
15113 | +write_image_header_abort: | |
15114 | + toiActiveAllocator->write_header_cleanup(); | |
15115 | +write_image_header_abort_no_cleanup: | |
5dd10c98 AM |
15116 | + ret = -1; |
15117 | + goto out; | |
2380c486 JR |
15118 | +} |
15119 | + | |
15120 | +/** | |
15121 | + * sanity_check - check the header | |
15122 | + * @sh: the header which was saved at hibernate time. | |
15123 | + * | |
15124 | + * Perform a few checks, seeking to ensure that the kernel being | |
15125 | + * booted matches the one hibernated. They need to match so we can | |
15126 | + * be _sure_ things will work. It is not absolutely impossible for | |
15127 | + * resuming from a different kernel to work, just not assured. | |
15128 | + **/ | |
15129 | +static char *sanity_check(struct toi_header *sh) | |
15130 | +{ | |
e999739a | 15131 | + char *reason = check_image_kernel((struct swsusp_info *) sh); |
2380c486 JR |
15132 | + |
15133 | + if (reason) | |
15134 | + return reason; | |
15135 | + | |
15136 | + if (!test_action_state(TOI_IGNORE_ROOTFS)) { | |
15137 | + const struct super_block *sb; | |
15138 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
15139 | + if ((!(sb->s_flags & MS_RDONLY)) && | |
15140 | + (sb->s_type->fs_flags & FS_REQUIRES_DEV)) | |
15141 | + return "Device backed fs has been mounted " | |
15142 | + "rw prior to resume or initrd/ramfs " | |
15143 | + "is mounted rw."; | |
15144 | + } | |
15145 | + } | |
15146 | + | |
15147 | + return NULL; | |
15148 | +} | |
15149 | + | |
15150 | +static DECLARE_WAIT_QUEUE_HEAD(freeze_wait); | |
15151 | + | |
15152 | +#define FREEZE_IN_PROGRESS (~0) | |
15153 | + | |
15154 | +static int freeze_result; | |
15155 | + | |
15156 | +static void do_freeze(struct work_struct *dummy) | |
15157 | +{ | |
15158 | + freeze_result = freeze_processes(); | |
15159 | + wake_up(&freeze_wait); | |
7e46296a | 15160 | + trap_non_toi_io = 1; |
2380c486 JR |
15161 | +} |
15162 | + | |
15163 | +static DECLARE_WORK(freeze_work, do_freeze); | |
15164 | + | |
15165 | +/** | |
15166 | + * __read_pageset1 - test for the existence of an image and attempt to load it | |
15167 | + * | |
15168 | + * Returns: Int | |
15169 | + * Zero if image found and pageset1 successfully loaded. | |
15170 | + * Error if no image found or loaded. | |
15171 | + **/ | |
15172 | +static int __read_pageset1(void) | |
15173 | +{ | |
15174 | + int i, result = 0; | |
15175 | + char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP), | |
15176 | + *sanity_error = NULL; | |
15177 | + struct toi_header *toi_header; | |
15178 | + | |
15179 | + if (!header_buffer) { | |
15180 | + printk(KERN_INFO "Unable to allocate a page for reading the " | |
15181 | + "signature.\n"); | |
15182 | + return -ENOMEM; | |
15183 | + } | |
15184 | + | |
15185 | + /* Check for an image */ | |
15186 | + result = toiActiveAllocator->image_exists(1); | |
5dd10c98 AM |
15187 | + if (result == 3) { |
15188 | + result = -ENODATA; | |
15189 | + toi_early_boot_message(1, 0, "The signature from an older " | |
15190 | + "version of TuxOnIce has been detected."); | |
15191 | + goto out_remove_image; | |
15192 | + } | |
15193 | + | |
7e46296a | 15194 | + if (result != 1) { |
2380c486 JR |
15195 | + result = -ENODATA; |
15196 | + noresume_reset_modules(); | |
15197 | + printk(KERN_INFO "TuxOnIce: No image found.\n"); | |
15198 | + goto out; | |
15199 | + } | |
15200 | + | |
15201 | + /* | |
15202 | + * Prepare the active allocator for reading the image header. The | |
15203 | + * activate allocator might read its own configuration. | |
15204 | + * | |
15205 | + * NB: This call may never return because there might be a signature | |
15206 | + * for a different image such that we warn the user and they choose | |
15207 | + * to reboot. (If the device ids look erroneous (2.4 vs 2.6) or the | |
15208 | + * location of the image might be unavailable if it was stored on a | |
15209 | + * network connection). | |
15210 | + */ | |
15211 | + | |
15212 | + result = toiActiveAllocator->read_header_init(); | |
15213 | + if (result) { | |
15214 | + printk(KERN_INFO "TuxOnIce: Failed to initialise, reading the " | |
15215 | + "image header.\n"); | |
15216 | + goto out_remove_image; | |
15217 | + } | |
15218 | + | |
15219 | + /* Check for noresume command line option */ | |
15220 | + if (test_toi_state(TOI_NORESUME_SPECIFIED)) { | |
15221 | + printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed " | |
15222 | + "image.\n"); | |
15223 | + goto out_remove_image; | |
15224 | + } | |
15225 | + | |
15226 | + /* Check whether we've resumed before */ | |
15227 | + if (test_toi_state(TOI_RESUMED_BEFORE)) { | |
15228 | + toi_early_boot_message(1, 0, NULL); | |
15229 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) { | |
15230 | + printk(KERN_INFO "TuxOnIce: Tried to resume before: " | |
15231 | + "Invalidated image.\n"); | |
15232 | + goto out_remove_image; | |
15233 | + } | |
15234 | + } | |
15235 | + | |
15236 | + clear_toi_state(TOI_CONTINUE_REQ); | |
15237 | + | |
5dd10c98 AM |
15238 | + toi_image_header_version = toiActiveAllocator->get_header_version(); |
15239 | + | |
15240 | + if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) { | |
15241 | + toi_early_boot_message(1, 0, image_version_error); | |
15242 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) { | |
15243 | + printk(KERN_INFO "TuxOnIce: Header version too new: " | |
15244 | + "Invalidated image.\n"); | |
15245 | + goto out_remove_image; | |
15246 | + } | |
15247 | + } | |
15248 | + | |
2380c486 JR |
15249 | + /* Read hibernate header */ |
15250 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
15251 | + header_buffer, sizeof(struct toi_header)); | |
15252 | + if (result < 0) { | |
e999739a | 15253 | + printk(KERN_ERR "TuxOnIce: Failed to read the image " |
15254 | + "signature.\n"); | |
2380c486 JR |
15255 | + goto out_remove_image; |
15256 | + } | |
15257 | + | |
15258 | + toi_header = (struct toi_header *) header_buffer; | |
15259 | + | |
15260 | + /* | |
15261 | + * NB: This call may also result in a reboot rather than returning. | |
15262 | + */ | |
15263 | + | |
15264 | + sanity_error = sanity_check(toi_header); | |
15265 | + if (sanity_error) { | |
15266 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
15267 | + sanity_error); | |
15268 | + printk(KERN_INFO "TuxOnIce: Sanity check failed.\n"); | |
15269 | + goto out_remove_image; | |
15270 | + } | |
15271 | + | |
15272 | + /* | |
15273 | + * We have an image and it looks like it will load okay. | |
15274 | + * | |
15275 | + * Get metadata from header. Don't override commandline parameters. | |
15276 | + * | |
15277 | + * We don't need to save the image size limit because it's not used | |
15278 | + * during resume and will be restored with the image anyway. | |
15279 | + */ | |
15280 | + | |
15281 | + memcpy((char *) &pagedir1, | |
15282 | + (char *) &toi_header->pagedir, sizeof(pagedir1)); | |
15283 | + toi_result = toi_header->param0; | |
7e46296a AM |
15284 | + if (!toi_bkd.toi_debug_state) { |
15285 | + toi_bkd.toi_action = toi_header->param1; | |
15286 | + toi_bkd.toi_debug_state = toi_header->param2; | |
15287 | + toi_bkd.toi_default_console_level = toi_header->param3; | |
15288 | + } | |
2380c486 JR |
15289 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); |
15290 | + pagedir2.size = toi_header->pageset_2_size; | |
15291 | + for (i = 0; i < 4; i++) | |
15292 | + toi_bkd.toi_io_time[i/2][i%2] = | |
15293 | + toi_header->io_time[i/2][i%2]; | |
15294 | + | |
15295 | + set_toi_state(TOI_BOOT_KERNEL); | |
15296 | + boot_kernel_data_buffer = toi_header->bkd; | |
15297 | + | |
cacc47f8 AM |
15298 | + read_if_version(1, toi_max_workers, "TuxOnIce max workers", |
15299 | + goto out_remove_image); | |
5dd10c98 AM |
15300 | + |
15301 | + /* Read filesystem info */ | |
15302 | + if (fs_info_load_and_check()) { | |
15303 | + printk(KERN_EMERG "TuxOnIce: File system mount time checks " | |
15304 | + "failed. Refusing to corrupt your filesystems!\n"); | |
15305 | + goto out_remove_image; | |
15306 | + } | |
15307 | + | |
2380c486 JR |
15308 | + /* Read module configurations */ |
15309 | + result = read_module_configs(); | |
15310 | + if (result) { | |
15311 | + pagedir1.size = 0; | |
15312 | + pagedir2.size = 0; | |
15313 | + printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module " | |
15314 | + "configurations.\n"); | |
15315 | + clear_action_state(TOI_KEEP_IMAGE); | |
15316 | + goto out_remove_image; | |
15317 | + } | |
15318 | + | |
15319 | + toi_prepare_console(); | |
15320 | + | |
15321 | + set_toi_state(TOI_NOW_RESUMING); | |
15322 | + | |
15323 | + if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) { | |
15324 | + toi_prepare_status(DONT_CLEAR_BAR, "Disable nonboot cpus."); | |
15325 | + if (disable_nonboot_cpus()) { | |
15326 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
15327 | + goto out_reset_console; | |
15328 | + } | |
15329 | + } | |
15330 | + | |
15331 | + if (usermodehelper_disable()) | |
15332 | + goto out_enable_nonboot_cpus; | |
15333 | + | |
15334 | + current->flags |= PF_NOFREEZE; | |
15335 | + freeze_result = FREEZE_IN_PROGRESS; | |
15336 | + | |
15337 | + schedule_work_on(first_cpu(cpu_online_map), &freeze_work); | |
15338 | + | |
15339 | + toi_cond_pause(1, "About to read original pageset1 locations."); | |
15340 | + | |
15341 | + /* | |
7e46296a | 15342 | + * See _toi_rw_header_chunk in tuxonice_bio.c: |
2380c486 JR |
15343 | + * Initialize pageset1_map by reading the map from the image. |
15344 | + */ | |
15345 | + if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk)) | |
15346 | + goto out_thaw; | |
15347 | + | |
15348 | + /* | |
7e46296a | 15349 | + * See toi_rw_cleanup in tuxonice_bio.c: |
2380c486 JR |
15350 | + * Clean up after reading the header. |
15351 | + */ | |
15352 | + result = toiActiveAllocator->read_header_cleanup(); | |
15353 | + if (result) { | |
15354 | + printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the " | |
15355 | + "image header.\n"); | |
15356 | + goto out_thaw; | |
15357 | + } | |
15358 | + | |
15359 | + toi_cond_pause(1, "About to read pagedir."); | |
15360 | + | |
15361 | + /* | |
15362 | + * Get the addresses of pages into which we will load the kernel to | |
15363 | + * be copied back and check if they conflict with the ones we are using. | |
15364 | + */ | |
15365 | + if (toi_get_pageset1_load_addresses()) { | |
15366 | + printk(KERN_INFO "TuxOnIce: Failed to get load addresses for " | |
15367 | + "pageset1.\n"); | |
15368 | + goto out_thaw; | |
15369 | + } | |
15370 | + | |
15371 | + /* Read the original kernel back */ | |
15372 | + toi_cond_pause(1, "About to read pageset 1."); | |
15373 | + | |
15374 | + /* Given the pagemap, read back the data from disk */ | |
15375 | + if (read_pageset(&pagedir1, 0)) { | |
15376 | + toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1."); | |
15377 | + result = -EIO; | |
15378 | + goto out_thaw; | |
15379 | + } | |
15380 | + | |
15381 | + toi_cond_pause(1, "About to restore original kernel."); | |
15382 | + result = 0; | |
15383 | + | |
15384 | + if (!test_action_state(TOI_KEEP_IMAGE) && | |
15385 | + toiActiveAllocator->mark_resume_attempted) | |
15386 | + toiActiveAllocator->mark_resume_attempted(1); | |
15387 | + | |
15388 | + wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); | |
15389 | +out: | |
15390 | + current->flags &= ~PF_NOFREEZE; | |
15391 | + toi_free_page(25, (unsigned long) header_buffer); | |
15392 | + return result; | |
15393 | + | |
15394 | +out_thaw: | |
15395 | + wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); | |
7e46296a | 15396 | + trap_non_toi_io = 0; |
2380c486 JR |
15397 | + thaw_processes(); |
15398 | + usermodehelper_enable(); | |
15399 | +out_enable_nonboot_cpus: | |
15400 | + enable_nonboot_cpus(); | |
15401 | +out_reset_console: | |
15402 | + toi_cleanup_console(); | |
15403 | +out_remove_image: | |
15404 | + result = -EINVAL; | |
15405 | + if (!test_action_state(TOI_KEEP_IMAGE)) | |
15406 | + toiActiveAllocator->remove_image(); | |
15407 | + toiActiveAllocator->read_header_cleanup(); | |
15408 | + noresume_reset_modules(); | |
15409 | + goto out; | |
15410 | +} | |
15411 | + | |
15412 | +/** | |
15413 | + * read_pageset1 - highlevel function to read the saved pages | |
15414 | + * | |
15415 | + * Attempt to read the header and pageset1 of a hibernate image. | |
15416 | + * Handle the outcome, complaining where appropriate. | |
15417 | + **/ | |
15418 | +int read_pageset1(void) | |
15419 | +{ | |
15420 | + int error; | |
15421 | + | |
15422 | + error = __read_pageset1(); | |
15423 | + | |
15424 | + if (error && error != -ENODATA && error != -EINVAL && | |
15425 | + !test_result_state(TOI_ABORTED)) | |
15426 | + abort_hibernate(TOI_IMAGE_ERROR, | |
15427 | + "TuxOnIce: Error %d resuming\n", error); | |
15428 | + | |
15429 | + return error; | |
15430 | +} | |
15431 | + | |
15432 | +/** | |
15433 | + * get_have_image_data - check the image header | |
15434 | + **/ | |
15435 | +static char *get_have_image_data(void) | |
15436 | +{ | |
15437 | + char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP); | |
15438 | + struct toi_header *toi_header; | |
15439 | + | |
15440 | + if (!output_buffer) { | |
15441 | + printk(KERN_INFO "Output buffer null.\n"); | |
15442 | + return NULL; | |
15443 | + } | |
15444 | + | |
15445 | + /* Check for an image */ | |
15446 | + if (!toiActiveAllocator->image_exists(1) || | |
15447 | + toiActiveAllocator->read_header_init() || | |
15448 | + toiActiveAllocator->rw_header_chunk(READ, NULL, | |
15449 | + output_buffer, sizeof(struct toi_header))) { | |
15450 | + sprintf(output_buffer, "0\n"); | |
15451 | + /* | |
15452 | + * From an initrd/ramfs, catting have_image and | |
15453 | + * getting a result of 0 is sufficient. | |
15454 | + */ | |
15455 | + clear_toi_state(TOI_BOOT_TIME); | |
15456 | + goto out; | |
15457 | + } | |
15458 | + | |
15459 | + toi_header = (struct toi_header *) output_buffer; | |
15460 | + | |
15461 | + sprintf(output_buffer, "1\n%s\n%s\n", | |
15462 | + toi_header->uts.machine, | |
15463 | + toi_header->uts.version); | |
15464 | + | |
15465 | + /* Check whether we've resumed before */ | |
15466 | + if (test_toi_state(TOI_RESUMED_BEFORE)) | |
15467 | + strcat(output_buffer, "Resumed before.\n"); | |
15468 | + | |
15469 | +out: | |
15470 | + noresume_reset_modules(); | |
15471 | + return output_buffer; | |
15472 | +} | |
15473 | + | |
15474 | +/** | |
15475 | + * read_pageset2 - read second part of the image | |
15476 | + * @overwrittenpagesonly: Read only pages which would have been | |
15477 | + * verwritten by pageset1? | |
15478 | + * | |
15479 | + * Read in part or all of pageset2 of an image, depending upon | |
15480 | + * whether we are hibernating and have only overwritten a portion | |
15481 | + * with pageset1 pages, or are resuming and need to read them | |
15482 | + * all. | |
15483 | + * | |
15484 | + * Returns: Int | |
15485 | + * Zero if no error, otherwise the error value. | |
15486 | + **/ | |
15487 | +int read_pageset2(int overwrittenpagesonly) | |
15488 | +{ | |
15489 | + int result = 0; | |
15490 | + | |
15491 | + if (!pagedir2.size) | |
15492 | + return 0; | |
15493 | + | |
15494 | + result = read_pageset(&pagedir2, overwrittenpagesonly); | |
15495 | + | |
15496 | + toi_cond_pause(1, "Pagedir 2 read."); | |
15497 | + | |
15498 | + return result; | |
15499 | +} | |
15500 | + | |
15501 | +/** | |
15502 | + * image_exists_read - has an image been found? | |
15503 | + * @page: Output buffer | |
15504 | + * | |
15505 | + * Store 0 or 1 in page, depending on whether an image is found. | |
15506 | + * Incoming buffer is PAGE_SIZE and result is guaranteed | |
15507 | + * to be far less than that, so we don't worry about | |
15508 | + * overflow. | |
15509 | + **/ | |
15510 | +int image_exists_read(const char *page, int count) | |
15511 | +{ | |
15512 | + int len = 0; | |
15513 | + char *result; | |
15514 | + | |
15515 | + if (toi_activate_storage(0)) | |
15516 | + return count; | |
15517 | + | |
15518 | + if (!test_toi_state(TOI_RESUME_DEVICE_OK)) | |
15519 | + toi_attempt_to_parse_resume_device(0); | |
15520 | + | |
15521 | + if (!toiActiveAllocator) { | |
15522 | + len = sprintf((char *) page, "-1\n"); | |
15523 | + } else { | |
15524 | + result = get_have_image_data(); | |
15525 | + if (result) { | |
15526 | + len = sprintf((char *) page, "%s", result); | |
15527 | + toi_free_page(26, (unsigned long) result); | |
15528 | + } | |
15529 | + } | |
15530 | + | |
15531 | + toi_deactivate_storage(0); | |
15532 | + | |
15533 | + return len; | |
15534 | +} | |
15535 | + | |
15536 | +/** | |
15537 | + * image_exists_write - invalidate an image if one exists | |
15538 | + **/ | |
15539 | +int image_exists_write(const char *buffer, int count) | |
15540 | +{ | |
15541 | + if (toi_activate_storage(0)) | |
15542 | + return count; | |
15543 | + | |
15544 | + if (toiActiveAllocator && toiActiveAllocator->image_exists(1)) | |
15545 | + toiActiveAllocator->remove_image(); | |
15546 | + | |
15547 | + toi_deactivate_storage(0); | |
15548 | + | |
15549 | + clear_result_state(TOI_KEPT_IMAGE); | |
15550 | + | |
15551 | + return count; | |
15552 | +} | |
15553 | diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h | |
15554 | new file mode 100644 | |
5dd10c98 | 15555 | index 0000000..fe37713 |
2380c486 JR |
15556 | --- /dev/null |
15557 | +++ b/kernel/power/tuxonice_io.h | |
5dd10c98 | 15558 | @@ -0,0 +1,74 @@ |
2380c486 JR |
15559 | +/* |
15560 | + * kernel/power/tuxonice_io.h | |
15561 | + * | |
5dd10c98 | 15562 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
15563 | + * |
15564 | + * This file is released under the GPLv2. | |
15565 | + * | |
15566 | + * It contains high level IO routines for hibernating. | |
15567 | + * | |
15568 | + */ | |
15569 | + | |
15570 | +#include <linux/utsname.h> | |
15571 | +#include "tuxonice_pagedir.h" | |
2380c486 JR |
15572 | + |
15573 | +/* Non-module data saved in our image header */ | |
15574 | +struct toi_header { | |
15575 | + /* | |
15576 | + * Mirror struct swsusp_info, but without | |
15577 | + * the page aligned attribute | |
15578 | + */ | |
15579 | + struct new_utsname uts; | |
15580 | + u32 version_code; | |
15581 | + unsigned long num_physpages; | |
15582 | + int cpus; | |
15583 | + unsigned long image_pages; | |
15584 | + unsigned long pages; | |
15585 | + unsigned long size; | |
15586 | + | |
15587 | + /* Our own data */ | |
15588 | + unsigned long orig_mem_free; | |
15589 | + int page_size; | |
15590 | + int pageset_2_size; | |
15591 | + int param0; | |
15592 | + int param1; | |
15593 | + int param2; | |
15594 | + int param3; | |
15595 | + int progress0; | |
15596 | + int progress1; | |
15597 | + int progress2; | |
15598 | + int progress3; | |
15599 | + int io_time[2][2]; | |
15600 | + struct pagedir pagedir; | |
15601 | + dev_t root_fs; | |
15602 | + unsigned long bkd; /* Boot kernel data locn */ | |
15603 | +}; | |
15604 | + | |
15605 | +extern int write_pageset(struct pagedir *pagedir); | |
15606 | +extern int write_image_header(void); | |
15607 | +extern int read_pageset1(void); | |
15608 | +extern int read_pageset2(int overwrittenpagesonly); | |
15609 | + | |
15610 | +extern int toi_attempt_to_parse_resume_device(int quiet); | |
15611 | +extern void attempt_to_parse_resume_device2(void); | |
15612 | +extern void attempt_to_parse_alt_resume_param(void); | |
15613 | +int image_exists_read(const char *page, int count); | |
15614 | +int image_exists_write(const char *buffer, int count); | |
15615 | +extern void save_restore_alt_param(int replace, int quiet); | |
15616 | +extern atomic_t toi_io_workers; | |
15617 | + | |
15618 | +/* Args to save_restore_alt_param */ | |
15619 | +#define RESTORE 0 | |
15620 | +#define SAVE 1 | |
15621 | + | |
15622 | +#define NOQUIET 0 | |
15623 | +#define QUIET 1 | |
15624 | + | |
15625 | +extern dev_t name_to_dev_t(char *line); | |
15626 | + | |
15627 | +extern wait_queue_head_t toi_io_queue_flusher; | |
15628 | +extern int toi_bio_queue_flusher_should_finish; | |
5dd10c98 AM |
15629 | + |
15630 | +int fs_info_space_needed(void); | |
15631 | + | |
15632 | +extern int toi_max_workers; | |
2380c486 JR |
15633 | diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c |
15634 | new file mode 100644 | |
5dd10c98 | 15635 | index 0000000..4cc24a9 |
2380c486 JR |
15636 | --- /dev/null |
15637 | +++ b/kernel/power/tuxonice_modules.c | |
5dd10c98 | 15638 | @@ -0,0 +1,522 @@ |
2380c486 JR |
15639 | +/* |
15640 | + * kernel/power/tuxonice_modules.c | |
15641 | + * | |
5dd10c98 | 15642 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
15643 | + * |
15644 | + */ | |
15645 | + | |
15646 | +#include <linux/suspend.h> | |
2380c486 JR |
15647 | +#include "tuxonice.h" |
15648 | +#include "tuxonice_modules.h" | |
15649 | +#include "tuxonice_sysfs.h" | |
15650 | +#include "tuxonice_ui.h" | |
15651 | + | |
15652 | +LIST_HEAD(toi_filters); | |
15653 | +LIST_HEAD(toiAllocators); | |
7e46296a | 15654 | + |
2380c486 | 15655 | +LIST_HEAD(toi_modules); |
7e46296a | 15656 | +EXPORT_SYMBOL_GPL(toi_modules); |
2380c486 JR |
15657 | + |
15658 | +struct toi_module_ops *toiActiveAllocator; | |
15659 | +EXPORT_SYMBOL_GPL(toiActiveAllocator); | |
15660 | + | |
15661 | +static int toi_num_filters; | |
15662 | +int toiNumAllocators, toi_num_modules; | |
15663 | + | |
15664 | +/* | |
15665 | + * toi_header_storage_for_modules | |
15666 | + * | |
15667 | + * Returns the amount of space needed to store configuration | |
15668 | + * data needed by the modules prior to copying back the original | |
15669 | + * kernel. We can exclude data for pageset2 because it will be | |
15670 | + * available anyway once the kernel is copied back. | |
15671 | + */ | |
15672 | +long toi_header_storage_for_modules(void) | |
15673 | +{ | |
15674 | + struct toi_module_ops *this_module; | |
15675 | + int bytes = 0; | |
15676 | + | |
15677 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15678 | + if (!this_module->enabled || | |
15679 | + (this_module->type == WRITER_MODULE && | |
15680 | + toiActiveAllocator != this_module)) | |
15681 | + continue; | |
15682 | + if (this_module->storage_needed) { | |
15683 | + int this = this_module->storage_needed() + | |
15684 | + sizeof(struct toi_module_header) + | |
15685 | + sizeof(int); | |
15686 | + this_module->header_requested = this; | |
15687 | + bytes += this; | |
15688 | + } | |
15689 | + } | |
15690 | + | |
15691 | + /* One more for the empty terminator */ | |
15692 | + return bytes + sizeof(struct toi_module_header); | |
15693 | +} | |
15694 | + | |
0ada99ac | 15695 | +void print_toi_header_storage_for_modules(void) |
15696 | +{ | |
15697 | + struct toi_module_ops *this_module; | |
15698 | + int bytes = 0; | |
15699 | + | |
15700 | + printk(KERN_DEBUG "Header storage:\n"); | |
15701 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15702 | + if (!this_module->enabled || | |
15703 | + (this_module->type == WRITER_MODULE && | |
15704 | + toiActiveAllocator != this_module)) | |
15705 | + continue; | |
15706 | + if (this_module->storage_needed) { | |
15707 | + int this = this_module->storage_needed() + | |
15708 | + sizeof(struct toi_module_header) + | |
15709 | + sizeof(int); | |
15710 | + this_module->header_requested = this; | |
15711 | + bytes += this; | |
15712 | + printk(KERN_DEBUG "+ %16s : %-4d/%d.\n", | |
15713 | + this_module->name, | |
15714 | + this_module->header_used, this); | |
15715 | + } | |
15716 | + } | |
15717 | + | |
5dd10c98 | 15718 | + printk(KERN_DEBUG "+ empty terminator : %zu.\n", |
0ada99ac | 15719 | + sizeof(struct toi_module_header)); |
15720 | + printk(KERN_DEBUG " ====\n"); | |
5dd10c98 | 15721 | + printk(KERN_DEBUG " %zu\n", |
0ada99ac | 15722 | + bytes + sizeof(struct toi_module_header)); |
15723 | +} | |
9474138d | 15724 | +EXPORT_SYMBOL_GPL(print_toi_header_storage_for_modules); |
0ada99ac | 15725 | + |
2380c486 JR |
15726 | +/* |
15727 | + * toi_memory_for_modules | |
15728 | + * | |
15729 | + * Returns the amount of memory requested by modules for | |
15730 | + * doing their work during the cycle. | |
15731 | + */ | |
15732 | + | |
15733 | +long toi_memory_for_modules(int print_parts) | |
15734 | +{ | |
15735 | + long bytes = 0, result; | |
15736 | + struct toi_module_ops *this_module; | |
15737 | + | |
15738 | + if (print_parts) | |
15739 | + printk(KERN_INFO "Memory for modules:\n===================\n"); | |
15740 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15741 | + int this; | |
15742 | + if (!this_module->enabled) | |
15743 | + continue; | |
15744 | + if (this_module->memory_needed) { | |
15745 | + this = this_module->memory_needed(); | |
15746 | + if (print_parts) | |
15747 | + printk(KERN_INFO "%10d bytes (%5ld pages) for " | |
15748 | + "module '%s'.\n", this, | |
15749 | + DIV_ROUND_UP(this, PAGE_SIZE), | |
15750 | + this_module->name); | |
15751 | + bytes += this; | |
15752 | + } | |
15753 | + } | |
15754 | + | |
15755 | + result = DIV_ROUND_UP(bytes, PAGE_SIZE); | |
15756 | + if (print_parts) | |
15757 | + printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result); | |
15758 | + | |
15759 | + return result; | |
15760 | +} | |
15761 | + | |
15762 | +/* | |
15763 | + * toi_expected_compression_ratio | |
15764 | + * | |
15765 | + * Returns the compression ratio expected when saving the image. | |
15766 | + */ | |
15767 | + | |
15768 | +int toi_expected_compression_ratio(void) | |
15769 | +{ | |
15770 | + int ratio = 100; | |
15771 | + struct toi_module_ops *this_module; | |
15772 | + | |
15773 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15774 | + if (!this_module->enabled) | |
15775 | + continue; | |
15776 | + if (this_module->expected_compression) | |
15777 | + ratio = ratio * this_module->expected_compression() | |
15778 | + / 100; | |
15779 | + } | |
15780 | + | |
15781 | + return ratio; | |
15782 | +} | |
15783 | + | |
15784 | +/* toi_find_module_given_dir | |
15785 | + * Functionality : Return a module (if found), given a pointer | |
15786 | + * to its directory name | |
15787 | + */ | |
15788 | + | |
15789 | +static struct toi_module_ops *toi_find_module_given_dir(char *name) | |
15790 | +{ | |
15791 | + struct toi_module_ops *this_module, *found_module = NULL; | |
15792 | + | |
15793 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15794 | + if (!strcmp(name, this_module->directory)) { | |
15795 | + found_module = this_module; | |
15796 | + break; | |
15797 | + } | |
15798 | + } | |
15799 | + | |
15800 | + return found_module; | |
15801 | +} | |
15802 | + | |
15803 | +/* toi_find_module_given_name | |
15804 | + * Functionality : Return a module (if found), given a pointer | |
15805 | + * to its name | |
15806 | + */ | |
15807 | + | |
15808 | +struct toi_module_ops *toi_find_module_given_name(char *name) | |
15809 | +{ | |
15810 | + struct toi_module_ops *this_module, *found_module = NULL; | |
15811 | + | |
15812 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15813 | + if (!strcmp(name, this_module->name)) { | |
15814 | + found_module = this_module; | |
15815 | + break; | |
15816 | + } | |
15817 | + } | |
15818 | + | |
15819 | + return found_module; | |
15820 | +} | |
15821 | + | |
15822 | +/* | |
15823 | + * toi_print_module_debug_info | |
15824 | + * Functionality : Get debugging info from modules into a buffer. | |
15825 | + */ | |
15826 | +int toi_print_module_debug_info(char *buffer, int buffer_size) | |
15827 | +{ | |
15828 | + struct toi_module_ops *this_module; | |
15829 | + int len = 0; | |
15830 | + | |
15831 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15832 | + if (!this_module->enabled) | |
15833 | + continue; | |
15834 | + if (this_module->print_debug_info) { | |
15835 | + int result; | |
15836 | + result = this_module->print_debug_info(buffer + len, | |
15837 | + buffer_size - len); | |
15838 | + len += result; | |
15839 | + } | |
15840 | + } | |
15841 | + | |
15842 | + /* Ensure null terminated */ | |
15843 | + buffer[buffer_size] = 0; | |
15844 | + | |
15845 | + return len; | |
15846 | +} | |
15847 | + | |
15848 | +/* | |
15849 | + * toi_register_module | |
15850 | + * | |
15851 | + * Register a module. | |
15852 | + */ | |
15853 | +int toi_register_module(struct toi_module_ops *module) | |
15854 | +{ | |
15855 | + int i; | |
15856 | + struct kobject *kobj; | |
15857 | + | |
15858 | + module->enabled = 1; | |
15859 | + | |
15860 | + if (toi_find_module_given_name(module->name)) { | |
15861 | + printk(KERN_INFO "TuxOnIce: Trying to load module %s," | |
15862 | + " which is already registered.\n", | |
15863 | + module->name); | |
15864 | + return -EBUSY; | |
15865 | + } | |
15866 | + | |
15867 | + switch (module->type) { | |
15868 | + case FILTER_MODULE: | |
15869 | + list_add_tail(&module->type_list, &toi_filters); | |
15870 | + toi_num_filters++; | |
15871 | + break; | |
15872 | + case WRITER_MODULE: | |
15873 | + list_add_tail(&module->type_list, &toiAllocators); | |
15874 | + toiNumAllocators++; | |
15875 | + break; | |
15876 | + case MISC_MODULE: | |
15877 | + case MISC_HIDDEN_MODULE: | |
7e46296a | 15878 | + case BIO_ALLOCATOR_MODULE: |
2380c486 JR |
15879 | + break; |
15880 | + default: | |
e999739a | 15881 | + printk(KERN_ERR "Hmmm. Module '%s' has an invalid type." |
2380c486 JR |
15882 | + " It has been ignored.\n", module->name); |
15883 | + return -EINVAL; | |
15884 | + } | |
15885 | + list_add_tail(&module->module_list, &toi_modules); | |
15886 | + toi_num_modules++; | |
15887 | + | |
15888 | + if ((!module->directory && !module->shared_directory) || | |
15889 | + !module->sysfs_data || !module->num_sysfs_entries) | |
15890 | + return 0; | |
15891 | + | |
15892 | + /* | |
15893 | + * Modules may share a directory, but those with shared_dir | |
15894 | + * set must be loaded (via symbol dependencies) after parents | |
15895 | + * and unloaded beforehand. | |
15896 | + */ | |
15897 | + if (module->shared_directory) { | |
15898 | + struct toi_module_ops *shared = | |
15899 | + toi_find_module_given_dir(module->shared_directory); | |
15900 | + if (!shared) { | |
e999739a | 15901 | + printk(KERN_ERR "TuxOnIce: Module %s wants to share " |
15902 | + "%s's directory but %s isn't loaded.\n", | |
2380c486 JR |
15903 | + module->name, module->shared_directory, |
15904 | + module->shared_directory); | |
15905 | + toi_unregister_module(module); | |
15906 | + return -ENODEV; | |
15907 | + } | |
15908 | + kobj = shared->dir_kobj; | |
15909 | + } else { | |
15910 | + if (!strncmp(module->directory, "[ROOT]", 6)) | |
15911 | + kobj = tuxonice_kobj; | |
15912 | + else | |
15913 | + kobj = make_toi_sysdir(module->directory); | |
15914 | + } | |
15915 | + module->dir_kobj = kobj; | |
15916 | + for (i = 0; i < module->num_sysfs_entries; i++) { | |
15917 | + int result = toi_register_sysfs_file(kobj, | |
15918 | + &module->sysfs_data[i]); | |
15919 | + if (result) | |
15920 | + return result; | |
15921 | + } | |
15922 | + return 0; | |
15923 | +} | |
15924 | +EXPORT_SYMBOL_GPL(toi_register_module); | |
15925 | + | |
15926 | +/* | |
15927 | + * toi_unregister_module | |
15928 | + * | |
15929 | + * Remove a module. | |
15930 | + */ | |
15931 | +void toi_unregister_module(struct toi_module_ops *module) | |
15932 | +{ | |
15933 | + int i; | |
15934 | + | |
15935 | + if (module->dir_kobj) | |
15936 | + for (i = 0; i < module->num_sysfs_entries; i++) | |
15937 | + toi_unregister_sysfs_file(module->dir_kobj, | |
15938 | + &module->sysfs_data[i]); | |
15939 | + | |
15940 | + if (!module->shared_directory && module->directory && | |
15941 | + strncmp(module->directory, "[ROOT]", 6)) | |
15942 | + remove_toi_sysdir(module->dir_kobj); | |
15943 | + | |
15944 | + switch (module->type) { | |
15945 | + case FILTER_MODULE: | |
15946 | + list_del(&module->type_list); | |
15947 | + toi_num_filters--; | |
15948 | + break; | |
15949 | + case WRITER_MODULE: | |
15950 | + list_del(&module->type_list); | |
15951 | + toiNumAllocators--; | |
15952 | + if (toiActiveAllocator == module) { | |
15953 | + toiActiveAllocator = NULL; | |
15954 | + clear_toi_state(TOI_CAN_RESUME); | |
15955 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
15956 | + } | |
15957 | + break; | |
15958 | + case MISC_MODULE: | |
15959 | + case MISC_HIDDEN_MODULE: | |
7e46296a | 15960 | + case BIO_ALLOCATOR_MODULE: |
2380c486 JR |
15961 | + break; |
15962 | + default: | |
e999739a | 15963 | + printk(KERN_ERR "Module '%s' has an invalid type." |
2380c486 JR |
15964 | + " It has been ignored.\n", module->name); |
15965 | + return; | |
15966 | + } | |
15967 | + list_del(&module->module_list); | |
15968 | + toi_num_modules--; | |
15969 | +} | |
15970 | +EXPORT_SYMBOL_GPL(toi_unregister_module); | |
15971 | + | |
15972 | +/* | |
15973 | + * toi_move_module_tail | |
15974 | + * | |
15975 | + * Rearrange modules when reloading the config. | |
15976 | + */ | |
15977 | +void toi_move_module_tail(struct toi_module_ops *module) | |
15978 | +{ | |
15979 | + switch (module->type) { | |
15980 | + case FILTER_MODULE: | |
15981 | + if (toi_num_filters > 1) | |
15982 | + list_move_tail(&module->type_list, &toi_filters); | |
15983 | + break; | |
15984 | + case WRITER_MODULE: | |
15985 | + if (toiNumAllocators > 1) | |
15986 | + list_move_tail(&module->type_list, &toiAllocators); | |
15987 | + break; | |
15988 | + case MISC_MODULE: | |
15989 | + case MISC_HIDDEN_MODULE: | |
7e46296a | 15990 | + case BIO_ALLOCATOR_MODULE: |
2380c486 JR |
15991 | + break; |
15992 | + default: | |
e999739a | 15993 | + printk(KERN_ERR "Module '%s' has an invalid type." |
2380c486 JR |
15994 | + " It has been ignored.\n", module->name); |
15995 | + return; | |
15996 | + } | |
15997 | + if ((toi_num_filters + toiNumAllocators) > 1) | |
15998 | + list_move_tail(&module->module_list, &toi_modules); | |
15999 | +} | |
16000 | + | |
16001 | +/* | |
16002 | + * toi_initialise_modules | |
16003 | + * | |
16004 | + * Get ready to do some work! | |
16005 | + */ | |
16006 | +int toi_initialise_modules(int starting_cycle, int early) | |
16007 | +{ | |
16008 | + struct toi_module_ops *this_module; | |
16009 | + int result; | |
16010 | + | |
16011 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16012 | + this_module->header_requested = 0; | |
16013 | + this_module->header_used = 0; | |
16014 | + if (!this_module->enabled) | |
16015 | + continue; | |
16016 | + if (this_module->early != early) | |
16017 | + continue; | |
16018 | + if (this_module->initialise) { | |
2380c486 JR |
16019 | + result = this_module->initialise(starting_cycle); |
16020 | + if (result) { | |
16021 | + toi_cleanup_modules(starting_cycle); | |
16022 | + return result; | |
16023 | + } | |
16024 | + this_module->initialised = 1; | |
16025 | + } | |
16026 | + } | |
16027 | + | |
16028 | + return 0; | |
16029 | +} | |
16030 | + | |
16031 | +/* | |
16032 | + * toi_cleanup_modules | |
16033 | + * | |
16034 | + * Tell modules the work is done. | |
16035 | + */ | |
16036 | +void toi_cleanup_modules(int finishing_cycle) | |
16037 | +{ | |
16038 | + struct toi_module_ops *this_module; | |
16039 | + | |
16040 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16041 | + if (!this_module->enabled || !this_module->initialised) | |
16042 | + continue; | |
7e46296a | 16043 | + if (this_module->cleanup) |
2380c486 | 16044 | + this_module->cleanup(finishing_cycle); |
2380c486 JR |
16045 | + this_module->initialised = 0; |
16046 | + } | |
16047 | +} | |
16048 | + | |
16049 | +/* | |
5dd10c98 AM |
16050 | + * toi_pre_atomic_restore_modules |
16051 | + * | |
16052 | + * Get ready to do some work! | |
16053 | + */ | |
16054 | +void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd) | |
16055 | +{ | |
16056 | + struct toi_module_ops *this_module; | |
16057 | + | |
16058 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16059 | + if (this_module->enabled && this_module->pre_atomic_restore) | |
16060 | + this_module->pre_atomic_restore(bkd); | |
16061 | + } | |
16062 | +} | |
16063 | + | |
16064 | +/* | |
16065 | + * toi_post_atomic_restore_modules | |
16066 | + * | |
16067 | + * Get ready to do some work! | |
16068 | + */ | |
16069 | +void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd) | |
16070 | +{ | |
16071 | + struct toi_module_ops *this_module; | |
16072 | + | |
16073 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16074 | + if (this_module->enabled && this_module->post_atomic_restore) | |
16075 | + this_module->post_atomic_restore(bkd); | |
16076 | + } | |
16077 | +} | |
16078 | + | |
16079 | +/* | |
2380c486 JR |
16080 | + * toi_get_next_filter |
16081 | + * | |
16082 | + * Get the next filter in the pipeline. | |
16083 | + */ | |
16084 | +struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought) | |
16085 | +{ | |
16086 | + struct toi_module_ops *last_filter = NULL, *this_filter = NULL; | |
16087 | + | |
16088 | + list_for_each_entry(this_filter, &toi_filters, type_list) { | |
16089 | + if (!this_filter->enabled) | |
16090 | + continue; | |
16091 | + if ((last_filter == filter_sought) || (!filter_sought)) | |
16092 | + return this_filter; | |
16093 | + last_filter = this_filter; | |
16094 | + } | |
16095 | + | |
16096 | + return toiActiveAllocator; | |
16097 | +} | |
16098 | +EXPORT_SYMBOL_GPL(toi_get_next_filter); | |
16099 | + | |
16100 | +/** | |
16101 | + * toi_show_modules: Printk what support is loaded. | |
16102 | + */ | |
16103 | +void toi_print_modules(void) | |
16104 | +{ | |
16105 | + struct toi_module_ops *this_module; | |
16106 | + int prev = 0; | |
16107 | + | |
e999739a | 16108 | + printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for"); |
2380c486 JR |
16109 | + |
16110 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16111 | + if (this_module->type == MISC_HIDDEN_MODULE) | |
16112 | + continue; | |
16113 | + printk("%s %s%s%s", prev ? "," : "", | |
16114 | + this_module->enabled ? "" : "[", | |
16115 | + this_module->name, | |
16116 | + this_module->enabled ? "" : "]"); | |
16117 | + prev = 1; | |
16118 | + } | |
16119 | + | |
16120 | + printk(".\n"); | |
16121 | +} | |
16122 | + | |
16123 | +/* toi_get_modules | |
16124 | + * | |
16125 | + * Take a reference to modules so they can't go away under us. | |
16126 | + */ | |
16127 | + | |
16128 | +int toi_get_modules(void) | |
16129 | +{ | |
16130 | + struct toi_module_ops *this_module; | |
16131 | + | |
16132 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16133 | + struct toi_module_ops *this_module2; | |
16134 | + | |
16135 | + if (try_module_get(this_module->module)) | |
16136 | + continue; | |
16137 | + | |
16138 | + /* Failed! Reverse gets and return error */ | |
16139 | + list_for_each_entry(this_module2, &toi_modules, | |
16140 | + module_list) { | |
16141 | + if (this_module == this_module2) | |
16142 | + return -EINVAL; | |
16143 | + module_put(this_module2->module); | |
16144 | + } | |
16145 | + } | |
16146 | + return 0; | |
16147 | +} | |
16148 | + | |
16149 | +/* toi_put_modules | |
16150 | + * | |
16151 | + * Release our references to modules we used. | |
16152 | + */ | |
16153 | + | |
16154 | +void toi_put_modules(void) | |
16155 | +{ | |
16156 | + struct toi_module_ops *this_module; | |
16157 | + | |
16158 | + list_for_each_entry(this_module, &toi_modules, module_list) | |
16159 | + module_put(this_module->module); | |
16160 | +} | |
16161 | diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h | |
16162 | new file mode 100644 | |
5dd10c98 | 16163 | index 0000000..9e198c4 |
2380c486 JR |
16164 | --- /dev/null |
16165 | +++ b/kernel/power/tuxonice_modules.h | |
5dd10c98 | 16166 | @@ -0,0 +1,197 @@ |
2380c486 JR |
16167 | +/* |
16168 | + * kernel/power/tuxonice_modules.h | |
16169 | + * | |
5dd10c98 | 16170 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16171 | + * |
16172 | + * This file is released under the GPLv2. | |
16173 | + * | |
16174 | + * It contains declarations for modules. Modules are additions to | |
16175 | + * TuxOnIce that provide facilities such as image compression or | |
16176 | + * encryption, backends for storage of the image and user interfaces. | |
16177 | + * | |
16178 | + */ | |
16179 | + | |
16180 | +#ifndef TOI_MODULES_H | |
16181 | +#define TOI_MODULES_H | |
16182 | + | |
16183 | +/* This is the maximum size we store in the image header for a module name */ | |
16184 | +#define TOI_MAX_MODULE_NAME_LENGTH 30 | |
16185 | + | |
5dd10c98 AM |
16186 | +struct toi_boot_kernel_data; |
16187 | + | |
2380c486 JR |
16188 | +/* Per-module metadata */ |
16189 | +struct toi_module_header { | |
16190 | + char name[TOI_MAX_MODULE_NAME_LENGTH]; | |
16191 | + int enabled; | |
16192 | + int type; | |
16193 | + int index; | |
16194 | + int data_length; | |
16195 | + unsigned long signature; | |
16196 | +}; | |
16197 | + | |
16198 | +enum { | |
16199 | + FILTER_MODULE, | |
16200 | + WRITER_MODULE, | |
7e46296a AM |
16201 | + BIO_ALLOCATOR_MODULE, |
16202 | + MISC_MODULE, | |
2380c486 JR |
16203 | + MISC_HIDDEN_MODULE, |
16204 | +}; | |
16205 | + | |
16206 | +enum { | |
16207 | + TOI_ASYNC, | |
16208 | + TOI_SYNC | |
16209 | +}; | |
16210 | + | |
16211 | +struct toi_module_ops { | |
16212 | + /* Functions common to all modules */ | |
16213 | + int type; | |
16214 | + char *name; | |
16215 | + char *directory; | |
16216 | + char *shared_directory; | |
16217 | + struct kobject *dir_kobj; | |
16218 | + struct module *module; | |
16219 | + int enabled, early, initialised; | |
16220 | + struct list_head module_list; | |
16221 | + | |
16222 | + /* List of filters or allocators */ | |
16223 | + struct list_head list, type_list; | |
16224 | + | |
16225 | + /* | |
16226 | + * Requirements for memory and storage in | |
16227 | + * the image header.. | |
16228 | + */ | |
16229 | + int (*memory_needed) (void); | |
16230 | + int (*storage_needed) (void); | |
16231 | + | |
16232 | + int header_requested, header_used; | |
16233 | + | |
16234 | + int (*expected_compression) (void); | |
16235 | + | |
16236 | + /* | |
16237 | + * Debug info | |
16238 | + */ | |
16239 | + int (*print_debug_info) (char *buffer, int size); | |
16240 | + int (*save_config_info) (char *buffer); | |
16241 | + void (*load_config_info) (char *buffer, int len); | |
16242 | + | |
16243 | + /* | |
16244 | + * Initialise & cleanup - general routines called | |
16245 | + * at the start and end of a cycle. | |
16246 | + */ | |
16247 | + int (*initialise) (int starting_cycle); | |
16248 | + void (*cleanup) (int finishing_cycle); | |
16249 | + | |
5dd10c98 AM |
16250 | + void (*pre_atomic_restore) (struct toi_boot_kernel_data *bkd); |
16251 | + void (*post_atomic_restore) (struct toi_boot_kernel_data *bkd); | |
16252 | + | |
2380c486 JR |
16253 | + /* |
16254 | + * Calls for allocating storage (allocators only). | |
16255 | + * | |
0ada99ac | 16256 | + * Header space is requested separately and cannot fail, but the |
16257 | + * reservation is only applied when main storage is allocated. | |
16258 | + * The header space reservation is thus always set prior to | |
16259 | + * requesting the allocation of storage - and prior to querying | |
16260 | + * how much storage is available. | |
2380c486 JR |
16261 | + */ |
16262 | + | |
92bca44c AM |
16263 | + unsigned long (*storage_available) (void); |
16264 | + void (*reserve_header_space) (unsigned long space_requested); | |
7e46296a | 16265 | + int (*register_storage) (void); |
92bca44c AM |
16266 | + int (*allocate_storage) (unsigned long space_requested); |
16267 | + unsigned long (*storage_allocated) (void); | |
2380c486 JR |
16268 | + |
16269 | + /* | |
16270 | + * Routines used in image I/O. | |
16271 | + */ | |
16272 | + int (*rw_init) (int rw, int stream_number); | |
16273 | + int (*rw_cleanup) (int rw); | |
16274 | + int (*write_page) (unsigned long index, struct page *buffer_page, | |
16275 | + unsigned int buf_size); | |
16276 | + int (*read_page) (unsigned long *index, struct page *buffer_page, | |
16277 | + unsigned int *buf_size); | |
0ada99ac | 16278 | + int (*io_flusher) (int rw); |
2380c486 JR |
16279 | + |
16280 | + /* Reset module if image exists but reading aborted */ | |
16281 | + void (*noresume_reset) (void); | |
16282 | + | |
16283 | + /* Read and write the metadata */ | |
16284 | + int (*write_header_init) (void); | |
16285 | + int (*write_header_cleanup) (void); | |
16286 | + | |
16287 | + int (*read_header_init) (void); | |
16288 | + int (*read_header_cleanup) (void); | |
16289 | + | |
5dd10c98 AM |
16290 | + /* To be called after read_header_init */ |
16291 | + int (*get_header_version) (void); | |
16292 | + | |
2380c486 JR |
16293 | + int (*rw_header_chunk) (int rw, struct toi_module_ops *owner, |
16294 | + char *buffer_start, int buffer_size); | |
16295 | + | |
16296 | + int (*rw_header_chunk_noreadahead) (int rw, | |
16297 | + struct toi_module_ops *owner, char *buffer_start, | |
16298 | + int buffer_size); | |
16299 | + | |
16300 | + /* Attempt to parse an image location */ | |
16301 | + int (*parse_sig_location) (char *buffer, int only_writer, int quiet); | |
16302 | + | |
16303 | + /* Throttle I/O according to throughput */ | |
16304 | + void (*update_throughput_throttle) (int jif_index); | |
16305 | + | |
16306 | + /* Flush outstanding I/O */ | |
0ada99ac | 16307 | + int (*finish_all_io) (void); |
2380c486 JR |
16308 | + |
16309 | + /* Determine whether image exists that we can restore */ | |
16310 | + int (*image_exists) (int quiet); | |
16311 | + | |
16312 | + /* Mark the image as having tried to resume */ | |
16313 | + int (*mark_resume_attempted) (int); | |
16314 | + | |
16315 | + /* Destroy image if one exists */ | |
16316 | + int (*remove_image) (void); | |
16317 | + | |
16318 | + /* Sysfs Data */ | |
16319 | + struct toi_sysfs_data *sysfs_data; | |
16320 | + int num_sysfs_entries; | |
7e46296a AM |
16321 | + |
16322 | + /* Block I/O allocator */ | |
16323 | + struct toi_bio_allocator_ops *bio_allocator_ops; | |
2380c486 JR |
16324 | +}; |
16325 | + | |
16326 | +extern int toi_num_modules, toiNumAllocators; | |
16327 | + | |
16328 | +extern struct toi_module_ops *toiActiveAllocator; | |
16329 | +extern struct list_head toi_filters, toiAllocators, toi_modules; | |
16330 | + | |
16331 | +extern void toi_prepare_console_modules(void); | |
16332 | +extern void toi_cleanup_console_modules(void); | |
16333 | + | |
16334 | +extern struct toi_module_ops *toi_find_module_given_name(char *name); | |
16335 | +extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *); | |
16336 | + | |
16337 | +extern int toi_register_module(struct toi_module_ops *module); | |
16338 | +extern void toi_move_module_tail(struct toi_module_ops *module); | |
16339 | + | |
16340 | +extern long toi_header_storage_for_modules(void); | |
16341 | +extern long toi_memory_for_modules(int print_parts); | |
0ada99ac | 16342 | +extern void print_toi_header_storage_for_modules(void); |
2380c486 JR |
16343 | +extern int toi_expected_compression_ratio(void); |
16344 | + | |
16345 | +extern int toi_print_module_debug_info(char *buffer, int buffer_size); | |
16346 | +extern int toi_register_module(struct toi_module_ops *module); | |
16347 | +extern void toi_unregister_module(struct toi_module_ops *module); | |
16348 | + | |
16349 | +extern int toi_initialise_modules(int starting_cycle, int early); | |
16350 | +#define toi_initialise_modules_early(starting) \ | |
16351 | + toi_initialise_modules(starting, 1) | |
16352 | +#define toi_initialise_modules_late(starting) \ | |
16353 | + toi_initialise_modules(starting, 0) | |
16354 | +extern void toi_cleanup_modules(int finishing_cycle); | |
16355 | + | |
5dd10c98 AM |
16356 | +extern void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd); |
16357 | +extern void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd); | |
16358 | + | |
2380c486 JR |
16359 | +extern void toi_print_modules(void); |
16360 | + | |
16361 | +int toi_get_modules(void); | |
16362 | +void toi_put_modules(void); | |
16363 | +#endif | |
16364 | diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c | |
16365 | new file mode 100644 | |
cacc47f8 | 16366 | index 0000000..c5208ee |
2380c486 JR |
16367 | --- /dev/null |
16368 | +++ b/kernel/power/tuxonice_netlink.c | |
cacc47f8 | 16369 | @@ -0,0 +1,345 @@ |
2380c486 JR |
16370 | +/* |
16371 | + * kernel/power/tuxonice_netlink.c | |
16372 | + * | |
5dd10c98 | 16373 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16374 | + * |
16375 | + * This file is released under the GPLv2. | |
16376 | + * | |
16377 | + * Functions for communicating with a userspace helper via netlink. | |
16378 | + */ | |
16379 | + | |
16380 | + | |
16381 | +#include <linux/suspend.h> | |
92bca44c | 16382 | +#include <linux/sched.h> |
2380c486 JR |
16383 | +#include "tuxonice_netlink.h" |
16384 | +#include "tuxonice.h" | |
16385 | +#include "tuxonice_modules.h" | |
16386 | +#include "tuxonice_alloc.h" | |
cacc47f8 | 16387 | +#include "tuxonice_builtin.h" |
2380c486 JR |
16388 | + |
16389 | +static struct user_helper_data *uhd_list; | |
16390 | + | |
16391 | +/* | |
16392 | + * Refill our pool of SKBs for use in emergencies (eg, when eating memory and | |
16393 | + * none can be allocated). | |
16394 | + */ | |
16395 | +static void toi_fill_skb_pool(struct user_helper_data *uhd) | |
16396 | +{ | |
16397 | + while (uhd->pool_level < uhd->pool_limit) { | |
16398 | + struct sk_buff *new_skb = | |
16399 | + alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); | |
16400 | + | |
16401 | + if (!new_skb) | |
16402 | + break; | |
16403 | + | |
16404 | + new_skb->next = uhd->emerg_skbs; | |
16405 | + uhd->emerg_skbs = new_skb; | |
16406 | + uhd->pool_level++; | |
16407 | + } | |
16408 | +} | |
16409 | + | |
16410 | +/* | |
16411 | + * Try to allocate a single skb. If we can't get one, try to use one from | |
16412 | + * our pool. | |
16413 | + */ | |
16414 | +static struct sk_buff *toi_get_skb(struct user_helper_data *uhd) | |
16415 | +{ | |
16416 | + struct sk_buff *skb = | |
16417 | + alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); | |
16418 | + | |
16419 | + if (skb) | |
16420 | + return skb; | |
16421 | + | |
16422 | + skb = uhd->emerg_skbs; | |
16423 | + if (skb) { | |
16424 | + uhd->pool_level--; | |
16425 | + uhd->emerg_skbs = skb->next; | |
16426 | + skb->next = NULL; | |
16427 | + } | |
16428 | + | |
16429 | + return skb; | |
16430 | +} | |
16431 | + | |
16432 | +static void put_skb(struct user_helper_data *uhd, struct sk_buff *skb) | |
16433 | +{ | |
16434 | + if (uhd->pool_level < uhd->pool_limit) { | |
16435 | + skb->next = uhd->emerg_skbs; | |
16436 | + uhd->emerg_skbs = skb; | |
16437 | + } else | |
16438 | + kfree_skb(skb); | |
16439 | +} | |
16440 | + | |
16441 | +void toi_send_netlink_message(struct user_helper_data *uhd, | |
16442 | + int type, void *params, size_t len) | |
16443 | +{ | |
16444 | + struct sk_buff *skb; | |
16445 | + struct nlmsghdr *nlh; | |
16446 | + void *dest; | |
16447 | + struct task_struct *t; | |
16448 | + | |
16449 | + if (uhd->pid == -1) | |
16450 | + return; | |
16451 | + | |
16452 | + if (uhd->debug) | |
16453 | + printk(KERN_ERR "toi_send_netlink_message: Send " | |
16454 | + "message type %d.\n", type); | |
16455 | + | |
16456 | + skb = toi_get_skb(uhd); | |
16457 | + if (!skb) { | |
16458 | + printk(KERN_INFO "toi_netlink: Can't allocate skb!\n"); | |
16459 | + return; | |
16460 | + } | |
16461 | + | |
16462 | + /* NLMSG_PUT contains a hidden goto nlmsg_failure */ | |
16463 | + nlh = NLMSG_PUT(skb, 0, uhd->sock_seq, type, len); | |
16464 | + uhd->sock_seq++; | |
16465 | + | |
16466 | + dest = NLMSG_DATA(nlh); | |
16467 | + if (params && len > 0) | |
16468 | + memcpy(dest, params, len); | |
16469 | + | |
16470 | + netlink_unicast(uhd->nl, skb, uhd->pid, 0); | |
16471 | + | |
cacc47f8 | 16472 | + toi_read_lock_tasklist(); |
92bca44c | 16473 | + t = find_task_by_pid_ns(uhd->pid, &init_pid_ns); |
2380c486 | 16474 | + if (!t) { |
cacc47f8 | 16475 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16476 | + if (uhd->pid > -1) |
16477 | + printk(KERN_INFO "Hmm. Can't find the userspace task" | |
16478 | + " %d.\n", uhd->pid); | |
16479 | + return; | |
16480 | + } | |
16481 | + wake_up_process(t); | |
cacc47f8 | 16482 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16483 | + |
16484 | + yield(); | |
16485 | + | |
16486 | + return; | |
16487 | + | |
16488 | +nlmsg_failure: | |
16489 | + if (skb) | |
16490 | + put_skb(uhd, skb); | |
16491 | + | |
16492 | + if (uhd->debug) | |
16493 | + printk(KERN_ERR "toi_send_netlink_message: Failed to send " | |
16494 | + "message type %d.\n", type); | |
16495 | +} | |
16496 | +EXPORT_SYMBOL_GPL(toi_send_netlink_message); | |
16497 | + | |
16498 | +static void send_whether_debugging(struct user_helper_data *uhd) | |
16499 | +{ | |
16500 | + static u8 is_debugging = 1; | |
16501 | + | |
16502 | + toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING, | |
16503 | + &is_debugging, sizeof(u8)); | |
16504 | +} | |
16505 | + | |
16506 | +/* | |
16507 | + * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we | |
16508 | + * are hibernating. | |
16509 | + */ | |
16510 | +static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid) | |
16511 | +{ | |
16512 | + struct task_struct *t; | |
16513 | + | |
16514 | + if (uhd->debug) | |
16515 | + printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid); | |
16516 | + | |
cacc47f8 | 16517 | + toi_read_lock_tasklist(); |
92bca44c | 16518 | + t = find_task_by_pid_ns(pid, &init_pid_ns); |
2380c486 | 16519 | + if (!t) { |
cacc47f8 | 16520 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16521 | + printk(KERN_INFO "Strange. Can't find the userspace task %d.\n", |
16522 | + pid); | |
16523 | + return -EINVAL; | |
16524 | + } | |
16525 | + | |
16526 | + t->flags |= PF_NOFREEZE; | |
16527 | + | |
cacc47f8 | 16528 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16529 | + uhd->pid = pid; |
16530 | + | |
16531 | + toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0); | |
16532 | + | |
16533 | + return 0; | |
16534 | +} | |
16535 | + | |
16536 | +/* | |
16537 | + * Called when the userspace process has informed us that it's ready to roll. | |
16538 | + */ | |
16539 | +static int nl_ready(struct user_helper_data *uhd, u32 version) | |
16540 | +{ | |
16541 | + if (version != uhd->interface_version) { | |
16542 | + printk(KERN_INFO "%s userspace process using invalid interface" | |
16543 | + " version (%d - kernel wants %d). Trying to " | |
16544 | + "continue without it.\n", | |
16545 | + uhd->name, version, uhd->interface_version); | |
16546 | + if (uhd->not_ready) | |
16547 | + uhd->not_ready(); | |
16548 | + return -EINVAL; | |
16549 | + } | |
16550 | + | |
16551 | + complete(&uhd->wait_for_process); | |
16552 | + | |
16553 | + return 0; | |
16554 | +} | |
16555 | + | |
16556 | +void toi_netlink_close_complete(struct user_helper_data *uhd) | |
16557 | +{ | |
16558 | + if (uhd->nl) { | |
16559 | + netlink_kernel_release(uhd->nl); | |
16560 | + uhd->nl = NULL; | |
16561 | + } | |
16562 | + | |
16563 | + while (uhd->emerg_skbs) { | |
16564 | + struct sk_buff *next = uhd->emerg_skbs->next; | |
16565 | + kfree_skb(uhd->emerg_skbs); | |
16566 | + uhd->emerg_skbs = next; | |
16567 | + } | |
16568 | + | |
16569 | + uhd->pid = -1; | |
16570 | +} | |
16571 | +EXPORT_SYMBOL_GPL(toi_netlink_close_complete); | |
16572 | + | |
16573 | +static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd, | |
16574 | + struct sk_buff *skb, struct nlmsghdr *nlh) | |
16575 | +{ | |
16576 | + int type = nlh->nlmsg_type; | |
16577 | + int *data; | |
16578 | + int err; | |
16579 | + | |
16580 | + if (uhd->debug) | |
16581 | + printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n", | |
16582 | + type); | |
16583 | + | |
16584 | + /* Let the more specific handler go first. It returns | |
16585 | + * 1 for valid messages that it doesn't know. */ | |
16586 | + err = uhd->rcv_msg(skb, nlh); | |
16587 | + if (err != 1) | |
16588 | + return err; | |
16589 | + | |
16590 | + /* Only allow one task to receive NOFREEZE privileges */ | |
16591 | + if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) { | |
16592 | + printk(KERN_INFO "Received extra nofreeze me requests.\n"); | |
16593 | + return -EBUSY; | |
16594 | + } | |
16595 | + | |
16596 | + data = NLMSG_DATA(nlh); | |
16597 | + | |
16598 | + switch (type) { | |
16599 | + case NETLINK_MSG_NOFREEZE_ME: | |
16600 | + return nl_set_nofreeze(uhd, nlh->nlmsg_pid); | |
16601 | + case NETLINK_MSG_GET_DEBUGGING: | |
16602 | + send_whether_debugging(uhd); | |
16603 | + return 0; | |
16604 | + case NETLINK_MSG_READY: | |
16605 | + if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) { | |
16606 | + printk(KERN_INFO "Invalid ready mesage.\n"); | |
16607 | + if (uhd->not_ready) | |
16608 | + uhd->not_ready(); | |
16609 | + return -EINVAL; | |
16610 | + } | |
16611 | + return nl_ready(uhd, (u32) *data); | |
16612 | + case NETLINK_MSG_CLEANUP: | |
16613 | + toi_netlink_close_complete(uhd); | |
16614 | + return 0; | |
16615 | + } | |
16616 | + | |
16617 | + return -EINVAL; | |
16618 | +} | |
16619 | + | |
16620 | +static void toi_user_rcv_skb(struct sk_buff *skb) | |
16621 | +{ | |
16622 | + int err; | |
16623 | + struct nlmsghdr *nlh; | |
16624 | + struct user_helper_data *uhd = uhd_list; | |
16625 | + | |
16626 | + while (uhd && uhd->netlink_id != skb->sk->sk_protocol) | |
16627 | + uhd = uhd->next; | |
16628 | + | |
16629 | + if (!uhd) | |
16630 | + return; | |
16631 | + | |
16632 | + while (skb->len >= NLMSG_SPACE(0)) { | |
16633 | + u32 rlen; | |
16634 | + | |
16635 | + nlh = (struct nlmsghdr *) skb->data; | |
16636 | + if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | |
16637 | + return; | |
16638 | + | |
16639 | + rlen = NLMSG_ALIGN(nlh->nlmsg_len); | |
16640 | + if (rlen > skb->len) | |
16641 | + rlen = skb->len; | |
16642 | + | |
16643 | + err = toi_nl_gen_rcv_msg(uhd, skb, nlh); | |
16644 | + if (err) | |
16645 | + netlink_ack(skb, nlh, err); | |
16646 | + else if (nlh->nlmsg_flags & NLM_F_ACK) | |
16647 | + netlink_ack(skb, nlh, 0); | |
16648 | + skb_pull(skb, rlen); | |
16649 | + } | |
16650 | +} | |
16651 | + | |
16652 | +static int netlink_prepare(struct user_helper_data *uhd) | |
16653 | +{ | |
16654 | + uhd->next = uhd_list; | |
16655 | + uhd_list = uhd; | |
16656 | + | |
16657 | + uhd->sock_seq = 0x42c0ffee; | |
16658 | + uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, 0, | |
16659 | + toi_user_rcv_skb, NULL, THIS_MODULE); | |
16660 | + if (!uhd->nl) { | |
16661 | + printk(KERN_INFO "Failed to allocate netlink socket for %s.\n", | |
16662 | + uhd->name); | |
16663 | + return -ENOMEM; | |
16664 | + } | |
16665 | + | |
16666 | + toi_fill_skb_pool(uhd); | |
16667 | + | |
16668 | + return 0; | |
16669 | +} | |
16670 | + | |
16671 | +void toi_netlink_close(struct user_helper_data *uhd) | |
16672 | +{ | |
16673 | + struct task_struct *t; | |
16674 | + | |
cacc47f8 | 16675 | + toi_read_lock_tasklist(); |
92bca44c | 16676 | + t = find_task_by_pid_ns(uhd->pid, &init_pid_ns); |
2380c486 JR |
16677 | + if (t) |
16678 | + t->flags &= ~PF_NOFREEZE; | |
cacc47f8 | 16679 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16680 | + |
16681 | + toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0); | |
16682 | +} | |
16683 | +EXPORT_SYMBOL_GPL(toi_netlink_close); | |
16684 | + | |
16685 | +int toi_netlink_setup(struct user_helper_data *uhd) | |
16686 | +{ | |
16687 | + /* In case userui didn't cleanup properly on us */ | |
16688 | + toi_netlink_close_complete(uhd); | |
16689 | + | |
16690 | + if (netlink_prepare(uhd) < 0) { | |
16691 | + printk(KERN_INFO "Netlink prepare failed.\n"); | |
16692 | + return 1; | |
16693 | + } | |
16694 | + | |
16695 | + if (toi_launch_userspace_program(uhd->program, uhd->netlink_id, | |
16696 | + UMH_WAIT_EXEC, uhd->debug) < 0) { | |
16697 | + printk(KERN_INFO "Launch userspace program failed.\n"); | |
16698 | + toi_netlink_close_complete(uhd); | |
16699 | + return 1; | |
16700 | + } | |
16701 | + | |
16702 | + /* Wait 2 seconds for the userspace process to make contact */ | |
16703 | + wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ); | |
16704 | + | |
16705 | + if (uhd->pid == -1) { | |
16706 | + printk(KERN_INFO "%s: Failed to contact userspace process.\n", | |
16707 | + uhd->name); | |
16708 | + toi_netlink_close_complete(uhd); | |
16709 | + return 1; | |
16710 | + } | |
16711 | + | |
16712 | + return 0; | |
16713 | +} | |
16714 | +EXPORT_SYMBOL_GPL(toi_netlink_setup); | |
16715 | diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h | |
16716 | new file mode 100644 | |
5dd10c98 | 16717 | index 0000000..b8ef06e |
2380c486 JR |
16718 | --- /dev/null |
16719 | +++ b/kernel/power/tuxonice_netlink.h | |
16720 | @@ -0,0 +1,62 @@ | |
16721 | +/* | |
16722 | + * kernel/power/tuxonice_netlink.h | |
16723 | + * | |
5dd10c98 | 16724 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16725 | + * |
16726 | + * This file is released under the GPLv2. | |
16727 | + * | |
16728 | + * Declarations for functions for communicating with a userspace helper | |
16729 | + * via netlink. | |
16730 | + */ | |
16731 | + | |
16732 | +#include <linux/netlink.h> | |
16733 | +#include <net/sock.h> | |
16734 | + | |
16735 | +#define NETLINK_MSG_BASE 0x10 | |
16736 | + | |
16737 | +#define NETLINK_MSG_READY 0x10 | |
16738 | +#define NETLINK_MSG_NOFREEZE_ME 0x16 | |
16739 | +#define NETLINK_MSG_GET_DEBUGGING 0x19 | |
16740 | +#define NETLINK_MSG_CLEANUP 0x24 | |
16741 | +#define NETLINK_MSG_NOFREEZE_ACK 0x27 | |
16742 | +#define NETLINK_MSG_IS_DEBUGGING 0x28 | |
16743 | + | |
16744 | +struct user_helper_data { | |
16745 | + int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh); | |
16746 | + void (*not_ready) (void); | |
16747 | + struct sock *nl; | |
16748 | + u32 sock_seq; | |
16749 | + pid_t pid; | |
16750 | + char *comm; | |
16751 | + char program[256]; | |
16752 | + int pool_level; | |
16753 | + int pool_limit; | |
16754 | + struct sk_buff *emerg_skbs; | |
16755 | + int skb_size; | |
16756 | + int netlink_id; | |
16757 | + char *name; | |
16758 | + struct user_helper_data *next; | |
16759 | + struct completion wait_for_process; | |
16760 | + u32 interface_version; | |
16761 | + int must_init; | |
16762 | + int debug; | |
16763 | +}; | |
16764 | + | |
16765 | +#ifdef CONFIG_NET | |
16766 | +int toi_netlink_setup(struct user_helper_data *uhd); | |
16767 | +void toi_netlink_close(struct user_helper_data *uhd); | |
16768 | +void toi_send_netlink_message(struct user_helper_data *uhd, | |
16769 | + int type, void *params, size_t len); | |
16770 | +void toi_netlink_close_complete(struct user_helper_data *uhd); | |
16771 | +#else | |
16772 | +static inline int toi_netlink_setup(struct user_helper_data *uhd) | |
16773 | +{ | |
16774 | + return 0; | |
16775 | +} | |
16776 | + | |
16777 | +static inline void toi_netlink_close(struct user_helper_data *uhd) { }; | |
16778 | +static inline void toi_send_netlink_message(struct user_helper_data *uhd, | |
16779 | + int type, void *params, size_t len) { }; | |
16780 | +static inline void toi_netlink_close_complete(struct user_helper_data *uhd) | |
16781 | + { }; | |
16782 | +#endif | |
16783 | diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c | |
16784 | new file mode 100644 | |
5dd10c98 | 16785 | index 0000000..091c9e3 |
2380c486 JR |
16786 | --- /dev/null |
16787 | +++ b/kernel/power/tuxonice_pagedir.c | |
5dd10c98 | 16788 | @@ -0,0 +1,339 @@ |
2380c486 JR |
16789 | +/* |
16790 | + * kernel/power/tuxonice_pagedir.c | |
16791 | + * | |
16792 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
16793 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
16794 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
5dd10c98 | 16795 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16796 | + * |
16797 | + * This file is released under the GPLv2. | |
16798 | + * | |
16799 | + * Routines for handling pagesets. | |
16800 | + * Note that pbes aren't actually stored as such. They're stored as | |
16801 | + * bitmaps and extents. | |
16802 | + */ | |
16803 | + | |
16804 | +#include <linux/suspend.h> | |
16805 | +#include <linux/highmem.h> | |
16806 | +#include <linux/bootmem.h> | |
16807 | +#include <linux/hardirq.h> | |
16808 | +#include <linux/sched.h> | |
e999739a | 16809 | +#include <linux/cpu.h> |
2380c486 JR |
16810 | +#include <asm/tlbflush.h> |
16811 | + | |
16812 | +#include "tuxonice_pageflags.h" | |
16813 | +#include "tuxonice_ui.h" | |
16814 | +#include "tuxonice_pagedir.h" | |
16815 | +#include "tuxonice_prepare_image.h" | |
16816 | +#include "tuxonice.h" | |
2380c486 JR |
16817 | +#include "tuxonice_builtin.h" |
16818 | +#include "tuxonice_alloc.h" | |
16819 | + | |
16820 | +static int ptoi_pfn; | |
16821 | +static struct pbe *this_low_pbe; | |
16822 | +static struct pbe **last_low_pbe_ptr; | |
16823 | +static struct memory_bitmap dup_map1, dup_map2; | |
16824 | + | |
16825 | +void toi_reset_alt_image_pageset2_pfn(void) | |
16826 | +{ | |
16827 | + memory_bm_position_reset(pageset2_map); | |
16828 | +} | |
16829 | + | |
16830 | +static struct page *first_conflicting_page; | |
16831 | + | |
16832 | +/* | |
16833 | + * free_conflicting_pages | |
16834 | + */ | |
16835 | + | |
16836 | +static void free_conflicting_pages(void) | |
16837 | +{ | |
16838 | + while (first_conflicting_page) { | |
16839 | + struct page *next = | |
16840 | + *((struct page **) kmap(first_conflicting_page)); | |
16841 | + kunmap(first_conflicting_page); | |
16842 | + toi__free_page(29, first_conflicting_page); | |
16843 | + first_conflicting_page = next; | |
16844 | + } | |
16845 | +} | |
16846 | + | |
16847 | +/* __toi_get_nonconflicting_page | |
16848 | + * | |
16849 | + * Description: Gets order zero pages that won't be overwritten | |
16850 | + * while copying the original pages. | |
16851 | + */ | |
16852 | + | |
16853 | +struct page *___toi_get_nonconflicting_page(int can_be_highmem) | |
16854 | +{ | |
16855 | + struct page *page; | |
16856 | + gfp_t flags = TOI_ATOMIC_GFP; | |
16857 | + if (can_be_highmem) | |
16858 | + flags |= __GFP_HIGHMEM; | |
16859 | + | |
16860 | + | |
16861 | + if (test_toi_state(TOI_LOADING_ALT_IMAGE) && | |
16862 | + pageset2_map && | |
16863 | + (ptoi_pfn != BM_END_OF_MAP)) { | |
16864 | + do { | |
16865 | + ptoi_pfn = memory_bm_next_pfn(pageset2_map); | |
16866 | + if (ptoi_pfn != BM_END_OF_MAP) { | |
16867 | + page = pfn_to_page(ptoi_pfn); | |
16868 | + if (!PagePageset1(page) && | |
16869 | + (can_be_highmem || !PageHighMem(page))) | |
16870 | + return page; | |
16871 | + } | |
16872 | + } while (ptoi_pfn != BM_END_OF_MAP); | |
16873 | + } | |
16874 | + | |
16875 | + do { | |
16876 | + page = toi_alloc_page(29, flags); | |
16877 | + if (!page) { | |
16878 | + printk(KERN_INFO "Failed to get nonconflicting " | |
16879 | + "page.\n"); | |
16880 | + return NULL; | |
16881 | + } | |
16882 | + if (PagePageset1(page)) { | |
16883 | + struct page **next = (struct page **) kmap(page); | |
16884 | + *next = first_conflicting_page; | |
16885 | + first_conflicting_page = page; | |
16886 | + kunmap(page); | |
16887 | + } | |
16888 | + } while (PagePageset1(page)); | |
16889 | + | |
16890 | + return page; | |
16891 | +} | |
16892 | + | |
16893 | +unsigned long __toi_get_nonconflicting_page(void) | |
16894 | +{ | |
16895 | + struct page *page = ___toi_get_nonconflicting_page(0); | |
16896 | + return page ? (unsigned long) page_address(page) : 0; | |
16897 | +} | |
16898 | + | |
16899 | +static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe, | |
16900 | + int highmem) | |
16901 | +{ | |
16902 | + if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1)) | |
16903 | + + 2 * sizeof(struct pbe)) > PAGE_SIZE) { | |
16904 | + struct page *new_page = | |
16905 | + ___toi_get_nonconflicting_page(highmem); | |
16906 | + if (!new_page) | |
16907 | + return ERR_PTR(-ENOMEM); | |
16908 | + this_pbe = (struct pbe *) kmap(new_page); | |
16909 | + memset(this_pbe, 0, PAGE_SIZE); | |
16910 | + *page_ptr = new_page; | |
16911 | + } else | |
16912 | + this_pbe++; | |
16913 | + | |
16914 | + return this_pbe; | |
16915 | +} | |
16916 | + | |
16917 | +/** | |
16918 | + * get_pageset1_load_addresses - generate pbes for conflicting pages | |
16919 | + * | |
16920 | + * We check here that pagedir & pages it points to won't collide | |
16921 | + * with pages where we're going to restore from the loaded pages | |
16922 | + * later. | |
16923 | + * | |
16924 | + * Returns: | |
16925 | + * Zero on success, one if couldn't find enough pages (shouldn't | |
16926 | + * happen). | |
16927 | + **/ | |
16928 | +int toi_get_pageset1_load_addresses(void) | |
16929 | +{ | |
16930 | + int pfn, highallocd = 0, lowallocd = 0; | |
16931 | + int low_needed = pagedir1.size - get_highmem_size(pagedir1); | |
16932 | + int high_needed = get_highmem_size(pagedir1); | |
16933 | + int low_pages_for_highmem = 0; | |
16934 | + gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM; | |
16935 | + struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL, | |
16936 | + *low_pbe_page; | |
16937 | + struct pbe **last_high_pbe_ptr = &restore_highmem_pblist, | |
16938 | + *this_high_pbe = NULL; | |
16939 | + int orig_low_pfn, orig_high_pfn; | |
16940 | + int high_pbes_done = 0, low_pbes_done = 0; | |
5dd10c98 | 16941 | + int low_direct = 0, high_direct = 0, result = 0, i; |
e999739a | 16942 | + |
2380c486 JR |
16943 | + /* |
16944 | + * We need to duplicate pageset1's map because memory_bm_next_pfn's | |
16945 | + * state gets stomped on by the PagePageset1() test in setup_pbes. | |
16946 | + */ | |
e999739a | 16947 | + memory_bm_create(&dup_map1, GFP_ATOMIC, 0); |
2380c486 JR |
16948 | + memory_bm_dup(pageset1_map, &dup_map1); |
16949 | + | |
e999739a | 16950 | + memory_bm_create(&dup_map2, GFP_ATOMIC, 0); |
2380c486 JR |
16951 | + memory_bm_dup(pageset1_map, &dup_map2); |
16952 | + | |
16953 | + memory_bm_position_reset(pageset1_map); | |
16954 | + memory_bm_position_reset(&dup_map1); | |
16955 | + memory_bm_position_reset(&dup_map2); | |
16956 | + | |
16957 | + last_low_pbe_ptr = &restore_pblist; | |
16958 | + | |
16959 | + /* First, allocate pages for the start of our pbe lists. */ | |
16960 | + if (high_needed) { | |
16961 | + high_pbe_page = ___toi_get_nonconflicting_page(1); | |
16962 | + if (!high_pbe_page) { | |
16963 | + result = -ENOMEM; | |
16964 | + goto out; | |
16965 | + } | |
16966 | + this_high_pbe = (struct pbe *) kmap(high_pbe_page); | |
16967 | + memset(this_high_pbe, 0, PAGE_SIZE); | |
16968 | + } | |
16969 | + | |
16970 | + low_pbe_page = ___toi_get_nonconflicting_page(0); | |
16971 | + if (!low_pbe_page) { | |
16972 | + result = -ENOMEM; | |
16973 | + goto out; | |
16974 | + } | |
16975 | + this_low_pbe = (struct pbe *) page_address(low_pbe_page); | |
16976 | + | |
16977 | + /* | |
5dd10c98 | 16978 | + * Next, allocate the number of pages we need. |
2380c486 JR |
16979 | + */ |
16980 | + | |
5dd10c98 AM |
16981 | + i = low_needed + high_needed; |
16982 | + | |
2380c486 | 16983 | + do { |
5dd10c98 AM |
16984 | + int is_high; |
16985 | + | |
16986 | + if (i == low_needed) | |
16987 | + flags &= ~__GFP_HIGHMEM; | |
16988 | + | |
2380c486 | 16989 | + page = toi_alloc_page(30, flags); |
5dd10c98 | 16990 | + BUG_ON(!page); |
2380c486 | 16991 | + |
5dd10c98 | 16992 | + SetPagePageset1Copy(page); |
2380c486 JR |
16993 | + is_high = PageHighMem(page); |
16994 | + | |
16995 | + if (PagePageset1(page)) { | |
5dd10c98 AM |
16996 | + if (is_high) |
16997 | + high_direct++; | |
16998 | + else | |
16999 | + low_direct++; | |
2380c486 JR |
17000 | + } else { |
17001 | + if (is_high) | |
17002 | + highallocd++; | |
17003 | + else | |
17004 | + lowallocd++; | |
17005 | + } | |
5dd10c98 | 17006 | + } while (--i); |
2380c486 JR |
17007 | + |
17008 | + high_needed -= high_direct; | |
17009 | + low_needed -= low_direct; | |
17010 | + | |
17011 | + /* | |
17012 | + * Do we need to use some lowmem pages for the copies of highmem | |
17013 | + * pages? | |
17014 | + */ | |
17015 | + if (high_needed > highallocd) { | |
17016 | + low_pages_for_highmem = high_needed - highallocd; | |
17017 | + high_needed -= low_pages_for_highmem; | |
17018 | + low_needed += low_pages_for_highmem; | |
17019 | + } | |
17020 | + | |
2380c486 JR |
17021 | + /* |
17022 | + * Now generate our pbes (which will be used for the atomic restore), | |
17023 | + * and free unneeded pages. | |
17024 | + */ | |
17025 | + memory_bm_position_reset(pageset1_copy_map); | |
17026 | + for (pfn = memory_bm_next_pfn(pageset1_copy_map); pfn != BM_END_OF_MAP; | |
17027 | + pfn = memory_bm_next_pfn(pageset1_copy_map)) { | |
17028 | + int is_high; | |
17029 | + page = pfn_to_page(pfn); | |
17030 | + is_high = PageHighMem(page); | |
17031 | + | |
17032 | + if (PagePageset1(page)) | |
17033 | + continue; | |
17034 | + | |
2380c486 JR |
17035 | + /* Nope. We're going to use this page. Add a pbe. */ |
17036 | + if (is_high || low_pages_for_highmem) { | |
17037 | + struct page *orig_page; | |
17038 | + high_pbes_done++; | |
17039 | + if (!is_high) | |
17040 | + low_pages_for_highmem--; | |
17041 | + do { | |
17042 | + orig_high_pfn = memory_bm_next_pfn(&dup_map1); | |
17043 | + BUG_ON(orig_high_pfn == BM_END_OF_MAP); | |
17044 | + orig_page = pfn_to_page(orig_high_pfn); | |
17045 | + } while (!PageHighMem(orig_page) || | |
5dd10c98 | 17046 | + PagePageset1Copy(orig_page)); |
2380c486 JR |
17047 | + |
17048 | + this_high_pbe->orig_address = orig_page; | |
17049 | + this_high_pbe->address = page; | |
17050 | + this_high_pbe->next = NULL; | |
17051 | + if (last_high_pbe_page != high_pbe_page) { | |
17052 | + *last_high_pbe_ptr = | |
17053 | + (struct pbe *) high_pbe_page; | |
17054 | + if (!last_high_pbe_page) | |
17055 | + last_high_pbe_page = high_pbe_page; | |
17056 | + } else | |
17057 | + *last_high_pbe_ptr = this_high_pbe; | |
17058 | + last_high_pbe_ptr = &this_high_pbe->next; | |
17059 | + if (last_high_pbe_page != high_pbe_page) { | |
17060 | + kunmap(last_high_pbe_page); | |
17061 | + last_high_pbe_page = high_pbe_page; | |
17062 | + } | |
17063 | + this_high_pbe = get_next_pbe(&high_pbe_page, | |
17064 | + this_high_pbe, 1); | |
17065 | + if (IS_ERR(this_high_pbe)) { | |
17066 | + printk(KERN_INFO | |
17067 | + "This high pbe is an error.\n"); | |
17068 | + return -ENOMEM; | |
17069 | + } | |
17070 | + } else { | |
17071 | + struct page *orig_page; | |
17072 | + low_pbes_done++; | |
17073 | + do { | |
17074 | + orig_low_pfn = memory_bm_next_pfn(&dup_map2); | |
17075 | + BUG_ON(orig_low_pfn == BM_END_OF_MAP); | |
17076 | + orig_page = pfn_to_page(orig_low_pfn); | |
17077 | + } while (PageHighMem(orig_page) || | |
5dd10c98 | 17078 | + PagePageset1Copy(orig_page)); |
2380c486 JR |
17079 | + |
17080 | + this_low_pbe->orig_address = page_address(orig_page); | |
17081 | + this_low_pbe->address = page_address(page); | |
17082 | + this_low_pbe->next = NULL; | |
17083 | + *last_low_pbe_ptr = this_low_pbe; | |
17084 | + last_low_pbe_ptr = &this_low_pbe->next; | |
17085 | + this_low_pbe = get_next_pbe(&low_pbe_page, | |
17086 | + this_low_pbe, 0); | |
17087 | + if (IS_ERR(this_low_pbe)) { | |
17088 | + printk(KERN_INFO "this_low_pbe is an error.\n"); | |
17089 | + return -ENOMEM; | |
17090 | + } | |
17091 | + } | |
17092 | + } | |
17093 | + | |
17094 | + if (high_pbe_page) | |
17095 | + kunmap(high_pbe_page); | |
17096 | + | |
17097 | + if (last_high_pbe_page != high_pbe_page) { | |
17098 | + if (last_high_pbe_page) | |
17099 | + kunmap(last_high_pbe_page); | |
17100 | + toi__free_page(29, high_pbe_page); | |
17101 | + } | |
17102 | + | |
17103 | + free_conflicting_pages(); | |
17104 | + | |
17105 | +out: | |
17106 | + memory_bm_free(&dup_map1, 0); | |
17107 | + memory_bm_free(&dup_map2, 0); | |
e999739a | 17108 | + |
2380c486 JR |
17109 | + return result; |
17110 | +} | |
17111 | + | |
17112 | +int add_boot_kernel_data_pbe(void) | |
17113 | +{ | |
17114 | + this_low_pbe->address = (char *) __toi_get_nonconflicting_page(); | |
17115 | + if (!this_low_pbe->address) { | |
17116 | + printk(KERN_INFO "Failed to get bkd atomic restore buffer."); | |
17117 | + return -ENOMEM; | |
17118 | + } | |
17119 | + | |
17120 | + toi_bkd.size = sizeof(toi_bkd); | |
17121 | + memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd)); | |
17122 | + | |
17123 | + *last_low_pbe_ptr = this_low_pbe; | |
17124 | + this_low_pbe->orig_address = (char *) boot_kernel_data_buffer; | |
17125 | + this_low_pbe->next = NULL; | |
17126 | + return 0; | |
17127 | +} | |
17128 | diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h | |
17129 | new file mode 100644 | |
5dd10c98 | 17130 | index 0000000..d08e4b1 |
2380c486 JR |
17131 | --- /dev/null |
17132 | +++ b/kernel/power/tuxonice_pagedir.h | |
17133 | @@ -0,0 +1,50 @@ | |
17134 | +/* | |
17135 | + * kernel/power/tuxonice_pagedir.h | |
17136 | + * | |
5dd10c98 | 17137 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17138 | + * |
17139 | + * This file is released under the GPLv2. | |
17140 | + * | |
17141 | + * Declarations for routines for handling pagesets. | |
17142 | + */ | |
17143 | + | |
17144 | +#ifndef KERNEL_POWER_PAGEDIR_H | |
17145 | +#define KERNEL_POWER_PAGEDIR_H | |
17146 | + | |
17147 | +/* Pagedir | |
17148 | + * | |
17149 | + * Contains the metadata for a set of pages saved in the image. | |
17150 | + */ | |
17151 | + | |
17152 | +struct pagedir { | |
17153 | + int id; | |
92bca44c | 17154 | + unsigned long size; |
2380c486 | 17155 | +#ifdef CONFIG_HIGHMEM |
92bca44c | 17156 | + unsigned long size_high; |
2380c486 JR |
17157 | +#endif |
17158 | +}; | |
17159 | + | |
17160 | +#ifdef CONFIG_HIGHMEM | |
17161 | +#define get_highmem_size(pagedir) (pagedir.size_high) | |
17162 | +#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0) | |
17163 | +#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0) | |
17164 | +#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high) | |
17165 | +#else | |
17166 | +#define get_highmem_size(pagedir) (0) | |
17167 | +#define set_highmem_size(pagedir, sz) do { } while (0) | |
17168 | +#define inc_highmem_size(pagedir) do { } while (0) | |
17169 | +#define get_lowmem_size(pagedir) (pagedir.size) | |
17170 | +#endif | |
17171 | + | |
17172 | +extern struct pagedir pagedir1, pagedir2; | |
17173 | + | |
17174 | +extern void toi_copy_pageset1(void); | |
17175 | + | |
17176 | +extern int toi_get_pageset1_load_addresses(void); | |
17177 | + | |
17178 | +extern unsigned long __toi_get_nonconflicting_page(void); | |
17179 | +struct page *___toi_get_nonconflicting_page(int can_be_highmem); | |
17180 | + | |
17181 | +extern void toi_reset_alt_image_pageset2_pfn(void); | |
17182 | +extern int add_boot_kernel_data_pbe(void); | |
17183 | +#endif | |
17184 | diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c | |
17185 | new file mode 100644 | |
5dd10c98 | 17186 | index 0000000..e9ec5b5 |
2380c486 JR |
17187 | --- /dev/null |
17188 | +++ b/kernel/power/tuxonice_pageflags.c | |
9474138d | 17189 | @@ -0,0 +1,28 @@ |
2380c486 JR |
17190 | +/* |
17191 | + * kernel/power/tuxonice_pageflags.c | |
17192 | + * | |
5dd10c98 | 17193 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17194 | + * |
17195 | + * This file is released under the GPLv2. | |
17196 | + * | |
17197 | + * Routines for serialising and relocating pageflags in which we | |
17198 | + * store our image metadata. | |
17199 | + */ | |
17200 | + | |
17201 | +#include <linux/list.h> | |
17202 | +#include "tuxonice_pageflags.h" | |
17203 | +#include "power.h" | |
17204 | + | |
17205 | +int toi_pageflags_space_needed(void) | |
17206 | +{ | |
17207 | + int total = 0; | |
17208 | + struct bm_block *bb; | |
17209 | + | |
17210 | + total = sizeof(unsigned int); | |
17211 | + | |
17212 | + list_for_each_entry(bb, &pageset1_map->blocks, hook) | |
17213 | + total += 2 * sizeof(unsigned long) + PAGE_SIZE; | |
17214 | + | |
17215 | + return total; | |
17216 | +} | |
9474138d | 17217 | +EXPORT_SYMBOL_GPL(toi_pageflags_space_needed); |
2380c486 JR |
17218 | diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h |
17219 | new file mode 100644 | |
5dd10c98 | 17220 | index 0000000..d5aa7b1 |
2380c486 JR |
17221 | --- /dev/null |
17222 | +++ b/kernel/power/tuxonice_pageflags.h | |
9474138d | 17223 | @@ -0,0 +1,72 @@ |
2380c486 JR |
17224 | +/* |
17225 | + * kernel/power/tuxonice_pageflags.h | |
17226 | + * | |
5dd10c98 | 17227 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17228 | + * |
17229 | + * This file is released under the GPLv2. | |
17230 | + */ | |
17231 | + | |
17232 | +#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H | |
17233 | +#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H | |
17234 | + | |
2380c486 JR |
17235 | +extern struct memory_bitmap *pageset1_map; |
17236 | +extern struct memory_bitmap *pageset1_copy_map; | |
17237 | +extern struct memory_bitmap *pageset2_map; | |
17238 | +extern struct memory_bitmap *page_resave_map; | |
17239 | +extern struct memory_bitmap *io_map; | |
17240 | +extern struct memory_bitmap *nosave_map; | |
17241 | +extern struct memory_bitmap *free_map; | |
17242 | + | |
17243 | +#define PagePageset1(page) \ | |
17244 | + (memory_bm_test_bit(pageset1_map, page_to_pfn(page))) | |
17245 | +#define SetPagePageset1(page) \ | |
17246 | + (memory_bm_set_bit(pageset1_map, page_to_pfn(page))) | |
17247 | +#define ClearPagePageset1(page) \ | |
17248 | + (memory_bm_clear_bit(pageset1_map, page_to_pfn(page))) | |
17249 | + | |
17250 | +#define PagePageset1Copy(page) \ | |
17251 | + (memory_bm_test_bit(pageset1_copy_map, page_to_pfn(page))) | |
17252 | +#define SetPagePageset1Copy(page) \ | |
17253 | + (memory_bm_set_bit(pageset1_copy_map, page_to_pfn(page))) | |
17254 | +#define ClearPagePageset1Copy(page) \ | |
17255 | + (memory_bm_clear_bit(pageset1_copy_map, page_to_pfn(page))) | |
17256 | + | |
17257 | +#define PagePageset2(page) \ | |
17258 | + (memory_bm_test_bit(pageset2_map, page_to_pfn(page))) | |
17259 | +#define SetPagePageset2(page) \ | |
17260 | + (memory_bm_set_bit(pageset2_map, page_to_pfn(page))) | |
17261 | +#define ClearPagePageset2(page) \ | |
17262 | + (memory_bm_clear_bit(pageset2_map, page_to_pfn(page))) | |
17263 | + | |
17264 | +#define PageWasRW(page) \ | |
17265 | + (memory_bm_test_bit(pageset2_map, page_to_pfn(page))) | |
17266 | +#define SetPageWasRW(page) \ | |
17267 | + (memory_bm_set_bit(pageset2_map, page_to_pfn(page))) | |
17268 | +#define ClearPageWasRW(page) \ | |
17269 | + (memory_bm_clear_bit(pageset2_map, page_to_pfn(page))) | |
17270 | + | |
17271 | +#define PageResave(page) (page_resave_map ? \ | |
17272 | + memory_bm_test_bit(page_resave_map, page_to_pfn(page)) : 0) | |
17273 | +#define SetPageResave(page) \ | |
17274 | + (memory_bm_set_bit(page_resave_map, page_to_pfn(page))) | |
17275 | +#define ClearPageResave(page) \ | |
17276 | + (memory_bm_clear_bit(page_resave_map, page_to_pfn(page))) | |
17277 | + | |
17278 | +#define PageNosave(page) (nosave_map ? \ | |
17279 | + memory_bm_test_bit(nosave_map, page_to_pfn(page)) : 0) | |
17280 | +#define SetPageNosave(page) \ | |
17281 | + (memory_bm_set_bit(nosave_map, page_to_pfn(page))) | |
17282 | +#define ClearPageNosave(page) \ | |
17283 | + (memory_bm_clear_bit(nosave_map, page_to_pfn(page))) | |
17284 | + | |
17285 | +#define PageNosaveFree(page) (free_map ? \ | |
17286 | + memory_bm_test_bit(free_map, page_to_pfn(page)) : 0) | |
17287 | +#define SetPageNosaveFree(page) \ | |
17288 | + (memory_bm_set_bit(free_map, page_to_pfn(page))) | |
17289 | +#define ClearPageNosaveFree(page) \ | |
17290 | + (memory_bm_clear_bit(free_map, page_to_pfn(page))) | |
17291 | + | |
17292 | +extern void save_pageflags(struct memory_bitmap *pagemap); | |
17293 | +extern int load_pageflags(struct memory_bitmap *pagemap); | |
17294 | +extern int toi_pageflags_space_needed(void); | |
17295 | +#endif | |
17296 | diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c | |
17297 | new file mode 100644 | |
5dd10c98 | 17298 | index 0000000..07e39c0 |
2380c486 JR |
17299 | --- /dev/null |
17300 | +++ b/kernel/power/tuxonice_power_off.c | |
7e46296a | 17301 | @@ -0,0 +1,285 @@ |
2380c486 JR |
17302 | +/* |
17303 | + * kernel/power/tuxonice_power_off.c | |
17304 | + * | |
5dd10c98 | 17305 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17306 | + * |
17307 | + * This file is released under the GPLv2. | |
17308 | + * | |
17309 | + * Support for powering down. | |
17310 | + */ | |
17311 | + | |
17312 | +#include <linux/device.h> | |
17313 | +#include <linux/suspend.h> | |
17314 | +#include <linux/mm.h> | |
17315 | +#include <linux/pm.h> | |
17316 | +#include <linux/reboot.h> | |
17317 | +#include <linux/cpu.h> | |
17318 | +#include <linux/console.h> | |
17319 | +#include <linux/fs.h> | |
17320 | +#include "tuxonice.h" | |
17321 | +#include "tuxonice_ui.h" | |
17322 | +#include "tuxonice_power_off.h" | |
17323 | +#include "tuxonice_sysfs.h" | |
17324 | +#include "tuxonice_modules.h" | |
17325 | +#include "tuxonice_io.h" | |
17326 | + | |
17327 | +unsigned long toi_poweroff_method; /* 0 - Kernel power off */ | |
17328 | +EXPORT_SYMBOL_GPL(toi_poweroff_method); | |
17329 | + | |
17330 | +static int wake_delay; | |
17331 | +static char lid_state_file[256], wake_alarm_dir[256]; | |
17332 | +static struct file *lid_file, *alarm_file, *epoch_file; | |
17333 | +static int post_wake_state = -1; | |
17334 | + | |
17335 | +static int did_suspend_to_both; | |
17336 | + | |
17337 | +/* | |
17338 | + * __toi_power_down | |
17339 | + * Functionality : Powers down or reboots the computer once the image | |
17340 | + * has been written to disk. | |
17341 | + * Key Assumptions : Able to reboot/power down via code called or that | |
17342 | + * the warning emitted if the calls fail will be visible | |
17343 | + * to the user (ie printk resumes devices). | |
17344 | + */ | |
17345 | + | |
17346 | +static void __toi_power_down(int method) | |
17347 | +{ | |
17348 | + int error; | |
17349 | + | |
17350 | + toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." : | |
17351 | + "Powering down."); | |
17352 | + | |
17353 | + if (test_result_state(TOI_ABORTED)) | |
17354 | + goto out; | |
17355 | + | |
17356 | + if (test_action_state(TOI_REBOOT)) | |
17357 | + kernel_restart(NULL); | |
17358 | + | |
17359 | + switch (method) { | |
17360 | + case 0: | |
17361 | + break; | |
17362 | + case 3: | |
17363 | + /* | |
17364 | + * Re-read the overwritten part of pageset2 to make post-resume | |
17365 | + * faster. | |
17366 | + */ | |
17367 | + if (read_pageset2(1)) | |
e999739a | 17368 | + panic("Attempt to reload pagedir 2 failed. " |
17369 | + "Try rebooting."); | |
2380c486 | 17370 | + |
7e46296a AM |
17371 | + pm_prepare_console(); |
17372 | + | |
2380c486 JR |
17373 | + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); |
17374 | + if (!error) { | |
17375 | + error = suspend_devices_and_enter(PM_SUSPEND_MEM); | |
17376 | + if (!error) | |
17377 | + did_suspend_to_both = 1; | |
17378 | + } | |
17379 | + pm_notifier_call_chain(PM_POST_SUSPEND); | |
7e46296a | 17380 | + pm_restore_console(); |
2380c486 JR |
17381 | + |
17382 | + /* Success - we're now post-resume-from-ram */ | |
17383 | + if (did_suspend_to_both) | |
17384 | + return; | |
17385 | + | |
17386 | + /* Failed to suspend to ram - do normal power off */ | |
17387 | + break; | |
17388 | + case 4: | |
17389 | + /* | |
17390 | + * If succeeds, doesn't return. If fails, do a simple | |
17391 | + * powerdown. | |
17392 | + */ | |
17393 | + hibernation_platform_enter(); | |
17394 | + break; | |
17395 | + case 5: | |
17396 | + /* Historic entry only now */ | |
17397 | + break; | |
17398 | + } | |
17399 | + | |
17400 | + if (method && method != 5) | |
17401 | + toi_cond_pause(1, | |
17402 | + "Falling back to alternate power off method."); | |
17403 | + | |
17404 | + if (test_result_state(TOI_ABORTED)) | |
17405 | + goto out; | |
17406 | + | |
17407 | + kernel_power_off(); | |
17408 | + kernel_halt(); | |
17409 | + toi_cond_pause(1, "Powerdown failed."); | |
17410 | + while (1) | |
17411 | + cpu_relax(); | |
17412 | + | |
17413 | +out: | |
17414 | + if (read_pageset2(1)) | |
17415 | + panic("Attempt to reload pagedir 2 failed. Try rebooting."); | |
17416 | + return; | |
17417 | +} | |
17418 | + | |
17419 | +#define CLOSE_FILE(file) \ | |
17420 | + if (file) { \ | |
17421 | + filp_close(file, NULL); file = NULL; \ | |
17422 | + } | |
17423 | + | |
17424 | +static void powerdown_cleanup(int toi_or_resume) | |
17425 | +{ | |
17426 | + if (!toi_or_resume) | |
17427 | + return; | |
17428 | + | |
17429 | + CLOSE_FILE(lid_file); | |
17430 | + CLOSE_FILE(alarm_file); | |
17431 | + CLOSE_FILE(epoch_file); | |
17432 | +} | |
17433 | + | |
17434 | +static void open_file(char *format, char *arg, struct file **var, int mode, | |
17435 | + char *desc) | |
17436 | +{ | |
17437 | + char buf[256]; | |
17438 | + | |
17439 | + if (strlen(arg)) { | |
17440 | + sprintf(buf, format, arg); | |
17441 | + *var = filp_open(buf, mode, 0); | |
17442 | + if (IS_ERR(*var) || !*var) { | |
17443 | + printk(KERN_INFO "Failed to open %s file '%s' (%p).\n", | |
17444 | + desc, buf, *var); | |
17445 | + *var = NULL; | |
17446 | + } | |
17447 | + } | |
17448 | +} | |
17449 | + | |
17450 | +static int powerdown_init(int toi_or_resume) | |
17451 | +{ | |
17452 | + if (!toi_or_resume) | |
17453 | + return 0; | |
17454 | + | |
17455 | + did_suspend_to_both = 0; | |
17456 | + | |
17457 | + open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file, | |
17458 | + O_RDONLY, "lid"); | |
17459 | + | |
17460 | + if (strlen(wake_alarm_dir)) { | |
17461 | + open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir, | |
17462 | + &alarm_file, O_WRONLY, "alarm"); | |
17463 | + | |
17464 | + open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir, | |
17465 | + &epoch_file, O_RDONLY, "epoch"); | |
17466 | + } | |
17467 | + | |
17468 | + return 0; | |
17469 | +} | |
17470 | + | |
17471 | +static int lid_closed(void) | |
17472 | +{ | |
17473 | + char array[25]; | |
17474 | + ssize_t size; | |
17475 | + loff_t pos = 0; | |
17476 | + | |
17477 | + if (!lid_file) | |
17478 | + return 0; | |
17479 | + | |
17480 | + size = vfs_read(lid_file, (char __user *) array, 25, &pos); | |
17481 | + if ((int) size < 1) { | |
17482 | + printk(KERN_INFO "Failed to read lid state file (%d).\n", | |
17483 | + (int) size); | |
17484 | + return 0; | |
17485 | + } | |
17486 | + | |
17487 | + if (!strcmp(array, "state: closed\n")) | |
17488 | + return 1; | |
17489 | + | |
17490 | + return 0; | |
17491 | +} | |
17492 | + | |
17493 | +static void write_alarm_file(int value) | |
17494 | +{ | |
17495 | + ssize_t size; | |
17496 | + char buf[40]; | |
17497 | + loff_t pos = 0; | |
17498 | + | |
17499 | + if (!alarm_file) | |
17500 | + return; | |
17501 | + | |
17502 | + sprintf(buf, "%d\n", value); | |
17503 | + | |
17504 | + size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos); | |
17505 | + | |
17506 | + if (size < 0) | |
17507 | + printk(KERN_INFO "Error %d writing alarm value %s.\n", | |
17508 | + (int) size, buf); | |
17509 | +} | |
17510 | + | |
17511 | +/** | |
17512 | + * toi_check_resleep: See whether to powerdown again after waking. | |
17513 | + * | |
17514 | + * After waking, check whether we should powerdown again in a (usually | |
17515 | + * different) way. We only do this if the lid switch is still closed. | |
17516 | + */ | |
17517 | +void toi_check_resleep(void) | |
17518 | +{ | |
17519 | + /* We only return if we suspended to ram and woke. */ | |
17520 | + if (lid_closed() && post_wake_state >= 0) | |
17521 | + __toi_power_down(post_wake_state); | |
17522 | +} | |
17523 | + | |
17524 | +void toi_power_down(void) | |
17525 | +{ | |
17526 | + if (alarm_file && wake_delay) { | |
17527 | + char array[25]; | |
17528 | + loff_t pos = 0; | |
17529 | + size_t size = vfs_read(epoch_file, (char __user *) array, 25, | |
17530 | + &pos); | |
17531 | + | |
17532 | + if (((int) size) < 1) | |
17533 | + printk(KERN_INFO "Failed to read epoch file (%d).\n", | |
17534 | + (int) size); | |
17535 | + else { | |
9474138d AM |
17536 | + unsigned long since_epoch; |
17537 | + if (!strict_strtoul(array, 0, &since_epoch)) { | |
17538 | + /* Clear any wakeup time. */ | |
17539 | + write_alarm_file(0); | |
2380c486 | 17540 | + |
9474138d AM |
17541 | + /* Set new wakeup time. */ |
17542 | + write_alarm_file(since_epoch + wake_delay); | |
17543 | + } | |
2380c486 JR |
17544 | + } |
17545 | + } | |
17546 | + | |
17547 | + __toi_power_down(toi_poweroff_method); | |
17548 | + | |
17549 | + toi_check_resleep(); | |
17550 | +} | |
17551 | +EXPORT_SYMBOL_GPL(toi_power_down); | |
17552 | + | |
17553 | +static struct toi_sysfs_data sysfs_params[] = { | |
17554 | +#if defined(CONFIG_ACPI) | |
17555 | + SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL), | |
17556 | + SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL), | |
17557 | + SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL), | |
17558 | + SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0, | |
17559 | + NULL), | |
17560 | + SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0), | |
17561 | + SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both, | |
17562 | + 0, 0, 0, NULL) | |
17563 | +#endif | |
17564 | +}; | |
17565 | + | |
17566 | +static struct toi_module_ops powerdown_ops = { | |
17567 | + .type = MISC_HIDDEN_MODULE, | |
17568 | + .name = "poweroff", | |
17569 | + .initialise = powerdown_init, | |
17570 | + .cleanup = powerdown_cleanup, | |
17571 | + .directory = "[ROOT]", | |
17572 | + .module = THIS_MODULE, | |
17573 | + .sysfs_data = sysfs_params, | |
17574 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
17575 | + sizeof(struct toi_sysfs_data), | |
17576 | +}; | |
17577 | + | |
17578 | +int toi_poweroff_init(void) | |
17579 | +{ | |
17580 | + return toi_register_module(&powerdown_ops); | |
17581 | +} | |
17582 | + | |
17583 | +void toi_poweroff_exit(void) | |
17584 | +{ | |
17585 | + toi_unregister_module(&powerdown_ops); | |
17586 | +} | |
17587 | diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h | |
17588 | new file mode 100644 | |
5dd10c98 | 17589 | index 0000000..9aa0ea8 |
2380c486 JR |
17590 | --- /dev/null |
17591 | +++ b/kernel/power/tuxonice_power_off.h | |
17592 | @@ -0,0 +1,24 @@ | |
17593 | +/* | |
17594 | + * kernel/power/tuxonice_power_off.h | |
17595 | + * | |
5dd10c98 | 17596 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17597 | + * |
17598 | + * This file is released under the GPLv2. | |
17599 | + * | |
17600 | + * Support for the powering down. | |
17601 | + */ | |
17602 | + | |
17603 | +int toi_pm_state_finish(void); | |
17604 | +void toi_power_down(void); | |
17605 | +extern unsigned long toi_poweroff_method; | |
17606 | +int toi_poweroff_init(void); | |
17607 | +void toi_poweroff_exit(void); | |
17608 | +void toi_check_resleep(void); | |
17609 | + | |
17610 | +extern int platform_begin(int platform_mode); | |
17611 | +extern int platform_pre_snapshot(int platform_mode); | |
17612 | +extern void platform_leave(int platform_mode); | |
17613 | +extern void platform_end(int platform_mode); | |
17614 | +extern void platform_finish(int platform_mode); | |
17615 | +extern int platform_pre_restore(int platform_mode); | |
17616 | +extern void platform_restore_cleanup(int platform_mode); | |
17617 | diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c | |
17618 | new file mode 100644 | |
cacc47f8 | 17619 | index 0000000..9f74df0 |
2380c486 JR |
17620 | --- /dev/null |
17621 | +++ b/kernel/power/tuxonice_prepare_image.c | |
cacc47f8 | 17622 | @@ -0,0 +1,1107 @@ |
2380c486 JR |
17623 | +/* |
17624 | + * kernel/power/tuxonice_prepare_image.c | |
17625 | + * | |
5dd10c98 | 17626 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17627 | + * |
17628 | + * This file is released under the GPLv2. | |
17629 | + * | |
17630 | + * We need to eat memory until we can: | |
17631 | + * 1. Perform the save without changing anything (RAM_NEEDED < #pages) | |
17632 | + * 2. Fit it all in available space (toiActiveAllocator->available_space() >= | |
17633 | + * main_storage_needed()) | |
17634 | + * 3. Reload the pagedir and pageset1 to places that don't collide with their | |
17635 | + * final destinations, not knowing to what extent the resumed kernel will | |
17636 | + * overlap with the one loaded at boot time. I think the resumed kernel | |
17637 | + * should overlap completely, but I don't want to rely on this as it is | |
17638 | + * an unproven assumption. We therefore assume there will be no overlap at | |
17639 | + * all (worse case). | |
17640 | + * 4. Meet the user's requested limit (if any) on the size of the image. | |
17641 | + * The limit is in MB, so pages/256 (assuming 4K pages). | |
17642 | + * | |
17643 | + */ | |
17644 | + | |
2380c486 JR |
17645 | +#include <linux/highmem.h> |
17646 | +#include <linux/freezer.h> | |
17647 | +#include <linux/hardirq.h> | |
17648 | +#include <linux/mmzone.h> | |
17649 | +#include <linux/console.h> | |
17650 | + | |
17651 | +#include "tuxonice_pageflags.h" | |
17652 | +#include "tuxonice_modules.h" | |
17653 | +#include "tuxonice_io.h" | |
17654 | +#include "tuxonice_ui.h" | |
2380c486 | 17655 | +#include "tuxonice_prepare_image.h" |
2380c486 | 17656 | +#include "tuxonice.h" |
7e46296a | 17657 | +#include "tuxonice_extent.h" |
2380c486 JR |
17658 | +#include "tuxonice_checksum.h" |
17659 | +#include "tuxonice_sysfs.h" | |
17660 | +#include "tuxonice_alloc.h" | |
17661 | +#include "tuxonice_atomic_copy.h" | |
cacc47f8 | 17662 | +#include "tuxonice_builtin.h" |
2380c486 | 17663 | + |
92bca44c | 17664 | +static unsigned long num_nosave, main_storage_allocated, storage_limit, |
0ada99ac | 17665 | + header_storage_needed; |
7e46296a AM |
17666 | +unsigned long extra_pd1_pages_allowance = |
17667 | + CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE; | |
17668 | +long image_size_limit; | |
2380c486 JR |
17669 | +static int no_ps2_needed; |
17670 | + | |
17671 | +struct attention_list { | |
17672 | + struct task_struct *task; | |
17673 | + struct attention_list *next; | |
17674 | +}; | |
17675 | + | |
17676 | +static struct attention_list *attention_list; | |
17677 | + | |
17678 | +#define PAGESET1 0 | |
17679 | +#define PAGESET2 1 | |
17680 | + | |
17681 | +void free_attention_list(void) | |
17682 | +{ | |
17683 | + struct attention_list *last = NULL; | |
17684 | + | |
17685 | + while (attention_list) { | |
17686 | + last = attention_list; | |
17687 | + attention_list = attention_list->next; | |
9474138d | 17688 | + toi_kfree(6, last, sizeof(*last)); |
2380c486 JR |
17689 | + } |
17690 | +} | |
17691 | + | |
17692 | +static int build_attention_list(void) | |
17693 | +{ | |
17694 | + int i, task_count = 0; | |
17695 | + struct task_struct *p; | |
17696 | + struct attention_list *next; | |
17697 | + | |
17698 | + /* | |
17699 | + * Count all userspace process (with task->mm) marked PF_NOFREEZE. | |
17700 | + */ | |
cacc47f8 | 17701 | + toi_read_lock_tasklist(); |
2380c486 JR |
17702 | + for_each_process(p) |
17703 | + if ((p->flags & PF_NOFREEZE) || p == current) | |
17704 | + task_count++; | |
cacc47f8 | 17705 | + toi_read_unlock_tasklist(); |
2380c486 JR |
17706 | + |
17707 | + /* | |
17708 | + * Allocate attention list structs. | |
17709 | + */ | |
17710 | + for (i = 0; i < task_count; i++) { | |
17711 | + struct attention_list *this = | |
17712 | + toi_kzalloc(6, sizeof(struct attention_list), | |
17713 | + TOI_WAIT_GFP); | |
17714 | + if (!this) { | |
17715 | + printk(KERN_INFO "Failed to allocate slab for " | |
17716 | + "attention list.\n"); | |
17717 | + free_attention_list(); | |
17718 | + return 1; | |
17719 | + } | |
17720 | + this->next = NULL; | |
17721 | + if (attention_list) | |
17722 | + this->next = attention_list; | |
17723 | + attention_list = this; | |
17724 | + } | |
17725 | + | |
17726 | + next = attention_list; | |
cacc47f8 | 17727 | + toi_read_lock_tasklist(); |
2380c486 JR |
17728 | + for_each_process(p) |
17729 | + if ((p->flags & PF_NOFREEZE) || p == current) { | |
17730 | + next->task = p; | |
17731 | + next = next->next; | |
17732 | + } | |
cacc47f8 | 17733 | + toi_read_unlock_tasklist(); |
2380c486 JR |
17734 | + return 0; |
17735 | +} | |
17736 | + | |
17737 | +static void pageset2_full(void) | |
17738 | +{ | |
17739 | + struct zone *zone; | |
17740 | + struct page *page; | |
17741 | + unsigned long flags; | |
17742 | + int i; | |
17743 | + | |
92bca44c | 17744 | + for_each_populated_zone(zone) { |
2380c486 JR |
17745 | + spin_lock_irqsave(&zone->lru_lock, flags); |
17746 | + for_each_lru(i) { | |
17747 | + if (!zone_page_state(zone, NR_LRU_BASE + i)) | |
17748 | + continue; | |
17749 | + | |
e999739a | 17750 | + list_for_each_entry(page, &zone->lru[i].list, lru) { |
17751 | + struct address_space *mapping; | |
17752 | + | |
17753 | + mapping = page_mapping(page); | |
17754 | + if (!mapping || !mapping->host || | |
17755 | + !(mapping->host->i_flags & S_ATOMIC_COPY)) | |
17756 | + SetPagePageset2(page); | |
17757 | + } | |
2380c486 JR |
17758 | + } |
17759 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
17760 | + } | |
17761 | +} | |
17762 | + | |
17763 | +/* | |
17764 | + * toi_mark_task_as_pageset | |
17765 | + * Functionality : Marks all the saveable pages belonging to a given process | |
17766 | + * as belonging to a particular pageset. | |
17767 | + */ | |
17768 | + | |
17769 | +static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2) | |
17770 | +{ | |
17771 | + struct vm_area_struct *vma; | |
17772 | + struct mm_struct *mm; | |
17773 | + | |
17774 | + mm = t->active_mm; | |
17775 | + | |
17776 | + if (!mm || !mm->mmap) | |
17777 | + return; | |
17778 | + | |
17779 | + if (!irqs_disabled()) | |
17780 | + down_read(&mm->mmap_sem); | |
17781 | + | |
17782 | + for (vma = mm->mmap; vma; vma = vma->vm_next) { | |
17783 | + unsigned long posn; | |
17784 | + | |
92bca44c AM |
17785 | + if (!vma->vm_start || |
17786 | + vma->vm_flags & (VM_IO | VM_RESERVED | VM_PFNMAP)) | |
2380c486 JR |
17787 | + continue; |
17788 | + | |
17789 | + for (posn = vma->vm_start; posn < vma->vm_end; | |
17790 | + posn += PAGE_SIZE) { | |
17791 | + struct page *page = follow_page(vma, posn, 0); | |
e999739a | 17792 | + struct address_space *mapping; |
17793 | + | |
17794 | + if (!page || !pfn_valid(page_to_pfn(page))) | |
17795 | + continue; | |
17796 | + | |
17797 | + mapping = page_mapping(page); | |
17798 | + if (mapping && mapping->host && | |
17799 | + mapping->host->i_flags & S_ATOMIC_COPY) | |
2380c486 JR |
17800 | + continue; |
17801 | + | |
17802 | + if (pageset2) | |
17803 | + SetPagePageset2(page); | |
17804 | + else { | |
17805 | + ClearPagePageset2(page); | |
17806 | + SetPagePageset1(page); | |
17807 | + } | |
17808 | + } | |
17809 | + } | |
17810 | + | |
17811 | + if (!irqs_disabled()) | |
17812 | + up_read(&mm->mmap_sem); | |
17813 | +} | |
17814 | + | |
e999739a | 17815 | +static void mark_tasks(int pageset) |
17816 | +{ | |
17817 | + struct task_struct *p; | |
17818 | + | |
cacc47f8 | 17819 | + toi_read_lock_tasklist(); |
e999739a | 17820 | + for_each_process(p) { |
17821 | + if (!p->mm) | |
17822 | + continue; | |
17823 | + | |
17824 | + if (p->flags & PF_KTHREAD) | |
17825 | + continue; | |
17826 | + | |
17827 | + toi_mark_task_as_pageset(p, pageset); | |
17828 | + } | |
cacc47f8 | 17829 | + toi_read_unlock_tasklist(); |
e999739a | 17830 | + |
17831 | +} | |
17832 | + | |
2380c486 JR |
17833 | +/* mark_pages_for_pageset2 |
17834 | + * | |
17835 | + * Description: Mark unshared pages in processes not needed for hibernate as | |
17836 | + * being able to be written out in a separate pagedir. | |
17837 | + * HighMem pages are simply marked as pageset2. They won't be | |
17838 | + * needed during hibernate. | |
17839 | + */ | |
17840 | + | |
17841 | +static void toi_mark_pages_for_pageset2(void) | |
17842 | +{ | |
2380c486 JR |
17843 | + struct attention_list *this = attention_list; |
17844 | + | |
17845 | + memory_bm_clear(pageset2_map); | |
17846 | + | |
17847 | + if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed) | |
17848 | + return; | |
17849 | + | |
17850 | + if (test_action_state(TOI_PAGESET2_FULL)) | |
17851 | + pageset2_full(); | |
e999739a | 17852 | + else |
17853 | + mark_tasks(PAGESET2); | |
2380c486 JR |
17854 | + |
17855 | + /* | |
17856 | + * Because the tasks in attention_list are ones related to hibernating, | |
17857 | + * we know that they won't go away under us. | |
17858 | + */ | |
17859 | + | |
17860 | + while (this) { | |
17861 | + if (!test_result_state(TOI_ABORTED)) | |
17862 | + toi_mark_task_as_pageset(this->task, PAGESET1); | |
17863 | + this = this->next; | |
17864 | + } | |
17865 | +} | |
17866 | + | |
17867 | +/* | |
17868 | + * The atomic copy of pageset1 is stored in pageset2 pages. | |
17869 | + * But if pageset1 is larger (normally only just after boot), | |
17870 | + * we need to allocate extra pages to store the atomic copy. | |
17871 | + * The following data struct and functions are used to handle | |
17872 | + * the allocation and freeing of that memory. | |
17873 | + */ | |
17874 | + | |
92bca44c | 17875 | +static unsigned long extra_pages_allocated; |
2380c486 JR |
17876 | + |
17877 | +struct extras { | |
17878 | + struct page *page; | |
17879 | + int order; | |
17880 | + struct extras *next; | |
17881 | +}; | |
17882 | + | |
17883 | +static struct extras *extras_list; | |
17884 | + | |
17885 | +/* toi_free_extra_pagedir_memory | |
17886 | + * | |
17887 | + * Description: Free previously allocated extra pagedir memory. | |
17888 | + */ | |
17889 | +void toi_free_extra_pagedir_memory(void) | |
17890 | +{ | |
17891 | + /* Free allocated pages */ | |
17892 | + while (extras_list) { | |
17893 | + struct extras *this = extras_list; | |
17894 | + int i; | |
17895 | + | |
17896 | + extras_list = this->next; | |
17897 | + | |
17898 | + for (i = 0; i < (1 << this->order); i++) | |
17899 | + ClearPageNosave(this->page + i); | |
17900 | + | |
17901 | + toi_free_pages(9, this->page, this->order); | |
9474138d | 17902 | + toi_kfree(7, this, sizeof(*this)); |
2380c486 JR |
17903 | + } |
17904 | + | |
17905 | + extra_pages_allocated = 0; | |
17906 | +} | |
17907 | + | |
17908 | +/* toi_allocate_extra_pagedir_memory | |
17909 | + * | |
17910 | + * Description: Allocate memory for making the atomic copy of pagedir1 in the | |
17911 | + * case where it is bigger than pagedir2. | |
17912 | + * Arguments: int num_to_alloc: Number of extra pages needed. | |
17913 | + * Result: int. Number of extra pages we now have allocated. | |
17914 | + */ | |
17915 | +static int toi_allocate_extra_pagedir_memory(int extra_pages_needed) | |
17916 | +{ | |
17917 | + int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated; | |
17918 | + gfp_t flags = TOI_ATOMIC_GFP; | |
17919 | + | |
17920 | + if (num_to_alloc < 1) | |
17921 | + return 0; | |
17922 | + | |
17923 | + order = fls(num_to_alloc); | |
17924 | + if (order >= MAX_ORDER) | |
17925 | + order = MAX_ORDER - 1; | |
17926 | + | |
17927 | + while (num_to_alloc) { | |
17928 | + struct page *newpage; | |
17929 | + unsigned long virt; | |
17930 | + struct extras *extras_entry; | |
17931 | + | |
17932 | + while ((1 << order) > num_to_alloc) | |
17933 | + order--; | |
17934 | + | |
17935 | + extras_entry = (struct extras *) toi_kzalloc(7, | |
17936 | + sizeof(struct extras), TOI_ATOMIC_GFP); | |
17937 | + | |
17938 | + if (!extras_entry) | |
17939 | + return extra_pages_allocated; | |
17940 | + | |
17941 | + virt = toi_get_free_pages(9, flags, order); | |
17942 | + while (!virt && order) { | |
17943 | + order--; | |
17944 | + virt = toi_get_free_pages(9, flags, order); | |
17945 | + } | |
17946 | + | |
17947 | + if (!virt) { | |
9474138d | 17948 | + toi_kfree(7, extras_entry, sizeof(*extras_entry)); |
2380c486 JR |
17949 | + return extra_pages_allocated; |
17950 | + } | |
17951 | + | |
17952 | + newpage = virt_to_page(virt); | |
17953 | + | |
17954 | + extras_entry->page = newpage; | |
17955 | + extras_entry->order = order; | |
17956 | + extras_entry->next = NULL; | |
17957 | + | |
17958 | + if (extras_list) | |
17959 | + extras_entry->next = extras_list; | |
17960 | + | |
17961 | + extras_list = extras_entry; | |
17962 | + | |
17963 | + for (j = 0; j < (1 << order); j++) { | |
17964 | + SetPageNosave(newpage + j); | |
17965 | + SetPagePageset1Copy(newpage + j); | |
17966 | + } | |
17967 | + | |
17968 | + extra_pages_allocated += (1 << order); | |
17969 | + num_to_alloc -= (1 << order); | |
17970 | + } | |
17971 | + | |
17972 | + return extra_pages_allocated; | |
17973 | +} | |
17974 | + | |
17975 | +/* | |
17976 | + * real_nr_free_pages: Count pcp pages for a zone type or all zones | |
17977 | + * (-1 for all, otherwise zone_idx() result desired). | |
17978 | + */ | |
92bca44c | 17979 | +unsigned long real_nr_free_pages(unsigned long zone_idx_mask) |
2380c486 JR |
17980 | +{ |
17981 | + struct zone *zone; | |
17982 | + int result = 0, cpu; | |
17983 | + | |
17984 | + /* PCP lists */ | |
9474138d | 17985 | + for_each_populated_zone(zone) { |
2380c486 JR |
17986 | + if (!(zone_idx_mask & (1 << zone_idx(zone)))) |
17987 | + continue; | |
17988 | + | |
17989 | + for_each_online_cpu(cpu) { | |
de6743ae AM |
17990 | + struct per_cpu_pageset *pset = |
17991 | + per_cpu_ptr(zone->pageset, cpu); | |
2380c486 JR |
17992 | + struct per_cpu_pages *pcp = &pset->pcp; |
17993 | + result += pcp->count; | |
17994 | + } | |
17995 | + | |
17996 | + result += zone_page_state(zone, NR_FREE_PAGES); | |
17997 | + } | |
17998 | + return result; | |
17999 | +} | |
18000 | +EXPORT_SYMBOL_GPL(real_nr_free_pages); | |
18001 | + | |
18002 | +/* | |
18003 | + * Discover how much extra memory will be required by the drivers | |
18004 | + * when they're asked to hibernate. We can then ensure that amount | |
18005 | + * of memory is available when we really want it. | |
18006 | + */ | |
18007 | +static void get_extra_pd1_allowance(void) | |
18008 | +{ | |
92bca44c | 18009 | + unsigned long orig_num_free = real_nr_free_pages(all_zones_mask), final; |
2380c486 JR |
18010 | + |
18011 | + toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers."); | |
18012 | + | |
92bca44c AM |
18013 | + if (toi_go_atomic(PMSG_FREEZE, 1)) |
18014 | + return; | |
2380c486 | 18015 | + |
92bca44c AM |
18016 | + final = real_nr_free_pages(all_zones_mask); |
18017 | + toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0); | |
18018 | + | |
18019 | + extra_pd1_pages_allowance = (orig_num_free > final) ? | |
18020 | + orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE : | |
18021 | + MIN_EXTRA_PAGES_ALLOWANCE; | |
2380c486 JR |
18022 | +} |
18023 | + | |
18024 | +/* | |
18025 | + * Amount of storage needed, possibly taking into account the | |
18026 | + * expected compression ratio and possibly also ignoring our | |
18027 | + * allowance for extra pages. | |
18028 | + */ | |
92bca44c | 18029 | +static unsigned long main_storage_needed(int use_ecr, |
2380c486 JR |
18030 | + int ignore_extra_pd1_allow) |
18031 | +{ | |
18032 | + return (pagedir1.size + pagedir2.size + | |
18033 | + (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) * | |
18034 | + (use_ecr ? toi_expected_compression_ratio() : 100) / 100; | |
18035 | +} | |
18036 | + | |
18037 | +/* | |
18038 | + * Storage needed for the image header, in bytes until the return. | |
18039 | + */ | |
92bca44c | 18040 | +unsigned long get_header_storage_needed(void) |
2380c486 | 18041 | +{ |
92bca44c | 18042 | + unsigned long bytes = sizeof(struct toi_header) + |
2380c486 | 18043 | + toi_header_storage_for_modules() + |
5dd10c98 AM |
18044 | + toi_pageflags_space_needed() + |
18045 | + fs_info_space_needed(); | |
2380c486 JR |
18046 | + |
18047 | + return DIV_ROUND_UP(bytes, PAGE_SIZE); | |
18048 | +} | |
9474138d | 18049 | +EXPORT_SYMBOL_GPL(get_header_storage_needed); |
2380c486 JR |
18050 | + |
18051 | +/* | |
18052 | + * When freeing memory, pages from either pageset might be freed. | |
18053 | + * | |
18054 | + * When seeking to free memory to be able to hibernate, for every ps1 page | |
18055 | + * freed, we need 2 less pages for the atomic copy because there is one less | |
18056 | + * page to copy and one more page into which data can be copied. | |
18057 | + * | |
18058 | + * Freeing ps2 pages saves us nothing directly. No more memory is available | |
18059 | + * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but | |
18060 | + * that's too much work to figure out. | |
18061 | + * | |
18062 | + * => ps1_to_free functions | |
18063 | + * | |
18064 | + * Of course if we just want to reduce the image size, because of storage | |
18065 | + * limitations or an image size limit either ps will do. | |
18066 | + * | |
18067 | + * => any_to_free function | |
18068 | + */ | |
18069 | + | |
92bca44c | 18070 | +static unsigned long lowpages_usable_for_highmem_copy(void) |
2380c486 | 18071 | +{ |
92bca44c AM |
18072 | + unsigned long needed = get_lowmem_size(pagedir1) + |
18073 | + extra_pd1_pages_allowance + MIN_FREE_RAM + | |
18074 | + toi_memory_for_modules(0), | |
18075 | + available = get_lowmem_size(pagedir2) + | |
18076 | + real_nr_free_low_pages() + extra_pages_allocated; | |
18077 | + | |
18078 | + return available > needed ? available - needed : 0; | |
2380c486 JR |
18079 | +} |
18080 | + | |
92bca44c | 18081 | +static unsigned long highpages_ps1_to_free(void) |
2380c486 | 18082 | +{ |
92bca44c AM |
18083 | + unsigned long need = get_highmem_size(pagedir1), |
18084 | + available = get_highmem_size(pagedir2) + | |
18085 | + real_nr_free_high_pages() + | |
18086 | + lowpages_usable_for_highmem_copy(); | |
18087 | + | |
18088 | + return need > available ? DIV_ROUND_UP(need - available, 2) : 0; | |
2380c486 JR |
18089 | +} |
18090 | + | |
92bca44c AM |
18091 | +static unsigned long lowpages_ps1_to_free(void) |
18092 | +{ | |
18093 | + unsigned long needed = get_lowmem_size(pagedir1) + | |
18094 | + extra_pd1_pages_allowance + MIN_FREE_RAM + | |
18095 | + toi_memory_for_modules(0), | |
18096 | + available = get_lowmem_size(pagedir2) + | |
18097 | + real_nr_free_low_pages() + extra_pages_allocated; | |
18098 | + | |
18099 | + return needed > available ? DIV_ROUND_UP(needed - available, 2) : 0; | |
18100 | +} | |
18101 | + | |
18102 | +static unsigned long current_image_size(void) | |
2380c486 | 18103 | +{ |
0ada99ac | 18104 | + return pagedir1.size + pagedir2.size + header_storage_needed; |
2380c486 JR |
18105 | +} |
18106 | + | |
92bca44c | 18107 | +static unsigned long storage_still_required(void) |
2380c486 | 18108 | +{ |
92bca44c AM |
18109 | + unsigned long needed = main_storage_needed(1, 1); |
18110 | + return needed > storage_limit ? needed - storage_limit : 0; | |
2380c486 JR |
18111 | +} |
18112 | + | |
92bca44c | 18113 | +static unsigned long ram_still_required(void) |
2380c486 | 18114 | +{ |
92bca44c AM |
18115 | + unsigned long needed = MIN_FREE_RAM + toi_memory_for_modules(0) + |
18116 | + 2 * extra_pd1_pages_allowance, | |
de6743ae | 18117 | + available = real_nr_free_low_pages() + extra_pages_allocated; |
92bca44c | 18118 | + return needed > available ? needed - available : 0; |
2380c486 JR |
18119 | +} |
18120 | + | |
92bca44c | 18121 | +static unsigned long any_to_free(int use_image_size_limit) |
2380c486 | 18122 | +{ |
92bca44c AM |
18123 | + int use_soft_limit = use_image_size_limit && image_size_limit > 0; |
18124 | + unsigned long current_size = current_image_size(), | |
18125 | + soft_limit = use_soft_limit ? (image_size_limit << 8) : 0, | |
7e46296a AM |
18126 | + to_free = use_soft_limit ? (current_size > soft_limit ? |
18127 | + current_size - soft_limit : 0) : 0, | |
92bca44c AM |
18128 | + storage_limit = storage_still_required(), |
18129 | + ram_limit = ram_still_required(), | |
7e46296a | 18130 | + first_max = max(to_free, storage_limit); |
2380c486 JR |
18131 | + |
18132 | + return max(first_max, ram_limit); | |
18133 | +} | |
18134 | + | |
18135 | +static int need_pageset2(void) | |
18136 | +{ | |
18137 | + return (real_nr_free_low_pages() + extra_pages_allocated - | |
18138 | + 2 * extra_pd1_pages_allowance - MIN_FREE_RAM - | |
18139 | + toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size; | |
18140 | +} | |
18141 | + | |
18142 | +/* amount_needed | |
18143 | + * | |
18144 | + * Calculates the amount by which the image size needs to be reduced to meet | |
18145 | + * our constraints. | |
18146 | + */ | |
92bca44c | 18147 | +static unsigned long amount_needed(int use_image_size_limit) |
2380c486 JR |
18148 | +{ |
18149 | + return max(highpages_ps1_to_free() + lowpages_ps1_to_free(), | |
18150 | + any_to_free(use_image_size_limit)); | |
18151 | +} | |
18152 | + | |
92bca44c | 18153 | +static int image_not_ready(int use_image_size_limit) |
2380c486 JR |
18154 | +{ |
18155 | + toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, | |
7e46296a AM |
18156 | + "Amount still needed (%lu) > 0:%u," |
18157 | + " Storage allocd: %lu < %lu: %u.\n", | |
2380c486 JR |
18158 | + amount_needed(use_image_size_limit), |
18159 | + (amount_needed(use_image_size_limit) > 0), | |
2380c486 JR |
18160 | + main_storage_allocated, |
18161 | + main_storage_needed(1, 1), | |
18162 | + main_storage_allocated < main_storage_needed(1, 1)); | |
18163 | + | |
18164 | + toi_cond_pause(0, NULL); | |
18165 | + | |
18166 | + return (amount_needed(use_image_size_limit) > 0) || | |
2380c486 JR |
18167 | + main_storage_allocated < main_storage_needed(1, 1); |
18168 | +} | |
18169 | + | |
18170 | +static void display_failure_reason(int tries_exceeded) | |
18171 | +{ | |
92bca44c | 18172 | + unsigned long storage_required = storage_still_required(), |
2380c486 JR |
18173 | + ram_required = ram_still_required(), |
18174 | + high_ps1 = highpages_ps1_to_free(), | |
18175 | + low_ps1 = lowpages_ps1_to_free(); | |
18176 | + | |
18177 | + printk(KERN_INFO "Failed to prepare the image because...\n"); | |
18178 | + | |
92bca44c | 18179 | + if (!storage_limit) { |
2380c486 JR |
18180 | + printk(KERN_INFO "- You need some storage available to be " |
18181 | + "able to hibernate.\n"); | |
18182 | + return; | |
18183 | + } | |
18184 | + | |
18185 | + if (tries_exceeded) | |
18186 | + printk(KERN_INFO "- The maximum number of iterations was " | |
18187 | + "reached without successfully preparing the " | |
18188 | + "image.\n"); | |
18189 | + | |
2380c486 | 18190 | + if (storage_required) { |
92bca44c AM |
18191 | + printk(KERN_INFO " - We need at least %lu pages of storage " |
18192 | + "(ignoring the header), but only have %lu.\n", | |
2380c486 JR |
18193 | + main_storage_needed(1, 1), |
18194 | + main_storage_allocated); | |
18195 | + set_abort_result(TOI_INSUFFICIENT_STORAGE); | |
18196 | + } | |
18197 | + | |
18198 | + if (ram_required) { | |
92bca44c | 18199 | + printk(KERN_INFO " - We need %lu more free pages of low " |
2380c486 JR |
18200 | + "memory.\n", ram_required); |
18201 | + printk(KERN_INFO " Minimum free : %8d\n", MIN_FREE_RAM); | |
92bca44c | 18202 | + printk(KERN_INFO " + Reqd. by modules : %8lu\n", |
2380c486 | 18203 | + toi_memory_for_modules(0)); |
92bca44c | 18204 | + printk(KERN_INFO " + 2 * extra allow : %8lu\n", |
2380c486 | 18205 | + 2 * extra_pd1_pages_allowance); |
92bca44c | 18206 | + printk(KERN_INFO " - Currently free : %8lu\n", |
2380c486 | 18207 | + real_nr_free_low_pages()); |
de6743ae AM |
18208 | + printk(KERN_INFO " - Pages allocd : %8lu\n", |
18209 | + extra_pages_allocated); | |
2380c486 | 18210 | + printk(KERN_INFO " : ========\n"); |
92bca44c | 18211 | + printk(KERN_INFO " Still needed : %8lu\n", |
2380c486 JR |
18212 | + ram_required); |
18213 | + | |
18214 | + /* Print breakdown of memory needed for modules */ | |
18215 | + toi_memory_for_modules(1); | |
18216 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
18217 | + } | |
18218 | + | |
18219 | + if (high_ps1) { | |
92bca44c | 18220 | + printk(KERN_INFO "- We need to free %lu highmem pageset 1 " |
2380c486 JR |
18221 | + "pages.\n", high_ps1); |
18222 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
18223 | + } | |
18224 | + | |
18225 | + if (low_ps1) { | |
18226 | + printk(KERN_INFO " - We need to free %ld lowmem pageset 1 " | |
18227 | + "pages.\n", low_ps1); | |
18228 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
18229 | + } | |
18230 | +} | |
18231 | + | |
18232 | +static void display_stats(int always, int sub_extra_pd1_allow) | |
18233 | +{ | |
18234 | + char buffer[255]; | |
18235 | + snprintf(buffer, 254, | |
7e46296a AM |
18236 | + "Free:%lu(%lu). Sets:%lu(%lu),%lu(%lu). " |
18237 | + "Nosave:%lu-%lu=%lu. Storage:%lu/%lu(%lu=>%lu). " | |
18238 | + "Needed:%lu,%lu,%lu(%u,%lu,%lu,%ld) (PS2:%s)\n", | |
2380c486 JR |
18239 | + |
18240 | + /* Free */ | |
18241 | + real_nr_free_pages(all_zones_mask), | |
18242 | + real_nr_free_low_pages(), | |
18243 | + | |
18244 | + /* Sets */ | |
18245 | + pagedir1.size, pagedir1.size - get_highmem_size(pagedir1), | |
18246 | + pagedir2.size, pagedir2.size - get_highmem_size(pagedir2), | |
18247 | + | |
2380c486 JR |
18248 | + /* Nosave */ |
18249 | + num_nosave, extra_pages_allocated, | |
18250 | + num_nosave - extra_pages_allocated, | |
18251 | + | |
18252 | + /* Storage */ | |
18253 | + main_storage_allocated, | |
92bca44c | 18254 | + storage_limit, |
2380c486 JR |
18255 | + main_storage_needed(1, sub_extra_pd1_allow), |
18256 | + main_storage_needed(1, 1), | |
18257 | + | |
18258 | + /* Needed */ | |
18259 | + lowpages_ps1_to_free(), highpages_ps1_to_free(), | |
18260 | + any_to_free(1), | |
18261 | + MIN_FREE_RAM, toi_memory_for_modules(0), | |
92bca44c | 18262 | + extra_pd1_pages_allowance, |
7e46296a | 18263 | + image_size_limit, |
2380c486 JR |
18264 | + |
18265 | + need_pageset2() ? "yes" : "no"); | |
18266 | + | |
18267 | + if (always) | |
18268 | + printk("%s", buffer); | |
18269 | + else | |
18270 | + toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer); | |
18271 | +} | |
18272 | + | |
18273 | +/* generate_free_page_map | |
18274 | + * | |
18275 | + * Description: This routine generates a bitmap of free pages from the | |
18276 | + * lists used by the memory manager. We then use the bitmap | |
18277 | + * to quickly calculate which pages to save and in which | |
18278 | + * pagesets. | |
18279 | + */ | |
18280 | +static void generate_free_page_map(void) | |
18281 | +{ | |
5dd10c98 | 18282 | + int order, cpu, t; |
2380c486 JR |
18283 | + unsigned long flags, i; |
18284 | + struct zone *zone; | |
18285 | + struct list_head *curr; | |
5dd10c98 AM |
18286 | + unsigned long pfn; |
18287 | + struct page *page; | |
2380c486 | 18288 | + |
9474138d | 18289 | + for_each_populated_zone(zone) { |
5dd10c98 AM |
18290 | + |
18291 | + if (!zone->spanned_pages) | |
18292 | + continue; | |
18293 | + | |
2380c486 JR |
18294 | + spin_lock_irqsave(&zone->lock, flags); |
18295 | + | |
5dd10c98 AM |
18296 | + for (i = 0; i < zone->spanned_pages; i++) { |
18297 | + pfn = ZONE_START(zone) + i; | |
18298 | + | |
18299 | + if (!pfn_valid(pfn)) | |
18300 | + continue; | |
18301 | + | |
18302 | + page = pfn_to_page(pfn); | |
18303 | + | |
18304 | + ClearPageNosaveFree(page); | |
18305 | + } | |
2380c486 JR |
18306 | + |
18307 | + for_each_migratetype_order(order, t) { | |
18308 | + list_for_each(curr, | |
18309 | + &zone->free_area[order].free_list[t]) { | |
18310 | + unsigned long j; | |
18311 | + | |
18312 | + pfn = page_to_pfn(list_entry(curr, struct page, | |
18313 | + lru)); | |
18314 | + for (j = 0; j < (1UL << order); j++) | |
18315 | + SetPageNosaveFree(pfn_to_page(pfn + j)); | |
18316 | + } | |
18317 | + } | |
18318 | + | |
18319 | + for_each_online_cpu(cpu) { | |
de6743ae AM |
18320 | + struct per_cpu_pageset *pset = |
18321 | + per_cpu_ptr(zone->pageset, cpu); | |
2380c486 JR |
18322 | + struct per_cpu_pages *pcp = &pset->pcp; |
18323 | + struct page *page; | |
7e46296a | 18324 | + int t; |
2380c486 | 18325 | + |
7e46296a AM |
18326 | + for (t = 0; t < MIGRATE_PCPTYPES; t++) |
18327 | + list_for_each_entry(page, &pcp->lists[t], lru) | |
18328 | + SetPageNosaveFree(page); | |
2380c486 JR |
18329 | + } |
18330 | + | |
18331 | + spin_unlock_irqrestore(&zone->lock, flags); | |
18332 | + } | |
18333 | +} | |
18334 | + | |
18335 | +/* size_of_free_region | |
18336 | + * | |
18337 | + * Description: Return the number of pages that are free, beginning with and | |
18338 | + * including this one. | |
18339 | + */ | |
18340 | +static int size_of_free_region(struct zone *zone, unsigned long start_pfn) | |
18341 | +{ | |
18342 | + unsigned long this_pfn = start_pfn, | |
18343 | + end_pfn = ZONE_START(zone) + zone->spanned_pages - 1; | |
18344 | + | |
18345 | + while (this_pfn <= end_pfn && PageNosaveFree(pfn_to_page(this_pfn))) | |
18346 | + this_pfn++; | |
18347 | + | |
18348 | + return this_pfn - start_pfn; | |
18349 | +} | |
18350 | + | |
18351 | +/* flag_image_pages | |
18352 | + * | |
18353 | + * This routine generates our lists of pages to be stored in each | |
18354 | + * pageset. Since we store the data using extents, and adding new | |
18355 | + * extents might allocate a new extent page, this routine may well | |
18356 | + * be called more than once. | |
18357 | + */ | |
18358 | +static void flag_image_pages(int atomic_copy) | |
18359 | +{ | |
18360 | + int num_free = 0; | |
18361 | + unsigned long loop; | |
18362 | + struct zone *zone; | |
18363 | + | |
18364 | + pagedir1.size = 0; | |
18365 | + pagedir2.size = 0; | |
18366 | + | |
18367 | + set_highmem_size(pagedir1, 0); | |
18368 | + set_highmem_size(pagedir2, 0); | |
18369 | + | |
18370 | + num_nosave = 0; | |
18371 | + | |
18372 | + memory_bm_clear(pageset1_map); | |
18373 | + | |
18374 | + generate_free_page_map(); | |
18375 | + | |
18376 | + /* | |
18377 | + * Pages not to be saved are marked Nosave irrespective of being | |
18378 | + * reserved. | |
18379 | + */ | |
9474138d | 18380 | + for_each_populated_zone(zone) { |
2380c486 JR |
18381 | + int highmem = is_highmem(zone); |
18382 | + | |
2380c486 JR |
18383 | + for (loop = 0; loop < zone->spanned_pages; loop++) { |
18384 | + unsigned long pfn = ZONE_START(zone) + loop; | |
18385 | + struct page *page; | |
18386 | + int chunk_size; | |
18387 | + | |
18388 | + if (!pfn_valid(pfn)) | |
18389 | + continue; | |
18390 | + | |
18391 | + chunk_size = size_of_free_region(zone, pfn); | |
18392 | + if (chunk_size) { | |
18393 | + num_free += chunk_size; | |
18394 | + loop += chunk_size - 1; | |
18395 | + continue; | |
18396 | + } | |
18397 | + | |
18398 | + page = pfn_to_page(pfn); | |
18399 | + | |
18400 | + if (PageNosave(page)) { | |
18401 | + num_nosave++; | |
18402 | + continue; | |
18403 | + } | |
18404 | + | |
18405 | + page = highmem ? saveable_highmem_page(zone, pfn) : | |
18406 | + saveable_page(zone, pfn); | |
18407 | + | |
18408 | + if (!page) { | |
18409 | + num_nosave++; | |
18410 | + continue; | |
18411 | + } | |
18412 | + | |
18413 | + if (PagePageset2(page)) { | |
18414 | + pagedir2.size++; | |
18415 | + if (PageHighMem(page)) | |
18416 | + inc_highmem_size(pagedir2); | |
18417 | + else | |
18418 | + SetPagePageset1Copy(page); | |
18419 | + if (PageResave(page)) { | |
18420 | + SetPagePageset1(page); | |
18421 | + ClearPagePageset1Copy(page); | |
18422 | + pagedir1.size++; | |
18423 | + if (PageHighMem(page)) | |
18424 | + inc_highmem_size(pagedir1); | |
18425 | + } | |
18426 | + } else { | |
18427 | + pagedir1.size++; | |
18428 | + SetPagePageset1(page); | |
18429 | + if (PageHighMem(page)) | |
18430 | + inc_highmem_size(pagedir1); | |
18431 | + } | |
18432 | + } | |
18433 | + } | |
18434 | + | |
18435 | + if (!atomic_copy) | |
18436 | + toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0, | |
18437 | + "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)" | |
18438 | + " + NumFree (%d) = %d.\n", | |
18439 | + pagedir1.size, pagedir2.size, num_nosave, num_free, | |
18440 | + pagedir1.size + pagedir2.size + num_nosave + num_free); | |
18441 | +} | |
18442 | + | |
18443 | +void toi_recalculate_image_contents(int atomic_copy) | |
18444 | +{ | |
18445 | + memory_bm_clear(pageset1_map); | |
18446 | + if (!atomic_copy) { | |
18447 | + unsigned long pfn; | |
18448 | + memory_bm_position_reset(pageset2_map); | |
18449 | + for (pfn = memory_bm_next_pfn(pageset2_map); | |
18450 | + pfn != BM_END_OF_MAP; | |
18451 | + pfn = memory_bm_next_pfn(pageset2_map)) | |
18452 | + ClearPagePageset1Copy(pfn_to_page(pfn)); | |
18453 | + /* Need to call this before getting pageset1_size! */ | |
18454 | + toi_mark_pages_for_pageset2(); | |
18455 | + } | |
18456 | + flag_image_pages(atomic_copy); | |
18457 | + | |
18458 | + if (!atomic_copy) { | |
92bca44c | 18459 | + storage_limit = toiActiveAllocator->storage_available(); |
2380c486 JR |
18460 | + display_stats(0, 0); |
18461 | + } | |
18462 | +} | |
18463 | + | |
18464 | +/* update_image | |
18465 | + * | |
18466 | + * Allocate [more] memory and storage for the image. | |
18467 | + */ | |
18468 | +static void update_image(int ps2_recalc) | |
18469 | +{ | |
92bca44c AM |
18470 | + int old_header_req; |
18471 | + unsigned long seek, wanted, got; | |
2380c486 | 18472 | + |
2380c486 JR |
18473 | + /* Include allowance for growth in pagedir1 while writing pagedir 2 */ |
18474 | + wanted = pagedir1.size + extra_pd1_pages_allowance - | |
18475 | + get_lowmem_size(pagedir2); | |
18476 | + if (wanted > extra_pages_allocated) { | |
18477 | + got = toi_allocate_extra_pagedir_memory(wanted); | |
18478 | + if (wanted < got) { | |
18479 | + toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, | |
18480 | + "Want %d extra pages for pageset1, got %d.\n", | |
18481 | + wanted, got); | |
18482 | + return; | |
18483 | + } | |
18484 | + } | |
18485 | + | |
18486 | + if (ps2_recalc) | |
18487 | + goto recalc; | |
18488 | + | |
18489 | + thaw_kernel_threads(); | |
18490 | + | |
18491 | + /* | |
18492 | + * Allocate remaining storage space, if possible, up to the | |
18493 | + * maximum we know we'll need. It's okay to allocate the | |
18494 | + * maximum if the writer is the swapwriter, but | |
18495 | + * we don't want to grab all available space on an NFS share. | |
18496 | + * We therefore ignore the expected compression ratio here, | |
18497 | + * thereby trying to allocate the maximum image size we could | |
18498 | + * need (assuming compression doesn't expand the image), but | |
18499 | + * don't complain if we can't get the full amount we're after. | |
18500 | + */ | |
18501 | + | |
0ada99ac | 18502 | + do { |
92bca44c AM |
18503 | + int result; |
18504 | + | |
0ada99ac | 18505 | + old_header_req = header_storage_needed; |
18506 | + toiActiveAllocator->reserve_header_space(header_storage_needed); | |
2380c486 | 18507 | + |
0ada99ac | 18508 | + /* How much storage is free with the reservation applied? */ |
92bca44c AM |
18509 | + storage_limit = toiActiveAllocator->storage_available(); |
18510 | + seek = min(storage_limit, main_storage_needed(0, 0)); | |
2380c486 | 18511 | + |
7e46296a | 18512 | + result = toiActiveAllocator->allocate_storage(seek); |
92bca44c AM |
18513 | + if (result) |
18514 | + printk("Failed to allocate storage (%d).\n", result); | |
2380c486 | 18515 | + |
0ada99ac | 18516 | + main_storage_allocated = |
18517 | + toiActiveAllocator->storage_allocated(); | |
2380c486 | 18518 | + |
0ada99ac | 18519 | + /* Need more header because more storage allocated? */ |
18520 | + header_storage_needed = get_header_storage_needed(); | |
2380c486 | 18521 | + |
0ada99ac | 18522 | + } while (header_storage_needed > old_header_req); |
2380c486 JR |
18523 | + |
18524 | + if (freeze_processes()) | |
18525 | + set_abort_result(TOI_FREEZING_FAILED); | |
18526 | + | |
18527 | +recalc: | |
18528 | + toi_recalculate_image_contents(0); | |
18529 | +} | |
18530 | + | |
18531 | +/* attempt_to_freeze | |
18532 | + * | |
18533 | + * Try to freeze processes. | |
18534 | + */ | |
18535 | + | |
18536 | +static int attempt_to_freeze(void) | |
18537 | +{ | |
18538 | + int result; | |
18539 | + | |
18540 | + /* Stop processes before checking again */ | |
18541 | + thaw_processes(); | |
18542 | + toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing " | |
18543 | + "filesystems."); | |
18544 | + result = freeze_processes(); | |
18545 | + | |
18546 | + if (result) | |
18547 | + set_abort_result(TOI_FREEZING_FAILED); | |
18548 | + | |
18549 | + return result; | |
18550 | +} | |
18551 | + | |
18552 | +/* eat_memory | |
18553 | + * | |
18554 | + * Try to free some memory, either to meet hard or soft constraints on the image | |
18555 | + * characteristics. | |
18556 | + * | |
18557 | + * Hard constraints: | |
18558 | + * - Pageset1 must be < half of memory; | |
18559 | + * - We must have enough memory free at resume time to have pageset1 | |
18560 | + * be able to be loaded in pages that don't conflict with where it has to | |
18561 | + * be restored. | |
18562 | + * Soft constraints | |
18563 | + * - User specificied image size limit. | |
18564 | + */ | |
18565 | +static void eat_memory(void) | |
18566 | +{ | |
92bca44c | 18567 | + unsigned long amount_wanted = 0; |
2380c486 JR |
18568 | + int did_eat_memory = 0; |
18569 | + | |
18570 | + /* | |
18571 | + * Note that if we have enough storage space and enough free memory, we | |
18572 | + * may exit without eating anything. We give up when the last 10 | |
18573 | + * iterations ate no extra pages because we're not going to get much | |
18574 | + * more anyway, but the few pages we get will take a lot of time. | |
18575 | + * | |
18576 | + * We freeze processes before beginning, and then unfreeze them if we | |
18577 | + * need to eat memory until we think we have enough. If our attempts | |
18578 | + * to freeze fail, we give up and abort. | |
18579 | + */ | |
18580 | + | |
2380c486 JR |
18581 | + amount_wanted = amount_needed(1); |
18582 | + | |
18583 | + switch (image_size_limit) { | |
18584 | + case -1: /* Don't eat any memory */ | |
18585 | + if (amount_wanted > 0) { | |
18586 | + set_abort_result(TOI_WOULD_EAT_MEMORY); | |
18587 | + return; | |
18588 | + } | |
18589 | + break; | |
18590 | + case -2: /* Free caches only */ | |
18591 | + drop_pagecache(); | |
18592 | + toi_recalculate_image_contents(0); | |
18593 | + amount_wanted = amount_needed(1); | |
2380c486 JR |
18594 | + break; |
18595 | + default: | |
18596 | + break; | |
18597 | + } | |
18598 | + | |
18599 | + if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) && | |
18600 | + image_size_limit != -1) { | |
de6743ae AM |
18601 | + unsigned long request = amount_wanted; |
18602 | + unsigned long high_req = max(highpages_ps1_to_free(), | |
18603 | + any_to_free(1)); | |
18604 | + unsigned long low_req = lowpages_ps1_to_free(); | |
18605 | + unsigned long got = 0; | |
2380c486 JR |
18606 | + |
18607 | + toi_prepare_status(CLEAR_BAR, | |
18608 | + "Seeking to free %ldMB of memory.", | |
18609 | + MB(amount_wanted)); | |
18610 | + | |
18611 | + thaw_kernel_threads(); | |
18612 | + | |
18613 | + /* | |
18614 | + * Ask for too many because shrink_all_memory doesn't | |
18615 | + * currently return enough most of the time. | |
18616 | + */ | |
de6743ae AM |
18617 | + |
18618 | + if (low_req) | |
18619 | + got = shrink_memory_mask(low_req, GFP_KERNEL); | |
18620 | + if (high_req) | |
18621 | + shrink_memory_mask(high_req - got, GFP_HIGHUSER); | |
2380c486 JR |
18622 | + |
18623 | + did_eat_memory = 1; | |
18624 | + | |
18625 | + toi_recalculate_image_contents(0); | |
18626 | + | |
18627 | + amount_wanted = amount_needed(1); | |
18628 | + | |
de6743ae AM |
18629 | + printk(KERN_DEBUG "Asked shrink_all_memory for %ld low pages &" |
18630 | + " %ld pages from anywhere, got %ld.\n", | |
18631 | + high_req, low_req, | |
7e46296a | 18632 | + request - amount_wanted); |
2380c486 JR |
18633 | + |
18634 | + toi_cond_pause(0, NULL); | |
18635 | + | |
18636 | + if (freeze_processes()) | |
18637 | + set_abort_result(TOI_FREEZING_FAILED); | |
18638 | + } | |
18639 | + | |
18640 | + if (did_eat_memory) | |
18641 | + toi_recalculate_image_contents(0); | |
18642 | +} | |
18643 | + | |
18644 | +/* toi_prepare_image | |
18645 | + * | |
18646 | + * Entry point to the whole image preparation section. | |
18647 | + * | |
18648 | + * We do four things: | |
18649 | + * - Freeze processes; | |
18650 | + * - Ensure image size constraints are met; | |
18651 | + * - Complete all the preparation for saving the image, | |
18652 | + * including allocation of storage. The only memory | |
18653 | + * that should be needed when we're finished is that | |
18654 | + * for actually storing the image (and we know how | |
18655 | + * much is needed for that because the modules tell | |
18656 | + * us). | |
18657 | + * - Make sure that all dirty buffers are written out. | |
18658 | + */ | |
18659 | +#define MAX_TRIES 2 | |
18660 | +int toi_prepare_image(void) | |
18661 | +{ | |
18662 | + int result = 1, tries = 1; | |
18663 | + | |
2380c486 JR |
18664 | + main_storage_allocated = 0; |
18665 | + no_ps2_needed = 0; | |
18666 | + | |
18667 | + if (attempt_to_freeze()) | |
18668 | + return 1; | |
18669 | + | |
18670 | + if (!extra_pd1_pages_allowance) | |
18671 | + get_extra_pd1_allowance(); | |
18672 | + | |
92bca44c | 18673 | + storage_limit = toiActiveAllocator->storage_available(); |
2380c486 | 18674 | + |
92bca44c | 18675 | + if (!storage_limit) { |
2380c486 JR |
18676 | + printk(KERN_INFO "No storage available. Didn't try to prepare " |
18677 | + "an image.\n"); | |
18678 | + display_failure_reason(0); | |
18679 | + set_abort_result(TOI_NOSTORAGE_AVAILABLE); | |
18680 | + return 1; | |
18681 | + } | |
18682 | + | |
18683 | + if (build_attention_list()) { | |
18684 | + abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, | |
18685 | + "Unable to successfully prepare the image.\n"); | |
18686 | + return 1; | |
18687 | + } | |
18688 | + | |
9474138d AM |
18689 | + toi_recalculate_image_contents(0); |
18690 | + | |
2380c486 JR |
18691 | + do { |
18692 | + toi_prepare_status(CLEAR_BAR, | |
18693 | + "Preparing Image. Try %d.", tries); | |
18694 | + | |
18695 | + eat_memory(); | |
18696 | + | |
18697 | + if (test_result_state(TOI_ABORTED)) | |
18698 | + break; | |
18699 | + | |
18700 | + update_image(0); | |
18701 | + | |
18702 | + tries++; | |
18703 | + | |
18704 | + } while (image_not_ready(1) && tries <= MAX_TRIES && | |
18705 | + !test_result_state(TOI_ABORTED)); | |
18706 | + | |
18707 | + result = image_not_ready(0); | |
18708 | + | |
18709 | + if (!test_result_state(TOI_ABORTED)) { | |
18710 | + if (result) { | |
18711 | + display_stats(1, 0); | |
18712 | + display_failure_reason(tries > MAX_TRIES); | |
18713 | + abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, | |
18714 | + "Unable to successfully prepare the image.\n"); | |
18715 | + } else { | |
18716 | + /* Pageset 2 needed? */ | |
18717 | + if (!need_pageset2() && | |
18718 | + test_action_state(TOI_NO_PS2_IF_UNNEEDED)) { | |
18719 | + no_ps2_needed = 1; | |
9474138d | 18720 | + toi_recalculate_image_contents(0); |
2380c486 JR |
18721 | + update_image(1); |
18722 | + } | |
18723 | + | |
18724 | + toi_cond_pause(1, "Image preparation complete."); | |
18725 | + } | |
18726 | + } | |
18727 | + | |
18728 | + return result ? result : allocate_checksum_pages(); | |
18729 | +} | |
18730 | diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h | |
18731 | new file mode 100644 | |
5dd10c98 | 18732 | index 0000000..7b52e9e |
2380c486 JR |
18733 | --- /dev/null |
18734 | +++ b/kernel/power/tuxonice_prepare_image.h | |
0ada99ac | 18735 | @@ -0,0 +1,36 @@ |
2380c486 JR |
18736 | +/* |
18737 | + * kernel/power/tuxonice_prepare_image.h | |
18738 | + * | |
5dd10c98 | 18739 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
18740 | + * |
18741 | + * This file is released under the GPLv2. | |
18742 | + * | |
18743 | + */ | |
18744 | + | |
18745 | +#include <asm/sections.h> | |
18746 | + | |
18747 | +extern int toi_prepare_image(void); | |
18748 | +extern void toi_recalculate_image_contents(int storage_available); | |
92bca44c | 18749 | +extern unsigned long real_nr_free_pages(unsigned long zone_idx_mask); |
7e46296a | 18750 | +extern long image_size_limit; |
2380c486 | 18751 | +extern void toi_free_extra_pagedir_memory(void); |
92bca44c | 18752 | +extern unsigned long extra_pd1_pages_allowance; |
2380c486 JR |
18753 | +extern void free_attention_list(void); |
18754 | + | |
18755 | +#define MIN_FREE_RAM 100 | |
18756 | +#define MIN_EXTRA_PAGES_ALLOWANCE 500 | |
18757 | + | |
18758 | +#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1)) | |
18759 | +#ifdef CONFIG_HIGHMEM | |
18760 | +#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM)) | |
18761 | +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \ | |
18762 | + (1 << ZONE_HIGHMEM))) | |
18763 | +#else | |
18764 | +#define real_nr_free_high_pages() (0) | |
18765 | +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask)) | |
18766 | + | |
18767 | +/* For eat_memory function */ | |
18768 | +#define ZONE_HIGHMEM (MAX_NR_ZONES + 1) | |
18769 | +#endif | |
18770 | + | |
92bca44c | 18771 | +unsigned long get_header_storage_needed(void); |
2380c486 JR |
18772 | diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c |
18773 | new file mode 100644 | |
5dd10c98 | 18774 | index 0000000..be962ee |
2380c486 JR |
18775 | --- /dev/null |
18776 | +++ b/kernel/power/tuxonice_storage.c | |
18777 | @@ -0,0 +1,282 @@ | |
18778 | +/* | |
18779 | + * kernel/power/tuxonice_storage.c | |
18780 | + * | |
5dd10c98 | 18781 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
18782 | + * |
18783 | + * This file is released under the GPLv2. | |
18784 | + * | |
18785 | + * Routines for talking to a userspace program that manages storage. | |
18786 | + * | |
18787 | + * The kernel side: | |
18788 | + * - starts the userspace program; | |
18789 | + * - sends messages telling it when to open and close the connection; | |
18790 | + * - tells it when to quit; | |
18791 | + * | |
18792 | + * The user space side: | |
18793 | + * - passes messages regarding status; | |
18794 | + * | |
18795 | + */ | |
18796 | + | |
18797 | +#include <linux/suspend.h> | |
18798 | +#include <linux/freezer.h> | |
18799 | + | |
18800 | +#include "tuxonice_sysfs.h" | |
18801 | +#include "tuxonice_modules.h" | |
18802 | +#include "tuxonice_netlink.h" | |
18803 | +#include "tuxonice_storage.h" | |
18804 | +#include "tuxonice_ui.h" | |
18805 | + | |
18806 | +static struct user_helper_data usm_helper_data; | |
18807 | +static struct toi_module_ops usm_ops; | |
18808 | +static int message_received, usm_prepare_count; | |
18809 | +static int storage_manager_last_action, storage_manager_action; | |
18810 | + | |
18811 | +static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |
18812 | +{ | |
18813 | + int type; | |
18814 | + int *data; | |
18815 | + | |
18816 | + type = nlh->nlmsg_type; | |
18817 | + | |
18818 | + /* A control message: ignore them */ | |
18819 | + if (type < NETLINK_MSG_BASE) | |
18820 | + return 0; | |
18821 | + | |
18822 | + /* Unknown message: reply with EINVAL */ | |
18823 | + if (type >= USM_MSG_MAX) | |
18824 | + return -EINVAL; | |
18825 | + | |
18826 | + /* All operations require privileges, even GET */ | |
18827 | + if (security_netlink_recv(skb, CAP_NET_ADMIN)) | |
18828 | + return -EPERM; | |
18829 | + | |
18830 | + /* Only allow one task to receive NOFREEZE privileges */ | |
18831 | + if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1) | |
18832 | + return -EBUSY; | |
18833 | + | |
18834 | + data = (int *) NLMSG_DATA(nlh); | |
18835 | + | |
18836 | + switch (type) { | |
18837 | + case USM_MSG_SUCCESS: | |
18838 | + case USM_MSG_FAILED: | |
18839 | + message_received = type; | |
18840 | + complete(&usm_helper_data.wait_for_process); | |
18841 | + break; | |
18842 | + default: | |
18843 | + printk(KERN_INFO "Storage manager doesn't recognise " | |
18844 | + "message %d.\n", type); | |
18845 | + } | |
18846 | + | |
18847 | + return 1; | |
18848 | +} | |
18849 | + | |
18850 | +#ifdef CONFIG_NET | |
18851 | +static int activations; | |
18852 | + | |
18853 | +int toi_activate_storage(int force) | |
18854 | +{ | |
18855 | + int tries = 1; | |
18856 | + | |
18857 | + if (usm_helper_data.pid == -1 || !usm_ops.enabled) | |
18858 | + return 0; | |
18859 | + | |
18860 | + message_received = 0; | |
18861 | + activations++; | |
18862 | + | |
18863 | + if (activations > 1 && !force) | |
18864 | + return 0; | |
18865 | + | |
18866 | + while ((!message_received || message_received == USM_MSG_FAILED) && | |
18867 | + tries < 2) { | |
18868 | + toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt " | |
18869 | + "%d.\n", tries); | |
18870 | + | |
18871 | + init_completion(&usm_helper_data.wait_for_process); | |
18872 | + | |
18873 | + toi_send_netlink_message(&usm_helper_data, | |
18874 | + USM_MSG_CONNECT, | |
18875 | + NULL, 0); | |
18876 | + | |
18877 | + /* Wait 2 seconds for the userspace process to make contact */ | |
18878 | + wait_for_completion_timeout(&usm_helper_data.wait_for_process, | |
18879 | + 2*HZ); | |
18880 | + | |
18881 | + tries++; | |
18882 | + } | |
18883 | + | |
18884 | + return 0; | |
18885 | +} | |
18886 | + | |
18887 | +int toi_deactivate_storage(int force) | |
18888 | +{ | |
18889 | + if (usm_helper_data.pid == -1 || !usm_ops.enabled) | |
18890 | + return 0; | |
18891 | + | |
18892 | + message_received = 0; | |
18893 | + activations--; | |
18894 | + | |
18895 | + if (activations && !force) | |
18896 | + return 0; | |
18897 | + | |
18898 | + init_completion(&usm_helper_data.wait_for_process); | |
18899 | + | |
18900 | + toi_send_netlink_message(&usm_helper_data, | |
18901 | + USM_MSG_DISCONNECT, | |
18902 | + NULL, 0); | |
18903 | + | |
18904 | + wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ); | |
18905 | + | |
18906 | + if (!message_received || message_received == USM_MSG_FAILED) { | |
18907 | + printk(KERN_INFO "Returning failure disconnecting storage.\n"); | |
18908 | + return 1; | |
18909 | + } | |
18910 | + | |
18911 | + return 0; | |
18912 | +} | |
18913 | +#endif | |
18914 | + | |
18915 | +static void storage_manager_simulate(void) | |
18916 | +{ | |
18917 | + printk(KERN_INFO "--- Storage manager simulate ---\n"); | |
18918 | + toi_prepare_usm(); | |
18919 | + schedule(); | |
18920 | + printk(KERN_INFO "--- Activate storage 1 ---\n"); | |
18921 | + toi_activate_storage(1); | |
18922 | + schedule(); | |
18923 | + printk(KERN_INFO "--- Deactivate storage 1 ---\n"); | |
18924 | + toi_deactivate_storage(1); | |
18925 | + schedule(); | |
18926 | + printk(KERN_INFO "--- Cleanup usm ---\n"); | |
18927 | + toi_cleanup_usm(); | |
18928 | + schedule(); | |
18929 | + printk(KERN_INFO "--- Storage manager simulate ends ---\n"); | |
18930 | +} | |
18931 | + | |
18932 | +static int usm_storage_needed(void) | |
18933 | +{ | |
18934 | + return strlen(usm_helper_data.program); | |
18935 | +} | |
18936 | + | |
18937 | +static int usm_save_config_info(char *buf) | |
18938 | +{ | |
18939 | + int len = strlen(usm_helper_data.program); | |
18940 | + memcpy(buf, usm_helper_data.program, len); | |
18941 | + return len; | |
18942 | +} | |
18943 | + | |
18944 | +static void usm_load_config_info(char *buf, int size) | |
18945 | +{ | |
18946 | + /* Don't load the saved path if one has already been set */ | |
18947 | + if (usm_helper_data.program[0]) | |
18948 | + return; | |
18949 | + | |
18950 | + memcpy(usm_helper_data.program, buf, size); | |
18951 | +} | |
18952 | + | |
18953 | +static int usm_memory_needed(void) | |
18954 | +{ | |
18955 | + /* ball park figure of 32 pages */ | |
18956 | + return 32 * PAGE_SIZE; | |
18957 | +} | |
18958 | + | |
18959 | +/* toi_prepare_usm | |
18960 | + */ | |
18961 | +int toi_prepare_usm(void) | |
18962 | +{ | |
18963 | + usm_prepare_count++; | |
18964 | + | |
18965 | + if (usm_prepare_count > 1 || !usm_ops.enabled) | |
18966 | + return 0; | |
18967 | + | |
18968 | + usm_helper_data.pid = -1; | |
18969 | + | |
18970 | + if (!*usm_helper_data.program) | |
18971 | + return 0; | |
18972 | + | |
18973 | + toi_netlink_setup(&usm_helper_data); | |
18974 | + | |
18975 | + if (usm_helper_data.pid == -1) | |
18976 | + printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't" | |
18977 | + " start it.\n"); | |
18978 | + | |
18979 | + toi_activate_storage(0); | |
18980 | + | |
18981 | + return usm_helper_data.pid != -1; | |
18982 | +} | |
18983 | + | |
18984 | +void toi_cleanup_usm(void) | |
18985 | +{ | |
18986 | + usm_prepare_count--; | |
18987 | + | |
18988 | + if (usm_helper_data.pid > -1 && !usm_prepare_count) { | |
18989 | + toi_deactivate_storage(0); | |
18990 | + toi_netlink_close(&usm_helper_data); | |
18991 | + } | |
18992 | +} | |
18993 | + | |
18994 | +static void storage_manager_activate(void) | |
18995 | +{ | |
18996 | + if (storage_manager_action == storage_manager_last_action) | |
18997 | + return; | |
18998 | + | |
18999 | + if (storage_manager_action) | |
19000 | + toi_prepare_usm(); | |
19001 | + else | |
19002 | + toi_cleanup_usm(); | |
19003 | + | |
19004 | + storage_manager_last_action = storage_manager_action; | |
19005 | +} | |
19006 | + | |
19007 | +/* | |
19008 | + * User interface specific /sys/power/tuxonice entries. | |
19009 | + */ | |
19010 | + | |
19011 | +static struct toi_sysfs_data sysfs_params[] = { | |
19012 | + SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate), | |
19013 | + SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL), | |
19014 | + SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0, | |
19015 | + NULL), | |
19016 | + SYSFS_INT("activate_storage", SYSFS_RW , &storage_manager_action, 0, 1, | |
19017 | + 0, storage_manager_activate) | |
19018 | +}; | |
19019 | + | |
19020 | +static struct toi_module_ops usm_ops = { | |
19021 | + .type = MISC_MODULE, | |
19022 | + .name = "usm", | |
19023 | + .directory = "storage_manager", | |
19024 | + .module = THIS_MODULE, | |
19025 | + .storage_needed = usm_storage_needed, | |
19026 | + .save_config_info = usm_save_config_info, | |
19027 | + .load_config_info = usm_load_config_info, | |
19028 | + .memory_needed = usm_memory_needed, | |
19029 | + | |
19030 | + .sysfs_data = sysfs_params, | |
19031 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
19032 | + sizeof(struct toi_sysfs_data), | |
19033 | +}; | |
19034 | + | |
19035 | +/* toi_usm_sysfs_init | |
19036 | + * Description: Boot time initialisation for user interface. | |
19037 | + */ | |
19038 | +int toi_usm_init(void) | |
19039 | +{ | |
19040 | + usm_helper_data.nl = NULL; | |
19041 | + usm_helper_data.program[0] = '\0'; | |
19042 | + usm_helper_data.pid = -1; | |
19043 | + usm_helper_data.skb_size = 0; | |
19044 | + usm_helper_data.pool_limit = 6; | |
19045 | + usm_helper_data.netlink_id = NETLINK_TOI_USM; | |
19046 | + usm_helper_data.name = "userspace storage manager"; | |
19047 | + usm_helper_data.rcv_msg = usm_user_rcv_msg; | |
19048 | + usm_helper_data.interface_version = 2; | |
19049 | + usm_helper_data.must_init = 0; | |
19050 | + init_completion(&usm_helper_data.wait_for_process); | |
19051 | + | |
19052 | + return toi_register_module(&usm_ops); | |
19053 | +} | |
19054 | + | |
19055 | +void toi_usm_exit(void) | |
19056 | +{ | |
19057 | + toi_netlink_close_complete(&usm_helper_data); | |
19058 | + toi_unregister_module(&usm_ops); | |
19059 | +} | |
19060 | diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h | |
19061 | new file mode 100644 | |
5dd10c98 | 19062 | index 0000000..8c6b5a7 |
2380c486 JR |
19063 | --- /dev/null |
19064 | +++ b/kernel/power/tuxonice_storage.h | |
19065 | @@ -0,0 +1,45 @@ | |
19066 | +/* | |
19067 | + * kernel/power/tuxonice_storage.h | |
19068 | + * | |
5dd10c98 | 19069 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
19070 | + * |
19071 | + * This file is released under the GPLv2. | |
19072 | + */ | |
19073 | + | |
19074 | +#ifdef CONFIG_NET | |
19075 | +int toi_prepare_usm(void); | |
19076 | +void toi_cleanup_usm(void); | |
19077 | + | |
19078 | +int toi_activate_storage(int force); | |
19079 | +int toi_deactivate_storage(int force); | |
19080 | +extern int toi_usm_init(void); | |
19081 | +extern void toi_usm_exit(void); | |
19082 | +#else | |
19083 | +static inline int toi_usm_init(void) { return 0; } | |
19084 | +static inline void toi_usm_exit(void) { } | |
19085 | + | |
19086 | +static inline int toi_activate_storage(int force) | |
19087 | +{ | |
19088 | + return 0; | |
19089 | +} | |
19090 | + | |
19091 | +static inline int toi_deactivate_storage(int force) | |
19092 | +{ | |
19093 | + return 0; | |
19094 | +} | |
19095 | + | |
19096 | +static inline int toi_prepare_usm(void) { return 0; } | |
19097 | +static inline void toi_cleanup_usm(void) { } | |
19098 | +#endif | |
19099 | + | |
19100 | +enum { | |
19101 | + USM_MSG_BASE = 0x10, | |
19102 | + | |
19103 | + /* Kernel -> Userspace */ | |
19104 | + USM_MSG_CONNECT = 0x30, | |
19105 | + USM_MSG_DISCONNECT = 0x31, | |
19106 | + USM_MSG_SUCCESS = 0x40, | |
19107 | + USM_MSG_FAILED = 0x41, | |
19108 | + | |
19109 | + USM_MSG_MAX, | |
19110 | +}; | |
19111 | diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c | |
19112 | new file mode 100644 | |
cacc47f8 | 19113 | index 0000000..a4dbceb |
2380c486 JR |
19114 | --- /dev/null |
19115 | +++ b/kernel/power/tuxonice_swap.c | |
5dd10c98 | 19116 | @@ -0,0 +1,487 @@ |
2380c486 JR |
19117 | +/* |
19118 | + * kernel/power/tuxonice_swap.c | |
19119 | + * | |
5dd10c98 | 19120 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
19121 | + * |
19122 | + * Distributed under GPLv2. | |
19123 | + * | |
19124 | + * This file encapsulates functions for usage of swap space as a | |
19125 | + * backing store. | |
19126 | + */ | |
19127 | + | |
19128 | +#include <linux/suspend.h> | |
2380c486 JR |
19129 | +#include <linux/blkdev.h> |
19130 | +#include <linux/swapops.h> | |
19131 | +#include <linux/swap.h> | |
19132 | +#include <linux/syscalls.h> | |
cacc47f8 | 19133 | +#include <linux/fs_uuid.h> |
2380c486 JR |
19134 | + |
19135 | +#include "tuxonice.h" | |
19136 | +#include "tuxonice_sysfs.h" | |
19137 | +#include "tuxonice_modules.h" | |
19138 | +#include "tuxonice_io.h" | |
19139 | +#include "tuxonice_ui.h" | |
19140 | +#include "tuxonice_extent.h" | |
7e46296a | 19141 | +#include "tuxonice_bio.h" |
2380c486 JR |
19142 | +#include "tuxonice_alloc.h" |
19143 | +#include "tuxonice_builtin.h" | |
19144 | + | |
19145 | +static struct toi_module_ops toi_swapops; | |
19146 | + | |
7e46296a AM |
19147 | +/* For swapfile automatically swapon/off'd. */ |
19148 | +static char swapfilename[255] = ""; | |
19149 | +static int toi_swapon_status; | |
2380c486 | 19150 | + |
7e46296a AM |
19151 | +/* Swap Pages */ |
19152 | +static unsigned long swap_allocated; | |
2380c486 | 19153 | + |
7e46296a | 19154 | +static struct sysinfo swapinfo; |
2380c486 | 19155 | + |
7e46296a AM |
19156 | +/** |
19157 | + * enable_swapfile: Swapon the user specified swapfile prior to hibernating. | |
19158 | + * | |
19159 | + * Activate the given swapfile if it wasn't already enabled. Remember whether | |
19160 | + * we really did swapon it for swapoffing later. | |
2380c486 | 19161 | + */ |
7e46296a | 19162 | +static void enable_swapfile(void) |
2380c486 | 19163 | +{ |
7e46296a | 19164 | + int activateswapresult = -EINVAL; |
2380c486 | 19165 | + |
7e46296a AM |
19166 | + if (swapfilename[0]) { |
19167 | + /* Attempt to swap on with maximum priority */ | |
19168 | + activateswapresult = sys_swapon(swapfilename, 0xFFFF); | |
19169 | + if (activateswapresult && activateswapresult != -EBUSY) | |
19170 | + printk(KERN_ERR "TuxOnIce: The swapfile/partition " | |
19171 | + "specified by /sys/power/tuxonice/swap/swapfile" | |
19172 | + " (%s) could not be turned on (error %d). " | |
19173 | + "Attempting to continue.\n", | |
19174 | + swapfilename, activateswapresult); | |
19175 | + if (!activateswapresult) | |
19176 | + toi_swapon_status = 1; | |
19177 | + } | |
2380c486 JR |
19178 | +} |
19179 | + | |
7e46296a AM |
19180 | +/** |
19181 | + * disable_swapfile: Swapoff any file swaponed at the start of the cycle. | |
19182 | + * | |
19183 | + * If we did successfully swapon a file at the start of the cycle, swapoff | |
19184 | + * it now (finishing up). | |
19185 | + */ | |
19186 | +static void disable_swapfile(void) | |
2380c486 | 19187 | +{ |
7e46296a AM |
19188 | + if (!toi_swapon_status) |
19189 | + return; | |
2380c486 | 19190 | + |
7e46296a AM |
19191 | + sys_swapoff(swapfilename); |
19192 | + toi_swapon_status = 0; | |
2380c486 JR |
19193 | +} |
19194 | + | |
7e46296a AM |
19195 | +static int add_blocks_to_extent_chain(struct toi_bdev_info *chain, |
19196 | + unsigned long start, unsigned long end) | |
2380c486 JR |
19197 | +{ |
19198 | + if (test_action_state(TOI_TEST_BIO)) | |
7e46296a AM |
19199 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %lu-%lu to " |
19200 | + "chain %p.", start << chain->bmap_shift, | |
19201 | + end << chain->bmap_shift, chain); | |
2380c486 | 19202 | + |
7e46296a | 19203 | + return toi_add_to_extent_chain(&chain->blocks, start, end); |
2380c486 JR |
19204 | +} |
19205 | + | |
19206 | + | |
7e46296a | 19207 | +static int get_main_pool_phys_params(struct toi_bdev_info *chain) |
2380c486 JR |
19208 | +{ |
19209 | + struct hibernate_extent *extentpointer = NULL; | |
92bca44c | 19210 | + unsigned long address, extent_min = 0, extent_max = 0; |
7e46296a AM |
19211 | + int empty = 1; |
19212 | + | |
19213 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "get main pool phys params for " | |
19214 | + "chain %d.", chain->allocator_index); | |
19215 | + | |
19216 | + if (!chain->allocations.first) | |
19217 | + return 0; | |
2380c486 | 19218 | + |
7e46296a AM |
19219 | + if (chain->blocks.first) |
19220 | + toi_put_extent_chain(&chain->blocks); | |
2380c486 | 19221 | + |
7e46296a | 19222 | + toi_extent_for_each(&chain->allocations, extentpointer, address) { |
2380c486 | 19223 | + swp_entry_t swap_address = (swp_entry_t) { address }; |
5dd10c98 AM |
19224 | + struct block_device *bdev; |
19225 | + sector_t new_sector = map_swap_entry(swap_address, &bdev); | |
2380c486 | 19226 | + |
7e46296a AM |
19227 | + if (empty) { |
19228 | + empty = 0; | |
19229 | + extent_min = extent_max = new_sector; | |
e999739a | 19230 | + continue; |
7e46296a | 19231 | + } |
e999739a | 19232 | + |
7e46296a | 19233 | + if (new_sector == extent_max + 1) { |
2380c486 JR |
19234 | + extent_max++; |
19235 | + continue; | |
19236 | + } | |
19237 | + | |
7e46296a | 19238 | + if (add_blocks_to_extent_chain(chain, extent_min, extent_max)) { |
9474138d AM |
19239 | + printk(KERN_ERR "Out of memory while making block " |
19240 | + "chains.\n"); | |
2380c486 | 19241 | + return -ENOMEM; |
0ada99ac | 19242 | + } |
2380c486 JR |
19243 | + |
19244 | + extent_min = new_sector; | |
19245 | + extent_max = new_sector; | |
2380c486 JR |
19246 | + } |
19247 | + | |
7e46296a AM |
19248 | + if (!empty && |
19249 | + add_blocks_to_extent_chain(chain, extent_min, extent_max)) { | |
9474138d AM |
19250 | + printk(KERN_ERR "Out of memory while making block chains.\n"); |
19251 | + return -ENOMEM; | |
0ada99ac | 19252 | + } |
2380c486 | 19253 | + |
7e46296a | 19254 | + return 0; |
2380c486 JR |
19255 | +} |
19256 | + | |
19257 | +/* | |
e999739a | 19258 | + * Like si_swapinfo, except that we don't include ram backed swap (compcache!) |
19259 | + * and don't need to use the spinlocks (userspace is stopped when this | |
19260 | + * function is called). | |
19261 | + */ | |
5dd10c98 | 19262 | +void si_swapinfo_no_compcache(void) |
e999739a | 19263 | +{ |
19264 | + unsigned int i; | |
19265 | + | |
19266 | + si_swapinfo(&swapinfo); | |
5dd10c98 AM |
19267 | + swapinfo.freeswap = 0; |
19268 | + swapinfo.totalswap = 0; | |
e999739a | 19269 | + |
19270 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
19271 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
5dd10c98 | 19272 | + if (si && (si->flags & SWP_WRITEOK) && |
e999739a | 19273 | + (strncmp(si->bdev->bd_disk->disk_name, "ram", 3))) { |
5dd10c98 AM |
19274 | + swapinfo.totalswap += si->inuse_pages; |
19275 | + swapinfo.freeswap += si->pages - si->inuse_pages; | |
e999739a | 19276 | + } |
19277 | + } | |
19278 | +} | |
19279 | +/* | |
2380c486 JR |
19280 | + * We can't just remember the value from allocation time, because other |
19281 | + * processes might have allocated swap in the mean time. | |
19282 | + */ | |
92bca44c | 19283 | +static unsigned long toi_swap_storage_available(void) |
2380c486 | 19284 | +{ |
7e46296a | 19285 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "In toi_swap_storage_available."); |
5dd10c98 | 19286 | + si_swapinfo_no_compcache(); |
7e46296a | 19287 | + return swapinfo.freeswap + swap_allocated; |
2380c486 JR |
19288 | +} |
19289 | + | |
19290 | +static int toi_swap_initialise(int starting_cycle) | |
19291 | +{ | |
19292 | + if (!starting_cycle) | |
19293 | + return 0; | |
19294 | + | |
19295 | + enable_swapfile(); | |
7e46296a | 19296 | + return 0; |
2380c486 JR |
19297 | +} |
19298 | + | |
19299 | +static void toi_swap_cleanup(int ending_cycle) | |
19300 | +{ | |
19301 | + if (ending_cycle) | |
19302 | + disable_swapfile(); | |
7e46296a | 19303 | +} |
2380c486 | 19304 | + |
7e46296a AM |
19305 | +static void toi_swap_free_storage(struct toi_bdev_info *chain) |
19306 | +{ | |
19307 | + /* Free swap entries */ | |
19308 | + struct hibernate_extent *extentpointer; | |
19309 | + unsigned long extentvalue; | |
2380c486 | 19310 | + |
7e46296a AM |
19311 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing storage for chain %p.", |
19312 | + chain); | |
2380c486 | 19313 | + |
7e46296a AM |
19314 | + swap_allocated -= chain->allocations.size; |
19315 | + toi_extent_for_each(&chain->allocations, extentpointer, extentvalue) | |
19316 | + swap_free((swp_entry_t) { extentvalue }); | |
2380c486 | 19317 | + |
7e46296a AM |
19318 | + toi_put_extent_chain(&chain->allocations); |
19319 | +} | |
2380c486 | 19320 | + |
7e46296a AM |
19321 | +static void free_swap_range(unsigned long min, unsigned long max) |
19322 | +{ | |
19323 | + int j; | |
2380c486 | 19324 | + |
7e46296a AM |
19325 | + for (j = min; j <= max; j++) |
19326 | + swap_free((swp_entry_t) { j }); | |
19327 | + swap_allocated -= (max - min + 1); | |
19328 | +} | |
2380c486 | 19329 | + |
7e46296a AM |
19330 | +/* |
19331 | + * Allocation of a single swap type. Swap priorities are handled at the higher | |
19332 | + * level. | |
19333 | + */ | |
19334 | +static int toi_swap_allocate_storage(struct toi_bdev_info *chain, | |
19335 | + unsigned long request) | |
19336 | +{ | |
19337 | + int to_add = 0; | |
19338 | + unsigned long gotten = 0; | |
19339 | + unsigned long extent_min = 0, extent_max = 0; | |
2380c486 | 19340 | + |
7e46296a AM |
19341 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " Swap allocate storage: Asked to" |
19342 | + " allocate %lu pages from device %d.", request, | |
19343 | + chain->allocator_index); | |
2380c486 | 19344 | + |
7e46296a AM |
19345 | + while (gotten < request) { |
19346 | + swp_entry_t entry; | |
19347 | + unsigned long new_value; | |
2380c486 | 19348 | + |
7e46296a AM |
19349 | + entry = get_swap_page_of_type(chain->allocator_index); |
19350 | + if (!entry.val) | |
19351 | + break; | |
2380c486 | 19352 | + |
7e46296a AM |
19353 | + swap_allocated++; |
19354 | + new_value = entry.val; | |
19355 | + gotten++; | |
2380c486 | 19356 | + |
7e46296a AM |
19357 | + if (!to_add) { |
19358 | + to_add = 1; | |
19359 | + extent_min = new_value; | |
19360 | + extent_max = new_value; | |
2380c486 JR |
19361 | + continue; |
19362 | + } | |
19363 | + | |
7e46296a AM |
19364 | + if (new_value == extent_max + 1) { |
19365 | + extent_max++; | |
2380c486 JR |
19366 | + continue; |
19367 | + } | |
19368 | + | |
7e46296a AM |
19369 | + if (toi_add_to_extent_chain(&chain->allocations, extent_min, |
19370 | + extent_max)) { | |
19371 | + printk(KERN_INFO "Failed to allocate extent for " | |
19372 | + "%lu-%lu.\n", extent_min, extent_max); | |
19373 | + free_swap_range(extent_min, extent_max); | |
19374 | + swap_free(entry); | |
19375 | + gotten -= (extent_max - extent_min); | |
19376 | + /* Don't try to add again below */ | |
19377 | + to_add = 0; | |
19378 | + break; | |
19379 | + } | |
19380 | + | |
19381 | + extent_min = new_value; | |
19382 | + extent_max = new_value; | |
2380c486 JR |
19383 | + } |
19384 | + | |
7e46296a AM |
19385 | + if (to_add) { |
19386 | + int this_result = toi_add_to_extent_chain(&chain->allocations, | |
19387 | + extent_min, extent_max); | |
2380c486 | 19388 | + |
7e46296a AM |
19389 | + if (this_result) { |
19390 | + free_swap_range(extent_min, extent_max); | |
19391 | + gotten -= (extent_max - extent_min + 1); | |
19392 | + } | |
19393 | + } | |
2380c486 | 19394 | + |
7e46296a AM |
19395 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " Allocated %lu pages.", gotten); |
19396 | + return gotten; | |
2380c486 JR |
19397 | +} |
19398 | + | |
7e46296a | 19399 | +static int toi_swap_register_storage(void) |
2380c486 | 19400 | +{ |
5dd10c98 | 19401 | + int i, result = 0; |
7e46296a AM |
19402 | + |
19403 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_swap_register_storage."); | |
19404 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
19405 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
19406 | + struct toi_bdev_info *devinfo; | |
5dd10c98 AM |
19407 | + unsigned char *p; |
19408 | + unsigned char buf[256]; | |
19409 | + struct fs_info *fs_info; | |
19410 | + | |
19411 | + if (!si || !(si->flags & SWP_WRITEOK) || | |
7e46296a AM |
19412 | + !strncmp(si->bdev->bd_disk->disk_name, "ram", 3)) |
19413 | + continue; | |
19414 | + | |
19415 | + devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), | |
19416 | + GFP_ATOMIC); | |
19417 | + if (!devinfo) { | |
19418 | + printk("Failed to allocate devinfo struct for swap " | |
19419 | + "device %d.\n", i); | |
19420 | + return -ENOMEM; | |
19421 | + } | |
19422 | + | |
19423 | + devinfo->bdev = si->bdev; | |
19424 | + devinfo->allocator = &toi_swapops; | |
19425 | + devinfo->allocator_index = i; | |
19426 | + | |
5dd10c98 AM |
19427 | + fs_info = fs_info_from_block_dev(si->bdev); |
19428 | + if (fs_info && !IS_ERR(fs_info)) { | |
19429 | + memcpy(devinfo->uuid, &fs_info->uuid, 16); | |
19430 | + free_fs_info(fs_info); | |
19431 | + } else | |
19432 | + result = (int) PTR_ERR(fs_info); | |
19433 | + | |
19434 | + if (!fs_info) | |
19435 | + printk("fs_info from block dev returned %d.\n", result); | |
7e46296a AM |
19436 | + devinfo->dev_t = si->bdev->bd_dev; |
19437 | + devinfo->prio = si->prio; | |
19438 | + devinfo->bmap_shift = 3; | |
19439 | + devinfo->blocks_per_page = 1; | |
19440 | + | |
5dd10c98 AM |
19441 | + p = d_path(&si->swap_file->f_path, buf, sizeof(buf)); |
19442 | + sprintf(devinfo->name, "swap on %s", p); | |
19443 | + | |
7e46296a AM |
19444 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering swap storage:" |
19445 | + " Device %d (%lx), prio %d.", i, | |
19446 | + (unsigned long) devinfo->dev_t, devinfo->prio); | |
19447 | + toi_bio_ops.register_storage(devinfo); | |
19448 | + } | |
19449 | + | |
2380c486 JR |
19450 | + return 0; |
19451 | +} | |
19452 | + | |
19453 | +/* | |
19454 | + * workspace_size | |
19455 | + * | |
19456 | + * Description: | |
19457 | + * Returns the number of bytes of RAM needed for this | |
19458 | + * code to do its work. (Used when calculating whether | |
19459 | + * we have enough memory to be able to hibernate & resume). | |
19460 | + * | |
19461 | + */ | |
19462 | +static int toi_swap_memory_needed(void) | |
19463 | +{ | |
19464 | + return 1; | |
19465 | +} | |
19466 | + | |
19467 | +/* | |
19468 | + * Print debug info | |
19469 | + * | |
19470 | + * Description: | |
19471 | + */ | |
19472 | +static int toi_swap_print_debug_stats(char *buffer, int size) | |
19473 | +{ | |
19474 | + int len = 0; | |
2380c486 | 19475 | + |
7e46296a | 19476 | + len = scnprintf(buffer, size, "- Swap Allocator enabled.\n"); |
2380c486 JR |
19477 | + if (swapfilename[0]) |
19478 | + len += scnprintf(buffer+len, size-len, | |
19479 | + " Attempting to automatically swapon: %s.\n", | |
19480 | + swapfilename); | |
19481 | + | |
5dd10c98 | 19482 | + si_swapinfo_no_compcache(); |
2380c486 JR |
19483 | + |
19484 | + len += scnprintf(buffer+len, size-len, | |
92bca44c | 19485 | + " Swap available for image: %lu pages.\n", |
5dd10c98 | 19486 | + swapinfo.freeswap + swap_allocated); |
2380c486 JR |
19487 | + |
19488 | + return len; | |
19489 | +} | |
19490 | + | |
2380c486 JR |
19491 | +static int header_locations_read_sysfs(const char *page, int count) |
19492 | +{ | |
19493 | + int i, printedpartitionsmessage = 0, len = 0, haveswap = 0; | |
19494 | + struct inode *swapf = NULL; | |
19495 | + int zone; | |
19496 | + char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL); | |
19497 | + char *path, *output = (char *) page; | |
19498 | + int path_len; | |
19499 | + | |
19500 | + if (!page) | |
19501 | + return 0; | |
19502 | + | |
19503 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
19504 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
19505 | + | |
5dd10c98 | 19506 | + if (!si || !(si->flags & SWP_WRITEOK)) |
2380c486 JR |
19507 | + continue; |
19508 | + | |
19509 | + if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) { | |
19510 | + haveswap = 1; | |
19511 | + if (!printedpartitionsmessage) { | |
19512 | + len += sprintf(output + len, | |
19513 | + "For swap partitions, simply use the " | |
19514 | + "format: resume=swap:/dev/hda1.\n"); | |
19515 | + printedpartitionsmessage = 1; | |
19516 | + } | |
19517 | + } else { | |
19518 | + path_len = 0; | |
19519 | + | |
19520 | + path = d_path(&si->swap_file->f_path, path_page, | |
19521 | + PAGE_SIZE); | |
5dd10c98 | 19522 | + path_len = snprintf(path_page, PAGE_SIZE, "%s", path); |
2380c486 JR |
19523 | + |
19524 | + haveswap = 1; | |
19525 | + swapf = si->swap_file->f_mapping->host; | |
19526 | + zone = bmap(swapf, 0); | |
19527 | + if (!zone) { | |
19528 | + len += sprintf(output + len, | |
19529 | + "Swapfile %s has been corrupted. Reuse" | |
19530 | + " mkswap on it and try again.\n", | |
19531 | + path_page); | |
19532 | + } else { | |
5dd10c98 | 19533 | + char name_buffer[BDEVNAME_SIZE]; |
2380c486 JR |
19534 | + len += sprintf(output + len, |
19535 | + "For swapfile `%s`," | |
19536 | + " use resume=swap:/dev/%s:0x%x.\n", | |
19537 | + path_page, | |
19538 | + bdevname(si->bdev, name_buffer), | |
19539 | + zone << (swapf->i_blkbits - 9)); | |
19540 | + } | |
19541 | + } | |
19542 | + } | |
19543 | + | |
19544 | + if (!haveswap) | |
19545 | + len = sprintf(output, "You need to turn on swap partitions " | |
19546 | + "before examining this file.\n"); | |
19547 | + | |
19548 | + toi_free_page(10, (unsigned long) path_page); | |
19549 | + return len; | |
19550 | +} | |
19551 | + | |
19552 | +static struct toi_sysfs_data sysfs_params[] = { | |
19553 | + SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL), | |
19554 | + SYSFS_CUSTOM("headerlocations", SYSFS_READONLY, | |
19555 | + header_locations_read_sysfs, NULL, 0, NULL), | |
19556 | + SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0, | |
19557 | + attempt_to_parse_resume_device2), | |
19558 | +}; | |
19559 | + | |
7e46296a AM |
19560 | +static struct toi_bio_allocator_ops toi_bio_swapops = { |
19561 | + .register_storage = toi_swap_register_storage, | |
19562 | + .storage_available = toi_swap_storage_available, | |
19563 | + .allocate_storage = toi_swap_allocate_storage, | |
19564 | + .bmap = get_main_pool_phys_params, | |
19565 | + .free_storage = toi_swap_free_storage, | |
19566 | +}; | |
19567 | + | |
2380c486 | 19568 | +static struct toi_module_ops toi_swapops = { |
7e46296a | 19569 | + .type = BIO_ALLOCATOR_MODULE, |
2380c486 JR |
19570 | + .name = "swap storage", |
19571 | + .directory = "swap", | |
19572 | + .module = THIS_MODULE, | |
19573 | + .memory_needed = toi_swap_memory_needed, | |
19574 | + .print_debug_info = toi_swap_print_debug_stats, | |
2380c486 JR |
19575 | + .initialise = toi_swap_initialise, |
19576 | + .cleanup = toi_swap_cleanup, | |
7e46296a | 19577 | + .bio_allocator_ops = &toi_bio_swapops, |
2380c486 JR |
19578 | + |
19579 | + .sysfs_data = sysfs_params, | |
19580 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
19581 | + sizeof(struct toi_sysfs_data), | |
19582 | +}; | |
19583 | + | |
19584 | +/* ---- Registration ---- */ | |
19585 | +static __init int toi_swap_load(void) | |
19586 | +{ | |
2380c486 JR |
19587 | + return toi_register_module(&toi_swapops); |
19588 | +} | |
19589 | + | |
19590 | +#ifdef MODULE | |
19591 | +static __exit void toi_swap_unload(void) | |
19592 | +{ | |
19593 | + toi_unregister_module(&toi_swapops); | |
19594 | +} | |
19595 | + | |
19596 | +module_init(toi_swap_load); | |
19597 | +module_exit(toi_swap_unload); | |
19598 | +MODULE_LICENSE("GPL"); | |
19599 | +MODULE_AUTHOR("Nigel Cunningham"); | |
19600 | +MODULE_DESCRIPTION("TuxOnIce SwapAllocator"); | |
19601 | +#else | |
19602 | +late_initcall(toi_swap_load); | |
19603 | +#endif | |
19604 | diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c | |
19605 | new file mode 100644 | |
5dd10c98 | 19606 | index 0000000..0088409 |
2380c486 JR |
19607 | --- /dev/null |
19608 | +++ b/kernel/power/tuxonice_sysfs.c | |
9474138d | 19609 | @@ -0,0 +1,335 @@ |
2380c486 JR |
19610 | +/* |
19611 | + * kernel/power/tuxonice_sysfs.c | |
19612 | + * | |
5dd10c98 | 19613 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
19614 | + * |
19615 | + * This file is released under the GPLv2. | |
19616 | + * | |
19617 | + * This file contains support for sysfs entries for tuning TuxOnIce. | |
19618 | + * | |
19619 | + * We have a generic handler that deals with the most common cases, and | |
19620 | + * hooks for special handlers to use. | |
19621 | + */ | |
19622 | + | |
19623 | +#include <linux/suspend.h> | |
2380c486 JR |
19624 | + |
19625 | +#include "tuxonice_sysfs.h" | |
19626 | +#include "tuxonice.h" | |
19627 | +#include "tuxonice_storage.h" | |
19628 | +#include "tuxonice_alloc.h" | |
19629 | + | |
19630 | +static int toi_sysfs_initialised; | |
19631 | + | |
19632 | +static void toi_initialise_sysfs(void); | |
19633 | + | |
19634 | +static struct toi_sysfs_data sysfs_params[]; | |
19635 | + | |
19636 | +#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr) | |
19637 | + | |
19638 | +static void toi_main_wrapper(void) | |
19639 | +{ | |
9474138d | 19640 | + toi_try_hibernate(); |
2380c486 JR |
19641 | +} |
19642 | + | |
19643 | +static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr, | |
19644 | + char *page) | |
19645 | +{ | |
19646 | + struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); | |
19647 | + int len = 0; | |
19648 | + int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ; | |
19649 | + | |
19650 | + if (full_prep && toi_start_anything(0)) | |
19651 | + return -EBUSY; | |
19652 | + | |
19653 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) | |
19654 | + toi_prepare_usm(); | |
19655 | + | |
19656 | + switch (sysfs_data->type) { | |
19657 | + case TOI_SYSFS_DATA_CUSTOM: | |
19658 | + len = (sysfs_data->data.special.read_sysfs) ? | |
19659 | + (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE) | |
19660 | + : 0; | |
19661 | + break; | |
19662 | + case TOI_SYSFS_DATA_BIT: | |
19663 | + len = sprintf(page, "%d\n", | |
19664 | + -test_bit(sysfs_data->data.bit.bit, | |
19665 | + sysfs_data->data.bit.bit_vector)); | |
19666 | + break; | |
19667 | + case TOI_SYSFS_DATA_INTEGER: | |
19668 | + len = sprintf(page, "%d\n", | |
19669 | + *(sysfs_data->data.integer.variable)); | |
19670 | + break; | |
19671 | + case TOI_SYSFS_DATA_LONG: | |
19672 | + len = sprintf(page, "%ld\n", | |
19673 | + *(sysfs_data->data.a_long.variable)); | |
19674 | + break; | |
19675 | + case TOI_SYSFS_DATA_UL: | |
19676 | + len = sprintf(page, "%lu\n", | |
19677 | + *(sysfs_data->data.ul.variable)); | |
19678 | + break; | |
19679 | + case TOI_SYSFS_DATA_STRING: | |
19680 | + len = sprintf(page, "%s\n", | |
19681 | + sysfs_data->data.string.variable); | |
19682 | + break; | |
19683 | + } | |
19684 | + | |
19685 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) | |
19686 | + toi_cleanup_usm(); | |
19687 | + | |
19688 | + if (full_prep) | |
19689 | + toi_finish_anything(0); | |
19690 | + | |
19691 | + return len; | |
19692 | +} | |
19693 | + | |
19694 | +#define BOUND(_variable, _type) do { \ | |
19695 | + if (*_variable < sysfs_data->data._type.minimum) \ | |
19696 | + *_variable = sysfs_data->data._type.minimum; \ | |
19697 | + else if (*_variable > sysfs_data->data._type.maximum) \ | |
19698 | + *_variable = sysfs_data->data._type.maximum; \ | |
19699 | +} while (0) | |
19700 | + | |
19701 | +static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr, | |
19702 | + const char *my_buf, size_t count) | |
19703 | +{ | |
19704 | + int assigned_temp_buffer = 0, result = count; | |
19705 | + struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); | |
19706 | + | |
19707 | + if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME))) | |
19708 | + return -EBUSY; | |
19709 | + | |
19710 | + ((char *) my_buf)[count] = 0; | |
19711 | + | |
19712 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) | |
19713 | + toi_prepare_usm(); | |
19714 | + | |
19715 | + switch (sysfs_data->type) { | |
19716 | + case TOI_SYSFS_DATA_CUSTOM: | |
19717 | + if (sysfs_data->data.special.write_sysfs) | |
19718 | + result = (sysfs_data->data.special.write_sysfs)(my_buf, | |
19719 | + count); | |
19720 | + break; | |
19721 | + case TOI_SYSFS_DATA_BIT: | |
19722 | + { | |
9474138d AM |
19723 | + unsigned long value; |
19724 | + result = strict_strtoul(my_buf, 0, &value); | |
19725 | + if (result) | |
19726 | + break; | |
2380c486 JR |
19727 | + if (value) |
19728 | + set_bit(sysfs_data->data.bit.bit, | |
19729 | + (sysfs_data->data.bit.bit_vector)); | |
19730 | + else | |
19731 | + clear_bit(sysfs_data->data.bit.bit, | |
19732 | + (sysfs_data->data.bit.bit_vector)); | |
19733 | + } | |
19734 | + break; | |
19735 | + case TOI_SYSFS_DATA_INTEGER: | |
19736 | + { | |
9474138d AM |
19737 | + long temp; |
19738 | + result = strict_strtol(my_buf, 0, &temp); | |
19739 | + if (result) | |
19740 | + break; | |
19741 | + *(sysfs_data->data.integer.variable) = (int) temp; | |
19742 | + BOUND(sysfs_data->data.integer.variable, integer); | |
2380c486 JR |
19743 | + break; |
19744 | + } | |
19745 | + case TOI_SYSFS_DATA_LONG: | |
19746 | + { | |
19747 | + long *variable = | |
19748 | + sysfs_data->data.a_long.variable; | |
9474138d AM |
19749 | + result = strict_strtol(my_buf, 0, variable); |
19750 | + if (result) | |
19751 | + break; | |
2380c486 JR |
19752 | + BOUND(variable, a_long); |
19753 | + break; | |
19754 | + } | |
19755 | + case TOI_SYSFS_DATA_UL: | |
19756 | + { | |
19757 | + unsigned long *variable = | |
19758 | + sysfs_data->data.ul.variable; | |
9474138d AM |
19759 | + result = strict_strtoul(my_buf, 0, variable); |
19760 | + if (result) | |
19761 | + break; | |
2380c486 JR |
19762 | + BOUND(variable, ul); |
19763 | + break; | |
19764 | + } | |
19765 | + break; | |
19766 | + case TOI_SYSFS_DATA_STRING: | |
19767 | + { | |
19768 | + int copy_len = count; | |
19769 | + char *variable = | |
19770 | + sysfs_data->data.string.variable; | |
19771 | + | |
19772 | + if (sysfs_data->data.string.max_length && | |
19773 | + (copy_len > sysfs_data->data.string.max_length)) | |
19774 | + copy_len = sysfs_data->data.string.max_length; | |
19775 | + | |
19776 | + if (!variable) { | |
19777 | + variable = (char *) toi_get_zeroed_page(31, | |
19778 | + TOI_ATOMIC_GFP); | |
19779 | + sysfs_data->data.string.variable = variable; | |
19780 | + assigned_temp_buffer = 1; | |
19781 | + } | |
19782 | + strncpy(variable, my_buf, copy_len); | |
19783 | + if (copy_len && my_buf[copy_len - 1] == '\n') | |
19784 | + variable[count - 1] = 0; | |
19785 | + variable[count] = 0; | |
19786 | + } | |
19787 | + break; | |
19788 | + } | |
19789 | + | |
9474138d AM |
19790 | + if (!result) |
19791 | + result = count; | |
19792 | + | |
2380c486 | 19793 | + /* Side effect routine? */ |
9474138d | 19794 | + if (result == count && sysfs_data->write_side_effect) |
2380c486 JR |
19795 | + sysfs_data->write_side_effect(); |
19796 | + | |
19797 | + /* Free temporary buffers */ | |
19798 | + if (assigned_temp_buffer) { | |
19799 | + toi_free_page(31, | |
19800 | + (unsigned long) sysfs_data->data.string.variable); | |
19801 | + sysfs_data->data.string.variable = NULL; | |
19802 | + } | |
19803 | + | |
19804 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) | |
19805 | + toi_cleanup_usm(); | |
19806 | + | |
19807 | + toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME); | |
19808 | + | |
19809 | + return result; | |
19810 | +} | |
19811 | + | |
19812 | +static struct sysfs_ops toi_sysfs_ops = { | |
19813 | + .show = &toi_attr_show, | |
19814 | + .store = &toi_attr_store, | |
19815 | +}; | |
19816 | + | |
19817 | +static struct kobj_type toi_ktype = { | |
19818 | + .sysfs_ops = &toi_sysfs_ops, | |
19819 | +}; | |
19820 | + | |
19821 | +struct kobject *tuxonice_kobj; | |
19822 | + | |
19823 | +/* Non-module sysfs entries. | |
19824 | + * | |
19825 | + * This array contains entries that are automatically registered at | |
19826 | + * boot. Modules and the console code register their own entries separately. | |
19827 | + */ | |
19828 | + | |
19829 | +static struct toi_sysfs_data sysfs_params[] = { | |
19830 | + SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL, | |
19831 | + SYSFS_HIBERNATING, toi_main_wrapper), | |
19832 | + SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL, | |
9474138d | 19833 | + SYSFS_RESUMING, toi_try_resume) |
2380c486 JR |
19834 | +}; |
19835 | + | |
19836 | +void remove_toi_sysdir(struct kobject *kobj) | |
19837 | +{ | |
19838 | + if (!kobj) | |
19839 | + return; | |
19840 | + | |
19841 | + kobject_put(kobj); | |
19842 | +} | |
19843 | + | |
19844 | +struct kobject *make_toi_sysdir(char *name) | |
19845 | +{ | |
19846 | + struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj); | |
19847 | + | |
19848 | + if (!kobj) { | |
19849 | + printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs " | |
19850 | + "dir!\n"); | |
19851 | + return NULL; | |
19852 | + } | |
19853 | + | |
19854 | + kobj->ktype = &toi_ktype; | |
19855 | + | |
19856 | + return kobj; | |
19857 | +} | |
19858 | + | |
19859 | +/* toi_register_sysfs_file | |
19860 | + * | |
19861 | + * Helper for registering a new /sysfs/tuxonice entry. | |
19862 | + */ | |
19863 | + | |
19864 | +int toi_register_sysfs_file( | |
19865 | + struct kobject *kobj, | |
19866 | + struct toi_sysfs_data *toi_sysfs_data) | |
19867 | +{ | |
19868 | + int result; | |
19869 | + | |
19870 | + if (!toi_sysfs_initialised) | |
19871 | + toi_initialise_sysfs(); | |
19872 | + | |
19873 | + result = sysfs_create_file(kobj, &toi_sysfs_data->attr); | |
19874 | + if (result) | |
19875 | + printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s " | |
19876 | + "returned %d.\n", | |
19877 | + toi_sysfs_data->attr.name, result); | |
19878 | + kobj->ktype = &toi_ktype; | |
19879 | + | |
19880 | + return result; | |
19881 | +} | |
19882 | +EXPORT_SYMBOL_GPL(toi_register_sysfs_file); | |
19883 | + | |
19884 | +/* toi_unregister_sysfs_file | |
19885 | + * | |
19886 | + * Helper for removing unwanted /sys/power/tuxonice entries. | |
19887 | + * | |
19888 | + */ | |
19889 | +void toi_unregister_sysfs_file(struct kobject *kobj, | |
19890 | + struct toi_sysfs_data *toi_sysfs_data) | |
19891 | +{ | |
19892 | + sysfs_remove_file(kobj, &toi_sysfs_data->attr); | |
19893 | +} | |
19894 | +EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file); | |
19895 | + | |
19896 | +void toi_cleanup_sysfs(void) | |
19897 | +{ | |
19898 | + int i, | |
19899 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
19900 | + | |
19901 | + if (!toi_sysfs_initialised) | |
19902 | + return; | |
19903 | + | |
19904 | + for (i = 0; i < numfiles; i++) | |
19905 | + toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
19906 | + | |
19907 | + kobject_put(tuxonice_kobj); | |
19908 | + toi_sysfs_initialised = 0; | |
19909 | +} | |
19910 | + | |
19911 | +/* toi_initialise_sysfs | |
19912 | + * | |
19913 | + * Initialise the /sysfs/tuxonice directory. | |
19914 | + */ | |
19915 | + | |
19916 | +static void toi_initialise_sysfs(void) | |
19917 | +{ | |
19918 | + int i; | |
19919 | + int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
19920 | + | |
19921 | + if (toi_sysfs_initialised) | |
19922 | + return; | |
19923 | + | |
19924 | + /* Make our TuxOnIce directory a child of /sys/power */ | |
19925 | + tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj); | |
19926 | + if (!tuxonice_kobj) | |
19927 | + return; | |
19928 | + | |
19929 | + toi_sysfs_initialised = 1; | |
19930 | + | |
19931 | + for (i = 0; i < numfiles; i++) | |
19932 | + toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
19933 | +} | |
19934 | + | |
19935 | +int toi_sysfs_init(void) | |
19936 | +{ | |
19937 | + toi_initialise_sysfs(); | |
19938 | + return 0; | |
19939 | +} | |
19940 | + | |
19941 | +void toi_sysfs_exit(void) | |
19942 | +{ | |
19943 | + toi_cleanup_sysfs(); | |
19944 | +} | |
19945 | diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h | |
19946 | new file mode 100644 | |
5dd10c98 | 19947 | index 0000000..4185c6d |
2380c486 JR |
19948 | --- /dev/null |
19949 | +++ b/kernel/power/tuxonice_sysfs.h | |
9474138d | 19950 | @@ -0,0 +1,137 @@ |
2380c486 JR |
19951 | +/* |
19952 | + * kernel/power/tuxonice_sysfs.h | |
19953 | + * | |
5dd10c98 | 19954 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
19955 | + * |
19956 | + * This file is released under the GPLv2. | |
19957 | + */ | |
19958 | + | |
19959 | +#include <linux/sysfs.h> | |
2380c486 JR |
19960 | + |
19961 | +struct toi_sysfs_data { | |
19962 | + struct attribute attr; | |
19963 | + int type; | |
19964 | + int flags; | |
19965 | + union { | |
19966 | + struct { | |
19967 | + unsigned long *bit_vector; | |
19968 | + int bit; | |
19969 | + } bit; | |
19970 | + struct { | |
19971 | + int *variable; | |
19972 | + int minimum; | |
19973 | + int maximum; | |
19974 | + } integer; | |
19975 | + struct { | |
19976 | + long *variable; | |
19977 | + long minimum; | |
19978 | + long maximum; | |
19979 | + } a_long; | |
19980 | + struct { | |
19981 | + unsigned long *variable; | |
19982 | + unsigned long minimum; | |
19983 | + unsigned long maximum; | |
19984 | + } ul; | |
19985 | + struct { | |
19986 | + char *variable; | |
19987 | + int max_length; | |
19988 | + } string; | |
19989 | + struct { | |
19990 | + int (*read_sysfs) (const char *buffer, int count); | |
19991 | + int (*write_sysfs) (const char *buffer, int count); | |
19992 | + void *data; | |
19993 | + } special; | |
19994 | + } data; | |
19995 | + | |
19996 | + /* Side effects routine. Used, eg, for reparsing the | |
19997 | + * resume= entry when it changes */ | |
19998 | + void (*write_side_effect) (void); | |
19999 | + struct list_head sysfs_data_list; | |
20000 | +}; | |
20001 | + | |
20002 | +enum { | |
20003 | + TOI_SYSFS_DATA_NONE = 1, | |
20004 | + TOI_SYSFS_DATA_CUSTOM, | |
20005 | + TOI_SYSFS_DATA_BIT, | |
20006 | + TOI_SYSFS_DATA_INTEGER, | |
20007 | + TOI_SYSFS_DATA_UL, | |
20008 | + TOI_SYSFS_DATA_LONG, | |
20009 | + TOI_SYSFS_DATA_STRING | |
20010 | +}; | |
20011 | + | |
20012 | +#define SYSFS_WRITEONLY 0200 | |
20013 | +#define SYSFS_READONLY 0444 | |
20014 | +#define SYSFS_RW 0644 | |
20015 | + | |
20016 | +#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \ | |
20017 | + .attr = {.name = _name , .mode = _mode }, \ | |
20018 | + .type = TOI_SYSFS_DATA_BIT, \ | |
20019 | + .flags = _flags, \ | |
20020 | + .data = { .bit = { .bit_vector = _ul, .bit = _bit } } } | |
20021 | + | |
20022 | +#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \ | |
20023 | + .attr = {.name = _name , .mode = _mode }, \ | |
20024 | + .type = TOI_SYSFS_DATA_INTEGER, \ | |
20025 | + .flags = _flags, \ | |
20026 | + .data = { .integer = { .variable = _int, .minimum = _min, \ | |
20027 | + .maximum = _max } }, \ | |
20028 | + .write_side_effect = _wse } | |
20029 | + | |
20030 | +#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \ | |
20031 | + .attr = {.name = _name , .mode = _mode }, \ | |
20032 | + .type = TOI_SYSFS_DATA_UL, \ | |
20033 | + .flags = _flags, \ | |
20034 | + .data = { .ul = { .variable = _ul, .minimum = _min, \ | |
20035 | + .maximum = _max } } } | |
20036 | + | |
20037 | +#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \ | |
20038 | + .attr = {.name = _name , .mode = _mode }, \ | |
20039 | + .type = TOI_SYSFS_DATA_LONG, \ | |
20040 | + .flags = _flags, \ | |
20041 | + .data = { .a_long = { .variable = _long, .minimum = _min, \ | |
20042 | + .maximum = _max } } } | |
20043 | + | |
20044 | +#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \ | |
20045 | + .attr = {.name = _name , .mode = _mode }, \ | |
20046 | + .type = TOI_SYSFS_DATA_STRING, \ | |
20047 | + .flags = _flags, \ | |
20048 | + .data = { .string = { .variable = _string, .max_length = _max_len } }, \ | |
20049 | + .write_side_effect = _wse } | |
20050 | + | |
20051 | +#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \ | |
20052 | + .attr = {.name = _name , .mode = _mode }, \ | |
20053 | + .type = TOI_SYSFS_DATA_CUSTOM, \ | |
20054 | + .flags = _flags, \ | |
20055 | + .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \ | |
20056 | + .write_side_effect = _wse } | |
20057 | + | |
20058 | +#define SYSFS_NONE(_name, _wse) { \ | |
20059 | + .attr = {.name = _name , .mode = SYSFS_WRITEONLY }, \ | |
20060 | + .type = TOI_SYSFS_DATA_NONE, \ | |
20061 | + .write_side_effect = _wse, \ | |
20062 | +} | |
20063 | + | |
20064 | +/* Flags */ | |
20065 | +#define SYSFS_NEEDS_SM_FOR_READ 1 | |
20066 | +#define SYSFS_NEEDS_SM_FOR_WRITE 2 | |
20067 | +#define SYSFS_HIBERNATE 4 | |
20068 | +#define SYSFS_RESUME 8 | |
20069 | +#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME) | |
20070 | +#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE) | |
20071 | +#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE) | |
20072 | +#define SYSFS_NEEDS_SM_FOR_BOTH \ | |
20073 | + (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE) | |
20074 | + | |
20075 | +int toi_register_sysfs_file(struct kobject *kobj, | |
20076 | + struct toi_sysfs_data *toi_sysfs_data); | |
20077 | +void toi_unregister_sysfs_file(struct kobject *kobj, | |
20078 | + struct toi_sysfs_data *toi_sysfs_data); | |
20079 | + | |
20080 | +extern struct kobject *tuxonice_kobj; | |
20081 | + | |
20082 | +struct kobject *make_toi_sysdir(char *name); | |
20083 | +void remove_toi_sysdir(struct kobject *obj); | |
20084 | +extern void toi_cleanup_sysfs(void); | |
20085 | + | |
20086 | +extern int toi_sysfs_init(void); | |
20087 | +extern void toi_sysfs_exit(void); | |
20088 | diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c | |
20089 | new file mode 100644 | |
5dd10c98 | 20090 | index 0000000..b0b3b40 |
2380c486 JR |
20091 | --- /dev/null |
20092 | +++ b/kernel/power/tuxonice_ui.c | |
20093 | @@ -0,0 +1,250 @@ | |
20094 | +/* | |
20095 | + * kernel/power/tuxonice_ui.c | |
20096 | + * | |
20097 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
20098 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
20099 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
5dd10c98 | 20100 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
20101 | + * |
20102 | + * This file is released under the GPLv2. | |
20103 | + * | |
20104 | + * Routines for TuxOnIce's user interface. | |
20105 | + * | |
20106 | + * The user interface code talks to a userspace program via a | |
20107 | + * netlink socket. | |
20108 | + * | |
20109 | + * The kernel side: | |
20110 | + * - starts the userui program; | |
20111 | + * - sends text messages and progress bar status; | |
20112 | + * | |
20113 | + * The user space side: | |
20114 | + * - passes messages regarding user requests (abort, toggle reboot etc) | |
20115 | + * | |
20116 | + */ | |
20117 | + | |
20118 | +#define __KERNEL_SYSCALLS__ | |
20119 | + | |
20120 | +#include <linux/reboot.h> | |
20121 | + | |
20122 | +#include "tuxonice_sysfs.h" | |
20123 | +#include "tuxonice_modules.h" | |
20124 | +#include "tuxonice.h" | |
20125 | +#include "tuxonice_ui.h" | |
20126 | +#include "tuxonice_netlink.h" | |
20127 | +#include "tuxonice_power_off.h" | |
20128 | +#include "tuxonice_builtin.h" | |
20129 | + | |
20130 | +static char local_printf_buf[1024]; /* Same as printk - should be safe */ | |
20131 | +struct ui_ops *toi_current_ui; | |
20132 | +EXPORT_SYMBOL_GPL(toi_current_ui); | |
20133 | + | |
20134 | +/** | |
20135 | + * toi_wait_for_keypress - Wait for keypress via userui or /dev/console. | |
20136 | + * | |
20137 | + * @timeout: Maximum time to wait. | |
20138 | + * | |
20139 | + * Wait for a keypress, either from userui or /dev/console if userui isn't | |
20140 | + * available. The non-userui path is particularly for at boot-time, prior | |
20141 | + * to userui being started, when we have an important warning to give to | |
20142 | + * the user. | |
20143 | + */ | |
20144 | +static char toi_wait_for_keypress(int timeout) | |
20145 | +{ | |
20146 | + if (toi_current_ui && toi_current_ui->wait_for_key(timeout)) | |
20147 | + return ' '; | |
20148 | + | |
20149 | + return toi_wait_for_keypress_dev_console(timeout); | |
20150 | +} | |
20151 | + | |
20152 | +/* toi_early_boot_message() | |
20153 | + * Description: Handle errors early in the process of booting. | |
20154 | + * The user may press C to continue booting, perhaps | |
20155 | + * invalidating the image, or space to reboot. | |
20156 | + * This works from either the serial console or normally | |
20157 | + * attached keyboard. | |
20158 | + * | |
20159 | + * Note that we come in here from init, while the kernel is | |
20160 | + * locked. If we want to get events from the serial console, | |
20161 | + * we need to temporarily unlock the kernel. | |
20162 | + * | |
20163 | + * toi_early_boot_message may also be called post-boot. | |
20164 | + * In this case, it simply printks the message and returns. | |
20165 | + * | |
20166 | + * Arguments: int Whether we are able to erase the image. | |
20167 | + * int default_answer. What to do when we timeout. This | |
20168 | + * will normally be continue, but the user might | |
20169 | + * provide command line options (__setup) to override | |
20170 | + * particular cases. | |
20171 | + * Char *. Pointer to a string explaining why we're moaning. | |
20172 | + */ | |
20173 | + | |
20174 | +#define say(message, a...) printk(KERN_EMERG message, ##a) | |
20175 | + | |
20176 | +void toi_early_boot_message(int message_detail, int default_answer, | |
20177 | + char *warning_reason, ...) | |
20178 | +{ | |
20179 | +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) | |
20180 | + unsigned long orig_state = get_toi_state(), continue_req = 0; | |
20181 | + unsigned long orig_loglevel = console_loglevel; | |
20182 | + int can_ask = 1; | |
20183 | +#else | |
20184 | + int can_ask = 0; | |
20185 | +#endif | |
20186 | + | |
20187 | + va_list args; | |
20188 | + int printed_len; | |
20189 | + | |
20190 | + if (!toi_wait) { | |
20191 | + set_toi_state(TOI_CONTINUE_REQ); | |
20192 | + can_ask = 0; | |
20193 | + } | |
20194 | + | |
20195 | + if (warning_reason) { | |
20196 | + va_start(args, warning_reason); | |
20197 | + printed_len = vsnprintf(local_printf_buf, | |
20198 | + sizeof(local_printf_buf), | |
20199 | + warning_reason, | |
20200 | + args); | |
20201 | + va_end(args); | |
20202 | + } | |
20203 | + | |
20204 | + if (!test_toi_state(TOI_BOOT_TIME)) { | |
20205 | + printk("TuxOnIce: %s\n", local_printf_buf); | |
20206 | + return; | |
20207 | + } | |
20208 | + | |
20209 | + if (!can_ask) { | |
20210 | + continue_req = !!default_answer; | |
20211 | + goto post_ask; | |
20212 | + } | |
20213 | + | |
20214 | +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) | |
20215 | + console_loglevel = 7; | |
20216 | + | |
20217 | + say("=== TuxOnIce ===\n\n"); | |
20218 | + if (warning_reason) { | |
20219 | + say("BIG FAT WARNING!! %s\n\n", local_printf_buf); | |
20220 | + switch (message_detail) { | |
20221 | + case 0: | |
20222 | + say("If you continue booting, note that any image WILL" | |
20223 | + "NOT BE REMOVED.\nTuxOnIce is unable to do so " | |
20224 | + "because the appropriate modules aren't\n" | |
20225 | + "loaded. You should manually remove the image " | |
20226 | + "to avoid any\npossibility of corrupting your " | |
20227 | + "filesystem(s) later.\n"); | |
20228 | + break; | |
20229 | + case 1: | |
20230 | + say("If you want to use the current TuxOnIce image, " | |
20231 | + "reboot and try\nagain with the same kernel " | |
20232 | + "that you hibernated from. If you want\n" | |
20233 | + "to forget that image, continue and the image " | |
20234 | + "will be erased.\n"); | |
20235 | + break; | |
20236 | + } | |
20237 | + say("Press SPACE to reboot or C to continue booting with " | |
20238 | + "this kernel\n\n"); | |
20239 | + if (toi_wait > 0) | |
20240 | + say("Default action if you don't select one in %d " | |
20241 | + "seconds is: %s.\n", | |
20242 | + toi_wait, | |
20243 | + default_answer == TOI_CONTINUE_REQ ? | |
20244 | + "continue booting" : "reboot"); | |
20245 | + } else { | |
20246 | + say("BIG FAT WARNING!!\n\n" | |
20247 | + "You have tried to resume from this image before.\n" | |
20248 | + "If it failed once, it may well fail again.\n" | |
20249 | + "Would you like to remove the image and boot " | |
20250 | + "normally?\nThis will be equivalent to entering " | |
20251 | + "noresume on the\nkernel command line.\n\n" | |
20252 | + "Press SPACE to remove the image or C to continue " | |
20253 | + "resuming.\n\n"); | |
20254 | + if (toi_wait > 0) | |
20255 | + say("Default action if you don't select one in %d " | |
20256 | + "seconds is: %s.\n", toi_wait, | |
20257 | + !!default_answer ? | |
20258 | + "continue resuming" : "remove the image"); | |
20259 | + } | |
20260 | + console_loglevel = orig_loglevel; | |
20261 | + | |
20262 | + set_toi_state(TOI_SANITY_CHECK_PROMPT); | |
20263 | + clear_toi_state(TOI_CONTINUE_REQ); | |
20264 | + | |
20265 | + if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */ | |
20266 | + continue_req = !!default_answer; | |
20267 | + else | |
20268 | + continue_req = test_toi_state(TOI_CONTINUE_REQ); | |
20269 | + | |
20270 | +#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */ | |
20271 | + | |
20272 | +post_ask: | |
20273 | + if ((warning_reason) && (!continue_req)) | |
20274 | + machine_restart(NULL); | |
20275 | + | |
20276 | + restore_toi_state(orig_state); | |
20277 | + if (continue_req) | |
20278 | + set_toi_state(TOI_CONTINUE_REQ); | |
20279 | +} | |
20280 | +EXPORT_SYMBOL_GPL(toi_early_boot_message); | |
20281 | +#undef say | |
20282 | + | |
20283 | +/* | |
20284 | + * User interface specific /sys/power/tuxonice entries. | |
20285 | + */ | |
20286 | + | |
20287 | +static struct toi_sysfs_data sysfs_params[] = { | |
20288 | +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) | |
20289 | + SYSFS_INT("default_console_level", SYSFS_RW, | |
20290 | + &toi_bkd.toi_default_console_level, 0, 7, 0, NULL), | |
20291 | + SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0, | |
20292 | + 1 << 30, 0), | |
20293 | + SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL, | |
20294 | + 0) | |
20295 | +#endif | |
20296 | +}; | |
20297 | + | |
20298 | +static struct toi_module_ops userui_ops = { | |
20299 | + .type = MISC_HIDDEN_MODULE, | |
20300 | + .name = "printk ui", | |
20301 | + .directory = "user_interface", | |
20302 | + .module = THIS_MODULE, | |
20303 | + .sysfs_data = sysfs_params, | |
20304 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
20305 | + sizeof(struct toi_sysfs_data), | |
20306 | +}; | |
20307 | + | |
20308 | +int toi_register_ui_ops(struct ui_ops *this_ui) | |
20309 | +{ | |
20310 | + if (toi_current_ui) { | |
20311 | + printk(KERN_INFO "Only one TuxOnIce user interface module can " | |
20312 | + "be loaded at a time."); | |
20313 | + return -EBUSY; | |
20314 | + } | |
20315 | + | |
20316 | + toi_current_ui = this_ui; | |
20317 | + | |
20318 | + return 0; | |
20319 | +} | |
20320 | +EXPORT_SYMBOL_GPL(toi_register_ui_ops); | |
20321 | + | |
20322 | +void toi_remove_ui_ops(struct ui_ops *this_ui) | |
20323 | +{ | |
20324 | + if (toi_current_ui != this_ui) | |
20325 | + return; | |
20326 | + | |
20327 | + toi_current_ui = NULL; | |
20328 | +} | |
20329 | +EXPORT_SYMBOL_GPL(toi_remove_ui_ops); | |
20330 | + | |
20331 | +/* toi_console_sysfs_init | |
20332 | + * Description: Boot time initialisation for user interface. | |
20333 | + */ | |
20334 | + | |
20335 | +int toi_ui_init(void) | |
20336 | +{ | |
20337 | + return toi_register_module(&userui_ops); | |
20338 | +} | |
20339 | + | |
20340 | +void toi_ui_exit(void) | |
20341 | +{ | |
20342 | + toi_unregister_module(&userui_ops); | |
20343 | +} | |
20344 | diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h | |
20345 | new file mode 100644 | |
5dd10c98 | 20346 | index 0000000..85fb7cb |
2380c486 JR |
20347 | --- /dev/null |
20348 | +++ b/kernel/power/tuxonice_ui.h | |
5dd10c98 | 20349 | @@ -0,0 +1,97 @@ |
2380c486 JR |
20350 | +/* |
20351 | + * kernel/power/tuxonice_ui.h | |
20352 | + * | |
5dd10c98 | 20353 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
20354 | + */ |
20355 | + | |
20356 | +enum { | |
20357 | + DONT_CLEAR_BAR, | |
20358 | + CLEAR_BAR | |
20359 | +}; | |
20360 | + | |
20361 | +enum { | |
20362 | + /* Userspace -> Kernel */ | |
20363 | + USERUI_MSG_ABORT = 0x11, | |
20364 | + USERUI_MSG_SET_STATE = 0x12, | |
20365 | + USERUI_MSG_GET_STATE = 0x13, | |
20366 | + USERUI_MSG_GET_DEBUG_STATE = 0x14, | |
20367 | + USERUI_MSG_SET_DEBUG_STATE = 0x15, | |
20368 | + USERUI_MSG_SPACE = 0x18, | |
20369 | + USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A, | |
20370 | + USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B, | |
20371 | + USERUI_MSG_GET_LOGLEVEL = 0x1C, | |
20372 | + USERUI_MSG_SET_LOGLEVEL = 0x1D, | |
20373 | + USERUI_MSG_PRINTK = 0x1E, | |
20374 | + | |
20375 | + /* Kernel -> Userspace */ | |
20376 | + USERUI_MSG_MESSAGE = 0x21, | |
20377 | + USERUI_MSG_PROGRESS = 0x22, | |
20378 | + USERUI_MSG_POST_ATOMIC_RESTORE = 0x25, | |
20379 | + | |
20380 | + USERUI_MSG_MAX, | |
20381 | +}; | |
20382 | + | |
20383 | +struct userui_msg_params { | |
20384 | + u32 a, b, c, d; | |
20385 | + char text[255]; | |
20386 | +}; | |
20387 | + | |
20388 | +struct ui_ops { | |
20389 | + char (*wait_for_key) (int timeout); | |
20390 | + u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...); | |
20391 | + void (*prepare_status) (int clearbar, const char *fmt, ...); | |
20392 | + void (*cond_pause) (int pause, char *message); | |
20393 | + void (*abort)(int result_code, const char *fmt, ...); | |
20394 | + void (*prepare)(void); | |
20395 | + void (*cleanup)(void); | |
2380c486 JR |
20396 | + void (*message)(u32 section, u32 level, u32 normally_logged, |
20397 | + const char *fmt, ...); | |
20398 | +}; | |
20399 | + | |
20400 | +extern struct ui_ops *toi_current_ui; | |
20401 | + | |
20402 | +#define toi_update_status(val, max, fmt, args...) \ | |
20403 | + (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \ | |
20404 | + max) | |
20405 | + | |
2380c486 | 20406 | +#define toi_prepare_console(void) \ |
e999739a | 20407 | + do { if (toi_current_ui) \ |
2380c486 JR |
20408 | + (toi_current_ui->prepare)(); \ |
20409 | + } while (0) | |
20410 | + | |
20411 | +#define toi_cleanup_console(void) \ | |
e999739a | 20412 | + do { if (toi_current_ui) \ |
2380c486 JR |
20413 | + (toi_current_ui->cleanup)(); \ |
20414 | + } while (0) | |
20415 | + | |
20416 | +#define abort_hibernate(result, fmt, args...) \ | |
20417 | + do { if (toi_current_ui) \ | |
20418 | + (toi_current_ui->abort)(result, fmt, ##args); \ | |
20419 | + else { \ | |
20420 | + set_abort_result(result); \ | |
20421 | + } \ | |
20422 | + } while (0) | |
20423 | + | |
20424 | +#define toi_cond_pause(pause, message) \ | |
20425 | + do { if (toi_current_ui) \ | |
20426 | + (toi_current_ui->cond_pause)(pause, message); \ | |
20427 | + } while (0) | |
20428 | + | |
20429 | +#define toi_prepare_status(clear, fmt, args...) \ | |
20430 | + do { if (toi_current_ui) \ | |
20431 | + (toi_current_ui->prepare_status)(clear, fmt, ##args); \ | |
20432 | + else \ | |
20433 | + printk(KERN_ERR fmt "%s", ##args, "\n"); \ | |
20434 | + } while (0) | |
20435 | + | |
20436 | +#define toi_message(sn, lev, log, fmt, a...) \ | |
20437 | +do { \ | |
20438 | + if (toi_current_ui && (!sn || test_debug_state(sn))) \ | |
20439 | + toi_current_ui->message(sn, lev, log, fmt, ##a); \ | |
20440 | +} while (0) | |
20441 | + | |
20442 | +__exit void toi_ui_cleanup(void); | |
20443 | +extern int toi_ui_init(void); | |
20444 | +extern void toi_ui_exit(void); | |
20445 | +extern int toi_register_ui_ops(struct ui_ops *this_ui); | |
20446 | +extern void toi_remove_ui_ops(struct ui_ops *this_ui); | |
20447 | diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c | |
20448 | new file mode 100644 | |
5dd10c98 | 20449 | index 0000000..625d863 |
2380c486 JR |
20450 | --- /dev/null |
20451 | +++ b/kernel/power/tuxonice_userui.c | |
7e46296a | 20452 | @@ -0,0 +1,668 @@ |
2380c486 JR |
20453 | +/* |
20454 | + * kernel/power/user_ui.c | |
20455 | + * | |
20456 | + * Copyright (C) 2005-2007 Bernard Blackham | |
5dd10c98 | 20457 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
20458 | + * |
20459 | + * This file is released under the GPLv2. | |
20460 | + * | |
20461 | + * Routines for TuxOnIce's user interface. | |
20462 | + * | |
20463 | + * The user interface code talks to a userspace program via a | |
20464 | + * netlink socket. | |
20465 | + * | |
20466 | + * The kernel side: | |
20467 | + * - starts the userui program; | |
20468 | + * - sends text messages and progress bar status; | |
20469 | + * | |
20470 | + * The user space side: | |
20471 | + * - passes messages regarding user requests (abort, toggle reboot etc) | |
20472 | + * | |
20473 | + */ | |
20474 | + | |
20475 | +#define __KERNEL_SYSCALLS__ | |
20476 | + | |
20477 | +#include <linux/suspend.h> | |
20478 | +#include <linux/freezer.h> | |
20479 | +#include <linux/console.h> | |
20480 | +#include <linux/ctype.h> | |
20481 | +#include <linux/tty.h> | |
20482 | +#include <linux/vt_kern.h> | |
2380c486 JR |
20483 | +#include <linux/reboot.h> |
20484 | +#include <linux/kmod.h> | |
20485 | +#include <linux/security.h> | |
20486 | +#include <linux/syscalls.h> | |
5dd10c98 | 20487 | +#include <linux/vt.h> |
2380c486 JR |
20488 | + |
20489 | +#include "tuxonice_sysfs.h" | |
20490 | +#include "tuxonice_modules.h" | |
20491 | +#include "tuxonice.h" | |
20492 | +#include "tuxonice_ui.h" | |
20493 | +#include "tuxonice_netlink.h" | |
20494 | +#include "tuxonice_power_off.h" | |
20495 | + | |
20496 | +static char local_printf_buf[1024]; /* Same as printk - should be safe */ | |
20497 | + | |
20498 | +static struct user_helper_data ui_helper_data; | |
20499 | +static struct toi_module_ops userui_ops; | |
20500 | +static int orig_kmsg; | |
20501 | + | |
20502 | +static char lastheader[512]; | |
20503 | +static int lastheader_message_len; | |
20504 | +static int ui_helper_changed; /* Used at resume-time so don't overwrite value | |
20505 | + set from initrd/ramfs. */ | |
20506 | + | |
20507 | +/* Number of distinct progress amounts that userspace can display */ | |
20508 | +static int progress_granularity = 30; | |
20509 | + | |
20510 | +static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key); | |
20511 | + | |
20512 | +/** | |
20513 | + * ui_nl_set_state - Update toi_action based on a message from userui. | |
20514 | + * | |
20515 | + * @n: The bit (1 << bit) to set. | |
20516 | + */ | |
20517 | +static void ui_nl_set_state(int n) | |
20518 | +{ | |
20519 | + /* Only let them change certain settings */ | |
20520 | + static const u32 toi_action_mask = | |
20521 | + (1 << TOI_REBOOT) | (1 << TOI_PAUSE) | | |
20522 | + (1 << TOI_LOGALL) | | |
20523 | + (1 << TOI_SINGLESTEP) | | |
20524 | + (1 << TOI_PAUSE_NEAR_PAGESET_END); | |
7e46296a | 20525 | + static unsigned long new_action; |
2380c486 | 20526 | + |
7e46296a | 20527 | + new_action = (toi_bkd.toi_action & (~toi_action_mask)) | |
2380c486 JR |
20528 | + (n & toi_action_mask); |
20529 | + | |
7e46296a AM |
20530 | + printk(KERN_DEBUG "n is %x. Action flags being changed from %lx " |
20531 | + "to %lx.", n, toi_bkd.toi_action, new_action); | |
20532 | + toi_bkd.toi_action = new_action; | |
20533 | + | |
2380c486 JR |
20534 | + if (!test_action_state(TOI_PAUSE) && |
20535 | + !test_action_state(TOI_SINGLESTEP)) | |
20536 | + wake_up_interruptible(&userui_wait_for_key); | |
20537 | +} | |
20538 | + | |
20539 | +/** | |
20540 | + * userui_post_atomic_restore - Tell userui that atomic restore just happened. | |
20541 | + * | |
20542 | + * Tell userui that atomic restore just occured, so that it can do things like | |
20543 | + * redrawing the screen, re-getting settings and so on. | |
20544 | + */ | |
5dd10c98 | 20545 | +static void userui_post_atomic_restore(struct toi_boot_kernel_data *bkd) |
2380c486 JR |
20546 | +{ |
20547 | + toi_send_netlink_message(&ui_helper_data, | |
20548 | + USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0); | |
20549 | +} | |
20550 | + | |
20551 | +/** | |
20552 | + * userui_storage_needed - Report how much memory in image header is needed. | |
20553 | + */ | |
20554 | +static int userui_storage_needed(void) | |
20555 | +{ | |
20556 | + return sizeof(ui_helper_data.program) + 1 + sizeof(int); | |
20557 | +} | |
20558 | + | |
20559 | +/** | |
20560 | + * userui_save_config_info - Fill buffer with config info for image header. | |
20561 | + * | |
20562 | + * @buf: Buffer into which to put the config info we want to save. | |
20563 | + */ | |
20564 | +static int userui_save_config_info(char *buf) | |
20565 | +{ | |
20566 | + *((int *) buf) = progress_granularity; | |
20567 | + memcpy(buf + sizeof(int), ui_helper_data.program, | |
20568 | + sizeof(ui_helper_data.program)); | |
20569 | + return sizeof(ui_helper_data.program) + sizeof(int) + 1; | |
20570 | +} | |
20571 | + | |
20572 | +/** | |
20573 | + * userui_load_config_info - Restore config info from buffer. | |
20574 | + * | |
20575 | + * @buf: Buffer containing header info loaded. | |
20576 | + * @size: Size of data loaded for this module. | |
20577 | + */ | |
20578 | +static void userui_load_config_info(char *buf, int size) | |
20579 | +{ | |
20580 | + progress_granularity = *((int *) buf); | |
20581 | + size -= sizeof(int); | |
20582 | + | |
20583 | + /* Don't load the saved path if one has already been set */ | |
20584 | + if (ui_helper_changed) | |
20585 | + return; | |
20586 | + | |
20587 | + if (size > sizeof(ui_helper_data.program)) | |
20588 | + size = sizeof(ui_helper_data.program); | |
20589 | + | |
20590 | + memcpy(ui_helper_data.program, buf + sizeof(int), size); | |
20591 | + ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0'; | |
20592 | +} | |
20593 | + | |
20594 | +/** | |
20595 | + * set_ui_program_set: Record that userui program was changed. | |
20596 | + * | |
20597 | + * Side effect routine for when the userui program is set. In an initrd or | |
20598 | + * ramfs, the user may set a location for the userui program. If this happens, | |
20599 | + * we don't want to reload the value that was saved in the image header. This | |
20600 | + * routine allows us to flag that we shouldn't restore the program name from | |
20601 | + * the image header. | |
20602 | + */ | |
20603 | +static void set_ui_program_set(void) | |
20604 | +{ | |
20605 | + ui_helper_changed = 1; | |
20606 | +} | |
20607 | + | |
20608 | +/** | |
20609 | + * userui_memory_needed - Tell core how much memory to reserve for us. | |
20610 | + */ | |
20611 | +static int userui_memory_needed(void) | |
20612 | +{ | |
20613 | + /* ball park figure of 128 pages */ | |
20614 | + return 128 * PAGE_SIZE; | |
20615 | +} | |
20616 | + | |
20617 | +/** | |
20618 | + * userui_update_status - Update the progress bar and (if on) in-bar message. | |
20619 | + * | |
20620 | + * @value: Current progress percentage numerator. | |
20621 | + * @maximum: Current progress percentage denominator. | |
20622 | + * @fmt: Message to be displayed in the middle of the progress bar. | |
20623 | + * | |
20624 | + * Note that a NULL message does not mean that any previous message is erased! | |
20625 | + * For that, you need toi_prepare_status with clearbar on. | |
20626 | + * | |
20627 | + * Returns an unsigned long, being the next numerator (as determined by the | |
20628 | + * maximum and progress granularity) where status needs to be updated. | |
20629 | + * This is to reduce unnecessary calls to update_status. | |
20630 | + */ | |
20631 | +static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...) | |
20632 | +{ | |
20633 | + static u32 last_step = 9999; | |
20634 | + struct userui_msg_params msg; | |
20635 | + u32 this_step, next_update; | |
20636 | + int bitshift; | |
20637 | + | |
20638 | + if (ui_helper_data.pid == -1) | |
20639 | + return 0; | |
20640 | + | |
20641 | + if ((!maximum) || (!progress_granularity)) | |
20642 | + return maximum; | |
20643 | + | |
20644 | + if (value < 0) | |
20645 | + value = 0; | |
20646 | + | |
20647 | + if (value > maximum) | |
20648 | + value = maximum; | |
20649 | + | |
20650 | + /* Try to avoid math problems - we can't do 64 bit math here | |
20651 | + * (and shouldn't need it - anyone got screen resolution | |
20652 | + * of 65536 pixels or more?) */ | |
20653 | + bitshift = fls(maximum) - 16; | |
20654 | + if (bitshift > 0) { | |
20655 | + u32 temp_maximum = maximum >> bitshift; | |
20656 | + u32 temp_value = value >> bitshift; | |
20657 | + this_step = (u32) | |
20658 | + (temp_value * progress_granularity / temp_maximum); | |
20659 | + next_update = (((this_step + 1) * temp_maximum / | |
20660 | + progress_granularity) + 1) << bitshift; | |
20661 | + } else { | |
20662 | + this_step = (u32) (value * progress_granularity / maximum); | |
20663 | + next_update = ((this_step + 1) * maximum / | |
20664 | + progress_granularity) + 1; | |
20665 | + } | |
20666 | + | |
20667 | + if (this_step == last_step) | |
20668 | + return next_update; | |
20669 | + | |
20670 | + memset(&msg, 0, sizeof(msg)); | |
20671 | + | |
20672 | + msg.a = this_step; | |
20673 | + msg.b = progress_granularity; | |
20674 | + | |
20675 | + if (fmt) { | |
20676 | + va_list args; | |
20677 | + va_start(args, fmt); | |
20678 | + vsnprintf(msg.text, sizeof(msg.text), fmt, args); | |
20679 | + va_end(args); | |
20680 | + msg.text[sizeof(msg.text)-1] = '\0'; | |
20681 | + } | |
20682 | + | |
20683 | + toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS, | |
20684 | + &msg, sizeof(msg)); | |
20685 | + last_step = this_step; | |
20686 | + | |
20687 | + return next_update; | |
20688 | +} | |
20689 | + | |
20690 | +/** | |
20691 | + * userui_message - Display a message without necessarily logging it. | |
20692 | + * | |
20693 | + * @section: Type of message. Messages can be filtered by type. | |
20694 | + * @level: Degree of importance of the message. Lower values = higher priority. | |
20695 | + * @normally_logged: Whether logged even if log_everything is off. | |
20696 | + * @fmt: Message (and parameters). | |
20697 | + * | |
20698 | + * This function is intended to do the same job as printk, but without normally | |
20699 | + * logging what is printed. The point is to be able to get debugging info on | |
20700 | + * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M" | |
20701 | + * | |
20702 | + * It may be called from an interrupt context - can't sleep! | |
20703 | + */ | |
20704 | +static void userui_message(u32 section, u32 level, u32 normally_logged, | |
20705 | + const char *fmt, ...) | |
20706 | +{ | |
20707 | + struct userui_msg_params msg; | |
20708 | + | |
20709 | + if ((level) && (level > console_loglevel)) | |
20710 | + return; | |
20711 | + | |
20712 | + memset(&msg, 0, sizeof(msg)); | |
20713 | + | |
20714 | + msg.a = section; | |
20715 | + msg.b = level; | |
20716 | + msg.c = normally_logged; | |
20717 | + | |
20718 | + if (fmt) { | |
20719 | + va_list args; | |
20720 | + va_start(args, fmt); | |
20721 | + vsnprintf(msg.text, sizeof(msg.text), fmt, args); | |
20722 | + va_end(args); | |
20723 | + msg.text[sizeof(msg.text)-1] = '\0'; | |
20724 | + } | |
20725 | + | |
20726 | + if (test_action_state(TOI_LOGALL)) | |
20727 | + printk(KERN_INFO "%s\n", msg.text); | |
20728 | + | |
20729 | + toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE, | |
20730 | + &msg, sizeof(msg)); | |
20731 | +} | |
20732 | + | |
20733 | +/** | |
20734 | + * wait_for_key_via_userui - Wait for userui to receive a keypress. | |
20735 | + */ | |
20736 | +static void wait_for_key_via_userui(void) | |
20737 | +{ | |
20738 | + DECLARE_WAITQUEUE(wait, current); | |
20739 | + | |
20740 | + add_wait_queue(&userui_wait_for_key, &wait); | |
20741 | + set_current_state(TASK_INTERRUPTIBLE); | |
20742 | + | |
20743 | + interruptible_sleep_on(&userui_wait_for_key); | |
20744 | + | |
20745 | + set_current_state(TASK_RUNNING); | |
20746 | + remove_wait_queue(&userui_wait_for_key, &wait); | |
20747 | +} | |
20748 | + | |
20749 | +/** | |
20750 | + * userui_prepare_status - Display high level messages. | |
20751 | + * | |
20752 | + * @clearbar: Whether to clear the progress bar. | |
20753 | + * @fmt...: New message for the title. | |
20754 | + * | |
20755 | + * Prepare the 'nice display', drawing the header and version, along with the | |
20756 | + * current action and perhaps also resetting the progress bar. | |
20757 | + */ | |
20758 | +static void userui_prepare_status(int clearbar, const char *fmt, ...) | |
20759 | +{ | |
20760 | + va_list args; | |
20761 | + | |
20762 | + if (fmt) { | |
20763 | + va_start(args, fmt); | |
20764 | + lastheader_message_len = vsnprintf(lastheader, 512, fmt, args); | |
20765 | + va_end(args); | |
20766 | + } | |
20767 | + | |
20768 | + if (clearbar) | |
20769 | + toi_update_status(0, 1, NULL); | |
20770 | + | |
20771 | + if (ui_helper_data.pid == -1) | |
20772 | + printk(KERN_EMERG "%s\n", lastheader); | |
20773 | + else | |
20774 | + toi_message(0, TOI_STATUS, 1, lastheader, NULL); | |
20775 | +} | |
20776 | + | |
20777 | +/** | |
20778 | + * toi_wait_for_keypress - Wait for keypress via userui. | |
20779 | + * | |
20780 | + * @timeout: Maximum time to wait. | |
20781 | + * | |
20782 | + * Wait for a keypress from userui. | |
20783 | + * | |
20784 | + * FIXME: Implement timeout? | |
20785 | + */ | |
20786 | +static char userui_wait_for_keypress(int timeout) | |
20787 | +{ | |
20788 | + char key = '\0'; | |
20789 | + | |
20790 | + if (ui_helper_data.pid != -1) { | |
20791 | + wait_for_key_via_userui(); | |
20792 | + key = ' '; | |
20793 | + } | |
20794 | + | |
20795 | + return key; | |
20796 | +} | |
20797 | + | |
20798 | +/** | |
20799 | + * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it. | |
20800 | + * | |
20801 | + * @result_code: Reason why we're aborting (1 << bit). | |
20802 | + * @fmt: Message to display if telling the user what's going on. | |
20803 | + * | |
20804 | + * Abort a cycle. If this wasn't at the user's request (and we're displaying | |
20805 | + * output), tell the user why and wait for them to acknowledge the message. | |
20806 | + */ | |
20807 | +static void userui_abort_hibernate(int result_code, const char *fmt, ...) | |
20808 | +{ | |
20809 | + va_list args; | |
20810 | + int printed_len = 0; | |
20811 | + | |
20812 | + set_result_state(result_code); | |
20813 | + | |
20814 | + if (test_result_state(TOI_ABORTED)) | |
20815 | + return; | |
20816 | + | |
20817 | + set_result_state(TOI_ABORTED); | |
20818 | + | |
20819 | + if (test_result_state(TOI_ABORT_REQUESTED)) | |
20820 | + return; | |
20821 | + | |
20822 | + va_start(args, fmt); | |
20823 | + printed_len = vsnprintf(local_printf_buf, sizeof(local_printf_buf), | |
20824 | + fmt, args); | |
20825 | + va_end(args); | |
20826 | + if (ui_helper_data.pid != -1) | |
20827 | + printed_len = sprintf(local_printf_buf + printed_len, | |
20828 | + " (Press SPACE to continue)"); | |
20829 | + | |
20830 | + toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf); | |
20831 | + | |
20832 | + if (ui_helper_data.pid != -1) | |
20833 | + userui_wait_for_keypress(0); | |
20834 | +} | |
20835 | + | |
20836 | +/** | |
20837 | + * request_abort_hibernate - Abort hibernating or resuming at user request. | |
20838 | + * | |
20839 | + * Handle the user requesting the cancellation of a hibernation or resume by | |
20840 | + * pressing escape. | |
20841 | + */ | |
20842 | +static void request_abort_hibernate(void) | |
20843 | +{ | |
7e46296a AM |
20844 | + if (test_result_state(TOI_ABORT_REQUESTED) || |
20845 | + !test_action_state(TOI_CAN_CANCEL)) | |
2380c486 JR |
20846 | + return; |
20847 | + | |
20848 | + if (test_toi_state(TOI_NOW_RESUMING)) { | |
20849 | + toi_prepare_status(CLEAR_BAR, "Escape pressed. " | |
20850 | + "Powering down again."); | |
20851 | + set_toi_state(TOI_STOP_RESUME); | |
20852 | + while (!test_toi_state(TOI_IO_STOPPED)) | |
20853 | + schedule(); | |
20854 | + if (toiActiveAllocator->mark_resume_attempted) | |
20855 | + toiActiveAllocator->mark_resume_attempted(0); | |
20856 | + toi_power_down(); | |
20857 | + } | |
20858 | + | |
20859 | + toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :" | |
20860 | + " ABORTING HIBERNATION ---"); | |
20861 | + set_abort_result(TOI_ABORT_REQUESTED); | |
20862 | + wake_up_interruptible(&userui_wait_for_key); | |
20863 | +} | |
20864 | + | |
20865 | +/** | |
20866 | + * userui_user_rcv_msg - Receive a netlink message from userui. | |
20867 | + * | |
20868 | + * @skb: skb received. | |
20869 | + * @nlh: Netlink header received. | |
20870 | + */ | |
20871 | +static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |
20872 | +{ | |
20873 | + int type; | |
20874 | + int *data; | |
20875 | + | |
20876 | + type = nlh->nlmsg_type; | |
20877 | + | |
20878 | + /* A control message: ignore them */ | |
20879 | + if (type < NETLINK_MSG_BASE) | |
20880 | + return 0; | |
20881 | + | |
20882 | + /* Unknown message: reply with EINVAL */ | |
20883 | + if (type >= USERUI_MSG_MAX) | |
20884 | + return -EINVAL; | |
20885 | + | |
20886 | + /* All operations require privileges, even GET */ | |
20887 | + if (security_netlink_recv(skb, CAP_NET_ADMIN)) | |
20888 | + return -EPERM; | |
20889 | + | |
20890 | + /* Only allow one task to receive NOFREEZE privileges */ | |
20891 | + if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) { | |
20892 | + printk(KERN_INFO "Got NOFREEZE_ME request when " | |
20893 | + "ui_helper_data.pid is %d.\n", ui_helper_data.pid); | |
20894 | + return -EBUSY; | |
20895 | + } | |
20896 | + | |
20897 | + data = (int *) NLMSG_DATA(nlh); | |
20898 | + | |
20899 | + switch (type) { | |
20900 | + case USERUI_MSG_ABORT: | |
20901 | + request_abort_hibernate(); | |
20902 | + return 0; | |
20903 | + case USERUI_MSG_GET_STATE: | |
20904 | + toi_send_netlink_message(&ui_helper_data, | |
20905 | + USERUI_MSG_GET_STATE, &toi_bkd.toi_action, | |
20906 | + sizeof(toi_bkd.toi_action)); | |
20907 | + return 0; | |
20908 | + case USERUI_MSG_GET_DEBUG_STATE: | |
20909 | + toi_send_netlink_message(&ui_helper_data, | |
20910 | + USERUI_MSG_GET_DEBUG_STATE, | |
20911 | + &toi_bkd.toi_debug_state, | |
20912 | + sizeof(toi_bkd.toi_debug_state)); | |
20913 | + return 0; | |
20914 | + case USERUI_MSG_SET_STATE: | |
20915 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
20916 | + return -EINVAL; | |
20917 | + ui_nl_set_state(*data); | |
20918 | + return 0; | |
20919 | + case USERUI_MSG_SET_DEBUG_STATE: | |
20920 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
20921 | + return -EINVAL; | |
20922 | + toi_bkd.toi_debug_state = (*data); | |
20923 | + return 0; | |
20924 | + case USERUI_MSG_SPACE: | |
20925 | + wake_up_interruptible(&userui_wait_for_key); | |
20926 | + return 0; | |
20927 | + case USERUI_MSG_GET_POWERDOWN_METHOD: | |
20928 | + toi_send_netlink_message(&ui_helper_data, | |
20929 | + USERUI_MSG_GET_POWERDOWN_METHOD, | |
20930 | + &toi_poweroff_method, | |
20931 | + sizeof(toi_poweroff_method)); | |
20932 | + return 0; | |
20933 | + case USERUI_MSG_SET_POWERDOWN_METHOD: | |
20934 | + if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char))) | |
20935 | + return -EINVAL; | |
20936 | + toi_poweroff_method = (unsigned long)(*data); | |
20937 | + return 0; | |
20938 | + case USERUI_MSG_GET_LOGLEVEL: | |
20939 | + toi_send_netlink_message(&ui_helper_data, | |
20940 | + USERUI_MSG_GET_LOGLEVEL, | |
20941 | + &toi_bkd.toi_default_console_level, | |
20942 | + sizeof(toi_bkd.toi_default_console_level)); | |
20943 | + return 0; | |
20944 | + case USERUI_MSG_SET_LOGLEVEL: | |
20945 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
20946 | + return -EINVAL; | |
20947 | + toi_bkd.toi_default_console_level = (*data); | |
20948 | + return 0; | |
20949 | + case USERUI_MSG_PRINTK: | |
e999739a | 20950 | + printk(KERN_INFO "%s", (char *) data); |
2380c486 JR |
20951 | + return 0; |
20952 | + } | |
20953 | + | |
20954 | + /* Unhandled here */ | |
20955 | + return 1; | |
20956 | +} | |
20957 | + | |
20958 | +/** | |
20959 | + * userui_cond_pause - Possibly pause at user request. | |
20960 | + * | |
20961 | + * @pause: Whether to pause or just display the message. | |
20962 | + * @message: Message to display at the start of pausing. | |
20963 | + * | |
20964 | + * Potentially pause and wait for the user to tell us to continue. We normally | |
20965 | + * only pause when @pause is set. While paused, the user can do things like | |
20966 | + * changing the loglevel, toggling the display of debugging sections and such | |
20967 | + * like. | |
20968 | + */ | |
20969 | +static void userui_cond_pause(int pause, char *message) | |
20970 | +{ | |
20971 | + int displayed_message = 0, last_key = 0; | |
20972 | + | |
20973 | + while (last_key != 32 && | |
20974 | + ui_helper_data.pid != -1 && | |
20975 | + ((test_action_state(TOI_PAUSE) && pause) || | |
20976 | + (test_action_state(TOI_SINGLESTEP)))) { | |
20977 | + if (!displayed_message) { | |
20978 | + toi_prepare_status(DONT_CLEAR_BAR, | |
20979 | + "%s Press SPACE to continue.%s", | |
20980 | + message ? message : "", | |
20981 | + (test_action_state(TOI_SINGLESTEP)) ? | |
20982 | + " Single step on." : ""); | |
20983 | + displayed_message = 1; | |
20984 | + } | |
20985 | + last_key = userui_wait_for_keypress(0); | |
20986 | + } | |
20987 | + schedule(); | |
20988 | +} | |
20989 | + | |
20990 | +/** | |
20991 | + * userui_prepare_console - Prepare the console for use. | |
20992 | + * | |
20993 | + * Prepare a console for use, saving current kmsg settings and attempting to | |
20994 | + * start userui. Console loglevel changes are handled by userui. | |
20995 | + */ | |
20996 | +static void userui_prepare_console(void) | |
20997 | +{ | |
5dd10c98 | 20998 | + orig_kmsg = vt_kmsg_redirect(fg_console + 1); |
2380c486 JR |
20999 | + |
21000 | + ui_helper_data.pid = -1; | |
21001 | + | |
21002 | + if (!userui_ops.enabled) { | |
e999739a | 21003 | + printk(KERN_INFO "TuxOnIce: Userui disabled.\n"); |
2380c486 JR |
21004 | + return; |
21005 | + } | |
21006 | + | |
21007 | + if (*ui_helper_data.program) | |
21008 | + toi_netlink_setup(&ui_helper_data); | |
21009 | + else | |
21010 | + printk(KERN_INFO "TuxOnIce: Userui program not configured.\n"); | |
21011 | +} | |
21012 | + | |
21013 | +/** | |
21014 | + * userui_cleanup_console - Cleanup after a cycle. | |
21015 | + * | |
21016 | + * Tell userui to cleanup, and restore kmsg_redirect to its original value. | |
21017 | + */ | |
21018 | + | |
21019 | +static void userui_cleanup_console(void) | |
21020 | +{ | |
21021 | + if (ui_helper_data.pid > -1) | |
21022 | + toi_netlink_close(&ui_helper_data); | |
21023 | + | |
5dd10c98 | 21024 | + vt_kmsg_redirect(orig_kmsg); |
2380c486 JR |
21025 | +} |
21026 | + | |
21027 | +/* | |
21028 | + * User interface specific /sys/power/tuxonice entries. | |
21029 | + */ | |
21030 | + | |
21031 | +static struct toi_sysfs_data sysfs_params[] = { | |
21032 | +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) | |
21033 | + SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action, | |
21034 | + TOI_CAN_CANCEL, 0), | |
21035 | + SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action, | |
21036 | + TOI_PAUSE, 0), | |
21037 | + SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL), | |
21038 | + SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, | |
21039 | + 2048, 0, NULL), | |
21040 | + SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0, | |
21041 | + set_ui_program_set), | |
21042 | + SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL) | |
21043 | +#endif | |
21044 | +}; | |
21045 | + | |
21046 | +static struct toi_module_ops userui_ops = { | |
21047 | + .type = MISC_MODULE, | |
21048 | + .name = "userui", | |
21049 | + .shared_directory = "user_interface", | |
21050 | + .module = THIS_MODULE, | |
21051 | + .storage_needed = userui_storage_needed, | |
21052 | + .save_config_info = userui_save_config_info, | |
21053 | + .load_config_info = userui_load_config_info, | |
21054 | + .memory_needed = userui_memory_needed, | |
5dd10c98 | 21055 | + .post_atomic_restore = userui_post_atomic_restore, |
2380c486 JR |
21056 | + .sysfs_data = sysfs_params, |
21057 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
21058 | + sizeof(struct toi_sysfs_data), | |
21059 | +}; | |
21060 | + | |
21061 | +static struct ui_ops my_ui_ops = { | |
2380c486 JR |
21062 | + .update_status = userui_update_status, |
21063 | + .message = userui_message, | |
21064 | + .prepare_status = userui_prepare_status, | |
21065 | + .abort = userui_abort_hibernate, | |
21066 | + .cond_pause = userui_cond_pause, | |
21067 | + .prepare = userui_prepare_console, | |
21068 | + .cleanup = userui_cleanup_console, | |
21069 | + .wait_for_key = userui_wait_for_keypress, | |
21070 | +}; | |
21071 | + | |
21072 | +/** | |
21073 | + * toi_user_ui_init - Boot time initialisation for user interface. | |
21074 | + * | |
21075 | + * Invoked from the core init routine. | |
21076 | + */ | |
21077 | +static __init int toi_user_ui_init(void) | |
21078 | +{ | |
21079 | + int result; | |
21080 | + | |
21081 | + ui_helper_data.nl = NULL; | |
21082 | + strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255); | |
21083 | + ui_helper_data.pid = -1; | |
21084 | + ui_helper_data.skb_size = sizeof(struct userui_msg_params); | |
21085 | + ui_helper_data.pool_limit = 6; | |
21086 | + ui_helper_data.netlink_id = NETLINK_TOI_USERUI; | |
21087 | + ui_helper_data.name = "userspace ui"; | |
21088 | + ui_helper_data.rcv_msg = userui_user_rcv_msg; | |
21089 | + ui_helper_data.interface_version = 8; | |
21090 | + ui_helper_data.must_init = 0; | |
21091 | + ui_helper_data.not_ready = userui_cleanup_console; | |
21092 | + init_completion(&ui_helper_data.wait_for_process); | |
21093 | + result = toi_register_module(&userui_ops); | |
21094 | + if (!result) | |
21095 | + result = toi_register_ui_ops(&my_ui_ops); | |
21096 | + if (result) | |
21097 | + toi_unregister_module(&userui_ops); | |
21098 | + | |
21099 | + return result; | |
21100 | +} | |
21101 | + | |
21102 | +#ifdef MODULE | |
21103 | +/** | |
21104 | + * toi_user_ui_ext - Cleanup code for if the core is unloaded. | |
21105 | + */ | |
21106 | +static __exit void toi_user_ui_exit(void) | |
21107 | +{ | |
21108 | + toi_netlink_close_complete(&ui_helper_data); | |
21109 | + toi_remove_ui_ops(&my_ui_ops); | |
21110 | + toi_unregister_module(&userui_ops); | |
21111 | +} | |
21112 | + | |
21113 | +module_init(toi_user_ui_init); | |
21114 | +module_exit(toi_user_ui_exit); | |
21115 | +MODULE_AUTHOR("Nigel Cunningham"); | |
21116 | +MODULE_DESCRIPTION("TuxOnIce Userui Support"); | |
21117 | +MODULE_LICENSE("GPL"); | |
21118 | +#else | |
21119 | +late_initcall(toi_user_ui_init); | |
21120 | +#endif | |
21121 | diff --git a/kernel/power/user.c b/kernel/power/user.c | |
5bd2511a | 21122 | index e819e17..193abc1 100644 |
2380c486 JR |
21123 | --- a/kernel/power/user.c |
21124 | +++ b/kernel/power/user.c | |
92bca44c | 21125 | @@ -64,6 +64,7 @@ static struct snapshot_data { |
2380c486 JR |
21126 | } snapshot_state; |
21127 | ||
21128 | atomic_t snapshot_device_available = ATOMIC_INIT(1); | |
21129 | +EXPORT_SYMBOL_GPL(snapshot_device_available); | |
21130 | ||
21131 | static int snapshot_open(struct inode *inode, struct file *filp) | |
21132 | { | |
21133 | diff --git a/kernel/printk.c b/kernel/printk.c | |
5bd2511a | 21134 | index 444b770..49ddbab 100644 |
2380c486 JR |
21135 | --- a/kernel/printk.c |
21136 | +++ b/kernel/printk.c | |
21137 | @@ -32,6 +32,7 @@ | |
21138 | #include <linux/security.h> | |
21139 | #include <linux/bootmem.h> | |
21140 | #include <linux/syscalls.h> | |
21141 | +#include <linux/suspend.h> | |
9474138d | 21142 | #include <linux/kexec.h> |
5bd2511a | 21143 | #include <linux/kdb.h> |
5dd10c98 | 21144 | #include <linux/ratelimit.h> |
5bd2511a | 21145 | @@ -70,6 +71,7 @@ int console_printk[4] = { |
2380c486 JR |
21146 | MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ |
21147 | DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ | |
21148 | }; | |
21149 | +EXPORT_SYMBOL_GPL(console_printk); | |
21150 | ||
de6743ae AM |
21151 | /* |
21152 | * Low level drivers may need that to know if they can schedule in | |
5bd2511a | 21153 | @@ -974,6 +976,7 @@ void suspend_console(void) |
2380c486 JR |
21154 | console_suspended = 1; |
21155 | up(&console_sem); | |
21156 | } | |
21157 | +EXPORT_SYMBOL_GPL(suspend_console); | |
21158 | ||
21159 | void resume_console(void) | |
21160 | { | |
5bd2511a | 21161 | @@ -983,6 +986,7 @@ void resume_console(void) |
2380c486 JR |
21162 | console_suspended = 0; |
21163 | release_console_sem(); | |
21164 | } | |
21165 | +EXPORT_SYMBOL_GPL(resume_console); | |
21166 | ||
21167 | /** | |
21168 | * acquire_console_sem - lock the console system for exclusive use. | |
2380c486 | 21169 | diff --git a/mm/bootmem.c b/mm/bootmem.c |
de6743ae | 21170 | index 58c66cc..f79d461 100644 |
2380c486 JR |
21171 | --- a/mm/bootmem.c |
21172 | +++ b/mm/bootmem.c | |
de6743ae | 21173 | @@ -25,6 +25,7 @@ |
2380c486 JR |
21174 | unsigned long max_low_pfn; |
21175 | unsigned long min_low_pfn; | |
21176 | unsigned long max_pfn; | |
21177 | +EXPORT_SYMBOL_GPL(max_pfn); | |
21178 | ||
21179 | #ifdef CONFIG_CRASH_DUMP | |
21180 | /* | |
21181 | diff --git a/mm/highmem.c b/mm/highmem.c | |
5bd2511a | 21182 | index 66baa20..2dd71c1 100644 |
2380c486 JR |
21183 | --- a/mm/highmem.c |
21184 | +++ b/mm/highmem.c | |
92bca44c | 21185 | @@ -57,6 +57,7 @@ unsigned int nr_free_highpages (void) |
2380c486 JR |
21186 | |
21187 | return pages; | |
21188 | } | |
21189 | +EXPORT_SYMBOL_GPL(nr_free_highpages); | |
21190 | ||
21191 | static int pkmap_count[LAST_PKMAP]; | |
21192 | static unsigned int last_pkmap_nr; | |
21193 | diff --git a/mm/memory.c b/mm/memory.c | |
5bd2511a | 21194 | index 119b7cc..ffd5f08 100644 |
2380c486 JR |
21195 | --- a/mm/memory.c |
21196 | +++ b/mm/memory.c | |
5bd2511a | 21197 | @@ -1340,6 +1340,7 @@ no_page_table: |
7e46296a | 21198 | return ERR_PTR(-EFAULT); |
2380c486 JR |
21199 | return page; |
21200 | } | |
21201 | +EXPORT_SYMBOL_GPL(follow_page); | |
21202 | ||
7e46296a AM |
21203 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
21204 | unsigned long start, int nr_pages, unsigned int gup_flags, | |
2380c486 | 21205 | diff --git a/mm/mmzone.c b/mm/mmzone.c |
9474138d | 21206 | index f5b7d17..72a6770 100644 |
2380c486 JR |
21207 | --- a/mm/mmzone.c |
21208 | +++ b/mm/mmzone.c | |
9474138d | 21209 | @@ -14,6 +14,7 @@ struct pglist_data *first_online_pgdat(void) |
2380c486 JR |
21210 | { |
21211 | return NODE_DATA(first_online_node); | |
21212 | } | |
21213 | +EXPORT_SYMBOL_GPL(first_online_pgdat); | |
21214 | ||
21215 | struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) | |
21216 | { | |
9474138d | 21217 | @@ -23,6 +24,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) |
2380c486 JR |
21218 | return NULL; |
21219 | return NODE_DATA(nid); | |
21220 | } | |
21221 | +EXPORT_SYMBOL_GPL(next_online_pgdat); | |
21222 | ||
21223 | /* | |
21224 | * next_zone - helper magic for for_each_zone() | |
9474138d | 21225 | @@ -42,6 +44,7 @@ struct zone *next_zone(struct zone *zone) |
2380c486 JR |
21226 | } |
21227 | return zone; | |
21228 | } | |
21229 | +EXPORT_SYMBOL_GPL(next_zone); | |
21230 | ||
21231 | static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) | |
21232 | { | |
21233 | diff --git a/mm/page-writeback.c b/mm/page-writeback.c | |
5bd2511a | 21234 | index bbd396a..39e7638 100644 |
2380c486 JR |
21235 | --- a/mm/page-writeback.c |
21236 | +++ b/mm/page-writeback.c | |
7e46296a | 21237 | @@ -99,6 +99,7 @@ unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */ |
2380c486 JR |
21238 | * Flag that makes the machine dump writes/reads and block dirtyings. |
21239 | */ | |
21240 | int block_dump; | |
21241 | +EXPORT_SYMBOL_GPL(block_dump); | |
21242 | ||
21243 | /* | |
21244 | * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: | |
21245 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
5bd2511a | 21246 | index 431214b..1226024 100644 |
2380c486 JR |
21247 | --- a/mm/page_alloc.c |
21248 | +++ b/mm/page_alloc.c | |
5bd2511a | 21249 | @@ -2273,6 +2273,26 @@ static unsigned int nr_free_zone_pages(int offset) |
2380c486 JR |
21250 | return sum; |
21251 | } | |
21252 | ||
21253 | +static unsigned int nr_unallocated_zone_pages(int offset) | |
21254 | +{ | |
21255 | + struct zoneref *z; | |
21256 | + struct zone *zone; | |
21257 | + | |
21258 | + /* Just pick one node, since fallback list is circular */ | |
21259 | + unsigned int sum = 0; | |
21260 | + | |
21261 | + struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); | |
21262 | + | |
21263 | + for_each_zone_zonelist(zone, z, zonelist, offset) { | |
92bca44c | 21264 | + unsigned long high = high_wmark_pages(zone); |
2380c486 JR |
21265 | + unsigned long left = zone_page_state(zone, NR_FREE_PAGES); |
21266 | + if (left > high) | |
21267 | + sum += left - high; | |
21268 | + } | |
21269 | + | |
21270 | + return sum; | |
21271 | +} | |
21272 | + | |
21273 | /* | |
21274 | * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL | |
21275 | */ | |
5bd2511a | 21276 | @@ -2283,6 +2303,15 @@ unsigned int nr_free_buffer_pages(void) |
2380c486 JR |
21277 | EXPORT_SYMBOL_GPL(nr_free_buffer_pages); |
21278 | ||
21279 | /* | |
21280 | + * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL | |
21281 | + */ | |
21282 | +unsigned int nr_unallocated_buffer_pages(void) | |
21283 | +{ | |
21284 | + return nr_unallocated_zone_pages(gfp_zone(GFP_USER)); | |
21285 | +} | |
21286 | +EXPORT_SYMBOL_GPL(nr_unallocated_buffer_pages); | |
21287 | + | |
21288 | +/* | |
21289 | * Amount of free RAM allocatable within all zones | |
21290 | */ | |
21291 | unsigned int nr_free_pagecache_pages(void) | |
e999739a | 21292 | diff --git a/mm/shmem.c b/mm/shmem.c |
5bd2511a | 21293 | index f65f840..3024d35 100644 |
e999739a | 21294 | --- a/mm/shmem.c |
21295 | +++ b/mm/shmem.c | |
5bd2511a | 21296 | @@ -1568,6 +1568,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode |
e999739a | 21297 | memset(info, 0, (char *)inode - (char *)info); |
21298 | spin_lock_init(&info->lock); | |
21299 | info->flags = flags & VM_NORESERVE; | |
21300 | + if (flags & VM_ATOMIC_COPY) | |
21301 | + inode->i_flags |= S_ATOMIC_COPY; | |
21302 | INIT_LIST_HEAD(&info->swaplist); | |
92bca44c | 21303 | cache_no_acl(inode); |
e999739a | 21304 | |
e999739a | 21305 | diff --git a/mm/swap_state.c b/mm/swap_state.c |
de6743ae | 21306 | index e10f583..86bc26a 100644 |
e999739a | 21307 | --- a/mm/swap_state.c |
21308 | +++ b/mm/swap_state.c | |
de6743ae | 21309 | @@ -47,6 +47,7 @@ struct address_space swapper_space = { |
e999739a | 21310 | .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), |
21311 | .backing_dev_info = &swap_backing_dev_info, | |
21312 | }; | |
21313 | +EXPORT_SYMBOL_GPL(swapper_space); | |
21314 | ||
21315 | #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) | |
21316 | ||
2380c486 | 21317 | diff --git a/mm/swapfile.c b/mm/swapfile.c |
5bd2511a | 21318 | index 03aa2d5..55176da 100644 |
2380c486 JR |
21319 | --- a/mm/swapfile.c |
21320 | +++ b/mm/swapfile.c | |
5dd10c98 AM |
21321 | @@ -39,7 +39,6 @@ |
21322 | static bool swap_count_continued(struct swap_info_struct *, pgoff_t, | |
21323 | unsigned char); | |
21324 | static void free_swap_count_continuations(struct swap_info_struct *); | |
21325 | -static sector_t map_swap_entry(swp_entry_t, struct block_device**); | |
21326 | ||
21327 | static DEFINE_SPINLOCK(swap_lock); | |
21328 | static unsigned int nr_swapfiles; | |
5bd2511a | 21329 | @@ -480,6 +479,7 @@ noswap: |
2380c486 JR |
21330 | spin_unlock(&swap_lock); |
21331 | return (swp_entry_t) {0}; | |
21332 | } | |
21333 | +EXPORT_SYMBOL_GPL(get_swap_page); | |
21334 | ||
92bca44c | 21335 | /* The only caller of this function is now susupend routine */ |
2380c486 | 21336 | swp_entry_t get_swap_page_of_type(int type) |
5bd2511a | 21337 | @@ -502,6 +502,7 @@ swp_entry_t get_swap_page_of_type(int type) |
7e46296a AM |
21338 | spin_unlock(&swap_lock); |
21339 | return (swp_entry_t) {0}; | |
21340 | } | |
21341 | +EXPORT_SYMBOL_GPL(get_swap_page_of_type); | |
21342 | ||
5dd10c98 | 21343 | static struct swap_info_struct *swap_info_get(swp_entry_t entry) |
7e46296a | 21344 | { |
5bd2511a | 21345 | @@ -626,6 +627,7 @@ void swapcache_free(swp_entry_t entry, struct page *page) |
5dd10c98 | 21346 | spin_unlock(&swap_lock); |
2380c486 JR |
21347 | } |
21348 | } | |
21349 | +EXPORT_SYMBOL_GPL(swap_free); | |
21350 | ||
21351 | /* | |
21352 | * How many references to page are currently swapped out? | |
5bd2511a | 21353 | @@ -1302,7 +1304,7 @@ static void drain_mmlist(void) |
5dd10c98 AM |
21354 | * Note that the type of this function is sector_t, but it returns page offset |
21355 | * into the bdev, not sector offset. | |
21356 | */ | |
21357 | -static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) | |
21358 | +sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) | |
21359 | { | |
21360 | struct swap_info_struct *sis; | |
21361 | struct swap_extent *start_se; | |
5bd2511a | 21362 | @@ -1329,6 +1331,7 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) |
2380c486 JR |
21363 | BUG_ON(se == start_se); /* It *must* be present */ |
21364 | } | |
21365 | } | |
5dd10c98 | 21366 | +EXPORT_SYMBOL_GPL(map_swap_entry); |
2380c486 | 21367 | |
2380c486 | 21368 | /* |
5dd10c98 | 21369 | * Returns the page offset into bdev for the specified page's swap entry. |
5bd2511a | 21370 | @@ -1671,6 +1674,7 @@ out_dput: |
2380c486 JR |
21371 | out: |
21372 | return err; | |
21373 | } | |
21374 | +EXPORT_SYMBOL_GPL(sys_swapoff); | |
21375 | ||
21376 | #ifdef CONFIG_PROC_FS | |
21377 | /* iterator */ | |
5bd2511a | 21378 | @@ -2100,6 +2104,7 @@ out: |
2380c486 JR |
21379 | } |
21380 | return error; | |
21381 | } | |
21382 | +EXPORT_SYMBOL_GPL(sys_swapon); | |
21383 | ||
21384 | void si_swapinfo(struct sysinfo *val) | |
21385 | { | |
5bd2511a | 21386 | @@ -2117,6 +2122,7 @@ void si_swapinfo(struct sysinfo *val) |
2380c486 JR |
21387 | val->totalswap = total_swap_pages + nr_to_be_unused; |
21388 | spin_unlock(&swap_lock); | |
21389 | } | |
21390 | +EXPORT_SYMBOL_GPL(si_swapinfo); | |
21391 | ||
21392 | /* | |
21393 | * Verify that a swap entry is valid and increment its swap map count. | |
5bd2511a | 21394 | @@ -2228,6 +2234,13 @@ int swapcache_prepare(swp_entry_t entry) |
5dd10c98 | 21395 | return __swap_duplicate(entry, SWAP_HAS_CACHE); |
2380c486 | 21396 | } |
2380c486 | 21397 | |
5dd10c98 AM |
21398 | + |
21399 | +struct swap_info_struct *get_swap_info_struct(unsigned type) | |
21400 | +{ | |
21401 | + return swap_info[type]; | |
21402 | +} | |
21403 | +EXPORT_SYMBOL_GPL(get_swap_info_struct); | |
21404 | + | |
2380c486 JR |
21405 | /* |
21406 | * swap_lock prevents swap_map being freed. Don't grab an extra | |
5dd10c98 | 21407 | * reference on the swaphandle, it doesn't matter if it becomes unused. |
2380c486 | 21408 | diff --git a/mm/vmscan.c b/mm/vmscan.c |
5bd2511a | 21409 | index 9c7e57c..cd5995c 100644 |
2380c486 JR |
21410 | --- a/mm/vmscan.c |
21411 | +++ b/mm/vmscan.c | |
5bd2511a | 21412 | @@ -2335,6 +2335,9 @@ void wakeup_kswapd(struct zone *zone, int order) |
2380c486 JR |
21413 | if (!populated_zone(zone)) |
21414 | return; | |
21415 | ||
21416 | + if (freezer_is_on()) | |
21417 | + return; | |
21418 | + | |
21419 | pgdat = zone->zone_pgdat; | |
92bca44c | 21420 | if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) |
2380c486 | 21421 | return; |
5bd2511a | 21422 | @@ -2391,11 +2394,11 @@ unsigned long zone_reclaimable_pages(struct zone *zone) |
de6743ae AM |
21423 | * LRU order by reclaiming preferentially |
21424 | * inactive > active > active referenced > active mapped | |
21425 | */ | |
21426 | -unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |
21427 | +unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask) | |
21428 | { | |
21429 | struct reclaim_state reclaim_state; | |
21430 | struct scan_control sc = { | |
21431 | - .gfp_mask = GFP_HIGHUSER_MOVABLE, | |
21432 | + .gfp_mask = mask, | |
21433 | .may_swap = 1, | |
21434 | .may_unmap = 1, | |
21435 | .may_writepage = 1, | |
5bd2511a | 21436 | @@ -2421,6 +2424,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) |
2380c486 | 21437 | |
5dd10c98 | 21438 | return nr_reclaimed; |
2380c486 | 21439 | } |
de6743ae AM |
21440 | +EXPORT_SYMBOL_GPL(shrink_memory_mask); |
21441 | + | |
21442 | +unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |
21443 | +{ | |
21444 | + return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE); | |
21445 | +} | |
2380c486 | 21446 | +EXPORT_SYMBOL_GPL(shrink_all_memory); |
92bca44c | 21447 | #endif /* CONFIG_HIBERNATION */ |
2380c486 JR |
21448 | |
21449 | /* It's optimal to keep kswapds on the same CPUs as their memory, but |