]>
Commit | Line | Data |
---|---|---|
5dd10c98 | 1 | diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt |
85eb3c9d | 2 | index 2b2407d..50fc7fb 100644 |
5dd10c98 AM |
3 | --- a/Documentation/kernel-parameters.txt |
4 | +++ b/Documentation/kernel-parameters.txt | |
85eb3c9d | 5 | @@ -2775,6 +2775,9 @@ and is between 256 and 4096 characters. It is defined in the file |
de6743ae AM |
6 | HIGHMEM regardless of setting |
7 | of CONFIG_HIGHPTE. | |
5dd10c98 AM |
8 | |
9 | + uuid_debug= (Boolean) whether to enable debugging of TuxOnIce's | |
10 | + uuid support. | |
11 | + | |
12 | vdso= [X86,SH] | |
13 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) | |
14 | vdso=1: enable VDSO (default) | |
2380c486 JR |
15 | diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt |
16 | new file mode 100644 | |
e999739a | 17 | index 0000000..7a96186 |
2380c486 JR |
18 | --- /dev/null |
19 | +++ b/Documentation/power/tuxonice-internals.txt | |
e999739a | 20 | @@ -0,0 +1,477 @@ |
2380c486 | 21 | + TuxOnIce 3.0 Internal Documentation. |
e999739a | 22 | + Updated to 26 March 2009 |
2380c486 JR |
23 | + |
24 | +1. Introduction. | |
25 | + | |
26 | + TuxOnIce 3.0 is an addition to the Linux Kernel, designed to | |
27 | + allow the user to quickly shutdown and quickly boot a computer, without | |
28 | + needing to close documents or programs. It is equivalent to the | |
29 | + hibernate facility in some laptops. This implementation, however, | |
30 | + requires no special BIOS or hardware support. | |
31 | + | |
32 | + The code in these files is based upon the original implementation | |
33 | + prepared by Gabor Kuti and additional work by Pavel Machek and a | |
34 | + host of others. This code has been substantially reworked by Nigel | |
35 | + Cunningham, again with the help and testing of many others, not the | |
36 | + least of whom is Michael Frank. At its heart, however, the operation is | |
37 | + essentially the same as Gabor's version. | |
38 | + | |
39 | +2. Overview of operation. | |
40 | + | |
41 | + The basic sequence of operations is as follows: | |
42 | + | |
43 | + a. Quiesce all other activity. | |
44 | + b. Ensure enough memory and storage space are available, and attempt | |
45 | + to free memory/storage if necessary. | |
46 | + c. Allocate the required memory and storage space. | |
47 | + d. Write the image. | |
48 | + e. Power down. | |
49 | + | |
50 | + There are a number of complicating factors which mean that things are | |
51 | + not as simple as the above would imply, however... | |
52 | + | |
53 | + o The activity of each process must be stopped at a point where it will | |
54 | + not be holding locks necessary for saving the image, or unexpectedly | |
55 | + restart operations due to something like a timeout and thereby make | |
56 | + our image inconsistent. | |
57 | + | |
58 | + o It is desirable that we sync outstanding I/O to disk before calculating | |
59 | + image statistics. This reduces corruption if one should suspend but | |
60 | + then not resume, and also makes later parts of the operation safer (see | |
61 | + below). | |
62 | + | |
63 | + o We need to get as close as we can to an atomic copy of the data. | |
64 | + Inconsistencies in the image will result in inconsistent memory contents at | |
65 | + resume time, and thus in instability of the system and/or file system | |
66 | + corruption. This would appear to imply a maximum image size of one half of | |
67 | + the amount of RAM, but we have a solution... (again, below). | |
68 | + | |
69 | + o In 2.6, we choose to play nicely with the other suspend-to-disk | |
70 | + implementations. | |
71 | + | |
72 | +3. Detailed description of internals. | |
73 | + | |
74 | + a. Quiescing activity. | |
75 | + | |
76 | + Safely quiescing the system is achieved using three separate but related | |
77 | + aspects. | |
78 | + | |
79 | + First, we note that the vast majority of processes don't need to run during | |
80 | + suspend. They can be 'frozen'. We therefore implement a refrigerator | |
81 | + routine, which processes enter and in which they remain until the cycle is | |
82 | + complete. Processes enter the refrigerator via try_to_freeze() invocations | |
83 | + at appropriate places. A process cannot be frozen in any old place. It | |
84 | + must not be holding locks that will be needed for writing the image or | |
85 | + freezing other processes. For this reason, userspace processes generally | |
86 | + enter the refrigerator via the signal handling code, and kernel threads at | |
87 | + the place in their event loops where they drop locks and yield to other | |
88 | + processes or sleep. | |
89 | + | |
90 | + The task of freezing processes is complicated by the fact that there can be | |
91 | + interdependencies between processes. Freezing process A before process B may | |
92 | + mean that process B cannot be frozen, because it stops at waiting for | |
93 | + process A rather than in the refrigerator. This issue is seen where | |
94 | + userspace waits on freezeable kernel threads or fuse filesystem threads. To | |
95 | + address this issue, we implement the following algorithm for quiescing | |
96 | + activity: | |
97 | + | |
98 | + - Freeze filesystems (including fuse - userspace programs starting | |
99 | + new requests are immediately frozen; programs already running | |
100 | + requests complete their work before being frozen in the next | |
101 | + step) | |
102 | + - Freeze userspace | |
103 | + - Thaw filesystems (this is safe now that userspace is frozen and no | |
104 | + fuse requests are outstanding). | |
105 | + - Invoke sys_sync (noop on fuse). | |
106 | + - Freeze filesystems | |
107 | + - Freeze kernel threads | |
108 | + | |
109 | + If we need to free memory, we thaw kernel threads and filesystems, but not | |
110 | + userspace. We can then free caches without worrying about deadlocks due to | |
111 | + swap files being on frozen filesystems or such like. | |
112 | + | |
113 | + b. Ensure enough memory & storage are available. | |
114 | + | |
115 | + We have a number of constraints to meet in order to be able to successfully | |
116 | + suspend and resume. | |
117 | + | |
118 | + First, the image will be written in two parts, described below. One of these | |
119 | + parts needs to have an atomic copy made, which of course implies a maximum | |
120 | + size of one half of the amount of system memory. The other part ('pageset') | |
121 | + is not atomically copied, and can therefore be as large or small as desired. | |
122 | + | |
123 | + Second, we have constraints on the amount of storage available. In these | |
124 | + calculations, we may also consider any compression that will be done. The | |
125 | + cryptoapi module allows the user to configure an expected compression ratio. | |
126 | + | |
127 | + Third, the user can specify an arbitrary limit on the image size, in | |
128 | + megabytes. This limit is treated as a soft limit, so that we don't fail the | |
129 | + attempt to suspend if we cannot meet this constraint. | |
130 | + | |
131 | + c. Allocate the required memory and storage space. | |
132 | + | |
133 | + Having done the initial freeze, we determine whether the above constraints | |
134 | + are met, and seek to allocate the metadata for the image. If the constraints | |
135 | + are not met, or we fail to allocate the required space for the metadata, we | |
136 | + seek to free the amount of memory that we calculate is needed and try again. | |
137 | + We allow up to four iterations of this loop before aborting the cycle. If we | |
138 | + do fail, it should only be because of a bug in TuxOnIce's calculations. | |
139 | + | |
140 | + These steps are merged together in the prepare_image function, found in | |
141 | + prepare_image.c. The functions are merged because of the cyclical nature | |
142 | + of the problem of calculating how much memory and storage is needed. Since | |
143 | + the data structures containing the information about the image must | |
144 | + themselves take memory and use storage, the amount of memory and storage | |
145 | + required changes as we prepare the image. Since the changes are not large, | |
146 | + only one or two iterations will be required to achieve a solution. | |
147 | + | |
148 | + The recursive nature of the algorithm is minimised by keeping user space | |
149 | + frozen while preparing the image, and by the fact that our records of which | |
150 | + pages are to be saved and which pageset they are saved in use bitmaps (so | |
151 | + that changes in number or fragmentation of the pages to be saved don't | |
152 | + feedback via changes in the amount of memory needed for metadata). The | |
153 | + recursiveness is thus limited to any extra slab pages allocated to store the | |
154 | + extents that record storage used, and the effects of seeking to free memory. | |
155 | + | |
156 | + d. Write the image. | |
157 | + | |
158 | + We previously mentioned the need to create an atomic copy of the data, and | |
159 | + the half-of-memory limitation that is implied in this. This limitation is | |
160 | + circumvented by dividing the memory to be saved into two parts, called | |
161 | + pagesets. | |
162 | + | |
e999739a | 163 | + Pageset2 contains most of the page cache - the pages on the active and |
164 | + inactive LRU lists that aren't needed or modified while TuxOnIce is | |
165 | + running, so they can be safely written without an atomic copy. They are | |
166 | + therefore saved first and reloaded last. While saving these pages, | |
167 | + TuxOnIce carefully ensures that the work of writing the pages doesn't make | |
168 | + the image inconsistent. With the support for Kernel (Video) Mode Setting | |
169 | + going into the kernel at the time of writing, we need to check for pages | |
170 | + on the LRU that are used by KMS, and exclude them from pageset2. They are | |
171 | + atomically copied as part of pageset 1. | |
2380c486 JR |
172 | + |
173 | + Once pageset2 has been saved, we prepare to do the atomic copy of remaining | |
174 | + memory. As part of the preparation, we power down drivers, thereby providing | |
175 | + them with the opportunity to have their state recorded in the image. The | |
176 | + amount of memory allocated by drivers for this is usually negligible, but if | |
177 | + DRI is in use, video drivers may require significant amounts. Ideally we | |
178 | + would be able to query drivers while preparing the image as to the amount of | |
179 | + memory they will need. Unfortunately no such mechanism exists at the time of | |
180 | + writing. For this reason, TuxOnIce allows the user to set an | |
181 | + 'extra_pages_allowance', which is used to seek to ensure sufficient memory | |
182 | + is available for drivers at this point. TuxOnIce also lets the user set this | |
183 | + value to 0. In this case, a test driver suspend is done while preparing the | |
e999739a | 184 | + image, and the difference (plus a margin) used instead. TuxOnIce will also |
185 | + automatically restart the hibernation process (twice at most) if it finds | |
186 | + that the extra pages allowance is not sufficient. It will then use what was | |
187 | + actually needed (plus a margin, again). Failure to hibernate should thus | |
188 | + be an extremely rare occurrence. | |
2380c486 JR |
189 | + |
190 | + Having suspended the drivers, we save the CPU context before making an | |
191 | + atomic copy of pageset1, resuming the drivers and saving the atomic copy. | |
192 | + After saving the two pagesets, we just need to save our metadata before | |
193 | + powering down. | |
194 | + | |
195 | + As we mentioned earlier, the contents of pageset2 pages aren't needed once | |
196 | + they've been saved. We therefore use them as the destination of our atomic | |
197 | + copy. In the unlikely event that pageset1 is larger, extra pages are | |
198 | + allocated while the image is being prepared. This is normally only a real | |
199 | + possibility when the system has just been booted and the page cache is | |
200 | + small. | |
201 | + | |
202 | + This is where we need to be careful about syncing, however. Pageset2 will | |
203 | + probably contain filesystem meta data. If this is overwritten with pageset1 | |
204 | + and then a sync occurs, the filesystem will be corrupted - at least until | |
205 | + resume time and another sync of the restored data. Since there is a | |
206 | + possibility that the user might not resume or (may it never be!) that | |
e999739a | 207 | + TuxOnIce might oops, we do our utmost to avoid syncing filesystems after |
2380c486 JR |
208 | + copying pageset1. |
209 | + | |
210 | + e. Power down. | |
211 | + | |
212 | + Powering down uses standard kernel routines. TuxOnIce supports powering down | |
213 | + using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off. | |
214 | + Supporting suspend to ram (S3) as a power off option might sound strange, | |
215 | + but it allows the user to quickly get their system up and running again if | |
216 | + the battery doesn't run out (we just need to re-read the overwritten pages) | |
217 | + and if the battery does run out (or the user removes power), they can still | |
218 | + resume. | |
219 | + | |
220 | +4. Data Structures. | |
221 | + | |
222 | + TuxOnIce uses three main structures to store its metadata and configuration | |
223 | + information: | |
224 | + | |
225 | + a) Pageflags bitmaps. | |
226 | + | |
227 | + TuxOnIce records which pages will be in pageset1, pageset2, the destination | |
228 | + of the atomic copy and the source of the atomically restored image using | |
e999739a | 229 | + bitmaps. The code used is that written for swsusp, with small improvements |
230 | + to match TuxOnIce's requirements. | |
2380c486 JR |
231 | + |
232 | + The pageset1 bitmap is thus easily stored in the image header for use at | |
233 | + resume time. | |
234 | + | |
235 | + As mentioned above, using bitmaps also means that the amount of memory and | |
236 | + storage required for recording the above information is constant. This | |
237 | + greatly simplifies the work of preparing the image. In earlier versions of | |
238 | + TuxOnIce, extents were used to record which pages would be stored. In that | |
239 | + case, however, eating memory could result in greater fragmentation of the | |
240 | + lists of pages, which in turn required more memory to store the extents and | |
241 | + more storage in the image header. These could in turn require further | |
242 | + freeing of memory, and another iteration. All of this complexity is removed | |
243 | + by having bitmaps. | |
244 | + | |
245 | + Bitmaps also make a lot of sense because TuxOnIce only ever iterates | |
246 | + through the lists. There is therefore no cost to not being able to find the | |
247 | + nth page in order 0 time. We only need to worry about the cost of finding | |
248 | + the n+1th page, given the location of the nth page. Bitwise optimisations | |
249 | + help here. | |
250 | + | |
2380c486 JR |
251 | + b) Extents for block data. |
252 | + | |
253 | + TuxOnIce supports writing the image to multiple block devices. In the case | |
254 | + of swap, multiple partitions and/or files may be in use, and we happily use | |
e999739a | 255 | + them all (with the exception of compcache pages, which we allocate but do |
256 | + not use). This use of multiple block devices is accomplished as follows: | |
2380c486 JR |
257 | + |
258 | + Whatever the actual source of the allocated storage, the destination of the | |
259 | + image can be viewed in terms of one or more block devices, and on each | |
260 | + device, a list of sectors. To simplify matters, we only use contiguous, | |
261 | + PAGE_SIZE aligned sectors, like the swap code does. | |
262 | + | |
263 | + Since sector numbers on each bdev may well not start at 0, it makes much | |
264 | + more sense to use extents here. Contiguous ranges of pages can thus be | |
265 | + represented in the extents by contiguous values. | |
266 | + | |
267 | + Variations in block size are taken account of in transforming this data | |
268 | + into the parameters for bio submission. | |
269 | + | |
270 | + We can thus implement a layer of abstraction wherein the core of TuxOnIce | |
271 | + doesn't have to worry about which device we're currently writing to or | |
272 | + where in the device we are. It simply requests that the next page in the | |
273 | + pageset or header be written, leaving the details to this lower layer. | |
274 | + The lower layer remembers where in the sequence of devices and blocks each | |
275 | + pageset starts. The header always starts at the beginning of the allocated | |
276 | + storage. | |
277 | + | |
278 | + So extents are: | |
279 | + | |
280 | + struct extent { | |
281 | + unsigned long minimum, maximum; | |
282 | + struct extent *next; | |
283 | + } | |
284 | + | |
285 | + These are combined into chains of extents for a device: | |
286 | + | |
287 | + struct extent_chain { | |
288 | + int size; /* size of the extent ie sum (max-min+1) */ | |
289 | + int allocs, frees; | |
290 | + char *name; | |
291 | + struct extent *first, *last_touched; | |
292 | + }; | |
293 | + | |
294 | + For each bdev, we need to store a little more info: | |
295 | + | |
296 | + struct suspend_bdev_info { | |
297 | + struct block_device *bdev; | |
298 | + dev_t dev_t; | |
299 | + int bmap_shift; | |
300 | + int blocks_per_page; | |
301 | + }; | |
302 | + | |
303 | + The dev_t is used to identify the device in the stored image. As a result, | |
304 | + we expect devices at resume time to have the same major and minor numbers | |
305 | + as they had while suspending. This is primarily a concern where the user | |
306 | + utilises LVM for storage, as they will need to dmsetup their partitions in | |
307 | + such a way as to maintain this consistency at resume time. | |
308 | + | |
e999739a | 309 | + bmap_shift and blocks_per_page apply the effects of variations in blocks |
310 | + per page settings for the filesystem and underlying bdev. For most | |
2380c486 JR |
311 | + filesystems, these are the same, but for xfs, they can have independent |
312 | + values. | |
313 | + | |
314 | + Combining these two structures together, we have everything we need to | |
315 | + record what devices and what blocks on each device are being used to | |
316 | + store the image, and to submit i/o using bio_submit. | |
317 | + | |
318 | + The last elements in the picture are a means of recording how the storage | |
319 | + is being used. | |
320 | + | |
321 | + We do this first and foremost by implementing a layer of abstraction on | |
322 | + top of the devices and extent chains which allows us to view however many | |
323 | + devices there might be as one long storage tape, with a single 'head' that | |
324 | + tracks a 'current position' on the tape: | |
325 | + | |
326 | + struct extent_iterate_state { | |
327 | + struct extent_chain *chains; | |
328 | + int num_chains; | |
329 | + int current_chain; | |
330 | + struct extent *current_extent; | |
331 | + unsigned long current_offset; | |
332 | + }; | |
333 | + | |
334 | + That is, *chains points to an array of size num_chains of extent chains. | |
335 | + For the filewriter, this is always a single chain. For the swapwriter, the | |
336 | + array is of size MAX_SWAPFILES. | |
337 | + | |
338 | + current_chain, current_extent and current_offset thus point to the current | |
339 | + index in the chains array (and into a matching array of struct | |
340 | + suspend_bdev_info), the current extent in that chain (to optimise access), | |
341 | + and the current value in the offset. | |
342 | + | |
343 | + The image is divided into three parts: | |
344 | + - The header | |
345 | + - Pageset 1 | |
346 | + - Pageset 2 | |
347 | + | |
348 | + The header always starts at the first device and first block. We know its | |
349 | + size before we begin to save the image because we carefully account for | |
350 | + everything that will be stored in it. | |
351 | + | |
352 | + The second pageset (LRU) is stored first. It begins on the next page after | |
353 | + the end of the header. | |
354 | + | |
355 | + Its start location is only known once | |
356 | + pageset2 has been saved, since pageset2 may be compressed as it is written. | |
357 | + This location is thus recorded at the end of saving pageset2. It is page | |
358 | + aligned also. | |
359 | + | |
360 | + Since this information is needed at resume time, and the location of extents | |
361 | + in memory will differ at resume time, this needs to be stored in a portable | |
362 | + way: | |
363 | + | |
364 | + struct extent_iterate_saved_state { | |
365 | + int chain_num; | |
366 | + int extent_num; | |
367 | + unsigned long offset; | |
368 | + }; | |
369 | + | |
370 | + We can thus implement a layer of abstraction wherein the core of TuxOnIce | |
371 | + doesn't have to worry about which device we're currently writing to or | |
372 | + where in the device we are. It simply requests that the next page in the | |
373 | + pageset or header be written, leaving the details to this layer, and | |
374 | + invokes the routines to remember and restore the position, without having | |
375 | + to worry about the details of how the data is arranged on disk or such like. | |
376 | + | |
377 | + c) Modules | |
378 | + | |
379 | + One aim in designing TuxOnIce was to make it flexible. We wanted to allow | |
380 | + for the implementation of different methods of transforming a page to be | |
381 | + written to disk and different methods of getting the pages stored. | |
382 | + | |
383 | + In early versions (the betas and perhaps Suspend1), compression support was | |
384 | + inlined in the image writing code, and the data structures and code for | |
385 | + managing swap were intertwined with the rest of the code. A number of people | |
386 | + had expressed interest in implementing image encryption, and alternative | |
387 | + methods of storing the image. | |
388 | + | |
389 | + In order to achieve this, TuxOnIce was given a modular design. | |
390 | + | |
391 | + A module is a single file which encapsulates the functionality needed | |
392 | + to transform a pageset of data (encryption or compression, for example), | |
393 | + or to write the pageset to a device. The former type of module is called | |
394 | + a 'page-transformer', the latter a 'writer'. | |
395 | + | |
396 | + Modules are linked together in pipeline fashion. There may be zero or more | |
397 | + page transformers in a pipeline, and there is always exactly one writer. | |
398 | + The pipeline follows this pattern: | |
399 | + | |
400 | + --------------------------------- | |
401 | + | TuxOnIce Core | | |
402 | + --------------------------------- | |
403 | + | | |
404 | + | | |
405 | + --------------------------------- | |
406 | + | Page transformer 1 | | |
407 | + --------------------------------- | |
408 | + | | |
409 | + | | |
410 | + --------------------------------- | |
411 | + | Page transformer 2 | | |
412 | + --------------------------------- | |
413 | + | | |
414 | + | | |
415 | + --------------------------------- | |
416 | + | Writer | | |
417 | + --------------------------------- | |
418 | + | |
419 | + During the writing of an image, the core code feeds pages one at a time | |
420 | + to the first module. This module performs whatever transformations it | |
421 | + implements on the incoming data, completely consuming the incoming data and | |
e999739a | 422 | + feeding output in a similar manner to the next module. |
2380c486 JR |
423 | + |
424 | + All routines are SMP safe, and the final result of the transformations is | |
425 | + written with an index (provided by the core) and size of the output by the | |
426 | + writer. As a result, we can have multithreaded I/O without needing to | |
427 | + worry about the sequence in which pages are written (or read). | |
428 | + | |
429 | + During reading, the pipeline works in the reverse direction. The core code | |
430 | + calls the first module with the address of a buffer which should be filled. | |
431 | + (Note that the buffer size is always PAGE_SIZE at this time). This module | |
432 | + will in turn request data from the next module and so on down until the | |
433 | + writer is made to read from the stored image. | |
434 | + | |
435 | + Part of definition of the structure of a module thus looks like this: | |
436 | + | |
437 | + int (*rw_init) (int rw, int stream_number); | |
438 | + int (*rw_cleanup) (int rw); | |
439 | + int (*write_chunk) (struct page *buffer_page); | |
440 | + int (*read_chunk) (struct page *buffer_page, int sync); | |
441 | + | |
442 | + It should be noted that the _cleanup routine may be called before the | |
443 | + full stream of data has been read or written. While writing the image, | |
444 | + the user may (depending upon settings) choose to abort suspending, and | |
445 | + if we are in the midst of writing the last portion of the image, a portion | |
446 | + of the second pageset may be reread. This may also happen if an error | |
447 | + occurs and we seek to abort the process of writing the image. | |
448 | + | |
449 | + The modular design is also useful in a number of other ways. It provides | |
450 | + a means whereby we can add support for: | |
451 | + | |
452 | + - providing overall initialisation and cleanup routines; | |
453 | + - serialising configuration information in the image header; | |
454 | + - providing debugging information to the user; | |
455 | + - determining memory and image storage requirements; | |
456 | + - dis/enabling components at run-time; | |
457 | + - configuring the module (see below); | |
458 | + | |
459 | + ...and routines for writers specific to their work: | |
460 | + - Parsing a resume= location; | |
461 | + - Determining whether an image exists; | |
462 | + - Marking a resume as having been attempted; | |
463 | + - Invalidating an image; | |
464 | + | |
465 | + Since some parts of the core - the user interface and storage manager | |
466 | + support - have use for some of these functions, they are registered as | |
467 | + 'miscellaneous' modules as well. | |
468 | + | |
469 | + d) Sysfs data structures. | |
470 | + | |
471 | + This brings us naturally to support for configuring TuxOnIce. We desired to | |
472 | + provide a way to make TuxOnIce as flexible and configurable as possible. | |
e999739a | 473 | + The user shouldn't have to reboot just because they want to now hibernate to |
2380c486 JR |
474 | + a file instead of a partition, for example. |
475 | + | |
476 | + To accomplish this, TuxOnIce implements a very generic means whereby the | |
477 | + core and modules can register new sysfs entries. All TuxOnIce entries use | |
e999739a | 478 | + a single _store and _show routine, both of which are found in |
479 | + tuxonice_sysfs.c in the kernel/power directory. These routines handle the | |
480 | + most common operations - getting and setting the values of bits, integers, | |
481 | + longs, unsigned longs and strings in one place, and allow overrides for | |
482 | + customised get and set options as well as side-effect routines for all | |
483 | + reads and writes. | |
2380c486 JR |
484 | + |
485 | + When combined with some simple macros, a new sysfs entry can then be defined | |
486 | + in just a couple of lines: | |
487 | + | |
e999739a | 488 | + SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, |
489 | + 2048, 0, NULL), | |
2380c486 JR |
490 | + |
491 | + This defines a sysfs entry named "progress_granularity" which is rw and | |
492 | + allows the user to access an integer stored at &progress_granularity, giving | |
493 | + it a value between 1 and 2048 inclusive. | |
494 | + | |
495 | + Sysfs entries are registered under /sys/power/tuxonice, and entries for | |
496 | + modules are located in a subdirectory named after the module. | |
497 | + | |
498 | diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt | |
499 | new file mode 100644 | |
92bca44c | 500 | index 0000000..3bf0575 |
2380c486 JR |
501 | --- /dev/null |
502 | +++ b/Documentation/power/tuxonice.txt | |
9474138d | 503 | @@ -0,0 +1,948 @@ |
2380c486 JR |
504 | + --- TuxOnIce, version 3.0 --- |
505 | + | |
506 | +1. What is it? | |
507 | +2. Why would you want it? | |
508 | +3. What do you need to use it? | |
509 | +4. Why not just use the version already in the kernel? | |
510 | +5. How do you use it? | |
511 | +6. What do all those entries in /sys/power/tuxonice do? | |
512 | +7. How do you get support? | |
513 | +8. I think I've found a bug. What should I do? | |
514 | +9. When will XXX be supported? | |
515 | +10. How does it work? | |
516 | +11. Who wrote TuxOnIce? | |
517 | + | |
518 | +1. What is it? | |
519 | + | |
520 | + Imagine you're sitting at your computer, working away. For some reason, you | |
521 | + need to turn off your computer for a while - perhaps it's time to go home | |
522 | + for the day. When you come back to your computer next, you're going to want | |
523 | + to carry on where you left off. Now imagine that you could push a button and | |
524 | + have your computer store the contents of its memory to disk and power down. | |
525 | + Then, when you next start up your computer, it loads that image back into | |
526 | + memory and you can carry on from where you were, just as if you'd never | |
527 | + turned the computer off. You have far less time to start up, no reopening of | |
528 | + applications or finding what directory you put that file in yesterday. | |
529 | + That's what TuxOnIce does. | |
530 | + | |
531 | + TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who, | |
532 | + with some help from Pavel Machek, got an early version going in 1999. The | |
533 | + project was then taken over by Florent Chabaud while still in alpha version | |
534 | + numbers. Nigel Cunningham came on the scene when Florent was unable to | |
535 | + continue, moving the project into betas, then 1.0, 2.0 and so on up to | |
536 | + the present series. During the 2.0 series, the name was contracted to | |
537 | + Suspend2 and the website suspend2.net created. Beginning around July 2007, | |
538 | + a transition to calling the software TuxOnIce was made, to seek to help | |
539 | + make it clear that TuxOnIce is more concerned with hibernation than suspend | |
540 | + to ram. | |
541 | + | |
542 | + Pavel Machek's swsusp code, which was merged around 2.5.17 retains the | |
543 | + original name, and was essentially a fork of the beta code until Rafael | |
544 | + Wysocki came on the scene in 2005 and began to improve it further. | |
545 | + | |
546 | +2. Why would you want it? | |
547 | + | |
548 | + Why wouldn't you want it? | |
549 | + | |
550 | + Being able to save the state of your system and quickly restore it improves | |
551 | + your productivity - you get a useful system in far less time than through | |
552 | + the normal boot process. You also get to be completely 'green', using zero | |
553 | + power, or as close to that as possible (the computer may still provide | |
554 | + minimal power to some devices, so they can initiate a power on, but that | |
555 | + will be the same amount of power as would be used if you told the computer | |
556 | + to shutdown). | |
557 | + | |
558 | +3. What do you need to use it? | |
559 | + | |
560 | + a. Kernel Support. | |
561 | + | |
562 | + i) The TuxOnIce patch. | |
563 | + | |
564 | + TuxOnIce is part of the Linux Kernel. This version is not part of Linus's | |
565 | + 2.6 tree at the moment, so you will need to download the kernel source and | |
566 | + apply the latest patch. Having done that, enable the appropriate options in | |
567 | + make [menu|x]config (under Power Management Options - look for "Enhanced | |
568 | + Hibernation"), compile and install your kernel. TuxOnIce works with SMP, | |
569 | + Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64. | |
570 | + | |
571 | + TuxOnIce patches are available from http://tuxonice.net. | |
572 | + | |
573 | + ii) Compression support. | |
574 | + | |
575 | + Compression support is implemented via the cryptoapi. You will therefore want | |
576 | + to select any Cryptoapi transforms that you want to use on your image from | |
9474138d AM |
577 | + the Cryptoapi menu while configuring your kernel. We recommend the use of the |
578 | + LZO compression method - it is very fast and still achieves good compression. | |
2380c486 | 579 | + |
9474138d | 580 | + You can also tell TuxOnIce to write its image to an encrypted and/or |
2380c486 JR |
581 | + compressed filesystem/swap partition. In that case, you don't need to do |
582 | + anything special for TuxOnIce when it comes to kernel configuration. | |
583 | + | |
584 | + iii) Configuring other options. | |
585 | + | |
586 | + While you're configuring your kernel, try to configure as much as possible | |
587 | + to build as modules. We recommend this because there are a number of drivers | |
588 | + that are still in the process of implementing proper power management | |
589 | + support. In those cases, the best way to work around their current lack is | |
590 | + to build them as modules and remove the modules while hibernating. You might | |
591 | + also bug the driver authors to get their support up to speed, or even help! | |
592 | + | |
593 | + b. Storage. | |
594 | + | |
595 | + i) Swap. | |
596 | + | |
597 | + TuxOnIce can store the hibernation image in your swap partition, a swap file or | |
598 | + a combination thereof. Whichever combination you choose, you will probably | |
599 | + want to create enough swap space to store the largest image you could have, | |
600 | + plus the space you'd normally use for swap. A good rule of thumb would be | |
601 | + to calculate the amount of swap you'd want without using TuxOnIce, and then | |
602 | + add the amount of memory you have. This swapspace can be arranged in any way | |
603 | + you'd like. It can be in one partition or file, or spread over a number. The | |
604 | + only requirement is that they be active when you start a hibernation cycle. | |
605 | + | |
606 | + There is one exception to this requirement. TuxOnIce has the ability to turn | |
607 | + on one swap file or partition at the start of hibernating and turn it back off | |
608 | + at the end. If you want to ensure you have enough memory to store an image | |
609 | + when your memory is fully used, you might want to make one swap partition or | |
610 | + file for 'normal' use, and another for TuxOnIce to activate & deactivate | |
611 | + automatically. (Further details below). | |
612 | + | |
613 | + ii) Normal files. | |
614 | + | |
615 | + TuxOnIce includes a 'file allocator'. The file allocator can store your | |
616 | + image in a simple file. Since Linux has the concept of everything being a | |
617 | + file, this is more powerful than it initially sounds. If, for example, you | |
618 | + were to set up a network block device file, you could hibernate to a network | |
619 | + server. This has been tested and works to a point, but nbd itself isn't | |
620 | + stateless enough for our purposes. | |
621 | + | |
622 | + Take extra care when setting up the file allocator. If you just type | |
623 | + commands without thinking and then try to hibernate, you could cause | |
624 | + irreversible corruption on your filesystems! Make sure you have backups. | |
625 | + | |
626 | + Most people will only want to hibernate to a local file. To achieve that, do | |
627 | + something along the lines of: | |
628 | + | |
629 | + echo "TuxOnIce" > /hibernation-file | |
92bca44c | 630 | + dd if=/dev/zero bs=1M count=512 >> /hibernation-file |
2380c486 JR |
631 | + |
632 | + This will create a 512MB file called /hibernation-file. To get TuxOnIce to use | |
633 | + it: | |
634 | + | |
635 | + echo /hibernation-file > /sys/power/tuxonice/file/target | |
636 | + | |
637 | + Then | |
638 | + | |
639 | + cat /sys/power/tuxonice/resume | |
640 | + | |
641 | + Put the results of this into your bootloader's configuration (see also step | |
642 | + C, below): | |
643 | + | |
644 | + ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- | |
645 | + # cat /sys/power/tuxonice/resume | |
646 | + file:/dev/hda2:0x1e001 | |
647 | + | |
648 | + In this example, we would edit the append= line of our lilo.conf|menu.lst | |
649 | + so that it included: | |
650 | + | |
651 | + resume=file:/dev/hda2:0x1e001 | |
652 | + ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- | |
653 | + | |
654 | + For those who are thinking 'Could I make the file sparse?', the answer is | |
655 | + 'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in | |
656 | + a sparse file while hibernating. In the longer term (post merge!), I'd like | |
657 | + to change things so that the file could be dynamically resized and have | |
658 | + holes filled as needed. Right now, however, that's not possible and not a | |
659 | + priority. | |
660 | + | |
661 | + c. Bootloader configuration. | |
662 | + | |
663 | + Using TuxOnIce also requires that you add an extra parameter to | |
664 | + your lilo.conf or equivalent. Here's an example for a swap partition: | |
665 | + | |
666 | + append="resume=swap:/dev/hda1" | |
667 | + | |
668 | + This would tell TuxOnIce that /dev/hda1 is a swap partition you | |
669 | + have. TuxOnIce will use the swap signature of this partition as a | |
670 | + pointer to your data when you hibernate. This means that (in this example) | |
671 | + /dev/hda1 doesn't need to be _the_ swap partition where all of your data | |
672 | + is actually stored. It just needs to be a swap partition that has a | |
673 | + valid signature. | |
674 | + | |
675 | + You don't need to have a swap partition for this purpose. TuxOnIce | |
676 | + can also use a swap file, but usage is a little more complex. Having made | |
677 | + your swap file, turn it on and do | |
678 | + | |
679 | + cat /sys/power/tuxonice/swap/headerlocations | |
680 | + | |
681 | + (this assumes you've already compiled your kernel with TuxOnIce | |
682 | + support and booted it). The results of the cat command will tell you | |
683 | + what you need to put in lilo.conf: | |
684 | + | |
685 | + For swap partitions like /dev/hda1, simply use resume=/dev/hda1. | |
686 | + For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d. | |
687 | + | |
688 | + If the swapfile changes for any reason (it is moved to a different | |
689 | + location, it is deleted and recreated, or the filesystem is | |
690 | + defragmented) then you will have to check | |
691 | + /sys/power/tuxonice/swap/headerlocations for a new resume_block value. | |
692 | + | |
693 | + Once you've compiled and installed the kernel and adjusted your bootloader | |
694 | + configuration, you should only need to reboot for the most basic part | |
695 | + of TuxOnIce to be ready. | |
696 | + | |
697 | + If you only compile in the swap allocator, or only compile in the file | |
698 | + allocator, you don't need to add the "swap:" part of the resume= | |
699 | + parameters above. resume=/dev/hda2:0x242d will work just as well. If you | |
700 | + have compiled both and your storage is on swap, you can also use this | |
701 | + format (the swap allocator is the default allocator). | |
702 | + | |
703 | + When compiling your kernel, one of the options in the 'Power Management | |
704 | + Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is | |
705 | + called 'Default resume partition'. This can be used to set a default value | |
706 | + for the resume= parameter. | |
707 | + | |
708 | + d. The hibernate script. | |
709 | + | |
710 | + Since the driver model in 2.6 kernels is still being developed, you may need | |
711 | + to do more than just configure TuxOnIce. Users of TuxOnIce usually start the | |
712 | + process via a script which prepares for the hibernation cycle, tells the | |
713 | + kernel to do its stuff and then restore things afterwards. This script might | |
714 | + involve: | |
715 | + | |
716 | + - Switching to a text console and back if X doesn't like the video card | |
717 | + status on resume. | |
718 | + - Un/reloading drivers that don't play well with hibernation. | |
719 | + | |
720 | + Note that you might not be able to unload some drivers if there are | |
721 | + processes using them. You might have to kill off processes that hold | |
722 | + devices open. Hint: if your X server accesses a USB mouse, doing a | |
723 | + 'chvt' to a text console releases the device and you can unload the | |
724 | + module. | |
725 | + | |
726 | + Check out the latest script (available on tuxonice.net). | |
727 | + | |
728 | + e. The userspace user interface. | |
729 | + | |
730 | + TuxOnIce has very limited support for displaying status if you only apply | |
731 | + the kernel patch - it can printk messages, but that is all. In addition, | |
732 | + some of the functions mentioned in this document (such as cancelling a cycle | |
733 | + or performing interactive debugging) are unavailable. To utilise these | |
734 | + functions, or simply get a nice display, you need the 'userui' component. | |
735 | + Userui comes in three flavours, usplash, fbsplash and text. Text should | |
736 | + work on any console. Usplash and fbsplash require the appropriate | |
737 | + (distro specific?) support. | |
738 | + | |
739 | + To utilise a userui, TuxOnIce just needs to be told where to find the | |
740 | + userspace binary: | |
741 | + | |
742 | + echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program | |
743 | + | |
744 | + The hibernate script can do this for you, and a default value for this | |
745 | + setting can be configured when compiling the kernel. This path is also | |
746 | + stored in the image header, so if you have an initrd or initramfs, you can | |
747 | + use the userui during the first part of resuming (prior to the atomic | |
748 | + restore) by putting the binary in the same path in your initrd/ramfs. | |
749 | + Alternatively, you can put it in a different location and do an echo | |
750 | + similar to the above prior to the echo > do_resume. The value saved in the | |
751 | + image header will then be ignored. | |
752 | + | |
753 | +4. Why not just use the version already in the kernel? | |
754 | + | |
755 | + The version in the vanilla kernel has a number of drawbacks. The most | |
756 | + serious of these are: | |
757 | + - it has a maximum image size of 1/2 total memory; | |
758 | + - it doesn't allocate storage until after it has snapshotted memory. | |
759 | + This means that you can't be sure hibernating will work until you | |
760 | + see it start to write the image; | |
761 | + - it does not allow you to press escape to cancel a cycle; | |
762 | + - it does not allow you to press escape to cancel resuming; | |
763 | + - it does not allow you to automatically swapon a file when | |
764 | + starting a cycle; | |
765 | + - it does not allow you to use multiple swap partitions or files; | |
766 | + - it does not allow you to use ordinary files; | |
767 | + - it just invalidates an image and continues to boot if you | |
768 | + accidentally boot the wrong kernel after hibernating; | |
769 | + - it doesn't support any sort of nice display while hibernating; | |
770 | + - it is moving toward requiring that you have an initrd/initramfs | |
771 | + to ever have a hope of resuming (uswsusp). While uswsusp will | |
772 | + address some of the concerns above, it won't address all of them, | |
773 | + and will be more complicated to get set up; | |
774 | + - it doesn't have support for suspend-to-both (write a hibernation | |
775 | + image, then suspend to ram; I think this is known as ReadySafe | |
776 | + under M$). | |
777 | + | |
778 | +5. How do you use it? | |
779 | + | |
780 | + A hibernation cycle can be started directly by doing: | |
781 | + | |
782 | + echo > /sys/power/tuxonice/do_hibernate | |
783 | + | |
784 | + In practice, though, you'll probably want to use the hibernate script | |
785 | + to unload modules, configure the kernel the way you like it and so on. | |
786 | + In that case, you'd do (as root): | |
787 | + | |
788 | + hibernate | |
789 | + | |
790 | + See the hibernate script's man page for more details on the options it | |
791 | + takes. | |
792 | + | |
793 | + If you're using the text or splash user interface modules, one feature of | |
794 | + TuxOnIce that you might find useful is that you can press Escape at any time | |
795 | + during hibernating, and the process will be aborted. | |
796 | + | |
797 | + Due to the way hibernation works, this means you'll have your system back and | |
798 | + perfectly usable almost instantly. The only exception is when it's at the | |
799 | + very end of writing the image. Then it will need to reload a small (usually | |
800 | + 4-50MBs, depending upon the image characteristics) portion first. | |
801 | + | |
802 | + Likewise, when resuming, you can press escape and resuming will be aborted. | |
803 | + The computer will then powerdown again according to settings at that time for | |
804 | + the powerdown method or rebooting. | |
805 | + | |
806 | + You can change the settings for powering down while the image is being | |
807 | + written by pressing 'R' to toggle rebooting and 'O' to toggle between | |
808 | + suspending to ram and powering down completely. | |
809 | + | |
810 | + If you run into problems with resuming, adding the "noresume" option to | |
811 | + the kernel command line will let you skip the resume step and recover your | |
812 | + system. This option shouldn't normally be needed, because TuxOnIce modifies | |
813 | + the image header prior to the atomic restore, and will thus prompt you | |
814 | + if it detects that you've tried to resume an image before (this flag is | |
815 | + removed if you press Escape to cancel a resume, so you won't be prompted | |
816 | + then). | |
817 | + | |
818 | + Recent kernels (2.6.24 onwards) add support for resuming from a different | |
819 | + kernel to the one that was hibernated (thanks to Rafael for his work on | |
820 | + this - I've just embraced and enhanced the support for TuxOnIce). This | |
821 | + should further reduce the need for you to use the noresume option. | |
822 | + | |
823 | +6. What do all those entries in /sys/power/tuxonice do? | |
824 | + | |
825 | + /sys/power/tuxonice is the directory which contains files you can use to | |
826 | + tune and configure TuxOnIce to your liking. The exact contents of | |
827 | + the directory will depend upon the version of TuxOnIce you're | |
828 | + running and the options you selected at compile time. In the following | |
829 | + descriptions, names in brackets refer to compile time options. | |
830 | + (Note that they're all dependent upon you having selected CONFIG_TUXONICE | |
831 | + in the first place!). | |
832 | + | |
833 | + Since the values of these settings can open potential security risks, the | |
834 | + writeable ones are accessible only to the root user. You may want to | |
835 | + configure sudo to allow you to invoke your hibernate script as an ordinary | |
836 | + user. | |
837 | + | |
9474138d AM |
838 | + - alloc/failure_test |
839 | + | |
840 | + This debugging option provides a way of testing TuxOnIce's handling of | |
841 | + memory allocation failures. Each allocation type that TuxOnIce makes has | |
842 | + been given a unique number (see the source code). Echo the appropriate | |
843 | + number into this entry, and when TuxOnIce attempts to do that allocation, | |
844 | + it will pretend there was a failure and act accordingly. | |
845 | + | |
846 | + - alloc/find_max_mem_allocated | |
847 | + | |
848 | + This debugging option will cause TuxOnIce to find the maximum amount of | |
849 | + memory it used during a cycle, and report that information in debugging | |
850 | + information at the end of the cycle. | |
851 | + | |
852 | + - alt_resume_param | |
853 | + | |
854 | + Instead of powering down after writing a hibernation image, TuxOnIce | |
855 | + supports resuming from a different image. This entry lets you set the | |
856 | + location of the signature for that image (the resume= value you'd use | |
857 | + for it). Using an alternate image and keep_image mode, you can do things | |
858 | + like using an alternate image to power down an uninterruptible power | |
859 | + supply. | |
860 | + | |
861 | + - block_io/target_outstanding_io | |
862 | + | |
863 | + This value controls the amount of memory that the block I/O code says it | |
864 | + needs when the core code is calculating how much memory is needed for | |
865 | + hibernating and for resuming. It doesn't directly control the amount of | |
866 | + I/O that is submitted at any one time - that depends on the amount of | |
867 | + available memory (we may have more available than we asked for), the | |
868 | + throughput that is being achieved and the ability of the CPU to keep up | |
869 | + with disk throughput (particularly where we're compressing pages). | |
870 | + | |
2380c486 JR |
871 | + - checksum/enabled |
872 | + | |
873 | + Use cryptoapi hashing routines to verify that Pageset2 pages don't change | |
874 | + while we're saving the first part of the image, and to get any pages that | |
875 | + do change resaved in the atomic copy. This should normally not be needed, | |
876 | + but if you're seeing issues, please enable this. If your issues stop you | |
877 | + being able to resume, enable this option, hibernate and cancel the cycle | |
878 | + after the atomic copy is done. If the debugging info shows a non-zero | |
879 | + number of pages resaved, please report this to Nigel. | |
880 | + | |
881 | + - compression/algorithm | |
882 | + | |
883 | + Set the cryptoapi algorithm used for compressing the image. | |
884 | + | |
885 | + - compression/expected_compression | |
886 | + | |
887 | + These values allow you to set an expected compression ratio, which TuxOnIce | |
888 | + will use in calculating whether it meets constraints on the image size. If | |
889 | + this expected compression ratio is not attained, the hibernation cycle will | |
890 | + abort, so it is wise to allow some spare. You can see what compression | |
891 | + ratio is achieved in the logs after hibernating. | |
892 | + | |
893 | + - debug_info: | |
894 | + | |
895 | + This file returns information about your configuration that may be helpful | |
896 | + in diagnosing problems with hibernating. | |
897 | + | |
9474138d AM |
898 | + - did_suspend_to_both: |
899 | + | |
900 | + This file can be used when you hibernate with powerdown method 3 (ie suspend | |
901 | + to ram after writing the image). There can be two outcomes in this case. We | |
902 | + can resume from the suspend-to-ram before the battery runs out, or we can run | |
903 | + out of juice and end up resuming like normal. This entry lets you find out, | |
904 | + post resume, which way we went. If the value is 1, we resumed from suspend | |
905 | + to ram. This can be useful when actions need to be run post suspend-to-ram | |
906 | + that don't need to be run if we did the normal resume from power off. | |
907 | + | |
2380c486 JR |
908 | + - do_hibernate: |
909 | + | |
910 | + When anything is written to this file, the kernel side of TuxOnIce will | |
911 | + begin to attempt to write an image to disk and power down. You'll normally | |
912 | + want to run the hibernate script instead, to get modules unloaded first. | |
913 | + | |
914 | + - do_resume: | |
915 | + | |
916 | + When anything is written to this file TuxOnIce will attempt to read and | |
917 | + restore an image. If there is no image, it will return almost immediately. | |
918 | + If an image exists, the echo > will never return. Instead, the original | |
919 | + kernel context will be restored and the original echo > do_hibernate will | |
920 | + return. | |
921 | + | |
922 | + - */enabled | |
923 | + | |
924 | + These options can be used to temporarily disable various parts of TuxOnIce. | |
925 | + | |
926 | + - extra_pages_allowance | |
927 | + | |
928 | + When TuxOnIce does its atomic copy, it calls the driver model suspend | |
929 | + and resume methods. If you have DRI enabled with a driver such as fglrx, | |
930 | + this can result in the driver allocating a substantial amount of memory | |
931 | + for storing its state. Extra_pages_allowance tells TuxOnIce how much | |
932 | + extra memory it should ensure is available for those allocations. If | |
933 | + your attempts at hibernating end with a message in dmesg indicating that | |
934 | + insufficient extra pages were allowed, you need to increase this value. | |
935 | + | |
936 | + - file/target: | |
937 | + | |
938 | + Read this value to get the current setting. Write to it to point TuxOnIce | |
939 | + at a new storage location for the file allocator. See section 3.b.ii above | |
940 | + for details of how to set up the file allocator. | |
941 | + | |
942 | + - freezer_test | |
943 | + | |
944 | + This entry can be used to get TuxOnIce to just test the freezer and prepare | |
945 | + an image without actually doing a hibernation cycle. It is useful for | |
946 | + diagnosing freezing and image preparation issues. | |
947 | + | |
9474138d AM |
948 | + - full_pageset2 |
949 | + | |
950 | + TuxOnIce divides the pages that are stored in an image into two sets. The | |
951 | + difference between the two sets is that pages in pageset 1 are atomically | |
952 | + copied, and pages in pageset 2 are written to disk without being copied | |
953 | + first. A page CAN be written to disk without being copied first if and only | |
954 | + if its contents will not be modified or used at any time after userspace | |
955 | + processes are frozen. A page MUST be in pageset 1 if its contents are | |
956 | + modified or used at any time after userspace processes have been frozen. | |
957 | + | |
958 | + Normally (ie if this option is enabled), TuxOnIce will put all pages on the | |
959 | + per-zone LRUs in pageset2, then remove those pages used by any userspace | |
960 | + user interface helper and TuxOnIce storage manager that are running, | |
961 | + together with pages used by the GEM memory manager introduced around 2.6.28 | |
962 | + kernels. | |
963 | + | |
964 | + If this option is disabled, a much more conservative approach will be taken. | |
965 | + The only pages in pageset2 will be those belonging to userspace processes, | |
966 | + with the exclusion of those belonging to the TuxOnIce userspace helpers | |
967 | + mentioned above. This will result in a much smaller pageset2, and will | |
968 | + therefore result in smaller images than are possible with this option | |
969 | + enabled. | |
970 | + | |
971 | + - ignore_rootfs | |
972 | + | |
973 | + TuxOnIce records which device is mounted as the root filesystem when | |
974 | + writing the hibernation image. It will normally check at resume time that | |
975 | + this device isn't already mounted - that would be a cause of filesystem | |
976 | + corruption. In some particular cases (RAM based root filesystems), you | |
977 | + might want to disable this check. This option allows you to do that. | |
978 | + | |
2380c486 JR |
979 | + - image_exists: |
980 | + | |
981 | + Can be used in a script to determine whether a valid image exists at the | |
982 | + location currently pointed to by resume=. Returns up to three lines. | |
983 | + The first is whether an image exists (-1 for unsure, otherwise 0 or 1). | |
984 | + If an image exists, additional lines will return the machine and version. | |
985 | + Echoing anything to this entry removes any current image. | |
986 | + | |
987 | + - image_size_limit: | |
988 | + | |
989 | + The maximum size of hibernation image written to disk, measured in megabytes | |
990 | + (1024*1024). | |
991 | + | |
992 | + - last_result: | |
993 | + | |
994 | + The result of the last hibernation cycle, as defined in | |
995 | + include/linux/suspend-debug.h with the values SUSPEND_ABORTED to | |
996 | + SUSPEND_KEPT_IMAGE. This is a bitmask. | |
997 | + | |
9474138d AM |
998 | + - late_cpu_hotplug: |
999 | + | |
1000 | + This sysfs entry controls whether cpu hotplugging is done - as normal - just | |
1001 | + before (unplug) and after (replug) the atomic copy/restore (so that all | |
1002 | + CPUs/cores are available for multithreaded I/O). The alternative is to | |
1003 | + unplug all secondary CPUs/cores at the start of hibernating/resuming, and | |
1004 | + replug them at the end of resuming. No multithreaded I/O will be possible in | |
1005 | + this configuration, but the odd machine has been reported to require it. | |
1006 | + | |
1007 | + - lid_file: | |
1008 | + | |
1009 | + This determines which ACPI button file we look in to determine whether the | |
1010 | + lid is open or closed after resuming from suspend to disk or power off. | |
1011 | + If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state | |
1012 | + and check its contents at the appropriate moment. See post_wake_state below | |
1013 | + for more details on how this entry is used. | |
1014 | + | |
2380c486 JR |
1015 | + - log_everything (CONFIG_PM_DEBUG): |
1016 | + | |
1017 | + Setting this option results in all messages printed being logged. Normally, | |
1018 | + only a subset are logged, so as to not slow the process and not clutter the | |
1019 | + logs. Useful for debugging. It can be toggled during a cycle by pressing | |
1020 | + 'L'. | |
1021 | + | |
9474138d AM |
1022 | + - no_load_direct: |
1023 | + | |
1024 | + This is a debugging option. If, when loading the atomically copied pages of | |
1025 | + an image, TuxOnIce finds that the destination address for a page is free, | |
1026 | + it will normally allocate the image, load the data directly into that | |
1027 | + address and skip it in the atomic restore. If this option is disabled, the | |
1028 | + page will be loaded somewhere else and atomically restored like other pages. | |
1029 | + | |
1030 | + - no_flusher_thread: | |
1031 | + | |
1032 | + When doing multithreaded I/O (see below), the first online CPU can be used | |
1033 | + to _just_ submit compressed pages when writing the image, rather than | |
1034 | + compressing and submitting data. This option is normally disabled, but has | |
1035 | + been included because Nigel would like to see whether it will be more useful | |
1036 | + as the number of cores/cpus in computers increases. | |
1037 | + | |
1038 | + - no_multithreaded_io: | |
1039 | + | |
1040 | + TuxOnIce will normally create one thread per cpu/core on your computer, | |
1041 | + each of which will then perform I/O. This will generally result in | |
1042 | + throughput that's the maximum the storage medium can handle. There | |
1043 | + shouldn't be any reason to disable multithreaded I/O now, but this option | |
1044 | + has been retained for debugging purposes. | |
1045 | + | |
1046 | + - no_pageset2 | |
1047 | + | |
1048 | + See the entry for full_pageset2 above for an explanation of pagesets. | |
1049 | + Enabling this option causes TuxOnIce to do an atomic copy of all pages, | |
1050 | + thereby limiting the maximum image size to 1/2 of memory, as swsusp does. | |
1051 | + | |
1052 | + - no_pageset2_if_unneeded | |
1053 | + | |
1054 | + See the entry for full_pageset2 above for an explanation of pagesets. | |
1055 | + Enabling this option causes TuxOnIce to act like no_pageset2 was enabled | |
1056 | + if and only if it isn't needed anyway. This option may still make TuxOnIce | |
1057 | + less reliable because pageset2 pages are normally used to store the | |
1058 | + atomic copy - drivers that want to do allocations of larger amounts of | |
1059 | + memory in one shot will be more likely to find that those amounts aren't | |
1060 | + available if this option is enabled. | |
1061 | + | |
2380c486 JR |
1062 | + - pause_between_steps (CONFIG_PM_DEBUG): |
1063 | + | |
1064 | + This option is used during debugging, to make TuxOnIce pause between | |
1065 | + each step of the process. It is ignored when the nice display is on. | |
1066 | + | |
9474138d AM |
1067 | + - post_wake_state: |
1068 | + | |
1069 | + TuxOnIce provides support for automatically waking after a user-selected | |
1070 | + delay, and using a different powerdown method if the lid is still closed. | |
1071 | + (Yes, we're assuming a laptop). This entry lets you choose what state | |
1072 | + should be entered next. The values are those described under | |
1073 | + powerdown_method, below. It can be used to suspend to RAM after hibernating, | |
1074 | + then powerdown properly after (say) 20 minutes. It can also be used to power down | |
1075 | + properly, then wake at (say) 6.30am and suspend to RAM until you're ready | |
1076 | + to use the machine. | |
1077 | + | |
2380c486 JR |
1078 | + - powerdown_method: |
1079 | + | |
1080 | + Used to select a method by which TuxOnIce should powerdown after writing the | |
1081 | + image. Currently: | |
1082 | + | |
1083 | + 0: Don't use ACPI to power off. | |
1084 | + 3: Attempt to enter Suspend-to-ram. | |
1085 | + 4: Attempt to enter ACPI S4 mode. | |
1086 | + 5: Attempt to power down via ACPI S5 mode. | |
1087 | + | |
1088 | + Note that these options are highly dependent upon your hardware & software: | |
1089 | + | |
1090 | + 3: When successful, your machine suspends to ram instead of powering off. | |
1091 | + The advantage of using this mode is that it doesn't matter whether your | |
1092 | + battery has enough charge to make it through to your next resume. If it | |
1093 | + lasts, you will simply resume from suspend to ram (and the image on disk | |
1094 | + will be discarded). If the battery runs out, you will resume from disk | |
1095 | + instead. The disadvantage is that it takes longer than a normal | |
1096 | + suspend-to-ram to enter the state, since the suspend-to-disk image needs | |
1097 | + to be written first. | |
1098 | + 4/5: When successful, your machine will be off and consume (almost) no power. | |
1099 | + But it might still react to some external events like opening the lid or | |
1100 | + traffic on a network or usb device. For the bios, resume is then the same | |
1101 | + as warm boot, similar to a situation where you used the command `reboot' | |
1102 | + to reboot your machine. If your machine has problems on warm boot or if | |
1103 | + you want to protect your machine with the bios password, this is probably | |
1104 | + not the right choice. Mode 4 may be necessary on some machines where ACPI | |
1105 | + wake up methods need to be run to properly reinitialise hardware after a | |
1106 | + hibernation cycle. | |
1107 | + 0: Switch the machine completely off. The only possible wakeup is the power | |
1108 | + button. For the bios, resume is then the same as a cold boot, in | |
1109 | + particular you would have to provide your bios boot password if your | |
1110 | + machine uses that feature for booting. | |
1111 | + | |
1112 | + - progressbar_granularity_limit: | |
1113 | + | |
1114 | + This option can be used to limit the granularity of the progress bar | |
1115 | + displayed with a bootsplash screen. The value is the maximum number of | |
1116 | + steps. That is, 10 will make the progress bar jump in 10% increments. | |
1117 | + | |
1118 | + - reboot: | |
1119 | + | |
1120 | + This option causes TuxOnIce to reboot rather than powering down | |
1121 | + at the end of saving an image. It can be toggled during a cycle by pressing | |
1122 | + 'R'. | |
1123 | + | |
9474138d AM |
1124 | + - resume: |
1125 | + | |
1126 | + This sysfs entry can be used to read and set the location in which TuxOnIce | |
1127 | + will look for the signature of an image - the value set using resume= at | |
1128 | + boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By | |
1129 | + writing to this file as well as modifying your bootloader's configuration | |
1130 | + file (eg menu.lst), you can set or reset the location of your image or the | |
1131 | + method of storing the image without rebooting. | |
1132 | + | |
1133 | + - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP): | |
1134 | + | |
1135 | + This option makes | |
1136 | + | |
1137 | + echo disk > /sys/power/state | |
1138 | + | |
1139 | + activate TuxOnIce instead of swsusp. Regardless of whether this option is | |
1140 | + enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce | |
1141 | + to check for an image too. This is due to the fact that at resume time, we | |
1142 | + can't know whether this option was enabled until we see if an image is there | |
1143 | + for us to resume from. (And when an image exists, we don't care whether we | |
1144 | + did replace swsusp anyway - we just want to resume). | |
1145 | + | |
2380c486 JR |
1146 | + - resume_commandline: |
1147 | + | |
1148 | + This entry can be read after resuming to see the commandline that was used | |
1149 | + when resuming began. You might use this to set up two bootloader entries | |
1150 | + that are the same apart from the fact that one includes an extra append= | |
1151 | + argument "at_work=1". You could then grep resume_commandline in your | |
1152 | + post-resume scripts and configure networking (for example) differently | |
1153 | + depending upon whether you're at home or work. resume_commandline can be | |
1154 | + set to arbitrary text if you wish to remove sensitive contents. | |
1155 | + | |
1156 | + - swap/swapfilename: | |
1157 | + | |
1158 | + This entry is used to specify the swapfile or partition that | |
1159 | + TuxOnIce will attempt to swapon/swapoff automatically. Thus, if | |
1160 | + I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically | |
1161 | + for my hibernation image, I would | |
1162 | + | |
1163 | + echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile | |
1164 | + | |
1165 | + /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the | |
1166 | + swapon and swapoff occur while other processes are frozen (including kswapd) | |
1167 | + so this swap file will not be used up when attempting to free memory. The | |
1168 | + partition/file is also given the highest priority, so other swapfiles/partitions | |
1169 | + will only be used to save the image when this one is filled. | |
1170 | + | |
1171 | + The value of this file is used by headerlocations along with any currently | |
1172 | + activated swapfiles/partitions. | |
1173 | + | |
1174 | + - swap/headerlocations: | |
1175 | + | |
1176 | + This option tells you the resume= options to use for swap devices you | |
1177 | + currently have activated. It is particularly useful when you only want to | |
1178 | + use a swap file to store your image. See above for further details. | |
1179 | + | |
9474138d | 1180 | + - test_bio |
2380c486 | 1181 | + |
9474138d AM |
1182 | + This is a debugging option. When enabled, TuxOnIce will not hibernate. |
1183 | + Instead, when asked to write an image, it will skip the atomic copy, | |
1184 | + just doing the writing of the image and then returning control to the | |
1185 | + user at the point where it would have powered off. This is useful for | |
1186 | + testing throughput in different configurations. | |
1187 | + | |
1188 | + - test_filter_speed | |
1189 | + | |
1190 | + This is a debugging option. When enabled, TuxOnIce will not hibernate. | |
1191 | + Instead, when asked to write an image, it will not write anything or do | |
1192 | + an atomic copy, but will only run any enabled compression algorithm on the | |
1193 | + data that would have been written (the source pages of the atomic copy in | |
1194 | + the case of pageset 1). This is useful for comparing the performance of | |
1195 | + compression algorithms and for determining the extent to which an upgrade | |
1196 | + to your storage method would improve hibernation speed. | |
2380c486 JR |
1197 | + |
1198 | + - user_interface/debug_sections (CONFIG_PM_DEBUG): | |
1199 | + | |
1200 | + This value, together with the console log level, controls what debugging | |
1201 | + information is displayed. The console log level determines the level of | |
1202 | + detail, and this value determines what detail is displayed. This value is | |
1203 | + a bit vector, and the meaning of the bits can be found in the kernel tree | |
1204 | + in include/linux/tuxonice.h. It can be overridden using the kernel's | |
1205 | + command line option suspend_dbg. | |
1206 | + | |
1207 | + - user_interface/default_console_level (CONFIG_PM_DEBUG): | |
1208 | + | |
1209 | + This determines the value of the console log level at the start of a | |
1210 | + hibernation cycle. If debugging is compiled in, the console log level can be | |
1211 | + changed during a cycle by pressing the digit keys. Meanings are: | |
1212 | + | |
1213 | + 0: Nice display. | |
1214 | + 1: Nice display plus numerical progress. | |
1215 | + 2: Errors only. | |
1216 | + 3: Low level debugging info. | |
1217 | + 4: Medium level debugging info. | |
1218 | + 5: High level debugging info. | |
1219 | + 6: Verbose debugging info. | |
1220 | + | |
1221 | + - user_interface/enable_escape: | |
1222 | + | |
1223 | + Setting this to "1" will enable you to abort a hibernation cycle or resuming by | |
1224 | + pressing escape, "0" (default) disables this feature. Note that enabling | |
1225 | + this option means that you cannot initiate a hibernation cycle and then walk | |
9474138d | 1226 | + away from your computer, expecting it to be secure. With the feature disabled, |
2380c486 JR |
1227 | + you can validly have this expectation once TuxOnIce begins to write the | |
1228 | + image to disk. (Prior to this point, it is possible that TuxOnIce might | |
1229 | + abort because of failure to freeze all processes or because constraints | |
1230 | + on its ability to save the image are not met). | |
1231 | + | |
9474138d AM |
1232 | + - user_interface/program |
1233 | + | |
1234 | + This entry is used to tell TuxOnice what userspace program to use for | |
1235 | + providing a user interface while hibernating. The program uses a netlink | |
1236 | + socket to pass messages back and forward to the kernel, allowing all of the | |
1237 | + functions formerly implemented in the kernel user interface components. | |
1238 | + | |
2380c486 JR |
1239 | + - version: |
1240 | + | |
1241 | + The version of TuxOnIce you have compiled into the currently running kernel. | |
1242 | + | |
9474138d AM |
1243 | + - wake_alarm_dir: |
1244 | + | |
1245 | + As mentioned above (post_wake_state), TuxOnIce supports automatically waking | |
1246 | + after some delay. This entry allows you to select which wake alarm to use. | |
1247 | + It should contain the value "rtc0" if you're wanting to use | |
1248 | + /sys/class/rtc/rtc0. | |
1249 | + | |
1250 | + - wake_delay: | |
1251 | + | |
1252 | + This value determines the delay from the end of writing the image until the | |
1253 | + wake alarm is triggered. You can set an absolute time by writing the desired | |
1254 | + time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values | |
1255 | + empty. | |
1256 | + | |
1257 | + Note that for the wakeup to actually occur, you may need to modify entries | |
1258 | + in /proc/acpi/wakeup. This is done by echoing the name of the button in the | |
1259 | + first column (eg PBTN) into the file. | |
1260 | + | |
2380c486 JR |
1261 | +7. How do you get support? |
1262 | + | |
1263 | + Glad you asked. TuxOnIce is being actively maintained and supported | |
1264 | + by Nigel (the guy doing most of the kernel coding at the moment), Bernard | |
1265 | + (who maintains the hibernate script and userspace user interface components) | |
1266 | + and its users. | |
1267 | + | |
1268 | + Resources available include HowTos, FAQs and a Wiki, all available via | |
1269 | + tuxonice.net. You can find the mailing lists there. | |
1270 | + | |
1271 | +8. I think I've found a bug. What should I do? | |
1272 | + | |
1273 | + By far and away, the most common problems people have with TuxOnIce | |
1274 | + relate to drivers not having adequate power management support. In this | |
1275 | + case, it is not a bug with TuxOnIce, but we can still help you. As we | |
1276 | + mentioned above, such issues can usually be worked around by building the | |
1277 | + functionality as modules and unloading them while hibernating. Please visit | |
1278 | + the Wiki for up-to-date lists of known issues and workarounds. | |
1279 | + | |
1280 | + If this information doesn't help, try running: | |
1281 | + | |
1282 | + hibernate --bug-report | |
1283 | + | |
1284 | + ..and sending the output to the users mailing list. | |
1285 | + | |
1286 | + Good information on how to provide us with useful information from an | |
1287 | + oops is found in the file REPORTING-BUGS, in the top level directory | |
1288 | + of the kernel tree. If you get an oops, please especially note the | |
1289 | + information about running what is printed on the screen through ksymoops. | |
1290 | + The raw information is useless. | |
1291 | + | |
1292 | +9. When will XXX be supported? | |
1293 | + | |
1294 | + If there's a feature missing from TuxOnIce that you'd like, feel free to | |
1295 | + ask. We try to be obliging, within reason. | |
1296 | + | |
1297 | + Patches are welcome. Please send to the list. | |
1298 | + | |
1299 | +10. How does it work? | |
1300 | + | |
1301 | + TuxOnIce does its work in a number of steps. | |
1302 | + | |
1303 | + a. Freezing system activity. | |
1304 | + | |
1305 | + The first main stage in hibernating is to stop all other activity. This is | |
1306 | + achieved in stages. Processes are considered in four groups, which we will | |
1307 | + describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE | |
1308 | + flag, kernel threads without this flag, userspace processes with the | |
1309 | + PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are | |
1310 | + untouched by the refrigerator code. They are allowed to run during hibernating | |
1311 | + and resuming, and are used to support user interaction, storage access or the | |
1312 | + like. Other kernel threads (those unneeded while hibernating) are frozen last. | |
1313 | + This leaves us with userspace processes that need to be frozen. When a | |
1314 | + process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on | |
1315 | + that process for the duration of that call. Processes that have this flag are | |
1316 | + frozen after processes without it, so that we can seek to ensure that dirty | |
1317 | + data is synced to disk as quickly as possible in a situation where other | |
1318 | + processes may be submitting writes at the same time. Freezing the processes | |
1319 | + that are submitting data stops new I/O from being submitted. Syncthreads can | |
1320 | + then cleanly finish their work. So the order is: | |
1321 | + | |
1322 | + - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE; | |
1323 | + - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE); | |
1324 | + - Kernel processes without PF_NOFREEZE. | |
1325 | + | |
1326 | + b. Eating memory. | |
1327 | + | |
1328 | + For a successful hibernation cycle, you need to have enough disk space to store the | |
1329 | + image and enough memory for the various limitations of TuxOnIce's | |
1330 | + algorithm. You can also specify a maximum image size. In order to meet | |
1331 | + those constraints, TuxOnIce may 'eat' memory. If, after freezing | |
1332 | + processes, the constraints aren't met, TuxOnIce will thaw all the | |
1333 | + other processes and begin to eat memory until its calculations indicate | |
1334 | + the constraints are met. It will then freeze processes again and recheck | |
1335 | + its calculations. | |
1336 | + | |
1337 | + c. Allocation of storage. | |
1338 | + | |
1339 | + Next, TuxOnIce allocates the storage that will be used to save | |
1340 | + the image. | |
1341 | + | |
1342 | + The core of TuxOnIce knows nothing about how or where pages are stored. We | |
1343 | + therefore request the active allocator (remember you might have compiled in | |
1344 | + more than one!) to allocate enough storage for our expected image size. If | |
1345 | + this request cannot be fulfilled, we eat more memory and try again. If it | |
1346 | + is fulfilled, we seek to allocate additional storage, just in case our | |
1347 | + expected compression ratio (if any) isn't achieved. This time, however, we | |
1348 | + just continue if we can't allocate enough storage. | |
1349 | + | |
1350 | + If these calls to our allocator change the characteristics of the image | |
1351 | + such that we haven't allocated enough memory, we also loop. (The allocator | |
1352 | + may well need to allocate space for its storage information). | |
1353 | + | |
1354 | + d. Write the first part of the image. | |
1355 | + | |
1356 | + TuxOnIce stores the image in two sets of pages called 'pagesets'. | |
1357 | + Pageset 2 contains pages on the active and inactive lists; essentially | |
1358 | + the page cache. Pageset 1 contains all other pages, including the kernel. | |
1359 | + We use two pagesets for one important reason: We need to make an atomic copy | |
1360 | + of the kernel to ensure consistency of the image. Without a second pageset, | |
1361 | + that would limit us to an image that was at most half the amount of memory | |
1362 | + available. Using two pagesets allows us to store a full image. Since pageset | |
1363 | + 2 pages won't be needed in saving pageset 1, we first save pageset 2 pages. | |
1364 | + We can then make our atomic copy of the remaining pages using both pageset 2 | |
1365 | + pages and any other pages that are free. While saving both pagesets, we are | |
1366 | + careful not to corrupt the image. Among other things, we use lowlevel block | |
1367 | + I/O routines that don't change the pagecache contents. | |
1368 | + | |
1369 | + The next step, then, is writing pageset 2. | |
1370 | + | |
1371 | + e. Suspending drivers and storing processor context. | |
1372 | + | |
1373 | + Having written pageset2, TuxOnIce calls the power management functions to | |
1374 | + notify drivers of the hibernation, and saves the processor state in preparation | |
1375 | + for the atomic copy of memory we are about to make. | |
1376 | + | |
1377 | + f. Atomic copy. | |
1378 | + | |
1379 | + At this stage, everything else but the TuxOnIce code is halted. Processes | |
1380 | + are frozen or idling, drivers are quiesced and have stored (ideally and where | |
1381 | + necessary) their configuration in memory we are about to atomically copy. | |
1382 | + In our lowlevel architecture specific code, we have saved the CPU state. | |
1383 | + We can therefore now do our atomic copy before resuming drivers etc. | |
1384 | + | |
1385 | + g. Save the atomic copy (pageset 1). | |
1386 | + | |
1387 | + TuxOnIce can then write the atomic copy of the remaining pages. Since we | |
1388 | + have copied the pages into other locations, we can continue to use the | |
1389 | + normal block I/O routines without fear of corrupting our image. | |
1390 | + | |
1391 | + h. Save the image header. | |
1392 | + | |
1393 | + Nearly there! We save our settings and other parameters needed for | |
1394 | + reloading pageset 1 in an 'image header'. We also tell our allocator to | |
1395 | + serialise its data at this stage, so that it can reread the image at resume | |
1396 | + time. | |
1397 | + | |
1398 | + i. Set the image header. | |
1399 | + | |
1400 | + Finally, we edit the header at our resume= location. The signature is | |
1401 | + changed by the allocator to reflect the fact that an image exists, and to | |
1402 | + point to the start of that data if necessary (swap allocator). | |
1403 | + | |
1404 | + j. Power down. | |
1405 | + | |
1406 | + Or reboot if we're debugging and the appropriate option is selected. | |
1407 | + | |
1408 | + Whew! | |
1409 | + | |
1410 | + Reloading the image. | |
1411 | + -------------------- | |
1412 | + | |
1413 | + Reloading the image is essentially the reverse of all the above. We load | |
1414 | + our copy of pageset 1, being careful to choose locations that aren't going | |
1415 | + to be overwritten as we copy it back (We start very early in the boot | |
1416 | + process, so there are no other processes to quiesce here). We then copy | |
1417 | + pageset 1 back to its original location in memory and restore the process | |
1418 | + context. We are now running with the original kernel. Next, we reload the | |
1419 | + pageset 2 pages, free the memory and swap used by TuxOnIce, restore | |
1420 | + the pageset header and restart processes. Sounds easy in comparison to | |
1421 | + hibernating, doesn't it! | |
1422 | + | |
1423 | + There is of course more to TuxOnIce than this, but this explanation | |
1424 | + should be a good start. If there's interest, I'll write further | |
1425 | + documentation on range pages and the low level I/O. | |
1426 | + | |
1427 | +11. Who wrote TuxOnIce? | |
1428 | + | |
1429 | + (Answer based on the writings of Florent Chabaud, credits in files and | |
1430 | + Nigel's limited knowledge; apologies to anyone missed out!) | |
1431 | + | |
1432 | + The main developers of TuxOnIce have been... | |
1433 | + | |
1434 | + Gabor Kuti | |
1435 | + Pavel Machek | |
1436 | + Florent Chabaud | |
1437 | + Bernard Blackham | |
1438 | + Nigel Cunningham | |
1439 | + | |
1440 | + Significant portions of swsusp, the code in the vanilla kernel which | |
1441 | + TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should | |
1442 | + also be expressed to him. | |
1443 | + | |
1444 | + The above mentioned developers have been aided in their efforts by a host | |
1445 | + of hundreds, if not thousands of testers and people who have submitted bug | |
1446 | + fixes & suggestions. Of special note are the efforts of Michael Frank, who | |
1447 | + had his computers repetitively hibernate and resume for literally tens of | |
1448 | + thousands of cycles and developed scripts to stress the system and test | |
1449 | + TuxOnIce far beyond the point most of us (Nigel included!) would consider | |
1450 | + testing. His efforts have contributed as much to TuxOnIce as any of the | |
1451 | + names above. | |
1452 | diff --git a/MAINTAINERS b/MAINTAINERS | |
85eb3c9d | 1453 | index 02f75fc..bd6a84f 100644 |
2380c486 JR |
1454 | --- a/MAINTAINERS |
1455 | +++ b/MAINTAINERS | |
85eb3c9d | 1456 | @@ -5700,6 +5700,13 @@ S: Maintained |
9474138d AM |
1457 | F: drivers/tc/ |
1458 | F: include/linux/tc.h | |
2380c486 JR |
1459 | |
1460 | +TUXONICE (ENHANCED HIBERNATION) | |
1461 | +P: Nigel Cunningham | |
1462 | +M: nigel@tuxonice.net | |
e999739a | 1463 | +L: tuxonice-devel@tuxonice.net |
2380c486 JR |
1464 | +W: http://tuxonice.net |
1465 | +S: Maintained | |
1466 | + | |
1467 | U14-34F SCSI DRIVER | |
92bca44c AM |
1468 | M: Dario Ballabio <ballabio_dario@emc.com> |
1469 | L: linux-scsi@vger.kernel.org | |
2380c486 | 1470 | diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c |
85eb3c9d | 1471 | index a87ead0..b8ac86e 100644 |
2380c486 JR |
1472 | --- a/arch/powerpc/mm/pgtable_32.c |
1473 | +++ b/arch/powerpc/mm/pgtable_32.c | |
85eb3c9d | 1474 | @@ -423,6 +423,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) |
2380c486 JR |
1475 | |
1476 | change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); | |
1477 | } | |
1478 | +EXPORT_SYMBOL_GPL(kernel_map_pages); | |
1479 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | |
1480 | ||
1481 | static int fixmaps; | |
1482 | diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c | |
85eb3c9d | 1483 | index e3af342..482be9c 100644 |
2380c486 JR |
1484 | --- a/arch/x86/kernel/reboot.c |
1485 | +++ b/arch/x86/kernel/reboot.c | |
85eb3c9d | 1486 | @@ -726,6 +726,7 @@ void machine_restart(char *cmd) |
2380c486 JR |
1487 | { |
1488 | machine_ops.restart(cmd); | |
1489 | } | |
1490 | +EXPORT_SYMBOL_GPL(machine_restart); | |
1491 | ||
1492 | void machine_halt(void) | |
1493 | { | |
1494 | diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c | |
5bd2511a | 1495 | index 532e793..bad27ae 100644 |
2380c486 JR |
1496 | --- a/arch/x86/mm/pageattr.c |
1497 | +++ b/arch/x86/mm/pageattr.c | |
5bd2511a | 1498 | @@ -1354,6 +1354,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) |
2380c486 JR |
1499 | */ |
1500 | __flush_tlb_all(); | |
1501 | } | |
1502 | +EXPORT_SYMBOL_GPL(kernel_map_pages); | |
1503 | ||
1504 | #ifdef CONFIG_HIBERNATION | |
1505 | ||
5bd2511a | 1506 | @@ -1368,7 +1369,7 @@ bool kernel_page_present(struct page *page) |
2380c486 JR |
1507 | pte = lookup_address((unsigned long)page_address(page), &level); |
1508 | return (pte_val(*pte) & _PAGE_PRESENT); | |
1509 | } | |
1510 | - | |
1511 | +EXPORT_SYMBOL_GPL(kernel_page_present); | |
1512 | #endif /* CONFIG_HIBERNATION */ | |
1513 | ||
1514 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | |
92bca44c | 1515 | diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c |
5bd2511a | 1516 | index 1290ba5..2280003 100644 |
92bca44c AM |
1517 | --- a/arch/x86/power/cpu.c |
1518 | +++ b/arch/x86/power/cpu.c | |
5bd2511a | 1519 | @@ -114,9 +114,7 @@ void save_processor_state(void) |
2380c486 JR |
1520 | { |
1521 | __save_processor_state(&saved_context); | |
1522 | } | |
92bca44c AM |
1523 | -#ifdef CONFIG_X86_32 |
1524 | EXPORT_SYMBOL(save_processor_state); | |
1525 | -#endif | |
2380c486 JR |
1526 | |
1527 | static void do_fpu_end(void) | |
1528 | { | |
1529 | diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c | |
de6743ae | 1530 | index 3769079..4dabd68 100644 |
2380c486 JR |
1531 | --- a/arch/x86/power/hibernate_32.c |
1532 | +++ b/arch/x86/power/hibernate_32.c | |
de6743ae AM |
1533 | @@ -9,6 +9,7 @@ |
1534 | #include <linux/gfp.h> | |
2380c486 JR |
1535 | #include <linux/suspend.h> |
1536 | #include <linux/bootmem.h> | |
1537 | +#include <linux/module.h> | |
1538 | ||
1539 | #include <asm/system.h> | |
1540 | #include <asm/page.h> | |
de6743ae | 1541 | @@ -164,6 +165,7 @@ int swsusp_arch_resume(void) |
2380c486 JR |
1542 | restore_image(); |
1543 | return 0; | |
1544 | } | |
1545 | +EXPORT_SYMBOL_GPL(swsusp_arch_resume); | |
1546 | ||
1547 | /* | |
1548 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | |
1549 | diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c | |
de6743ae | 1550 | index d24f983..803b20a 100644 |
2380c486 JR |
1551 | --- a/arch/x86/power/hibernate_64.c |
1552 | +++ b/arch/x86/power/hibernate_64.c | |
de6743ae AM |
1553 | @@ -11,6 +11,7 @@ |
1554 | #include <linux/gfp.h> | |
2380c486 JR |
1555 | #include <linux/smp.h> |
1556 | #include <linux/suspend.h> | |
1557 | +#include <linux/module.h> | |
1558 | #include <asm/proto.h> | |
1559 | #include <asm/page.h> | |
1560 | #include <asm/pgtable.h> | |
de6743ae | 1561 | @@ -119,6 +120,7 @@ int swsusp_arch_resume(void) |
2380c486 JR |
1562 | restore_image(); |
1563 | return 0; | |
1564 | } | |
1565 | +EXPORT_SYMBOL_GPL(swsusp_arch_resume); | |
1566 | ||
1567 | /* | |
1568 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | |
de6743ae | 1569 | @@ -169,3 +171,4 @@ int arch_hibernation_header_restore(void *addr) |
2380c486 JR |
1570 | restore_cr3 = rdr->cr3; |
1571 | return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; | |
1572 | } | |
1573 | +EXPORT_SYMBOL_GPL(arch_hibernation_header_restore); | |
7e46296a | 1574 | diff --git a/block/Makefile b/block/Makefile |
5bd2511a | 1575 | index 0bb499a..49f36d0 100644 |
7e46296a AM |
1576 | --- a/block/Makefile |
1577 | +++ b/block/Makefile | |
5bd2511a | 1578 | @@ -5,7 +5,8 @@ |
7e46296a AM |
1579 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
1580 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | |
1581 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | |
5bd2511a AM |
1582 | - blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o |
1583 | + blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \ | |
1584 | + uuid.o | |
7e46296a AM |
1585 | |
1586 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | |
5dd10c98 | 1587 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o |
7e46296a | 1588 | diff --git a/block/blk-core.c b/block/blk-core.c |
85eb3c9d | 1589 | index f0640d7..b485de4 100644 |
7e46296a AM |
1590 | --- a/block/blk-core.c |
1591 | +++ b/block/blk-core.c | |
1592 | @@ -37,6 +37,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); | |
1593 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); | |
1594 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); | |
1595 | ||
1596 | +int trap_non_toi_io; | |
1597 | +EXPORT_SYMBOL_GPL(trap_non_toi_io); | |
1598 | + | |
1599 | static int __make_request(struct request_queue *q, struct bio *bio); | |
1600 | ||
1601 | /* | |
85eb3c9d | 1602 | @@ -1579,6 +1582,9 @@ void submit_bio(int rw, struct bio *bio) |
7e46296a AM |
1603 | |
1604 | bio->bi_rw |= rw; | |
1605 | ||
1606 | + if (unlikely(trap_non_toi_io)) | |
1607 | + BUG_ON(!bio_rw_flagged(bio, BIO_RW_TUXONICE)); | |
1608 | + | |
1609 | /* | |
1610 | * If it's a regular read/write or a barrier with data attached, | |
1611 | * go through the normal accounting stuff before submission. | |
1612 | diff --git a/block/genhd.c b/block/genhd.c | |
5bd2511a | 1613 | index 59a2db6..6875d7d 100644 |
7e46296a AM |
1614 | --- a/block/genhd.c |
1615 | +++ b/block/genhd.c | |
1616 | @@ -18,6 +18,8 @@ | |
1617 | #include <linux/buffer_head.h> | |
1618 | #include <linux/mutex.h> | |
1619 | #include <linux/idr.h> | |
1620 | +#include <linux/ctype.h> | |
cacc47f8 | 1621 | +#include <linux/fs_uuid.h> |
7e46296a AM |
1622 | |
1623 | #include "blk.h" | |
1624 | ||
cacc47f8 | 1625 | @@ -1286,3 +1288,84 @@ int invalidate_partition(struct gendisk *disk, int partno) |
7e46296a AM |
1626 | } |
1627 | ||
1628 | EXPORT_SYMBOL(invalidate_partition); | |
1629 | + | |
cacc47f8 | 1630 | +dev_t blk_lookup_fs_info(struct fs_info *seek) |
7e46296a AM |
1631 | +{ |
1632 | + dev_t devt = MKDEV(0, 0); | |
1633 | + struct class_dev_iter iter; | |
1634 | + struct device *dev; | |
cacc47f8 | 1635 | + int best_score = 0; |
7e46296a AM |
1636 | + |
1637 | + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); | |
cacc47f8 | 1638 | + while (best_score < 3 && (dev = class_dev_iter_next(&iter))) { |
7e46296a AM |
1639 | + struct gendisk *disk = dev_to_disk(dev); |
1640 | + struct disk_part_iter piter; | |
1641 | + struct hd_struct *part; | |
1642 | + | |
1643 | + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | |
1644 | + | |
cacc47f8 AM |
1645 | + while (best_score < 3 && (part = disk_part_iter_next(&piter))) { |
1646 | + int score = part_matches_fs_info(part, seek); | |
1647 | + if (score > best_score) { | |
7e46296a | 1648 | + devt = part_devt(part); |
cacc47f8 | 1649 | + best_score = score; |
7e46296a AM |
1650 | + } |
1651 | + } | |
1652 | + disk_part_iter_exit(&piter); | |
1653 | + } | |
1654 | + class_dev_iter_exit(&iter); | |
1655 | + return devt; | |
1656 | +} | |
cacc47f8 | 1657 | +EXPORT_SYMBOL_GPL(blk_lookup_fs_info); |
5dd10c98 AM |
1658 | + |
1659 | +/* Caller uses NULL, key to start. For each match found, we return a bdev on | |
1660 | + * which we have done blkdev_get, and we do the blkdev_put on block devices | |
1661 | + * that are passed to us. When no more matches are found, we return NULL. | |
1662 | + */ | |
1663 | +struct block_device *next_bdev_of_type(struct block_device *last, | |
1664 | + const char *key) | |
1665 | +{ | |
1666 | + dev_t devt = MKDEV(0, 0); | |
1667 | + struct class_dev_iter iter; | |
1668 | + struct device *dev; | |
1669 | + struct block_device *next = NULL, *bdev; | |
1670 | + int got_last = 0; | |
1671 | + | |
1672 | + if (!key) | |
1673 | + goto out; | |
1674 | + | |
1675 | + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); | |
1676 | + while (!devt && (dev = class_dev_iter_next(&iter))) { | |
1677 | + struct gendisk *disk = dev_to_disk(dev); | |
1678 | + struct disk_part_iter piter; | |
1679 | + struct hd_struct *part; | |
1680 | + | |
1681 | + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | |
1682 | + | |
1683 | + while ((part = disk_part_iter_next(&piter))) { | |
1684 | + bdev = bdget(part_devt(part)); | |
1685 | + if (last && !got_last) { | |
1686 | + if (last == bdev) | |
1687 | + got_last = 1; | |
1688 | + continue; | |
1689 | + } | |
1690 | + | |
1691 | + if (blkdev_get(bdev, FMODE_READ)) | |
1692 | + continue; | |
1693 | + | |
1694 | + if (bdev_matches_key(bdev, key)) { | |
1695 | + next = bdev; | |
1696 | + break; | |
1697 | + } | |
1698 | + | |
1699 | + blkdev_put(bdev, FMODE_READ); | |
1700 | + } | |
1701 | + disk_part_iter_exit(&piter); | |
1702 | + } | |
1703 | + class_dev_iter_exit(&iter); | |
1704 | +out: | |
1705 | + if (last) | |
1706 | + blkdev_put(last, FMODE_READ); | |
1707 | + return next; | |
1708 | +} | |
1709 | +EXPORT_SYMBOL_GPL(next_bdev_of_type); | |
7e46296a AM |
1710 | diff --git a/block/uuid.c b/block/uuid.c |
1711 | new file mode 100644 | |
85eb3c9d | 1712 | index 0000000..d5dea70 |
7e46296a AM |
1713 | --- /dev/null |
1714 | +++ b/block/uuid.c | |
cacc47f8 | 1715 | @@ -0,0 +1,492 @@ |
7e46296a AM |
1716 | +#include <linux/blkdev.h> |
1717 | +#include <linux/ctype.h> | |
cacc47f8 | 1718 | +#include <linux/fs_uuid.h> |
de6743ae | 1719 | +#include <linux/slab.h> |
7e46296a | 1720 | + |
5dd10c98 AM |
1721 | +static int debug_enabled; |
1722 | + | |
1723 | +#define PRINTK(fmt, args...) do { \ | |
1724 | + if (debug_enabled) \ | |
1725 | + printk(KERN_DEBUG fmt, ## args); \ | |
1726 | + } while(0) | |
1727 | + | |
1728 | +#define PRINT_HEX_DUMP(v1, v2, v3, v4, v5, v6, v7, v8) \ | |
1729 | + do { \ | |
1730 | + if (debug_enabled) \ | |
1731 | + print_hex_dump(v1, v2, v3, v4, v5, v6, v7, v8); \ | |
1732 | + } while(0) | |
7e46296a AM |
1733 | + |
1734 | +/* | |
1735 | + * Simple UUID translation | |
1736 | + */ | |
1737 | + | |
1738 | +struct uuid_info { | |
5dd10c98 | 1739 | + const char *key; |
7e46296a AM |
1740 | + const char *name; |
1741 | + long bkoff; | |
1742 | + unsigned sboff; | |
1743 | + unsigned sig_len; | |
1744 | + const char *magic; | |
1745 | + int uuid_offset; | |
5dd10c98 AM |
1746 | + int last_mount_offset; |
1747 | + int last_mount_size; | |
7e46296a AM |
1748 | +}; |
1749 | + | |
1750 | +/* | |
1751 | + * Based on libuuid's blkid_magic array. Note that I don't | |
1752 | + * have uuid offsets for all of these yet - mssing ones are 0x0. | |
1753 | + * Further information welcome. | |
1754 | + * | |
5dd10c98 | 1755 | + * Rearranged by page of fs signature for optimisation. |
7e46296a AM |
1756 | + */ |
1757 | +static struct uuid_info uuid_list[] = { | |
5dd10c98 AM |
1758 | + { NULL, "oracleasm", 0, 32, 8, "ORCLDISK", 0x0, 0, 0 }, |
1759 | + { "ntfs", "ntfs", 0, 3, 8, "NTFS ", 0x0, 0, 0 }, | |
1760 | + { "vfat", "vfat", 0, 0x52, 5, "MSWIN", 0x0, 0, 0 }, | |
1761 | + { "vfat", "vfat", 0, 0x52, 8, "FAT32 ", 0x0, 0, 0 }, | |
1762 | + { "vfat", "vfat", 0, 0x36, 5, "MSDOS", 0x0, 0, 0 }, | |
1763 | + { "vfat", "vfat", 0, 0x36, 8, "FAT16 ", 0x0, 0, 0 }, | |
1764 | + { "vfat", "vfat", 0, 0x36, 8, "FAT12 ", 0x0, 0, 0 }, | |
1765 | + { "vfat", "vfat", 0, 0, 1, "\353", 0x0, 0, 0 }, | |
1766 | + { "vfat", "vfat", 0, 0, 1, "\351", 0x0, 0, 0 }, | |
1767 | + { "vfat", "vfat", 0, 0x1fe, 2, "\125\252", 0x0, 0, 0 }, | |
85eb3c9d | 1768 | + { "xfs", "xfs", 0, 0, 4, "XFSB", 0x20, 0, 0 }, |
5dd10c98 AM |
1769 | + { "romfs", "romfs", 0, 0, 8, "-rom1fs-", 0x0, 0, 0 }, |
1770 | + { "bfs", "bfs", 0, 0, 4, "\316\372\173\033", 0, 0, 0 }, | |
1771 | + { "cramfs", "cramfs", 0, 0, 4, "E=\315\050", 0x0, 0, 0 }, | |
1772 | + { "qnx4", "qnx4", 0, 4, 6, "QNX4FS", 0, 0, 0 }, | |
1773 | + { NULL, "crypt_LUKS", 0, 0, 6, "LUKS\xba\xbe", 0x0, 0, 0 }, | |
1774 | + { "squashfs", "squashfs", 0, 0, 4, "sqsh", 0, 0, 0 }, | |
1775 | + { "squashfs", "squashfs", 0, 0, 4, "hsqs", 0, 0, 0 }, | |
1776 | + { "ocfs", "ocfs", 0, 8, 9, "OracleCFS", 0x0, 0, 0 }, | |
1777 | + { "lvm2pv", "lvm2pv", 0, 0x018, 8, "LVM2 001", 0x0, 0, 0 }, | |
1778 | + { "sysv", "sysv", 0, 0x3f8, 4, "\020~\030\375", 0, 0, 0 }, | |
1779 | + { "ext", "ext", 1, 0x38, 2, "\123\357", 0x468, 0x42c, 4 }, | |
1780 | + { "minix", "minix", 1, 0x10, 2, "\177\023", 0, 0, 0 }, | |
1781 | + { "minix", "minix", 1, 0x10, 2, "\217\023", 0, 0, 0 }, | |
1782 | + { "minix", "minix", 1, 0x10, 2, "\150\044", 0, 0, 0 }, | |
1783 | + { "minix", "minix", 1, 0x10, 2, "\170\044", 0, 0, 0 }, | |
1784 | + { "lvm2pv", "lvm2pv", 1, 0x018, 8, "LVM2 001", 0x0, 0, 0 }, | |
1785 | + { "vxfs", "vxfs", 1, 0, 4, "\365\374\001\245", 0, 0, 0 }, | |
1786 | + { "hfsplus", "hfsplus", 1, 0, 2, "BD", 0x0, 0, 0 }, | |
1787 | + { "hfsplus", "hfsplus", 1, 0, 2, "H+", 0x0, 0, 0 }, | |
1788 | + { "hfsplus", "hfsplus", 1, 0, 2, "HX", 0x0, 0, 0 }, | |
1789 | + { "hfs", "hfs", 1, 0, 2, "BD", 0x0, 0, 0 }, | |
1790 | + { "ocfs2", "ocfs2", 1, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1791 | + { "lvm2pv", "lvm2pv", 0, 0x218, 8, "LVM2 001", 0x0, 0, 0 }, | |
1792 | + { "lvm2pv", "lvm2pv", 1, 0x218, 8, "LVM2 001", 0x0, 0, 0 }, | |
1793 | + { "ocfs2", "ocfs2", 2, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1794 | + { "swap", "swap", 0, 0xff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1795 | + { "swap", "swap", 0, 0xff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1796 | + { "swap", "swsuspend", 0, 0xff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1797 | + { "swap", "swsuspend", 0, 0xff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1798 | + { "swap", "swsuspend", 0, 0xff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1799 | + { "ocfs2", "ocfs2", 4, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1800 | + { "ocfs2", "ocfs2", 8, 0, 6, "OCFSV2", 0x0, 0, 0 }, | |
1801 | + { "hpfs", "hpfs", 8, 0, 4, "I\350\225\371", 0, 0, 0 }, | |
1802 | + { "reiserfs", "reiserfs", 8, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 }, | |
1803 | + { "reiserfs", "reiserfs", 8, 20, 8, "ReIsErFs", 0x10054, 0, 0 }, | |
1804 | + { "zfs", "zfs", 8, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 }, | |
1805 | + { "zfs", "zfs", 8, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 }, | |
1806 | + { "ufs", "ufs", 8, 0x55c, 4, "T\031\001\000", 0, 0, 0 }, | |
1807 | + { "swap", "swap", 0, 0x1ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1808 | + { "swap", "swap", 0, 0x1ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1809 | + { "swap", "swsuspend", 0, 0x1ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1810 | + { "swap", "swsuspend", 0, 0x1ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1811 | + { "swap", "swsuspend", 0, 0x1ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1812 | + { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr2Fs", 0x10054, 0, 0 }, | |
1813 | + { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr3Fs", 0x10054, 0, 0 }, | |
1814 | + { "reiserfs", "reiserfs", 64, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 }, | |
1815 | + { "reiser4", "reiser4", 64, 0, 7, "ReIsEr4", 0x100544, 0, 0 }, | |
1816 | + { "gfs2", "gfs2", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 }, | |
1817 | + { "gfs", "gfs", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 }, | |
1818 | + { "btrfs", "btrfs", 64, 0x40, 8, "_BHRfS_M", 0x0, 0, 0 }, | |
1819 | + { "swap", "swap", 0, 0x3ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1820 | + { "swap", "swap", 0, 0x3ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1821 | + { "swap", "swsuspend", 0, 0x3ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1822 | + { "swap", "swsuspend", 0, 0x3ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1823 | + { "swap", "swsuspend", 0, 0x3ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1824 | + { "udf", "udf", 32, 1, 5, "BEA01", 0x0, 0, 0 }, | |
1825 | + { "udf", "udf", 32, 1, 5, "BOOT2", 0x0, 0, 0 }, | |
1826 | + { "udf", "udf", 32, 1, 5, "CD001", 0x0, 0, 0 }, | |
1827 | + { "udf", "udf", 32, 1, 5, "CDW02", 0x0, 0, 0 }, | |
1828 | + { "udf", "udf", 32, 1, 5, "NSR02", 0x0, 0, 0 }, | |
1829 | + { "udf", "udf", 32, 1, 5, "NSR03", 0x0, 0, 0 }, | |
1830 | + { "udf", "udf", 32, 1, 5, "TEA01", 0x0, 0, 0 }, | |
1831 | + { "iso9660", "iso9660", 32, 1, 5, "CD001", 0x0, 0, 0 }, | |
1832 | + { "iso9660", "iso9660", 32, 9, 5, "CDROM", 0x0, 0, 0 }, | |
1833 | + { "jfs", "jfs", 32, 0, 4, "JFS1", 0x88, 0, 0 }, | |
1834 | + { "swap", "swap", 0, 0x7ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1835 | + { "swap", "swap", 0, 0x7ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1836 | + { "swap", "swsuspend", 0, 0x7ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1837 | + { "swap", "swsuspend", 0, 0x7ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1838 | + { "swap", "swsuspend", 0, 0x7ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1839 | + { "swap", "swap", 0, 0xfff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, | |
1840 | + { "swap", "swap", 0, 0xfff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, | |
1841 | + { "swap", "swsuspend", 0, 0xfff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, | |
1842 | + { "swap", "swsuspend", 0, 0xfff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, | |
1843 | + { "swap", "swsuspend", 0, 0xfff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, | |
1844 | + { "zfs", "zfs", 264, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 }, | |
1845 | + { "zfs", "zfs", 264, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 }, | |
1846 | + { NULL, NULL, 0, 0, 0, NULL, 0x0, 0, 0 } | |
7e46296a AM |
1847 | +}; |
1848 | + | |
5dd10c98 AM |
1849 | +static int null_uuid(const char *uuid) |
1850 | +{ | |
1851 | + int i; | |
1852 | + | |
1853 | + for (i = 0; i < 16 && !uuid[i]; i++); | |
1854 | + | |
1855 | + return (i == 16); | |
1856 | +} | |
1857 | + | |
1858 | + | |
7e46296a AM |
1859 | +static void uuid_end_bio(struct bio *bio, int err) |
1860 | +{ | |
1861 | + struct page *page = bio->bi_io_vec[0].bv_page; | |
1862 | + | |
cacc47f8 AM |
1863 | + if(!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
1864 | + SetPageError(page); | |
7e46296a AM |
1865 | + |
1866 | + unlock_page(page); | |
1867 | + bio_put(bio); | |
1868 | +} | |
1869 | + | |
1870 | + | |
1871 | +/** | |
1872 | + * submit - submit BIO request | |
7e46296a | 1873 | + * @dev: The block device we're using. |
5dd10c98 | 1874 | + * @page_num: The page we're reading. |
7e46296a AM |
1875 | + * |
1876 | + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the | |
1877 | + * textbook - allocate and initialize the bio. If we're writing, make sure | |
1878 | + * the page is marked as dirty. Then submit it and carry on." | |
7e46296a | 1879 | + **/ |
5dd10c98 | 1880 | +static struct page *read_bdev_page(struct block_device *dev, int page_num) |
7e46296a AM |
1881 | +{ |
1882 | + struct bio *bio = NULL; | |
5dd10c98 | 1883 | + struct page *page = alloc_page(GFP_NOFS); |
7e46296a | 1884 | + |
5dd10c98 AM |
1885 | + if (!page) { |
1886 | + printk(KERN_ERR "Failed to allocate a page for reading data " | |
1887 | + "in UUID checks."); | |
7e46296a | 1888 | + return NULL; |
5dd10c98 | 1889 | + } |
7e46296a | 1890 | + |
5dd10c98 | 1891 | + bio = bio_alloc(GFP_NOFS, 1); |
7e46296a | 1892 | + bio->bi_bdev = dev; |
5dd10c98 | 1893 | + bio->bi_sector = page_num << 3; |
7e46296a AM |
1894 | + bio->bi_end_io = uuid_end_bio; |
1895 | + | |
5dd10c98 AM |
1896 | + PRINTK("Submitting bio on device %lx, page %d.\n", |
1897 | + (unsigned long) dev->bd_dev, page_num); | |
1898 | + | |
7e46296a | 1899 | + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { |
5dd10c98 AM |
1900 | + printk(KERN_DEBUG "ERROR: adding page to bio at %d\n", |
1901 | + page_num); | |
7e46296a AM |
1902 | + bio_put(bio); |
1903 | + __free_page(page); | |
5dd10c98 | 1904 | + printk(KERN_DEBUG "read_bdev_page freed page %p (in error " |
7e46296a | 1905 | + "path).\n", page); |
cacc47f8 | 1906 | + return NULL; |
7e46296a AM |
1907 | + } |
1908 | + | |
5dd10c98 | 1909 | + lock_page(page); |
7e46296a AM |
1910 | + submit_bio(READ | (1 << BIO_RW_SYNCIO) | |
1911 | + (1 << BIO_RW_UNPLUG), bio); | |
1912 | + | |
5dd10c98 | 1913 | + wait_on_page_locked(page); |
cacc47f8 AM |
1914 | + if (PageError(page)) { |
1915 | + __free_page(page); | |
1916 | + page = NULL; | |
1917 | + } | |
7e46296a AM |
1918 | + return page; |
1919 | +} | |
1920 | + | |
5dd10c98 AM |
1921 | +int bdev_matches_key(struct block_device *bdev, const char *key) |
1922 | +{ | |
1923 | + unsigned char *data = NULL; | |
1924 | + struct page *data_page = NULL; | |
1925 | + | |
1926 | + int dev_offset, pg_num, pg_off, i; | |
1927 | + int last_pg_num = -1; | |
1928 | + int result = 0; | |
1929 | + char buf[50]; | |
1930 | + | |
1931 | + if (null_uuid(key)) { | |
1932 | + PRINTK("Refusing to find a NULL key.\n"); | |
1933 | + return 0; | |
1934 | + } | |
1935 | + | |
1936 | + if (!bdev->bd_disk) { | |
1937 | + bdevname(bdev, buf); | |
1938 | + PRINTK("bdev %s has no bd_disk.\n", buf); | |
1939 | + return 0; | |
1940 | + } | |
1941 | + | |
1942 | + if (!bdev->bd_disk->queue) { | |
1943 | + bdevname(bdev, buf); | |
1944 | + PRINTK("bdev %s has no queue.\n", buf); | |
1945 | + return 0; | |
1946 | + } | |
1947 | + | |
1948 | + for (i = 0; uuid_list[i].name; i++) { | |
1949 | + struct uuid_info *dat = &uuid_list[i]; | |
1950 | + | |
1951 | + if (!dat->key || strcmp(dat->key, key)) | |
1952 | + continue; | |
1953 | + | |
1954 | + dev_offset = (dat->bkoff << 10) + dat->sboff; | |
1955 | + pg_num = dev_offset >> 12; | |
1956 | + pg_off = dev_offset & 0xfff; | |
1957 | + | |
1958 | + if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1) | |
1959 | + continue; | |
1960 | + | |
1961 | + if (pg_num != last_pg_num) { | |
1962 | + if (data_page) | |
1963 | + __free_page(data_page); | |
1964 | + data_page = read_bdev_page(bdev, pg_num); | |
cacc47f8 AM |
1965 | + if (!data_page) |
1966 | + continue; | |
5dd10c98 AM |
1967 | + data = page_address(data_page); |
1968 | + } | |
1969 | + | |
1970 | + last_pg_num = pg_num; | |
1971 | + | |
1972 | + if (strncmp(&data[pg_off], dat->magic, dat->sig_len)) | |
1973 | + continue; | |
1974 | + | |
1975 | + result = 1; | |
1976 | + break; | |
1977 | + } | |
1978 | + | |
1979 | + if (data_page) | |
1980 | + __free_page(data_page); | |
1981 | + | |
1982 | + return result; | |
1983 | +} | |
7e46296a | 1984 | + |
cacc47f8 AM |
1985 | +/* |
1986 | + * part_matches_fs_info - Does the given partition match the details given? | |
1987 | + * | |
1988 | + * Returns a score saying how good the match is. | |
1989 | + * 0 = no UUID match. | |
1990 | + * 1 = UUID but last mount time differs. | |
1991 | + * 2 = UUID, last mount time but not dev_t | |
1992 | + * 3 = perfect match | |
1993 | + * | |
1994 | + * This lets us cope elegantly with probing resulting in dev_ts changing | |
1995 | + * from boot to boot, and with the case where a user copies a partition | |
1996 | + * (UUID is non unique), and we need to check the last mount time of the | |
1997 | + * correct partition. | |
1998 | + */ | |
1999 | +int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek) | |
7e46296a AM |
2000 | +{ |
2001 | + struct block_device *bdev; | |
cacc47f8 | 2002 | + struct fs_info *got; |
7e46296a AM |
2003 | + int result = 0; |
2004 | + char buf[50]; | |
2005 | + | |
cacc47f8 | 2006 | + if (null_uuid((char *) &seek->uuid)) { |
5dd10c98 AM |
2007 | + PRINTK("Refusing to find a NULL uuid.\n"); |
2008 | + return 0; | |
2009 | + } | |
2010 | + | |
7e46296a AM |
2011 | + bdev = bdget(part_devt(part)); |
2012 | + | |
cacc47f8 | 2013 | + PRINTK("part_matches fs info considering %x.\n", part_devt(part)); |
7e46296a AM |
2014 | + |
2015 | + if (blkdev_get(bdev, FMODE_READ)) { | |
2016 | + PRINTK("blkdev_get failed.\n"); | |
2017 | + return 0; | |
2018 | + } | |
2019 | + | |
2020 | + if (!bdev->bd_disk) { | |
2021 | + bdevname(bdev, buf); | |
2022 | + PRINTK("bdev %s has no bd_disk.\n", buf); | |
2023 | + goto out; | |
2024 | + } | |
2025 | + | |
2026 | + if (!bdev->bd_disk->queue) { | |
2027 | + bdevname(bdev, buf); | |
2028 | + PRINTK("bdev %s has no queue.\n", buf); | |
2029 | + goto out; | |
2030 | + } | |
2031 | + | |
cacc47f8 | 2032 | + got = fs_info_from_block_dev(bdev); |
7e46296a | 2033 | + |
cacc47f8 AM |
2034 | + if (got && !memcmp(got->uuid, seek->uuid, 16)) { |
2035 | + PRINTK(" Having matching UUID.\n"); | |
2036 | + PRINTK(" Got: LMS %d, LM %p.\n", got->last_mount_size, got->last_mount); | |
2037 | + PRINTK(" Seek: LMS %d, LM %p.\n", seek->last_mount_size, seek->last_mount); | |
2038 | + result = 1; | |
7e46296a | 2039 | + |
cacc47f8 AM |
2040 | + if (got->last_mount_size == seek->last_mount_size && |
2041 | + got->last_mount && seek->last_mount && | |
2042 | + !memcmp(got->last_mount, seek->last_mount, | |
2043 | + got->last_mount_size)) { | |
2044 | + result = 2; | |
7e46296a | 2045 | + |
cacc47f8 | 2046 | + PRINTK(" Matching last mount time.\n"); |
7e46296a | 2047 | + |
cacc47f8 AM |
2048 | + if (part_devt(part) == seek->dev_t) { |
2049 | + result = 3; | |
2050 | + PRINTK(" Matching dev_t.\n"); | |
2051 | + } else | |
2052 | + PRINTK("Dev_ts differ (%x vs %x).\n", part_devt(part), seek->dev_t); | |
7e46296a AM |
2053 | + } |
2054 | + } | |
2055 | + | |
cacc47f8 AM |
2056 | + PRINTK(" Score for %x is %d.\n", part_devt(part), result); |
2057 | + free_fs_info(got); | |
7e46296a AM |
2058 | +out: |
2059 | + blkdev_put(bdev, FMODE_READ); | |
2060 | + return result; | |
2061 | +} | |
2062 | + | |
5dd10c98 AM |
2063 | +void free_fs_info(struct fs_info *fs_info) |
2064 | +{ | |
2065 | + if (!fs_info || IS_ERR(fs_info)) | |
2066 | + return; | |
2067 | + | |
2068 | + if (fs_info->last_mount) | |
2069 | + kfree(fs_info->last_mount); | |
2070 | + | |
2071 | + kfree(fs_info); | |
2072 | +} | |
2073 | +EXPORT_SYMBOL_GPL(free_fs_info); | |
2074 | + | |
2075 | +struct fs_info *fs_info_from_block_dev(struct block_device *bdev) | |
7e46296a AM |
2076 | +{ |
2077 | + unsigned char *data = NULL; | |
2078 | + struct page *data_page = NULL; | |
2079 | + | |
2080 | + int dev_offset, pg_num, pg_off; | |
2081 | + int uuid_pg_num, uuid_pg_off, i; | |
2082 | + unsigned char *uuid_data = NULL; | |
2083 | + struct page *uuid_data_page = NULL; | |
2084 | + | |
5dd10c98 | 2085 | + int last_pg_num = -1, last_uuid_pg_num = 0; |
7e46296a | 2086 | + char buf[50]; |
5dd10c98 | 2087 | + struct fs_info *fs_info = NULL; |
7e46296a AM |
2088 | + |
2089 | + bdevname(bdev, buf); | |
2090 | + | |
cacc47f8 | 2091 | + PRINTK("uuid_from_block_dev looking for partition type of %s.\n", buf); |
7e46296a AM |
2092 | + |
2093 | + for (i = 0; uuid_list[i].name; i++) { | |
2094 | + struct uuid_info *dat = &uuid_list[i]; | |
2095 | + dev_offset = (dat->bkoff << 10) + dat->sboff; | |
2096 | + pg_num = dev_offset >> 12; | |
2097 | + pg_off = dev_offset & 0xfff; | |
2098 | + uuid_pg_num = dat->uuid_offset >> 12; | |
2099 | + uuid_pg_off = dat->uuid_offset & 0xfff; | |
2100 | + | |
2101 | + if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1) | |
2102 | + continue; | |
2103 | + | |
5dd10c98 AM |
2104 | + /* Ignore partition types with no UUID offset */ |
2105 | + if (!dat->uuid_offset) | |
2106 | + continue; | |
2107 | + | |
2108 | + if (pg_num != last_pg_num) { | |
7e46296a AM |
2109 | + if (data_page) |
2110 | + __free_page(data_page); | |
5dd10c98 | 2111 | + data_page = read_bdev_page(bdev, pg_num); |
cacc47f8 AM |
2112 | + if (!data_page) |
2113 | + continue; | |
7e46296a AM |
2114 | + data = page_address(data_page); |
2115 | + } | |
2116 | + | |
2117 | + last_pg_num = pg_num; | |
2118 | + | |
2119 | + if (strncmp(&data[pg_off], dat->magic, dat->sig_len)) | |
2120 | + continue; | |
2121 | + | |
7e46296a AM |
2122 | + PRINTK("This partition looks like %s.\n", dat->name); |
2123 | + | |
5dd10c98 AM |
2124 | + fs_info = kzalloc(sizeof(struct fs_info), GFP_KERNEL); |
2125 | + | |
2126 | + if (!fs_info) { | |
2127 | + PRINTK("Failed to allocate fs_info struct."); | |
2128 | + fs_info = ERR_PTR(-ENOMEM); | |
2129 | + break; | |
2130 | + } | |
2131 | + | |
2132 | + /* UUID can't be off the end of the disk */ | |
2133 | + if ((uuid_pg_num > bdev->bd_part->nr_sects >> 3) || | |
2134 | + !dat->uuid_offset) | |
2135 | + goto no_uuid; | |
2136 | + | |
7e46296a AM |
2137 | + if (!uuid_data || uuid_pg_num != last_uuid_pg_num) { |
2138 | + if (uuid_data_page) | |
2139 | + __free_page(uuid_data_page); | |
5dd10c98 | 2140 | + uuid_data_page = read_bdev_page(bdev, uuid_pg_num); |
cacc47f8 AM |
2141 | + if (!uuid_data_page) |
2142 | + continue; | |
7e46296a AM |
2143 | + uuid_data = page_address(uuid_data_page); |
2144 | + } | |
2145 | + | |
2146 | + last_uuid_pg_num = uuid_pg_num; | |
5dd10c98 | 2147 | + memcpy(&fs_info->uuid, &uuid_data[uuid_pg_off], 16); |
cacc47f8 | 2148 | + fs_info->dev_t = bdev->bd_dev; |
5dd10c98 AM |
2149 | + |
2150 | +no_uuid: | |
2151 | + PRINT_HEX_DUMP(KERN_EMERG, "fs_info_from_block_dev " | |
2152 | + "returning uuid ", DUMP_PREFIX_NONE, 16, 1, | |
2153 | + fs_info->uuid, 16, 0); | |
2154 | + | |
2155 | + if (dat->last_mount_size) { | |
2156 | + int pg = dat->last_mount_offset >> 12, sz; | |
2157 | + int off = dat->last_mount_offset & 0xfff; | |
2158 | + struct page *last_mount = read_bdev_page(bdev, pg); | |
2159 | + unsigned char *last_mount_data; | |
2160 | + char *ptr; | |
2161 | + | |
2162 | + if (!last_mount) { | |
2163 | + fs_info = ERR_PTR(-ENOMEM); | |
2164 | + break; | |
2165 | + } | |
2166 | + last_mount_data = page_address(last_mount); | |
2167 | + sz = dat->last_mount_size; | |
2168 | + ptr = kmalloc(sz, GFP_KERNEL); | |
2169 | + | |
2170 | + if (!ptr) { | |
2171 | + printk(KERN_EMERG "fs_info_from_block_dev " | |
2172 | + "failed to get memory for last mount " | |
2173 | + "timestamp."); | |
2174 | + free_fs_info(fs_info); | |
2175 | + fs_info = ERR_PTR(-ENOMEM); | |
2176 | + } else { | |
2177 | + fs_info->last_mount = ptr; | |
2178 | + fs_info->last_mount_size = sz; | |
2179 | + memcpy(ptr, &last_mount_data[off], sz); | |
2180 | + } | |
7e46296a | 2181 | + |
5dd10c98 | 2182 | + __free_page(last_mount); |
7e46296a | 2183 | + } |
5dd10c98 | 2184 | + break; |
7e46296a AM |
2185 | + } |
2186 | + | |
2187 | + if (data_page) | |
2188 | + __free_page(data_page); | |
2189 | + | |
2190 | + if (uuid_data_page) | |
2191 | + __free_page(uuid_data_page); | |
2192 | + | |
5dd10c98 | 2193 | + return fs_info; |
7e46296a | 2194 | +} |
5dd10c98 AM |
2195 | +EXPORT_SYMBOL_GPL(fs_info_from_block_dev); |
2196 | + | |
2197 | +static int __init uuid_debug_setup(char *str) | |
2198 | +{ | |
2199 | + int value; | |
2200 | + | |
2201 | + if (sscanf(str, "=%d", &value)) | |
2202 | + debug_enabled = value; | |
2203 | + | |
2204 | + return 1; | |
2205 | +} | |
2206 | + | |
2207 | +__setup("uuid_debug", uuid_debug_setup); | |
2208 | diff --git a/crypto/Kconfig b/crypto/Kconfig | |
85eb3c9d | 2209 | index df332c1..b0101c1 100644 |
5dd10c98 AM |
2210 | --- a/crypto/Kconfig |
2211 | +++ b/crypto/Kconfig | |
85eb3c9d | 2212 | @@ -824,6 +824,13 @@ config CRYPTO_LZO |
5dd10c98 AM |
2213 | help |
2214 | This is the LZO algorithm. | |
2215 | ||
2216 | +config CRYPTO_LZF | |
2217 | + tristate "LZF compression algorithm" | |
2218 | + select CRYPTO_ALGAPI | |
2219 | + help | |
2220 | + This is the LZF algorithm. It is especially useful for TuxOnIce, | |
2221 | + because it achieves good compression quickly. | |
2222 | + | |
2223 | comment "Random Number Generation" | |
2224 | ||
2225 | config CRYPTO_ANSI_CPRNG | |
2226 | diff --git a/crypto/Makefile b/crypto/Makefile | |
de6743ae | 2227 | index d7e6441..76b9a9e 100644 |
5dd10c98 AM |
2228 | --- a/crypto/Makefile |
2229 | +++ b/crypto/Makefile | |
de6743ae | 2230 | @@ -78,6 +78,7 @@ obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o |
5dd10c98 AM |
2231 | obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o |
2232 | obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o | |
2233 | obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o | |
2234 | +obj-$(CONFIG_CRYPTO_LZF) += lzf.o | |
2235 | obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o | |
2236 | obj-$(CONFIG_CRYPTO_LZO) += lzo.o | |
2237 | obj-$(CONFIG_CRYPTO_RNG2) += rng.o | |
2238 | diff --git a/crypto/lzf.c b/crypto/lzf.c | |
2239 | new file mode 100644 | |
2240 | index 0000000..ccaf83a | |
2241 | --- /dev/null | |
2242 | +++ b/crypto/lzf.c | |
2243 | @@ -0,0 +1,326 @@ | |
2244 | +/* | |
2245 | + * Cryptoapi LZF compression module. | |
2246 | + * | |
2247 | + * Copyright (c) 2004-2008 Nigel Cunningham <nigel at tuxonice net> | |
2248 | + * | |
2249 | + * based on the deflate.c file: | |
2250 | + * | |
2251 | + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au> | |
2252 | + * | |
2253 | + * and upon the LZF compression module donated to the TuxOnIce project with | |
2254 | + * the following copyright: | |
2255 | + * | |
2256 | + * This program is free software; you can redistribute it and/or modify it | |
2257 | + * under the terms of the GNU General Public License as published by the Free | |
2258 | + * Software Foundation; either version 2 of the License, or (at your option) | |
2259 | + * any later version. | |
2260 | + * Copyright (c) 2000-2003 Marc Alexander Lehmann <pcg@goof.com> | |
2261 | + * | |
2262 | + * Redistribution and use in source and binary forms, with or without modifica- | |
2263 | + * tion, are permitted provided that the following conditions are met: | |
2264 | + * | |
2265 | + * 1. Redistributions of source code must retain the above copyright notice, | |
2266 | + * this list of conditions and the following disclaimer. | |
2267 | + * | |
2268 | + * 2. Redistributions in binary form must reproduce the above copyright | |
2269 | + * notice, this list of conditions and the following disclaimer in the | |
2270 | + * documentation and/or other materials provided with the distribution. | |
2271 | + * | |
2272 | + * 3. The name of the author may not be used to endorse or promote products | |
2273 | + * derived from this software without specific prior written permission. | |
2274 | + * | |
2275 | + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
2276 | + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- | |
2277 | + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO | |
2278 | + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- | |
2279 | + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
2280 | + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
2281 | + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
2282 | + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- | |
2283 | + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2284 | + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2285 | + * | |
2286 | + * Alternatively, the contents of this file may be used under the terms of | |
2287 | + * the GNU General Public License version 2 (the "GPL"), in which case the | |
2288 | + * provisions of the GPL are applicable instead of the above. If you wish to | |
2289 | + * allow the use of your version of this file only under the terms of the | |
2290 | + * GPL and not to allow others to use your version of this file under the | |
2291 | + * BSD license, indicate your decision by deleting the provisions above and | |
2292 | + * replace them with the notice and other provisions required by the GPL. If | |
2293 | + * you do not delete the provisions above, a recipient may use your version | |
2294 | + * of this file under either the BSD or the GPL. | |
2295 | + */ | |
2296 | + | |
2297 | +#include <linux/kernel.h> | |
2298 | +#include <linux/module.h> | |
2299 | +#include <linux/init.h> | |
2300 | +#include <linux/module.h> | |
2301 | +#include <linux/crypto.h> | |
2302 | +#include <linux/err.h> | |
2303 | +#include <linux/vmalloc.h> | |
2304 | +#include <linux/string.h> | |
2305 | + | |
2306 | +struct lzf_ctx { | |
2307 | + void *hbuf; | |
2308 | + unsigned int bufofs; | |
2309 | +}; | |
2310 | + | |
2311 | +/* | |
2312 | + * size of hashtable is (1 << hlog) * sizeof (char *) | |
2313 | + * decompression is independent of the hash table size | |
2314 | + * the difference between 15 and 14 is very small | |
2315 | + * for small blocks (and 14 is also faster). | |
2316 | + * For a low-memory configuration, use hlog == 13; | |
2317 | + * For best compression, use 15 or 16. | |
2318 | + */ | |
2319 | +static const int hlog = 13; | |
2320 | + | |
2321 | +/* | |
2322 | + * don't play with this unless you benchmark! | |
2323 | + * decompression is not dependent on the hash function | |
2324 | + * the hashing function might seem strange, just believe me | |
2325 | + * it works ;) | |
2326 | + */ | |
2327 | +static inline u16 first(const u8 *p) | |
2328 | +{ | |
2329 | + return ((p[0]) << 8) + p[1]; | |
2330 | +} | |
2331 | + | |
2332 | +static inline u16 next(u8 v, const u8 *p) | |
2333 | +{ | |
2334 | + return ((v) << 8) + p[2]; | |
2335 | +} | |
2336 | + | |
2337 | +static inline u32 idx(unsigned int h) | |
2338 | +{ | |
2339 | + return (((h ^ (h << 5)) >> (3*8 - hlog)) + h*3) & ((1 << hlog) - 1); | |
2340 | +} | |
2341 | + | |
2342 | +/* | |
2343 | + * IDX works because it is very similar to a multiplicative hash, e.g. | |
2344 | + * (h * 57321 >> (3*8 - hlog)) | |
2345 | + * the next one is also quite good, albeit slow ;) | |
2346 | + * (int)(cos(h & 0xffffff) * 1e6) | |
2347 | + */ | |
2348 | + | |
2349 | +static const int max_lit = (1 << 5); | |
2350 | +static const int max_off = (1 << 13); | |
2351 | +static const int max_ref = ((1 << 8) + (1 << 3)); | |
2352 | + | |
2353 | +/* | |
2354 | + * compressed format | |
2355 | + * | |
2356 | + * 000LLLLL <L+1> ; literal | |
2357 | + * LLLOOOOO oooooooo ; backref L | |
2358 | + * 111OOOOO LLLLLLLL oooooooo ; backref L+7 | |
2359 | + * | |
2360 | + */ | |
2361 | + | |
2362 | +static void lzf_compress_exit(struct crypto_tfm *tfm) | |
2363 | +{ | |
2364 | + struct lzf_ctx *ctx = crypto_tfm_ctx(tfm); | |
2365 | + | |
2366 | + if (!ctx->hbuf) | |
2367 | + return; | |
2368 | + | |
2369 | + vfree(ctx->hbuf); | |
2370 | + ctx->hbuf = NULL; | |
2371 | +} | |
2372 | + | |
2373 | +static int lzf_compress_init(struct crypto_tfm *tfm) | |
2374 | +{ | |
2375 | + struct lzf_ctx *ctx = crypto_tfm_ctx(tfm); | |
2376 | + | |
2377 | + /* Get LZF ready to go */ | |
2378 | + ctx->hbuf = vmalloc_32((1 << hlog) * sizeof(char *)); | |
2379 | + if (ctx->hbuf) | |
2380 | + return 0; | |
2381 | + | |
2382 | + printk(KERN_WARNING "Failed to allocate %ld bytes for lzf workspace\n", | |
2383 | + (long) ((1 << hlog) * sizeof(char *))); | |
2384 | + return -ENOMEM; | |
2385 | +} | |
2386 | + | |
2387 | +static int lzf_compress(struct crypto_tfm *tfm, const u8 *in_data, | |
2388 | + unsigned int in_len, u8 *out_data, unsigned int *out_len) | |
2389 | +{ | |
2390 | + struct lzf_ctx *ctx = crypto_tfm_ctx(tfm); | |
2391 | + const u8 **htab = ctx->hbuf; | |
2392 | + const u8 **hslot; | |
2393 | + const u8 *ip = in_data; | |
2394 | + u8 *op = out_data; | |
2395 | + const u8 *in_end = ip + in_len; | |
2396 | + u8 *out_end = op + *out_len - 3; | |
2397 | + const u8 *ref; | |
2398 | + | |
2399 | + unsigned int hval = first(ip); | |
2400 | + unsigned long off; | |
2401 | + int lit = 0; | |
2402 | + | |
2403 | + memset(htab, 0, sizeof(htab)); | |
2404 | + | |
2405 | + for (;;) { | |
2406 | + if (ip < in_end - 2) { | |
2407 | + hval = next(hval, ip); | |
2408 | + hslot = htab + idx(hval); | |
2409 | + ref = *hslot; | |
2410 | + *hslot = ip; | |
2411 | + | |
2412 | + off = ip - ref - 1; | |
2413 | + if (off < max_off | |
2414 | + && ip + 4 < in_end && ref > in_data | |
2415 | + && *(u16 *) ref == *(u16 *) ip && ref[2] == ip[2] | |
2416 | + ) { | |
2417 | + /* match found at *ref++ */ | |
2418 | + unsigned int len = 2; | |
2419 | + unsigned int maxlen = in_end - ip - len; | |
2420 | + maxlen = maxlen > max_ref ? max_ref : maxlen; | |
2421 | + | |
2422 | + do { | |
2423 | + len++; | |
2424 | + } while (len < maxlen && ref[len] == ip[len]); | |
2425 | + | |
2426 | + if (op + lit + 1 + 3 >= out_end) { | |
2427 | + *out_len = PAGE_SIZE; | |
2428 | + return 0; | |
2429 | + } | |
2430 | + | |
2431 | + if (lit) { | |
2432 | + *op++ = lit - 1; | |
2433 | + lit = -lit; | |
2434 | + do { | |
2435 | + *op++ = ip[lit]; | |
2436 | + } while (++lit); | |
2437 | + } | |
2438 | + | |
2439 | + len -= 2; | |
2440 | + ip++; | |
2441 | + | |
2442 | + if (len < 7) { | |
2443 | + *op++ = (off >> 8) + (len << 5); | |
2444 | + } else { | |
2445 | + *op++ = (off >> 8) + (7 << 5); | |
2446 | + *op++ = len - 7; | |
2447 | + } | |
2448 | + | |
2449 | + *op++ = off; | |
2450 | + | |
2451 | + ip += len; | |
2452 | + hval = first(ip); | |
2453 | + hval = next(hval, ip); | |
2454 | + htab[idx(hval)] = ip; | |
2455 | + ip++; | |
2456 | + continue; | |
2457 | + } | |
2458 | + } else if (ip == in_end) | |
2459 | + break; | |
2460 | + | |
2461 | + /* one more literal byte we must copy */ | |
2462 | + lit++; | |
2463 | + ip++; | |
2464 | + | |
2465 | + if (lit == max_lit) { | |
2466 | + if (op + 1 + max_lit >= out_end) { | |
2467 | + *out_len = PAGE_SIZE; | |
2468 | + return 0; | |
2469 | + } | |
2470 | + | |
2471 | + *op++ = max_lit - 1; | |
2472 | + memcpy(op, ip - max_lit, max_lit); | |
2473 | + op += max_lit; | |
2474 | + lit = 0; | |
2475 | + } | |
2476 | + } | |
2477 | + | |
2478 | + if (lit) { | |
2479 | + if (op + lit + 1 >= out_end) { | |
2480 | + *out_len = PAGE_SIZE; | |
2481 | + return 0; | |
2482 | + } | |
2483 | + | |
2484 | + *op++ = lit - 1; | |
2485 | + lit = -lit; | |
2486 | + do { | |
2487 | + *op++ = ip[lit]; | |
2488 | + } while (++lit); | |
2489 | + } | |
2490 | + | |
2491 | + *out_len = op - out_data; | |
2492 | + return 0; | |
2493 | +} | |
2494 | + | |
2495 | +static int lzf_decompress(struct crypto_tfm *tfm, const u8 *src, | |
2496 | + unsigned int slen, u8 *dst, unsigned int *dlen) | |
2497 | +{ | |
2498 | + u8 const *ip = src; | |
2499 | + u8 *op = dst; | |
2500 | + u8 const *const in_end = ip + slen; | |
2501 | + u8 *const out_end = op + *dlen; | |
2502 | + | |
2503 | + *dlen = PAGE_SIZE; | |
2504 | + do { | |
2505 | + unsigned int ctrl = *ip++; | |
2506 | + | |
2507 | + if (ctrl < (1 << 5)) { | |
2508 | + /* literal run */ | |
2509 | + ctrl++; | |
2510 | + | |
2511 | + if (op + ctrl > out_end) | |
2512 | + return 0; | |
2513 | + memcpy(op, ip, ctrl); | |
2514 | + op += ctrl; | |
2515 | + ip += ctrl; | |
2516 | + } else { /* back reference */ | |
2517 | + | |
2518 | + unsigned int len = ctrl >> 5; | |
2519 | + | |
2520 | + u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; | |
2521 | + | |
2522 | + if (len == 7) | |
2523 | + len += *ip++; | |
2524 | + | |
2525 | + ref -= *ip++; | |
2526 | + len += 2; | |
2527 | + | |
2528 | + if (op + len > out_end || ref < (u8 *) dst) | |
2529 | + return 0; | |
2530 | + | |
2531 | + do { | |
2532 | + *op++ = *ref++; | |
2533 | + } while (--len); | |
2534 | + } | |
2535 | + } while (op < out_end && ip < in_end); | |
2536 | + | |
2537 | + *dlen = op - (u8 *) dst; | |
2538 | + return 0; | |
2539 | +} | |
2540 | + | |
2541 | +static struct crypto_alg alg = { | |
2542 | + .cra_name = "lzf", | |
2543 | + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, | |
2544 | + .cra_ctxsize = sizeof(struct lzf_ctx), | |
2545 | + .cra_module = THIS_MODULE, | |
2546 | + .cra_list = LIST_HEAD_INIT(alg.cra_list), | |
2547 | + .cra_init = lzf_compress_init, | |
2548 | + .cra_exit = lzf_compress_exit, | |
2549 | + .cra_u = { .compress = { | |
2550 | + .coa_compress = lzf_compress, | |
2551 | + .coa_decompress = lzf_decompress } } | |
2552 | +}; | |
2553 | + | |
2554 | +static int __init init(void) | |
2555 | +{ | |
2556 | + return crypto_register_alg(&alg); | |
2557 | +} | |
2558 | + | |
2559 | +static void __exit fini(void) | |
2560 | +{ | |
2561 | + crypto_unregister_alg(&alg); | |
2562 | +} | |
2563 | + | |
2564 | +module_init(init); | |
2565 | +module_exit(fini); | |
2566 | + | |
2567 | +MODULE_LICENSE("GPL"); | |
2568 | +MODULE_DESCRIPTION("LZF Compression Algorithm"); | |
2569 | +MODULE_AUTHOR("Marc Alexander Lehmann & Nigel Cunningham"); | |
2380c486 | 2570 | diff --git a/drivers/char/vt.c b/drivers/char/vt.c |
5bd2511a | 2571 | index 7cdb6ee..f114914 100644 |
2380c486 JR |
2572 | --- a/drivers/char/vt.c |
2573 | +++ b/drivers/char/vt.c | |
de6743ae | 2574 | @@ -2461,6 +2461,7 @@ int vt_kmsg_redirect(int new) |
5dd10c98 AM |
2575 | else |
2576 | return kmsg_con; | |
2577 | } | |
2578 | +EXPORT_SYMBOL_GPL(vt_kmsg_redirect); | |
2380c486 JR |
2579 | |
2580 | /* | |
5dd10c98 | 2581 | * Console on virtual terminal |
e999739a | 2582 | diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c |
5bd2511a | 2583 | index 33dad3f..47fb186 100644 |
e999739a | 2584 | --- a/drivers/gpu/drm/drm_gem.c |
2585 | +++ b/drivers/gpu/drm/drm_gem.c | |
5bd2511a AM |
2586 | @@ -133,7 +133,8 @@ int drm_gem_object_init(struct drm_device *dev, |
2587 | BUG_ON((size & (PAGE_SIZE - 1)) != 0); | |
e999739a | 2588 | |
2589 | obj->dev = dev; | |
2590 | - obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); | |
2591 | + obj->filp = shmem_file_setup("drm mm object", size, | |
2592 | + VM_NORESERVE | VM_ATOMIC_COPY); | |
92bca44c | 2593 | if (IS_ERR(obj->filp)) |
5bd2511a | 2594 | return -ENOMEM; |
92bca44c | 2595 | |
2380c486 | 2596 | diff --git a/drivers/md/md.c b/drivers/md/md.c |
85eb3c9d | 2597 | index 7476d95..7a53775 100644 |
2380c486 JR |
2598 | --- a/drivers/md/md.c |
2599 | +++ b/drivers/md/md.c | |
85eb3c9d | 2600 | @@ -6634,6 +6634,9 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2601 | mddev->curr_resync = 2; |
2602 | ||
2603 | try_again: | |
2604 | + while (freezer_is_on()) | |
2605 | + yield(); | |
2606 | + | |
5dd10c98 | 2607 | if (kthread_should_stop()) |
2380c486 | 2608 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5dd10c98 | 2609 | |
85eb3c9d | 2610 | @@ -6656,6 +6659,10 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2611 | * time 'round when curr_resync == 2 |
2612 | */ | |
2613 | continue; | |
2614 | + | |
2615 | + while (freezer_is_on()) | |
2616 | + yield(); | |
2617 | + | |
2618 | /* We need to wait 'interruptible' so as not to | |
2619 | * contribute to the load average, and not to | |
2620 | * be caught by 'softlockup' | |
85eb3c9d | 2621 | @@ -6668,6 +6675,7 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2622 | " share one or more physical units)\n", |
2623 | desc, mdname(mddev), mdname(mddev2)); | |
2624 | mddev_put(mddev2); | |
2625 | + try_to_freeze(); | |
2626 | if (signal_pending(current)) | |
2627 | flush_signals(current); | |
2628 | schedule(); | |
85eb3c9d | 2629 | @@ -6777,6 +6785,9 @@ void md_do_sync(mddev_t *mddev) |
92bca44c AM |
2630 | || kthread_should_stop()); |
2631 | } | |
9474138d | 2632 | |
2380c486 JR |
2633 | + while (freezer_is_on()) |
2634 | + yield(); | |
2635 | + | |
2636 | if (kthread_should_stop()) | |
2637 | goto interrupted; | |
9474138d | 2638 | |
85eb3c9d | 2639 | @@ -6821,6 +6832,9 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
2640 | last_mark = next; |
2641 | } | |
2642 | ||
2643 | + while (freezer_is_on()) | |
2644 | + yield(); | |
2645 | + | |
2646 | ||
2647 | if (kthread_should_stop()) | |
2648 | goto interrupted; | |
85eb3c9d AM |
2649 | @@ -6995,6 +7009,8 @@ void md_check_recovery(mddev_t *mddev) |
2650 | { | |
2651 | mdk_rdev_t *rdev; | |
2652 | ||
2653 | + if (unlikely(freezer_is_on())) | |
2654 | + return; | |
2655 | ||
2656 | if (mddev->bitmap) | |
2657 | bitmap_daemon_work(mddev); | |
9474138d | 2658 | diff --git a/fs/block_dev.c b/fs/block_dev.c |
85eb3c9d | 2659 | index 4c54c86..79a77ac 100644 |
9474138d AM |
2660 | --- a/fs/block_dev.c |
2661 | +++ b/fs/block_dev.c | |
5bd2511a | 2662 | @@ -295,6 +295,93 @@ out: |
2380c486 JR |
2663 | } |
2664 | EXPORT_SYMBOL(thaw_bdev); | |
2665 | ||
2666 | +#ifdef CONFIG_FS_FREEZER_DEBUG | |
2667 | +#define FS_PRINTK(fmt, args...) printk(fmt, ## args) | |
2668 | +#else | |
2669 | +#define FS_PRINTK(fmt, args...) | |
2670 | +#endif | |
2671 | + | |
2672 | +/* #define DEBUG_FS_FREEZING */ | |
2673 | + | |
2674 | +/** | |
2675 | + * freeze_filesystems - lock all filesystems and force them into a consistent | |
2676 | + * state | |
2677 | + * @which: What combination of fuse & non-fuse to freeze. | |
2678 | + */ | |
2679 | +void freeze_filesystems(int which) | |
2680 | +{ | |
2681 | + struct super_block *sb; | |
2682 | + | |
2683 | + lockdep_off(); | |
2684 | + | |
2685 | + /* | |
2686 | + * Freeze in reverse order so filesystems dependant upon others are | |
2687 | + * frozen in the right order (eg. loopback on ext3). | |
2688 | + */ | |
2689 | + list_for_each_entry_reverse(sb, &super_blocks, s_list) { | |
2690 | + FS_PRINTK(KERN_INFO "Considering %s.%s: (root %p, bdev %x)", | |
2691 | + sb->s_type->name ? sb->s_type->name : "?", | |
2692 | + sb->s_subtype ? sb->s_subtype : "", sb->s_root, | |
2693 | + sb->s_bdev ? sb->s_bdev->bd_dev : 0); | |
2694 | + | |
2695 | + if (sb->s_type->fs_flags & FS_IS_FUSE && | |
2696 | + sb->s_frozen == SB_UNFROZEN && | |
2697 | + which & FS_FREEZER_FUSE) { | |
2698 | + sb->s_frozen = SB_FREEZE_TRANS; | |
2699 | + sb->s_flags |= MS_FROZEN; | |
2700 | + FS_PRINTK("Fuse filesystem done.\n"); | |
2701 | + continue; | |
2702 | + } | |
2703 | + | |
2704 | + if (!sb->s_root || !sb->s_bdev || | |
2705 | + (sb->s_frozen == SB_FREEZE_TRANS) || | |
2706 | + (sb->s_flags & MS_RDONLY) || | |
2707 | + (sb->s_flags & MS_FROZEN) || | |
2708 | + !(which & FS_FREEZER_NORMAL)) { | |
2709 | + FS_PRINTK(KERN_INFO "Nope.\n"); | |
2710 | + continue; | |
2711 | + } | |
2712 | + | |
2713 | + FS_PRINTK(KERN_INFO "Freezing %x... ", sb->s_bdev->bd_dev); | |
2714 | + freeze_bdev(sb->s_bdev); | |
2715 | + sb->s_flags |= MS_FROZEN; | |
2716 | + FS_PRINTK(KERN_INFO "Done.\n"); | |
2717 | + } | |
2718 | + | |
2719 | + lockdep_on(); | |
2720 | +} | |
2721 | + | |
2722 | +/** | |
2723 | + * thaw_filesystems - unlock all filesystems | |
2724 | + * @which: What combination of fuse & non-fuse to thaw. | |
2725 | + */ | |
2726 | +void thaw_filesystems(int which) | |
2727 | +{ | |
2728 | + struct super_block *sb; | |
2729 | + | |
2730 | + lockdep_off(); | |
2731 | + | |
2732 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
2733 | + if (!(sb->s_flags & MS_FROZEN)) | |
2734 | + continue; | |
2735 | + | |
2736 | + if (sb->s_type->fs_flags & FS_IS_FUSE) { | |
2737 | + if (!(which & FS_FREEZER_FUSE)) | |
2738 | + continue; | |
2739 | + | |
2740 | + sb->s_frozen = SB_UNFROZEN; | |
2741 | + } else { | |
2742 | + if (!(which & FS_FREEZER_NORMAL)) | |
2743 | + continue; | |
2744 | + | |
2745 | + thaw_bdev(sb->s_bdev, sb); | |
2746 | + } | |
2747 | + sb->s_flags &= ~MS_FROZEN; | |
2748 | + } | |
2749 | + | |
2750 | + lockdep_on(); | |
2751 | +} | |
2752 | + | |
9474138d AM |
2753 | static int blkdev_writepage(struct page *page, struct writeback_control *wbc) |
2754 | { | |
2755 | return block_write_full_page(page, blkdev_get_block, wbc); | |
2380c486 | 2756 | diff --git a/fs/drop_caches.c b/fs/drop_caches.c |
5bd2511a | 2757 | index 83c4f60..8f7ec03 100644 |
2380c486 JR |
2758 | --- a/fs/drop_caches.c |
2759 | +++ b/fs/drop_caches.c | |
2760 | @@ -8,6 +8,7 @@ | |
2761 | #include <linux/writeback.h> | |
2762 | #include <linux/sysctl.h> | |
2763 | #include <linux/gfp.h> | |
2764 | +#include <linux/module.h> | |
2765 | ||
2766 | /* A global variable is a bit ugly, but it keeps the code simple */ | |
2767 | int sysctl_drop_caches; | |
5bd2511a AM |
2768 | @@ -42,6 +43,13 @@ static void drop_slab(void) |
2769 | } while (nr_objects > 10); | |
2380c486 JR |
2770 | } |
2771 | ||
5bd2511a | 2772 | +/* For TuxOnIce */ |
2380c486 | 2773 | +void drop_pagecache(void) |
5bd2511a AM |
2774 | +{ |
2775 | + iterate_supers(drop_pagecache_sb, NULL); | |
2776 | +} | |
2380c486 | 2777 | +EXPORT_SYMBOL_GPL(drop_pagecache); |
5bd2511a | 2778 | + |
2380c486 | 2779 | int drop_caches_sysctl_handler(ctl_table *table, int write, |
7e46296a | 2780 | void __user *buffer, size_t *length, loff_t *ppos) |
5bd2511a | 2781 | { |
2380c486 | 2782 | diff --git a/fs/fuse/control.c b/fs/fuse/control.c |
7e46296a | 2783 | index 3773fd6..6272b60 100644 |
2380c486 JR |
2784 | --- a/fs/fuse/control.c |
2785 | +++ b/fs/fuse/control.c | |
7e46296a | 2786 | @@ -341,6 +341,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) |
2380c486 JR |
2787 | static struct file_system_type fuse_ctl_fs_type = { |
2788 | .owner = THIS_MODULE, | |
2789 | .name = "fusectl", | |
2790 | + .fs_flags = FS_IS_FUSE, | |
2791 | .get_sb = fuse_ctl_get_sb, | |
2792 | .kill_sb = fuse_ctl_kill_sb, | |
2793 | }; | |
2794 | diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c | |
5bd2511a | 2795 | index 9424796..25c6277 100644 |
2380c486 JR |
2796 | --- a/fs/fuse/dev.c |
2797 | +++ b/fs/fuse/dev.c | |
2798 | @@ -7,6 +7,7 @@ | |
2799 | */ | |
2800 | ||
2801 | #include "fuse_i.h" | |
2802 | +#include "fuse.h" | |
2803 | ||
2804 | #include <linux/init.h> | |
2805 | #include <linux/module.h> | |
2806 | @@ -16,6 +17,7 @@ | |
2807 | #include <linux/pagemap.h> | |
2808 | #include <linux/file.h> | |
2809 | #include <linux/slab.h> | |
2810 | +#include <linux/freezer.h> | |
5bd2511a AM |
2811 | #include <linux/pipe_fs_i.h> |
2812 | #include <linux/swap.h> | |
2813 | #include <linux/splice.h> | |
2814 | @@ -961,6 +963,8 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, | |
2815 | struct fuse_in *in; | |
2816 | unsigned reqsize; | |
2380c486 JR |
2817 | |
2818 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_dev_read"); | |
2819 | + | |
2820 | restart: | |
2821 | spin_lock(&fc->lock); | |
2822 | err = -EAGAIN; | |
5bd2511a | 2823 | @@ -1395,6 +1399,9 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, |
2380c486 JR |
2824 | if (!fc) |
2825 | return -EPERM; | |
2826 | ||
2827 | + FUSE_MIGHT_FREEZE(iocb->ki_filp->f_mapping->host->i_sb, | |
2828 | + "fuse_dev_write"); | |
2829 | + | |
5bd2511a AM |
2830 | fuse_copy_init(&cs, fc, 0, iov, nr_segs); |
2831 | ||
2832 | return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs)); | |
2380c486 | 2833 | diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c |
5bd2511a | 2834 | index 3cdc5f7..725cb5a 100644 |
2380c486 JR |
2835 | --- a/fs/fuse/dir.c |
2836 | +++ b/fs/fuse/dir.c | |
2837 | @@ -7,12 +7,14 @@ | |
2838 | */ | |
2839 | ||
2840 | #include "fuse_i.h" | |
2841 | +#include "fuse.h" | |
2842 | ||
2843 | #include <linux/pagemap.h> | |
2844 | #include <linux/file.h> | |
2845 | #include <linux/gfp.h> | |
2846 | #include <linux/sched.h> | |
2847 | #include <linux/namei.h> | |
2848 | +#include <linux/freezer.h> | |
2849 | ||
2850 | #if BITS_PER_LONG >= 64 | |
2851 | static inline void fuse_dentry_settime(struct dentry *entry, u64 time) | |
2852 | @@ -174,6 +176,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) | |
2853 | return 0; | |
2854 | ||
2855 | fc = get_fuse_conn(inode); | |
2856 | + | |
2857 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_dentry_revalidate"); | |
2858 | + | |
2859 | req = fuse_get_req(fc); | |
2860 | if (IS_ERR(req)) | |
2861 | return 0; | |
2862 | @@ -268,6 +273,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, | |
2863 | if (name->len > FUSE_NAME_MAX) | |
2864 | goto out; | |
2865 | ||
2866 | + FUSE_MIGHT_FREEZE(sb, "fuse_lookup_name"); | |
2867 | + | |
2868 | req = fuse_get_req(fc); | |
2869 | err = PTR_ERR(req); | |
2870 | if (IS_ERR(req)) | |
2871 | @@ -331,6 +338,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, | |
2872 | if (err) | |
2873 | goto out_err; | |
2874 | ||
2875 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_lookup"); | |
2876 | + | |
2877 | err = -EIO; | |
2878 | if (inode && get_node_id(inode) == FUSE_ROOT_ID) | |
2879 | goto out_iput; | |
7e46296a | 2880 | @@ -392,6 +401,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, |
2380c486 JR |
2881 | if (IS_ERR(forget_req)) |
2882 | return PTR_ERR(forget_req); | |
2883 | ||
2884 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_create_open"); | |
2885 | + | |
2886 | req = fuse_get_req(fc); | |
2887 | err = PTR_ERR(req); | |
2888 | if (IS_ERR(req)) | |
7e46296a | 2889 | @@ -485,6 +496,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, |
2380c486 JR |
2890 | int err; |
2891 | struct fuse_req *forget_req; | |
2892 | ||
2893 | + FUSE_MIGHT_FREEZE(dir->i_sb, "create_new_entry"); | |
2894 | + | |
2895 | forget_req = fuse_get_req(fc); | |
2896 | if (IS_ERR(forget_req)) { | |
2897 | fuse_put_request(fc, req); | |
7e46296a | 2898 | @@ -587,7 +600,11 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) |
2380c486 JR |
2899 | { |
2900 | struct fuse_mkdir_in inarg; | |
2901 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2902 | - struct fuse_req *req = fuse_get_req(fc); | |
2903 | + struct fuse_req *req; | |
2904 | + | |
2905 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_mkdir"); | |
2906 | + | |
2907 | + req = fuse_get_req(fc); | |
2908 | if (IS_ERR(req)) | |
2909 | return PTR_ERR(req); | |
2910 | ||
7e46296a | 2911 | @@ -611,7 +628,11 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, |
2380c486 JR |
2912 | { |
2913 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2914 | unsigned len = strlen(link) + 1; | |
2915 | - struct fuse_req *req = fuse_get_req(fc); | |
2916 | + struct fuse_req *req; | |
2917 | + | |
2918 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_symlink"); | |
2919 | + | |
2920 | + req = fuse_get_req(fc); | |
2921 | if (IS_ERR(req)) | |
2922 | return PTR_ERR(req); | |
2923 | ||
7e46296a | 2924 | @@ -628,7 +649,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) |
2380c486 JR |
2925 | { |
2926 | int err; | |
2927 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2928 | - struct fuse_req *req = fuse_get_req(fc); | |
2929 | + struct fuse_req *req; | |
2930 | + | |
2931 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_unlink"); | |
2932 | + | |
2933 | + req = fuse_get_req(fc); | |
2934 | if (IS_ERR(req)) | |
2935 | return PTR_ERR(req); | |
2936 | ||
7e46296a | 2937 | @@ -661,7 +686,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) |
2380c486 JR |
2938 | { |
2939 | int err; | |
2940 | struct fuse_conn *fc = get_fuse_conn(dir); | |
2941 | - struct fuse_req *req = fuse_get_req(fc); | |
2942 | + struct fuse_req *req; | |
2943 | + | |
2944 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_rmdir"); | |
2945 | + | |
2946 | + req = fuse_get_req(fc); | |
2947 | if (IS_ERR(req)) | |
2948 | return PTR_ERR(req); | |
2949 | ||
2950 | diff --git a/fs/fuse/file.c b/fs/fuse/file.c | |
5bd2511a | 2951 | index ada0ade..ca89e06 100644 |
2380c486 JR |
2952 | --- a/fs/fuse/file.c |
2953 | +++ b/fs/fuse/file.c | |
2954 | @@ -7,11 +7,13 @@ | |
2955 | */ | |
2956 | ||
2957 | #include "fuse_i.h" | |
2958 | +#include "fuse.h" | |
2959 | ||
2960 | #include <linux/pagemap.h> | |
2961 | #include <linux/slab.h> | |
2962 | #include <linux/kernel.h> | |
2963 | #include <linux/sched.h> | |
2964 | +#include <linux/freezer.h> | |
92bca44c | 2965 | #include <linux/module.h> |
2380c486 JR |
2966 | |
2967 | static const struct file_operations fuse_direct_io_file_operations; | |
92bca44c | 2968 | @@ -109,6 +111,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
2380c486 | 2969 | int err; |
92bca44c | 2970 | int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; |
2380c486 | 2971 | |
92bca44c | 2972 | + FUSE_MIGHT_FREEZE(file->f_path.dentry->d_inode->i_sb, "fuse_send_open"); |
2380c486 | 2973 | + |
92bca44c AM |
2974 | ff = fuse_file_alloc(fc); |
2975 | if (!ff) | |
2976 | return -ENOMEM; | |
2977 | @@ -316,6 +320,8 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |
2380c486 JR |
2978 | if (fc->no_flush) |
2979 | return 0; | |
2980 | ||
2981 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_flush"); | |
2982 | + | |
2983 | req = fuse_get_req_nofail(fc, file); | |
2984 | memset(&inarg, 0, sizeof(inarg)); | |
2985 | inarg.fh = ff->fh; | |
5bd2511a | 2986 | @@ -366,6 +372,8 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir) |
2380c486 JR |
2987 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) |
2988 | return 0; | |
2989 | ||
2990 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_fsync_common"); | |
2991 | + | |
2992 | /* | |
2993 | * Start writeback against all dirty pages of the inode, then | |
2994 | * wait for all outstanding writes, before sending the FSYNC | |
5bd2511a | 2995 | @@ -473,6 +481,8 @@ static int fuse_readpage(struct file *file, struct page *page) |
2380c486 JR |
2996 | if (is_bad_inode(inode)) |
2997 | goto out; | |
2998 | ||
2999 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_readpage"); | |
3000 | + | |
3001 | /* | |
3002 | * Page writeback can extend beyond the liftime of the | |
3003 | * page-cache page, so make sure we read a properly synced | |
5bd2511a | 3004 | @@ -586,6 +596,9 @@ static int fuse_readpages_fill(void *_data, struct page *page) |
2380c486 JR |
3005 | struct inode *inode = data->inode; |
3006 | struct fuse_conn *fc = get_fuse_conn(inode); | |
3007 | ||
3008 | + FUSE_MIGHT_FREEZE(data->file->f_mapping->host->i_sb, | |
3009 | + "fuse_readpages_fill"); | |
3010 | + | |
3011 | fuse_wait_on_page_writeback(inode, page->index); | |
3012 | ||
3013 | if (req->num_pages && | |
5bd2511a | 3014 | @@ -617,6 +630,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, |
2380c486 JR |
3015 | if (is_bad_inode(inode)) |
3016 | goto out; | |
3017 | ||
3018 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_readpages"); | |
3019 | + | |
3020 | data.file = file; | |
3021 | data.inode = inode; | |
3022 | data.req = fuse_get_req(fc); | |
5bd2511a | 3023 | @@ -730,6 +745,8 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, |
2380c486 JR |
3024 | if (is_bad_inode(inode)) |
3025 | return -EIO; | |
3026 | ||
3027 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_buffered_write"); | |
3028 | + | |
3029 | /* | |
3030 | * Make sure writepages on the same page are not mixed up with | |
3031 | * plain writes. | |
5bd2511a | 3032 | @@ -889,6 +906,8 @@ static ssize_t fuse_perform_write(struct file *file, |
2380c486 JR |
3033 | struct fuse_req *req; |
3034 | ssize_t count; | |
3035 | ||
3036 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_perform_write"); | |
3037 | + | |
3038 | req = fuse_get_req(fc); | |
3039 | if (IS_ERR(req)) { | |
3040 | err = PTR_ERR(req); | |
5bd2511a | 3041 | @@ -1033,6 +1052,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf, |
92bca44c AM |
3042 | ssize_t res = 0; |
3043 | struct fuse_req *req; | |
2380c486 JR |
3044 | |
3045 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_direct_io"); | |
3046 | + | |
3047 | req = fuse_get_req(fc); | |
3048 | if (IS_ERR(req)) | |
3049 | return PTR_ERR(req); | |
5bd2511a | 3050 | @@ -1420,6 +1441,8 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) |
2380c486 JR |
3051 | struct fuse_lk_out outarg; |
3052 | int err; | |
3053 | ||
3054 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_getlk"); | |
3055 | + | |
3056 | req = fuse_get_req(fc); | |
3057 | if (IS_ERR(req)) | |
3058 | return PTR_ERR(req); | |
5bd2511a | 3059 | @@ -1455,6 +1478,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) |
2380c486 JR |
3060 | if (fl->fl_flags & FL_CLOSE) |
3061 | return 0; | |
3062 | ||
3063 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_setlk"); | |
3064 | + | |
3065 | req = fuse_get_req(fc); | |
3066 | if (IS_ERR(req)) | |
3067 | return PTR_ERR(req); | |
5bd2511a | 3068 | @@ -1521,6 +1546,8 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) |
2380c486 JR |
3069 | if (!inode->i_sb->s_bdev || fc->no_bmap) |
3070 | return 0; | |
3071 | ||
3072 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_bmap"); | |
3073 | + | |
3074 | req = fuse_get_req(fc); | |
3075 | if (IS_ERR(req)) | |
3076 | return 0; | |
3077 | diff --git a/fs/fuse/fuse.h b/fs/fuse/fuse.h | |
3078 | new file mode 100644 | |
3079 | index 0000000..170e49a | |
3080 | --- /dev/null | |
3081 | +++ b/fs/fuse/fuse.h | |
3082 | @@ -0,0 +1,13 @@ | |
3083 | +#define FUSE_MIGHT_FREEZE(superblock, desc) \ | |
3084 | +do { \ | |
3085 | + int printed = 0; \ | |
3086 | + while (superblock->s_frozen != SB_UNFROZEN) { \ | |
3087 | + if (!printed) { \ | |
3088 | + printk(KERN_INFO "%d frozen in " desc ".\n", \ | |
3089 | + current->pid); \ | |
3090 | + printed = 1; \ | |
3091 | + } \ | |
3092 | + try_to_freeze(); \ | |
3093 | + yield(); \ | |
3094 | + } \ | |
3095 | +} while (0) | |
3096 | diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c | |
de6743ae | 3097 | index ec14d19..2a82a08 100644 |
2380c486 JR |
3098 | --- a/fs/fuse/inode.c |
3099 | +++ b/fs/fuse/inode.c | |
7e46296a | 3100 | @@ -1062,7 +1062,7 @@ static void fuse_kill_sb_anon(struct super_block *sb) |
2380c486 JR |
3101 | static struct file_system_type fuse_fs_type = { |
3102 | .owner = THIS_MODULE, | |
3103 | .name = "fuse", | |
3104 | - .fs_flags = FS_HAS_SUBTYPE, | |
3105 | + .fs_flags = FS_HAS_SUBTYPE | FS_IS_FUSE, | |
3106 | .get_sb = fuse_get_sb, | |
92bca44c | 3107 | .kill_sb = fuse_kill_sb_anon, |
2380c486 | 3108 | }; |
7e46296a | 3109 | @@ -1094,7 +1094,7 @@ static struct file_system_type fuseblk_fs_type = { |
2380c486 JR |
3110 | .name = "fuseblk", |
3111 | .get_sb = fuse_get_sb_blk, | |
92bca44c | 3112 | .kill_sb = fuse_kill_sb_blk, |
2380c486 JR |
3113 | - .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, |
3114 | + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_IS_FUSE, | |
3115 | }; | |
3116 | ||
3117 | static inline int register_fuseblk(void) | |
3118 | diff --git a/fs/namei.c b/fs/namei.c | |
5bd2511a | 3119 | index 868d0cb..325b6cf 100644 |
2380c486 JR |
3120 | --- a/fs/namei.c |
3121 | +++ b/fs/namei.c | |
5bd2511a | 3122 | @@ -2256,6 +2256,8 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) |
2380c486 JR |
3123 | if (!dir->i_op->unlink) |
3124 | return -EPERM; | |
3125 | ||
3126 | + vfs_check_frozen(dir->i_sb, SB_FREEZE_WRITE); | |
3127 | + | |
2380c486 | 3128 | mutex_lock(&dentry->d_inode->i_mutex); |
d031c9d6 | 3129 | if (d_mountpoint(dentry)) |
3130 | error = -EBUSY; | |
2380c486 | 3131 | diff --git a/fs/super.c b/fs/super.c |
85eb3c9d | 3132 | index c7765bd..197a0a3 100644 |
2380c486 JR |
3133 | --- a/fs/super.c |
3134 | +++ b/fs/super.c | |
5bd2511a | 3135 | @@ -34,6 +34,8 @@ |
2380c486 JR |
3136 | |
3137 | ||
3138 | LIST_HEAD(super_blocks); | |
3139 | +EXPORT_SYMBOL_GPL(super_blocks); | |
3140 | + | |
3141 | DEFINE_SPINLOCK(sb_lock); | |
3142 | ||
3143 | /** | |
3144 | diff --git a/include/linux/Kbuild b/include/linux/Kbuild | |
85eb3c9d | 3145 | index 9aa9bca..121a9f4 100644 |
2380c486 JR |
3146 | --- a/include/linux/Kbuild |
3147 | +++ b/include/linux/Kbuild | |
5bd2511a | 3148 | @@ -216,6 +216,7 @@ unifdef-y += filter.h |
2380c486 JR |
3149 | unifdef-y += flat.h |
3150 | unifdef-y += futex.h | |
3151 | unifdef-y += fs.h | |
3152 | +unifdef-y += freezer.h | |
3153 | unifdef-y += gameport.h | |
3154 | unifdef-y += generic_serial.h | |
5dd10c98 | 3155 | unifdef-y += hdlcdrv.h |
7e46296a | 3156 | diff --git a/include/linux/bio.h b/include/linux/bio.h |
5dd10c98 | 3157 | index 7fc5606..07e9b97 100644 |
7e46296a AM |
3158 | --- a/include/linux/bio.h |
3159 | +++ b/include/linux/bio.h | |
3160 | @@ -175,8 +175,11 @@ enum bio_rw_flags { | |
3161 | BIO_RW_META, | |
3162 | BIO_RW_DISCARD, | |
3163 | BIO_RW_NOIDLE, | |
3164 | + BIO_RW_TUXONICE, | |
3165 | }; | |
3166 | ||
3167 | +extern int trap_non_toi_io; | |
3168 | + | |
3169 | /* | |
3170 | * First four bits must match between bio->bi_rw and rq->cmd_flags, make | |
3171 | * that explicit here. | |
2380c486 | 3172 | diff --git a/include/linux/freezer.h b/include/linux/freezer.h |
de6743ae | 3173 | index da7e52b..a45b332 100644 |
2380c486 JR |
3174 | --- a/include/linux/freezer.h |
3175 | +++ b/include/linux/freezer.h | |
de6743ae | 3176 | @@ -124,6 +124,19 @@ static inline void set_freezable(void) |
2380c486 JR |
3177 | current->flags &= ~PF_NOFREEZE; |
3178 | } | |
3179 | ||
7e46296a | 3180 | +extern int freezer_state; |
2380c486 JR |
3181 | +#define FREEZER_OFF 0 |
3182 | +#define FREEZER_FILESYSTEMS_FROZEN 1 | |
3183 | +#define FREEZER_USERSPACE_FROZEN 2 | |
3184 | +#define FREEZER_FULLY_ON 3 | |
3185 | + | |
3186 | +static inline int freezer_is_on(void) | |
3187 | +{ | |
3188 | + return freezer_state == FREEZER_FULLY_ON; | |
3189 | +} | |
2380c486 JR |
3190 | + |
3191 | +extern void thaw_kernel_threads(void); | |
3192 | + | |
3193 | /* | |
3194 | * Tell the freezer that the current task should be frozen by it and that it | |
3195 | * should send a fake signal to the task to freeze it. | |
de6743ae | 3196 | @@ -175,6 +188,8 @@ static inline int freeze_processes(void) { BUG(); return 0; } |
2380c486 JR |
3197 | static inline void thaw_processes(void) {} |
3198 | ||
3199 | static inline int try_to_freeze(void) { return 0; } | |
3200 | +static inline int freezer_is_on(void) { return 0; } | |
3201 | +static inline void thaw_kernel_threads(void) { } | |
3202 | ||
3203 | static inline void freezer_do_not_count(void) {} | |
3204 | static inline void freezer_count(void) {} | |
3205 | diff --git a/include/linux/fs.h b/include/linux/fs.h | |
85eb3c9d | 3206 | index f0f447a..be4dd65 100644 |
2380c486 JR |
3207 | --- a/include/linux/fs.h |
3208 | +++ b/include/linux/fs.h | |
de6743ae | 3209 | @@ -176,6 +176,7 @@ struct inodes_stat_t { |
2380c486 JR |
3210 | #define FS_REQUIRES_DEV 1 |
3211 | #define FS_BINARY_MOUNTDATA 2 | |
3212 | #define FS_HAS_SUBTYPE 4 | |
3213 | +#define FS_IS_FUSE 8 /* Fuse filesystem - bdev freeze these too */ | |
3214 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | |
3215 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() | |
3216 | * during rename() internally. | |
de6743ae | 3217 | @@ -209,6 +210,7 @@ struct inodes_stat_t { |
2380c486 JR |
3218 | #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ |
3219 | #define MS_I_VERSION (1<<23) /* Update inode I_version field */ | |
9474138d AM |
3220 | #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ |
3221 | +#define MS_FROZEN (1<<25) /* Frozen by freeze_filesystems() */ | |
28757c75 | 3222 | #define MS_BORN (1<<29) |
2380c486 JR |
3223 | #define MS_ACTIVE (1<<30) |
3224 | #define MS_NOUSER (1<<31) | |
85eb3c9d | 3225 | @@ -236,6 +238,8 @@ struct inodes_stat_t { |
e999739a | 3226 | #define S_NOCMTIME 128 /* Do not update file c/mtime */ |
3227 | #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ | |
3228 | #define S_PRIVATE 512 /* Inode is fs-internal */ | |
3229 | +#define S_ATOMIC_COPY 1024 /* Pages mapped with this inode need to be | |
3230 | + atomically copied (gem) */ | |
3231 | ||
3232 | /* | |
3233 | * Note that nosuid etc flags are inode-specific: setting some file-system | |
85eb3c9d | 3234 | @@ -383,6 +387,7 @@ struct inodes_stat_t { |
92bca44c AM |
3235 | #include <linux/capability.h> |
3236 | #include <linux/semaphore.h> | |
3237 | #include <linux/fiemap.h> | |
3238 | +#include <linux/freezer.h> | |
3239 | ||
3240 | #include <asm/atomic.h> | |
3241 | #include <asm/byteorder.h> | |
85eb3c9d | 3242 | @@ -1396,8 +1401,11 @@ enum { |
2380c486 JR |
3243 | SB_FREEZE_TRANS = 2, |
3244 | }; | |
3245 | ||
3246 | -#define vfs_check_frozen(sb, level) \ | |
3247 | - wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) | |
3248 | +#define vfs_check_frozen(sb, level) do { \ | |
3249 | + freezer_do_not_count(); \ | |
3250 | + wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))); \ | |
3251 | + freezer_count(); \ | |
3252 | +} while (0) | |
3253 | ||
3254 | #define get_fs_excl() atomic_inc(¤t->fs_excl) | |
3255 | #define put_fs_excl() atomic_dec(¤t->fs_excl) | |
85eb3c9d | 3256 | @@ -1968,6 +1976,13 @@ extern struct super_block *freeze_bdev(struct block_device *); |
92bca44c AM |
3257 | extern void emergency_thaw_all(void); |
3258 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); | |
9474138d | 3259 | extern int fsync_bdev(struct block_device *); |
92bca44c AM |
3260 | +extern int fsync_super(struct super_block *); |
3261 | +extern int fsync_no_super(struct block_device *); | |
9474138d AM |
3262 | +#define FS_FREEZER_FUSE 1 |
3263 | +#define FS_FREEZER_NORMAL 2 | |
3264 | +#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL) | |
3265 | +void freeze_filesystems(int which); | |
3266 | +void thaw_filesystems(int which); | |
3267 | #else | |
3268 | static inline void bd_forget(struct inode *inode) {} | |
3269 | static inline int sync_blockdev(struct block_device *bdev) { return 0; } | |
cacc47f8 AM |
3270 | diff --git a/include/linux/fs_uuid.h b/include/linux/fs_uuid.h |
3271 | new file mode 100644 | |
3272 | index 0000000..3234135 | |
3273 | --- /dev/null | |
3274 | +++ b/include/linux/fs_uuid.h | |
3275 | @@ -0,0 +1,19 @@ | |
3276 | +#include <linux/device.h> | |
3277 | + | |
3278 | +struct hd_struct; | |
3279 | +struct block_device; | |
3280 | + | |
3281 | +struct fs_info { | |
3282 | + char uuid[16]; | |
3283 | + dev_t dev_t; | |
3284 | + char *last_mount; | |
3285 | + int last_mount_size; | |
3286 | +}; | |
3287 | + | |
3288 | +int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek); | |
3289 | +dev_t blk_lookup_fs_info(struct fs_info *seek); | |
3290 | +struct fs_info *fs_info_from_block_dev(struct block_device *bdev); | |
3291 | +void free_fs_info(struct fs_info *fs_info); | |
3292 | +int bdev_matches_key(struct block_device *bdev, const char *key); | |
3293 | +struct block_device *next_bdev_of_type(struct block_device *last, | |
3294 | + const char *key); | |
2380c486 | 3295 | diff --git a/include/linux/mm.h b/include/linux/mm.h |
85eb3c9d | 3296 | index a2b4804..90d0dfa 100644 |
2380c486 JR |
3297 | --- a/include/linux/mm.h |
3298 | +++ b/include/linux/mm.h | |
de6743ae | 3299 | @@ -98,6 +98,7 @@ extern unsigned int kobjsize(const void *objp); |
7e46296a AM |
3300 | #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ |
3301 | #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ | |
3302 | #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ | |
5dd10c98 | 3303 | +#define VM_ATOMIC_COPY 0x01000000 /* TOI should do atomic copy (mmu) */ |
7e46296a AM |
3304 | #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ |
3305 | #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ | |
e999739a | 3306 | |
5bd2511a | 3307 | @@ -1424,6 +1425,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, |
2380c486 JR |
3308 | void __user *, size_t *, loff_t *); |
3309 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |
3310 | unsigned long lru_pages); | |
3311 | +void drop_pagecache(void); | |
3312 | ||
3313 | #ifndef CONFIG_MMU | |
3314 | #define randomize_va_space 0 | |
3315 | diff --git a/include/linux/netlink.h b/include/linux/netlink.h | |
5bd2511a | 3316 | index 59d0669..5efa8e0 100644 |
2380c486 JR |
3317 | --- a/include/linux/netlink.h |
3318 | +++ b/include/linux/netlink.h | |
3319 | @@ -24,6 +24,8 @@ | |
3320 | /* leave room for NETLINK_DM (DM Events) */ | |
3321 | #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ | |
3322 | #define NETLINK_ECRYPTFS 19 | |
3323 | +#define NETLINK_TOI_USERUI 20 /* TuxOnIce's userui */ | |
3324 | +#define NETLINK_TOI_USM 21 /* Userspace storage manager */ | |
3325 | ||
3326 | #define MAX_LINKS 32 | |
3327 | ||
3328 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
85eb3c9d | 3329 | index bc7d6bb..311897c 100644 |
2380c486 JR |
3330 | --- a/include/linux/suspend.h |
3331 | +++ b/include/linux/suspend.h | |
7e46296a AM |
3332 | @@ -329,4 +329,70 @@ static inline void unlock_system_sleep(void) |
3333 | } | |
3334 | #endif | |
2380c486 JR |
3335 | |
3336 | +enum { | |
3337 | + TOI_CAN_HIBERNATE, | |
3338 | + TOI_CAN_RESUME, | |
3339 | + TOI_RESUME_DEVICE_OK, | |
3340 | + TOI_NORESUME_SPECIFIED, | |
3341 | + TOI_SANITY_CHECK_PROMPT, | |
3342 | + TOI_CONTINUE_REQ, | |
3343 | + TOI_RESUMED_BEFORE, | |
3344 | + TOI_BOOT_TIME, | |
3345 | + TOI_NOW_RESUMING, | |
3346 | + TOI_IGNORE_LOGLEVEL, | |
3347 | + TOI_TRYING_TO_RESUME, | |
3348 | + TOI_LOADING_ALT_IMAGE, | |
3349 | + TOI_STOP_RESUME, | |
3350 | + TOI_IO_STOPPED, | |
3351 | + TOI_NOTIFIERS_PREPARE, | |
3352 | + TOI_CLUSTER_MODE, | |
3353 | + TOI_BOOT_KERNEL, | |
3354 | +}; | |
3355 | + | |
3356 | +#ifdef CONFIG_TOI | |
3357 | + | |
3358 | +/* Used in init dir files */ | |
3359 | +extern unsigned long toi_state; | |
3360 | +#define set_toi_state(bit) (set_bit(bit, &toi_state)) | |
3361 | +#define clear_toi_state(bit) (clear_bit(bit, &toi_state)) | |
3362 | +#define test_toi_state(bit) (test_bit(bit, &toi_state)) | |
3363 | +extern int toi_running; | |
3364 | + | |
3365 | +#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action)) | |
9474138d | 3366 | +extern int try_tuxonice_hibernate(void); |
2380c486 JR |
3367 | + |
3368 | +#else /* !CONFIG_TOI */ | |
3369 | + | |
3370 | +#define toi_state (0) | |
3371 | +#define set_toi_state(bit) do { } while (0) | |
3372 | +#define clear_toi_state(bit) do { } while (0) | |
3373 | +#define test_toi_state(bit) (0) | |
3374 | +#define toi_running (0) | |
3375 | + | |
9474138d | 3376 | +static inline int try_tuxonice_hibernate(void) { return 0; } |
2380c486 JR |
3377 | +#define test_action_state(bit) (0) |
3378 | + | |
3379 | +#endif /* CONFIG_TOI */ | |
3380 | + | |
3381 | +#ifdef CONFIG_HIBERNATION | |
3382 | +#ifdef CONFIG_TOI | |
9474138d | 3383 | +extern void try_tuxonice_resume(void); |
2380c486 | 3384 | +#else |
9474138d | 3385 | +#define try_tuxonice_resume() do { } while (0) |
2380c486 JR |
3386 | +#endif |
3387 | + | |
3388 | +extern int resume_attempted; | |
3389 | +extern int software_resume(void); | |
3390 | + | |
3391 | +static inline void check_resume_attempted(void) | |
3392 | +{ | |
3393 | + if (resume_attempted) | |
3394 | + return; | |
3395 | + | |
3396 | + software_resume(); | |
3397 | +} | |
3398 | +#else | |
3399 | +#define check_resume_attempted() do { } while (0) | |
3400 | +#define resume_attempted (0) | |
3401 | +#endif | |
3402 | #endif /* _LINUX_SUSPEND_H */ | |
3403 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
5bd2511a | 3404 | index ff4acea..5aa8559 100644 |
2380c486 JR |
3405 | --- a/include/linux/swap.h |
3406 | +++ b/include/linux/swap.h | |
5bd2511a | 3407 | @@ -198,6 +198,7 @@ struct swap_list_t { |
2380c486 JR |
3408 | extern unsigned long totalram_pages; |
3409 | extern unsigned long totalreserve_pages; | |
3410 | extern unsigned int nr_free_buffer_pages(void); | |
3411 | +extern unsigned int nr_unallocated_buffer_pages(void); | |
3412 | extern unsigned int nr_free_pagecache_pages(void); | |
3413 | ||
3414 | /* Definition of global_page_state not available yet */ | |
5bd2511a | 3415 | @@ -248,6 +249,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
de6743ae AM |
3416 | int nid); |
3417 | extern int __isolate_lru_page(struct page *page, int mode, int file); | |
3418 | extern unsigned long shrink_all_memory(unsigned long nr_pages); | |
3419 | +extern unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, | |
3420 | + gfp_t mask); | |
3421 | extern int vm_swappiness; | |
3422 | extern int remove_mapping(struct address_space *mapping, struct page *page); | |
3423 | extern long vm_total_pages; | |
5bd2511a | 3424 | @@ -327,8 +330,10 @@ extern void swapcache_free(swp_entry_t, struct page *page); |
5dd10c98 AM |
3425 | extern int free_swap_and_cache(swp_entry_t); |
3426 | extern int swap_type_of(dev_t, sector_t, struct block_device **); | |
3427 | extern unsigned int count_swap_pages(int, int); | |
3428 | +extern sector_t map_swap_entry(swp_entry_t entry, struct block_device **); | |
3429 | extern sector_t map_swap_page(struct page *, struct block_device **); | |
3430 | extern sector_t swapdev_block(int, pgoff_t); | |
3431 | +extern struct swap_info_struct *get_swap_info_struct(unsigned); | |
3432 | extern int reuse_swap_page(struct page *); | |
3433 | extern int try_to_free_swap(struct page *); | |
3434 | struct backing_dev_info; | |
2380c486 | 3435 | diff --git a/init/do_mounts.c b/init/do_mounts.c |
de6743ae | 3436 | index 02e3ca4..5af8c3e 100644 |
2380c486 JR |
3437 | --- a/init/do_mounts.c |
3438 | +++ b/init/do_mounts.c | |
de6743ae | 3439 | @@ -144,6 +144,7 @@ fail: |
2380c486 JR |
3440 | done: |
3441 | return res; | |
3442 | } | |
3443 | +EXPORT_SYMBOL_GPL(name_to_dev_t); | |
3444 | ||
3445 | static int __init root_dev_setup(char *line) | |
3446 | { | |
de6743ae | 3447 | @@ -414,6 +415,8 @@ void __init prepare_namespace(void) |
2380c486 JR |
3448 | if (is_floppy && rd_doload && rd_load_disk(0)) |
3449 | ROOT_DEV = Root_RAM0; | |
3450 | ||
3451 | + check_resume_attempted(); | |
3452 | + | |
3453 | mount_root(); | |
3454 | out: | |
7e46296a | 3455 | devtmpfs_mount("dev"); |
2380c486 | 3456 | diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c |
de6743ae | 3457 | index 2b10853..ec3e087 100644 |
2380c486 JR |
3458 | --- a/init/do_mounts_initrd.c |
3459 | +++ b/init/do_mounts_initrd.c | |
3460 | @@ -6,6 +6,7 @@ | |
3461 | #include <linux/romfs_fs.h> | |
3462 | #include <linux/initrd.h> | |
3463 | #include <linux/sched.h> | |
3464 | +#include <linux/suspend.h> | |
3465 | #include <linux/freezer.h> | |
3466 | ||
3467 | #include "do_mounts.h" | |
de6743ae | 3468 | @@ -64,6 +65,11 @@ static void __init handle_initrd(void) |
2380c486 JR |
3469 | |
3470 | current->flags &= ~PF_FREEZER_SKIP; | |
3471 | ||
3472 | + if (!resume_attempted) | |
3473 | + printk(KERN_ERR "TuxOnIce: No attempt was made to resume from " | |
3474 | + "any image that might exist.\n"); | |
3475 | + clear_toi_state(TOI_BOOT_TIME); | |
3476 | + | |
3477 | /* move initrd to rootfs' /old */ | |
3478 | sys_fchdir(old_fd); | |
3479 | sys_mount("/", ".", NULL, MS_MOVE, NULL); | |
3480 | diff --git a/init/main.c b/init/main.c | |
85eb3c9d | 3481 | index a42fdf4..5a4febe 100644 |
2380c486 JR |
3482 | --- a/init/main.c |
3483 | +++ b/init/main.c | |
5bd2511a | 3484 | @@ -117,6 +117,7 @@ extern void softirq_init(void); |
2380c486 JR |
3485 | char __initdata boot_command_line[COMMAND_LINE_SIZE]; |
3486 | /* Untouched saved command line (eg. for /proc) */ | |
3487 | char *saved_command_line; | |
3488 | +EXPORT_SYMBOL_GPL(saved_command_line); | |
3489 | /* Command line for parameter parsing */ | |
3490 | static char *static_command_line; | |
3491 | ||
3492 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
5bd2511a | 3493 | index 97d1b42..b6e21bb 100644 |
2380c486 JR |
3494 | --- a/kernel/cpu.c |
3495 | +++ b/kernel/cpu.c | |
5bd2511a AM |
3496 | @@ -428,6 +428,7 @@ int disable_nonboot_cpus(void) |
3497 | cpu_maps_update_done(); | |
2380c486 JR |
3498 | return error; |
3499 | } | |
3500 | +EXPORT_SYMBOL_GPL(disable_nonboot_cpus); | |
3501 | ||
7e46296a | 3502 | void __weak arch_enable_nonboot_cpus_begin(void) |
2380c486 | 3503 | { |
5bd2511a | 3504 | @@ -466,6 +467,7 @@ void __ref enable_nonboot_cpus(void) |
2380c486 JR |
3505 | out: |
3506 | cpu_maps_update_done(); | |
3507 | } | |
3508 | +EXPORT_SYMBOL_GPL(enable_nonboot_cpus); | |
3509 | ||
3510 | static int alloc_frozen_cpus(void) | |
3511 | { | |
2380c486 | 3512 | diff --git a/kernel/kmod.c b/kernel/kmod.c |
5bd2511a | 3513 | index 6e9b196..19247e0 100644 |
2380c486 JR |
3514 | --- a/kernel/kmod.c |
3515 | +++ b/kernel/kmod.c | |
5bd2511a | 3516 | @@ -290,6 +290,7 @@ int usermodehelper_disable(void) |
2380c486 JR |
3517 | usermodehelper_disabled = 0; |
3518 | return -EAGAIN; | |
3519 | } | |
3520 | +EXPORT_SYMBOL_GPL(usermodehelper_disable); | |
3521 | ||
3522 | /** | |
3523 | * usermodehelper_enable - allow new helpers to be started again | |
5bd2511a | 3524 | @@ -298,6 +299,7 @@ void usermodehelper_enable(void) |
2380c486 JR |
3525 | { |
3526 | usermodehelper_disabled = 0; | |
3527 | } | |
3528 | +EXPORT_SYMBOL_GPL(usermodehelper_enable); | |
3529 | ||
3530 | static void helper_lock(void) | |
3531 | { | |
92bca44c | 3532 | diff --git a/kernel/pid.c b/kernel/pid.c |
5bd2511a | 3533 | index e9fd8c1..32d2697 100644 |
92bca44c AM |
3534 | --- a/kernel/pid.c |
3535 | +++ b/kernel/pid.c | |
de6743ae | 3536 | @@ -384,6 +384,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) |
92bca44c AM |
3537 | { |
3538 | return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); | |
3539 | } | |
3540 | +EXPORT_SYMBOL_GPL(find_task_by_pid_ns); | |
3541 | ||
3542 | struct task_struct *find_task_by_vpid(pid_t vnr) | |
3543 | { | |
2380c486 | 3544 | diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig |
85eb3c9d | 3545 | index ca6066a..27524f6 100644 |
2380c486 JR |
3546 | --- a/kernel/power/Kconfig |
3547 | +++ b/kernel/power/Kconfig | |
de6743ae | 3548 | @@ -47,6 +47,13 @@ config CAN_PM_TRACE |
2380c486 JR |
3549 | def_bool y |
3550 | depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL | |
3551 | ||
3552 | +config FS_FREEZER_DEBUG | |
3553 | + bool "Filesystem freezer debugging" | |
3554 | + depends on PM_DEBUG | |
3555 | + default n | |
3556 | + ---help--- | |
3557 | + This option enables debugging of the filesystem freezing code. | |
3558 | + | |
3559 | config PM_TRACE | |
3560 | bool | |
3561 | help | |
85eb3c9d | 3562 | @@ -198,6 +205,238 @@ config PM_STD_PARTITION |
2380c486 JR |
3563 | suspended image to. It will simply pick the first available swap |
3564 | device. | |
3565 | ||
3566 | +menuconfig TOI_CORE | |
3567 | + tristate "Enhanced Hibernation (TuxOnIce)" | |
3568 | + depends on HIBERNATION | |
3569 | + default y | |
3570 | + ---help--- | |
3571 | + TuxOnIce is the 'new and improved' suspend support. | |
3572 | + | |
3573 | + See the TuxOnIce home page (tuxonice.net) | |
3574 | + for FAQs, HOWTOs and other documentation. | |
3575 | + | |
3576 | + comment "Image Storage (you need at least one allocator)" | |
3577 | + depends on TOI_CORE | |
3578 | + | |
3579 | + config TOI_FILE | |
3580 | + tristate "File Allocator" | |
3581 | + depends on TOI_CORE | |
3582 | + default y | |
3583 | + ---help--- | |
3584 | + This option enables support for storing an image in a | |
5dd10c98 AM |
3585 | + simple file. You might want this if your swap is |
3586 | + sometimes full enough that you don't have enough spare | |
3587 | + space to store an image. | |
2380c486 JR |
3588 | + |
3589 | + config TOI_SWAP | |
3590 | + tristate "Swap Allocator" | |
3591 | + depends on TOI_CORE && SWAP | |
3592 | + default y | |
3593 | + ---help--- | |
3594 | + This option enables support for storing an image in your | |
3595 | + swap space. | |
3596 | + | |
3597 | + comment "General Options" | |
3598 | + depends on TOI_CORE | |
3599 | + | |
2380c486 JR |
3600 | + config TOI_CRYPTO |
3601 | + tristate "Compression support" | |
3602 | + depends on TOI_CORE && CRYPTO | |
3603 | + default y | |
3604 | + ---help--- | |
3605 | + This option adds support for using cryptoapi compression | |
9474138d AM |
3606 | + algorithms. Compression is particularly useful as it can |
3607 | + more than double your suspend and resume speed (depending | |
3608 | + upon how well your image compresses). | |
2380c486 JR |
3609 | + |
3610 | + You probably want this, so say Y here. | |
3611 | + | |
3612 | + comment "No compression support available without Cryptoapi support." | |
3613 | + depends on TOI_CORE && !CRYPTO | |
3614 | + | |
3615 | + config TOI_USERUI | |
3616 | + tristate "Userspace User Interface support" | |
3617 | + depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE) | |
3618 | + default y | |
3619 | + ---help--- | |
3620 | + This option enabled support for a userspace based user interface | |
3621 | + to TuxOnIce, which allows you to have a nice display while suspending | |
3622 | + and resuming, and also enables features such as pressing escape to | |
3623 | + cancel a cycle or interactive debugging. | |
3624 | + | |
3625 | + config TOI_USERUI_DEFAULT_PATH | |
3626 | + string "Default userui program location" | |
e999739a | 3627 | + default "/usr/local/sbin/tuxoniceui_text" |
2380c486 JR |
3628 | + depends on TOI_USERUI |
3629 | + ---help--- | |
3630 | + This entry allows you to specify a default path to the userui binary. | |
3631 | + | |
3632 | + config TOI_KEEP_IMAGE | |
3633 | + bool "Allow Keep Image Mode" | |
3634 | + depends on TOI_CORE | |
3635 | + ---help--- | |
3636 | + This option allows you to keep and image and reuse it. It is intended | |
3637 | + __ONLY__ for use with systems where all filesystems are mounted read- | |
3638 | + only (kiosks, for example). To use it, compile this option in and boot | |
3639 | + normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend. | |
3640 | + When you resume, the image will not be removed. You will be unable to turn | |
3641 | + off swap partitions (assuming you are using the swap allocator), but future | |
3642 | + suspends simply do a power-down. The image can be updated using the | |
3643 | + kernel command line parameter suspend_act= to turn off the keep image | |
3644 | + bit. Keep image mode is a little less user friendly on purpose - it | |
3645 | + should not be used without thought! | |
3646 | + | |
3647 | + config TOI_REPLACE_SWSUSP | |
3648 | + bool "Replace swsusp by default" | |
3649 | + default y | |
3650 | + depends on TOI_CORE | |
3651 | + ---help--- | |
3652 | + TuxOnIce can replace swsusp. This option makes that the default state, | |
3653 | + requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want | |
3654 | + to use the vanilla kernel functionality. Note that your initrd/ramfs will | |
3655 | + need to do this before trying to resume, too. | |
3656 | + With overriding swsusp enabled, echoing disk to /sys/power/state will | |
3657 | + start a TuxOnIce cycle. If resume= doesn't specify an allocator and both | |
3658 | + the swap and file allocators are compiled in, the swap allocator will be | |
3659 | + used by default. | |
3660 | + | |
3661 | + config TOI_IGNORE_LATE_INITCALL | |
3662 | + bool "Wait for initrd/ramfs to run, by default" | |
3663 | + default n | |
3664 | + depends on TOI_CORE | |
3665 | + ---help--- | |
3666 | + When booting, TuxOnIce can check for an image and start to resume prior | |
3667 | + to any initrd/ramfs running (via a late initcall). | |
3668 | + | |
3669 | + If you don't have an initrd/ramfs, this is what you want to happen - | |
3670 | + otherwise you won't be able to safely resume. You should set this option | |
3671 | + to 'No'. | |
3672 | + | |
3673 | + If, however, you want your initrd/ramfs to run anyway before resuming, | |
3674 | + you need to tell TuxOnIce to ignore that earlier opportunity to resume. | |
3675 | + This can be done either by using this compile time option, or by | |
3676 | + overriding this option with the boot-time parameter toi_initramfs_resume_only=1. | |
3677 | + | |
3678 | + Note that if TuxOnIce can't resume at the earlier opportunity, the | |
3679 | + value of this option won't matter - the initramfs/initrd (if any) will | |
3680 | + run anyway. | |
3681 | + | |
3682 | + menuconfig TOI_CLUSTER | |
3683 | + tristate "Cluster support" | |
3684 | + default n | |
3685 | + depends on TOI_CORE && NET && BROKEN | |
3686 | + ---help--- | |
3687 | + Support for linking multiple machines in a cluster so that they suspend | |
3688 | + and resume together. | |
3689 | + | |
3690 | + config TOI_DEFAULT_CLUSTER_INTERFACE | |
3691 | + string "Default cluster interface" | |
3692 | + depends on TOI_CLUSTER | |
3693 | + ---help--- | |
3694 | + The default interface on which to communicate with other nodes in | |
3695 | + the cluster. | |
3696 | + | |
3697 | + If no value is set here, cluster support will be disabled by default. | |
3698 | + | |
3699 | + config TOI_DEFAULT_CLUSTER_KEY | |
3700 | + string "Default cluster key" | |
3701 | + default "Default" | |
3702 | + depends on TOI_CLUSTER | |
3703 | + ---help--- | |
3704 | + The default key used by this node. All nodes in the same cluster | |
3705 | + have the same key. Multiple clusters may coexist on the same lan | |
3706 | + by using different values for this key. | |
3707 | + | |
3708 | + config TOI_CLUSTER_IMAGE_TIMEOUT | |
3709 | + int "Timeout when checking for image" | |
3710 | + default 15 | |
3711 | + depends on TOI_CLUSTER | |
3712 | + ---help--- | |
3713 | + Timeout (seconds) before continuing to boot when waiting to see | |
3714 | + whether other nodes might have an image. Set to -1 to wait | |
3715 | + indefinitely. In WAIT_UNTIL_NODES is non zero, we might continue | |
3716 | + booting sooner than this timeout. | |
3717 | + | |
3718 | + config TOI_CLUSTER_WAIT_UNTIL_NODES | |
3719 | + int "Nodes without image before continuing" | |
3720 | + default 0 | |
3721 | + depends on TOI_CLUSTER | |
3722 | + ---help--- | |
3723 | + When booting and no image is found, we wait to see if other nodes | |
3724 | + have an image before continuing to boot. This value lets us | |
3725 | + continue after seeing a certain number of nodes without an image, | |
3726 | + instead of continuing to wait for the timeout. Set to 0 to only | |
3727 | + use the timeout. | |
3728 | + | |
3729 | + config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE | |
3730 | + string "Default pre-hibernate script" | |
3731 | + depends on TOI_CLUSTER | |
3732 | + ---help--- | |
3733 | + The default script to be called when starting to hibernate. | |
3734 | + | |
3735 | + config TOI_DEFAULT_CLUSTER_POST_HIBERNATE | |
3736 | + string "Default post-hibernate script" | |
3737 | + depends on TOI_CLUSTER | |
3738 | + ---help--- | |
3739 | + The default script to be called after resuming from hibernation. | |
3740 | + | |
3741 | + config TOI_DEFAULT_WAIT | |
3742 | + int "Default waiting time for emergency boot messages" | |
3743 | + default "25" | |
3744 | + range -1 32768 | |
3745 | + depends on TOI_CORE | |
3746 | + help | |
3747 | + TuxOnIce can display warnings very early in the process of resuming, | |
3748 | + if (for example) it appears that you have booted a kernel that doesn't | |
3749 | + match an image on disk. It can then give you the opportunity to either | |
3750 | + continue booting that kernel, or reboot the machine. This option can be | |
3751 | + used to control how long to wait in such circumstances. -1 means wait | |
3752 | + forever. 0 means don't wait at all (do the default action, which will | |
3753 | + generally be to continue booting and remove the image). Values of 1 or | |
3754 | + more indicate a number of seconds (up to 255) to wait before doing the | |
3755 | + default. | |
3756 | + | |
3757 | + config TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE | |
3758 | + int "Default extra pages allowance" | |
3759 | + default "2000" | |
3760 | + range 500 32768 | |
3761 | + depends on TOI_CORE | |
3762 | + help | |
3763 | + This value controls the default for the allowance TuxOnIce makes for | |
3764 | + drivers to allocate extra memory during the atomic copy. The default | |
3765 | + value of 2000 will be okay in most cases. If you are using | |
3766 | + DRI, the easiest way to find what value to use is to try to hibernate | |
3767 | + and look at how many pages were actually needed in the sysfs entry | |
3768 | + /sys/power/tuxonice/debug_info (first number on the last line), adding | |
3769 | + a little extra because the value is not always the same. | |
3770 | + | |
3771 | + config TOI_CHECKSUM | |
3772 | + bool "Checksum pageset2" | |
3773 | + default n | |
3774 | + depends on TOI_CORE | |
3775 | + select CRYPTO | |
3776 | + select CRYPTO_ALGAPI | |
3777 | + select CRYPTO_MD4 | |
3778 | + ---help--- | |
3779 | + Adds support for checksumming pageset2 pages, to ensure you really get an | |
3780 | + atomic copy. Since some filesystems (XFS especially) change metadata even | |
3781 | + when there's no other activity, we need this to check for pages that have | |
3782 | + been changed while we were saving the page cache. If your debugging output | |
3783 | + always says no pages were resaved, you may be able to safely disable this | |
3784 | + option. | |
3785 | + | |
3786 | +config TOI | |
3787 | + bool | |
3788 | + depends on TOI_CORE!=n | |
3789 | + default y | |
3790 | + | |
3791 | +config TOI_EXPORTS | |
3792 | + bool | |
3793 | + depends on TOI_SWAP=m || TOI_FILE=m || \ | |
3794 | + TOI_CRYPTO=m || TOI_CLUSTER=m || \ | |
3795 | + TOI_USERUI=m || TOI_CORE=m | |
3796 | + default y | |
3797 | + | |
3798 | config APM_EMULATION | |
3799 | tristate "Advanced Power Management Emulation" | |
3800 | depends on PM && SYS_SUPPORTS_APM_EMULATION | |
3801 | diff --git a/kernel/power/Makefile b/kernel/power/Makefile | |
85eb3c9d | 3802 | index f9063c6..a3a7444 100644 |
2380c486 JR |
3803 | --- a/kernel/power/Makefile |
3804 | +++ b/kernel/power/Makefile | |
7e46296a | 3805 | @@ -3,6 +3,35 @@ ifeq ($(CONFIG_PM_DEBUG),y) |
2380c486 JR |
3806 | EXTRA_CFLAGS += -DDEBUG |
3807 | endif | |
3808 | ||
7e46296a | 3809 | +tuxonice_core-y := tuxonice_modules.o |
2380c486 JR |
3810 | + |
3811 | +obj-$(CONFIG_TOI) += tuxonice_builtin.o | |
3812 | + | |
92bca44c | 3813 | +tuxonice_core-$(CONFIG_PM_DEBUG) += tuxonice_alloc.o |
2380c486 | 3814 | + |
7e46296a AM |
3815 | +# Compile these in after allocation debugging, if used. |
3816 | + | |
3817 | +tuxonice_core-y += tuxonice_sysfs.o tuxonice_highlevel.o \ | |
3818 | + tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \ | |
3819 | + tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \ | |
3820 | + tuxonice_power_off.o tuxonice_atomic_copy.o | |
3821 | + | |
92bca44c | 3822 | +tuxonice_core-$(CONFIG_TOI_CHECKSUM) += tuxonice_checksum.o |
2380c486 | 3823 | + |
92bca44c | 3824 | +tuxonice_core-$(CONFIG_NET) += tuxonice_storage.o tuxonice_netlink.o |
2380c486 JR |
3825 | + |
3826 | +obj-$(CONFIG_TOI_CORE) += tuxonice_core.o | |
3827 | +obj-$(CONFIG_TOI_CRYPTO) += tuxonice_compress.o | |
3828 | + | |
7e46296a AM |
3829 | +tuxonice_bio-y := tuxonice_bio_core.o tuxonice_bio_chains.o \ |
3830 | + tuxonice_bio_signature.o | |
3831 | + | |
3832 | +obj-$(CONFIG_TOI_SWAP) += tuxonice_bio.o tuxonice_swap.o | |
3833 | +obj-$(CONFIG_TOI_FILE) += tuxonice_bio.o tuxonice_file.o | |
2380c486 JR |
3834 | +obj-$(CONFIG_TOI_CLUSTER) += tuxonice_cluster.o |
3835 | + | |
3836 | +obj-$(CONFIG_TOI_USERUI) += tuxonice_userui.o | |
3837 | + | |
3838 | obj-$(CONFIG_PM) += main.o | |
3839 | obj-$(CONFIG_PM_SLEEP) += console.o | |
3840 | obj-$(CONFIG_FREEZER) += process.o | |
7e46296a | 3841 | diff --git a/kernel/power/console.c b/kernel/power/console.c |
5dd10c98 | 3842 | index 218e5af..95a6bdc 100644 |
7e46296a AM |
3843 | --- a/kernel/power/console.c |
3844 | +++ b/kernel/power/console.c | |
5dd10c98 AM |
3845 | @@ -24,6 +24,7 @@ int pm_prepare_console(void) |
3846 | orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); | |
7e46296a AM |
3847 | return 0; |
3848 | } | |
3849 | +EXPORT_SYMBOL_GPL(pm_prepare_console); | |
3850 | ||
3851 | void pm_restore_console(void) | |
3852 | { | |
5dd10c98 AM |
3853 | @@ -32,4 +33,5 @@ void pm_restore_console(void) |
3854 | vt_kmsg_redirect(orig_kmsg); | |
7e46296a AM |
3855 | } |
3856 | } | |
3857 | +EXPORT_SYMBOL_GPL(pm_restore_console); | |
3858 | #endif | |
92bca44c | 3859 | diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c |
de6743ae | 3860 | index aa9e916..4a836b4 100644 |
92bca44c AM |
3861 | --- a/kernel/power/hibernate.c |
3862 | +++ b/kernel/power/hibernate.c | |
de6743ae | 3863 | @@ -26,11 +26,12 @@ |
9474138d AM |
3864 | #include <scsi/scsi_scan.h> |
3865 | #include <asm/suspend.h> | |
2380c486 | 3866 | |
9474138d | 3867 | -#include "power.h" |
2380c486 JR |
3868 | +#include "tuxonice.h" |
3869 | ||
92bca44c | 3870 | |
2380c486 JR |
3871 | static int noresume = 0; |
3872 | -static char resume_file[256] = CONFIG_PM_STD_PARTITION; | |
3873 | +char resume_file[256] = CONFIG_PM_STD_PARTITION; | |
3874 | +EXPORT_SYMBOL_GPL(resume_file); | |
2380c486 JR |
3875 | dev_t swsusp_resume_device; |
3876 | sector_t swsusp_resume_block; | |
5dd10c98 | 3877 | int in_suspend __nosavedata = 0; |
de6743ae | 3878 | @@ -117,55 +118,60 @@ static int hibernation_test(int level) { return 0; } |
2380c486 JR |
3879 | * hibernation |
3880 | */ | |
3881 | ||
3882 | -static int platform_begin(int platform_mode) | |
3883 | +int platform_begin(int platform_mode) | |
3884 | { | |
3885 | return (platform_mode && hibernation_ops) ? | |
3886 | hibernation_ops->begin() : 0; | |
3887 | } | |
3888 | +EXPORT_SYMBOL_GPL(platform_begin); | |
3889 | ||
3890 | /** | |
3891 | * platform_end - tell the platform driver that we've entered the | |
3892 | * working state | |
3893 | */ | |
3894 | ||
3895 | -static void platform_end(int platform_mode) | |
3896 | +void platform_end(int platform_mode) | |
3897 | { | |
3898 | if (platform_mode && hibernation_ops) | |
3899 | hibernation_ops->end(); | |
3900 | } | |
3901 | +EXPORT_SYMBOL_GPL(platform_end); | |
3902 | ||
3903 | /** | |
3904 | * platform_pre_snapshot - prepare the machine for hibernation using the | |
3905 | * platform driver if so configured and return an error code if it fails | |
3906 | */ | |
3907 | ||
3908 | -static int platform_pre_snapshot(int platform_mode) | |
3909 | +int platform_pre_snapshot(int platform_mode) | |
3910 | { | |
3911 | return (platform_mode && hibernation_ops) ? | |
3912 | hibernation_ops->pre_snapshot() : 0; | |
3913 | } | |
3914 | +EXPORT_SYMBOL_GPL(platform_pre_snapshot); | |
3915 | ||
3916 | /** | |
3917 | * platform_leave - prepare the machine for switching to the normal mode | |
3918 | * of operation using the platform driver (called with interrupts disabled) | |
3919 | */ | |
3920 | ||
3921 | -static void platform_leave(int platform_mode) | |
3922 | +void platform_leave(int platform_mode) | |
3923 | { | |
3924 | if (platform_mode && hibernation_ops) | |
3925 | hibernation_ops->leave(); | |
3926 | } | |
3927 | +EXPORT_SYMBOL_GPL(platform_leave); | |
3928 | ||
3929 | /** | |
3930 | * platform_finish - switch the machine to the normal mode of operation | |
3931 | * using the platform driver (must be called after platform_prepare()) | |
3932 | */ | |
3933 | ||
3934 | -static void platform_finish(int platform_mode) | |
3935 | +void platform_finish(int platform_mode) | |
3936 | { | |
3937 | if (platform_mode && hibernation_ops) | |
3938 | hibernation_ops->finish(); | |
3939 | } | |
3940 | +EXPORT_SYMBOL_GPL(platform_finish); | |
3941 | ||
3942 | /** | |
3943 | * platform_pre_restore - prepare the platform for the restoration from a | |
de6743ae | 3944 | @@ -173,11 +179,12 @@ static void platform_finish(int platform_mode) |
2380c486 JR |
3945 | * called, platform_restore_cleanup() must be called. |
3946 | */ | |
3947 | ||
3948 | -static int platform_pre_restore(int platform_mode) | |
3949 | +int platform_pre_restore(int platform_mode) | |
3950 | { | |
3951 | return (platform_mode && hibernation_ops) ? | |
3952 | hibernation_ops->pre_restore() : 0; | |
3953 | } | |
3954 | +EXPORT_SYMBOL_GPL(platform_pre_restore); | |
3955 | ||
3956 | /** | |
3957 | * platform_restore_cleanup - switch the platform to the normal mode of | |
de6743ae | 3958 | @@ -186,22 +193,24 @@ static int platform_pre_restore(int platform_mode) |
2380c486 JR |
3959 | * regardless of the result of platform_pre_restore(). |
3960 | */ | |
3961 | ||
3962 | -static void platform_restore_cleanup(int platform_mode) | |
3963 | +void platform_restore_cleanup(int platform_mode) | |
3964 | { | |
3965 | if (platform_mode && hibernation_ops) | |
3966 | hibernation_ops->restore_cleanup(); | |
3967 | } | |
3968 | +EXPORT_SYMBOL_GPL(platform_restore_cleanup); | |
3969 | ||
3970 | /** | |
3971 | * platform_recover - recover the platform from a failure to suspend | |
3972 | * devices. | |
3973 | */ | |
3974 | ||
3975 | -static void platform_recover(int platform_mode) | |
3976 | +void platform_recover(int platform_mode) | |
3977 | { | |
3978 | if (platform_mode && hibernation_ops && hibernation_ops->recover) | |
3979 | hibernation_ops->recover(); | |
3980 | } | |
3981 | +EXPORT_SYMBOL_GPL(platform_recover); | |
3982 | ||
3983 | /** | |
5dd10c98 | 3984 | * swsusp_show_speed - print the time elapsed between two events. |
de6743ae | 3985 | @@ -535,6 +544,7 @@ int hibernation_platform_enter(void) |
92bca44c | 3986 | |
2380c486 JR |
3987 | return error; |
3988 | } | |
3989 | +EXPORT_SYMBOL_GPL(hibernation_platform_enter); | |
3990 | ||
3991 | /** | |
92bca44c | 3992 | * power_down - Shut the machine down for hibernation. |
de6743ae | 3993 | @@ -586,6 +596,9 @@ int hibernate(void) |
2380c486 JR |
3994 | { |
3995 | int error; | |
3996 | ||
3997 | + if (test_action_state(TOI_REPLACE_SWSUSP)) | |
9474138d | 3998 | + return try_tuxonice_hibernate(); |
2380c486 JR |
3999 | + |
4000 | mutex_lock(&pm_mutex); | |
4001 | /* The snapshot device should not be opened while we're running */ | |
4002 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
de6743ae | 4003 | @@ -666,11 +679,19 @@ int hibernate(void) |
2380c486 JR |
4004 | * |
4005 | */ | |
4006 | ||
4007 | -static int software_resume(void) | |
4008 | +int software_resume(void) | |
4009 | { | |
4010 | int error; | |
4011 | unsigned int flags; | |
92bca44c | 4012 | |
2380c486 JR |
4013 | + resume_attempted = 1; |
4014 | + | |
4015 | + /* | |
4016 | + * We can't know (until an image header - if any - is loaded), whether | |
4017 | + * we did override swsusp. We therefore ensure that both are tried. | |
4018 | + */ | |
9474138d | 4019 | + try_tuxonice_resume(); |
92bca44c | 4020 | + |
2380c486 JR |
4021 | /* |
4022 | * If the user said "noresume".. bail out early. | |
92bca44c | 4023 | */ |
de6743ae | 4024 | @@ -999,6 +1020,7 @@ static int __init resume_offset_setup(char *str) |
2380c486 JR |
4025 | static int __init noresume_setup(char *str) |
4026 | { | |
4027 | noresume = 1; | |
4028 | + set_toi_state(TOI_NORESUME_SPECIFIED); | |
4029 | return 1; | |
4030 | } | |
4031 | ||
4032 | diff --git a/kernel/power/main.c b/kernel/power/main.c | |
de6743ae | 4033 | index b58800b..d23adf9 100644 |
2380c486 JR |
4034 | --- a/kernel/power/main.c |
4035 | +++ b/kernel/power/main.c | |
7e46296a | 4036 | @@ -16,6 +16,7 @@ |
2380c486 JR |
4037 | #include "power.h" |
4038 | ||
4039 | DEFINE_MUTEX(pm_mutex); | |
4040 | +EXPORT_SYMBOL_GPL(pm_mutex); | |
4041 | ||
4042 | unsigned int pm_flags; | |
4043 | EXPORT_SYMBOL(pm_flags); | |
7e46296a | 4044 | @@ -24,7 +25,8 @@ EXPORT_SYMBOL(pm_flags); |
2380c486 JR |
4045 | |
4046 | /* Routines for PM-transition notifications */ | |
4047 | ||
4048 | -static BLOCKING_NOTIFIER_HEAD(pm_chain_head); | |
4049 | +BLOCKING_NOTIFIER_HEAD(pm_chain_head); | |
4050 | +EXPORT_SYMBOL_GPL(pm_chain_head); | |
4051 | ||
4052 | int register_pm_notifier(struct notifier_block *nb) | |
4053 | { | |
7e46296a | 4054 | @@ -43,6 +45,7 @@ int pm_notifier_call_chain(unsigned long val) |
92bca44c AM |
4055 | return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) |
4056 | == NOTIFY_BAD) ? -EINVAL : 0; | |
2380c486 JR |
4057 | } |
4058 | +EXPORT_SYMBOL_GPL(pm_notifier_call_chain); | |
4059 | ||
de6743ae AM |
4060 | /* If set, devices may be suspended and resumed asynchronously. */ |
4061 | int pm_async_enabled = 1; | |
4062 | @@ -136,6 +139,7 @@ power_attr(pm_test); | |
92bca44c | 4063 | #endif /* CONFIG_PM_SLEEP */ |
2380c486 JR |
4064 | |
4065 | struct kobject *power_kobj; | |
4066 | +EXPORT_SYMBOL_GPL(power_kobj); | |
4067 | ||
4068 | /** | |
4069 | * state - control system power state. | |
4070 | diff --git a/kernel/power/power.h b/kernel/power/power.h | |
85eb3c9d | 4071 | index 006270f..28010f4 100644 |
2380c486 JR |
4072 | --- a/kernel/power/power.h |
4073 | +++ b/kernel/power/power.h | |
9474138d | 4074 | @@ -31,8 +31,12 @@ static inline char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4075 | return arch_hibernation_header_restore(info) ? |
4076 | "architecture specific data" : NULL; | |
4077 | } | |
4078 | +#else | |
e999739a | 4079 | +extern char *check_image_kernel(struct swsusp_info *info); |
2380c486 | 4080 | #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ |
e999739a | 4081 | +extern int init_header(struct swsusp_info *info); |
2380c486 JR |
4082 | |
4083 | +extern char resume_file[256]; | |
4084 | /* | |
4085 | * Keep some memory free so that I/O operations can succeed without paging | |
4086 | * [Might this be more than 4 MB?] | |
9474138d | 4087 | @@ -49,6 +53,7 @@ static inline char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4088 | extern int hibernation_snapshot(int platform_mode); |
4089 | extern int hibernation_restore(int platform_mode); | |
4090 | extern int hibernation_platform_enter(void); | |
4091 | +extern void platform_recover(int platform_mode); | |
4092 | #endif | |
4093 | ||
4094 | extern int pfn_is_nosave(unsigned long); | |
9474138d | 4095 | @@ -63,6 +68,8 @@ static struct kobj_attribute _name##_attr = { \ |
2380c486 JR |
4096 | .store = _name##_store, \ |
4097 | } | |
4098 | ||
4099 | +extern struct pbe *restore_pblist; | |
4100 | + | |
4101 | /* Preferred image size in bytes (default 500 MB) */ | |
4102 | extern unsigned long image_size; | |
4103 | extern int in_suspend; | |
85eb3c9d | 4104 | @@ -233,3 +240,93 @@ static inline void suspend_thaw_processes(void) |
2380c486 JR |
4105 | { |
4106 | } | |
4107 | #endif | |
4108 | + | |
4109 | +extern struct page *saveable_page(struct zone *z, unsigned long p); | |
4110 | +#ifdef CONFIG_HIGHMEM | |
4111 | +extern struct page *saveable_highmem_page(struct zone *z, unsigned long p); | |
4112 | +#else | |
4113 | +static | |
4114 | +inline struct page *saveable_highmem_page(struct zone *z, unsigned long p) | |
4115 | +{ | |
4116 | + return NULL; | |
4117 | +} | |
4118 | +#endif | |
4119 | + | |
4120 | +#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe)) | |
4121 | +extern struct list_head nosave_regions; | |
4122 | + | |
4123 | +/** | |
4124 | + * This structure represents a range of page frames the contents of which | |
4125 | + * should not be saved during the suspend. | |
4126 | + */ | |
4127 | + | |
4128 | +struct nosave_region { | |
4129 | + struct list_head list; | |
4130 | + unsigned long start_pfn; | |
4131 | + unsigned long end_pfn; | |
4132 | +}; | |
4133 | + | |
4134 | +#ifndef PHYS_PFN_OFFSET | |
4135 | +#define PHYS_PFN_OFFSET 0 | |
4136 | +#endif | |
4137 | + | |
4138 | +#define ZONE_START(thiszone) ((thiszone)->zone_start_pfn - PHYS_PFN_OFFSET) | |
4139 | + | |
4140 | +#define BM_END_OF_MAP (~0UL) | |
4141 | + | |
7e46296a | 4142 | +#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) |
2380c486 JR |
4143 | + |
4144 | +struct bm_block { | |
4145 | + struct list_head hook; /* hook into a list of bitmap blocks */ | |
4146 | + unsigned long start_pfn; /* pfn represented by the first bit */ | |
4147 | + unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | |
4148 | + unsigned long *data; /* bitmap representing pages */ | |
4149 | +}; | |
4150 | + | |
4151 | +/* struct bm_position is used for browsing memory bitmaps */ | |
4152 | + | |
4153 | +struct bm_position { | |
4154 | + struct bm_block *block; | |
4155 | + int bit; | |
4156 | +}; | |
4157 | + | |
4158 | +struct memory_bitmap { | |
4159 | + struct list_head blocks; /* list of bitmap blocks */ | |
4160 | + struct linked_page *p_list; /* list of pages used to store zone | |
4161 | + * bitmap objects and bitmap block | |
4162 | + * objects | |
4163 | + */ | |
85eb3c9d AM |
4164 | + struct bm_position *states; /* most recently used bit position */ |
4165 | + int num_states; /* when iterating over a bitmap and | |
4166 | + * number of states we support. | |
2380c486 JR |
4167 | + */ |
4168 | +}; | |
4169 | + | |
2380c486 JR |
4170 | +extern int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, |
4171 | + int safe_needed); | |
85eb3c9d AM |
4172 | +extern int memory_bm_create_index(struct memory_bitmap *bm, gfp_t gfp_mask, |
4173 | + int safe_needed, int index); | |
2380c486 JR |
4174 | +extern void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); |
4175 | +extern void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn); | |
4176 | +extern void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn); | |
85eb3c9d | 4177 | +extern void memory_bm_clear_bit_index(struct memory_bitmap *bm, unsigned long pfn, int index); |
2380c486 | 4178 | +extern int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn); |
85eb3c9d | 4179 | +extern int memory_bm_test_bit_index(struct memory_bitmap *bm, unsigned long pfn, int index); |
2380c486 | 4180 | +extern unsigned long memory_bm_next_pfn(struct memory_bitmap *bm); |
85eb3c9d AM |
4181 | +extern unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, |
4182 | + int index); | |
2380c486 JR |
4183 | +extern void memory_bm_position_reset(struct memory_bitmap *bm); |
4184 | +extern void memory_bm_clear(struct memory_bitmap *bm); | |
4185 | +extern void memory_bm_copy(struct memory_bitmap *source, | |
4186 | + struct memory_bitmap *dest); | |
4187 | +extern void memory_bm_dup(struct memory_bitmap *source, | |
4188 | + struct memory_bitmap *dest); | |
85eb3c9d | 4189 | +extern int memory_bm_set_iterators(struct memory_bitmap *bm, int number); |
2380c486 JR |
4190 | + |
4191 | +#ifdef CONFIG_TOI | |
4192 | +struct toi_module_ops; | |
4193 | +extern int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) | |
4194 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); | |
4195 | +extern int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) | |
4196 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); | |
4197 | +#endif | |
2380c486 | 4198 | diff --git a/kernel/power/process.c b/kernel/power/process.c |
de6743ae | 4199 | index 71ae290..8733143 100644 |
2380c486 JR |
4200 | --- a/kernel/power/process.c |
4201 | +++ b/kernel/power/process.c | |
5dd10c98 | 4202 | @@ -15,6 +15,13 @@ |
2380c486 JR |
4203 | #include <linux/syscalls.h> |
4204 | #include <linux/freezer.h> | |
5dd10c98 | 4205 | #include <linux/delay.h> |
2380c486 JR |
4206 | +#include <linux/buffer_head.h> |
4207 | + | |
4208 | +int freezer_state; | |
4209 | +EXPORT_SYMBOL_GPL(freezer_state); | |
92bca44c | 4210 | + |
de6743ae | 4211 | +int freezer_sync = 1; |
92bca44c | 4212 | +EXPORT_SYMBOL_GPL(freezer_sync); |
2380c486 JR |
4213 | |
4214 | /* | |
4215 | * Timeout for stopping processes | |
de6743ae | 4216 | @@ -112,17 +119,26 @@ int freeze_processes(void) |
2380c486 JR |
4217 | { |
4218 | int error; | |
4219 | ||
4220 | - printk("Freezing user space processes ... "); | |
4221 | + printk(KERN_INFO "Stopping fuse filesystems.\n"); | |
4222 | + freeze_filesystems(FS_FREEZER_FUSE); | |
4223 | + freezer_state = FREEZER_FILESYSTEMS_FROZEN; | |
4224 | + printk(KERN_INFO "Freezing user space processes ... "); | |
4225 | error = try_to_freeze_tasks(true); | |
4226 | if (error) | |
4227 | goto Exit; | |
9474138d | 4228 | printk("done.\n"); |
2380c486 JR |
4229 | |
4230 | - printk("Freezing remaining freezable tasks ... "); | |
92bca44c AM |
4231 | + if (freezer_sync) |
4232 | + sys_sync(); | |
2380c486 JR |
4233 | + printk(KERN_INFO "Stopping normal filesystems.\n"); |
4234 | + freeze_filesystems(FS_FREEZER_NORMAL); | |
4235 | + freezer_state = FREEZER_USERSPACE_FROZEN; | |
4236 | + printk(KERN_INFO "Freezing remaining freezable tasks ... "); | |
4237 | error = try_to_freeze_tasks(false); | |
4238 | if (error) | |
4239 | goto Exit; | |
4240 | printk("done."); | |
4241 | + freezer_state = FREEZER_FULLY_ON; | |
92bca44c AM |
4242 | |
4243 | oom_killer_disable(); | |
2380c486 | 4244 | Exit: |
de6743ae | 4245 | @@ -131,6 +147,7 @@ int freeze_processes(void) |
92bca44c | 4246 | |
2380c486 JR |
4247 | return error; |
4248 | } | |
4249 | +EXPORT_SYMBOL_GPL(freeze_processes); | |
4250 | ||
4251 | static void thaw_tasks(bool nosig_only) | |
4252 | { | |
de6743ae | 4253 | @@ -154,12 +171,39 @@ static void thaw_tasks(bool nosig_only) |
2380c486 JR |
4254 | |
4255 | void thaw_processes(void) | |
4256 | { | |
2380c486 JR |
4257 | + int old_state = freezer_state; |
4258 | + | |
4259 | + if (old_state == FREEZER_OFF) | |
4260 | + return; | |
4261 | + | |
2380c486 JR |
4262 | + freezer_state = FREEZER_OFF; |
4263 | + | |
92bca44c AM |
4264 | oom_killer_enable(); |
4265 | ||
2380c486 JR |
4266 | + printk(KERN_INFO "Restarting all filesystems ...\n"); |
4267 | + thaw_filesystems(FS_FREEZER_ALL); | |
4268 | + | |
4269 | + printk(KERN_INFO "Restarting tasks ... "); | |
2380c486 JR |
4270 | + if (old_state == FREEZER_FULLY_ON) |
4271 | + thaw_tasks(true); | |
92bca44c AM |
4272 | + |
4273 | printk("Restarting tasks ... "); | |
4274 | - thaw_tasks(true); | |
2380c486 JR |
4275 | thaw_tasks(false); |
4276 | schedule(); | |
4277 | printk("done.\n"); | |
4278 | } | |
4279 | +EXPORT_SYMBOL_GPL(thaw_processes); | |
4280 | ||
4281 | +void thaw_kernel_threads(void) | |
4282 | +{ | |
4283 | + freezer_state = FREEZER_USERSPACE_FROZEN; | |
4284 | + printk(KERN_INFO "Restarting normal filesystems.\n"); | |
4285 | + thaw_filesystems(FS_FREEZER_NORMAL); | |
4286 | + thaw_tasks(true); | |
4287 | +} | |
4288 | + | |
4289 | +/* | |
4290 | + * It's ugly putting this EXPORT down here, but it's necessary so that it | |
4291 | + * doesn't matter whether the fs-freezing patch is applied or not. | |
4292 | + */ | |
4293 | +EXPORT_SYMBOL_GPL(thaw_kernel_threads); | |
4294 | diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c | |
85eb3c9d | 4295 | index 25ce010..1d09a68 100644 |
2380c486 JR |
4296 | --- a/kernel/power/snapshot.c |
4297 | +++ b/kernel/power/snapshot.c | |
de6743ae | 4298 | @@ -35,6 +35,8 @@ |
2380c486 JR |
4299 | #include <asm/io.h> |
4300 | ||
4301 | #include "power.h" | |
4302 | +#include "tuxonice_builtin.h" | |
4303 | +#include "tuxonice_pagedir.h" | |
4304 | ||
4305 | static int swsusp_page_is_free(struct page *); | |
4306 | static void swsusp_set_page_forbidden(struct page *); | |
de6743ae | 4307 | @@ -54,6 +56,10 @@ unsigned long image_size = 500 * 1024 * 1024; |
2380c486 JR |
4308 | * directly to their "original" page frames. |
4309 | */ | |
4310 | struct pbe *restore_pblist; | |
4311 | +EXPORT_SYMBOL_GPL(restore_pblist); | |
4312 | + | |
4313 | +int resume_attempted; | |
4314 | +EXPORT_SYMBOL_GPL(resume_attempted); | |
4315 | ||
4316 | /* Pointer to an auxiliary buffer (1 page) */ | |
4317 | static void *buffer; | |
de6743ae | 4318 | @@ -96,6 +102,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) |
2380c486 JR |
4319 | |
4320 | unsigned long get_safe_page(gfp_t gfp_mask) | |
4321 | { | |
4322 | + if (toi_running) | |
4323 | + return toi_get_nonconflicting_page(); | |
4324 | + | |
4325 | return (unsigned long)get_image_page(gfp_mask, PG_SAFE); | |
4326 | } | |
4327 | ||
85eb3c9d | 4328 | @@ -232,47 +241,53 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) |
2380c486 JR |
4329 | * the represented memory area. |
4330 | */ | |
4331 | ||
4332 | -#define BM_END_OF_MAP (~0UL) | |
4333 | - | |
7e46296a | 4334 | -#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) |
2380c486 JR |
4335 | - |
4336 | -struct bm_block { | |
4337 | - struct list_head hook; /* hook into a list of bitmap blocks */ | |
4338 | - unsigned long start_pfn; /* pfn represented by the first bit */ | |
4339 | - unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | |
4340 | - unsigned long *data; /* bitmap representing pages */ | |
4341 | -}; | |
4342 | - | |
4343 | static inline unsigned long bm_block_bits(struct bm_block *bb) | |
4344 | { | |
4345 | return bb->end_pfn - bb->start_pfn; | |
4346 | } | |
4347 | ||
4348 | -/* strcut bm_position is used for browsing memory bitmaps */ | |
85eb3c9d AM |
4349 | +/* Functions that operate on memory bitmaps */ |
4350 | ||
2380c486 JR |
4351 | -struct bm_position { |
4352 | - struct bm_block *block; | |
4353 | - int bit; | |
4354 | -}; | |
85eb3c9d AM |
4355 | +void memory_bm_position_reset_index(struct memory_bitmap *bm, int index) |
4356 | +{ | |
4357 | + bm->states[index].block = list_entry(bm->blocks.next, | |
4358 | + struct bm_block, hook); | |
4359 | + bm->states[index].bit = 0; | |
4360 | +} | |
4361 | +EXPORT_SYMBOL_GPL(memory_bm_position_reset_index); | |
4362 | ||
2380c486 JR |
4363 | -struct memory_bitmap { |
4364 | - struct list_head blocks; /* list of bitmap blocks */ | |
4365 | - struct linked_page *p_list; /* list of pages used to store zone | |
4366 | - * bitmap objects and bitmap block | |
4367 | - * objects | |
4368 | - */ | |
4369 | - struct bm_position cur; /* most recently used bit position */ | |
4370 | -}; | |
85eb3c9d AM |
4371 | +void memory_bm_position_reset(struct memory_bitmap *bm) |
4372 | +{ | |
4373 | + int i; | |
4374 | ||
4375 | -/* Functions that operate on memory bitmaps */ | |
4376 | + for (i = 0; i < bm->num_states; i++) { | |
4377 | + bm->states[i].block = list_entry(bm->blocks.next, | |
4378 | + struct bm_block, hook); | |
4379 | + bm->states[i].bit = 0; | |
4380 | + } | |
4381 | +} | |
4382 | +EXPORT_SYMBOL_GPL(memory_bm_position_reset); | |
2380c486 JR |
4383 | |
4384 | -static void memory_bm_position_reset(struct memory_bitmap *bm) | |
85eb3c9d | 4385 | +int memory_bm_set_iterators(struct memory_bitmap *bm, int number) |
2380c486 | 4386 | { |
85eb3c9d AM |
4387 | - bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); |
4388 | - bm->cur.bit = 0; | |
9474138d | 4389 | -} |
85eb3c9d AM |
4390 | + int bytes = number * sizeof(struct bm_position); |
4391 | + struct bm_position *new_states; | |
4392 | + | |
4393 | + if (number < bm->num_states) | |
4394 | + return 0; | |
9474138d AM |
4395 | |
4396 | -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |
85eb3c9d AM |
4397 | + new_states = kmalloc(bytes, GFP_KERNEL); |
4398 | + if (!new_states) | |
4399 | + return -ENOMEM; | |
4400 | + | |
4401 | + if (bm->states) | |
4402 | + kfree(bm->states); | |
4403 | + | |
4404 | + bm->states = new_states; | |
4405 | + bm->num_states = number; | |
4406 | + return 0; | |
9474138d | 4407 | +} |
85eb3c9d | 4408 | +EXPORT_SYMBOL_GPL(memory_bm_set_iterators); |
2380c486 | 4409 | |
2380c486 JR |
4410 | /** |
4411 | * create_bm_block_list - create a list of block bitmap objects | |
85eb3c9d | 4412 | @@ -380,8 +395,8 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) |
2380c486 JR |
4413 | /** |
4414 | * memory_bm_create - allocate memory for a memory bitmap | |
4415 | */ | |
4416 | -static int | |
85eb3c9d AM |
4417 | -memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) |
4418 | +int memory_bm_create_index(struct memory_bitmap *bm, gfp_t gfp_mask, | |
4419 | + int safe_needed, int states) | |
2380c486 JR |
4420 | { |
4421 | struct chain_allocator ca; | |
85eb3c9d AM |
4422 | struct list_head mem_extents; |
4423 | @@ -425,6 +440,9 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |
4424 | } | |
4425 | } | |
4426 | ||
4427 | + if (!error) | |
4428 | + error = memory_bm_set_iterators(bm, states); | |
4429 | + | |
4430 | bm->p_list = ca.chain; | |
4431 | memory_bm_position_reset(bm); | |
4432 | Exit: | |
4433 | @@ -436,11 +454,18 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |
2380c486 JR |
4434 | memory_bm_free(bm, PG_UNSAFE_CLEAR); |
4435 | goto Exit; | |
4436 | } | |
85eb3c9d AM |
4437 | +EXPORT_SYMBOL_GPL(memory_bm_create_index); |
4438 | + | |
4439 | +int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |
4440 | +{ | |
4441 | + return memory_bm_create_index(bm, gfp_mask, safe_needed, 1); | |
4442 | +} | |
2380c486 JR |
4443 | +EXPORT_SYMBOL_GPL(memory_bm_create); |
4444 | ||
4445 | /** | |
4446 | * memory_bm_free - free memory occupied by the memory bitmap @bm | |
4447 | */ | |
4448 | -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |
4449 | +void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |
4450 | { | |
4451 | struct bm_block *bb; | |
4452 | ||
85eb3c9d AM |
4453 | @@ -451,15 +476,22 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) |
4454 | free_list_of_pages(bm->p_list, clear_nosave_free); | |
2380c486 JR |
4455 | |
4456 | INIT_LIST_HEAD(&bm->blocks); | |
85eb3c9d AM |
4457 | + |
4458 | + if (bm->states) { | |
4459 | + kfree(bm->states); | |
4460 | + bm->states = NULL; | |
4461 | + bm->num_states = 0; | |
4462 | + } | |
2380c486 JR |
4463 | } |
4464 | +EXPORT_SYMBOL_GPL(memory_bm_free); | |
4465 | ||
4466 | /** | |
4467 | * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds | |
85eb3c9d AM |
4468 | * to given pfn. The cur_zone_bm member of @bm and the cur_block member |
4469 | - * of @bm->cur_zone_bm are updated. | |
4470 | + * of @bm->states[i]_zone_bm are updated. | |
4471 | */ | |
4472 | -static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, | |
4473 | - void **addr, unsigned int *bit_nr) | |
4474 | +static int memory_bm_find_bit_index(struct memory_bitmap *bm, unsigned long pfn, | |
4475 | + void **addr, unsigned int *bit_nr, int state) | |
4476 | { | |
4477 | struct bm_block *bb; | |
4478 | ||
4479 | @@ -467,7 +499,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, | |
4480 | * Check if the pfn corresponds to the current bitmap block and find | |
4481 | * the block where it fits if this is not the case. | |
4482 | */ | |
4483 | - bb = bm->cur.block; | |
4484 | + bb = bm->states[state].block; | |
4485 | if (pfn < bb->start_pfn) | |
4486 | list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) | |
4487 | if (pfn >= bb->start_pfn) | |
4488 | @@ -482,15 +514,21 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, | |
4489 | return -EFAULT; | |
4490 | ||
4491 | /* The block has been found */ | |
4492 | - bm->cur.block = bb; | |
4493 | + bm->states[state].block = bb; | |
4494 | pfn -= bb->start_pfn; | |
4495 | - bm->cur.bit = pfn + 1; | |
4496 | + bm->states[state].bit = pfn + 1; | |
4497 | *bit_nr = pfn; | |
4498 | *addr = bb->data; | |
2380c486 JR |
4499 | return 0; |
4500 | } | |
4501 | ||
4502 | -static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | |
85eb3c9d AM |
4503 | +static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, |
4504 | + void **addr, unsigned int *bit_nr) | |
4505 | +{ | |
4506 | + return memory_bm_find_bit_index(bm, pfn, addr, bit_nr, 0); | |
4507 | +} | |
4508 | + | |
2380c486 JR |
4509 | +void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) |
4510 | { | |
4511 | void *addr; | |
4512 | unsigned int bit; | |
85eb3c9d | 4513 | @@ -500,6 +538,7 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4514 | BUG_ON(error); |
4515 | set_bit(bit, addr); | |
4516 | } | |
4517 | +EXPORT_SYMBOL_GPL(memory_bm_set_bit); | |
4518 | ||
9474138d | 4519 | static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 | 4520 | { |
85eb3c9d | 4521 | @@ -513,27 +552,43 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4522 | return error; |
4523 | } | |
4524 | ||
4525 | -static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) | |
85eb3c9d AM |
4526 | +void memory_bm_clear_bit_index(struct memory_bitmap *bm, unsigned long pfn, |
4527 | + int index) | |
2380c486 JR |
4528 | { |
4529 | void *addr; | |
4530 | unsigned int bit; | |
85eb3c9d AM |
4531 | int error; |
4532 | ||
4533 | - error = memory_bm_find_bit(bm, pfn, &addr, &bit); | |
4534 | + error = memory_bm_find_bit_index(bm, pfn, &addr, &bit, index); | |
2380c486 JR |
4535 | BUG_ON(error); |
4536 | clear_bit(bit, addr); | |
4537 | } | |
85eb3c9d AM |
4538 | +EXPORT_SYMBOL_GPL(memory_bm_clear_bit_index); |
4539 | + | |
4540 | +void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4541 | +{ | |
4542 | + memory_bm_clear_bit_index(bm, pfn, 0); | |
4543 | +} | |
2380c486 JR |
4544 | +EXPORT_SYMBOL_GPL(memory_bm_clear_bit); |
4545 | ||
4546 | -static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |
85eb3c9d AM |
4547 | +int memory_bm_test_bit_index(struct memory_bitmap *bm, unsigned long pfn, |
4548 | + int index) | |
2380c486 JR |
4549 | { |
4550 | void *addr; | |
4551 | unsigned int bit; | |
85eb3c9d AM |
4552 | int error; |
4553 | ||
4554 | - error = memory_bm_find_bit(bm, pfn, &addr, &bit); | |
4555 | + error = memory_bm_find_bit_index(bm, pfn, &addr, &bit, index); | |
2380c486 JR |
4556 | BUG_ON(error); |
4557 | return test_bit(bit, addr); | |
4558 | } | |
85eb3c9d AM |
4559 | +EXPORT_SYMBOL_GPL(memory_bm_test_bit_index); |
4560 | + | |
4561 | +int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |
4562 | +{ | |
4563 | + return memory_bm_test_bit_index(bm, pfn, 0); | |
4564 | +} | |
2380c486 JR |
4565 | +EXPORT_SYMBOL_GPL(memory_bm_test_bit); |
4566 | ||
4567 | static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) | |
4568 | { | |
85eb3c9d | 4569 | @@ -552,43 +607,184 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
4570 | * this function. |
4571 | */ | |
4572 | ||
4573 | -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | |
85eb3c9d | 4574 | +unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, int index) |
2380c486 JR |
4575 | { |
4576 | struct bm_block *bb; | |
4577 | int bit; | |
4578 | ||
4579 | - bb = bm->cur.block; | |
85eb3c9d | 4580 | + bb = bm->states[index].block; |
2380c486 JR |
4581 | do { |
4582 | - bit = bm->cur.bit; | |
85eb3c9d | 4583 | + bit = bm->states[index].bit; |
2380c486 JR |
4584 | bit = find_next_bit(bb->data, bm_block_bits(bb), bit); |
4585 | if (bit < bm_block_bits(bb)) | |
4586 | goto Return_pfn; | |
4587 | ||
4588 | bb = list_entry(bb->hook.next, struct bm_block, hook); | |
4589 | - bm->cur.block = bb; | |
4590 | - bm->cur.bit = 0; | |
85eb3c9d AM |
4591 | + bm->states[index].block = bb; |
4592 | + bm->states[index].bit = 0; | |
2380c486 JR |
4593 | } while (&bb->hook != &bm->blocks); |
4594 | ||
85eb3c9d AM |
4595 | - memory_bm_position_reset(bm); |
4596 | + memory_bm_position_reset_index(bm, index); | |
2380c486 JR |
4597 | return BM_END_OF_MAP; |
4598 | ||
4599 | Return_pfn: | |
4600 | - bm->cur.bit = bit + 1; | |
85eb3c9d | 4601 | + bm->states[index].bit = bit + 1; |
2380c486 JR |
4602 | return bb->start_pfn + bit; |
4603 | } | |
85eb3c9d | 4604 | +EXPORT_SYMBOL_GPL(memory_bm_next_pfn_index); |
2380c486 JR |
4605 | |
4606 | -/** | |
4607 | - * This structure represents a range of page frames the contents of which | |
4608 | - * should not be saved during the suspend. | |
4609 | - */ | |
85eb3c9d | 4610 | +unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) |
2380c486 | 4611 | +{ |
85eb3c9d AM |
4612 | + return memory_bm_next_pfn_index(bm, 0); |
4613 | +} | |
4614 | +EXPORT_SYMBOL_GPL(memory_bm_next_pfn); | |
2380c486 JR |
4615 | |
4616 | -struct nosave_region { | |
4617 | - struct list_head list; | |
4618 | - unsigned long start_pfn; | |
4619 | - unsigned long end_pfn; | |
4620 | -}; | |
85eb3c9d AM |
4621 | +void memory_bm_clear(struct memory_bitmap *bm) |
4622 | +{ | |
4623 | + unsigned long pfn; | |
4624 | ||
4625 | -static LIST_HEAD(nosave_regions); | |
2380c486 JR |
4626 | + memory_bm_position_reset(bm); |
4627 | + pfn = memory_bm_next_pfn(bm); | |
4628 | + while (pfn != BM_END_OF_MAP) { | |
4629 | + memory_bm_clear_bit(bm, pfn); | |
4630 | + pfn = memory_bm_next_pfn(bm); | |
4631 | + } | |
4632 | +} | |
4633 | +EXPORT_SYMBOL_GPL(memory_bm_clear); | |
4634 | + | |
4635 | +void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest) | |
4636 | +{ | |
4637 | + unsigned long pfn; | |
4638 | + | |
4639 | + memory_bm_position_reset(source); | |
4640 | + pfn = memory_bm_next_pfn(source); | |
4641 | + while (pfn != BM_END_OF_MAP) { | |
4642 | + memory_bm_set_bit(dest, pfn); | |
4643 | + pfn = memory_bm_next_pfn(source); | |
4644 | + } | |
4645 | +} | |
4646 | +EXPORT_SYMBOL_GPL(memory_bm_copy); | |
4647 | + | |
4648 | +void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest) | |
4649 | +{ | |
4650 | + memory_bm_clear(dest); | |
4651 | + memory_bm_copy(source, dest); | |
4652 | +} | |
4653 | +EXPORT_SYMBOL_GPL(memory_bm_dup); | |
4654 | + | |
4655 | +#ifdef CONFIG_TOI | |
4656 | +#define DEFINE_MEMORY_BITMAP(name) \ | |
4657 | +struct memory_bitmap *name; \ | |
4658 | +EXPORT_SYMBOL_GPL(name) | |
4659 | + | |
4660 | +DEFINE_MEMORY_BITMAP(pageset1_map); | |
4661 | +DEFINE_MEMORY_BITMAP(pageset1_copy_map); | |
4662 | +DEFINE_MEMORY_BITMAP(pageset2_map); | |
4663 | +DEFINE_MEMORY_BITMAP(page_resave_map); | |
4664 | +DEFINE_MEMORY_BITMAP(io_map); | |
4665 | +DEFINE_MEMORY_BITMAP(nosave_map); | |
4666 | +DEFINE_MEMORY_BITMAP(free_map); | |
9474138d | 4667 | + |
2380c486 JR |
4668 | +int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) |
4669 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) | |
4670 | +{ | |
4671 | + int result = 0; | |
4672 | + unsigned int nr = 0; | |
4673 | + struct bm_block *bb; | |
4674 | + | |
4675 | + if (!bm) | |
4676 | + return result; | |
85eb3c9d | 4677 | + |
2380c486 JR |
4678 | + list_for_each_entry(bb, &bm->blocks, hook) |
4679 | + nr++; | |
4680 | + | |
4681 | + result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int)); | |
4682 | + if (result) | |
4683 | + return result; | |
4684 | + | |
4685 | + list_for_each_entry(bb, &bm->blocks, hook) { | |
4686 | + result = (*rw_chunk)(WRITE, NULL, (char *) &bb->start_pfn, | |
4687 | + 2 * sizeof(unsigned long)); | |
4688 | + if (result) | |
4689 | + return result; | |
4690 | + | |
4691 | + result = (*rw_chunk)(WRITE, NULL, (char *) bb->data, PAGE_SIZE); | |
4692 | + if (result) | |
4693 | + return result; | |
4694 | + } | |
4695 | + | |
4696 | + return 0; | |
4697 | +} | |
4698 | +EXPORT_SYMBOL_GPL(memory_bm_write); | |
4699 | + | |
4700 | +int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) | |
4701 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) | |
4702 | +{ | |
4703 | + int result = 0; | |
4704 | + unsigned int nr, i; | |
4705 | + struct bm_block *bb; | |
4706 | + | |
4707 | + if (!bm) | |
4708 | + return result; | |
4709 | + | |
4710 | + result = memory_bm_create(bm, GFP_KERNEL, 0); | |
4711 | + | |
4712 | + if (result) | |
4713 | + return result; | |
4714 | + | |
4715 | + result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int)); | |
4716 | + if (result) | |
4717 | + goto Free; | |
4718 | + | |
4719 | + for (i = 0; i < nr; i++) { | |
4720 | + unsigned long pfn; | |
4721 | + | |
4722 | + result = (*rw_chunk)(READ, NULL, (char *) &pfn, | |
4723 | + sizeof(unsigned long)); | |
4724 | + if (result) | |
4725 | + goto Free; | |
4726 | + | |
4727 | + list_for_each_entry(bb, &bm->blocks, hook) | |
4728 | + if (bb->start_pfn == pfn) | |
4729 | + break; | |
4730 | + | |
4731 | + if (&bb->hook == &bm->blocks) { | |
4732 | + printk(KERN_ERR | |
4733 | + "TuxOnIce: Failed to load memory bitmap.\n"); | |
4734 | + result = -EINVAL; | |
4735 | + goto Free; | |
4736 | + } | |
4737 | + | |
4738 | + result = (*rw_chunk)(READ, NULL, (char *) &pfn, | |
4739 | + sizeof(unsigned long)); | |
4740 | + if (result) | |
4741 | + goto Free; | |
4742 | + | |
4743 | + if (pfn != bb->end_pfn) { | |
4744 | + printk(KERN_ERR | |
4745 | + "TuxOnIce: Failed to load memory bitmap. " | |
4746 | + "End PFN doesn't match what was saved.\n"); | |
4747 | + result = -EINVAL; | |
4748 | + goto Free; | |
4749 | + } | |
4750 | + | |
4751 | + result = (*rw_chunk)(READ, NULL, (char *) bb->data, PAGE_SIZE); | |
4752 | + | |
4753 | + if (result) | |
4754 | + goto Free; | |
4755 | + } | |
4756 | + | |
4757 | + return 0; | |
4758 | + | |
4759 | +Free: | |
4760 | + memory_bm_free(bm, PG_ANY); | |
4761 | + return result; | |
4762 | +} | |
4763 | +EXPORT_SYMBOL_GPL(memory_bm_read); | |
4764 | +#endif | |
4765 | + | |
4766 | +LIST_HEAD(nosave_regions); | |
4767 | +EXPORT_SYMBOL_GPL(nosave_regions); | |
4768 | ||
4769 | /** | |
4770 | * register_nosave_region - register a range of page frames the contents | |
85eb3c9d | 4771 | @@ -824,7 +1020,7 @@ static unsigned int count_free_highmem_pages(void) |
2380c486 JR |
4772 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, |
4773 | * and it isn't a part of a free chunk of pages. | |
4774 | */ | |
4775 | -static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
4776 | +struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
4777 | { | |
4778 | struct page *page; | |
4779 | ||
85eb3c9d | 4780 | @@ -843,6 +1039,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) |
2380c486 JR |
4781 | |
4782 | return page; | |
4783 | } | |
4784 | +EXPORT_SYMBOL_GPL(saveable_highmem_page); | |
4785 | ||
4786 | /** | |
4787 | * count_highmem_pages - compute the total number of saveable highmem | |
85eb3c9d | 4788 | @@ -868,11 +1065,6 @@ static unsigned int count_highmem_pages(void) |
2380c486 JR |
4789 | } |
4790 | return n; | |
4791 | } | |
4792 | -#else | |
4793 | -static inline void *saveable_highmem_page(struct zone *z, unsigned long p) | |
4794 | -{ | |
4795 | - return NULL; | |
4796 | -} | |
4797 | #endif /* CONFIG_HIGHMEM */ | |
4798 | ||
4799 | /** | |
85eb3c9d | 4800 | @@ -883,7 +1075,7 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p) |
2380c486 JR |
4801 | * of pages statically defined as 'unsaveable', and it isn't a part of |
4802 | * a free chunk of pages. | |
4803 | */ | |
4804 | -static struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
4805 | +struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
4806 | { | |
4807 | struct page *page; | |
4808 | ||
85eb3c9d | 4809 | @@ -905,6 +1097,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) |
2380c486 JR |
4810 | |
4811 | return page; | |
4812 | } | |
4813 | +EXPORT_SYMBOL_GPL(saveable_page); | |
4814 | ||
4815 | /** | |
4816 | * count_data_pages - compute the total number of saveable non-highmem | |
85eb3c9d | 4817 | @@ -1501,6 +1694,9 @@ asmlinkage int swsusp_save(void) |
2380c486 JR |
4818 | { |
4819 | unsigned int nr_pages, nr_highmem; | |
4820 | ||
4821 | + if (toi_running) | |
4822 | + return toi_post_context_save(); | |
4823 | + | |
d031c9d6 | 4824 | printk(KERN_INFO "PM: Creating hibernation image:\n"); |
2380c486 JR |
4825 | |
4826 | drain_local_pages(NULL); | |
85eb3c9d | 4827 | @@ -1541,14 +1737,14 @@ asmlinkage int swsusp_save(void) |
2380c486 JR |
4828 | } |
4829 | ||
4830 | #ifndef CONFIG_ARCH_HIBERNATION_HEADER | |
4831 | -static int init_header_complete(struct swsusp_info *info) | |
e999739a | 4832 | +int init_header_complete(struct swsusp_info *info) |
2380c486 JR |
4833 | { |
4834 | memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); | |
4835 | info->version_code = LINUX_VERSION_CODE; | |
4836 | return 0; | |
4837 | } | |
4838 | ||
4839 | -static char *check_image_kernel(struct swsusp_info *info) | |
e999739a | 4840 | +char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4841 | { |
4842 | if (info->version_code != LINUX_VERSION_CODE) | |
4843 | return "kernel version"; | |
85eb3c9d | 4844 | @@ -1562,6 +1758,7 @@ static char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
4845 | return "machine"; |
4846 | return NULL; | |
4847 | } | |
e999739a | 4848 | +EXPORT_SYMBOL_GPL(check_image_kernel); |
2380c486 JR |
4849 | #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ |
4850 | ||
4851 | unsigned long snapshot_get_image_size(void) | |
85eb3c9d | 4852 | @@ -1569,7 +1766,7 @@ unsigned long snapshot_get_image_size(void) |
2380c486 JR |
4853 | return nr_copy_pages + nr_meta_pages + 1; |
4854 | } | |
4855 | ||
4856 | -static int init_header(struct swsusp_info *info) | |
e999739a | 4857 | +int init_header(struct swsusp_info *info) |
2380c486 JR |
4858 | { |
4859 | memset(info, 0, sizeof(struct swsusp_info)); | |
4860 | info->num_physpages = num_physpages; | |
85eb3c9d | 4861 | @@ -1579,6 +1776,7 @@ static int init_header(struct swsusp_info *info) |
2380c486 | 4862 | info->size <<= PAGE_SHIFT; |
e999739a | 4863 | return init_header_complete(info); |
2380c486 | 4864 | } |
e999739a | 4865 | +EXPORT_SYMBOL_GPL(init_header); |
2380c486 JR |
4866 | |
4867 | /** | |
4868 | * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm | |
92bca44c | 4869 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c |
85eb3c9d | 4870 | index f37cb7d..ad20590 100644 |
92bca44c AM |
4871 | --- a/kernel/power/suspend.c |
4872 | +++ b/kernel/power/suspend.c | |
85eb3c9d | 4873 | @@ -236,6 +236,7 @@ int suspend_devices_and_enter(suspend_state_t state) |
92bca44c AM |
4874 | suspend_ops->recover(); |
4875 | goto Resume_devices; | |
4876 | } | |
4877 | +EXPORT_SYMBOL_GPL(suspend_devices_and_enter); | |
4878 | ||
4879 | /** | |
4880 | * suspend_finish - Do final work before exiting suspend sequence. | |
2380c486 JR |
4881 | diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h |
4882 | new file mode 100644 | |
85eb3c9d | 4883 | index 0000000..70d1df1 |
2380c486 JR |
4884 | --- /dev/null |
4885 | +++ b/kernel/power/tuxonice.h | |
85eb3c9d | 4886 | @@ -0,0 +1,213 @@ |
2380c486 JR |
4887 | +/* |
4888 | + * kernel/power/tuxonice.h | |
4889 | + * | |
5dd10c98 | 4890 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
4891 | + * |
4892 | + * This file is released under the GPLv2. | |
4893 | + * | |
4894 | + * It contains declarations used throughout swsusp. | |
4895 | + * | |
4896 | + */ | |
4897 | + | |
4898 | +#ifndef KERNEL_POWER_TOI_H | |
4899 | +#define KERNEL_POWER_TOI_H | |
4900 | + | |
4901 | +#include <linux/delay.h> | |
4902 | +#include <linux/bootmem.h> | |
4903 | +#include <linux/suspend.h> | |
4904 | +#include <linux/fs.h> | |
4905 | +#include <linux/kmod.h> | |
4906 | +#include <asm/setup.h> | |
4907 | +#include "tuxonice_pageflags.h" | |
9474138d | 4908 | +#include "power.h" |
2380c486 | 4909 | + |
85eb3c9d | 4910 | +#define TOI_CORE_VERSION "3.2-rc2" |
cacc47f8 | 4911 | +#define TOI_HEADER_VERSION 3 |
5dd10c98 | 4912 | +#define MY_BOOT_KERNEL_DATA_VERSION 3 |
2380c486 JR |
4913 | + |
4914 | +struct toi_boot_kernel_data { | |
4915 | + int version; | |
4916 | + int size; | |
4917 | + unsigned long toi_action; | |
4918 | + unsigned long toi_debug_state; | |
4919 | + u32 toi_default_console_level; | |
4920 | + int toi_io_time[2][2]; | |
4921 | + char toi_nosave_commandline[COMMAND_LINE_SIZE]; | |
5dd10c98 AM |
4922 | + unsigned long pages_used[33]; |
4923 | + unsigned long compress_bytes_in; | |
4924 | + unsigned long compress_bytes_out; | |
2380c486 JR |
4925 | +}; |
4926 | + | |
4927 | +extern struct toi_boot_kernel_data toi_bkd; | |
4928 | + | |
4929 | +/* Location of book kernel data struct in kernel being resumed */ | |
4930 | +extern unsigned long boot_kernel_data_buffer; | |
4931 | + | |
4932 | +/* == Action states == */ | |
4933 | + | |
4934 | +enum { | |
4935 | + TOI_REBOOT, | |
4936 | + TOI_PAUSE, | |
4937 | + TOI_LOGALL, | |
4938 | + TOI_CAN_CANCEL, | |
4939 | + TOI_KEEP_IMAGE, | |
4940 | + TOI_FREEZER_TEST, | |
4941 | + TOI_SINGLESTEP, | |
4942 | + TOI_PAUSE_NEAR_PAGESET_END, | |
4943 | + TOI_TEST_FILTER_SPEED, | |
4944 | + TOI_TEST_BIO, | |
4945 | + TOI_NO_PAGESET2, | |
2380c486 JR |
4946 | + TOI_IGNORE_ROOTFS, |
4947 | + TOI_REPLACE_SWSUSP, | |
4948 | + TOI_PAGESET2_FULL, | |
4949 | + TOI_ABORT_ON_RESAVE_NEEDED, | |
4950 | + TOI_NO_MULTITHREADED_IO, | |
5dd10c98 | 4951 | + TOI_NO_DIRECT_LOAD, /* Obsolete */ |
2380c486 JR |
4952 | + TOI_LATE_CPU_HOTPLUG, |
4953 | + TOI_GET_MAX_MEM_ALLOCD, | |
4954 | + TOI_NO_FLUSHER_THREAD, | |
4955 | + TOI_NO_PS2_IF_UNNEEDED | |
4956 | +}; | |
4957 | + | |
4958 | +#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action)) | |
4959 | + | |
4960 | +/* == Result states == */ | |
4961 | + | |
4962 | +enum { | |
4963 | + TOI_ABORTED, | |
4964 | + TOI_ABORT_REQUESTED, | |
4965 | + TOI_NOSTORAGE_AVAILABLE, | |
4966 | + TOI_INSUFFICIENT_STORAGE, | |
4967 | + TOI_FREEZING_FAILED, | |
4968 | + TOI_KEPT_IMAGE, | |
4969 | + TOI_WOULD_EAT_MEMORY, | |
4970 | + TOI_UNABLE_TO_FREE_ENOUGH_MEMORY, | |
4971 | + TOI_PM_SEM, | |
4972 | + TOI_DEVICE_REFUSED, | |
4973 | + TOI_SYSDEV_REFUSED, | |
4974 | + TOI_EXTRA_PAGES_ALLOW_TOO_SMALL, | |
4975 | + TOI_UNABLE_TO_PREPARE_IMAGE, | |
4976 | + TOI_FAILED_MODULE_INIT, | |
4977 | + TOI_FAILED_MODULE_CLEANUP, | |
4978 | + TOI_FAILED_IO, | |
4979 | + TOI_OUT_OF_MEMORY, | |
4980 | + TOI_IMAGE_ERROR, | |
4981 | + TOI_PLATFORM_PREP_FAILED, | |
4982 | + TOI_CPU_HOTPLUG_FAILED, | |
4983 | + TOI_ARCH_PREPARE_FAILED, | |
4984 | + TOI_RESAVE_NEEDED, | |
4985 | + TOI_CANT_SUSPEND, | |
4986 | + TOI_NOTIFIERS_PREPARE_FAILED, | |
4987 | + TOI_PRE_SNAPSHOT_FAILED, | |
4988 | + TOI_PRE_RESTORE_FAILED, | |
4989 | + TOI_USERMODE_HELPERS_ERR, | |
4990 | + TOI_CANT_USE_ALT_RESUME, | |
0ada99ac | 4991 | + TOI_HEADER_TOO_BIG, |
2380c486 JR |
4992 | + TOI_NUM_RESULT_STATES /* Used in printing debug info only */ |
4993 | +}; | |
4994 | + | |
4995 | +extern unsigned long toi_result; | |
4996 | + | |
4997 | +#define set_result_state(bit) (test_and_set_bit(bit, &toi_result)) | |
4998 | +#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \ | |
4999 | + test_and_set_bit(bit, &toi_result)) | |
5000 | +#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result)) | |
5001 | +#define test_result_state(bit) (test_bit(bit, &toi_result)) | |
5002 | + | |
5003 | +/* == Debug sections and levels == */ | |
5004 | + | |
5005 | +/* debugging levels. */ | |
5006 | +enum { | |
5007 | + TOI_STATUS = 0, | |
5008 | + TOI_ERROR = 2, | |
5009 | + TOI_LOW, | |
5010 | + TOI_MEDIUM, | |
5011 | + TOI_HIGH, | |
5012 | + TOI_VERBOSE, | |
5013 | +}; | |
5014 | + | |
5015 | +enum { | |
5016 | + TOI_ANY_SECTION, | |
5017 | + TOI_EAT_MEMORY, | |
5018 | + TOI_IO, | |
5019 | + TOI_HEADER, | |
5020 | + TOI_WRITER, | |
5021 | + TOI_MEMORY, | |
85eb3c9d AM |
5022 | + TOI_PAGEDIR, |
5023 | + TOI_COMPRESS, | |
2380c486 JR |
5024 | +}; |
5025 | + | |
5026 | +#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state)) | |
5027 | +#define clear_debug_state(bit) \ | |
5028 | + (test_and_clear_bit(bit, &toi_bkd.toi_debug_state)) | |
5029 | +#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state)) | |
5030 | + | |
5031 | +/* == Steps in hibernating == */ | |
5032 | + | |
5033 | +enum { | |
5034 | + STEP_HIBERNATE_PREPARE_IMAGE, | |
5035 | + STEP_HIBERNATE_SAVE_IMAGE, | |
5036 | + STEP_HIBERNATE_POWERDOWN, | |
5037 | + STEP_RESUME_CAN_RESUME, | |
5038 | + STEP_RESUME_LOAD_PS1, | |
5039 | + STEP_RESUME_DO_RESTORE, | |
5040 | + STEP_RESUME_READ_PS2, | |
5041 | + STEP_RESUME_GO, | |
5042 | + STEP_RESUME_ALT_IMAGE, | |
5043 | + STEP_CLEANUP, | |
5044 | + STEP_QUIET_CLEANUP | |
5045 | +}; | |
5046 | + | |
5047 | +/* == TuxOnIce states == | |
5048 | + (see also include/linux/suspend.h) */ | |
5049 | + | |
5050 | +#define get_toi_state() (toi_state) | |
5051 | +#define restore_toi_state(saved_state) \ | |
5052 | + do { toi_state = saved_state; } while (0) | |
5053 | + | |
5054 | +/* == Module support == */ | |
5055 | + | |
5056 | +struct toi_core_fns { | |
5057 | + int (*post_context_save)(void); | |
5058 | + unsigned long (*get_nonconflicting_page)(void); | |
5059 | + int (*try_hibernate)(void); | |
5060 | + void (*try_resume)(void); | |
5061 | +}; | |
5062 | + | |
5063 | +extern struct toi_core_fns *toi_core_fns; | |
5064 | + | |
5065 | +/* == All else == */ | |
5066 | +#define KB(x) ((x) << (PAGE_SHIFT - 10)) | |
5067 | +#define MB(x) ((x) >> (20 - PAGE_SHIFT)) | |
5068 | + | |
5069 | +extern int toi_start_anything(int toi_or_resume); | |
5070 | +extern void toi_finish_anything(int toi_or_resume); | |
5071 | + | |
5072 | +extern int save_image_part1(void); | |
5073 | +extern int toi_atomic_restore(void); | |
5074 | + | |
9474138d AM |
5075 | +extern int toi_try_hibernate(void); |
5076 | +extern void toi_try_resume(void); | |
2380c486 JR |
5077 | + |
5078 | +extern int __toi_post_context_save(void); | |
5079 | + | |
5080 | +extern unsigned int nr_hibernates; | |
5081 | +extern char alt_resume_param[256]; | |
5082 | + | |
5083 | +extern void copyback_post(void); | |
5084 | +extern int toi_hibernate(void); | |
92bca44c | 5085 | +extern unsigned long extra_pd1_pages_used; |
2380c486 JR |
5086 | + |
5087 | +#define SECTOR_SIZE 512 | |
5088 | + | |
5089 | +extern void toi_early_boot_message(int can_erase_image, int default_answer, | |
5090 | + char *warning_reason, ...); | |
5091 | + | |
2380c486 JR |
5092 | +extern int do_check_can_resume(void); |
5093 | +extern int do_toi_step(int step); | |
5094 | +extern int toi_launch_userspace_program(char *command, int channel_no, | |
5095 | + enum umh_wait wait, int debug); | |
5096 | + | |
7e46296a AM |
5097 | +extern char tuxonice_signature[9]; |
5098 | +extern int freezer_sync; | |
2380c486 JR |
5099 | +#endif |
5100 | diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c | |
5101 | new file mode 100644 | |
5dd10c98 | 5102 | index 0000000..891c5b2 |
2380c486 JR |
5103 | --- /dev/null |
5104 | +++ b/kernel/power/tuxonice_alloc.c | |
7e46296a | 5105 | @@ -0,0 +1,313 @@ |
2380c486 JR |
5106 | +/* |
5107 | + * kernel/power/tuxonice_alloc.c | |
5108 | + * | |
5dd10c98 | 5109 | + * Copyright (C) 2008-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5110 | + * |
5111 | + * This file is released under the GPLv2. | |
5112 | + * | |
5113 | + */ | |
5114 | + | |
5115 | +#ifdef CONFIG_PM_DEBUG | |
5116 | +#include <linux/module.h> | |
5117 | +#include <linux/slab.h> | |
5118 | +#include "tuxonice_modules.h" | |
5119 | +#include "tuxonice_alloc.h" | |
5120 | +#include "tuxonice_sysfs.h" | |
5121 | +#include "tuxonice.h" | |
5122 | + | |
7e46296a | 5123 | +#define TOI_ALLOC_PATHS 40 |
2380c486 JR |
5124 | + |
5125 | +static DEFINE_MUTEX(toi_alloc_mutex); | |
5126 | + | |
5127 | +static struct toi_module_ops toi_alloc_ops; | |
5128 | + | |
5129 | +static int toi_fail_num; | |
7e46296a | 5130 | +static int trace_allocs; |
2380c486 JR |
5131 | +static atomic_t toi_alloc_count[TOI_ALLOC_PATHS], |
5132 | + toi_free_count[TOI_ALLOC_PATHS], | |
5133 | + toi_test_count[TOI_ALLOC_PATHS], | |
5134 | + toi_fail_count[TOI_ALLOC_PATHS]; | |
5135 | +static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS]; | |
5136 | +static int cur_allocd, max_allocd; | |
5137 | + | |
5138 | +static char *toi_alloc_desc[TOI_ALLOC_PATHS] = { | |
5139 | + "", /* 0 */ | |
5140 | + "get_io_info_struct", | |
5141 | + "extent", | |
5142 | + "extent (loading chain)", | |
5143 | + "userui channel", | |
5144 | + "userui arg", /* 5 */ | |
5145 | + "attention list metadata", | |
5146 | + "extra pagedir memory metadata", | |
5147 | + "bdev metadata", | |
5148 | + "extra pagedir memory", | |
5149 | + "header_locations_read", /* 10 */ | |
5150 | + "bio queue", | |
5151 | + "prepare_readahead", | |
5152 | + "i/o buffer", | |
5153 | + "writer buffer in bio_init", | |
5154 | + "checksum buffer", /* 15 */ | |
5155 | + "compression buffer", | |
5156 | + "filewriter signature op", | |
5157 | + "set resume param alloc1", | |
5158 | + "set resume param alloc2", | |
5159 | + "debugging info buffer", /* 20 */ | |
5160 | + "check can resume buffer", | |
5161 | + "write module config buffer", | |
5162 | + "read module config buffer", | |
5163 | + "write image header buffer", | |
5164 | + "read pageset1 buffer", /* 25 */ | |
5165 | + "get_have_image_data buffer", | |
5166 | + "checksum page", | |
5167 | + "worker rw loop", | |
5168 | + "get nonconflicting page", | |
5169 | + "ps1 load addresses", /* 30 */ | |
5170 | + "remove swap image", | |
5171 | + "swap image exists", | |
5172 | + "swap parse sig location", | |
5173 | + "sysfs kobj", | |
5174 | + "swap mark resume attempted buffer", /* 35 */ | |
5175 | + "cluster member", | |
5176 | + "boot kernel data buffer", | |
7e46296a AM |
5177 | + "setting swap signature", |
5178 | + "block i/o bdev struct" | |
2380c486 JR |
5179 | +}; |
5180 | + | |
5181 | +#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \ | |
5182 | + do { \ | |
5183 | + BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \ | |
5184 | + \ | |
5185 | + if (FAIL_NUM == toi_fail_num) { \ | |
5186 | + atomic_inc(&toi_test_count[FAIL_NUM]); \ | |
5187 | + toi_fail_num = 0; \ | |
5188 | + return FAIL_VAL; \ | |
5189 | + } \ | |
5190 | + } while (0) | |
5191 | + | |
9474138d | 5192 | +static void alloc_update_stats(int fail_num, void *result, int size) |
2380c486 JR |
5193 | +{ |
5194 | + if (!result) { | |
5195 | + atomic_inc(&toi_fail_count[fail_num]); | |
5196 | + return; | |
5197 | + } | |
5198 | + | |
5199 | + atomic_inc(&toi_alloc_count[fail_num]); | |
5200 | + if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { | |
5201 | + mutex_lock(&toi_alloc_mutex); | |
5202 | + toi_cur_allocd[fail_num]++; | |
7e46296a | 5203 | + cur_allocd += size; |
2380c486 JR |
5204 | + if (unlikely(cur_allocd > max_allocd)) { |
5205 | + int i; | |
5206 | + | |
5207 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) | |
5208 | + toi_max_allocd[i] = toi_cur_allocd[i]; | |
5209 | + max_allocd = cur_allocd; | |
5210 | + } | |
5211 | + mutex_unlock(&toi_alloc_mutex); | |
5212 | + } | |
5213 | +} | |
5214 | + | |
9474138d | 5215 | +static void free_update_stats(int fail_num, int size) |
2380c486 JR |
5216 | +{ |
5217 | + BUG_ON(fail_num >= TOI_ALLOC_PATHS); | |
5218 | + atomic_inc(&toi_free_count[fail_num]); | |
7e46296a AM |
5219 | + if (unlikely(atomic_read(&toi_free_count[fail_num]) > |
5220 | + atomic_read(&toi_alloc_count[fail_num]))) | |
5221 | + dump_stack(); | |
2380c486 JR |
5222 | + if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { |
5223 | + mutex_lock(&toi_alloc_mutex); | |
7e46296a | 5224 | + cur_allocd -= size; |
2380c486 JR |
5225 | + toi_cur_allocd[fail_num]--; |
5226 | + mutex_unlock(&toi_alloc_mutex); | |
5227 | + } | |
5228 | +} | |
5229 | + | |
5230 | +void *toi_kzalloc(int fail_num, size_t size, gfp_t flags) | |
5231 | +{ | |
5232 | + void *result; | |
5233 | + | |
5234 | + if (toi_alloc_ops.enabled) | |
5235 | + MIGHT_FAIL(fail_num, NULL); | |
5236 | + result = kzalloc(size, flags); | |
5237 | + if (toi_alloc_ops.enabled) | |
9474138d | 5238 | + alloc_update_stats(fail_num, result, size); |
7e46296a AM |
5239 | + if (fail_num == trace_allocs) |
5240 | + dump_stack(); | |
2380c486 JR |
5241 | + return result; |
5242 | +} | |
5243 | +EXPORT_SYMBOL_GPL(toi_kzalloc); | |
5244 | + | |
5245 | +unsigned long toi_get_free_pages(int fail_num, gfp_t mask, | |
5246 | + unsigned int order) | |
5247 | +{ | |
5248 | + unsigned long result; | |
5249 | + | |
5250 | + if (toi_alloc_ops.enabled) | |
5251 | + MIGHT_FAIL(fail_num, 0); | |
5252 | + result = __get_free_pages(mask, order); | |
5253 | + if (toi_alloc_ops.enabled) | |
9474138d AM |
5254 | + alloc_update_stats(fail_num, (void *) result, |
5255 | + PAGE_SIZE << order); | |
7e46296a AM |
5256 | + if (fail_num == trace_allocs) |
5257 | + dump_stack(); | |
2380c486 JR |
5258 | + return result; |
5259 | +} | |
5260 | +EXPORT_SYMBOL_GPL(toi_get_free_pages); | |
5261 | + | |
5262 | +struct page *toi_alloc_page(int fail_num, gfp_t mask) | |
5263 | +{ | |
5264 | + struct page *result; | |
5265 | + | |
5266 | + if (toi_alloc_ops.enabled) | |
5267 | + MIGHT_FAIL(fail_num, NULL); | |
5268 | + result = alloc_page(mask); | |
5269 | + if (toi_alloc_ops.enabled) | |
9474138d | 5270 | + alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); |
7e46296a AM |
5271 | + if (fail_num == trace_allocs) |
5272 | + dump_stack(); | |
2380c486 JR |
5273 | + return result; |
5274 | +} | |
5275 | +EXPORT_SYMBOL_GPL(toi_alloc_page); | |
5276 | + | |
5277 | +unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask) | |
5278 | +{ | |
5279 | + unsigned long result; | |
5280 | + | |
7e46296a AM |
5281 | + if (fail_num == trace_allocs) |
5282 | + dump_stack(); | |
2380c486 JR |
5283 | + if (toi_alloc_ops.enabled) |
5284 | + MIGHT_FAIL(fail_num, 0); | |
5285 | + result = get_zeroed_page(mask); | |
5286 | + if (toi_alloc_ops.enabled) | |
9474138d | 5287 | + alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); |
7e46296a AM |
5288 | + if (fail_num == trace_allocs) |
5289 | + dump_stack(); | |
2380c486 JR |
5290 | + return result; |
5291 | +} | |
5292 | +EXPORT_SYMBOL_GPL(toi_get_zeroed_page); | |
5293 | + | |
9474138d | 5294 | +void toi_kfree(int fail_num, const void *arg, int size) |
2380c486 JR |
5295 | +{ |
5296 | + if (arg && toi_alloc_ops.enabled) | |
9474138d | 5297 | + free_update_stats(fail_num, size); |
2380c486 | 5298 | + |
7e46296a AM |
5299 | + if (fail_num == trace_allocs) |
5300 | + dump_stack(); | |
2380c486 JR |
5301 | + kfree(arg); |
5302 | +} | |
5303 | +EXPORT_SYMBOL_GPL(toi_kfree); | |
5304 | + | |
5305 | +void toi_free_page(int fail_num, unsigned long virt) | |
5306 | +{ | |
5307 | + if (virt && toi_alloc_ops.enabled) | |
9474138d | 5308 | + free_update_stats(fail_num, PAGE_SIZE); |
2380c486 | 5309 | + |
7e46296a AM |
5310 | + if (fail_num == trace_allocs) |
5311 | + dump_stack(); | |
2380c486 JR |
5312 | + free_page(virt); |
5313 | +} | |
5314 | +EXPORT_SYMBOL_GPL(toi_free_page); | |
5315 | + | |
5316 | +void toi__free_page(int fail_num, struct page *page) | |
5317 | +{ | |
5318 | + if (page && toi_alloc_ops.enabled) | |
9474138d | 5319 | + free_update_stats(fail_num, PAGE_SIZE); |
2380c486 | 5320 | + |
7e46296a AM |
5321 | + if (fail_num == trace_allocs) |
5322 | + dump_stack(); | |
2380c486 JR |
5323 | + __free_page(page); |
5324 | +} | |
5325 | +EXPORT_SYMBOL_GPL(toi__free_page); | |
5326 | + | |
5327 | +void toi_free_pages(int fail_num, struct page *page, int order) | |
5328 | +{ | |
5329 | + if (page && toi_alloc_ops.enabled) | |
9474138d | 5330 | + free_update_stats(fail_num, PAGE_SIZE << order); |
2380c486 | 5331 | + |
7e46296a AM |
5332 | + if (fail_num == trace_allocs) |
5333 | + dump_stack(); | |
2380c486 JR |
5334 | + __free_pages(page, order); |
5335 | +} | |
5336 | + | |
5337 | +void toi_alloc_print_debug_stats(void) | |
5338 | +{ | |
5339 | + int i, header_done = 0; | |
5340 | + | |
5341 | + if (!toi_alloc_ops.enabled) | |
5342 | + return; | |
5343 | + | |
5344 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) | |
5345 | + if (atomic_read(&toi_alloc_count[i]) != | |
5346 | + atomic_read(&toi_free_count[i])) { | |
5347 | + if (!header_done) { | |
5348 | + printk(KERN_INFO "Idx Allocs Frees Tests " | |
7e46296a | 5349 | + " Fails Max Description\n"); |
2380c486 JR |
5350 | + header_done = 1; |
5351 | + } | |
5352 | + | |
5353 | + printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i, | |
5354 | + atomic_read(&toi_alloc_count[i]), | |
5355 | + atomic_read(&toi_free_count[i]), | |
5356 | + atomic_read(&toi_test_count[i]), | |
5357 | + atomic_read(&toi_fail_count[i]), | |
5358 | + toi_max_allocd[i], | |
5359 | + toi_alloc_desc[i]); | |
5360 | + } | |
5361 | +} | |
5362 | +EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats); | |
5363 | + | |
5364 | +static int toi_alloc_initialise(int starting_cycle) | |
5365 | +{ | |
5366 | + int i; | |
5367 | + | |
7e46296a AM |
5368 | + if (!starting_cycle) |
5369 | + return 0; | |
5370 | + | |
5371 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) { | |
5372 | + atomic_set(&toi_alloc_count[i], 0); | |
5373 | + atomic_set(&toi_free_count[i], 0); | |
5374 | + atomic_set(&toi_test_count[i], 0); | |
5375 | + atomic_set(&toi_fail_count[i], 0); | |
5376 | + toi_cur_allocd[i] = 0; | |
5377 | + toi_max_allocd[i] = 0; | |
5378 | + }; | |
2380c486 | 5379 | + |
7e46296a AM |
5380 | + max_allocd = 0; |
5381 | + cur_allocd = 0; | |
2380c486 JR |
5382 | + return 0; |
5383 | +} | |
5384 | + | |
5385 | +static struct toi_sysfs_data sysfs_params[] = { | |
5386 | + SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL), | |
7e46296a AM |
5387 | + SYSFS_INT("trace", SYSFS_RW, &trace_allocs, 0, TOI_ALLOC_PATHS, 0, |
5388 | + NULL), | |
2380c486 JR |
5389 | + SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action, |
5390 | + TOI_GET_MAX_MEM_ALLOCD, 0), | |
5391 | + SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0, | |
5392 | + NULL) | |
5393 | +}; | |
5394 | + | |
5395 | +static struct toi_module_ops toi_alloc_ops = { | |
5396 | + .type = MISC_HIDDEN_MODULE, | |
5397 | + .name = "allocation debugging", | |
5398 | + .directory = "alloc", | |
5399 | + .module = THIS_MODULE, | |
5400 | + .early = 1, | |
5401 | + .initialise = toi_alloc_initialise, | |
5402 | + | |
5403 | + .sysfs_data = sysfs_params, | |
5404 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
5405 | + sizeof(struct toi_sysfs_data), | |
5406 | +}; | |
5407 | + | |
5408 | +int toi_alloc_init(void) | |
5409 | +{ | |
5410 | + int result = toi_register_module(&toi_alloc_ops); | |
2380c486 JR |
5411 | + return result; |
5412 | +} | |
5413 | + | |
5414 | +void toi_alloc_exit(void) | |
5415 | +{ | |
5416 | + toi_unregister_module(&toi_alloc_ops); | |
5417 | +} | |
5418 | +#endif | |
5419 | diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h | |
5420 | new file mode 100644 | |
de6743ae | 5421 | index 0000000..77e0f0d |
2380c486 JR |
5422 | --- /dev/null |
5423 | +++ b/kernel/power/tuxonice_alloc.h | |
de6743ae | 5424 | @@ -0,0 +1,52 @@ |
2380c486 JR |
5425 | +/* |
5426 | + * kernel/power/tuxonice_alloc.h | |
5427 | + * | |
5dd10c98 | 5428 | + * Copyright (C) 2008-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5429 | + * |
5430 | + * This file is released under the GPLv2. | |
5431 | + * | |
5432 | + */ | |
5433 | + | |
de6743ae | 5434 | +#include <linux/slab.h> |
5dd10c98 | 5435 | +#define TOI_WAIT_GFP (GFP_NOFS | __GFP_NOWARN) |
2380c486 JR |
5436 | +#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN) |
5437 | + | |
5438 | +#ifdef CONFIG_PM_DEBUG | |
5439 | +extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags); | |
9474138d | 5440 | +extern void toi_kfree(int fail_num, const void *arg, int size); |
2380c486 JR |
5441 | + |
5442 | +extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask, | |
5443 | + unsigned int order); | |
5444 | +#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0) | |
5445 | +extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask); | |
5446 | +extern void toi_free_page(int fail_num, unsigned long buf); | |
5447 | +extern void toi__free_page(int fail_num, struct page *page); | |
5448 | +extern void toi_free_pages(int fail_num, struct page *page, int order); | |
5449 | +extern struct page *toi_alloc_page(int fail_num, gfp_t mask); | |
5450 | +extern int toi_alloc_init(void); | |
5451 | +extern void toi_alloc_exit(void); | |
5452 | + | |
5453 | +extern void toi_alloc_print_debug_stats(void); | |
5454 | + | |
5455 | +#else /* CONFIG_PM_DEBUG */ | |
5456 | + | |
5457 | +#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS)) | |
9474138d | 5458 | +#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN)) |
2380c486 JR |
5459 | + |
5460 | +#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER) | |
5461 | +#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS) | |
5462 | +#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS) | |
5463 | +#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0) | |
5464 | +#define toi__free_page(FAIL, PAGE) __free_page(PAGE) | |
5465 | +#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER) | |
5466 | +#define toi_alloc_page(FAIL, MASK) alloc_page(MASK) | |
5467 | +static inline int toi_alloc_init(void) | |
5468 | +{ | |
5469 | + return 0; | |
5470 | +} | |
5471 | + | |
5472 | +static inline void toi_alloc_exit(void) { } | |
5473 | + | |
5474 | +static inline void toi_alloc_print_debug_stats(void) { } | |
5475 | + | |
5476 | +#endif | |
5477 | diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c | |
5478 | new file mode 100644 | |
85eb3c9d | 5479 | index 0000000..1e96e26 |
2380c486 JR |
5480 | --- /dev/null |
5481 | +++ b/kernel/power/tuxonice_atomic_copy.c | |
85eb3c9d | 5482 | @@ -0,0 +1,429 @@ |
2380c486 JR |
5483 | +/* |
5484 | + * kernel/power/tuxonice_atomic_copy.c | |
5485 | + * | |
5dd10c98 | 5486 | + * Copyright 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5487 | + * |
5488 | + * Distributed under GPLv2. | |
5489 | + * | |
5490 | + * Routines for doing the atomic save/restore. | |
5491 | + */ | |
5492 | + | |
5493 | +#include <linux/suspend.h> | |
5494 | +#include <linux/highmem.h> | |
5495 | +#include <linux/cpu.h> | |
5496 | +#include <linux/freezer.h> | |
5497 | +#include <linux/console.h> | |
9474138d | 5498 | +#include <asm/suspend.h> |
2380c486 JR |
5499 | +#include "tuxonice.h" |
5500 | +#include "tuxonice_storage.h" | |
5501 | +#include "tuxonice_power_off.h" | |
5502 | +#include "tuxonice_ui.h" | |
2380c486 JR |
5503 | +#include "tuxonice_io.h" |
5504 | +#include "tuxonice_prepare_image.h" | |
5505 | +#include "tuxonice_pageflags.h" | |
5506 | +#include "tuxonice_checksum.h" | |
5507 | +#include "tuxonice_builtin.h" | |
5508 | +#include "tuxonice_atomic_copy.h" | |
5509 | +#include "tuxonice_alloc.h" | |
5dd10c98 | 5510 | +#include "tuxonice_modules.h" |
2380c486 | 5511 | + |
92bca44c | 5512 | +unsigned long extra_pd1_pages_used; |
2380c486 JR |
5513 | + |
5514 | +/** | |
5515 | + * free_pbe_list - free page backup entries used by the atomic copy code. | |
5516 | + * @list: List to free. | |
5517 | + * @highmem: Whether the list is in highmem. | |
5518 | + * | |
5519 | + * Normally, this function isn't used. If, however, we need to abort before | |
5520 | + * doing the atomic copy, we use this to free the pbes previously allocated. | |
5521 | + **/ | |
5522 | +static void free_pbe_list(struct pbe **list, int highmem) | |
5523 | +{ | |
5524 | + while (*list) { | |
5525 | + int i; | |
5526 | + struct pbe *free_pbe, *next_page = NULL; | |
5527 | + struct page *page; | |
5528 | + | |
5529 | + if (highmem) { | |
5530 | + page = (struct page *) *list; | |
5531 | + free_pbe = (struct pbe *) kmap(page); | |
5532 | + } else { | |
5533 | + page = virt_to_page(*list); | |
5534 | + free_pbe = *list; | |
5535 | + } | |
5536 | + | |
5537 | + for (i = 0; i < PBES_PER_PAGE; i++) { | |
5538 | + if (!free_pbe) | |
5539 | + break; | |
5540 | + if (highmem) | |
5541 | + toi__free_page(29, free_pbe->address); | |
5542 | + else | |
5543 | + toi_free_page(29, | |
5544 | + (unsigned long) free_pbe->address); | |
5545 | + free_pbe = free_pbe->next; | |
5546 | + } | |
5547 | + | |
5548 | + if (highmem) { | |
5549 | + if (free_pbe) | |
5550 | + next_page = free_pbe; | |
5551 | + kunmap(page); | |
5552 | + } else { | |
5553 | + if (free_pbe) | |
5554 | + next_page = free_pbe; | |
5555 | + } | |
5556 | + | |
5557 | + toi__free_page(29, page); | |
5558 | + *list = (struct pbe *) next_page; | |
5559 | + }; | |
5560 | +} | |
5561 | + | |
5562 | +/** | |
5563 | + * copyback_post - post atomic-restore actions | |
5564 | + * | |
5565 | + * After doing the atomic restore, we have a few more things to do: | |
5566 | + * 1) We want to retain some values across the restore, so we now copy | |
5567 | + * these from the nosave variables to the normal ones. | |
5568 | + * 2) Set the status flags. | |
5569 | + * 3) Resume devices. | |
5570 | + * 4) Tell userui so it can redraw & restore settings. | |
5571 | + * 5) Reread the page cache. | |
5572 | + **/ | |
5573 | +void copyback_post(void) | |
5574 | +{ | |
5575 | + struct toi_boot_kernel_data *bkd = | |
5576 | + (struct toi_boot_kernel_data *) boot_kernel_data_buffer; | |
5577 | + | |
5578 | + /* | |
5579 | + * The boot kernel's data may be larger (newer version) or | |
5580 | + * smaller (older version) than ours. Copy the minimum | |
5581 | + * of the two sizes, so that we don't overwrite valid values | |
5582 | + * from pre-atomic copy. | |
5583 | + */ | |
5584 | + | |
5585 | + memcpy(&toi_bkd, (char *) boot_kernel_data_buffer, | |
5586 | + min_t(int, sizeof(struct toi_boot_kernel_data), | |
5587 | + bkd->size)); | |
5588 | + | |
5589 | + if (toi_activate_storage(1)) | |
5590 | + panic("Failed to reactivate our storage."); | |
5591 | + | |
5dd10c98 | 5592 | + toi_post_atomic_restore_modules(bkd); |
2380c486 JR |
5593 | + |
5594 | + toi_cond_pause(1, "About to reload secondary pagedir."); | |
5595 | + | |
5596 | + if (read_pageset2(0)) | |
5597 | + panic("Unable to successfully reread the page cache."); | |
5598 | + | |
5599 | + /* | |
5600 | + * If the user wants to sleep again after resuming from full-off, | |
5601 | + * it's most likely to be in order to suspend to ram, so we'll | |
5602 | + * do this check after loading pageset2, to give them the fastest | |
5603 | + * wakeup when they are ready to use the computer again. | |
5604 | + */ | |
5605 | + toi_check_resleep(); | |
5606 | +} | |
5607 | + | |
5608 | +/** | |
5609 | + * toi_copy_pageset1 - do the atomic copy of pageset1 | |
5610 | + * | |
5611 | + * Make the atomic copy of pageset1. We can't use copy_page (as we once did) | |
5612 | + * because we can't be sure what side effects it has. On my old Duron, with | |
5613 | + * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt | |
5614 | + * count at resume time 4 instead of 3. | |
5615 | + * | |
5616 | + * We don't want to call kmap_atomic unconditionally because it has the side | |
5617 | + * effect of incrementing the preempt count, which will leave it one too high | |
5618 | + * post resume (the page containing the preempt count will be copied after | |
5619 | + * its incremented. This is essentially the same problem. | |
5620 | + **/ | |
5621 | +void toi_copy_pageset1(void) | |
5622 | +{ | |
5623 | + int i; | |
5624 | + unsigned long source_index, dest_index; | |
5625 | + | |
5626 | + memory_bm_position_reset(pageset1_map); | |
5627 | + memory_bm_position_reset(pageset1_copy_map); | |
5628 | + | |
5629 | + source_index = memory_bm_next_pfn(pageset1_map); | |
5630 | + dest_index = memory_bm_next_pfn(pageset1_copy_map); | |
5631 | + | |
5632 | + for (i = 0; i < pagedir1.size; i++) { | |
5633 | + unsigned long *origvirt, *copyvirt; | |
5634 | + struct page *origpage, *copypage; | |
5635 | + int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1, | |
5636 | + was_present1, was_present2; | |
5637 | + | |
5638 | + origpage = pfn_to_page(source_index); | |
5639 | + copypage = pfn_to_page(dest_index); | |
5640 | + | |
5641 | + origvirt = PageHighMem(origpage) ? | |
5642 | + kmap_atomic(origpage, KM_USER0) : | |
5643 | + page_address(origpage); | |
5644 | + | |
5645 | + copyvirt = PageHighMem(copypage) ? | |
5646 | + kmap_atomic(copypage, KM_USER1) : | |
5647 | + page_address(copypage); | |
5648 | + | |
5649 | + was_present1 = kernel_page_present(origpage); | |
5650 | + if (!was_present1) | |
5651 | + kernel_map_pages(origpage, 1, 1); | |
5652 | + | |
5653 | + was_present2 = kernel_page_present(copypage); | |
5654 | + if (!was_present2) | |
5655 | + kernel_map_pages(copypage, 1, 1); | |
5656 | + | |
5657 | + while (loop >= 0) { | |
5658 | + *(copyvirt + loop) = *(origvirt + loop); | |
5659 | + loop--; | |
5660 | + } | |
5661 | + | |
5662 | + if (!was_present1) | |
5663 | + kernel_map_pages(origpage, 1, 0); | |
5664 | + | |
5665 | + if (!was_present2) | |
5666 | + kernel_map_pages(copypage, 1, 0); | |
5667 | + | |
5668 | + if (PageHighMem(origpage)) | |
5669 | + kunmap_atomic(origvirt, KM_USER0); | |
5670 | + | |
5671 | + if (PageHighMem(copypage)) | |
5672 | + kunmap_atomic(copyvirt, KM_USER1); | |
5673 | + | |
5674 | + source_index = memory_bm_next_pfn(pageset1_map); | |
5675 | + dest_index = memory_bm_next_pfn(pageset1_copy_map); | |
5676 | + } | |
5677 | +} | |
5678 | + | |
5679 | +/** | |
5680 | + * __toi_post_context_save - steps after saving the cpu context | |
5681 | + * | |
5682 | + * Steps taken after saving the CPU state to make the actual | |
5683 | + * atomic copy. | |
5684 | + * | |
5685 | + * Called from swsusp_save in snapshot.c via toi_post_context_save. | |
5686 | + **/ | |
5687 | +int __toi_post_context_save(void) | |
5688 | +{ | |
92bca44c | 5689 | + unsigned long old_ps1_size = pagedir1.size; |
2380c486 JR |
5690 | + |
5691 | + check_checksums(); | |
5692 | + | |
5693 | + free_checksum_pages(); | |
5694 | + | |
5695 | + toi_recalculate_image_contents(1); | |
5696 | + | |
92bca44c AM |
5697 | + extra_pd1_pages_used = pagedir1.size > old_ps1_size ? |
5698 | + pagedir1.size - old_ps1_size : 0; | |
2380c486 JR |
5699 | + |
5700 | + if (extra_pd1_pages_used > extra_pd1_pages_allowance) { | |
92bca44c | 5701 | + printk(KERN_INFO "Pageset1 has grown by %lu pages. " |
2380c486 JR |
5702 | + "extra_pages_allowance is currently only %lu.\n", |
5703 | + pagedir1.size - old_ps1_size, | |
5704 | + extra_pd1_pages_allowance); | |
5705 | + | |
5706 | + /* | |
5707 | + * Highlevel code will see this, clear the state and | |
5708 | + * retry if we haven't already done so twice. | |
5709 | + */ | |
85eb3c9d AM |
5710 | + if (any_to_free(1)) { |
5711 | + set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); | |
5712 | + return 1; | |
5713 | + } | |
5714 | + if (try_allocate_extra_memory()) { | |
5715 | + printk(KERN_INFO "Failed to allocate the extra memory" | |
5716 | + " needed. Restarting the process."); | |
5717 | + set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); | |
5718 | + return 1; | |
5719 | + } | |
5720 | + printk(KERN_INFO "However it looks like there's enough" | |
5721 | + " free ram and storage to handle this, so " | |
5722 | + " continuing anyway."); | |
2380c486 JR |
5723 | + } |
5724 | + | |
5725 | + if (!test_action_state(TOI_TEST_FILTER_SPEED) && | |
5726 | + !test_action_state(TOI_TEST_BIO)) | |
5727 | + toi_copy_pageset1(); | |
5728 | + | |
5729 | + return 0; | |
5730 | +} | |
5731 | + | |
5732 | +/** | |
5733 | + * toi_hibernate - high level code for doing the atomic copy | |
5734 | + * | |
5735 | + * High-level code which prepares to do the atomic copy. Loosely based | |
5736 | + * on the swsusp version, but with the following twists: | |
5737 | + * - We set toi_running so the swsusp code uses our code paths. | |
5738 | + * - We give better feedback regarding what goes wrong if there is a | |
5739 | + * problem. | |
5740 | + * - We use an extra function to call the assembly, just in case this code | |
5741 | + * is in a module (return address). | |
5742 | + **/ | |
5743 | +int toi_hibernate(void) | |
5744 | +{ | |
5745 | + int error; | |
5746 | + | |
5747 | + toi_running = 1; /* For the swsusp code we use :< */ | |
5748 | + | |
5749 | + error = toi_lowlevel_builtin(); | |
5750 | + | |
5751 | + toi_running = 0; | |
5752 | + return error; | |
5753 | +} | |
5754 | + | |
5755 | +/** | |
5756 | + * toi_atomic_restore - prepare to do the atomic restore | |
5757 | + * | |
5758 | + * Get ready to do the atomic restore. This part gets us into the same | |
5759 | + * state we are in prior to do calling do_toi_lowlevel while | |
5760 | + * hibernating: hot-unplugging secondary cpus and freeze processes, | |
5761 | + * before starting the thread that will do the restore. | |
5762 | + **/ | |
5763 | +int toi_atomic_restore(void) | |
5764 | +{ | |
5765 | + int error; | |
5766 | + | |
5767 | + toi_running = 1; | |
5768 | + | |
5769 | + toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore."); | |
5770 | + | |
5771 | + memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line, | |
5772 | + strlen(saved_command_line)); | |
5773 | + | |
5dd10c98 AM |
5774 | + toi_pre_atomic_restore_modules(&toi_bkd); |
5775 | + | |
2380c486 JR |
5776 | + if (add_boot_kernel_data_pbe()) |
5777 | + goto Failed; | |
5778 | + | |
5779 | + toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); | |
5780 | + | |
5781 | + if (toi_go_atomic(PMSG_QUIESCE, 0)) | |
5782 | + goto Failed; | |
5783 | + | |
5784 | + /* We'll ignore saved state, but this gets preempt count (etc) right */ | |
5785 | + save_processor_state(); | |
5786 | + | |
5787 | + error = swsusp_arch_resume(); | |
5788 | + /* | |
5789 | + * Code below is only ever reached in case of failure. Otherwise | |
5790 | + * execution continues at place where swsusp_arch_suspend was called. | |
5791 | + * | |
5792 | + * We don't know whether it's safe to continue (this shouldn't happen), | |
5793 | + * so lets err on the side of caution. | |
5794 | + */ | |
5795 | + BUG(); | |
5796 | + | |
5797 | +Failed: | |
5798 | + free_pbe_list(&restore_pblist, 0); | |
5799 | +#ifdef CONFIG_HIGHMEM | |
5800 | + free_pbe_list(&restore_highmem_pblist, 1); | |
5801 | +#endif | |
2380c486 JR |
5802 | + toi_running = 0; |
5803 | + return 1; | |
5804 | +} | |
5805 | + | |
5806 | +/** | |
5807 | + * toi_go_atomic - do the actual atomic copy/restore | |
92bca44c AM |
5808 | + * @state: The state to use for dpm_suspend_start & power_down calls. |
5809 | + * @suspend_time: Whether we're suspending or resuming. | |
2380c486 JR |
5810 | + **/ |
5811 | +int toi_go_atomic(pm_message_t state, int suspend_time) | |
5812 | +{ | |
5813 | + if (suspend_time && platform_begin(1)) { | |
5814 | + set_abort_result(TOI_PLATFORM_PREP_FAILED); | |
2380c486 JR |
5815 | + return 1; |
5816 | + } | |
5817 | + | |
5818 | + suspend_console(); | |
5819 | + | |
92bca44c | 5820 | + if (dpm_suspend_start(state)) { |
2380c486 JR |
5821 | + set_abort_result(TOI_DEVICE_REFUSED); |
5822 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3); | |
5823 | + return 1; | |
5824 | + } | |
5825 | + | |
9474138d AM |
5826 | + if (suspend_time && arch_prepare_suspend()) { |
5827 | + set_abort_result(TOI_ARCH_PREPARE_FAILED); | |
5828 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1); | |
5829 | + return 1; | |
5830 | + } | |
5831 | + | |
92bca44c AM |
5832 | + /* At this point, dpm_suspend_start() has been called, but *not* |
5833 | + * dpm_suspend_noirq(). We *must* dpm_suspend_noirq() now. | |
9474138d AM |
5834 | + * Otherwise, drivers for some devices (e.g. interrupt controllers) |
5835 | + * become desynchronized with the actual state of the hardware | |
5836 | + * at resume time, and evil weirdness ensues. | |
5837 | + */ | |
5838 | + | |
92bca44c | 5839 | + if (dpm_suspend_noirq(state)) { |
9474138d AM |
5840 | + set_abort_result(TOI_DEVICE_REFUSED); |
5841 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1); | |
5842 | + return 1; | |
5843 | + } | |
5844 | + | |
2380c486 JR |
5845 | + if (suspend_time && platform_pre_snapshot(1)) { |
5846 | + set_abort_result(TOI_PRE_SNAPSHOT_FAILED); | |
9474138d | 5847 | + toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1); |
2380c486 JR |
5848 | + return 1; |
5849 | + } | |
5850 | + | |
5851 | + if (!suspend_time && platform_pre_restore(1)) { | |
5852 | + set_abort_result(TOI_PRE_RESTORE_FAILED); | |
9474138d | 5853 | + toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1); |
2380c486 JR |
5854 | + return 1; |
5855 | + } | |
5856 | + | |
5857 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG)) { | |
5858 | + if (disable_nonboot_cpus()) { | |
5859 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
5860 | + toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG, | |
9474138d | 5861 | + suspend_time, 1); |
2380c486 JR |
5862 | + return 1; |
5863 | + } | |
5864 | + } | |
5865 | + | |
2380c486 JR |
5866 | + local_irq_disable(); |
5867 | + | |
2380c486 JR |
5868 | + if (sysdev_suspend(state)) { |
5869 | + set_abort_result(TOI_SYSDEV_REFUSED); | |
9474138d | 5870 | + toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1); |
2380c486 JR |
5871 | + return 1; |
5872 | + } | |
5873 | + | |
5874 | + return 0; | |
5875 | +} | |
5876 | + | |
5877 | +/** | |
5878 | + * toi_end_atomic - post atomic copy/restore routines | |
5879 | + * @stage: What step to start at. | |
5880 | + * @suspend_time: Whether we're suspending or resuming. | |
5881 | + * @error: Whether we're recovering from an error. | |
5882 | + **/ | |
5883 | +void toi_end_atomic(int stage, int suspend_time, int error) | |
5884 | +{ | |
5885 | + switch (stage) { | |
5886 | + case ATOMIC_ALL_STEPS: | |
5887 | + if (!suspend_time) | |
5888 | + platform_leave(1); | |
5889 | + sysdev_resume(); | |
2380c486 JR |
5890 | + case ATOMIC_STEP_IRQS: |
5891 | + local_irq_enable(); | |
2380c486 JR |
5892 | + case ATOMIC_STEP_CPU_HOTPLUG: |
5893 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG)) | |
5894 | + enable_nonboot_cpus(); | |
9474138d | 5895 | + platform_restore_cleanup(1); |
2380c486 JR |
5896 | + case ATOMIC_STEP_PLATFORM_FINISH: |
5897 | + platform_finish(1); | |
92bca44c | 5898 | + dpm_resume_noirq(suspend_time ? |
9474138d | 5899 | + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); |
2380c486 JR |
5900 | + case ATOMIC_STEP_DEVICE_RESUME: |
5901 | + if (suspend_time && (error & 2)) | |
5902 | + platform_recover(1); | |
92bca44c | 5903 | + dpm_resume_end(suspend_time ? |
2380c486 JR |
5904 | + ((error & 1) ? PMSG_RECOVER : PMSG_THAW) : |
5905 | + PMSG_RESTORE); | |
2380c486 | 5906 | + resume_console(); |
2380c486 JR |
5907 | + platform_end(1); |
5908 | + | |
5909 | + toi_prepare_status(DONT_CLEAR_BAR, "Post atomic."); | |
5910 | + } | |
5911 | +} | |
5912 | diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h | |
5913 | new file mode 100644 | |
5dd10c98 | 5914 | index 0000000..e61b27b |
2380c486 JR |
5915 | --- /dev/null |
5916 | +++ b/kernel/power/tuxonice_atomic_copy.h | |
9474138d | 5917 | @@ -0,0 +1,20 @@ |
2380c486 JR |
5918 | +/* |
5919 | + * kernel/power/tuxonice_atomic_copy.h | |
5920 | + * | |
5dd10c98 | 5921 | + * Copyright 2008-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5922 | + * |
5923 | + * Distributed under GPLv2. | |
5924 | + * | |
5925 | + * Routines for doing the atomic save/restore. | |
5926 | + */ | |
5927 | + | |
5928 | +enum { | |
5929 | + ATOMIC_ALL_STEPS, | |
2380c486 JR |
5930 | + ATOMIC_STEP_IRQS, |
5931 | + ATOMIC_STEP_CPU_HOTPLUG, | |
5932 | + ATOMIC_STEP_PLATFORM_FINISH, | |
5933 | + ATOMIC_STEP_DEVICE_RESUME, | |
2380c486 JR |
5934 | +}; |
5935 | + | |
5936 | +int toi_go_atomic(pm_message_t state, int toi_time); | |
5937 | +void toi_end_atomic(int stage, int toi_time, int error); | |
7e46296a | 5938 | diff --git a/kernel/power/tuxonice_bio.h b/kernel/power/tuxonice_bio.h |
2380c486 | 5939 | new file mode 100644 |
5dd10c98 | 5940 | index 0000000..9627ccc |
2380c486 | 5941 | --- /dev/null |
7e46296a | 5942 | +++ b/kernel/power/tuxonice_bio.h |
5dd10c98 | 5943 | @@ -0,0 +1,77 @@ |
2380c486 | 5944 | +/* |
7e46296a | 5945 | + * kernel/power/tuxonice_bio.h |
2380c486 | 5946 | + * |
5dd10c98 | 5947 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
5948 | + * |
5949 | + * Distributed under GPLv2. | |
5950 | + * | |
7e46296a AM |
5951 | + * This file contains declarations for functions exported from |
5952 | + * tuxonice_bio.c, which contains low level io functions. | |
2380c486 JR |
5953 | + */ |
5954 | + | |
7e46296a AM |
5955 | +#include <linux/buffer_head.h> |
5956 | +#include "tuxonice_extent.h" | |
2380c486 | 5957 | + |
7e46296a AM |
5958 | +void toi_put_extent_chain(struct hibernate_extent_chain *chain); |
5959 | +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, | |
5960 | + unsigned long start, unsigned long end); | |
2380c486 | 5961 | + |
7e46296a AM |
5962 | +struct hibernate_extent_saved_state { |
5963 | + int extent_num; | |
5964 | + struct hibernate_extent *extent_ptr; | |
5965 | + unsigned long offset; | |
5966 | +}; | |
2380c486 | 5967 | + |
7e46296a AM |
5968 | +struct toi_bdev_info { |
5969 | + struct toi_bdev_info *next; | |
5970 | + struct hibernate_extent_chain blocks; | |
5971 | + struct block_device *bdev; | |
5972 | + struct toi_module_ops *allocator; | |
5973 | + int allocator_index; | |
5974 | + struct hibernate_extent_chain allocations; | |
5dd10c98 | 5975 | + char name[266]; /* "swap on " or "file " + up to 256 chars */ |
2380c486 | 5976 | + |
7e46296a AM |
5977 | + /* Saved in header */ |
5978 | + char uuid[17]; | |
5979 | + dev_t dev_t; | |
5980 | + int prio; | |
5981 | + int bmap_shift; | |
5982 | + int blocks_per_page; | |
5dd10c98 | 5983 | + unsigned long pages_used; |
7e46296a AM |
5984 | + struct hibernate_extent_saved_state saved_state[4]; |
5985 | +}; | |
2380c486 | 5986 | + |
7e46296a AM |
5987 | +struct toi_extent_iterate_state { |
5988 | + struct toi_bdev_info *current_chain; | |
5989 | + int num_chains; | |
5990 | + int saved_chain_number[4]; | |
5991 | + struct toi_bdev_info *saved_chain_ptr[4]; | |
5992 | +}; | |
2380c486 | 5993 | + |
7e46296a AM |
5994 | +/* |
5995 | + * Our exported interface so the swapwriter and filewriter don't | |
5996 | + * need these functions duplicated. | |
5997 | + */ | |
5998 | +struct toi_bio_ops { | |
5999 | + int (*bdev_page_io) (int rw, struct block_device *bdev, long pos, | |
6000 | + struct page *page); | |
6001 | + int (*register_storage)(struct toi_bdev_info *new); | |
6002 | + void (*free_storage)(void); | |
6003 | +}; | |
2380c486 | 6004 | + |
7e46296a AM |
6005 | +struct toi_allocator_ops { |
6006 | + unsigned long (*toi_swap_storage_available) (void); | |
6007 | +}; | |
2380c486 | 6008 | + |
7e46296a | 6009 | +extern struct toi_bio_ops toi_bio_ops; |
2380c486 | 6010 | + |
7e46296a AM |
6011 | +extern char *toi_writer_buffer; |
6012 | +extern int toi_writer_buffer_posn; | |
6013 | + | |
6014 | +struct toi_bio_allocator_ops { | |
6015 | + int (*register_storage) (void); | |
6016 | + unsigned long (*storage_available)(void); | |
6017 | + int (*allocate_storage) (struct toi_bdev_info *, unsigned long); | |
6018 | + int (*bmap) (struct toi_bdev_info *); | |
6019 | + void (*free_storage) (struct toi_bdev_info *); | |
6020 | +}; | |
6021 | diff --git a/kernel/power/tuxonice_bio_chains.c b/kernel/power/tuxonice_bio_chains.c | |
6022 | new file mode 100644 | |
85eb3c9d | 6023 | index 0000000..c85f078 |
7e46296a AM |
6024 | --- /dev/null |
6025 | +++ b/kernel/power/tuxonice_bio_chains.c | |
85eb3c9d | 6026 | @@ -0,0 +1,1049 @@ |
7e46296a AM |
6027 | +/* |
6028 | + * kernel/power/tuxonice_bio_devinfo.c | |
6029 | + * | |
5dd10c98 | 6030 | + * Copyright (C) 2009-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
6031 | + * |
6032 | + * Distributed under GPLv2. | |
6033 | + * | |
6034 | + */ | |
2380c486 | 6035 | + |
7e46296a AM |
6036 | +#include <linux/mm_types.h> |
6037 | +#include "tuxonice_bio.h" | |
6038 | +#include "tuxonice_bio_internal.h" | |
6039 | +#include "tuxonice_alloc.h" | |
6040 | +#include "tuxonice_ui.h" | |
6041 | +#include "tuxonice.h" | |
6042 | +#include "tuxonice_io.h" | |
2380c486 | 6043 | + |
7e46296a AM |
6044 | +static struct toi_bdev_info *prio_chain_head; |
6045 | +static int num_chains; | |
2380c486 JR |
6046 | + |
6047 | +/* Pointer to current entry being loaded/saved. */ | |
6048 | +struct toi_extent_iterate_state toi_writer_posn; | |
2380c486 | 6049 | + |
7e46296a AM |
6050 | +#define metadata_size (sizeof(struct toi_bdev_info) - \ |
6051 | + offsetof(struct toi_bdev_info, uuid)) | |
2380c486 | 6052 | + |
7e46296a AM |
6053 | +/* |
6054 | + * After section 0 (header) comes 2 => next_section[0] = 2 | |
6055 | + */ | |
6056 | +static int next_section[3] = { 2, 3, 1 }; | |
6057 | + | |
6058 | +/** | |
6059 | + * dump_block_chains - print the contents of the bdev info array. | |
6060 | + **/ | |
6061 | +void dump_block_chains(void) | |
6062 | +{ | |
6063 | + int i = 0; | |
6064 | + int j; | |
6065 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
2380c486 | 6066 | + |
7e46296a AM |
6067 | + while (cur_chain) { |
6068 | + struct hibernate_extent *this = cur_chain->blocks.first; | |
2380c486 | 6069 | + |
7e46296a | 6070 | + printk(KERN_DEBUG "Chain %d (prio %d):", i, cur_chain->prio); |
2380c486 | 6071 | + |
7e46296a AM |
6072 | + while (this) { |
6073 | + printk(KERN_CONT " [%lu-%lu]%s", this->start, | |
6074 | + this->end, this->next ? "," : ""); | |
6075 | + this = this->next; | |
6076 | + } | |
2380c486 | 6077 | + |
7e46296a AM |
6078 | + printk("\n"); |
6079 | + cur_chain = cur_chain->next; | |
5dd10c98 | 6080 | + i++; |
7e46296a AM |
6081 | + } |
6082 | + | |
6083 | + printk(KERN_DEBUG "Saved states:\n"); | |
6084 | + for (i = 0; i < 4; i++) { | |
6085 | + printk(KERN_DEBUG "Slot %d: Chain %d.\n", | |
6086 | + i, toi_writer_posn.saved_chain_number[i]); | |
6087 | + | |
6088 | + cur_chain = prio_chain_head; | |
6089 | + j = 0; | |
6090 | + while (cur_chain) { | |
6091 | + printk(KERN_DEBUG " Chain %d: Extent %d. Offset %lu.\n", | |
6092 | + j, cur_chain->saved_state[i].extent_num, | |
6093 | + cur_chain->saved_state[i].offset); | |
6094 | + cur_chain = cur_chain->next; | |
6095 | + j++; | |
6096 | + } | |
6097 | + printk(KERN_CONT "\n"); | |
6098 | + } | |
6099 | +} | |
2380c486 JR |
6100 | + |
6101 | +/** | |
2380c486 | 6102 | + * |
2380c486 | 6103 | + **/ |
7e46296a | 6104 | +static void toi_extent_chain_next(void) |
2380c486 | 6105 | +{ |
7e46296a | 6106 | + struct toi_bdev_info *this = toi_writer_posn.current_chain; |
2380c486 | 6107 | + |
7e46296a AM |
6108 | + if (!this->blocks.current_extent) |
6109 | + return; | |
2380c486 | 6110 | + |
7e46296a AM |
6111 | + if (this->blocks.current_offset == this->blocks.current_extent->end) { |
6112 | + if (this->blocks.current_extent->next) { | |
6113 | + this->blocks.current_extent = | |
6114 | + this->blocks.current_extent->next; | |
6115 | + this->blocks.current_offset = | |
6116 | + this->blocks.current_extent->start; | |
6117 | + } else { | |
6118 | + this->blocks.current_extent = NULL; | |
6119 | + this->blocks.current_offset = 0; | |
6120 | + } | |
6121 | + } else | |
6122 | + this->blocks.current_offset++; | |
6123 | +} | |
2380c486 JR |
6124 | + |
6125 | +/** | |
2380c486 | 6126 | + * |
7e46296a AM |
6127 | + */ |
6128 | + | |
6129 | +static struct toi_bdev_info *__find_next_chain_same_prio(void) | |
2380c486 | 6130 | +{ |
7e46296a AM |
6131 | + struct toi_bdev_info *start_chain = toi_writer_posn.current_chain; |
6132 | + struct toi_bdev_info *this = start_chain; | |
6133 | + int orig_prio = this->prio; | |
2380c486 | 6134 | + |
7e46296a AM |
6135 | + do { |
6136 | + this = this->next; | |
2380c486 | 6137 | + |
7e46296a AM |
6138 | + if (!this) |
6139 | + this = prio_chain_head; | |
6140 | + | |
6141 | + /* Back on original chain? Use it again. */ | |
6142 | + if (this == start_chain) | |
6143 | + return start_chain; | |
6144 | + | |
6145 | + } while (!this->blocks.current_extent || this->prio != orig_prio); | |
6146 | + | |
6147 | + return this; | |
2380c486 JR |
6148 | +} |
6149 | + | |
7e46296a | 6150 | +static void find_next_chain(void) |
2380c486 | 6151 | +{ |
7e46296a | 6152 | + struct toi_bdev_info *this; |
2380c486 | 6153 | + |
7e46296a | 6154 | + this = __find_next_chain_same_prio(); |
2380c486 | 6155 | + |
7e46296a AM |
6156 | + /* |
6157 | + * If we didn't get another chain of the same priority that we | |
6158 | + * can use, look for the next priority. | |
6159 | + */ | |
6160 | + while (this && !this->blocks.current_extent) | |
6161 | + this = this->next; | |
2380c486 | 6162 | + |
7e46296a | 6163 | + toi_writer_posn.current_chain = this; |
2380c486 JR |
6164 | +} |
6165 | + | |
6166 | +/** | |
7e46296a AM |
6167 | + * toi_extent_state_next - go to the next extent |
6168 | + * @blocks: The number of values to progress. | |
6169 | + * @stripe_mode: Whether to spread usage across all chains. | |
2380c486 | 6170 | + * |
7e46296a AM |
6171 | + * Given a state, progress to the next valid entry. We may begin in an |
6172 | + * invalid state, as we do when invoked after extent_state_goto_start below. | |
2380c486 | 6173 | + * |
7e46296a AM |
6174 | + * When using compression and expected_compression > 0, we let the image size |
6175 | + * be larger than storage, so we can validly run out of data to return. | |
2380c486 | 6176 | + **/ |
7e46296a | 6177 | +static unsigned long toi_extent_state_next(int blocks, int current_stream) |
2380c486 | 6178 | +{ |
7e46296a AM |
6179 | + int i; |
6180 | + | |
6181 | + if (!toi_writer_posn.current_chain) | |
e876a0dd | 6182 | + return -ENOSPC; |
7e46296a AM |
6183 | + |
6184 | + /* Assume chains always have lengths that are multiples of @blocks */ | |
6185 | + for (i = 0; i < blocks; i++) | |
6186 | + toi_extent_chain_next(); | |
6187 | + | |
6188 | + /* The header stream is not striped */ | |
6189 | + if (current_stream || | |
6190 | + !toi_writer_posn.current_chain->blocks.current_extent) | |
6191 | + find_next_chain(); | |
6192 | + | |
e876a0dd | 6193 | + return toi_writer_posn.current_chain ? 0 : -ENOSPC; |
7e46296a AM |
6194 | +} |
6195 | + | |
6196 | +static void toi_insert_chain_in_prio_list(struct toi_bdev_info *this) | |
6197 | +{ | |
6198 | + struct toi_bdev_info **prev_ptr; | |
6199 | + struct toi_bdev_info *cur; | |
6200 | + | |
6201 | + /* Loop through the existing chain, finding where to insert it */ | |
6202 | + prev_ptr = &prio_chain_head; | |
6203 | + cur = prio_chain_head; | |
6204 | + | |
6205 | + while (cur && cur->prio >= this->prio) { | |
6206 | + prev_ptr = &cur->next; | |
6207 | + cur = cur->next; | |
6208 | + } | |
6209 | + | |
6210 | + this->next = *prev_ptr; | |
6211 | + *prev_ptr = this; | |
6212 | + | |
6213 | + this = prio_chain_head; | |
6214 | + while (this) | |
6215 | + this = this->next; | |
6216 | + num_chains++; | |
2380c486 JR |
6217 | +} |
6218 | + | |
6219 | +/** | |
7e46296a AM |
6220 | + * toi_extent_state_goto_start - reinitialize an extent chain iterator |
6221 | + * @state: Iterator to reinitialize | |
2380c486 | 6222 | + **/ |
7e46296a | 6223 | +void toi_extent_state_goto_start(void) |
2380c486 | 6224 | +{ |
7e46296a AM |
6225 | + struct toi_bdev_info *this = prio_chain_head; |
6226 | + | |
6227 | + while (this) { | |
6228 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6229 | + "Setting current extent to %p.", this->blocks.first); | |
6230 | + this->blocks.current_extent = this->blocks.first; | |
6231 | + if (this->blocks.current_extent) { | |
6232 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6233 | + "Setting current offset to %lu.", | |
6234 | + this->blocks.current_extent->start); | |
6235 | + this->blocks.current_offset = | |
6236 | + this->blocks.current_extent->start; | |
6237 | + } | |
6238 | + | |
6239 | + this = this->next; | |
6240 | + } | |
6241 | + | |
6242 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Setting current chain to %p.", | |
6243 | + prio_chain_head); | |
6244 | + toi_writer_posn.current_chain = prio_chain_head; | |
6245 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Leaving extent state goto start."); | |
2380c486 JR |
6246 | +} |
6247 | + | |
6248 | +/** | |
7e46296a AM |
6249 | + * toi_extent_state_save - save state of the iterator |
6250 | + * @state: Current state of the chain | |
6251 | + * @saved_state: Iterator to populate | |
2380c486 | 6252 | + * |
7e46296a AM |
6253 | + * Given a state and a struct hibernate_extent_state_store, save the current |
6254 | + * position in a format that can be used with relocated chains (at | |
6255 | + * resume time). | |
2380c486 | 6256 | + **/ |
7e46296a | 6257 | +void toi_extent_state_save(int slot) |
2380c486 | 6258 | +{ |
7e46296a AM |
6259 | + struct toi_bdev_info *cur_chain = prio_chain_head; |
6260 | + struct hibernate_extent *extent; | |
6261 | + struct hibernate_extent_saved_state *chain_state; | |
6262 | + int i = 0; | |
2380c486 | 6263 | + |
7e46296a AM |
6264 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_extent_state_save, slot %d.", |
6265 | + slot); | |
2380c486 | 6266 | + |
7e46296a AM |
6267 | + if (!toi_writer_posn.current_chain) { |
6268 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "No current chain => " | |
6269 | + "chain_num = -1."); | |
6270 | + toi_writer_posn.saved_chain_number[slot] = -1; | |
6271 | + return; | |
6272 | + } | |
2380c486 | 6273 | + |
7e46296a AM |
6274 | + while (cur_chain) { |
6275 | + i++; | |
6276 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Saving chain %d (%p) " | |
6277 | + "state, slot %d.", i, cur_chain, slot); | |
2380c486 | 6278 | + |
7e46296a | 6279 | + chain_state = &cur_chain->saved_state[slot]; |
2380c486 | 6280 | + |
7e46296a | 6281 | + chain_state->offset = cur_chain->blocks.current_offset; |
2380c486 | 6282 | + |
7e46296a AM |
6283 | + if (toi_writer_posn.current_chain == cur_chain) { |
6284 | + toi_writer_posn.saved_chain_number[slot] = i; | |
6285 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "This is the chain " | |
6286 | + "we were on => chain_num is %d.", i); | |
6287 | + } | |
2380c486 | 6288 | + |
7e46296a AM |
6289 | + if (!cur_chain->blocks.current_extent) { |
6290 | + chain_state->extent_num = 0; | |
6291 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "No current extent " | |
6292 | + "for this chain => extent_num %d is 0.", | |
6293 | + i); | |
6294 | + cur_chain = cur_chain->next; | |
6295 | + continue; | |
6296 | + } | |
2380c486 | 6297 | + |
7e46296a AM |
6298 | + extent = cur_chain->blocks.first; |
6299 | + chain_state->extent_num = 1; | |
6300 | + | |
6301 | + while (extent != cur_chain->blocks.current_extent) { | |
6302 | + chain_state->extent_num++; | |
6303 | + extent = extent->next; | |
6304 | + } | |
6305 | + | |
6306 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "extent num %d is %d.", i, | |
6307 | + chain_state->extent_num); | |
6308 | + | |
6309 | + cur_chain = cur_chain->next; | |
6310 | + } | |
6311 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6312 | + "Completed saving extent state slot %d.", slot); | |
2380c486 JR |
6313 | +} |
6314 | + | |
6315 | +/** | |
7e46296a AM |
6316 | + * toi_extent_state_restore - restore the position saved by extent_state_save |
6317 | + * @state: State to populate | |
6318 | + * @saved_state: Iterator saved to restore | |
2380c486 | 6319 | + **/ |
7e46296a | 6320 | +void toi_extent_state_restore(int slot) |
2380c486 | 6321 | +{ |
7e46296a AM |
6322 | + int i = 0; |
6323 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
6324 | + struct hibernate_extent_saved_state *chain_state; | |
2380c486 | 6325 | + |
7e46296a AM |
6326 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6327 | + "toi_extent_state_restore - slot %d.", slot); | |
2380c486 | 6328 | + |
7e46296a AM |
6329 | + if (toi_writer_posn.saved_chain_number[slot] == -1) { |
6330 | + toi_writer_posn.current_chain = NULL; | |
6331 | + return; | |
2380c486 JR |
6332 | + } |
6333 | + | |
7e46296a AM |
6334 | + while (cur_chain) { |
6335 | + int posn; | |
6336 | + int j; | |
6337 | + i++; | |
6338 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Restoring chain %d (%p) " | |
6339 | + "state, slot %d.", i, cur_chain, slot); | |
2380c486 | 6340 | + |
7e46296a | 6341 | + chain_state = &cur_chain->saved_state[slot]; |
2380c486 | 6342 | + |
7e46296a | 6343 | + posn = chain_state->extent_num; |
2380c486 | 6344 | + |
7e46296a AM |
6345 | + cur_chain->blocks.current_extent = cur_chain->blocks.first; |
6346 | + cur_chain->blocks.current_offset = chain_state->offset; | |
2380c486 | 6347 | + |
7e46296a AM |
6348 | + if (i == toi_writer_posn.saved_chain_number[slot]) { |
6349 | + toi_writer_posn.current_chain = cur_chain; | |
6350 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6351 | + "Found current chain."); | |
6352 | + } | |
2380c486 | 6353 | + |
7e46296a AM |
6354 | + for (j = 0; j < 4; j++) |
6355 | + if (i == toi_writer_posn.saved_chain_number[j]) { | |
6356 | + toi_writer_posn.saved_chain_ptr[j] = cur_chain; | |
6357 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6358 | + "Found saved chain ptr %d (%p) (offset" | |
6359 | + " %d).", j, cur_chain, | |
6360 | + cur_chain->saved_state[j].offset); | |
6361 | + } | |
2380c486 | 6362 | + |
7e46296a AM |
6363 | + if (posn) { |
6364 | + while (--posn) | |
6365 | + cur_chain->blocks.current_extent = | |
6366 | + cur_chain->blocks.current_extent->next; | |
6367 | + } else | |
6368 | + cur_chain->blocks.current_extent = NULL; | |
6369 | + | |
6370 | + cur_chain = cur_chain->next; | |
6371 | + } | |
6372 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Done."); | |
6373 | + if (test_action_state(TOI_LOGALL)) | |
6374 | + dump_block_chains(); | |
2380c486 JR |
6375 | +} |
6376 | + | |
7e46296a AM |
6377 | +/* |
6378 | + * Storage needed | |
2380c486 | 6379 | + * |
7e46296a AM |
6380 | + * Returns amount of space in the image header required |
6381 | + * for the chain data. This ignores the links between | |
6382 | + * pages, which we factor in when allocating the space. | |
6383 | + */ | |
6384 | +int toi_bio_devinfo_storage_needed(void) | |
2380c486 | 6385 | +{ |
7e46296a AM |
6386 | + int result = sizeof(num_chains); |
6387 | + struct toi_bdev_info *chain = prio_chain_head; | |
2380c486 | 6388 | + |
7e46296a AM |
6389 | + while (chain) { |
6390 | + result += metadata_size; | |
2380c486 | 6391 | + |
7e46296a AM |
6392 | + /* Chain size */ |
6393 | + result += sizeof(int); | |
2380c486 | 6394 | + |
7e46296a AM |
6395 | + /* Extents */ |
6396 | + result += (2 * sizeof(unsigned long) * | |
6397 | + chain->blocks.num_extents); | |
2380c486 | 6398 | + |
7e46296a AM |
6399 | + chain = chain->next; |
6400 | + } | |
2380c486 | 6401 | + |
7e46296a AM |
6402 | + result += 4 * sizeof(int); |
6403 | + return result; | |
2380c486 JR |
6404 | +} |
6405 | + | |
5dd10c98 AM |
6406 | +static unsigned long chain_pages_used(struct toi_bdev_info *chain) |
6407 | +{ | |
6408 | + struct hibernate_extent *this = chain->blocks.first; | |
6409 | + struct hibernate_extent_saved_state *state = &chain->saved_state[3]; | |
6410 | + unsigned long size = 0; | |
6411 | + int extent_idx = 1; | |
6412 | + | |
6413 | + if (!state->extent_num) { | |
6414 | + if (!this) | |
6415 | + return 0; | |
6416 | + else | |
6417 | + return chain->blocks.size; | |
6418 | + } | |
6419 | + | |
6420 | + while (extent_idx < state->extent_num) { | |
6421 | + size += (this->end - this->start + 1); | |
6422 | + this = this->next; | |
6423 | + extent_idx++; | |
6424 | + } | |
6425 | + | |
6426 | + /* We didn't use the one we're sitting on, so don't count it */ | |
6427 | + return size + state->offset - this->start; | |
6428 | +} | |
6429 | + | |
2380c486 | 6430 | +/** |
7e46296a AM |
6431 | + * toi_serialise_extent_chain - write a chain in the image |
6432 | + * @chain: Chain to write. | |
2380c486 | 6433 | + **/ |
7e46296a | 6434 | +static int toi_serialise_extent_chain(struct toi_bdev_info *chain) |
2380c486 | 6435 | +{ |
7e46296a AM |
6436 | + struct hibernate_extent *this; |
6437 | + int ret; | |
6438 | + int i = 1; | |
2380c486 | 6439 | + |
5dd10c98 AM |
6440 | + chain->pages_used = chain_pages_used(chain); |
6441 | + | |
7e46296a AM |
6442 | + if (test_action_state(TOI_LOGALL)) |
6443 | + dump_block_chains(); | |
6444 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Serialising chain (dev_t %lx).", | |
6445 | + chain->dev_t); | |
6446 | + /* Device info - dev_t, prio, bmap_shift, blocks per page, positions */ | |
6447 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops, | |
6448 | + (char *) &chain->uuid, metadata_size); | |
6449 | + if (ret) | |
6450 | + return ret; | |
2380c486 | 6451 | + |
7e46296a AM |
6452 | + /* Num extents */ |
6453 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops, | |
6454 | + (char *) &chain->blocks.num_extents, sizeof(int)); | |
6455 | + if (ret) | |
6456 | + return ret; | |
2380c486 | 6457 | + |
7e46296a AM |
6458 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d extents.", |
6459 | + chain->blocks.num_extents); | |
2380c486 | 6460 | + |
7e46296a AM |
6461 | + this = chain->blocks.first; |
6462 | + while (this) { | |
6463 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent %d.", i); | |
6464 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, | |
6465 | + &toi_blockwriter_ops, | |
6466 | + (char *) this, 2 * sizeof(this->start)); | |
6467 | + if (ret) | |
6468 | + return ret; | |
6469 | + this = this->next; | |
6470 | + i++; | |
6471 | + } | |
2380c486 | 6472 | + |
7e46296a AM |
6473 | + return ret; |
6474 | +} | |
2380c486 | 6475 | + |
7e46296a AM |
6476 | +int toi_serialise_extent_chains(void) |
6477 | +{ | |
6478 | + struct toi_bdev_info *this = prio_chain_head; | |
6479 | + int result; | |
2380c486 | 6480 | + |
7e46296a AM |
6481 | + /* Write the number of chains */ |
6482 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Write number of chains (%d)", | |
6483 | + num_chains); | |
6484 | + result = toiActiveAllocator->rw_header_chunk(WRITE, | |
6485 | + &toi_blockwriter_ops, (char *) &num_chains, | |
6486 | + sizeof(int)); | |
6487 | + if (result) | |
6488 | + return result; | |
2380c486 | 6489 | + |
7e46296a AM |
6490 | + /* Then the chains themselves */ |
6491 | + while (this) { | |
6492 | + result = toi_serialise_extent_chain(this); | |
6493 | + if (result) | |
6494 | + return result; | |
6495 | + this = this->next; | |
2380c486 | 6496 | + } |
2380c486 | 6497 | + |
7e46296a AM |
6498 | + /* |
6499 | + * Finally, the chain we should be on at the start of each | |
6500 | + * section. | |
6501 | + */ | |
6502 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Saved chain numbers."); | |
6503 | + result = toiActiveAllocator->rw_header_chunk(WRITE, | |
6504 | + &toi_blockwriter_ops, | |
6505 | + (char *) &toi_writer_posn.saved_chain_number[0], | |
6506 | + 4 * sizeof(int)); | |
6507 | + | |
6508 | + return result; | |
2380c486 JR |
6509 | +} |
6510 | + | |
7e46296a | 6511 | +int toi_register_storage_chain(struct toi_bdev_info *new) |
2380c486 | 6512 | +{ |
7e46296a AM |
6513 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Inserting chain %p into list.", |
6514 | + new); | |
6515 | + toi_insert_chain_in_prio_list(new); | |
6516 | + return 0; | |
2380c486 JR |
6517 | +} |
6518 | + | |
7e46296a | 6519 | +static void free_bdev_info(struct toi_bdev_info *chain) |
2380c486 | 6520 | +{ |
7e46296a | 6521 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Free chain %p.", chain); |
2380c486 | 6522 | + |
7e46296a AM |
6523 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Block extents."); |
6524 | + toi_put_extent_chain(&chain->blocks); | |
2380c486 | 6525 | + |
7e46296a AM |
6526 | + /* |
6527 | + * The allocator may need to do more than just free the chains | |
5dd10c98 | 6528 | + * (swap_free, for example). Don't call from boot kernel. |
7e46296a AM |
6529 | + */ |
6530 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Allocator extents."); | |
5dd10c98 AM |
6531 | + if (chain->allocator) |
6532 | + chain->allocator->bio_allocator_ops->free_storage(chain); | |
2380c486 | 6533 | + |
7e46296a AM |
6534 | + /* |
6535 | + * Dropping out of reading atomic copy? Need to undo | |
6536 | + * toi_open_by_devnum. | |
6537 | + */ | |
6538 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Bdev."); | |
6539 | + if (chain->bdev && !IS_ERR(chain->bdev) && | |
6540 | + chain->bdev != resume_block_device && | |
6541 | + chain->bdev != header_block_device && | |
6542 | + test_toi_state(TOI_TRYING_TO_RESUME)) | |
6543 | + toi_close_bdev(chain->bdev); | |
2380c486 | 6544 | + |
7e46296a AM |
6545 | + /* Poison */ |
6546 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " - Struct."); | |
6547 | + toi_kfree(39, chain, sizeof(*chain)); | |
2380c486 | 6548 | + |
7e46296a AM |
6549 | + if (prio_chain_head == chain) |
6550 | + prio_chain_head = NULL; | |
2380c486 | 6551 | + |
7e46296a | 6552 | + num_chains--; |
2380c486 JR |
6553 | +} |
6554 | + | |
7e46296a | 6555 | +void free_all_bdev_info(void) |
0ada99ac | 6556 | +{ |
7e46296a AM |
6557 | + struct toi_bdev_info *this = prio_chain_head; |
6558 | + | |
6559 | + while (this) { | |
6560 | + struct toi_bdev_info *next = this->next; | |
6561 | + free_bdev_info(this); | |
6562 | + this = next; | |
6563 | + } | |
6564 | + | |
6565 | + memset((char *) &toi_writer_posn, 0, sizeof(toi_writer_posn)); | |
6566 | + prio_chain_head = NULL; | |
0ada99ac | 6567 | +} |
6568 | + | |
5dd10c98 AM |
6569 | +static void set_up_start_position(void) |
6570 | +{ | |
6571 | + toi_writer_posn.current_chain = prio_chain_head; | |
6572 | + go_next_page(0, 0); | |
6573 | +} | |
6574 | + | |
2380c486 | 6575 | +/** |
7e46296a AM |
6576 | + * toi_load_extent_chain - read back a chain saved in the image |
6577 | + * @chain: Chain to load | |
2380c486 | 6578 | + * |
7e46296a AM |
6579 | + * The linked list of extents is reconstructed from the disk. chain will point |
6580 | + * to the first entry. | |
2380c486 | 6581 | + **/ |
5dd10c98 | 6582 | +int toi_load_extent_chain(int index, int *num_loaded) |
2380c486 | 6583 | +{ |
7e46296a AM |
6584 | + struct toi_bdev_info *chain = toi_kzalloc(39, |
6585 | + sizeof(struct toi_bdev_info), GFP_ATOMIC); | |
6586 | + struct hibernate_extent *this, *last = NULL; | |
6587 | + int i, ret; | |
2380c486 | 6588 | + |
7e46296a AM |
6589 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Loading extent chain %d.", index); |
6590 | + /* Get dev_t, prio, bmap_shift, blocks per page, positions */ | |
6591 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, | |
6592 | + (char *) &chain->uuid, metadata_size); | |
9474138d | 6593 | + |
7e46296a AM |
6594 | + if (ret) { |
6595 | + printk(KERN_ERR "Failed to read the size of extent chain.\n"); | |
6596 | + toi_kfree(39, chain, sizeof(*chain)); | |
6597 | + return 1; | |
6598 | + } | |
6599 | + | |
5dd10c98 AM |
6600 | + toi_bkd.pages_used[index] = chain->pages_used; |
6601 | + | |
7e46296a AM |
6602 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, |
6603 | + (char *) &chain->blocks.num_extents, sizeof(int)); | |
6604 | + if (ret) { | |
6605 | + printk(KERN_ERR "Failed to read the size of extent chain.\n"); | |
6606 | + toi_kfree(39, chain, sizeof(*chain)); | |
6607 | + return 1; | |
6608 | + } | |
6609 | + | |
6610 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d extents.", | |
6611 | + chain->blocks.num_extents); | |
6612 | + | |
6613 | + for (i = 0; i < chain->blocks.num_extents; i++) { | |
6614 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent %d.", i + 1); | |
6615 | + | |
6616 | + this = toi_kzalloc(2, sizeof(struct hibernate_extent), | |
6617 | + TOI_ATOMIC_GFP); | |
6618 | + if (!this) { | |
6619 | + printk(KERN_INFO "Failed to allocate a new extent.\n"); | |
6620 | + free_bdev_info(chain); | |
6621 | + return -ENOMEM; | |
6622 | + } | |
6623 | + this->next = NULL; | |
6624 | + /* Get the next page */ | |
6625 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, | |
6626 | + NULL, (char *) this, 2 * sizeof(this->start)); | |
6627 | + if (ret) { | |
6628 | + printk(KERN_INFO "Failed to read an extent.\n"); | |
6629 | + toi_kfree(2, this, sizeof(struct hibernate_extent)); | |
6630 | + free_bdev_info(chain); | |
6631 | + return 1; | |
6632 | + } | |
6633 | + | |
6634 | + if (last) | |
6635 | + last->next = this; | |
6636 | + else { | |
6637 | + char b1[32], b2[32], b3[32]; | |
6638 | + /* | |
6639 | + * Open the bdev | |
6640 | + */ | |
6641 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6642 | + "Chain dev_t is %s. Resume dev t is %s. Header" | |
6643 | + " bdev_t is %s.\n", | |
6644 | + format_dev_t(b1, chain->dev_t), | |
6645 | + format_dev_t(b2, resume_dev_t), | |
6646 | + format_dev_t(b3, toi_sig_data->header_dev_t)); | |
6647 | + | |
6648 | + if (chain->dev_t == resume_dev_t) | |
6649 | + chain->bdev = resume_block_device; | |
6650 | + else if (chain->dev_t == toi_sig_data->header_dev_t) | |
6651 | + chain->bdev = header_block_device; | |
6652 | + else { | |
6653 | + chain->bdev = toi_open_bdev(chain->uuid, | |
6654 | + chain->dev_t, 1); | |
6655 | + if (IS_ERR(chain->bdev)) { | |
6656 | + free_bdev_info(chain); | |
6657 | + return -ENODEV; | |
6658 | + } | |
6659 | + } | |
6660 | + | |
6661 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Chain bmap shift " | |
6662 | + "is %d and blocks per page is %d.", | |
6663 | + chain->bmap_shift, | |
6664 | + chain->blocks_per_page); | |
6665 | + | |
6666 | + chain->blocks.first = this; | |
6667 | + | |
6668 | + /* | |
6669 | + * Couldn't do this earlier, but can't do | |
6670 | + * goto_start now - we may have already used blocks | |
6671 | + * in the first chain. | |
6672 | + */ | |
6673 | + chain->blocks.current_extent = this; | |
6674 | + chain->blocks.current_offset = this->start; | |
6675 | + | |
6676 | + /* | |
6677 | + * Can't wait until we've read the whole chain | |
6678 | + * before we insert it in the list. We might need | |
6679 | + * this chain to read the next page in the header | |
6680 | + */ | |
6681 | + toi_insert_chain_in_prio_list(chain); | |
7e46296a | 6682 | + } |
5dd10c98 AM |
6683 | + |
6684 | + /* | |
6685 | + * We have to wait until 2 extents are loaded before setting up | |
6686 | + * properly because if the first extent has only one page, we | |
6687 | + * will need to put the position on the second extent. Sounds | |
6688 | + * obvious, but it wasn't! | |
6689 | + */ | |
6690 | + (*num_loaded)++; | |
6691 | + if ((*num_loaded) == 2) | |
6692 | + set_up_start_position(); | |
7e46296a AM |
6693 | + last = this; |
6694 | + } | |
6695 | + | |
6696 | + /* | |
6697 | + * Shouldn't get empty chains, but it's not impossible. Link them in so | |
6698 | + * they get freed properly later. | |
6699 | + */ | |
6700 | + if (!chain->blocks.num_extents) | |
6701 | + toi_insert_chain_in_prio_list(chain); | |
6702 | + | |
6703 | + if (!chain->blocks.current_extent) { | |
6704 | + chain->blocks.current_extent = chain->blocks.first; | |
6705 | + if (chain->blocks.current_extent) | |
6706 | + chain->blocks.current_offset = | |
6707 | + chain->blocks.current_extent->start; | |
6708 | + } | |
6709 | + return 0; | |
6710 | +} | |
6711 | + | |
6712 | +int toi_load_extent_chains(void) | |
6713 | +{ | |
6714 | + int result; | |
6715 | + int to_load; | |
6716 | + int i; | |
5dd10c98 | 6717 | + int extents_loaded = 0; |
7e46296a AM |
6718 | + |
6719 | + result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, | |
6720 | + (char *) &to_load, | |
6721 | + sizeof(int)); | |
6722 | + if (result) | |
6723 | + return result; | |
6724 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d chains to read.", to_load); | |
6725 | + | |
6726 | + for (i = 0; i < to_load; i++) { | |
6727 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " >> Loading chain %d/%d.", | |
6728 | + i, to_load); | |
5dd10c98 | 6729 | + result = toi_load_extent_chain(i, &extents_loaded); |
7e46296a AM |
6730 | + if (result) |
6731 | + return result; | |
6732 | + } | |
6733 | + | |
5dd10c98 AM |
6734 | + /* If we never got to a second extent, we still need to do this. */ |
6735 | + if (extents_loaded == 1) | |
6736 | + set_up_start_position(); | |
6737 | + | |
7e46296a AM |
6738 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Save chain numbers."); |
6739 | + result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, | |
6740 | + &toi_blockwriter_ops, | |
6741 | + (char *) &toi_writer_posn.saved_chain_number[0], | |
6742 | + 4 * sizeof(int)); | |
6743 | + | |
6744 | + return result; | |
6745 | +} | |
6746 | + | |
6747 | +static int toi_end_of_stream(int writing, int section_barrier) | |
6748 | +{ | |
6749 | + struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain; | |
6750 | + int compare_to = next_section[current_stream]; | |
6751 | + struct toi_bdev_info *compare_chain = | |
6752 | + toi_writer_posn.saved_chain_ptr[compare_to]; | |
6753 | + int compare_offset = compare_chain ? | |
6754 | + compare_chain->saved_state[compare_to].offset : 0; | |
6755 | + | |
6756 | + if (!section_barrier) | |
6757 | + return 0; | |
6758 | + | |
6759 | + if (!cur_chain) | |
6760 | + return 1; | |
6761 | + | |
6762 | + if (cur_chain == compare_chain && | |
6763 | + cur_chain->blocks.current_offset == compare_offset) { | |
6764 | + if (writing) { | |
6765 | + if (!current_stream) { | |
6766 | + debug_broken_header(); | |
6767 | + return 1; | |
6768 | + } | |
0ada99ac | 6769 | + } else { |
e999739a | 6770 | + more_readahead = 0; |
7e46296a AM |
6771 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6772 | + "Reached the end of stream %d " | |
6773 | + "(not an error).", current_stream); | |
6774 | + return 1; | |
e999739a | 6775 | + } |
6776 | + } | |
6777 | + | |
7e46296a AM |
6778 | + return 0; |
6779 | +} | |
6780 | + | |
6781 | +/** | |
6782 | + * go_next_page - skip blocks to the start of the next page | |
6783 | + * @writing: Whether we're reading or writing the image. | |
6784 | + * | |
6785 | + * Go forward one page. | |
6786 | + **/ | |
6787 | +int go_next_page(int writing, int section_barrier) | |
6788 | +{ | |
6789 | + struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain; | |
6790 | + int max = cur_chain ? cur_chain->blocks_per_page : 1; | |
2380c486 | 6791 | + |
7e46296a AM |
6792 | + /* Nope. Go foward a page - or maybe two. Don't stripe the header, |
6793 | + * so that bad fragmentation doesn't put the extent data containing | |
6794 | + * the location of the second page out of the first header page. | |
6795 | + */ | |
6796 | + if (toi_extent_state_next(max, current_stream)) { | |
2380c486 | 6797 | + /* Don't complain if readahead falls off the end */ |
0ada99ac | 6798 | + if (writing && section_barrier) { |
7e46296a AM |
6799 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent state eof. " |
6800 | + "Expected compression ratio too optimistic?"); | |
6801 | + if (test_action_state(TOI_LOGALL)) | |
6802 | + dump_block_chains(); | |
2380c486 | 6803 | + } |
7e46296a AM |
6804 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Ran out of extents to " |
6805 | + "read/write. (Not necessarily a fatal error."); | |
e876a0dd | 6806 | + return -ENOSPC; |
2380c486 JR |
6807 | + } |
6808 | + | |
2380c486 JR |
6809 | + return 0; |
6810 | +} | |
6811 | + | |
7e46296a | 6812 | +int devices_of_same_priority(struct toi_bdev_info *this) |
2380c486 | 6813 | +{ |
7e46296a AM |
6814 | + struct toi_bdev_info *check = prio_chain_head; |
6815 | + int i = 0; | |
6816 | + | |
6817 | + while (check) { | |
6818 | + if (check->prio == this->prio) | |
6819 | + i++; | |
6820 | + check = check->next; | |
6821 | + } | |
6822 | + | |
6823 | + return i; | |
2380c486 JR |
6824 | +} |
6825 | + | |
6826 | +/** | |
6827 | + * toi_bio_rw_page - do i/o on the next disk page in the image | |
6828 | + * @writing: Whether reading or writing. | |
6829 | + * @page: Page to do i/o on. | |
6830 | + * @is_readahead: Whether we're doing readahead | |
6831 | + * @free_group: The group used in allocating the page | |
6832 | + * | |
6833 | + * Submit a page for reading or writing, possibly readahead. | |
6834 | + * Pass the group used in allocating the page as well, as it should | |
6835 | + * be freed on completion of the bio if we're writing the page. | |
6836 | + **/ | |
7e46296a | 6837 | +int toi_bio_rw_page(int writing, struct page *page, |
2380c486 JR |
6838 | + int is_readahead, int free_group) |
6839 | +{ | |
7e46296a AM |
6840 | + int result = toi_end_of_stream(writing, 1); |
6841 | + struct toi_bdev_info *dev_info = toi_writer_posn.current_chain; | |
2380c486 | 6842 | + |
7e46296a | 6843 | + if (result) { |
85eb3c9d AM |
6844 | + if (writing) |
6845 | + abort_hibernate(TOI_INSUFFICIENT_STORAGE, | |
6846 | + "Insufficient storage for your image."); | |
6847 | + else | |
6848 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Seeking to " | |
6849 | + "read/write another page when stream has " | |
6850 | + "ended."); | |
e876a0dd | 6851 | + return -ENOSPC; |
7e46296a | 6852 | + } |
2380c486 | 6853 | + |
7e46296a | 6854 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
85eb3c9d | 6855 | + "%sing device %lx, sector %ld << %d.", |
7e46296a | 6856 | + writing ? "Writ" : "Read", |
85eb3c9d | 6857 | + dev_info->dev_t, dev_info->blocks.current_offset, |
7e46296a | 6858 | + dev_info->bmap_shift); |
2380c486 | 6859 | + |
7e46296a AM |
6860 | + result = toi_do_io(writing, dev_info->bdev, |
6861 | + dev_info->blocks.current_offset << dev_info->bmap_shift, | |
2380c486 | 6862 | + page, is_readahead, 0, free_group); |
2380c486 | 6863 | + |
7e46296a AM |
6864 | + /* Ignore the result here - will check end of stream if come in again */ |
6865 | + go_next_page(writing, 1); | |
2380c486 | 6866 | + |
7e46296a AM |
6867 | + if (result) |
6868 | + printk(KERN_ERR "toi_do_io returned %d.\n", result); | |
6869 | + return result; | |
2380c486 JR |
6870 | +} |
6871 | + | |
7e46296a | 6872 | +dev_t get_header_dev_t(void) |
2380c486 | 6873 | +{ |
7e46296a | 6874 | + return prio_chain_head->dev_t; |
2380c486 JR |
6875 | +} |
6876 | + | |
7e46296a | 6877 | +struct block_device *get_header_bdev(void) |
2380c486 | 6878 | +{ |
7e46296a AM |
6879 | + return prio_chain_head->bdev; |
6880 | +} | |
2380c486 | 6881 | + |
7e46296a AM |
6882 | +unsigned long get_headerblock(void) |
6883 | +{ | |
6884 | + return prio_chain_head->blocks.first->start << | |
6885 | + prio_chain_head->bmap_shift; | |
6886 | +} | |
2380c486 | 6887 | + |
7e46296a AM |
6888 | +int get_main_pool_phys_params(void) |
6889 | +{ | |
6890 | + struct toi_bdev_info *this = prio_chain_head; | |
6891 | + int result; | |
2380c486 | 6892 | + |
7e46296a AM |
6893 | + while (this) { |
6894 | + result = this->allocator->bio_allocator_ops->bmap(this); | |
6895 | + if (result) | |
6896 | + return result; | |
6897 | + this = this->next; | |
6898 | + } | |
2380c486 | 6899 | + |
7e46296a | 6900 | + return 0; |
2380c486 JR |
6901 | +} |
6902 | + | |
7e46296a | 6903 | +static int apply_header_reservation(void) |
2380c486 | 6904 | +{ |
7e46296a | 6905 | + int i; |
2380c486 | 6906 | + |
7e46296a AM |
6907 | + if (!header_pages_reserved) { |
6908 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
6909 | + "No header pages reserved at the moment."); | |
6910 | + return 0; | |
6911 | + } | |
2380c486 | 6912 | + |
7e46296a | 6913 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Applying header reservation."); |
2380c486 | 6914 | + |
7e46296a AM |
6915 | + /* Apply header space reservation */ |
6916 | + toi_extent_state_goto_start(); | |
2380c486 | 6917 | + |
7e46296a AM |
6918 | + for (i = 0; i < header_pages_reserved; i++) |
6919 | + if (go_next_page(1, 0)) | |
e876a0dd | 6920 | + return -ENOSPC; |
2380c486 | 6921 | + |
7e46296a AM |
6922 | + /* The end of header pages will be the start of pageset 2 */ |
6923 | + toi_extent_state_save(2); | |
2380c486 | 6924 | + |
7e46296a AM |
6925 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6926 | + "Finished applying header reservation."); | |
6927 | + return 0; | |
6928 | +} | |
2380c486 | 6929 | + |
7e46296a AM |
6930 | +static int toi_bio_register_storage(void) |
6931 | +{ | |
6932 | + int result = 0; | |
6933 | + struct toi_module_ops *this_module; | |
2380c486 | 6934 | + |
7e46296a AM |
6935 | + list_for_each_entry(this_module, &toi_modules, module_list) { |
6936 | + if (!this_module->enabled || | |
6937 | + this_module->type != BIO_ALLOCATOR_MODULE) | |
2380c486 | 6938 | + continue; |
7e46296a AM |
6939 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6940 | + "Registering storage from %s.", | |
6941 | + this_module->name); | |
6942 | + result = this_module->bio_allocator_ops->register_storage(); | |
6943 | + if (result) | |
6944 | + break; | |
2380c486 JR |
6945 | + } |
6946 | + | |
0ada99ac | 6947 | + return result; |
2380c486 JR |
6948 | +} |
6949 | + | |
7e46296a | 6950 | +int toi_bio_allocate_storage(unsigned long request) |
2380c486 | 6951 | +{ |
7e46296a AM |
6952 | + struct toi_bdev_info *chain = prio_chain_head; |
6953 | + unsigned long to_get = request; | |
6954 | + unsigned long extra_pages, needed; | |
6955 | + int no_free = 0; | |
2380c486 | 6956 | + |
7e46296a AM |
6957 | + if (!chain) { |
6958 | + int result = toi_bio_register_storage(); | |
5dd10c98 AM |
6959 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: " |
6960 | + "Registering storage."); | |
7e46296a AM |
6961 | + if (result) |
6962 | + return 0; | |
6963 | + chain = prio_chain_head; | |
5dd10c98 AM |
6964 | + if (!chain) { |
6965 | + printk("TuxOnIce: No storage was registered.\n"); | |
6966 | + return 0; | |
6967 | + } | |
7e46296a | 6968 | + } |
5dd10c98 | 6969 | + |
7e46296a AM |
6970 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: " |
6971 | + "Request is %lu pages.", request); | |
6972 | + extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long) | |
6973 | + + sizeof(int)), PAGE_SIZE); | |
6974 | + needed = request + extra_pages + header_pages_reserved; | |
6975 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding %lu extra pages and %lu " | |
6976 | + "for header => %lu.", | |
6977 | + extra_pages, header_pages_reserved, needed); | |
6978 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Already allocated %lu pages.", | |
6979 | + raw_pages_allocd); | |
2380c486 | 6980 | + |
7e46296a AM |
6981 | + to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : 0; |
6982 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Need to get %lu pages.", to_get); | |
2380c486 | 6983 | + |
7e46296a AM |
6984 | + if (!to_get) |
6985 | + return apply_header_reservation(); | |
2380c486 | 6986 | + |
7e46296a | 6987 | + while (to_get && chain) { |
5dd10c98 AM |
6988 | + int num_group = devices_of_same_priority(chain); |
6989 | + int divisor = num_group - no_free; | |
7e46296a AM |
6990 | + int i; |
6991 | + unsigned long portion = DIV_ROUND_UP(to_get, divisor); | |
6992 | + unsigned long got = 0; | |
6993 | + unsigned long got_this_round = 0; | |
6994 | + struct toi_bdev_info *top = chain; | |
2380c486 | 6995 | + |
7e46296a AM |
6996 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
6997 | + " Start of loop. To get is %lu. Divisor is %d.", | |
6998 | + to_get, divisor); | |
6999 | + no_free = 0; | |
2380c486 | 7000 | + |
7e46296a AM |
7001 | + /* |
7002 | + * We're aiming to spread the allocated storage as evenly | |
7003 | + * as possible, but we also want to get all the storage we | |
7004 | + * can off this priority. | |
7005 | + */ | |
5dd10c98 | 7006 | + for (i = 0; i < num_group; i++) { |
7e46296a AM |
7007 | + struct toi_bio_allocator_ops *ops = |
7008 | + chain->allocator->bio_allocator_ops; | |
7009 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
7010 | + " Asking for %lu pages from chain %p.", | |
7011 | + portion, chain); | |
7012 | + got = ops->allocate_storage(chain, portion); | |
7013 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
7014 | + " Got %lu pages from allocator %p.", | |
7015 | + got, chain); | |
7016 | + if (!got) | |
7017 | + no_free++; | |
7018 | + got_this_round += got; | |
7019 | + chain = chain->next; | |
7020 | + } | |
7021 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " Loop finished. Got a " | |
7022 | + "total of %lu pages from %d allocators.", | |
7023 | + got_this_round, divisor - no_free); | |
7024 | + | |
7025 | + raw_pages_allocd += got_this_round; | |
7026 | + to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : | |
7027 | + 0; | |
7028 | + | |
7029 | + /* | |
7030 | + * If we got anything from chains of this priority and we | |
7031 | + * still have storage to allocate, go over this priority | |
7032 | + * again. | |
7033 | + */ | |
7034 | + if (got_this_round && to_get) | |
7035 | + chain = top; | |
7036 | + else | |
7037 | + no_free = 0; | |
7038 | + } | |
7039 | + | |
7040 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Finished allocating. Calling " | |
7041 | + "get_main_pool_phys_params"); | |
7042 | + /* Now let swap allocator bmap the pages */ | |
7043 | + get_main_pool_phys_params(); | |
7044 | + | |
7045 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Done. Reserving header."); | |
7046 | + return apply_header_reservation(); | |
7047 | +} | |
5dd10c98 AM |
7048 | + |
7049 | +void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd) | |
7050 | +{ | |
7051 | + int i = 0; | |
7052 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
7053 | + | |
7054 | + while (cur_chain) { | |
7055 | + cur_chain->pages_used = bkd->pages_used[i]; | |
7056 | + cur_chain = cur_chain->next; | |
7057 | + i++; | |
7058 | + } | |
7059 | +} | |
7060 | + | |
7061 | +int toi_bio_chains_debug_info(char *buffer, int size) | |
7062 | +{ | |
7063 | + /* Show what we actually used */ | |
7064 | + struct toi_bdev_info *cur_chain = prio_chain_head; | |
7065 | + int len = 0; | |
7066 | + | |
7067 | + while (cur_chain) { | |
7068 | + len += scnprintf(buffer + len, size - len, " Used %lu pages " | |
7069 | + "from %s.\n", cur_chain->pages_used, | |
7070 | + cur_chain->name); | |
7071 | + cur_chain = cur_chain->next; | |
7072 | + } | |
7073 | + | |
7074 | + return len; | |
7075 | +} | |
7e46296a AM |
7076 | diff --git a/kernel/power/tuxonice_bio_core.c b/kernel/power/tuxonice_bio_core.c |
7077 | new file mode 100644 | |
85eb3c9d | 7078 | index 0000000..d58d3b3 |
7e46296a AM |
7079 | --- /dev/null |
7080 | +++ b/kernel/power/tuxonice_bio_core.c | |
85eb3c9d | 7081 | @@ -0,0 +1,1844 @@ |
7e46296a AM |
7082 | +/* |
7083 | + * kernel/power/tuxonice_bio.c | |
2380c486 | 7084 | + * |
5dd10c98 | 7085 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 7086 | + * |
7e46296a | 7087 | + * Distributed under GPLv2. |
2380c486 | 7088 | + * |
7e46296a AM |
7089 | + * This file contains block io functions for TuxOnIce. These are |
7090 | + * used by the swapwriter and it is planned that they will also | |
7091 | + * be used by the NFSwriter. | |
2380c486 | 7092 | + * |
7e46296a | 7093 | + */ |
2380c486 | 7094 | + |
7e46296a AM |
7095 | +#include <linux/blkdev.h> |
7096 | +#include <linux/syscalls.h> | |
7097 | +#include <linux/suspend.h> | |
7098 | +#include <linux/ctype.h> | |
cacc47f8 | 7099 | +#include <linux/fs_uuid.h> |
7e46296a | 7100 | +#include <scsi/scsi_scan.h> |
2380c486 | 7101 | + |
7e46296a AM |
7102 | +#include "tuxonice.h" |
7103 | +#include "tuxonice_sysfs.h" | |
7104 | +#include "tuxonice_modules.h" | |
7105 | +#include "tuxonice_prepare_image.h" | |
7106 | +#include "tuxonice_bio.h" | |
7107 | +#include "tuxonice_ui.h" | |
7108 | +#include "tuxonice_alloc.h" | |
7109 | +#include "tuxonice_io.h" | |
7110 | +#include "tuxonice_builtin.h" | |
7111 | +#include "tuxonice_bio_internal.h" | |
2380c486 | 7112 | + |
7e46296a AM |
7113 | +#define MEMORY_ONLY 1 |
7114 | +#define THROTTLE_WAIT 2 | |
2380c486 | 7115 | + |
7e46296a AM |
7116 | +/* #define MEASURE_MUTEX_CONTENTION */ |
7117 | +#ifndef MEASURE_MUTEX_CONTENTION | |
7118 | +#define my_mutex_lock(index, the_lock) mutex_lock(the_lock) | |
7119 | +#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock) | |
7120 | +#else | |
7121 | +unsigned long mutex_times[2][2][NR_CPUS]; | |
7122 | +#define my_mutex_lock(index, the_lock) do { \ | |
7123 | + int have_mutex; \ | |
7124 | + have_mutex = mutex_trylock(the_lock); \ | |
7125 | + if (!have_mutex) { \ | |
7126 | + mutex_lock(the_lock); \ | |
7127 | + mutex_times[index][0][smp_processor_id()]++; \ | |
7128 | + } else { \ | |
7129 | + mutex_times[index][1][smp_processor_id()]++; \ | |
7130 | + } | |
2380c486 | 7131 | + |
7e46296a AM |
7132 | +#define my_mutex_unlock(index, the_lock) \ |
7133 | + mutex_unlock(the_lock); \ | |
7134 | +} while (0) | |
7135 | +#endif | |
2380c486 | 7136 | + |
7e46296a AM |
7137 | +static int page_idx, reset_idx; |
7138 | + | |
7139 | +static int target_outstanding_io = 1024; | |
7140 | +static int max_outstanding_writes, max_outstanding_reads; | |
7141 | + | |
7142 | +static struct page *bio_queue_head, *bio_queue_tail; | |
7143 | +static atomic_t toi_bio_queue_size; | |
7144 | +static DEFINE_SPINLOCK(bio_queue_lock); | |
7145 | + | |
7146 | +static int free_mem_throttle, throughput_throttle; | |
7147 | +int more_readahead = 1; | |
7148 | +static struct page *readahead_list_head, *readahead_list_tail; | |
7149 | + | |
7150 | +static struct page *waiting_on; | |
7151 | + | |
7152 | +static atomic_t toi_io_in_progress, toi_io_done; | |
7153 | +static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait); | |
7154 | + | |
7155 | +int current_stream; | |
7156 | +/* Not static, so that the allocators can setup and complete | |
7157 | + * writing the header */ | |
7158 | +char *toi_writer_buffer; | |
7159 | +int toi_writer_buffer_posn; | |
7160 | + | |
7161 | +static DEFINE_MUTEX(toi_bio_mutex); | |
7162 | +static DEFINE_MUTEX(toi_bio_readahead_mutex); | |
7163 | + | |
7164 | +static struct task_struct *toi_queue_flusher; | |
7165 | +static int toi_bio_queue_flush_pages(int dedicated_thread); | |
7166 | + | |
7167 | +struct toi_module_ops toi_blockwriter_ops; | |
7168 | + | |
7169 | +#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \ | |
7170 | + atomic_read(&toi_bio_queue_size)) | |
7171 | + | |
7172 | +unsigned long raw_pages_allocd, header_pages_reserved; | |
2380c486 JR |
7173 | + |
7174 | +/** | |
7e46296a AM |
7175 | + * set_free_mem_throttle - set the point where we pause to avoid oom. |
7176 | + * | |
7177 | + * Initially, this value is zero, but when we first fail to allocate memory, | |
7178 | + * we set it (plus a buffer) and thereafter throttle i/o once that limit is | |
7179 | + * reached. | |
2380c486 | 7180 | + **/ |
7e46296a | 7181 | +static void set_free_mem_throttle(void) |
2380c486 | 7182 | +{ |
7e46296a | 7183 | + int new_throttle = nr_unallocated_buffer_pages() + 256; |
2380c486 | 7184 | + |
7e46296a AM |
7185 | + if (new_throttle > free_mem_throttle) |
7186 | + free_mem_throttle = new_throttle; | |
2380c486 JR |
7187 | +} |
7188 | + | |
7e46296a AM |
7189 | +#define NUM_REASONS 7 |
7190 | +static atomic_t reasons[NUM_REASONS]; | |
7191 | +static char *reason_name[NUM_REASONS] = { | |
7192 | + "readahead not ready", | |
7193 | + "bio allocation", | |
7194 | + "synchronous I/O", | |
7195 | + "toi_bio_get_new_page", | |
7196 | + "memory low", | |
7197 | + "readahead buffer allocation", | |
7198 | + "throughput_throttle", | |
7199 | +}; | |
7200 | + | |
7201 | +/* User Specified Parameters. */ | |
7202 | +unsigned long resume_firstblock; | |
7203 | +dev_t resume_dev_t; | |
7204 | +struct block_device *resume_block_device; | |
7205 | +static atomic_t resume_bdev_open_count; | |
7206 | + | |
7207 | +struct block_device *header_block_device; | |
7208 | + | |
2380c486 | 7209 | +/** |
7e46296a AM |
7210 | + * toi_open_bdev: Open a bdev at resume time. |
7211 | + * | |
7212 | + * index: The swap index. May be MAX_SWAPFILES for the resume_dev_t | |
7213 | + * (the user can have resume= pointing at a swap partition/file that isn't | |
7214 | + * swapon'd when they hibernate. MAX_SWAPFILES+1 for the first page of the | |
7215 | + * header. It will be from a swap partition that was enabled when we hibernated, | |
7216 | + * but we don't know it's real index until we read that first page. | |
7217 | + * dev_t: The device major/minor. | |
7218 | + * display_errs: Whether to try to do this quietly. | |
7219 | + * | |
7220 | + * We stored a dev_t in the image header. Open the matching device without | |
7221 | + * requiring /dev/<whatever> in most cases and record the details needed | |
7222 | + * to close it later and avoid duplicating work. | |
7223 | + */ | |
7224 | +struct block_device *toi_open_bdev(char *uuid, dev_t default_device, | |
7225 | + int display_errs) | |
7226 | +{ | |
7227 | + struct block_device *bdev; | |
7228 | + dev_t device = default_device; | |
7229 | + char buf[32]; | |
85eb3c9d | 7230 | + int retried = 0; |
7e46296a | 7231 | + |
85eb3c9d | 7232 | +retry: |
7e46296a | 7233 | + if (uuid) { |
cacc47f8 AM |
7234 | + struct fs_info seek; |
7235 | + strncpy((char *) &seek.uuid, uuid, 16); | |
7236 | + seek.dev_t = 0; | |
7237 | + seek.last_mount_size = 0; | |
7238 | + device = blk_lookup_fs_info(&seek); | |
7e46296a AM |
7239 | + if (!device) { |
7240 | + device = default_device; | |
7241 | + printk(KERN_DEBUG "Unable to resolve uuid. Falling back" | |
7242 | + " to dev_t.\n"); | |
7243 | + } else | |
7244 | + printk(KERN_DEBUG "Resolved uuid to device %s.\n", | |
7245 | + format_dev_t(buf, device)); | |
2380c486 JR |
7246 | + } |
7247 | + | |
7e46296a AM |
7248 | + if (!device) { |
7249 | + printk(KERN_ERR "TuxOnIce attempting to open a " | |
7250 | + "blank dev_t!\n"); | |
7251 | + dump_stack(); | |
7252 | + return NULL; | |
2380c486 | 7253 | + } |
5dd10c98 | 7254 | + bdev = toi_open_by_devnum(device); |
2380c486 | 7255 | + |
7e46296a | 7256 | + if (IS_ERR(bdev) || !bdev) { |
85eb3c9d AM |
7257 | + if (!retried) { |
7258 | + retried = 1; | |
7259 | + wait_for_device_probe(); | |
7260 | + scsi_complete_async_scans(); | |
7261 | + goto retry; | |
7262 | + } | |
7e46296a AM |
7263 | + if (display_errs) |
7264 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
7265 | + "Failed to get access to block device " | |
7266 | + "\"%x\" (error %d).\n Maybe you need " | |
7267 | + "to run mknod and/or lvmsetup in an " | |
7268 | + "initrd/ramfs?", device, bdev); | |
7269 | + return ERR_PTR(-EINVAL); | |
2380c486 | 7270 | + } |
7e46296a AM |
7271 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
7272 | + "TuxOnIce got bdev %p for dev_t %x.", | |
7273 | + bdev, device); | |
2380c486 | 7274 | + |
7e46296a AM |
7275 | + return bdev; |
7276 | +} | |
2380c486 | 7277 | + |
7e46296a AM |
7278 | +static void toi_bio_reserve_header_space(unsigned long request) |
7279 | +{ | |
7280 | + header_pages_reserved = request; | |
2380c486 JR |
7281 | +} |
7282 | + | |
7283 | +/** | |
7e46296a AM |
7284 | + * do_bio_wait - wait for some TuxOnIce I/O to complete |
7285 | + * @reason: The array index of the reason we're waiting. | |
2380c486 | 7286 | + * |
7e46296a AM |
7287 | + * Wait for a particular page of I/O if we're after a particular page. |
7288 | + * If we're not after a particular page, wait instead for all in flight | |
7289 | + * I/O to be completed or for us to have enough free memory to be able | |
7290 | + * to submit more I/O. | |
0ada99ac | 7291 | + * |
7e46296a | 7292 | + * If we wait, we also update our statistics regarding why we waited. |
2380c486 | 7293 | + **/ |
7e46296a | 7294 | +static void do_bio_wait(int reason) |
2380c486 | 7295 | +{ |
7e46296a | 7296 | + struct page *was_waiting_on = waiting_on; |
2380c486 | 7297 | + |
7e46296a AM |
7298 | + /* On SMP, waiting_on can be reset, so we make a copy */ |
7299 | + if (was_waiting_on) { | |
5dd10c98 AM |
7300 | + wait_on_page_locked(was_waiting_on); |
7301 | + atomic_inc(&reasons[reason]); | |
7e46296a AM |
7302 | + } else { |
7303 | + atomic_inc(&reasons[reason]); | |
2380c486 | 7304 | + |
7e46296a AM |
7305 | + wait_event(num_in_progress_wait, |
7306 | + !atomic_read(&toi_io_in_progress) || | |
7307 | + nr_unallocated_buffer_pages() > free_mem_throttle); | |
2380c486 | 7308 | + } |
2380c486 JR |
7309 | +} |
7310 | + | |
7311 | +/** | |
7e46296a AM |
7312 | + * throttle_if_needed - wait for I/O completion if throttle points are reached |
7313 | + * @flags: What to check and how to act. | |
7314 | + * | |
7315 | + * Check whether we need to wait for some I/O to complete. We always check | |
7316 | + * whether we have enough memory available, but may also (depending upon | |
7317 | + * @reason) check if the throughput throttle limit has been reached. | |
2380c486 | 7318 | + **/ |
7e46296a | 7319 | +static int throttle_if_needed(int flags) |
2380c486 | 7320 | +{ |
7e46296a | 7321 | + int free_pages = nr_unallocated_buffer_pages(); |
2380c486 | 7322 | + |
7e46296a AM |
7323 | + /* Getting low on memory and I/O is in progress? */ |
7324 | + while (unlikely(free_pages < free_mem_throttle) && | |
e876a0dd AM |
7325 | + atomic_read(&toi_io_in_progress) && |
7326 | + !test_result_state(TOI_ABORTED)) { | |
7e46296a AM |
7327 | + if (!(flags & THROTTLE_WAIT)) |
7328 | + return -ENOMEM; | |
7329 | + do_bio_wait(4); | |
7330 | + free_pages = nr_unallocated_buffer_pages(); | |
7331 | + } | |
7332 | + | |
7333 | + while (!(flags & MEMORY_ONLY) && throughput_throttle && | |
e876a0dd AM |
7334 | + TOTAL_OUTSTANDING_IO >= throughput_throttle && |
7335 | + !test_result_state(TOI_ABORTED)) { | |
7e46296a AM |
7336 | + int result = toi_bio_queue_flush_pages(0); |
7337 | + if (result) | |
7338 | + return result; | |
7339 | + atomic_inc(&reasons[6]); | |
7340 | + wait_event(num_in_progress_wait, | |
7341 | + !atomic_read(&toi_io_in_progress) || | |
7342 | + TOTAL_OUTSTANDING_IO < throughput_throttle); | |
2380c486 JR |
7343 | + } |
7344 | + | |
7345 | + return 0; | |
7346 | +} | |
7347 | + | |
7348 | +/** | |
7e46296a AM |
7349 | + * update_throughput_throttle - update the raw throughput throttle |
7350 | + * @jif_index: The number of times this function has been called. | |
7351 | + * | |
5dd10c98 AM |
7352 | + * This function is called four times per second by the core, and used to limit |
7353 | + * the amount of I/O we submit at once, spreading out our waiting through the | |
7e46296a AM |
7354 | + * whole job and letting userui get an opportunity to do its work. |
7355 | + * | |
5dd10c98 | 7356 | + * We don't start limiting I/O until 1/4s has gone so that we get a |
7e46296a AM |
7357 | + * decent sample for our initial limit, and keep updating it because |
7358 | + * throughput may vary (on rotating media, eg) with our block number. | |
7359 | + * | |
7360 | + * We throttle to 1/10s worth of I/O. | |
2380c486 | 7361 | + **/ |
7e46296a | 7362 | +static void update_throughput_throttle(int jif_index) |
2380c486 | 7363 | +{ |
7e46296a | 7364 | + int done = atomic_read(&toi_io_done); |
5dd10c98 | 7365 | + throughput_throttle = done * 2 / 5 / jif_index; |
2380c486 JR |
7366 | +} |
7367 | + | |
7368 | +/** | |
7e46296a | 7369 | + * toi_finish_all_io - wait for all outstanding i/o to complete |
2380c486 | 7370 | + * |
7e46296a | 7371 | + * Flush any queued but unsubmitted I/O and wait for it all to complete. |
2380c486 | 7372 | + **/ |
7e46296a | 7373 | +static int toi_finish_all_io(void) |
2380c486 | 7374 | +{ |
7e46296a | 7375 | + int result = toi_bio_queue_flush_pages(0); |
85eb3c9d AM |
7376 | + toi_bio_queue_flusher_should_finish = 1; |
7377 | + wake_up(&toi_io_queue_flusher); | |
7e46296a | 7378 | + wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO); |
2380c486 JR |
7379 | + return result; |
7380 | +} | |
7381 | + | |
7382 | +/** | |
7e46296a AM |
7383 | + * toi_end_bio - bio completion function. |
7384 | + * @bio: bio that has completed. | |
7385 | + * @err: Error value. Yes, like end_swap_bio_read, we ignore it. | |
2380c486 | 7386 | + * |
7e46296a AM |
7387 | + * Function called by the block driver from interrupt context when I/O is |
7388 | + * completed. If we were writing the page, we want to free it and will have | |
7389 | + * set bio->bi_private to the parameter we should use in telling the page | |
7390 | + * allocation accounting code what the page was allocated for. If we're | |
7391 | + * reading the page, it will be in the singly linked list made from | |
7392 | + * page->private pointers. | |
2380c486 | 7393 | + **/ |
7e46296a | 7394 | +static void toi_end_bio(struct bio *bio, int err) |
2380c486 | 7395 | +{ |
7e46296a | 7396 | + struct page *page = bio->bi_io_vec[0].bv_page; |
2380c486 | 7397 | + |
7e46296a | 7398 | + BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); |
2380c486 | 7399 | + |
7e46296a AM |
7400 | + unlock_page(page); |
7401 | + bio_put(bio); | |
2380c486 | 7402 | + |
7e46296a AM |
7403 | + if (waiting_on == page) |
7404 | + waiting_on = NULL; | |
2380c486 | 7405 | + |
7e46296a | 7406 | + put_page(page); |
2380c486 | 7407 | + |
7e46296a AM |
7408 | + if (bio->bi_private) |
7409 | + toi__free_page((int) ((unsigned long) bio->bi_private) , page); | |
2380c486 | 7410 | + |
7e46296a | 7411 | + bio_put(bio); |
2380c486 | 7412 | + |
7e46296a AM |
7413 | + atomic_dec(&toi_io_in_progress); |
7414 | + atomic_inc(&toi_io_done); | |
2380c486 | 7415 | + |
7e46296a | 7416 | + wake_up(&num_in_progress_wait); |
2380c486 JR |
7417 | +} |
7418 | + | |
7419 | +/** | |
7e46296a AM |
7420 | + * submit - submit BIO request |
7421 | + * @writing: READ or WRITE. | |
7422 | + * @dev: The block device we're using. | |
7423 | + * @first_block: The first sector we're using. | |
7424 | + * @page: The page being used for I/O. | |
7425 | + * @free_group: If writing, the group that was used in allocating the page | |
7426 | + * and which will be used in freeing the page from the completion | |
7427 | + * routine. | |
2380c486 | 7428 | + * |
7e46296a AM |
7429 | + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the |
7430 | + * textbook - allocate and initialize the bio. If we're writing, make sure | |
7431 | + * the page is marked as dirty. Then submit it and carry on." | |
7432 | + * | |
7433 | + * If we're just testing the speed of our own code, we fake having done all | |
7434 | + * the hard work and all toi_end_bio immediately. | |
7435 | + **/ | |
7436 | +static int submit(int writing, struct block_device *dev, sector_t first_block, | |
7437 | + struct page *page, int free_group) | |
2380c486 | 7438 | +{ |
7e46296a AM |
7439 | + struct bio *bio = NULL; |
7440 | + int cur_outstanding_io, result; | |
2380c486 | 7441 | + |
7e46296a AM |
7442 | + /* |
7443 | + * Shouldn't throttle if reading - can deadlock in the single | |
7444 | + * threaded case as pages are only freed when we use the | |
7445 | + * readahead. | |
7446 | + */ | |
7447 | + if (writing) { | |
7448 | + result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT); | |
7449 | + if (result) | |
7450 | + return result; | |
7451 | + } | |
7452 | + | |
7453 | + while (!bio) { | |
7454 | + bio = bio_alloc(TOI_ATOMIC_GFP, 1); | |
7455 | + if (!bio) { | |
7456 | + set_free_mem_throttle(); | |
7457 | + do_bio_wait(1); | |
2380c486 | 7458 | + } |
0ada99ac | 7459 | + } |
2380c486 | 7460 | + |
7e46296a AM |
7461 | + bio->bi_bdev = dev; |
7462 | + bio->bi_sector = first_block; | |
7463 | + bio->bi_private = (void *) ((unsigned long) free_group); | |
7464 | + bio->bi_end_io = toi_end_bio; | |
2380c486 | 7465 | + |
7e46296a AM |
7466 | + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { |
7467 | + printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n", | |
7468 | + (unsigned long long) first_block); | |
7469 | + bio_put(bio); | |
7470 | + return -EFAULT; | |
7471 | + } | |
2380c486 | 7472 | + |
7e46296a | 7473 | + bio_get(bio); |
2380c486 | 7474 | + |
7e46296a AM |
7475 | + cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress); |
7476 | + if (writing) { | |
7477 | + if (cur_outstanding_io > max_outstanding_writes) | |
7478 | + max_outstanding_writes = cur_outstanding_io; | |
7479 | + } else { | |
7480 | + if (cur_outstanding_io > max_outstanding_reads) | |
7481 | + max_outstanding_reads = cur_outstanding_io; | |
7482 | + } | |
2380c486 | 7483 | + |
7e46296a | 7484 | + |
85eb3c9d AM |
7485 | + /* Still read the header! */ |
7486 | + if (unlikely(test_action_state(TOI_TEST_BIO) && writing)) { | |
7e46296a AM |
7487 | + /* Fake having done the hard work */ |
7488 | + set_bit(BIO_UPTODATE, &bio->bi_flags); | |
7489 | + toi_end_bio(bio, 0); | |
7490 | + } else | |
85eb3c9d AM |
7491 | + submit_bio(writing |
7492 | + | (1 << BIO_RW_SYNCIO) | |
7493 | + | (1 << BIO_RW_TUXONICE) | |
7494 | + | (1 << BIO_RW_UNPLUG) | |
7495 | + /* | (1 << BIO_RW_NOIDLE) */, bio); | |
7e46296a AM |
7496 | + |
7497 | + return 0; | |
2380c486 JR |
7498 | +} |
7499 | + | |
7500 | +/** | |
7e46296a AM |
7501 | + * toi_do_io: Prepare to do some i/o on a page and submit or batch it. |
7502 | + * | |
7503 | + * @writing: Whether reading or writing. | |
7504 | + * @bdev: The block device which we're using. | |
7505 | + * @block0: The first sector we're reading or writing. | |
7506 | + * @page: The page on which I/O is being done. | |
7507 | + * @readahead_index: If doing readahead, the index (reset this flag when done). | |
7508 | + * @syncio: Whether the i/o is being done synchronously. | |
7509 | + * | |
7510 | + * Prepare and start a read or write operation. | |
7511 | + * | |
7512 | + * Note that we always work with our own page. If writing, we might be given a | |
7513 | + * compression buffer that will immediately be used to start compressing the | |
7514 | + * next page. For reading, we do readahead and therefore don't know the final | |
7515 | + * address where the data needs to go. | |
2380c486 | 7516 | + **/ |
7e46296a AM |
7517 | +int toi_do_io(int writing, struct block_device *bdev, long block0, |
7518 | + struct page *page, int is_readahead, int syncio, int free_group) | |
2380c486 | 7519 | +{ |
7e46296a | 7520 | + page->private = 0; |
2380c486 | 7521 | + |
7e46296a AM |
7522 | + /* Do here so we don't race against toi_bio_get_next_page_read */ |
7523 | + lock_page(page); | |
2380c486 | 7524 | + |
7e46296a AM |
7525 | + if (is_readahead) { |
7526 | + if (readahead_list_head) | |
7527 | + readahead_list_tail->private = (unsigned long) page; | |
7528 | + else | |
7529 | + readahead_list_head = page; | |
2380c486 | 7530 | + |
7e46296a AM |
7531 | + readahead_list_tail = page; |
7532 | + } | |
2380c486 | 7533 | + |
7e46296a AM |
7534 | + /* Done before submitting to avoid races. */ |
7535 | + if (syncio) | |
7536 | + waiting_on = page; | |
7537 | + | |
7538 | + /* Submit the page */ | |
7539 | + get_page(page); | |
7540 | + | |
7541 | + if (submit(writing, bdev, block0, page, free_group)) | |
7542 | + return -EFAULT; | |
7543 | + | |
7544 | + if (syncio) | |
7545 | + do_bio_wait(2); | |
7546 | + | |
7547 | + return 0; | |
2380c486 JR |
7548 | +} |
7549 | + | |
7550 | +/** | |
7e46296a AM |
7551 | + * toi_bdev_page_io - simpler interface to do directly i/o on a single page |
7552 | + * @writing: Whether reading or writing. | |
7553 | + * @bdev: Block device on which we're operating. | |
7554 | + * @pos: Sector at which page to read or write starts. | |
7555 | + * @page: Page to be read/written. | |
7556 | + * | |
7557 | + * A simple interface to submit a page of I/O and wait for its completion. | |
7558 | + * The caller must free the page used. | |
2380c486 | 7559 | + **/ |
7e46296a AM |
7560 | +static int toi_bdev_page_io(int writing, struct block_device *bdev, |
7561 | + long pos, struct page *page) | |
2380c486 | 7562 | +{ |
7e46296a | 7563 | + return toi_do_io(writing, bdev, pos, page, 0, 1, 0); |
2380c486 JR |
7564 | +} |
7565 | + | |
7566 | +/** | |
7e46296a AM |
7567 | + * toi_bio_memory_needed - report the amount of memory needed for block i/o |
7568 | + * | |
7569 | + * We want to have at least enough memory so as to have target_outstanding_io | |
7570 | + * or more transactions on the fly at once. If we can do more, fine. | |
2380c486 | 7571 | + **/ |
7e46296a | 7572 | +static int toi_bio_memory_needed(void) |
2380c486 | 7573 | +{ |
7e46296a AM |
7574 | + return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) + |
7575 | + sizeof(struct bio)); | |
2380c486 JR |
7576 | +} |
7577 | + | |
7578 | +/** | |
7e46296a AM |
7579 | + * toi_bio_print_debug_stats - put out debugging info in the buffer provided |
7580 | + * @buffer: A buffer of size @size into which text should be placed. | |
7581 | + * @size: The size of @buffer. | |
7582 | + * | |
7583 | + * Fill a buffer with debugging info. This is used for both our debug_info sysfs | |
7584 | + * entry and for recording the same info in dmesg. | |
2380c486 | 7585 | + **/ |
7e46296a | 7586 | +static int toi_bio_print_debug_stats(char *buffer, int size) |
2380c486 | 7587 | +{ |
7e46296a | 7588 | + int len = 0; |
2380c486 | 7589 | + |
7e46296a AM |
7590 | + if (toiActiveAllocator != &toi_blockwriter_ops) { |
7591 | + len = scnprintf(buffer, size, | |
7592 | + "- Block I/O inactive.\n"); | |
7593 | + return len; | |
2380c486 JR |
7594 | + } |
7595 | + | |
7e46296a | 7596 | + len = scnprintf(buffer, size, "- Block I/O active.\n"); |
2380c486 | 7597 | + |
5dd10c98 AM |
7598 | + len += toi_bio_chains_debug_info(buffer + len, size - len); |
7599 | + | |
7e46296a AM |
7600 | + len += scnprintf(buffer + len, size - len, |
7601 | + "- Max outstanding reads %d. Max writes %d.\n", | |
7602 | + max_outstanding_reads, max_outstanding_writes); | |
2380c486 | 7603 | + |
7e46296a AM |
7604 | + len += scnprintf(buffer + len, size - len, |
7605 | + " Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n", | |
7606 | + target_outstanding_io, | |
7607 | + PAGE_SIZE, (unsigned int) sizeof(struct request), | |
7608 | + (unsigned int) sizeof(struct bio), toi_bio_memory_needed()); | |
2380c486 | 7609 | + |
7e46296a AM |
7610 | +#ifdef MEASURE_MUTEX_CONTENTION |
7611 | + { | |
7612 | + int i; | |
2380c486 | 7613 | + |
7e46296a AM |
7614 | + len += scnprintf(buffer + len, size - len, |
7615 | + " Mutex contention while reading:\n Contended Free\n"); | |
2380c486 | 7616 | + |
7e46296a AM |
7617 | + for_each_online_cpu(i) |
7618 | + len += scnprintf(buffer + len, size - len, | |
7619 | + " %9lu %9lu\n", | |
7620 | + mutex_times[0][0][i], mutex_times[0][1][i]); | |
2380c486 | 7621 | + |
7e46296a AM |
7622 | + len += scnprintf(buffer + len, size - len, |
7623 | + " Mutex contention while writing:\n Contended Free\n"); | |
2380c486 | 7624 | + |
7e46296a AM |
7625 | + for_each_online_cpu(i) |
7626 | + len += scnprintf(buffer + len, size - len, | |
7627 | + " %9lu %9lu\n", | |
7628 | + mutex_times[1][0][i], mutex_times[1][1][i]); | |
2380c486 | 7629 | + |
7e46296a | 7630 | + } |
2380c486 | 7631 | +#endif |
2380c486 | 7632 | + |
7e46296a AM |
7633 | + return len + scnprintf(buffer + len, size - len, |
7634 | + " Free mem throttle point reached %d.\n", free_mem_throttle); | |
7635 | +} | |
2380c486 | 7636 | + |
7e46296a AM |
7637 | +static int total_header_bytes; |
7638 | +static int unowned; | |
2380c486 | 7639 | + |
7e46296a AM |
7640 | +void debug_broken_header(void) |
7641 | +{ | |
7642 | + printk(KERN_DEBUG "Image header too big for size allocated!\n"); | |
7643 | + print_toi_header_storage_for_modules(); | |
7644 | + printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed()); | |
5dd10c98 | 7645 | + printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header)); |
7e46296a AM |
7646 | + printk(KERN_DEBUG "Total unowned : %d.\n", unowned); |
7647 | + printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes, | |
7648 | + DIV_ROUND_UP(total_header_bytes, PAGE_SIZE)); | |
7649 | + printk(KERN_DEBUG "Space needed now : %ld.\n", | |
7650 | + get_header_storage_needed()); | |
7651 | + dump_block_chains(); | |
7652 | + abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small."); | |
7653 | +} | |
2380c486 | 7654 | + |
7e46296a AM |
7655 | +/** |
7656 | + * toi_rw_init - prepare to read or write a stream in the image | |
7657 | + * @writing: Whether reading or writing. | |
7658 | + * @stream number: Section of the image being processed. | |
2380c486 | 7659 | + * |
7e46296a AM |
7660 | + * Prepare to read or write a section ('stream') in the image. |
7661 | + **/ | |
7662 | +static int toi_rw_init(int writing, int stream_number) | |
7663 | +{ | |
7664 | + if (stream_number) | |
7665 | + toi_extent_state_restore(stream_number); | |
7666 | + else | |
7667 | + toi_extent_state_goto_start(); | |
2380c486 | 7668 | + |
7e46296a AM |
7669 | + if (writing) { |
7670 | + reset_idx = 0; | |
7671 | + if (!current_stream) | |
7672 | + page_idx = 0; | |
7673 | + } else { | |
7674 | + reset_idx = 1; | |
7675 | + } | |
2380c486 | 7676 | + |
7e46296a AM |
7677 | + atomic_set(&toi_io_done, 0); |
7678 | + if (!toi_writer_buffer) | |
7679 | + toi_writer_buffer = (char *) toi_get_zeroed_page(11, | |
7680 | + TOI_ATOMIC_GFP); | |
7681 | + toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE; | |
7682 | + | |
7683 | + current_stream = stream_number; | |
7684 | + | |
7685 | + more_readahead = 1; | |
7686 | + | |
7687 | + return toi_writer_buffer ? 0 : -ENOMEM; | |
7688 | +} | |
2380c486 JR |
7689 | + |
7690 | +/** | |
7e46296a AM |
7691 | + * toi_bio_queue_write - queue a page for writing |
7692 | + * @full_buffer: Pointer to a page to be queued | |
2380c486 | 7693 | + * |
7e46296a AM |
7694 | + * Add a page to the queue to be submitted. If we're the queue flusher, |
7695 | + * we'll do this once we've dropped toi_bio_mutex, so other threads can | |
7696 | + * continue to submit I/O while we're on the slow path doing the actual | |
7697 | + * submission. | |
2380c486 | 7698 | + **/ |
7e46296a | 7699 | +static void toi_bio_queue_write(char **full_buffer) |
2380c486 | 7700 | +{ |
7e46296a AM |
7701 | + struct page *page = virt_to_page(*full_buffer); |
7702 | + unsigned long flags; | |
2380c486 | 7703 | + |
7e46296a AM |
7704 | + *full_buffer = NULL; |
7705 | + page->private = 0; | |
2380c486 | 7706 | + |
7e46296a AM |
7707 | + spin_lock_irqsave(&bio_queue_lock, flags); |
7708 | + if (!bio_queue_head) | |
7709 | + bio_queue_head = page; | |
7710 | + else | |
7711 | + bio_queue_tail->private = (unsigned long) page; | |
2380c486 | 7712 | + |
7e46296a AM |
7713 | + bio_queue_tail = page; |
7714 | + atomic_inc(&toi_bio_queue_size); | |
2380c486 | 7715 | + |
7e46296a AM |
7716 | + spin_unlock_irqrestore(&bio_queue_lock, flags); |
7717 | + wake_up(&toi_io_queue_flusher); | |
7718 | +} | |
2380c486 | 7719 | + |
7e46296a AM |
7720 | +/** |
7721 | + * toi_rw_cleanup - Cleanup after i/o. | |
7722 | + * @writing: Whether we were reading or writing. | |
7723 | + * | |
7724 | + * Flush all I/O and clean everything up after reading or writing a | |
7725 | + * section of the image. | |
7726 | + **/ | |
7727 | +static int toi_rw_cleanup(int writing) | |
7728 | +{ | |
e876a0dd | 7729 | + int i, result = 0; |
2380c486 | 7730 | + |
7e46296a AM |
7731 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_rw_cleanup."); |
7732 | + if (writing) { | |
7e46296a AM |
7733 | + if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED)) |
7734 | + toi_bio_queue_write(&toi_writer_buffer); | |
2380c486 | 7735 | + |
e876a0dd AM |
7736 | + while (bio_queue_head && !result) |
7737 | + result = toi_bio_queue_flush_pages(0); | |
2380c486 | 7738 | + |
7e46296a AM |
7739 | + if (result) |
7740 | + return result; | |
2380c486 | 7741 | + |
7e46296a AM |
7742 | + if (current_stream == 2) |
7743 | + toi_extent_state_save(1); | |
7744 | + else if (current_stream == 1) | |
7745 | + toi_extent_state_save(3); | |
2380c486 JR |
7746 | + } |
7747 | + | |
7e46296a | 7748 | + result = toi_finish_all_io(); |
2380c486 | 7749 | + |
7e46296a AM |
7750 | + while (readahead_list_head) { |
7751 | + void *next = (void *) readahead_list_head->private; | |
7752 | + toi__free_page(12, readahead_list_head); | |
7753 | + readahead_list_head = next; | |
2380c486 JR |
7754 | + } |
7755 | + | |
7e46296a | 7756 | + readahead_list_tail = NULL; |
2380c486 | 7757 | + |
7e46296a AM |
7758 | + if (!current_stream) |
7759 | + return result; | |
2380c486 | 7760 | + |
7e46296a AM |
7761 | + for (i = 0; i < NUM_REASONS; i++) { |
7762 | + if (!atomic_read(&reasons[i])) | |
7763 | + continue; | |
7764 | + printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n", | |
7765 | + reason_name[i], atomic_read(&reasons[i])); | |
7766 | + atomic_set(&reasons[i], 0); | |
2380c486 JR |
7767 | + } |
7768 | + | |
7e46296a AM |
7769 | + current_stream = 0; |
7770 | + return result; | |
2380c486 | 7771 | +} |
2380c486 | 7772 | + |
7e46296a AM |
7773 | +/** |
7774 | + * toi_start_one_readahead - start one page of readahead | |
7775 | + * @dedicated_thread: Is this a thread dedicated to doing readahead? | |
7776 | + * | |
7777 | + * Start one new page of readahead. If this is being called by a thread | |
7778 | + * whose only just is to submit readahead, don't quit because we failed | |
7779 | + * to allocate a page. | |
7780 | + **/ | |
7781 | +static int toi_start_one_readahead(int dedicated_thread) | |
2380c486 | 7782 | +{ |
7e46296a AM |
7783 | + char *buffer = NULL; |
7784 | + int oom = 0, result; | |
2380c486 | 7785 | + |
7e46296a | 7786 | + result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0); |
5dd10c98 | 7787 | + if (result) |
7e46296a | 7788 | + return result; |
2380c486 | 7789 | + |
7e46296a | 7790 | + mutex_lock(&toi_bio_readahead_mutex); |
2380c486 | 7791 | + |
7e46296a AM |
7792 | + while (!buffer) { |
7793 | + buffer = (char *) toi_get_zeroed_page(12, | |
7794 | + TOI_ATOMIC_GFP); | |
7795 | + if (!buffer) { | |
7796 | + if (oom && !dedicated_thread) { | |
7797 | + mutex_unlock(&toi_bio_readahead_mutex); | |
7e46296a AM |
7798 | + return -ENOMEM; |
7799 | + } | |
2380c486 | 7800 | + |
7e46296a AM |
7801 | + oom = 1; |
7802 | + set_free_mem_throttle(); | |
7803 | + do_bio_wait(5); | |
7804 | + } | |
7805 | + } | |
2380c486 | 7806 | + |
7e46296a | 7807 | + result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0); |
e876a0dd | 7808 | + if (result == -ENOSPC) |
7e46296a AM |
7809 | + toi__free_page(12, virt_to_page(buffer)); |
7810 | + mutex_unlock(&toi_bio_readahead_mutex); | |
5dd10c98 | 7811 | + if (result) { |
e876a0dd | 7812 | + if (result == -ENOSPC) |
5dd10c98 AM |
7813 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
7814 | + "Last readahead page submitted."); | |
7815 | + else | |
7816 | + printk(KERN_DEBUG "toi_bio_rw_page returned %d.\n", | |
7817 | + result); | |
7818 | + } | |
7e46296a | 7819 | + return result; |
2380c486 JR |
7820 | +} |
7821 | + | |
7e46296a AM |
7822 | +/** |
7823 | + * toi_start_new_readahead - start new readahead | |
7824 | + * @dedicated_thread: Are we dedicated to this task? | |
7825 | + * | |
7826 | + * Start readahead of image pages. | |
7827 | + * | |
7828 | + * We can be called as a thread dedicated to this task (may be helpful on | |
7829 | + * systems with lots of CPUs), in which case we don't exit until there's no | |
7830 | + * more readahead. | |
7831 | + * | |
7832 | + * If this is not called by a dedicated thread, we top up our queue until | |
7833 | + * there's no more readahead to submit, we've submitted the number given | |
7834 | + * in target_outstanding_io or the number in progress exceeds the target | |
7835 | + * outstanding I/O value. | |
7836 | + * | |
7837 | + * No mutex needed because this is only ever called by the first cpu. | |
7838 | + **/ | |
7839 | +static int toi_start_new_readahead(int dedicated_thread) | |
2380c486 | 7840 | +{ |
7e46296a | 7841 | + int last_result, num_submitted = 0; |
2380c486 | 7842 | + |
7e46296a AM |
7843 | + /* Start a new readahead? */ |
7844 | + if (!more_readahead) | |
7845 | + return 0; | |
2380c486 | 7846 | + |
7e46296a AM |
7847 | + do { |
7848 | + last_result = toi_start_one_readahead(dedicated_thread); | |
2380c486 | 7849 | + |
7e46296a | 7850 | + if (last_result) { |
e876a0dd | 7851 | + if (last_result == -ENOMEM || last_result == -ENOSPC) |
7e46296a | 7852 | + return 0; |
2380c486 | 7853 | + |
7e46296a AM |
7854 | + printk(KERN_DEBUG |
7855 | + "Begin read chunk returned %d.\n", | |
7856 | + last_result); | |
7857 | + } else | |
7858 | + num_submitted++; | |
92bca44c | 7859 | + |
7e46296a AM |
7860 | + } while (more_readahead && !last_result && |
7861 | + (dedicated_thread || | |
7862 | + (num_submitted < target_outstanding_io && | |
7863 | + atomic_read(&toi_io_in_progress) < target_outstanding_io))); | |
2380c486 | 7864 | + |
7e46296a AM |
7865 | + return last_result; |
7866 | +} | |
2380c486 | 7867 | + |
7e46296a AM |
7868 | +/** |
7869 | + * bio_io_flusher - start the dedicated I/O flushing routine | |
7870 | + * @writing: Whether we're writing the image. | |
7871 | + **/ | |
7872 | +static int bio_io_flusher(int writing) | |
7873 | +{ | |
7874 | + | |
7875 | + if (writing) | |
7876 | + return toi_bio_queue_flush_pages(1); | |
2380c486 | 7877 | + else |
7e46296a | 7878 | + return toi_start_new_readahead(1); |
2380c486 JR |
7879 | +} |
7880 | + | |
7e46296a AM |
7881 | +/** |
7882 | + * toi_bio_get_next_page_read - read a disk page, perhaps with readahead | |
7883 | + * @no_readahead: Whether we can use readahead | |
7884 | + * | |
7885 | + * Read a page from disk, submitting readahead and cleaning up finished i/o | |
7886 | + * while we wait for the page we're after. | |
7887 | + **/ | |
7888 | +static int toi_bio_get_next_page_read(int no_readahead) | |
2380c486 | 7889 | +{ |
85eb3c9d AM |
7890 | + char *virt; |
7891 | + struct page *old_readahead_list_head; | |
2380c486 | 7892 | + |
7e46296a AM |
7893 | + /* |
7894 | + * When reading the second page of the header, we have to | |
7895 | + * delay submitting the read until after we've gotten the | |
7896 | + * extents out of the first page. | |
7897 | + */ | |
7898 | + if (unlikely(no_readahead && toi_start_one_readahead(0))) { | |
7899 | + printk(KERN_EMERG "No readahead and toi_start_one_readahead " | |
7900 | + "returned non-zero.\n"); | |
7901 | + return -EIO; | |
7902 | + } | |
2380c486 | 7903 | + |
7e46296a AM |
7904 | + if (unlikely(!readahead_list_head)) { |
7905 | + /* | |
7906 | + * If the last page finishes exactly on the page | |
7907 | + * boundary, we will be called one extra time and | |
7908 | + * have no data to return. In this case, we should | |
7909 | + * not BUG(), like we used to! | |
7910 | + */ | |
7911 | + if (!more_readahead) { | |
7912 | + printk(KERN_EMERG "No more readahead.\n"); | |
e876a0dd | 7913 | + return -ENOSPC; |
7e46296a AM |
7914 | + } |
7915 | + if (unlikely(toi_start_one_readahead(0))) { | |
7916 | + printk(KERN_EMERG "No readahead and " | |
7917 | + "toi_start_one_readahead returned non-zero.\n"); | |
7918 | + return -EIO; | |
7919 | + } | |
2380c486 JR |
7920 | + } |
7921 | + | |
7e46296a AM |
7922 | + if (PageLocked(readahead_list_head)) { |
7923 | + waiting_on = readahead_list_head; | |
7924 | + do_bio_wait(0); | |
7925 | + } | |
2380c486 | 7926 | + |
7e46296a AM |
7927 | + virt = page_address(readahead_list_head); |
7928 | + memcpy(toi_writer_buffer, virt, PAGE_SIZE); | |
85eb3c9d AM |
7929 | + |
7930 | + mutex_lock(&toi_bio_readahead_mutex); | |
7931 | + old_readahead_list_head = readahead_list_head; | |
7932 | + readahead_list_head = (struct page *) readahead_list_head->private; | |
7933 | + mutex_unlock(&toi_bio_readahead_mutex); | |
7934 | + toi__free_page(12, old_readahead_list_head); | |
7e46296a | 7935 | + return 0; |
2380c486 | 7936 | +} |
2380c486 | 7937 | + |
7e46296a AM |
7938 | +/** |
7939 | + * toi_bio_queue_flush_pages - flush the queue of pages queued for writing | |
7940 | + * @dedicated_thread: Whether we're a dedicated thread | |
7941 | + * | |
7942 | + * Flush the queue of pages ready to be written to disk. | |
7943 | + * | |
7944 | + * If we're a dedicated thread, stay in here until told to leave, | |
7945 | + * sleeping in wait_event. | |
7946 | + * | |
7947 | + * The first thread is normally the only one to come in here. Another | |
7948 | + * thread can enter this routine too, though, via throttle_if_needed. | |
7949 | + * Since that's the case, we must be careful to only have one thread | |
7950 | + * doing this work at a time. Otherwise we have a race and could save | |
7951 | + * pages out of order. | |
7952 | + * | |
7953 | + * If an error occurs, free all remaining pages without submitting them | |
7954 | + * for I/O. | |
7955 | + **/ | |
2380c486 | 7956 | + |
7e46296a AM |
7957 | +int toi_bio_queue_flush_pages(int dedicated_thread) |
7958 | +{ | |
7959 | + unsigned long flags; | |
7960 | + int result = 0; | |
7961 | + static DEFINE_MUTEX(busy); | |
2380c486 | 7962 | + |
7e46296a AM |
7963 | + if (!mutex_trylock(&busy)) |
7964 | + return 0; | |
2380c486 | 7965 | + |
7e46296a AM |
7966 | +top: |
7967 | + spin_lock_irqsave(&bio_queue_lock, flags); | |
7968 | + while (bio_queue_head) { | |
7969 | + struct page *page = bio_queue_head; | |
7970 | + bio_queue_head = (struct page *) page->private; | |
7971 | + if (bio_queue_tail == page) | |
7972 | + bio_queue_tail = NULL; | |
7973 | + atomic_dec(&toi_bio_queue_size); | |
7974 | + spin_unlock_irqrestore(&bio_queue_lock, flags); | |
e876a0dd AM |
7975 | + |
7976 | + /* Don't generate more error messages if already had one */ | |
7977 | + if (!result) | |
7978 | + result = toi_bio_rw_page(WRITE, page, 0, 11); | |
7e46296a AM |
7979 | + /* |
7980 | + * If writing the page failed, don't drop out. | |
7981 | + * Flush the rest of the queue too. | |
7982 | + */ | |
7983 | + if (result) | |
7984 | + toi__free_page(11 , page); | |
7985 | + spin_lock_irqsave(&bio_queue_lock, flags); | |
7986 | + } | |
7987 | + spin_unlock_irqrestore(&bio_queue_lock, flags); | |
2380c486 | 7988 | + |
7e46296a AM |
7989 | + if (dedicated_thread) { |
7990 | + wait_event(toi_io_queue_flusher, bio_queue_head || | |
7991 | + toi_bio_queue_flusher_should_finish); | |
7992 | + if (likely(!toi_bio_queue_flusher_should_finish)) | |
7993 | + goto top; | |
7994 | + toi_bio_queue_flusher_should_finish = 0; | |
7995 | + } | |
2380c486 | 7996 | + |
7e46296a AM |
7997 | + mutex_unlock(&busy); |
7998 | + return result; | |
7999 | +} | |
2380c486 | 8000 | + |
7e46296a AM |
8001 | +/** |
8002 | + * toi_bio_get_new_page - get a new page for I/O | |
8003 | + * @full_buffer: Pointer to a page to allocate. | |
8004 | + **/ | |
8005 | +static int toi_bio_get_new_page(char **full_buffer) | |
2380c486 | 8006 | +{ |
7e46296a AM |
8007 | + int result = throttle_if_needed(THROTTLE_WAIT); |
8008 | + if (result) | |
8009 | + return result; | |
2380c486 | 8010 | + |
7e46296a AM |
8011 | + while (!*full_buffer) { |
8012 | + *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP); | |
8013 | + if (!*full_buffer) { | |
8014 | + set_free_mem_throttle(); | |
8015 | + do_bio_wait(3); | |
8016 | + } | |
2380c486 JR |
8017 | + } |
8018 | + | |
7e46296a | 8019 | + return 0; |
2380c486 JR |
8020 | +} |
8021 | + | |
7e46296a AM |
8022 | +/** |
8023 | + * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O | |
8024 | + * @writing: Bool - whether writing (or reading). | |
8025 | + * @buffer: The start of the buffer to write or fill. | |
8026 | + * @buffer_size: The size of the buffer to write or fill. | |
8027 | + * @no_readahead: Don't try to start readhead (when getting extents). | |
8028 | + **/ | |
8029 | +static int toi_rw_buffer(int writing, char *buffer, int buffer_size, | |
8030 | + int no_readahead) | |
92bca44c | 8031 | +{ |
7e46296a | 8032 | + int bytes_left = buffer_size, result = 0; |
92bca44c | 8033 | + |
7e46296a AM |
8034 | + while (bytes_left) { |
8035 | + char *source_start = buffer + buffer_size - bytes_left; | |
8036 | + char *dest_start = toi_writer_buffer + toi_writer_buffer_posn; | |
8037 | + int capacity = PAGE_SIZE - toi_writer_buffer_posn; | |
8038 | + char *to = writing ? dest_start : source_start; | |
8039 | + char *from = writing ? source_start : dest_start; | |
92bca44c | 8040 | + |
7e46296a AM |
8041 | + if (bytes_left <= capacity) { |
8042 | + memcpy(to, from, bytes_left); | |
8043 | + toi_writer_buffer_posn += bytes_left; | |
8044 | + return 0; | |
8045 | + } | |
2380c486 | 8046 | + |
7e46296a AM |
8047 | + /* Complete this page and start a new one */ |
8048 | + memcpy(to, from, capacity); | |
8049 | + bytes_left -= capacity; | |
2380c486 | 8050 | + |
7e46296a AM |
8051 | + if (!writing) { |
8052 | + /* | |
8053 | + * Perform actual I/O: | |
8054 | + * read readahead_list_head into toi_writer_buffer | |
8055 | + */ | |
8056 | + int result = toi_bio_get_next_page_read(no_readahead); | |
8057 | + if (result) { | |
8058 | + printk("toi_bio_get_next_page_read " | |
8059 | + "returned %d.\n", result); | |
8060 | + return result; | |
8061 | + } | |
8062 | + } else { | |
8063 | + toi_bio_queue_write(&toi_writer_buffer); | |
8064 | + result = toi_bio_get_new_page(&toi_writer_buffer); | |
8065 | + if (result) { | |
8066 | + printk(KERN_ERR "toi_bio_get_new_page returned " | |
8067 | + "%d.\n", result); | |
8068 | + return result; | |
8069 | + } | |
8070 | + } | |
8071 | + | |
8072 | + toi_writer_buffer_posn = 0; | |
8073 | + toi_cond_pause(0, NULL); | |
8074 | + } | |
8075 | + | |
8076 | + return 0; | |
2380c486 JR |
8077 | +} |
8078 | + | |
7e46296a AM |
8079 | +/** |
8080 | + * toi_bio_read_page - read a page of the image | |
8081 | + * @pfn: The pfn where the data belongs. | |
8082 | + * @buffer_page: The page containing the (possibly compressed) data. | |
8083 | + * @buf_size: The number of bytes on @buffer_page used (PAGE_SIZE). | |
2380c486 | 8084 | + * |
7e46296a AM |
8085 | + * Read a (possibly compressed) page from the image, into buffer_page, |
8086 | + * returning its pfn and the buffer size. | |
8087 | + **/ | |
85eb3c9d AM |
8088 | +static int toi_bio_read_page(unsigned long *pfn, int buf_type, |
8089 | + void *buffer_page, unsigned int *buf_size) | |
7e46296a AM |
8090 | +{ |
8091 | + int result = 0; | |
8092 | + int this_idx; | |
85eb3c9d | 8093 | + char *buffer_virt = TOI_MAP(buf_type, buffer_page); |
2380c486 | 8094 | + |
7e46296a AM |
8095 | + /* |
8096 | + * Only call start_new_readahead if we don't have a dedicated thread | |
8097 | + * and we're the queue flusher. | |
8098 | + */ | |
8099 | + if (current == toi_queue_flusher && more_readahead) { | |
8100 | + int result2 = toi_start_new_readahead(0); | |
8101 | + if (result2) { | |
8102 | + printk(KERN_DEBUG "Queue flusher and " | |
8103 | + "toi_start_one_readahead returned non-zero.\n"); | |
8104 | + result = -EIO; | |
8105 | + goto out; | |
8106 | + } | |
8107 | + } | |
2380c486 | 8108 | + |
7e46296a | 8109 | + my_mutex_lock(0, &toi_bio_mutex); |
2380c486 | 8110 | + |
7e46296a AM |
8111 | + /* |
8112 | + * Structure in the image: | |
8113 | + * [destination pfn|page size|page data] | |
8114 | + * buf_size is PAGE_SIZE | |
85eb3c9d AM |
8115 | + * We can validly find there's nothing to read in a multithreaded |
8116 | + * situation. | |
7e46296a AM |
8117 | + */ |
8118 | + if (toi_rw_buffer(READ, (char *) &this_idx, sizeof(int), 0) || | |
8119 | + toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) || | |
8120 | + toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) || | |
8121 | + toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) { | |
85eb3c9d AM |
8122 | + result = -ENODATA; |
8123 | + goto out_unlock; | |
7e46296a | 8124 | + } |
2380c486 | 8125 | + |
7e46296a AM |
8126 | + if (reset_idx) { |
8127 | + page_idx = this_idx; | |
8128 | + reset_idx = 0; | |
8129 | + } else { | |
8130 | + page_idx++; | |
85eb3c9d AM |
8131 | + if (!this_idx) |
8132 | + result = -ENODATA; | |
8133 | + else if (page_idx != this_idx) | |
7e46296a AM |
8134 | + printk(KERN_ERR "Got page index %d, expected %d.\n", |
8135 | + this_idx, page_idx); | |
8136 | + } | |
2380c486 | 8137 | + |
85eb3c9d | 8138 | +out_unlock: |
7e46296a AM |
8139 | + my_mutex_unlock(0, &toi_bio_mutex); |
8140 | +out: | |
85eb3c9d | 8141 | + TOI_UNMAP(buf_type, buffer_page); |
7e46296a AM |
8142 | + return result; |
8143 | +} | |
2380c486 | 8144 | + |
7e46296a AM |
8145 | +/** |
8146 | + * toi_bio_write_page - write a page of the image | |
8147 | + * @pfn: The pfn where the data belongs. | |
8148 | + * @buffer_page: The page containing the (possibly compressed) data. | |
8149 | + * @buf_size: The number of bytes on @buffer_page used. | |
2380c486 | 8150 | + * |
7e46296a AM |
8151 | + * Write a (possibly compressed) page to the image from the buffer, together |
8152 | + * with it's index and buffer size. | |
8153 | + **/ | |
85eb3c9d AM |
8154 | +static int toi_bio_write_page(unsigned long pfn, int buf_type, |
8155 | + void *buffer_page, unsigned int buf_size) | |
7e46296a AM |
8156 | +{ |
8157 | + char *buffer_virt; | |
8158 | + int result = 0, result2 = 0; | |
2380c486 | 8159 | + |
7e46296a AM |
8160 | + if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED))) |
8161 | + return 0; | |
2380c486 | 8162 | + |
7e46296a | 8163 | + my_mutex_lock(1, &toi_bio_mutex); |
2380c486 | 8164 | + |
7e46296a AM |
8165 | + if (test_result_state(TOI_ABORTED)) { |
8166 | + my_mutex_unlock(1, &toi_bio_mutex); | |
85eb3c9d | 8167 | + return 0; |
7e46296a | 8168 | + } |
2380c486 | 8169 | + |
85eb3c9d | 8170 | + buffer_virt = TOI_MAP(buf_type, buffer_page); |
7e46296a | 8171 | + page_idx++; |
2380c486 | 8172 | + |
7e46296a AM |
8173 | + /* |
8174 | + * Structure in the image: | |
8175 | + * [destination pfn|page size|page data] | |
8176 | + * buf_size is PAGE_SIZE | |
8177 | + */ | |
8178 | + if (toi_rw_buffer(WRITE, (char *) &page_idx, sizeof(int), 0) || | |
8179 | + toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) || | |
8180 | + toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) || | |
8181 | + toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) { | |
8182 | + printk(KERN_DEBUG "toi_rw_buffer returned non-zero to " | |
8183 | + "toi_bio_write_page.\n"); | |
8184 | + result = -EIO; | |
8185 | + } | |
2380c486 | 8186 | + |
85eb3c9d | 8187 | + TOI_UNMAP(buf_type, buffer_page); |
7e46296a | 8188 | + my_mutex_unlock(1, &toi_bio_mutex); |
2380c486 | 8189 | + |
7e46296a AM |
8190 | + if (current == toi_queue_flusher) |
8191 | + result2 = toi_bio_queue_flush_pages(0); | |
2380c486 | 8192 | + |
7e46296a | 8193 | + return result ? result : result2; |
2380c486 JR |
8194 | +} |
8195 | + | |
7e46296a AM |
8196 | +/** |
8197 | + * _toi_rw_header_chunk - read or write a portion of the image header | |
8198 | + * @writing: Whether reading or writing. | |
8199 | + * @owner: The module for which we're writing. | |
8200 | + * Used for confirming that modules | |
8201 | + * don't use more header space than they asked for. | |
8202 | + * @buffer: Address of the data to write. | |
8203 | + * @buffer_size: Size of the data buffer. | |
8204 | + * @no_readahead: Don't try to start readhead (when getting extents). | |
2380c486 | 8205 | + * |
7e46296a AM |
8206 | + * Perform PAGE_SIZE I/O. Start readahead if needed. |
8207 | + **/ | |
8208 | +static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner, | |
8209 | + char *buffer, int buffer_size, int no_readahead) | |
2380c486 | 8210 | +{ |
7e46296a | 8211 | + int result = 0; |
2380c486 | 8212 | + |
7e46296a AM |
8213 | + if (owner) { |
8214 | + owner->header_used += buffer_size; | |
8215 | + toi_message(TOI_HEADER, TOI_LOW, 1, | |
8216 | + "Header: %s : %d bytes (%d/%d) from offset %d.", | |
8217 | + owner->name, | |
8218 | + buffer_size, owner->header_used, | |
8219 | + owner->header_requested, | |
8220 | + toi_writer_buffer_posn); | |
8221 | + if (owner->header_used > owner->header_requested && writing) { | |
8222 | + printk(KERN_EMERG "TuxOnIce module %s is using more " | |
8223 | + "header space (%u) than it requested (%u).\n", | |
8224 | + owner->name, | |
8225 | + owner->header_used, | |
8226 | + owner->header_requested); | |
8227 | + return buffer_size; | |
2380c486 | 8228 | + } |
7e46296a AM |
8229 | + } else { |
8230 | + unowned += buffer_size; | |
8231 | + toi_message(TOI_HEADER, TOI_LOW, 1, | |
8232 | + "Header: (No owner): %d bytes (%d total so far) from " | |
8233 | + "offset %d.", buffer_size, unowned, | |
8234 | + toi_writer_buffer_posn); | |
2380c486 | 8235 | + } |
2380c486 | 8236 | + |
7e46296a AM |
8237 | + if (!writing && !no_readahead && more_readahead) { |
8238 | + result = toi_start_new_readahead(0); | |
8239 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Start new readahead " | |
8240 | + "returned %d.", result); | |
2380c486 JR |
8241 | + } |
8242 | + | |
7e46296a AM |
8243 | + if (!result) { |
8244 | + result = toi_rw_buffer(writing, buffer, buffer_size, | |
8245 | + no_readahead); | |
8246 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "rw_buffer returned " | |
8247 | + "%d.", result); | |
2380c486 | 8248 | + } |
2380c486 | 8249 | + |
7e46296a AM |
8250 | + total_header_bytes += buffer_size; |
8251 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning " | |
8252 | + "%d.", result); | |
8253 | + return result; | |
8254 | +} | |
2380c486 | 8255 | + |
7e46296a AM |
8256 | +static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner, |
8257 | + char *buffer, int size) | |
2380c486 | 8258 | +{ |
7e46296a | 8259 | + return _toi_rw_header_chunk(writing, owner, buffer, size, 1); |
2380c486 JR |
8260 | +} |
8261 | + | |
7e46296a AM |
8262 | +static int toi_rw_header_chunk_noreadahead(int writing, |
8263 | + struct toi_module_ops *owner, char *buffer, int size) | |
2380c486 | 8264 | +{ |
7e46296a | 8265 | + return _toi_rw_header_chunk(writing, owner, buffer, size, 1); |
2380c486 JR |
8266 | +} |
8267 | + | |
7e46296a AM |
8268 | +/** |
8269 | + * toi_bio_storage_needed - get the amount of storage needed for my fns | |
8270 | + **/ | |
8271 | +static int toi_bio_storage_needed(void) | |
2380c486 | 8272 | +{ |
7e46296a | 8273 | + return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed(); |
2380c486 JR |
8274 | +} |
8275 | + | |
7e46296a AM |
8276 | +/** |
8277 | + * toi_bio_save_config_info - save block I/O config to image header | |
8278 | + * @buf: PAGE_SIZE'd buffer into which data should be saved. | |
8279 | + **/ | |
8280 | +static int toi_bio_save_config_info(char *buf) | |
2380c486 | 8281 | +{ |
7e46296a AM |
8282 | + int *ints = (int *) buf; |
8283 | + ints[0] = target_outstanding_io; | |
8284 | + return sizeof(int); | |
2380c486 JR |
8285 | +} |
8286 | + | |
7e46296a AM |
8287 | +/** |
8288 | + * toi_bio_load_config_info - restore block I/O config | |
8289 | + * @buf: Data to be reloaded. | |
8290 | + * @size: Size of the buffer saved. | |
8291 | + **/ | |
8292 | +static void toi_bio_load_config_info(char *buf, int size) | |
2380c486 | 8293 | +{ |
7e46296a AM |
8294 | + int *ints = (int *) buf; |
8295 | + target_outstanding_io = ints[0]; | |
2380c486 JR |
8296 | +} |
8297 | + | |
5dd10c98 | 8298 | +void close_resume_dev_t(int force) |
2380c486 | 8299 | +{ |
5dd10c98 AM |
8300 | + if (!resume_block_device) |
8301 | + return; | |
8302 | + | |
8303 | + if (force) | |
8304 | + atomic_set(&resume_bdev_open_count, 0); | |
8305 | + else | |
8306 | + atomic_dec(&resume_bdev_open_count); | |
8307 | + | |
8308 | + if (!atomic_read(&resume_bdev_open_count)) { | |
7e46296a AM |
8309 | + toi_close_bdev(resume_block_device); |
8310 | + resume_block_device = NULL; | |
2380c486 JR |
8311 | + } |
8312 | +} | |
8313 | + | |
5dd10c98 | 8314 | +int open_resume_dev_t(int force, int quiet) |
2380c486 | 8315 | +{ |
5dd10c98 | 8316 | + if (force) { |
7e46296a | 8317 | + close_resume_dev_t(1); |
5dd10c98 AM |
8318 | + atomic_set(&resume_bdev_open_count, 1); |
8319 | + } else | |
7e46296a | 8320 | + atomic_inc(&resume_bdev_open_count); |
2380c486 | 8321 | + |
7e46296a | 8322 | + if (resume_block_device) |
2380c486 JR |
8323 | + return 0; |
8324 | + | |
7e46296a AM |
8325 | + resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0); |
8326 | + if (IS_ERR(resume_block_device)) { | |
8327 | + if (!quiet) | |
8328 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
8329 | + "Failed to open device %x, where" | |
8330 | + " the header should be found.", | |
8331 | + resume_dev_t); | |
5dd10c98 AM |
8332 | + resume_block_device = NULL; |
8333 | + atomic_set(&resume_bdev_open_count, 0); | |
7e46296a | 8334 | + return 1; |
2380c486 JR |
8335 | + } |
8336 | + | |
2380c486 JR |
8337 | + return 0; |
8338 | +} | |
8339 | + | |
7e46296a AM |
8340 | +/** |
8341 | + * toi_bio_initialise - initialise bio code at start of some action | |
8342 | + * @starting_cycle: Whether starting a hibernation cycle, or just reading or | |
8343 | + * writing a sysfs value. | |
8344 | + **/ | |
8345 | +static int toi_bio_initialise(int starting_cycle) | |
2380c486 | 8346 | +{ |
7e46296a | 8347 | + int result; |
2380c486 | 8348 | + |
7e46296a AM |
8349 | + if (!starting_cycle || !resume_dev_t) |
8350 | + return 0; | |
2380c486 | 8351 | + |
7e46296a AM |
8352 | + max_outstanding_writes = 0; |
8353 | + max_outstanding_reads = 0; | |
8354 | + current_stream = 0; | |
8355 | + toi_queue_flusher = current; | |
8356 | +#ifdef MEASURE_MUTEX_CONTENTION | |
8357 | + { | |
8358 | + int i, j, k; | |
8359 | + | |
8360 | + for (i = 0; i < 2; i++) | |
8361 | + for (j = 0; j < 2; j++) | |
8362 | + for_each_online_cpu(k) | |
8363 | + mutex_times[i][j][k] = 0; | |
8364 | + } | |
8365 | +#endif | |
8366 | + result = open_resume_dev_t(0, 1); | |
8367 | + | |
8368 | + if (result) | |
8369 | + return result; | |
8370 | + | |
8371 | + return get_signature_page(); | |
2380c486 JR |
8372 | +} |
8373 | + | |
7e46296a | 8374 | +static unsigned long raw_to_real(unsigned long raw) |
2380c486 | 8375 | +{ |
85eb3c9d | 8376 | + unsigned long extra; |
2380c486 | 8377 | + |
85eb3c9d | 8378 | + extra = (raw * (sizeof(unsigned long) + sizeof(int)) + |
7e46296a AM |
8379 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) / |
8380 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int)); | |
2380c486 | 8381 | + |
85eb3c9d | 8382 | + return raw > extra ? raw - extra : 0; |
7e46296a AM |
8383 | +} |
8384 | + | |
8385 | +static unsigned long toi_bio_storage_available(void) | |
8386 | +{ | |
8387 | + unsigned long sum = 0; | |
8388 | + struct toi_module_ops *this_module; | |
8389 | + | |
8390 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
8391 | + if (!this_module->enabled || | |
8392 | + this_module->type != BIO_ALLOCATOR_MODULE) | |
8393 | + continue; | |
8394 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Seeking storage " | |
8395 | + "available from %s.", this_module->name); | |
8396 | + sum += this_module->bio_allocator_ops->storage_available(); | |
8397 | + } | |
8398 | + | |
8399 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Total storage available is %lu " | |
85eb3c9d AM |
8400 | + "pages (%d header pages).", sum, header_pages_reserved); |
8401 | + | |
8402 | + return sum > header_pages_reserved ? | |
8403 | + raw_to_real(sum - header_pages_reserved) : 0; | |
7e46296a AM |
8404 | + |
8405 | +} | |
8406 | + | |
8407 | +static unsigned long toi_bio_storage_allocated(void) | |
8408 | +{ | |
8409 | + return raw_pages_allocd > header_pages_reserved ? | |
8410 | + raw_to_real(raw_pages_allocd - header_pages_reserved) : 0; | |
2380c486 | 8411 | +} |
7e46296a | 8412 | + |
2380c486 | 8413 | +/* |
7e46296a AM |
8414 | + * If we have read part of the image, we might have filled memory with |
8415 | + * data that should be zeroed out. | |
2380c486 | 8416 | + */ |
7e46296a | 8417 | +static void toi_bio_noresume_reset(void) |
2380c486 | 8418 | +{ |
7e46296a AM |
8419 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_noresume_reset."); |
8420 | + toi_rw_cleanup(READ); | |
8421 | + free_all_bdev_info(); | |
8422 | +} | |
2380c486 | 8423 | + |
7e46296a AM |
8424 | +/** |
8425 | + * toi_bio_cleanup - cleanup after some action | |
8426 | + * @finishing_cycle: Whether completing a cycle. | |
8427 | + **/ | |
8428 | +static void toi_bio_cleanup(int finishing_cycle) | |
8429 | +{ | |
8430 | + if (!finishing_cycle) | |
2380c486 JR |
8431 | + return; |
8432 | + | |
7e46296a AM |
8433 | + if (toi_writer_buffer) { |
8434 | + toi_free_page(11, (unsigned long) toi_writer_buffer); | |
8435 | + toi_writer_buffer = NULL; | |
8436 | + } | |
2380c486 | 8437 | + |
7e46296a | 8438 | + forget_signature_page(); |
2380c486 | 8439 | + |
7e46296a AM |
8440 | + if (header_block_device && toi_sig_data && |
8441 | + toi_sig_data->header_dev_t != resume_dev_t) | |
8442 | + toi_close_bdev(header_block_device); | |
2380c486 | 8443 | + |
7e46296a | 8444 | + header_block_device = NULL; |
5dd10c98 AM |
8445 | + |
8446 | + close_resume_dev_t(0); | |
7e46296a | 8447 | +} |
2380c486 | 8448 | + |
7e46296a AM |
8449 | +static int toi_bio_write_header_init(void) |
8450 | +{ | |
8451 | + int result; | |
2380c486 | 8452 | + |
7e46296a AM |
8453 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_write_header_init"); |
8454 | + toi_rw_init(WRITE, 0); | |
8455 | + toi_writer_buffer_posn = 0; | |
2380c486 | 8456 | + |
7e46296a AM |
8457 | + /* Info needed to bootstrap goes at the start of the header. |
8458 | + * First we save the positions and devinfo, including the number | |
8459 | + * of header pages. Then we save the structs containing data needed | |
8460 | + * for reading the header pages back. | |
8461 | + * Note that even if header pages take more than one page, when we | |
8462 | + * read back the info, we will have restored the location of the | |
8463 | + * next header page by the time we go to use it. | |
8464 | + */ | |
2380c486 | 8465 | + |
7e46296a AM |
8466 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "serialise extent chains."); |
8467 | + result = toi_serialise_extent_chains(); | |
8468 | + | |
8469 | + if (result) | |
8470 | + return result; | |
8471 | + | |
8472 | + /* | |
8473 | + * Signature page hasn't been modified at this point. Write it in | |
8474 | + * the header so we can restore it later. | |
8475 | + */ | |
8476 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "serialise signature page."); | |
8477 | + return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops, | |
8478 | + (char *) toi_cur_sig_page, | |
8479 | + PAGE_SIZE); | |
2380c486 JR |
8480 | +} |
8481 | + | |
7e46296a AM |
8482 | +static int toi_bio_write_header_cleanup(void) |
8483 | +{ | |
8484 | + int result = 0; | |
8485 | + | |
8486 | + if (toi_writer_buffer_posn) | |
8487 | + toi_bio_queue_write(&toi_writer_buffer); | |
8488 | + | |
8489 | + result = toi_finish_all_io(); | |
8490 | + | |
8491 | + unowned = 0; | |
8492 | + total_header_bytes = 0; | |
8493 | + | |
8494 | + /* Set signature to save we have an image */ | |
8495 | + if (!result) | |
8496 | + result = toi_bio_mark_have_image(); | |
8497 | + | |
8498 | + return result; | |
8499 | +} | |
2380c486 JR |
8500 | + |
8501 | +/* | |
7e46296a AM |
8502 | + * toi_bio_read_header_init() |
8503 | + * | |
8504 | + * Description: | |
8505 | + * 1. Attempt to read the device specified with resume=. | |
8506 | + * 2. Check the contents of the swap header for our signature. | |
8507 | + * 3. Warn, ignore, reset and/or continue as appropriate. | |
8508 | + * 4. If continuing, read the toi_swap configuration section | |
8509 | + * of the header and set up block device info so we can read | |
8510 | + * the rest of the header & image. | |
8511 | + * | |
8512 | + * Returns: | |
8513 | + * May not return if user choose to reboot at a warning. | |
8514 | + * -EINVAL if cannot resume at this time. Booting should continue | |
8515 | + * normally. | |
2380c486 | 8516 | + */ |
2380c486 | 8517 | + |
7e46296a | 8518 | +static int toi_bio_read_header_init(void) |
2380c486 | 8519 | +{ |
7e46296a AM |
8520 | + int result = 0; |
8521 | + char buf[32]; | |
8522 | + | |
8523 | + toi_writer_buffer_posn = 0; | |
8524 | + | |
8525 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_read_header_init"); | |
8526 | + | |
8527 | + if (!toi_sig_data) { | |
8528 | + printk(KERN_INFO "toi_bio_read_header_init called when we " | |
8529 | + "haven't verified there is an image!\n"); | |
8530 | + return -EINVAL; | |
8531 | + } | |
8532 | + | |
8533 | + /* | |
8534 | + * If the header is not on the resume_swap_dev_t, get the resume device | |
8535 | + * first. | |
8536 | + */ | |
8537 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Header dev_t is %lx.", | |
8538 | + toi_sig_data->header_dev_t); | |
8539 | + if (toi_sig_data->have_uuid) { | |
cacc47f8 | 8540 | + struct fs_info seek; |
7e46296a | 8541 | + dev_t device; |
cacc47f8 AM |
8542 | + |
8543 | + strncpy((char *) seek.uuid, toi_sig_data->header_uuid, 16); | |
8544 | + seek.dev_t = toi_sig_data->header_dev_t; | |
8545 | + seek.last_mount_size = 0; | |
8546 | + device = blk_lookup_fs_info(&seek); | |
7e46296a | 8547 | + if (device) { |
cacc47f8 | 8548 | + printk("Using dev_t %s, returned by blk_lookup_fs_info.\n", |
7e46296a AM |
8549 | + format_dev_t(buf, device)); |
8550 | + toi_sig_data->header_dev_t = device; | |
8551 | + } | |
8552 | + } | |
8553 | + if (toi_sig_data->header_dev_t != resume_dev_t) { | |
8554 | + header_block_device = toi_open_bdev(NULL, | |
8555 | + toi_sig_data->header_dev_t, 1); | |
8556 | + | |
8557 | + if (IS_ERR(header_block_device)) | |
8558 | + return PTR_ERR(header_block_device); | |
8559 | + } else | |
8560 | + header_block_device = resume_block_device; | |
8561 | + | |
8562 | + if (!toi_writer_buffer) | |
8563 | + toi_writer_buffer = (char *) toi_get_zeroed_page(11, | |
8564 | + TOI_ATOMIC_GFP); | |
8565 | + more_readahead = 1; | |
8566 | + | |
8567 | + /* | |
8568 | + * Read toi_swap configuration. | |
8569 | + * Headerblock size taken into account already. | |
8570 | + */ | |
8571 | + result = toi_bio_ops.bdev_page_io(READ, header_block_device, | |
8572 | + toi_sig_data->first_header_block, | |
8573 | + virt_to_page((unsigned long) toi_writer_buffer)); | |
8574 | + if (result) | |
8575 | + return result; | |
8576 | + | |
8577 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "load extent chains."); | |
8578 | + result = toi_load_extent_chains(); | |
8579 | + | |
8580 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "load original signature page."); | |
8581 | + toi_orig_sig_page = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP); | |
8582 | + if (!toi_orig_sig_page) { | |
8583 | + printk(KERN_ERR "Failed to allocate memory for the current" | |
8584 | + " image signature.\n"); | |
8585 | + return -ENOMEM; | |
8586 | + } | |
8587 | + | |
8588 | + return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops, | |
8589 | + (char *) toi_orig_sig_page, | |
8590 | + PAGE_SIZE); | |
2380c486 JR |
8591 | +} |
8592 | + | |
7e46296a | 8593 | +static int toi_bio_read_header_cleanup(void) |
2380c486 | 8594 | +{ |
7e46296a AM |
8595 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup."); |
8596 | + return toi_rw_cleanup(READ); | |
2380c486 | 8597 | +} |
7e46296a AM |
8598 | + |
8599 | +/* Works only for digits and letters, but small and fast */ | |
8600 | +#define TOLOWER(x) ((x) | 0x20) | |
8601 | + | |
2380c486 | 8602 | +/* |
7e46296a AM |
8603 | + * UUID must be 32 chars long. It may have dashes, but nothing |
8604 | + * else. | |
2380c486 | 8605 | + */ |
7e46296a AM |
8606 | +char *uuid_from_commandline(char *commandline) |
8607 | +{ | |
8608 | + int low = 0; | |
8609 | + char *result = NULL, *output, *ptr; | |
2380c486 | 8610 | + |
7e46296a AM |
8611 | + if (strncmp(commandline, "UUID=", 5)) |
8612 | + return NULL; | |
2380c486 | 8613 | + |
7e46296a AM |
8614 | + result = kzalloc(17, GFP_KERNEL); |
8615 | + if (!result) { | |
8616 | + printk("Failed to kzalloc UUID text memory.\n"); | |
8617 | + return NULL; | |
8618 | + } | |
8619 | + | |
8620 | + ptr = commandline + 5; | |
8621 | + output = result; | |
8622 | + | |
8623 | + while (*ptr && (output - result) < 16) { | |
8624 | + if (isxdigit(*ptr)) { | |
8625 | + int value = isdigit(*ptr) ? *ptr - '0' : | |
8626 | + TOLOWER(*ptr) - 'a' + 10; | |
8627 | + if (low) { | |
8628 | + *output += value; | |
8629 | + output++; | |
8630 | + } else { | |
8631 | + *output = value << 4; | |
8632 | + } | |
8633 | + low = !low; | |
8634 | + } else if (*ptr != '-') | |
8635 | + break; | |
8636 | + ptr++; | |
8637 | + } | |
8638 | + | |
8639 | + if ((output - result) < 16 || *ptr) { | |
8640 | + printk(KERN_DEBUG "Found resume=UUID=, but the value looks " | |
8641 | + "invalid.\n"); | |
8642 | + kfree(result); | |
8643 | + result = NULL; | |
8644 | + } | |
8645 | + | |
8646 | + return result; | |
8647 | +} | |
8648 | + | |
5dd10c98 AM |
8649 | +#define retry_if_fails(command) \ |
8650 | +do { \ | |
8651 | + command; \ | |
8652 | + if (!resume_dev_t && !waited_for_device_probe) { \ | |
8653 | + wait_for_device_probe(); \ | |
8654 | + scsi_complete_async_scans(); \ | |
8655 | + command; \ | |
8656 | + waited_for_device_probe = 1; \ | |
8657 | + } \ | |
8658 | +} while(0) | |
8659 | + | |
7e46296a AM |
8660 | +/** |
8661 | + * try_to_open_resume_device: Try to parse and open resume= | |
2380c486 | 8662 | + * |
7e46296a AM |
8663 | + * Any "swap:" has been stripped away and we just have the path to deal with. |
8664 | + * We attempt to do name_to_dev_t, open and stat the file. Having opened the | |
8665 | + * file, get the struct block_device * to match. | |
8666 | + */ | |
8667 | +static int try_to_open_resume_device(char *commandline, int quiet) | |
8668 | +{ | |
8669 | + struct kstat stat; | |
8670 | + int error = 0; | |
8671 | + char *uuid = uuid_from_commandline(commandline); | |
5dd10c98 | 8672 | + int waited_for_device_probe = 0; |
7e46296a AM |
8673 | + |
8674 | + resume_dev_t = MKDEV(0, 0); | |
8675 | + | |
5dd10c98 AM |
8676 | + if (!strlen(commandline)) |
8677 | + retry_if_fails(toi_bio_scan_for_image(quiet)); | |
8678 | + | |
7e46296a | 8679 | + if (uuid) { |
cacc47f8 AM |
8680 | + struct fs_info seek; |
8681 | + strncpy((char *) &seek.uuid, uuid, 16); | |
8682 | + seek.dev_t = resume_dev_t; | |
8683 | + seek.last_mount_size = 0; | |
8684 | + retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek)); | |
7e46296a AM |
8685 | + kfree(uuid); |
8686 | + } | |
8687 | + | |
8688 | + if (!resume_dev_t) | |
5dd10c98 | 8689 | + retry_if_fails(resume_dev_t = name_to_dev_t(commandline)); |
7e46296a AM |
8690 | + |
8691 | + if (!resume_dev_t) { | |
8692 | + struct file *file = filp_open(commandline, | |
8693 | + O_RDONLY|O_LARGEFILE, 0); | |
8694 | + | |
8695 | + if (!IS_ERR(file) && file) { | |
8696 | + vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); | |
8697 | + filp_close(file, NULL); | |
8698 | + } else | |
8699 | + error = vfs_stat(commandline, &stat); | |
8700 | + if (!error) | |
8701 | + resume_dev_t = stat.rdev; | |
8702 | + } | |
8703 | + | |
8704 | + if (!resume_dev_t) { | |
8705 | + if (quiet) | |
8706 | + return 1; | |
8707 | + | |
8708 | + if (test_toi_state(TOI_TRYING_TO_RESUME)) | |
8709 | + toi_early_boot_message(1, toi_translate_err_default, | |
8710 | + "Failed to translate \"%s\" into a device id.\n", | |
8711 | + commandline); | |
8712 | + else | |
8713 | + printk("TuxOnIce: Can't translate \"%s\" into a device " | |
8714 | + "id yet.\n", commandline); | |
8715 | + return 1; | |
8716 | + } | |
8717 | + | |
8718 | + return open_resume_dev_t(1, quiet); | |
8719 | +} | |
8720 | + | |
8721 | +/* | |
8722 | + * Parse Image Location | |
2380c486 | 8723 | + * |
7e46296a AM |
8724 | + * Attempt to parse a resume= parameter. |
8725 | + * Swap Writer accepts: | |
8726 | + * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE] | |
2380c486 | 8727 | + * |
7e46296a AM |
8728 | + * Where: |
8729 | + * DEVNAME is convertable to a dev_t by name_to_dev_t | |
8730 | + * FIRSTBLOCK is the location of the first block in the swap file | |
8731 | + * (specifying for a swap partition is nonsensical but not prohibited). | |
8732 | + * Data is validated by attempting to read a swap header from the | |
8733 | + * location given. Failure will result in toi_swap refusing to | |
8734 | + * save an image, and a reboot with correct parameters will be | |
8735 | + * necessary. | |
8736 | + */ | |
8737 | +static int toi_bio_parse_sig_location(char *commandline, | |
8738 | + int only_allocator, int quiet) | |
8739 | +{ | |
8740 | + char *thischar, *devstart, *colon = NULL; | |
8741 | + int signature_found, result = -EINVAL, temp_result = 0; | |
8742 | + | |
8743 | + if (strncmp(commandline, "swap:", 5) && | |
8744 | + strncmp(commandline, "file:", 5)) { | |
8745 | + /* | |
5dd10c98 AM |
8746 | + * Failing swap:, we'll take a simple resume=/dev/hda2, or a |
8747 | + * blank value (scan) but fall through to other allocators | |
8748 | + * if /dev/ or UUID= isn't matched. | |
7e46296a AM |
8749 | + */ |
8750 | + if (strncmp(commandline, "/dev/", 5) && | |
5dd10c98 AM |
8751 | + strncmp(commandline, "UUID=", 5) && |
8752 | + strlen(commandline)) | |
7e46296a AM |
8753 | + return 1; |
8754 | + } else | |
8755 | + commandline += 5; | |
8756 | + | |
8757 | + devstart = commandline; | |
8758 | + thischar = commandline; | |
8759 | + while ((*thischar != ':') && (*thischar != '@') && | |
8760 | + ((thischar - commandline) < 250) && (*thischar)) | |
8761 | + thischar++; | |
8762 | + | |
8763 | + if (*thischar == ':') { | |
8764 | + colon = thischar; | |
8765 | + *colon = 0; | |
8766 | + thischar++; | |
8767 | + } | |
8768 | + | |
8769 | + while ((thischar - commandline) < 250 && *thischar) | |
8770 | + thischar++; | |
8771 | + | |
8772 | + if (colon) { | |
8773 | + unsigned long block; | |
8774 | + temp_result = strict_strtoul(colon + 1, 0, &block); | |
8775 | + if (!temp_result) | |
8776 | + resume_firstblock = (int) block; | |
8777 | + } else | |
8778 | + resume_firstblock = 0; | |
8779 | + | |
8780 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
8781 | + clear_toi_state(TOI_CAN_RESUME); | |
8782 | + | |
8783 | + if (!temp_result) | |
8784 | + temp_result = try_to_open_resume_device(devstart, quiet); | |
8785 | + | |
8786 | + if (colon) | |
8787 | + *colon = ':'; | |
8788 | + | |
5dd10c98 | 8789 | + /* No error if we only scanned */ |
7e46296a | 8790 | + if (temp_result) |
5dd10c98 | 8791 | + return strlen(commandline) ? -EINVAL : 1; |
7e46296a AM |
8792 | + |
8793 | + signature_found = toi_bio_image_exists(quiet); | |
8794 | + | |
8795 | + if (signature_found != -1) { | |
8796 | + result = 0; | |
8797 | + /* | |
8798 | + * TODO: If only file storage, CAN_HIBERNATE should only be | |
8799 | + * set if file allocator's target is valid. | |
8800 | + */ | |
8801 | + set_toi_state(TOI_CAN_HIBERNATE); | |
8802 | + set_toi_state(TOI_CAN_RESUME); | |
8803 | + } else | |
8804 | + if (!quiet) | |
8805 | + printk(KERN_ERR "TuxOnIce: Block I/O: No " | |
8806 | + "signature found at %s.\n", devstart); | |
8807 | + | |
7e46296a AM |
8808 | + return result; |
8809 | +} | |
8810 | + | |
8811 | +static void toi_bio_release_storage(void) | |
8812 | +{ | |
8813 | + header_pages_reserved = 0; | |
8814 | + raw_pages_allocd = 0; | |
8815 | + | |
8816 | + free_all_bdev_info(); | |
8817 | +} | |
8818 | + | |
8819 | +/* toi_swap_remove_image | |
2380c486 | 8820 | + * |
7e46296a AM |
8821 | + */ |
8822 | +static int toi_bio_remove_image(void) | |
8823 | +{ | |
8824 | + int result; | |
8825 | + | |
8826 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_remove_image."); | |
8827 | + | |
8828 | + result = toi_bio_restore_original_signature(); | |
8829 | + | |
8830 | + /* | |
8831 | + * We don't do a sanity check here: we want to restore the swap | |
8832 | + * whatever version of kernel made the hibernate image. | |
8833 | + * | |
8834 | + * We need to write swap, but swap may not be enabled so | |
8835 | + * we write the device directly | |
8836 | + * | |
8837 | + * If we don't have an current_signature_page, we didn't | |
8838 | + * read an image header, so don't change anything. | |
8839 | + */ | |
8840 | + | |
8841 | + toi_bio_release_storage(); | |
8842 | + | |
8843 | + return result; | |
8844 | +} | |
8845 | + | |
8846 | +struct toi_bio_ops toi_bio_ops = { | |
8847 | + .bdev_page_io = toi_bdev_page_io, | |
8848 | + .register_storage = toi_register_storage_chain, | |
8849 | + .free_storage = toi_bio_release_storage, | |
8850 | +}; | |
8851 | +EXPORT_SYMBOL_GPL(toi_bio_ops); | |
8852 | + | |
8853 | +static struct toi_sysfs_data sysfs_params[] = { | |
8854 | + SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io, | |
8855 | + 0, 16384, 0, NULL), | |
8856 | +}; | |
8857 | + | |
8858 | +struct toi_module_ops toi_blockwriter_ops = { | |
8859 | + .type = WRITER_MODULE, | |
8860 | + .name = "block i/o", | |
8861 | + .directory = "block_io", | |
8862 | + .module = THIS_MODULE, | |
8863 | + .memory_needed = toi_bio_memory_needed, | |
8864 | + .print_debug_info = toi_bio_print_debug_stats, | |
8865 | + .storage_needed = toi_bio_storage_needed, | |
8866 | + .save_config_info = toi_bio_save_config_info, | |
8867 | + .load_config_info = toi_bio_load_config_info, | |
8868 | + .initialise = toi_bio_initialise, | |
8869 | + .cleanup = toi_bio_cleanup, | |
5dd10c98 | 8870 | + .post_atomic_restore = toi_bio_chains_post_atomic, |
7e46296a AM |
8871 | + |
8872 | + .rw_init = toi_rw_init, | |
8873 | + .rw_cleanup = toi_rw_cleanup, | |
8874 | + .read_page = toi_bio_read_page, | |
8875 | + .write_page = toi_bio_write_page, | |
8876 | + .rw_header_chunk = toi_rw_header_chunk, | |
8877 | + .rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead, | |
8878 | + .io_flusher = bio_io_flusher, | |
8879 | + .update_throughput_throttle = update_throughput_throttle, | |
8880 | + .finish_all_io = toi_finish_all_io, | |
8881 | + | |
8882 | + .noresume_reset = toi_bio_noresume_reset, | |
8883 | + .storage_available = toi_bio_storage_available, | |
8884 | + .storage_allocated = toi_bio_storage_allocated, | |
8885 | + .reserve_header_space = toi_bio_reserve_header_space, | |
8886 | + .allocate_storage = toi_bio_allocate_storage, | |
8887 | + .image_exists = toi_bio_image_exists, | |
8888 | + .mark_resume_attempted = toi_bio_mark_resume_attempted, | |
8889 | + .write_header_init = toi_bio_write_header_init, | |
8890 | + .write_header_cleanup = toi_bio_write_header_cleanup, | |
8891 | + .read_header_init = toi_bio_read_header_init, | |
8892 | + .read_header_cleanup = toi_bio_read_header_cleanup, | |
5dd10c98 | 8893 | + .get_header_version = toi_bio_get_header_version, |
7e46296a AM |
8894 | + .remove_image = toi_bio_remove_image, |
8895 | + .parse_sig_location = toi_bio_parse_sig_location, | |
8896 | + | |
8897 | + .sysfs_data = sysfs_params, | |
8898 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
8899 | + sizeof(struct toi_sysfs_data), | |
8900 | +}; | |
8901 | + | |
8902 | +/** | |
8903 | + * toi_block_io_load - load time routine for block I/O module | |
2380c486 | 8904 | + * |
7e46296a AM |
8905 | + * Register block i/o ops and sysfs entries. |
8906 | + **/ | |
8907 | +static __init int toi_block_io_load(void) | |
8908 | +{ | |
8909 | + return toi_register_module(&toi_blockwriter_ops); | |
8910 | +} | |
8911 | + | |
8912 | +#ifdef MODULE | |
8913 | +static __exit void toi_block_io_unload(void) | |
8914 | +{ | |
8915 | + toi_unregister_module(&toi_blockwriter_ops); | |
8916 | +} | |
8917 | + | |
8918 | +module_init(toi_block_io_load); | |
8919 | +module_exit(toi_block_io_unload); | |
8920 | +MODULE_LICENSE("GPL"); | |
8921 | +MODULE_AUTHOR("Nigel Cunningham"); | |
8922 | +MODULE_DESCRIPTION("TuxOnIce block io functions"); | |
8923 | +#else | |
8924 | +late_initcall(toi_block_io_load); | |
8925 | +#endif | |
8926 | diff --git a/kernel/power/tuxonice_bio_internal.h b/kernel/power/tuxonice_bio_internal.h | |
8927 | new file mode 100644 | |
5dd10c98 | 8928 | index 0000000..58c2481 |
7e46296a AM |
8929 | --- /dev/null |
8930 | +++ b/kernel/power/tuxonice_bio_internal.h | |
5dd10c98 AM |
8931 | @@ -0,0 +1,86 @@ |
8932 | +/* | |
8933 | + * kernel/power/tuxonice_bio_internal.h | |
8934 | + * | |
8935 | + * Copyright (C) 2009-2010 Nigel Cunningham (nigel at tuxonice net) | |
8936 | + * | |
8937 | + * Distributed under GPLv2. | |
8938 | + * | |
8939 | + * This file contains declarations for functions exported from | |
8940 | + * tuxonice_bio.c, which contains low level io functions. | |
8941 | + */ | |
8942 | + | |
7e46296a AM |
8943 | +/* Extent chains */ |
8944 | +void toi_extent_state_goto_start(void); | |
8945 | +void toi_extent_state_save(int slot); | |
8946 | +int go_next_page(int writing, int section_barrier); | |
8947 | +void toi_extent_state_restore(int slot); | |
8948 | +void free_all_bdev_info(void); | |
8949 | +int devices_of_same_priority(struct toi_bdev_info *this); | |
8950 | +int toi_register_storage_chain(struct toi_bdev_info *new); | |
8951 | +int toi_serialise_extent_chains(void); | |
8952 | +int toi_load_extent_chains(void); | |
8953 | +int toi_bio_rw_page(int writing, struct page *page, int is_readahead, | |
8954 | + int free_group); | |
8955 | +int toi_bio_restore_original_signature(void); | |
8956 | +int toi_bio_devinfo_storage_needed(void); | |
8957 | +unsigned long get_headerblock(void); | |
8958 | +dev_t get_header_dev_t(void); | |
8959 | +struct block_device *get_header_bdev(void); | |
8960 | +int toi_bio_allocate_storage(unsigned long request); | |
8961 | + | |
8962 | +/* Signature functions */ | |
8963 | +#define HaveImage "HaveImage" | |
8964 | +#define NoImage "TuxOnIce" | |
8965 | +#define sig_size (sizeof(HaveImage)) | |
8966 | + | |
8967 | +struct sig_data { | |
8968 | + char sig[sig_size]; | |
8969 | + int have_image; | |
8970 | + int resumed_before; | |
8971 | + | |
8972 | + char have_uuid; | |
8973 | + char header_uuid[17]; | |
8974 | + dev_t header_dev_t; | |
8975 | + unsigned long first_header_block; | |
5dd10c98 AM |
8976 | + |
8977 | + /* Repeat the signature to be sure we have a header version */ | |
8978 | + char sig2[sig_size]; | |
8979 | + int header_version; | |
7e46296a AM |
8980 | +}; |
8981 | + | |
8982 | +void forget_signature_page(void); | |
8983 | +int toi_check_for_signature(void); | |
8984 | +int toi_bio_image_exists(int quiet); | |
8985 | +int get_signature_page(void); | |
8986 | +int toi_bio_mark_resume_attempted(int); | |
8987 | +extern char *toi_cur_sig_page; | |
8988 | +extern char *toi_orig_sig_page; | |
8989 | +int toi_bio_mark_have_image(void); | |
8990 | +extern struct sig_data *toi_sig_data; | |
8991 | +extern dev_t resume_dev_t; | |
8992 | +extern struct block_device *resume_block_device; | |
8993 | +extern struct block_device *header_block_device; | |
8994 | +extern unsigned long resume_firstblock; | |
8995 | + | |
8996 | +struct block_device *open_bdev(dev_t device, int display_errs); | |
8997 | +extern int current_stream; | |
8998 | +extern int more_readahead; | |
8999 | +int toi_do_io(int writing, struct block_device *bdev, long block0, | |
9000 | + struct page *page, int is_readahead, int syncio, int free_group); | |
9001 | +int get_main_pool_phys_params(void); | |
9002 | + | |
9003 | +void toi_close_bdev(struct block_device *bdev); | |
9004 | +struct block_device *toi_open_bdev(char *uuid, dev_t default_device, | |
9005 | + int display_errs); | |
9006 | + | |
9007 | +extern struct toi_module_ops toi_blockwriter_ops; | |
9008 | +void dump_block_chains(void); | |
9009 | +void debug_broken_header(void); | |
9010 | +extern unsigned long raw_pages_allocd, header_pages_reserved; | |
5dd10c98 AM |
9011 | +int toi_bio_chains_debug_info(char *buffer, int size); |
9012 | +void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd); | |
9013 | +int toi_bio_scan_for_image(int quiet); | |
9014 | +int toi_bio_get_header_version(void); | |
9015 | + | |
9016 | +void close_resume_dev_t(int force); | |
9017 | +int open_resume_dev_t(int force, int quiet); | |
7e46296a AM |
9018 | diff --git a/kernel/power/tuxonice_bio_signature.c b/kernel/power/tuxonice_bio_signature.c |
9019 | new file mode 100644 | |
cacc47f8 | 9020 | index 0000000..2ebee7e |
7e46296a AM |
9021 | --- /dev/null |
9022 | +++ b/kernel/power/tuxonice_bio_signature.c | |
de6743ae | 9023 | @@ -0,0 +1,404 @@ |
7e46296a AM |
9024 | +/* |
9025 | + * kernel/power/tuxonice_bio_signature.c | |
2380c486 | 9026 | + * |
5dd10c98 | 9027 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 9028 | + * |
7e46296a | 9029 | + * Distributed under GPLv2. |
2380c486 | 9030 | + * |
7e46296a AM |
9031 | + */ |
9032 | + | |
cacc47f8 | 9033 | +#include <linux/fs_uuid.h> |
7e46296a AM |
9034 | + |
9035 | +#include "tuxonice.h" | |
9036 | +#include "tuxonice_sysfs.h" | |
9037 | +#include "tuxonice_modules.h" | |
9038 | +#include "tuxonice_prepare_image.h" | |
9039 | +#include "tuxonice_bio.h" | |
9040 | +#include "tuxonice_ui.h" | |
9041 | +#include "tuxonice_alloc.h" | |
9042 | +#include "tuxonice_io.h" | |
9043 | +#include "tuxonice_builtin.h" | |
9044 | +#include "tuxonice_bio_internal.h" | |
9045 | + | |
9046 | +struct sig_data *toi_sig_data; | |
9047 | + | |
9048 | +/* Struct of swap header pages */ | |
9049 | + | |
5dd10c98 AM |
9050 | +struct old_sig_data { |
9051 | + dev_t device; | |
9052 | + unsigned long sector; | |
9053 | + int resume_attempted; | |
9054 | + int orig_sig_type; | |
9055 | +}; | |
9056 | + | |
7e46296a AM |
9057 | +union diskpage { |
9058 | + union swap_header swh; /* swh.magic is the only member used */ | |
9059 | + struct sig_data sig_data; | |
5dd10c98 | 9060 | + struct old_sig_data old_sig_data; |
7e46296a AM |
9061 | +}; |
9062 | + | |
9063 | +union p_diskpage { | |
9064 | + union diskpage *pointer; | |
9065 | + char *ptr; | |
9066 | + unsigned long address; | |
9067 | +}; | |
9068 | + | |
9069 | +char *toi_cur_sig_page; | |
9070 | +char *toi_orig_sig_page; | |
9071 | +int have_image; | |
5dd10c98 | 9072 | +int have_old_image; |
7e46296a AM |
9073 | + |
9074 | +int get_signature_page(void) | |
9075 | +{ | |
9076 | + if (!toi_cur_sig_page) { | |
9077 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
9078 | + "Allocating current signature page."); | |
9079 | + toi_cur_sig_page = (char *) toi_get_zeroed_page(38, | |
9080 | + TOI_ATOMIC_GFP); | |
9081 | + if (!toi_cur_sig_page) { | |
9082 | + printk(KERN_ERR "Failed to allocate memory for the " | |
9083 | + "current image signature.\n"); | |
9084 | + return -ENOMEM; | |
9085 | + } | |
9086 | + | |
9087 | + toi_sig_data = (struct sig_data *) toi_cur_sig_page; | |
9088 | + } | |
9089 | + | |
9090 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading signature from dev %lx," | |
9091 | + " sector %d.", | |
9092 | + resume_block_device->bd_dev, resume_firstblock); | |
9093 | + | |
9094 | + return toi_bio_ops.bdev_page_io(READ, resume_block_device, | |
9095 | + resume_firstblock, virt_to_page(toi_cur_sig_page)); | |
9096 | +} | |
9097 | + | |
9098 | +void forget_signature_page(void) | |
9099 | +{ | |
9100 | + if (toi_cur_sig_page) { | |
9101 | + toi_sig_data = NULL; | |
9102 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_cur_sig_page" | |
9103 | + " (%p).", toi_cur_sig_page); | |
9104 | + toi_free_page(38, (unsigned long) toi_cur_sig_page); | |
9105 | + toi_cur_sig_page = NULL; | |
9106 | + } | |
9107 | + | |
9108 | + if (toi_orig_sig_page) { | |
9109 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_orig_sig_page" | |
9110 | + " (%p).", toi_orig_sig_page); | |
9111 | + toi_free_page(38, (unsigned long) toi_orig_sig_page); | |
9112 | + toi_orig_sig_page = NULL; | |
9113 | + } | |
9114 | +} | |
9115 | + | |
5dd10c98 AM |
9116 | +/* |
9117 | + * We need to ensure we use the signature page that's currently on disk, | |
9118 | + * so as to not remove the image header. Post-atomic-restore, the orig sig | |
9119 | + * page will be empty, so we can use that as our method of knowing that we | |
9120 | + * need to load the on-disk signature and not use the non-image sig in | |
9121 | + * memory. (We're going to powerdown after writing the change, so it's safe. | |
9122 | + */ | |
7e46296a AM |
9123 | +int toi_bio_mark_resume_attempted(int flag) |
9124 | +{ | |
9125 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Make resume attempted = %d.", | |
9126 | + flag); | |
5dd10c98 AM |
9127 | + if (!toi_orig_sig_page) { |
9128 | + forget_signature_page(); | |
9129 | + get_signature_page(); | |
9130 | + } | |
7e46296a AM |
9131 | + toi_sig_data->resumed_before = flag; |
9132 | + return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, | |
9133 | + resume_firstblock, virt_to_page(toi_cur_sig_page)); | |
9134 | +} | |
9135 | + | |
9136 | +int toi_bio_mark_have_image(void) | |
9137 | +{ | |
5dd10c98 | 9138 | + int result = 0; |
7e46296a | 9139 | + char buf[32]; |
5dd10c98 | 9140 | + struct fs_info *fs_info; |
7e46296a AM |
9141 | + |
9142 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that an image exists."); | |
9143 | + memcpy(toi_sig_data->sig, tuxonice_signature, | |
9144 | + sizeof(tuxonice_signature)); | |
9145 | + toi_sig_data->have_image = 1; | |
9146 | + toi_sig_data->resumed_before = 0; | |
9147 | + toi_sig_data->header_dev_t = get_header_dev_t(); | |
9148 | + toi_sig_data->have_uuid = 0; | |
9149 | + | |
5dd10c98 AM |
9150 | + fs_info = fs_info_from_block_dev(get_header_bdev()); |
9151 | + if (fs_info && !IS_ERR(fs_info)) { | |
9152 | + memcpy(toi_sig_data->header_uuid, &fs_info->uuid, 16); | |
9153 | + free_fs_info(fs_info); | |
9154 | + } else | |
9155 | + result = (int) PTR_ERR(fs_info); | |
9156 | + | |
7e46296a AM |
9157 | + if (!result) { |
9158 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Got uuid for dev_t %s.", | |
9159 | + format_dev_t(buf, get_header_dev_t())); | |
9160 | + toi_sig_data->have_uuid = 1; | |
9161 | + } else | |
9162 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Could not get uuid for " | |
9163 | + "dev_t %s.", | |
9164 | + format_dev_t(buf, get_header_dev_t())); | |
9165 | + | |
9166 | + toi_sig_data->first_header_block = get_headerblock(); | |
9167 | + have_image = 1; | |
9168 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is %x. First block " | |
9169 | + "is %d.", toi_sig_data->header_dev_t, | |
9170 | + toi_sig_data->first_header_block); | |
9171 | + | |
5dd10c98 AM |
9172 | + memcpy(toi_sig_data->sig2, tuxonice_signature, |
9173 | + sizeof(tuxonice_signature)); | |
9174 | + toi_sig_data->header_version = TOI_HEADER_VERSION; | |
9175 | + | |
7e46296a AM |
9176 | + return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, |
9177 | + resume_firstblock, virt_to_page(toi_cur_sig_page)); | |
9178 | +} | |
9179 | + | |
5dd10c98 AM |
9180 | +int remove_old_signature(void) |
9181 | +{ | |
9182 | + union p_diskpage swap_header_page = (union p_diskpage) toi_cur_sig_page; | |
9183 | + char *orig_sig, *no_image_signature_contents; | |
9184 | + char *header_start = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP); | |
9185 | + int result; | |
9186 | + struct block_device *header_bdev; | |
9187 | + struct old_sig_data *old_sig_data = | |
9188 | + &swap_header_page.pointer->old_sig_data; | |
9189 | + | |
9190 | + header_bdev = toi_open_bdev(NULL, old_sig_data->device, 1); | |
9191 | + result = toi_bio_ops.bdev_page_io(READ, header_bdev, | |
9192 | + old_sig_data->sector, virt_to_page(header_start)); | |
9193 | + | |
9194 | + if (result) | |
9195 | + goto out; | |
9196 | + | |
9197 | + /* | |
9198 | + * TODO: Get the original contents of the first bytes of the swap | |
9199 | + * header page. | |
9200 | + */ | |
9201 | + if (!old_sig_data->orig_sig_type) | |
9202 | + orig_sig = "SWAP-SPACE"; | |
9203 | + else | |
9204 | + orig_sig = "SWAPSPACE2"; | |
9205 | + | |
9206 | + memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10); | |
9207 | + memcpy(swap_header_page.ptr, header_start, | |
9208 | + sizeof(no_image_signature_contents)); | |
9209 | + | |
9210 | + result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device, | |
9211 | + resume_firstblock, virt_to_page(swap_header_page.ptr)); | |
9212 | + | |
9213 | +out: | |
9214 | + toi_close_bdev(header_bdev); | |
9215 | + have_old_image = 0; | |
9216 | + toi_free_page(38, (unsigned long) header_start); | |
9217 | + return result; | |
9218 | +} | |
9219 | + | |
7e46296a AM |
9220 | +/* |
9221 | + * toi_bio_restore_original_signature - restore the original signature | |
2380c486 | 9222 | + * |
7e46296a AM |
9223 | + * At boot time (aborting pre atomic-restore), toi_orig_sig_page gets used. |
9224 | + * It will have the original signature page contents, stored in the image | |
9225 | + * header. Post atomic-restore, we use :toi_cur_sig_page, which will contain | |
9226 | + * the contents that were loaded when we started the cycle. | |
9227 | + */ | |
9228 | +int toi_bio_restore_original_signature(void) | |
9229 | +{ | |
9230 | + char *use = toi_orig_sig_page ? toi_orig_sig_page : toi_cur_sig_page; | |
9231 | + | |
5dd10c98 AM |
9232 | + if (have_old_image) |
9233 | + return remove_old_signature(); | |
9234 | + | |
7e46296a AM |
9235 | + if (!use) { |
9236 | + printk("toi_bio_restore_original_signature: No signature " | |
9237 | + "page loaded.\n"); | |
9238 | + return 0; | |
9239 | + } | |
9240 | + | |
9241 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that no image exists."); | |
9242 | + have_image = 0; | |
9243 | + toi_sig_data->have_image = 0; | |
9244 | + return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, | |
9245 | + resume_firstblock, virt_to_page(use)); | |
9246 | +} | |
9247 | + | |
9248 | +/* | |
9249 | + * check_for_signature - See whether we have an image. | |
2380c486 | 9250 | + * |
7e46296a AM |
9251 | + * Returns 0 if no image, 1 if there is one, -1 if indeterminate. |
9252 | + */ | |
9253 | +int toi_check_for_signature(void) | |
9254 | +{ | |
9255 | + union p_diskpage swap_header_page; | |
9256 | + int type; | |
9257 | + const char *normal_sigs[] = {"SWAP-SPACE", "SWAPSPACE2" }; | |
9258 | + const char *swsusp_sigs[] = {"S1SUSP", "S2SUSP", "S1SUSPEND" }; | |
9259 | + char *swap_header; | |
9260 | + | |
9261 | + if (!toi_cur_sig_page) { | |
9262 | + int result = get_signature_page(); | |
9263 | + | |
9264 | + if (result) | |
9265 | + return result; | |
9266 | + } | |
9267 | + | |
9268 | + /* | |
9269 | + * Start by looking for the binary header. | |
9270 | + */ | |
9271 | + if (!memcmp(tuxonice_signature, toi_cur_sig_page, | |
9272 | + sizeof(tuxonice_signature))) { | |
9273 | + have_image = toi_sig_data->have_image; | |
9274 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Have binary signature. " | |
9275 | + "Have image is %d.", have_image); | |
9276 | + if (have_image) | |
9277 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is " | |
9278 | + "%x. First block is %d.", | |
9279 | + toi_sig_data->header_dev_t, | |
9280 | + toi_sig_data->first_header_block); | |
9281 | + return toi_sig_data->have_image; | |
9282 | + } | |
9283 | + | |
9284 | + /* | |
9285 | + * Failing that, try old file allocator headers. | |
9286 | + */ | |
9287 | + | |
9288 | + if (!memcmp(HaveImage, toi_cur_sig_page, strlen(HaveImage))) { | |
9289 | + have_image = 1; | |
9290 | + return 1; | |
9291 | + } | |
9292 | + | |
9293 | + have_image = 0; | |
9294 | + | |
9295 | + if (!memcmp(NoImage, toi_cur_sig_page, strlen(NoImage))) | |
9296 | + return 0; | |
9297 | + | |
9298 | + /* | |
9299 | + * Nope? How about swap? | |
9300 | + */ | |
9301 | + swap_header_page = (union p_diskpage) toi_cur_sig_page; | |
9302 | + swap_header = swap_header_page.pointer->swh.magic.magic; | |
9303 | + | |
9304 | + /* Normal swapspace? */ | |
9305 | + for (type = 0; type < 2; type++) | |
9306 | + if (!memcmp(normal_sigs[type], swap_header, | |
9307 | + strlen(normal_sigs[type]))) | |
9308 | + return 0; | |
9309 | + | |
9310 | + /* Swsusp or uswsusp? */ | |
9311 | + for (type = 0; type < 3; type++) | |
9312 | + if (!memcmp(swsusp_sigs[type], swap_header, | |
9313 | + strlen(swsusp_sigs[type]))) | |
9314 | + return 2; | |
9315 | + | |
5dd10c98 AM |
9316 | + /* Old TuxOnIce version? */ |
9317 | + if (!memcmp(tuxonice_signature, swap_header, | |
9318 | + sizeof(tuxonice_signature) - 1)) { | |
9319 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Found old TuxOnIce " | |
9320 | + "signature."); | |
9321 | + have_old_image = 1; | |
9322 | + return 3; | |
9323 | + } | |
9324 | + | |
7e46296a AM |
9325 | + return -1; |
9326 | +} | |
9327 | + | |
9328 | +/* | |
9329 | + * Image_exists | |
2380c486 | 9330 | + * |
7e46296a | 9331 | + * Returns -1 if don't know, otherwise 0 (no) or 1 (yes). |
2380c486 | 9332 | + */ |
7e46296a AM |
9333 | +int toi_bio_image_exists(int quiet) |
9334 | +{ | |
9335 | + int result; | |
7e46296a | 9336 | + char *msg = NULL; |
2380c486 | 9337 | + |
7e46296a AM |
9338 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_image_exists."); |
9339 | + | |
9340 | + if (!resume_dev_t) { | |
9341 | + if (!quiet) | |
9342 | + printk(KERN_INFO "Not even trying to read header " | |
9343 | + "because resume_dev_t is not set.\n"); | |
9344 | + return -1; | |
9345 | + } | |
9346 | + | |
5dd10c98 AM |
9347 | + if (open_resume_dev_t(0, quiet)) |
9348 | + return -1; | |
7e46296a AM |
9349 | + |
9350 | + result = toi_check_for_signature(); | |
9351 | + | |
9352 | + clear_toi_state(TOI_RESUMED_BEFORE); | |
9353 | + if (toi_sig_data->resumed_before) | |
9354 | + set_toi_state(TOI_RESUMED_BEFORE); | |
9355 | + | |
9356 | + if (quiet || result == -ENOMEM) | |
de6743ae | 9357 | + return result; |
7e46296a AM |
9358 | + |
9359 | + if (result == -1) | |
9360 | + msg = "TuxOnIce: Unable to find a signature." | |
9361 | + " Could you have moved a swap file?\n"; | |
9362 | + else if (!result) | |
9363 | + msg = "TuxOnIce: No image found.\n"; | |
9364 | + else if (result == 1) | |
9365 | + msg = "TuxOnIce: Image found.\n"; | |
9366 | + else if (result == 2) | |
9367 | + msg = "TuxOnIce: uswsusp or swsusp image found.\n"; | |
5dd10c98 AM |
9368 | + else if (result == 3) |
9369 | + msg = "TuxOnIce: Old implementation's signature found.\n"; | |
2380c486 | 9370 | + |
7e46296a AM |
9371 | + printk(KERN_INFO "%s", msg); |
9372 | + | |
7e46296a AM |
9373 | + return result; |
9374 | +} | |
5dd10c98 AM |
9375 | + |
9376 | +int toi_bio_scan_for_image(int quiet) | |
9377 | +{ | |
9378 | + struct block_device *bdev; | |
9379 | + char default_name[255] = ""; | |
9380 | + | |
9381 | + if (!quiet) | |
9382 | + printk(KERN_DEBUG "Scanning swap devices for TuxOnIce " | |
9383 | + "signature...\n"); | |
9384 | + for (bdev = next_bdev_of_type(NULL, "swap"); bdev; | |
9385 | + bdev = next_bdev_of_type(bdev, "swap")) { | |
9386 | + int result; | |
9387 | + char name[255] = ""; | |
9388 | + sprintf(name, "%u:%u", MAJOR(bdev->bd_dev), | |
9389 | + MINOR(bdev->bd_dev)); | |
9390 | + if (!quiet) | |
9391 | + printk(KERN_DEBUG "- Trying %s.\n", name); | |
9392 | + resume_block_device = bdev; | |
9393 | + resume_dev_t = bdev->bd_dev; | |
9394 | + | |
9395 | + result = toi_check_for_signature(); | |
9396 | + | |
9397 | + resume_block_device = NULL; | |
9398 | + resume_dev_t = MKDEV(0, 0); | |
9399 | + | |
9400 | + if (!default_name[0]) | |
9401 | + strcpy(default_name, name); | |
9402 | + | |
9403 | + if (result == 1) { | |
9404 | + /* Got one! */ | |
9405 | + strcpy(resume_file, name); | |
9406 | + next_bdev_of_type(bdev, NULL); | |
9407 | + if (!quiet) | |
9408 | + printk(KERN_DEBUG " ==> Image found on %s.\n", | |
9409 | + resume_file); | |
9410 | + return 1; | |
9411 | + } | |
9412 | + forget_signature_page(); | |
9413 | + } | |
9414 | + | |
9415 | + if (!quiet) | |
9416 | + printk(KERN_DEBUG "TuxOnIce scan: No image found.\n"); | |
9417 | + strcpy(resume_file, default_name); | |
9418 | + return 0; | |
9419 | +} | |
9420 | + | |
9421 | +int toi_bio_get_header_version(void) | |
9422 | +{ | |
9423 | + return (memcmp(toi_sig_data->sig2, tuxonice_signature, | |
9424 | + sizeof(tuxonice_signature))) ? | |
9425 | + 0 : toi_sig_data->header_version; | |
9426 | + | |
9427 | +} | |
7e46296a AM |
9428 | diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c |
9429 | new file mode 100644 | |
85eb3c9d | 9430 | index 0000000..a3081b7 |
7e46296a AM |
9431 | --- /dev/null |
9432 | +++ b/kernel/power/tuxonice_builtin.c | |
85eb3c9d | 9433 | @@ -0,0 +1,380 @@ |
7e46296a | 9434 | +/* |
5dd10c98 | 9435 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
9436 | + * |
9437 | + * This file is released under the GPLv2. | |
9438 | + */ | |
9439 | +#include <linux/resume-trace.h> | |
9440 | +#include <linux/kernel.h> | |
9441 | +#include <linux/swap.h> | |
9442 | +#include <linux/syscalls.h> | |
9443 | +#include <linux/bio.h> | |
9444 | +#include <linux/root_dev.h> | |
9445 | +#include <linux/freezer.h> | |
9446 | +#include <linux/reboot.h> | |
9447 | +#include <linux/writeback.h> | |
9448 | +#include <linux/tty.h> | |
9449 | +#include <linux/crypto.h> | |
9450 | +#include <linux/cpu.h> | |
9451 | +#include <linux/ctype.h> | |
9452 | +#include "tuxonice_io.h" | |
2380c486 | 9453 | +#include "tuxonice.h" |
7e46296a AM |
9454 | +#include "tuxonice_extent.h" |
9455 | +#include "tuxonice_netlink.h" | |
9456 | +#include "tuxonice_prepare_image.h" | |
9457 | +#include "tuxonice_ui.h" | |
2380c486 | 9458 | +#include "tuxonice_sysfs.h" |
7e46296a AM |
9459 | +#include "tuxonice_pagedir.h" |
9460 | +#include "tuxonice_modules.h" | |
9461 | +#include "tuxonice_builtin.h" | |
9462 | +#include "tuxonice_power_off.h" | |
2380c486 | 9463 | + |
7e46296a AM |
9464 | +/* |
9465 | + * Highmem related functions (x86 only). | |
9466 | + */ | |
2380c486 | 9467 | + |
7e46296a | 9468 | +#ifdef CONFIG_HIGHMEM |
2380c486 | 9469 | + |
7e46296a AM |
9470 | +/** |
9471 | + * copyback_high: Restore highmem pages. | |
9472 | + * | |
9473 | + * Highmem data and pbe lists are/can be stored in highmem. | |
9474 | + * The format is slightly different to the lowmem pbe lists | |
9475 | + * used for the assembly code: the last pbe in each page is | |
9476 | + * a struct page * instead of struct pbe *, pointing to the | |
9477 | + * next page where pbes are stored (or NULL if happens to be | |
9478 | + * the end of the list). Since we don't want to generate | |
9479 | + * unnecessary deltas against swsusp code, we use a cast | |
9480 | + * instead of a union. | |
9481 | + **/ | |
2380c486 | 9482 | + |
7e46296a | 9483 | +static void copyback_high(void) |
2380c486 | 9484 | +{ |
7e46296a AM |
9485 | + struct page *pbe_page = (struct page *) restore_highmem_pblist; |
9486 | + struct pbe *this_pbe, *first_pbe; | |
9487 | + unsigned long *origpage, *copypage; | |
9488 | + int pbe_index = 1; | |
9489 | + | |
9490 | + if (!pbe_page) | |
9491 | + return; | |
9492 | + | |
9493 | + this_pbe = (struct pbe *) kmap_atomic(pbe_page, KM_BOUNCE_READ); | |
9494 | + first_pbe = this_pbe; | |
9495 | + | |
9496 | + while (this_pbe) { | |
9497 | + int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1; | |
9498 | + | |
9499 | + origpage = kmap_atomic((struct page *) this_pbe->orig_address, | |
9500 | + KM_BIO_DST_IRQ); | |
9501 | + copypage = kmap_atomic((struct page *) this_pbe->address, | |
9502 | + KM_BIO_SRC_IRQ); | |
9503 | + | |
9504 | + while (loop >= 0) { | |
9505 | + *(origpage + loop) = *(copypage + loop); | |
9506 | + loop--; | |
9507 | + } | |
9508 | + | |
9509 | + kunmap_atomic(origpage, KM_BIO_DST_IRQ); | |
9510 | + kunmap_atomic(copypage, KM_BIO_SRC_IRQ); | |
9511 | + | |
9512 | + if (!this_pbe->next) | |
9513 | + break; | |
9514 | + | |
9515 | + if (pbe_index < PBES_PER_PAGE) { | |
9516 | + this_pbe++; | |
9517 | + pbe_index++; | |
9518 | + } else { | |
9519 | + pbe_page = (struct page *) this_pbe->next; | |
9520 | + kunmap_atomic(first_pbe, KM_BOUNCE_READ); | |
9521 | + if (!pbe_page) | |
9522 | + return; | |
9523 | + this_pbe = (struct pbe *) kmap_atomic(pbe_page, | |
9524 | + KM_BOUNCE_READ); | |
9525 | + first_pbe = this_pbe; | |
9526 | + pbe_index = 1; | |
9527 | + } | |
2380c486 | 9528 | + } |
7e46296a | 9529 | + kunmap_atomic(first_pbe, KM_BOUNCE_READ); |
2380c486 JR |
9530 | +} |
9531 | + | |
7e46296a AM |
9532 | +#else /* CONFIG_HIGHMEM */ |
9533 | +static void copyback_high(void) { } | |
9534 | +#endif | |
2380c486 | 9535 | + |
7e46296a AM |
9536 | +char toi_wait_for_keypress_dev_console(int timeout) |
9537 | +{ | |
9538 | + int fd, this_timeout = 255; | |
9539 | + char key = '\0'; | |
9540 | + struct termios t, t_backup; | |
9541 | + | |
9542 | + /* We should be guaranteed /dev/console exists after populate_rootfs() | |
9543 | + * in init/main.c. | |
9544 | + */ | |
9545 | + fd = sys_open("/dev/console", O_RDONLY, 0); | |
9546 | + if (fd < 0) { | |
9547 | + printk(KERN_INFO "Couldn't open /dev/console.\n"); | |
9548 | + return key; | |
9549 | + } | |
9550 | + | |
9551 | + if (sys_ioctl(fd, TCGETS, (long)&t) < 0) | |
9552 | + goto out_close; | |
9553 | + | |
9554 | + memcpy(&t_backup, &t, sizeof(t)); | |
9555 | + | |
9556 | + t.c_lflag &= ~(ISIG|ICANON|ECHO); | |
9557 | + t.c_cc[VMIN] = 0; | |
9558 | + | |
9559 | +new_timeout: | |
9560 | + if (timeout > 0) { | |
9561 | + this_timeout = timeout < 26 ? timeout : 25; | |
9562 | + timeout -= this_timeout; | |
9563 | + this_timeout *= 10; | |
9564 | + } | |
9565 | + | |
9566 | + t.c_cc[VTIME] = this_timeout; | |
9567 | + | |
9568 | + if (sys_ioctl(fd, TCSETS, (long)&t) < 0) | |
9569 | + goto out_restore; | |
9570 | + | |
9571 | + while (1) { | |
9572 | + if (sys_read(fd, &key, 1) <= 0) { | |
9573 | + if (timeout) | |
9574 | + goto new_timeout; | |
9575 | + key = '\0'; | |
9576 | + break; | |
9577 | + } | |
9578 | + key = tolower(key); | |
9579 | + if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) { | |
9580 | + if (key == 'c') { | |
9581 | + set_toi_state(TOI_CONTINUE_REQ); | |
9582 | + break; | |
9583 | + } else if (key == ' ') | |
9584 | + break; | |
9585 | + } else | |
9586 | + break; | |
9587 | + } | |
2380c486 | 9588 | + |
7e46296a AM |
9589 | +out_restore: |
9590 | + sys_ioctl(fd, TCSETS, (long)&t_backup); | |
9591 | +out_close: | |
9592 | + sys_close(fd); | |
2380c486 | 9593 | + |
7e46296a AM |
9594 | + return key; |
9595 | +} | |
9596 | +EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console); | |
9597 | + | |
9598 | +struct toi_boot_kernel_data toi_bkd __nosavedata | |
9599 | + __attribute__((aligned(PAGE_SIZE))) = { | |
9600 | + MY_BOOT_KERNEL_DATA_VERSION, | |
9601 | + 0, | |
9602 | +#ifdef CONFIG_TOI_REPLACE_SWSUSP | |
9603 | + (1 << TOI_REPLACE_SWSUSP) | | |
9604 | +#endif | |
9605 | + (1 << TOI_NO_FLUSHER_THREAD) | | |
9606 | + (1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG), | |
2380c486 | 9607 | +}; |
7e46296a | 9608 | +EXPORT_SYMBOL_GPL(toi_bkd); |
2380c486 | 9609 | + |
5dd10c98 | 9610 | +struct block_device *toi_open_by_devnum(dev_t dev) |
7e46296a AM |
9611 | +{ |
9612 | + struct block_device *bdev = bdget(dev); | |
9613 | + int err = -ENOMEM; | |
9614 | + if (bdev) | |
5dd10c98 | 9615 | + err = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); |
7e46296a AM |
9616 | + return err ? ERR_PTR(err) : bdev; |
9617 | +} | |
9618 | +EXPORT_SYMBOL_GPL(toi_open_by_devnum); | |
2380c486 | 9619 | + |
5dd10c98 AM |
9620 | +/** |
9621 | + * toi_close_bdev: Close a swap bdev. | |
9622 | + * | |
9623 | + * int: The swap entry number to close. | |
9624 | + */ | |
9625 | +void toi_close_bdev(struct block_device *bdev) | |
9626 | +{ | |
9627 | + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); | |
9628 | +} | |
9629 | +EXPORT_SYMBOL_GPL(toi_close_bdev); | |
9630 | + | |
7e46296a AM |
9631 | +int toi_wait = CONFIG_TOI_DEFAULT_WAIT; |
9632 | +EXPORT_SYMBOL_GPL(toi_wait); | |
2380c486 | 9633 | + |
7e46296a AM |
9634 | +struct toi_core_fns *toi_core_fns; |
9635 | +EXPORT_SYMBOL_GPL(toi_core_fns); | |
2380c486 | 9636 | + |
7e46296a AM |
9637 | +unsigned long toi_result; |
9638 | +EXPORT_SYMBOL_GPL(toi_result); | |
2380c486 | 9639 | + |
7e46296a AM |
9640 | +struct pagedir pagedir1 = {1}; |
9641 | +EXPORT_SYMBOL_GPL(pagedir1); | |
2380c486 | 9642 | + |
7e46296a AM |
9643 | +unsigned long toi_get_nonconflicting_page(void) |
9644 | +{ | |
9645 | + return toi_core_fns->get_nonconflicting_page(); | |
9646 | +} | |
2380c486 | 9647 | + |
7e46296a AM |
9648 | +int toi_post_context_save(void) |
9649 | +{ | |
9650 | + return toi_core_fns->post_context_save(); | |
9651 | +} | |
2380c486 | 9652 | + |
7e46296a AM |
9653 | +int try_tuxonice_hibernate(void) |
9654 | +{ | |
9655 | + if (!toi_core_fns) | |
9656 | + return -ENODEV; | |
2380c486 | 9657 | + |
7e46296a AM |
9658 | + return toi_core_fns->try_hibernate(); |
9659 | +} | |
2380c486 | 9660 | + |
7e46296a AM |
9661 | +static int num_resume_calls; |
9662 | +#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL | |
9663 | +static int ignore_late_initcall = 1; | |
9664 | +#else | |
9665 | +static int ignore_late_initcall; | |
9666 | +#endif | |
2380c486 | 9667 | + |
7e46296a AM |
9668 | +int toi_translate_err_default = TOI_CONTINUE_REQ; |
9669 | +EXPORT_SYMBOL_GPL(toi_translate_err_default); | |
2380c486 | 9670 | + |
7e46296a | 9671 | +void try_tuxonice_resume(void) |
2380c486 | 9672 | +{ |
7e46296a AM |
9673 | + /* Don't let it wrap around eventually */ |
9674 | + if (num_resume_calls < 2) | |
9675 | + num_resume_calls++; | |
2380c486 | 9676 | + |
7e46296a AM |
9677 | + if (num_resume_calls == 1 && ignore_late_initcall) { |
9678 | + printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n"); | |
9679 | + return; | |
9680 | + } | |
2380c486 | 9681 | + |
7e46296a AM |
9682 | + if (toi_core_fns) |
9683 | + toi_core_fns->try_resume(); | |
9684 | + else | |
9685 | + printk(KERN_INFO "TuxOnIce core not loaded yet.\n"); | |
2380c486 JR |
9686 | +} |
9687 | + | |
7e46296a | 9688 | +int toi_lowlevel_builtin(void) |
2380c486 | 9689 | +{ |
7e46296a | 9690 | + int error = 0; |
2380c486 | 9691 | + |
7e46296a AM |
9692 | + save_processor_state(); |
9693 | + error = swsusp_arch_suspend(); | |
9694 | + if (error) | |
9695 | + printk(KERN_ERR "Error %d hibernating\n", error); | |
2380c486 | 9696 | + |
7e46296a AM |
9697 | + /* Restore control flow appears here */ |
9698 | + if (!toi_in_hibernate) { | |
9699 | + copyback_high(); | |
9700 | + set_toi_state(TOI_NOW_RESUMING); | |
2380c486 JR |
9701 | + } |
9702 | + | |
7e46296a AM |
9703 | + restore_processor_state(); |
9704 | + | |
9705 | + return error; | |
2380c486 | 9706 | +} |
7e46296a | 9707 | +EXPORT_SYMBOL_GPL(toi_lowlevel_builtin); |
2380c486 | 9708 | + |
7e46296a AM |
9709 | +unsigned long toi_compress_bytes_in; |
9710 | +EXPORT_SYMBOL_GPL(toi_compress_bytes_in); | |
2380c486 | 9711 | + |
7e46296a AM |
9712 | +unsigned long toi_compress_bytes_out; |
9713 | +EXPORT_SYMBOL_GPL(toi_compress_bytes_out); | |
2380c486 | 9714 | + |
7e46296a AM |
9715 | +unsigned long toi_state = ((1 << TOI_BOOT_TIME) | |
9716 | + (1 << TOI_IGNORE_LOGLEVEL) | | |
9717 | + (1 << TOI_IO_STOPPED)); | |
9718 | +EXPORT_SYMBOL_GPL(toi_state); | |
2380c486 | 9719 | + |
7e46296a AM |
9720 | +/* The number of hibernates we have started (some may have been cancelled) */ |
9721 | +unsigned int nr_hibernates; | |
9722 | +EXPORT_SYMBOL_GPL(nr_hibernates); | |
2380c486 | 9723 | + |
7e46296a AM |
9724 | +int toi_running; |
9725 | +EXPORT_SYMBOL_GPL(toi_running); | |
2380c486 | 9726 | + |
7e46296a AM |
9727 | +__nosavedata int toi_in_hibernate; |
9728 | +EXPORT_SYMBOL_GPL(toi_in_hibernate); | |
2380c486 | 9729 | + |
7e46296a AM |
9730 | +__nosavedata struct pbe *restore_highmem_pblist; |
9731 | +EXPORT_SYMBOL_GPL(restore_highmem_pblist); | |
2380c486 | 9732 | + |
cacc47f8 AM |
9733 | +void toi_read_lock_tasklist(void) |
9734 | +{ | |
9735 | + read_lock(&tasklist_lock); | |
9736 | +} | |
9737 | +EXPORT_SYMBOL_GPL(toi_read_lock_tasklist); | |
9738 | + | |
9739 | +void toi_read_unlock_tasklist(void) | |
9740 | +{ | |
9741 | + read_unlock(&tasklist_lock); | |
9742 | +} | |
9743 | +EXPORT_SYMBOL_GPL(toi_read_unlock_tasklist); | |
9744 | + | |
7e46296a AM |
9745 | +static int __init toi_wait_setup(char *str) |
9746 | +{ | |
9747 | + int value; | |
2380c486 | 9748 | + |
7e46296a AM |
9749 | + if (sscanf(str, "=%d", &value)) { |
9750 | + if (value < -1 || value > 255) | |
9751 | + printk(KERN_INFO "TuxOnIce_wait outside range -1 to " | |
9752 | + "255.\n"); | |
9753 | + else | |
9754 | + toi_wait = value; | |
9755 | + } | |
2380c486 | 9756 | + |
2380c486 JR |
9757 | + return 1; |
9758 | +} | |
9759 | + | |
7e46296a | 9760 | +__setup("toi_wait", toi_wait_setup); |
2380c486 | 9761 | + |
7e46296a AM |
9762 | +static int __init toi_translate_retry_setup(char *str) |
9763 | +{ | |
9764 | + toi_translate_err_default = 0; | |
9765 | + return 1; | |
9766 | +} | |
2380c486 | 9767 | + |
7e46296a | 9768 | +__setup("toi_translate_retry", toi_translate_retry_setup); |
2380c486 | 9769 | + |
7e46296a AM |
9770 | +static int __init toi_debug_setup(char *str) |
9771 | +{ | |
85eb3c9d | 9772 | + toi_bkd.toi_action |= (1 << TOI_LOGALL); |
7e46296a AM |
9773 | + toi_bkd.toi_debug_state = 255; |
9774 | + toi_bkd.toi_default_console_level = 7; | |
9775 | + return 1; | |
2380c486 JR |
9776 | +} |
9777 | + | |
7e46296a AM |
9778 | +__setup("toi_debug_setup", toi_debug_setup); |
9779 | + | |
85eb3c9d AM |
9780 | +static int __init toi_pause_setup(char *str) |
9781 | +{ | |
9782 | + toi_bkd.toi_action |= (1 << TOI_PAUSE); | |
9783 | + return 1; | |
9784 | +} | |
9785 | + | |
9786 | +__setup("toi_pause", toi_pause_setup); | |
9787 | + | |
7e46296a | 9788 | +static int __init toi_ignore_late_initcall_setup(char *str) |
2380c486 | 9789 | +{ |
7e46296a | 9790 | + int value; |
2380c486 | 9791 | + |
7e46296a AM |
9792 | + if (sscanf(str, "=%d", &value)) |
9793 | + ignore_late_initcall = value; | |
2380c486 | 9794 | + |
7e46296a AM |
9795 | + return 1; |
9796 | +} | |
9797 | + | |
9798 | +__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup); | |
2380c486 | 9799 | + |
7e46296a AM |
9800 | +int toi_force_no_multithreaded; |
9801 | +EXPORT_SYMBOL_GPL(toi_force_no_multithreaded); | |
2380c486 | 9802 | + |
7e46296a AM |
9803 | +static int __init toi_force_no_multithreaded_setup(char *str) |
9804 | +{ | |
9805 | + int value; | |
2380c486 | 9806 | + |
7e46296a AM |
9807 | + if (sscanf(str, "=%d", &value)) |
9808 | + toi_force_no_multithreaded = value; | |
9809 | + | |
9810 | + return 1; | |
9811 | +} | |
2380c486 | 9812 | + |
7e46296a AM |
9813 | +__setup("toi_no_multithreaded", toi_force_no_multithreaded_setup); |
9814 | diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h | |
9815 | new file mode 100644 | |
cacc47f8 | 9816 | index 0000000..ab67d31 |
7e46296a AM |
9817 | --- /dev/null |
9818 | +++ b/kernel/power/tuxonice_builtin.h | |
cacc47f8 | 9819 | @@ -0,0 +1,32 @@ |
2380c486 | 9820 | +/* |
5dd10c98 | 9821 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
9822 | + * |
9823 | + * This file is released under the GPLv2. | |
2380c486 | 9824 | + */ |
7e46296a | 9825 | +#include <asm/setup.h> |
2380c486 | 9826 | + |
7e46296a AM |
9827 | +extern struct toi_core_fns *toi_core_fns; |
9828 | +extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out; | |
9829 | +extern unsigned int nr_hibernates; | |
9830 | +extern int toi_in_hibernate; | |
2380c486 | 9831 | + |
7e46296a | 9832 | +extern __nosavedata struct pbe *restore_highmem_pblist; |
2380c486 | 9833 | + |
7e46296a | 9834 | +int toi_lowlevel_builtin(void); |
2380c486 | 9835 | + |
7e46296a AM |
9836 | +#ifdef CONFIG_HIGHMEM |
9837 | +extern __nosavedata struct zone_data *toi_nosave_zone_list; | |
9838 | +extern __nosavedata unsigned long toi_nosave_max_pfn; | |
9839 | +#endif | |
2380c486 | 9840 | + |
7e46296a AM |
9841 | +extern unsigned long toi_get_nonconflicting_page(void); |
9842 | +extern int toi_post_context_save(void); | |
2380c486 | 9843 | + |
7e46296a | 9844 | +extern char toi_wait_for_keypress_dev_console(int timeout); |
5dd10c98 AM |
9845 | +extern struct block_device *toi_open_by_devnum(dev_t dev); |
9846 | +extern void toi_close_bdev(struct block_device *bdev); | |
7e46296a AM |
9847 | +extern int toi_wait; |
9848 | +extern int toi_translate_err_default; | |
9849 | +extern int toi_force_no_multithreaded; | |
cacc47f8 AM |
9850 | +extern void toi_read_lock_tasklist(void); |
9851 | +extern void toi_read_unlock_tasklist(void); | |
7e46296a AM |
9852 | diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c |
9853 | new file mode 100644 | |
5dd10c98 | 9854 | index 0000000..3ec2c76 |
7e46296a AM |
9855 | --- /dev/null |
9856 | +++ b/kernel/power/tuxonice_checksum.c | |
5dd10c98 | 9857 | @@ -0,0 +1,377 @@ |
7e46296a AM |
9858 | +/* |
9859 | + * kernel/power/tuxonice_checksum.c | |
9860 | + * | |
5dd10c98 | 9861 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
9862 | + * |
9863 | + * This file is released under the GPLv2. | |
9864 | + * | |
9865 | + * This file contains data checksum routines for TuxOnIce, | |
9866 | + * using cryptoapi. They are used to locate any modifications | |
9867 | + * made to pageset 2 while we're saving it. | |
9868 | + */ | |
2380c486 | 9869 | + |
7e46296a AM |
9870 | +#include <linux/suspend.h> |
9871 | +#include <linux/highmem.h> | |
9872 | +#include <linux/vmalloc.h> | |
9873 | +#include <linux/crypto.h> | |
9874 | +#include <linux/scatterlist.h> | |
2380c486 | 9875 | + |
7e46296a AM |
9876 | +#include "tuxonice.h" |
9877 | +#include "tuxonice_modules.h" | |
9878 | +#include "tuxonice_sysfs.h" | |
9879 | +#include "tuxonice_io.h" | |
9880 | +#include "tuxonice_pageflags.h" | |
9881 | +#include "tuxonice_checksum.h" | |
9882 | +#include "tuxonice_pagedir.h" | |
9883 | +#include "tuxonice_alloc.h" | |
2380c486 | 9884 | + |
7e46296a | 9885 | +static struct toi_module_ops toi_checksum_ops; |
2380c486 | 9886 | + |
7e46296a AM |
9887 | +/* Constant at the mo, but I might allow tuning later */ |
9888 | +static char toi_checksum_name[32] = "md4"; | |
9889 | +/* Bytes per checksum */ | |
9890 | +#define CHECKSUM_SIZE (16) | |
2380c486 | 9891 | + |
7e46296a | 9892 | +#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE) |
2380c486 | 9893 | + |
7e46296a AM |
9894 | +struct cpu_context { |
9895 | + struct crypto_hash *transform; | |
9896 | + struct hash_desc desc; | |
9897 | + struct scatterlist sg[2]; | |
9898 | + char *buf; | |
9899 | +}; | |
2380c486 | 9900 | + |
7e46296a AM |
9901 | +static DEFINE_PER_CPU(struct cpu_context, contexts); |
9902 | +static int pages_allocated; | |
9903 | +static unsigned long page_list; | |
2380c486 | 9904 | + |
7e46296a | 9905 | +static int toi_num_resaved; |
2380c486 | 9906 | + |
7e46296a AM |
9907 | +static unsigned long this_checksum, next_page; |
9908 | +static int checksum_index; | |
2380c486 | 9909 | + |
7e46296a AM |
9910 | +static inline int checksum_pages_needed(void) |
9911 | +{ | |
9912 | + return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE); | |
9913 | +} | |
2380c486 | 9914 | + |
7e46296a | 9915 | +/* ---- Local buffer management ---- */ |
2380c486 | 9916 | + |
7e46296a AM |
9917 | +/* |
9918 | + * toi_checksum_cleanup | |
9919 | + * | |
9920 | + * Frees memory allocated for our labours. | |
9921 | + */ | |
9922 | +static void toi_checksum_cleanup(int ending_cycle) | |
9923 | +{ | |
9924 | + int cpu; | |
2380c486 | 9925 | + |
7e46296a AM |
9926 | + if (ending_cycle) { |
9927 | + for_each_online_cpu(cpu) { | |
9928 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
9929 | + if (this->transform) { | |
9930 | + crypto_free_hash(this->transform); | |
9931 | + this->transform = NULL; | |
9932 | + this->desc.tfm = NULL; | |
9933 | + } | |
2380c486 | 9934 | + |
7e46296a AM |
9935 | + if (this->buf) { |
9936 | + toi_free_page(27, (unsigned long) this->buf); | |
9937 | + this->buf = NULL; | |
9938 | + } | |
2380c486 | 9939 | + } |
7e46296a AM |
9940 | + } |
9941 | +} | |
2380c486 | 9942 | + |
7e46296a AM |
9943 | +/* |
9944 | + * toi_crypto_initialise | |
9945 | + * | |
9946 | + * Prepare to do some work by allocating buffers and transforms. | |
9947 | + * Returns: Int: Zero. Even if we can't set up checksum, we still | |
9948 | + * seek to hibernate. | |
9949 | + */ | |
9950 | +static int toi_checksum_initialise(int starting_cycle) | |
9951 | +{ | |
9952 | + int cpu; | |
2380c486 | 9953 | + |
7e46296a AM |
9954 | + if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled) |
9955 | + return 0; | |
9956 | + | |
9957 | + if (!*toi_checksum_name) { | |
9958 | + printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n"); | |
9959 | + return 1; | |
9960 | + } | |
9961 | + | |
9962 | + for_each_online_cpu(cpu) { | |
9963 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
9964 | + struct page *page; | |
9965 | + | |
9966 | + this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0); | |
9967 | + if (IS_ERR(this->transform)) { | |
9968 | + printk(KERN_INFO "TuxOnIce: Failed to initialise the " | |
9969 | + "%s checksum algorithm: %ld.\n", | |
9970 | + toi_checksum_name, (long) this->transform); | |
9971 | + this->transform = NULL; | |
9972 | + return 1; | |
2380c486 | 9973 | + } |
2380c486 | 9974 | + |
7e46296a AM |
9975 | + this->desc.tfm = this->transform; |
9976 | + this->desc.flags = 0; | |
2380c486 | 9977 | + |
7e46296a AM |
9978 | + page = toi_alloc_page(27, GFP_KERNEL); |
9979 | + if (!page) | |
9980 | + return 1; | |
9981 | + this->buf = page_address(page); | |
9982 | + sg_init_one(&this->sg[0], this->buf, PAGE_SIZE); | |
9983 | + } | |
2380c486 JR |
9984 | + return 0; |
9985 | +} | |
9986 | + | |
9987 | +/* | |
7e46296a AM |
9988 | + * toi_checksum_print_debug_stats |
9989 | + * @buffer: Pointer to a buffer into which the debug info will be printed. | |
9990 | + * @size: Size of the buffer. | |
9991 | + * | |
9992 | + * Print information to be recorded for debugging purposes into a buffer. | |
9993 | + * Returns: Number of characters written to the buffer. | |
2380c486 | 9994 | + */ |
2380c486 | 9995 | + |
7e46296a AM |
9996 | +static int toi_checksum_print_debug_stats(char *buffer, int size) |
9997 | +{ | |
9998 | + int len; | |
2380c486 | 9999 | + |
7e46296a AM |
10000 | + if (!toi_checksum_ops.enabled) |
10001 | + return scnprintf(buffer, size, | |
10002 | + "- Checksumming disabled.\n"); | |
2380c486 | 10003 | + |
7e46296a AM |
10004 | + len = scnprintf(buffer, size, "- Checksum method is '%s'.\n", |
10005 | + toi_checksum_name); | |
10006 | + len += scnprintf(buffer + len, size - len, | |
10007 | + " %d pages resaved in atomic copy.\n", toi_num_resaved); | |
10008 | + return len; | |
10009 | +} | |
2380c486 | 10010 | + |
7e46296a AM |
10011 | +static int toi_checksum_memory_needed(void) |
10012 | +{ | |
10013 | + return toi_checksum_ops.enabled ? | |
10014 | + checksum_pages_needed() << PAGE_SHIFT : 0; | |
2380c486 JR |
10015 | +} |
10016 | + | |
7e46296a AM |
10017 | +static int toi_checksum_storage_needed(void) |
10018 | +{ | |
10019 | + if (toi_checksum_ops.enabled) | |
10020 | + return strlen(toi_checksum_name) + sizeof(int) + 1; | |
10021 | + else | |
10022 | + return 0; | |
10023 | +} | |
2380c486 | 10024 | + |
7e46296a AM |
10025 | +/* |
10026 | + * toi_checksum_save_config_info | |
10027 | + * @buffer: Pointer to a buffer of size PAGE_SIZE. | |
10028 | + * | |
10029 | + * Save informaton needed when reloading the image at resume time. | |
10030 | + * Returns: Number of bytes used for saving our data. | |
10031 | + */ | |
10032 | +static int toi_checksum_save_config_info(char *buffer) | |
10033 | +{ | |
10034 | + int namelen = strlen(toi_checksum_name) + 1; | |
10035 | + int total_len; | |
2380c486 | 10036 | + |
7e46296a AM |
10037 | + *((unsigned int *) buffer) = namelen; |
10038 | + strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen); | |
10039 | + total_len = sizeof(unsigned int) + namelen; | |
10040 | + return total_len; | |
10041 | +} | |
2380c486 | 10042 | + |
7e46296a AM |
10043 | +/* toi_checksum_load_config_info |
10044 | + * @buffer: Pointer to the start of the data. | |
10045 | + * @size: Number of bytes that were saved. | |
10046 | + * | |
10047 | + * Description: Reload information needed for dechecksuming the image at | |
10048 | + * resume time. | |
10049 | + */ | |
10050 | +static void toi_checksum_load_config_info(char *buffer, int size) | |
2380c486 | 10051 | +{ |
7e46296a | 10052 | + int namelen; |
2380c486 | 10053 | + |
7e46296a AM |
10054 | + namelen = *((unsigned int *) (buffer)); |
10055 | + strncpy(toi_checksum_name, buffer + sizeof(unsigned int), | |
10056 | + namelen); | |
10057 | + return; | |
10058 | +} | |
2380c486 | 10059 | + |
7e46296a AM |
10060 | +/* |
10061 | + * Free Checksum Memory | |
10062 | + */ | |
2380c486 | 10063 | + |
7e46296a AM |
10064 | +void free_checksum_pages(void) |
10065 | +{ | |
10066 | + while (pages_allocated) { | |
10067 | + unsigned long next = *((unsigned long *) page_list); | |
10068 | + ClearPageNosave(virt_to_page(page_list)); | |
10069 | + toi_free_page(15, (unsigned long) page_list); | |
10070 | + page_list = next; | |
10071 | + pages_allocated--; | |
2380c486 | 10072 | + } |
2380c486 JR |
10073 | +} |
10074 | + | |
7e46296a AM |
10075 | +/* |
10076 | + * Allocate Checksum Memory | |
10077 | + */ | |
10078 | + | |
10079 | +int allocate_checksum_pages(void) | |
2380c486 | 10080 | +{ |
7e46296a | 10081 | + int pages_needed = checksum_pages_needed(); |
2380c486 | 10082 | + |
7e46296a AM |
10083 | + if (!toi_checksum_ops.enabled) |
10084 | + return 0; | |
10085 | + | |
10086 | + while (pages_allocated < pages_needed) { | |
10087 | + unsigned long *new_page = | |
10088 | + (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP); | |
10089 | + if (!new_page) { | |
10090 | + printk(KERN_ERR "Unable to allocate checksum pages.\n"); | |
10091 | + return -ENOMEM; | |
2380c486 | 10092 | + } |
7e46296a AM |
10093 | + SetPageNosave(virt_to_page(new_page)); |
10094 | + (*new_page) = page_list; | |
10095 | + page_list = (unsigned long) new_page; | |
10096 | + pages_allocated++; | |
2380c486 | 10097 | + } |
2380c486 | 10098 | + |
7e46296a AM |
10099 | + next_page = (unsigned long) page_list; |
10100 | + checksum_index = 0; | |
2380c486 | 10101 | + |
7e46296a | 10102 | + return 0; |
2380c486 JR |
10103 | +} |
10104 | + | |
7e46296a | 10105 | +char *tuxonice_get_next_checksum(void) |
2380c486 | 10106 | +{ |
7e46296a AM |
10107 | + if (!toi_checksum_ops.enabled) |
10108 | + return NULL; | |
2380c486 | 10109 | + |
7e46296a AM |
10110 | + if (checksum_index % CHECKSUMS_PER_PAGE) |
10111 | + this_checksum += CHECKSUM_SIZE; | |
10112 | + else { | |
10113 | + this_checksum = next_page + sizeof(void *); | |
10114 | + next_page = *((unsigned long *) next_page); | |
10115 | + } | |
2380c486 | 10116 | + |
7e46296a AM |
10117 | + checksum_index++; |
10118 | + return (char *) this_checksum; | |
2380c486 JR |
10119 | +} |
10120 | + | |
7e46296a | 10121 | +int tuxonice_calc_checksum(struct page *page, char *checksum_locn) |
2380c486 | 10122 | +{ |
7e46296a AM |
10123 | + char *pa; |
10124 | + int result, cpu = smp_processor_id(); | |
10125 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 10126 | + |
7e46296a AM |
10127 | + if (!toi_checksum_ops.enabled) |
10128 | + return 0; | |
2380c486 | 10129 | + |
7e46296a AM |
10130 | + pa = kmap(page); |
10131 | + memcpy(ctx->buf, pa, PAGE_SIZE); | |
10132 | + kunmap(page); | |
10133 | + result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, | |
10134 | + checksum_locn); | |
10135 | + if (result) | |
10136 | + printk(KERN_ERR "TuxOnIce checksumming: crypto_hash_digest " | |
10137 | + "returned %d.\n", result); | |
10138 | + return result; | |
2380c486 | 10139 | +} |
7e46296a AM |
10140 | +/* |
10141 | + * Calculate checksums | |
10142 | + */ | |
2380c486 | 10143 | + |
7e46296a | 10144 | +void check_checksums(void) |
2380c486 | 10145 | +{ |
7e46296a AM |
10146 | + int pfn, index = 0, cpu = smp_processor_id(); |
10147 | + char current_checksum[CHECKSUM_SIZE]; | |
10148 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 10149 | + |
7e46296a | 10150 | + if (!toi_checksum_ops.enabled) |
2380c486 JR |
10151 | + return; |
10152 | + | |
7e46296a | 10153 | + next_page = (unsigned long) page_list; |
2380c486 | 10154 | + |
7e46296a AM |
10155 | + toi_num_resaved = 0; |
10156 | + this_checksum = 0; | |
2380c486 | 10157 | + |
7e46296a AM |
10158 | + memory_bm_position_reset(pageset2_map); |
10159 | + for (pfn = memory_bm_next_pfn(pageset2_map); pfn != BM_END_OF_MAP; | |
10160 | + pfn = memory_bm_next_pfn(pageset2_map)) { | |
10161 | + int ret; | |
10162 | + char *pa; | |
10163 | + struct page *page = pfn_to_page(pfn); | |
2380c486 | 10164 | + |
7e46296a AM |
10165 | + if (index % CHECKSUMS_PER_PAGE) { |
10166 | + this_checksum += CHECKSUM_SIZE; | |
10167 | + } else { | |
10168 | + this_checksum = next_page + sizeof(void *); | |
10169 | + next_page = *((unsigned long *) next_page); | |
10170 | + } | |
2380c486 | 10171 | + |
7e46296a AM |
10172 | + /* Done when IRQs disabled so must be atomic */ |
10173 | + pa = kmap_atomic(page, KM_USER1); | |
10174 | + memcpy(ctx->buf, pa, PAGE_SIZE); | |
10175 | + kunmap_atomic(pa, KM_USER1); | |
10176 | + ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, | |
10177 | + current_checksum); | |
2380c486 | 10178 | + |
7e46296a AM |
10179 | + if (ret) { |
10180 | + printk(KERN_INFO "Digest failed. Returned %d.\n", ret); | |
10181 | + return; | |
10182 | + } | |
2380c486 | 10183 | + |
7e46296a AM |
10184 | + if (memcmp(current_checksum, (char *) this_checksum, |
10185 | + CHECKSUM_SIZE)) { | |
10186 | + SetPageResave(pfn_to_page(pfn)); | |
10187 | + toi_num_resaved++; | |
10188 | + if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED)) | |
10189 | + set_abort_result(TOI_RESAVE_NEEDED); | |
10190 | + } | |
2380c486 | 10191 | + |
7e46296a AM |
10192 | + index++; |
10193 | + } | |
2380c486 | 10194 | +} |
2380c486 | 10195 | + |
7e46296a AM |
10196 | +static struct toi_sysfs_data sysfs_params[] = { |
10197 | + SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0, | |
10198 | + NULL), | |
10199 | + SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action, | |
10200 | + TOI_ABORT_ON_RESAVE_NEEDED, 0) | |
10201 | +}; | |
10202 | + | |
10203 | +/* | |
10204 | + * Ops structure. | |
2380c486 | 10205 | + */ |
7e46296a AM |
10206 | +static struct toi_module_ops toi_checksum_ops = { |
10207 | + .type = MISC_MODULE, | |
10208 | + .name = "checksumming", | |
10209 | + .directory = "checksum", | |
10210 | + .module = THIS_MODULE, | |
10211 | + .initialise = toi_checksum_initialise, | |
10212 | + .cleanup = toi_checksum_cleanup, | |
10213 | + .print_debug_info = toi_checksum_print_debug_stats, | |
10214 | + .save_config_info = toi_checksum_save_config_info, | |
10215 | + .load_config_info = toi_checksum_load_config_info, | |
10216 | + .memory_needed = toi_checksum_memory_needed, | |
10217 | + .storage_needed = toi_checksum_storage_needed, | |
2380c486 | 10218 | + |
7e46296a AM |
10219 | + .sysfs_data = sysfs_params, |
10220 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
10221 | + sizeof(struct toi_sysfs_data), | |
10222 | +}; | |
2380c486 | 10223 | + |
7e46296a AM |
10224 | +/* ---- Registration ---- */ |
10225 | +int toi_checksum_init(void) | |
2380c486 | 10226 | +{ |
7e46296a AM |
10227 | + int result = toi_register_module(&toi_checksum_ops); |
10228 | + return result; | |
2380c486 JR |
10229 | +} |
10230 | + | |
7e46296a | 10231 | +void toi_checksum_exit(void) |
2380c486 | 10232 | +{ |
7e46296a | 10233 | + toi_unregister_module(&toi_checksum_ops); |
2380c486 | 10234 | +} |
7e46296a AM |
10235 | diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h |
10236 | new file mode 100644 | |
5dd10c98 | 10237 | index 0000000..0f2812e |
7e46296a AM |
10238 | --- /dev/null |
10239 | +++ b/kernel/power/tuxonice_checksum.h | |
5dd10c98 | 10240 | @@ -0,0 +1,31 @@ |
7e46296a AM |
10241 | +/* |
10242 | + * kernel/power/tuxonice_checksum.h | |
2380c486 | 10243 | + * |
5dd10c98 | 10244 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
10245 | + * |
10246 | + * This file is released under the GPLv2. | |
10247 | + * | |
10248 | + * This file contains data checksum routines for TuxOnIce, | |
10249 | + * using cryptoapi. They are used to locate any modifications | |
10250 | + * made to pageset 2 while we're saving it. | |
2380c486 | 10251 | + */ |
2380c486 | 10252 | + |
7e46296a AM |
10253 | +#if defined(CONFIG_TOI_CHECKSUM) |
10254 | +extern int toi_checksum_init(void); | |
10255 | +extern void toi_checksum_exit(void); | |
10256 | +void check_checksums(void); | |
10257 | +int allocate_checksum_pages(void); | |
10258 | +void free_checksum_pages(void); | |
10259 | +char *tuxonice_get_next_checksum(void); | |
10260 | +int tuxonice_calc_checksum(struct page *page, char *checksum_locn); | |
10261 | +#else | |
10262 | +static inline int toi_checksum_init(void) { return 0; } | |
10263 | +static inline void toi_checksum_exit(void) { } | |
10264 | +static inline void check_checksums(void) { }; | |
10265 | +static inline int allocate_checksum_pages(void) { return 0; }; | |
10266 | +static inline void free_checksum_pages(void) { }; | |
10267 | +static inline char *tuxonice_get_next_checksum(void) { return NULL; }; | |
10268 | +static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn) | |
10269 | + { return 0; } | |
10270 | +#endif | |
10271 | + | |
10272 | diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c | |
10273 | new file mode 100644 | |
5dd10c98 | 10274 | index 0000000..0e5a262 |
7e46296a AM |
10275 | --- /dev/null |
10276 | +++ b/kernel/power/tuxonice_cluster.c | |
10277 | @@ -0,0 +1,1069 @@ | |
10278 | +/* | |
10279 | + * kernel/power/tuxonice_cluster.c | |
10280 | + * | |
5dd10c98 | 10281 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
10282 | + * |
10283 | + * This file is released under the GPLv2. | |
10284 | + * | |
10285 | + * This file contains routines for cluster hibernation support. | |
10286 | + * | |
10287 | + * Based on ip autoconfiguration code in net/ipv4/ipconfig.c. | |
10288 | + * | |
10289 | + * How does it work? | |
10290 | + * | |
10291 | + * There is no 'master' node that tells everyone else what to do. All nodes | |
10292 | + * send messages to the broadcast address/port, maintain a list of peers | |
10293 | + * and figure out when to progress to the next step in hibernating or resuming. | |
10294 | + * This makes us more fault tolerant when it comes to nodes coming and going | |
10295 | + * (which may be more of an issue if we're hibernating when power supplies | |
10296 | + * are being unreliable). | |
10297 | + * | |
10298 | + * At boot time, we start a ktuxonice thread that handles communication with | |
10299 | + * other nodes. This node maintains a state machine that controls our progress | |
10300 | + * through hibernating and resuming, keeping us in step with other nodes. Nodes | |
10301 | + * are identified by their hw address. | |
10302 | + * | |
10303 | + * On startup, the node sends CLUSTER_PING on the configured interface's | |
10304 | + * broadcast address, port $toi_cluster_port (see below) and begins to listen | |
10305 | + * for other broadcast messages. CLUSTER_PING messages are repeated at | |
10306 | + * intervals of 5 minutes, with a random offset to spread traffic out. | |
10307 | + * | |
10308 | + * A hibernation cycle is initiated from any node via | |
10309 | + * | |
10310 | + * echo > /sys/power/tuxonice/do_hibernate | |
10311 | + * | |
10312 | + * and (possibily) the hibernate script. At each step of the process, the node | |
10313 | + * completes its work, and waits for all other nodes to signal completion of | |
10314 | + * their work (or timeout) before progressing to the next step. | |
10315 | + * | |
10316 | + * Request/state Action before reply Possible reply Next state | |
10317 | + * HIBERNATE capable, pre-script HIBERNATE|ACK NODE_PREP | |
10318 | + * HIBERNATE|NACK INIT_0 | |
10319 | + * | |
10320 | + * PREP prepare_image PREP|ACK IMAGE_WRITE | |
10321 | + * PREP|NACK INIT_0 | |
10322 | + * ABORT RUNNING | |
10323 | + * | |
10324 | + * IO write image IO|ACK power off | |
10325 | + * ABORT POST_RESUME | |
10326 | + * | |
10327 | + * (Boot time) check for image IMAGE|ACK RESUME_PREP | |
10328 | + * (Note 1) | |
10329 | + * IMAGE|NACK (Note 2) | |
10330 | + * | |
10331 | + * PREP prepare read image PREP|ACK IMAGE_READ | |
10332 | + * PREP|NACK (As NACK_IMAGE) | |
10333 | + * | |
10334 | + * IO read image IO|ACK POST_RESUME | |
10335 | + * | |
10336 | + * POST_RESUME thaw, post-script RUNNING | |
10337 | + * | |
10338 | + * INIT_0 init 0 | |
10339 | + * | |
10340 | + * Other messages: | |
10341 | + * | |
10342 | + * - PING: Request for all other live nodes to send a PONG. Used at startup to | |
10343 | + * announce presence, when a node is suspected dead and periodically, in case | |
10344 | + * segments of the network are [un]plugged. | |
10345 | + * | |
10346 | + * - PONG: Response to a PING. | |
10347 | + * | |
10348 | + * - ABORT: Request to cancel writing an image. | |
10349 | + * | |
10350 | + * - BYE: Notification that this node is shutting down. | |
10351 | + * | |
10352 | + * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that | |
10353 | + * nodes which are slower to start up can get state synchronised. If a node | |
10354 | + * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send | |
10355 | + * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it | |
10356 | + * must invalidate its image (if any) and boot normally. | |
10357 | + * | |
10358 | + * Note 2: May occur when one node lost power or powered off while others | |
10359 | + * hibernated. This node waits for others to complete resuming (ACK_READ) | |
10360 | + * before completing its boot, so that it appears as a fail node restarting. | |
2380c486 | 10361 | + * |
7e46296a AM |
10362 | + * If any node has an image, then it also has a list of nodes that hibernated |
10363 | + * in synchronisation with it. The node will wait for other nodes to appear | |
10364 | + * or timeout before beginning its restoration. | |
10365 | + * | |
10366 | + * If a node has no image, it needs to wait, in case other nodes which do have | |
10367 | + * an image are going to resume, but are taking longer to announce their | |
10368 | + * presence. For this reason, the user can specify a timeout value and a number | |
10369 | + * of nodes detected before we just continue. (We might want to assume in a | |
10370 | + * cluster of, say, 15 nodes, if 8 others have booted without finding an image, | |
10371 | + * the remaining nodes will too. This might help in situations where some nodes | |
10372 | + * are much slower to boot, or more subject to hardware failures or such like). | |
2380c486 | 10373 | + */ |
2380c486 | 10374 | + |
7e46296a AM |
10375 | +#include <linux/suspend.h> |
10376 | +#include <linux/module.h> | |
10377 | +#include <linux/moduleparam.h> | |
10378 | +#include <linux/if.h> | |
10379 | +#include <linux/rtnetlink.h> | |
10380 | +#include <linux/ip.h> | |
10381 | +#include <linux/udp.h> | |
10382 | +#include <linux/in.h> | |
10383 | +#include <linux/if_arp.h> | |
10384 | +#include <linux/kthread.h> | |
10385 | +#include <linux/wait.h> | |
10386 | +#include <linux/netdevice.h> | |
10387 | +#include <net/ip.h> | |
2380c486 | 10388 | + |
7e46296a AM |
10389 | +#include "tuxonice.h" |
10390 | +#include "tuxonice_modules.h" | |
10391 | +#include "tuxonice_sysfs.h" | |
10392 | +#include "tuxonice_alloc.h" | |
10393 | +#include "tuxonice_io.h" | |
2380c486 | 10394 | + |
7e46296a AM |
10395 | +#if 1 |
10396 | +#define PRINTK(a, b...) do { printk(a, ##b); } while (0) | |
10397 | +#else | |
10398 | +#define PRINTK(a, b...) do { } while (0) | |
10399 | +#endif | |
2380c486 | 10400 | + |
7e46296a AM |
10401 | +static int loopback_mode; |
10402 | +static int num_local_nodes = 1; | |
10403 | +#define MAX_LOCAL_NODES 8 | |
10404 | +#define SADDR (loopback_mode ? b->sid : h->saddr) | |
2380c486 | 10405 | + |
7e46296a | 10406 | +#define MYNAME "TuxOnIce Clustering" |
2380c486 | 10407 | + |
7e46296a AM |
10408 | +enum cluster_message { |
10409 | + MSG_ACK = 1, | |
10410 | + MSG_NACK = 2, | |
10411 | + MSG_PING = 4, | |
10412 | + MSG_ABORT = 8, | |
10413 | + MSG_BYE = 16, | |
10414 | + MSG_HIBERNATE = 32, | |
10415 | + MSG_IMAGE = 64, | |
10416 | + MSG_IO = 128, | |
10417 | + MSG_RUNNING = 256 | |
10418 | +}; | |
2380c486 | 10419 | + |
7e46296a AM |
10420 | +static char *str_message(int message) |
10421 | +{ | |
10422 | + switch (message) { | |
10423 | + case 4: | |
10424 | + return "Ping"; | |
10425 | + case 8: | |
10426 | + return "Abort"; | |
10427 | + case 9: | |
10428 | + return "Abort acked"; | |
10429 | + case 10: | |
10430 | + return "Abort nacked"; | |
10431 | + case 16: | |
10432 | + return "Bye"; | |
10433 | + case 17: | |
10434 | + return "Bye acked"; | |
10435 | + case 18: | |
10436 | + return "Bye nacked"; | |
10437 | + case 32: | |
10438 | + return "Hibernate request"; | |
10439 | + case 33: | |
10440 | + return "Hibernate ack"; | |
10441 | + case 34: | |
10442 | + return "Hibernate nack"; | |
10443 | + case 64: | |
10444 | + return "Image exists?"; | |
10445 | + case 65: | |
10446 | + return "Image does exist"; | |
10447 | + case 66: | |
10448 | + return "No image here"; | |
10449 | + case 128: | |
10450 | + return "I/O"; | |
10451 | + case 129: | |
10452 | + return "I/O okay"; | |
10453 | + case 130: | |
10454 | + return "I/O failed"; | |
10455 | + case 256: | |
10456 | + return "Running"; | |
10457 | + default: | |
10458 | + printk(KERN_ERR "Unrecognised message %d.\n", message); | |
10459 | + return "Unrecognised message (see dmesg)"; | |
10460 | + } | |
10461 | +} | |
2380c486 | 10462 | + |
7e46296a AM |
10463 | +#define MSG_ACK_MASK (MSG_ACK | MSG_NACK) |
10464 | +#define MSG_STATE_MASK (~MSG_ACK_MASK) | |
2380c486 | 10465 | + |
7e46296a AM |
10466 | +struct node_info { |
10467 | + struct list_head member_list; | |
10468 | + wait_queue_head_t member_events; | |
10469 | + spinlock_t member_list_lock; | |
10470 | + spinlock_t receive_lock; | |
10471 | + int peer_count, ignored_peer_count; | |
10472 | + struct toi_sysfs_data sysfs_data; | |
10473 | + enum cluster_message current_message; | |
10474 | +}; | |
2380c486 | 10475 | + |
7e46296a | 10476 | +struct node_info node_array[MAX_LOCAL_NODES]; |
2380c486 | 10477 | + |
7e46296a AM |
10478 | +struct cluster_member { |
10479 | + __be32 addr; | |
10480 | + enum cluster_message message; | |
10481 | + struct list_head list; | |
10482 | + int ignore; | |
10483 | +}; | |
2380c486 | 10484 | + |
7e46296a AM |
10485 | +#define toi_cluster_port_send 3501 |
10486 | +#define toi_cluster_port_recv 3502 | |
2380c486 | 10487 | + |
7e46296a AM |
10488 | +static struct net_device *net_dev; |
10489 | +static struct toi_module_ops toi_cluster_ops; | |
2380c486 | 10490 | + |
7e46296a AM |
10491 | +static int toi_recv(struct sk_buff *skb, struct net_device *dev, |
10492 | + struct packet_type *pt, struct net_device *orig_dev); | |
2380c486 | 10493 | + |
7e46296a AM |
10494 | +static struct packet_type toi_cluster_packet_type = { |
10495 | + .type = __constant_htons(ETH_P_IP), | |
10496 | + .func = toi_recv, | |
10497 | +}; | |
2380c486 | 10498 | + |
7e46296a AM |
10499 | +struct toi_pkt { /* BOOTP packet format */ |
10500 | + struct iphdr iph; /* IP header */ | |
10501 | + struct udphdr udph; /* UDP header */ | |
10502 | + u8 htype; /* HW address type */ | |
10503 | + u8 hlen; /* HW address length */ | |
10504 | + __be32 xid; /* Transaction ID */ | |
10505 | + __be16 secs; /* Seconds since we started */ | |
10506 | + __be16 flags; /* Just what it says */ | |
10507 | + u8 hw_addr[16]; /* Sender's HW address */ | |
10508 | + u16 message; /* Message */ | |
10509 | + unsigned long sid; /* Source ID for loopback testing */ | |
10510 | +}; | |
2380c486 | 10511 | + |
7e46296a | 10512 | +static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE; |
2380c486 | 10513 | + |
7e46296a | 10514 | +static int added_pack; |
2380c486 | 10515 | + |
7e46296a | 10516 | +static int others_have_image; |
2380c486 | 10517 | + |
7e46296a AM |
10518 | +/* Key used to allow multiple clusters on the same lan */ |
10519 | +static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY; | |
10520 | +static char pre_hibernate_script[255] = | |
10521 | + CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE; | |
10522 | +static char post_hibernate_script[255] = | |
10523 | + CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE; | |
2380c486 | 10524 | + |
7e46296a AM |
10525 | +/* List of cluster members */ |
10526 | +static unsigned long continue_delay = 5 * HZ; | |
10527 | +static unsigned long cluster_message_timeout = 3 * HZ; | |
2380c486 | 10528 | + |
7e46296a | 10529 | +/* === Membership list === */ |
2380c486 | 10530 | + |
7e46296a AM |
10531 | +static void print_member_info(int index) |
10532 | +{ | |
10533 | + struct cluster_member *this; | |
2380c486 | 10534 | + |
7e46296a | 10535 | + printk(KERN_INFO "==> Dumping node %d.\n", index); |
2380c486 | 10536 | + |
7e46296a AM |
10537 | + list_for_each_entry(this, &node_array[index].member_list, list) |
10538 | + printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n", | |
10539 | + NIPQUAD(this->addr), | |
10540 | + str_message(this->message), | |
10541 | + this->ignore ? "(Ignored)" : ""); | |
10542 | + printk(KERN_INFO "== Done ==\n"); | |
2380c486 JR |
10543 | +} |
10544 | + | |
7e46296a | 10545 | +static struct cluster_member *__find_member(int index, __be32 addr) |
2380c486 | 10546 | +{ |
7e46296a AM |
10547 | + struct cluster_member *this; |
10548 | + | |
10549 | + list_for_each_entry(this, &node_array[index].member_list, list) { | |
10550 | + if (this->addr != addr) | |
10551 | + continue; | |
10552 | + | |
10553 | + return this; | |
2380c486 | 10554 | + } |
7e46296a AM |
10555 | + |
10556 | + return NULL; | |
2380c486 JR |
10557 | +} |
10558 | + | |
7e46296a | 10559 | +static void set_ignore(int index, __be32 addr, struct cluster_member *this) |
2380c486 | 10560 | +{ |
7e46296a AM |
10561 | + if (this->ignore) { |
10562 | + PRINTK("Node %d already ignoring %d.%d.%d.%d.\n", | |
10563 | + index, NIPQUAD(addr)); | |
10564 | + return; | |
2380c486 | 10565 | + } |
2380c486 | 10566 | + |
7e46296a AM |
10567 | + PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n", |
10568 | + index, NIPQUAD(addr)); | |
10569 | + this->ignore = 1; | |
10570 | + node_array[index].ignored_peer_count++; | |
2380c486 JR |
10571 | +} |
10572 | + | |
7e46296a AM |
10573 | +static int __add_update_member(int index, __be32 addr, int message) |
10574 | +{ | |
10575 | + struct cluster_member *this; | |
2380c486 | 10576 | + |
7e46296a AM |
10577 | + this = __find_member(index, addr); |
10578 | + if (this) { | |
10579 | + if (this->message != message) { | |
10580 | + this->message = message; | |
10581 | + if ((message & MSG_NACK) && | |
10582 | + (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) | |
10583 | + set_ignore(index, addr, this); | |
10584 | + PRINTK("Node %d sees node %d.%d.%d.%d now sending " | |
10585 | + "%s.\n", index, NIPQUAD(addr), | |
10586 | + str_message(message)); | |
10587 | + wake_up(&node_array[index].member_events); | |
10588 | + } | |
10589 | + return 0; | |
10590 | + } | |
2380c486 | 10591 | + |
7e46296a AM |
10592 | + this = (struct cluster_member *) toi_kzalloc(36, |
10593 | + sizeof(struct cluster_member), GFP_KERNEL); | |
2380c486 | 10594 | + |
7e46296a AM |
10595 | + if (!this) |
10596 | + return -1; | |
2380c486 | 10597 | + |
7e46296a AM |
10598 | + this->addr = addr; |
10599 | + this->message = message; | |
10600 | + this->ignore = 0; | |
10601 | + INIT_LIST_HEAD(&this->list); | |
2380c486 | 10602 | + |
7e46296a | 10603 | + node_array[index].peer_count++; |
2380c486 | 10604 | + |
7e46296a AM |
10605 | + PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index, |
10606 | + NIPQUAD(addr), str_message(message)); | |
2380c486 | 10607 | + |
7e46296a AM |
10608 | + if ((message & MSG_NACK) && |
10609 | + (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) | |
10610 | + set_ignore(index, addr, this); | |
10611 | + list_add_tail(&this->list, &node_array[index].member_list); | |
10612 | + return 1; | |
10613 | +} | |
2380c486 | 10614 | + |
7e46296a AM |
10615 | +static int add_update_member(int index, __be32 addr, int message) |
10616 | +{ | |
10617 | + int result; | |
10618 | + unsigned long flags; | |
10619 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
10620 | + result = __add_update_member(index, addr, message); | |
10621 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
2380c486 | 10622 | + |
7e46296a | 10623 | + print_member_info(index); |
2380c486 | 10624 | + |
7e46296a | 10625 | + wake_up(&node_array[index].member_events); |
2380c486 | 10626 | + |
7e46296a | 10627 | + return result; |
2380c486 JR |
10628 | +} |
10629 | + | |
7e46296a | 10630 | +static void del_member(int index, __be32 addr) |
2380c486 | 10631 | +{ |
7e46296a AM |
10632 | + struct cluster_member *this; |
10633 | + unsigned long flags; | |
2380c486 | 10634 | + |
7e46296a AM |
10635 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); |
10636 | + this = __find_member(index, addr); | |
2380c486 | 10637 | + |
7e46296a AM |
10638 | + if (this) { |
10639 | + list_del_init(&this->list); | |
10640 | + toi_kfree(36, this, sizeof(*this)); | |
10641 | + node_array[index].peer_count--; | |
10642 | + } | |
2380c486 | 10643 | + |
7e46296a | 10644 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); |
2380c486 JR |
10645 | +} |
10646 | + | |
7e46296a | 10647 | +/* === Message transmission === */ |
2380c486 | 10648 | + |
7e46296a | 10649 | +static void toi_send_if(int message, unsigned long my_id); |
2380c486 | 10650 | + |
2380c486 | 10651 | +/* |
7e46296a | 10652 | + * Process received TOI packet. |
2380c486 | 10653 | + */ |
7e46296a AM |
10654 | +static int toi_recv(struct sk_buff *skb, struct net_device *dev, |
10655 | + struct packet_type *pt, struct net_device *orig_dev) | |
10656 | +{ | |
10657 | + struct toi_pkt *b; | |
10658 | + struct iphdr *h; | |
10659 | + int len, result, index; | |
10660 | + unsigned long addr, message, ack; | |
2380c486 | 10661 | + |
7e46296a AM |
10662 | + /* Perform verifications before taking the lock. */ |
10663 | + if (skb->pkt_type == PACKET_OTHERHOST) | |
10664 | + goto drop; | |
2380c486 | 10665 | + |
7e46296a AM |
10666 | + if (dev != net_dev) |
10667 | + goto drop; | |
2380c486 | 10668 | + |
7e46296a AM |
10669 | + skb = skb_share_check(skb, GFP_ATOMIC); |
10670 | + if (!skb) | |
10671 | + return NET_RX_DROP; | |
2380c486 | 10672 | + |
7e46296a AM |
10673 | + if (!pskb_may_pull(skb, |
10674 | + sizeof(struct iphdr) + | |
10675 | + sizeof(struct udphdr))) | |
10676 | + goto drop; | |
2380c486 | 10677 | + |
7e46296a AM |
10678 | + b = (struct toi_pkt *)skb_network_header(skb); |
10679 | + h = &b->iph; | |
2380c486 | 10680 | + |
7e46296a AM |
10681 | + if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP) |
10682 | + goto drop; | |
2380c486 | 10683 | + |
7e46296a AM |
10684 | + /* Fragments are not supported */ |
10685 | + if (h->frag_off & htons(IP_OFFSET | IP_MF)) { | |
10686 | + if (net_ratelimit()) | |
10687 | + printk(KERN_ERR "TuxOnIce: Ignoring fragmented " | |
10688 | + "cluster message.\n"); | |
10689 | + goto drop; | |
10690 | + } | |
2380c486 | 10691 | + |
7e46296a AM |
10692 | + if (skb->len < ntohs(h->tot_len)) |
10693 | + goto drop; | |
2380c486 | 10694 | + |
7e46296a AM |
10695 | + if (ip_fast_csum((char *) h, h->ihl)) |
10696 | + goto drop; | |
2380c486 | 10697 | + |
7e46296a AM |
10698 | + if (b->udph.source != htons(toi_cluster_port_send) || |
10699 | + b->udph.dest != htons(toi_cluster_port_recv)) | |
10700 | + goto drop; | |
2380c486 | 10701 | + |
7e46296a AM |
10702 | + if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr)) |
10703 | + goto drop; | |
2380c486 | 10704 | + |
7e46296a | 10705 | + len = ntohs(b->udph.len) - sizeof(struct udphdr); |
2380c486 | 10706 | + |
7e46296a AM |
10707 | + /* Ok the front looks good, make sure we can get at the rest. */ |
10708 | + if (!pskb_may_pull(skb, skb->len)) | |
10709 | + goto drop; | |
0ada99ac | 10710 | + |
7e46296a AM |
10711 | + b = (struct toi_pkt *)skb_network_header(skb); |
10712 | + h = &b->iph; | |
0ada99ac | 10713 | + |
7e46296a AM |
10714 | + addr = SADDR; |
10715 | + PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n", | |
10716 | + str_message(b->message), NIPQUAD(addr)); | |
2380c486 | 10717 | + |
7e46296a AM |
10718 | + message = b->message & MSG_STATE_MASK; |
10719 | + ack = b->message & MSG_ACK_MASK; | |
2380c486 | 10720 | + |
7e46296a AM |
10721 | + for (index = 0; index < num_local_nodes; index++) { |
10722 | + int new_message = node_array[index].current_message, | |
10723 | + old_message = new_message; | |
2380c486 | 10724 | + |
7e46296a AM |
10725 | + if (index == SADDR || !old_message) { |
10726 | + PRINTK("Ignoring node %d (offline or self).\n", index); | |
10727 | + continue; | |
2380c486 JR |
10728 | + } |
10729 | + | |
7e46296a AM |
10730 | + /* One message at a time, please. */ |
10731 | + spin_lock(&node_array[index].receive_lock); | |
2380c486 | 10732 | + |
7e46296a AM |
10733 | + result = add_update_member(index, SADDR, b->message); |
10734 | + if (result == -1) { | |
10735 | + printk(KERN_INFO "Failed to add new cluster member " | |
10736 | + NIPQUAD_FMT ".\n", | |
10737 | + NIPQUAD(addr)); | |
10738 | + goto drop_unlock; | |
2380c486 | 10739 | + } |
0ada99ac | 10740 | + |
7e46296a AM |
10741 | + switch (b->message & MSG_STATE_MASK) { |
10742 | + case MSG_PING: | |
10743 | + break; | |
10744 | + case MSG_ABORT: | |
10745 | + break; | |
10746 | + case MSG_BYE: | |
10747 | + break; | |
10748 | + case MSG_HIBERNATE: | |
10749 | + /* Can I hibernate? */ | |
10750 | + new_message = MSG_HIBERNATE | | |
10751 | + ((index & 1) ? MSG_NACK : MSG_ACK); | |
10752 | + break; | |
10753 | + case MSG_IMAGE: | |
10754 | + /* Can I resume? */ | |
10755 | + new_message = MSG_IMAGE | | |
10756 | + ((index & 1) ? MSG_NACK : MSG_ACK); | |
10757 | + if (new_message != old_message) | |
10758 | + printk(KERN_ERR "Setting whether I can resume " | |
10759 | + "to %d.\n", new_message); | |
10760 | + break; | |
10761 | + case MSG_IO: | |
10762 | + new_message = MSG_IO | MSG_ACK; | |
10763 | + break; | |
10764 | + case MSG_RUNNING: | |
10765 | + break; | |
10766 | + default: | |
10767 | + if (net_ratelimit()) | |
10768 | + printk(KERN_ERR "Unrecognised TuxOnIce cluster" | |
10769 | + " message %d from " NIPQUAD_FMT ".\n", | |
10770 | + b->message, NIPQUAD(addr)); | |
10771 | + }; | |
0ada99ac | 10772 | + |
7e46296a AM |
10773 | + if (old_message != new_message) { |
10774 | + node_array[index].current_message = new_message; | |
10775 | + printk(KERN_INFO ">>> Sending new message for node " | |
10776 | + "%d.\n", index); | |
10777 | + toi_send_if(new_message, index); | |
10778 | + } else if (!ack) { | |
10779 | + printk(KERN_INFO ">>> Resending message for node %d.\n", | |
10780 | + index); | |
10781 | + toi_send_if(new_message, index); | |
0ada99ac | 10782 | + } |
7e46296a AM |
10783 | +drop_unlock: |
10784 | + spin_unlock(&node_array[index].receive_lock); | |
10785 | + }; | |
0ada99ac | 10786 | + |
7e46296a AM |
10787 | +drop: |
10788 | + /* Throw the packet out. */ | |
10789 | + kfree_skb(skb); | |
2380c486 JR |
10790 | + |
10791 | + return 0; | |
10792 | +} | |
10793 | + | |
10794 | +/* | |
7e46296a | 10795 | + * Send cluster message to single interface. |
2380c486 | 10796 | + */ |
7e46296a AM |
10797 | +static void toi_send_if(int message, unsigned long my_id) |
10798 | +{ | |
10799 | + struct sk_buff *skb; | |
10800 | + struct toi_pkt *b; | |
10801 | + int hh_len = LL_RESERVED_SPACE(net_dev); | |
10802 | + struct iphdr *h; | |
2380c486 | 10803 | + |
7e46296a AM |
10804 | + /* Allocate packet */ |
10805 | + skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL); | |
10806 | + if (!skb) | |
10807 | + return; | |
10808 | + skb_reserve(skb, hh_len); | |
10809 | + b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt)); | |
10810 | + memset(b, 0, sizeof(struct toi_pkt)); | |
10811 | + | |
10812 | + /* Construct IP header */ | |
10813 | + skb_reset_network_header(skb); | |
10814 | + h = ip_hdr(skb); | |
10815 | + h->version = 4; | |
10816 | + h->ihl = 5; | |
10817 | + h->tot_len = htons(sizeof(struct toi_pkt)); | |
10818 | + h->frag_off = htons(IP_DF); | |
10819 | + h->ttl = 64; | |
10820 | + h->protocol = IPPROTO_UDP; | |
10821 | + h->daddr = htonl(INADDR_BROADCAST); | |
10822 | + h->check = ip_fast_csum((unsigned char *) h, h->ihl); | |
10823 | + | |
10824 | + /* Construct UDP header */ | |
10825 | + b->udph.source = htons(toi_cluster_port_send); | |
10826 | + b->udph.dest = htons(toi_cluster_port_recv); | |
10827 | + b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr)); | |
10828 | + /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */ | |
10829 | + | |
10830 | + /* Construct message */ | |
10831 | + b->message = message; | |
10832 | + b->sid = my_id; | |
10833 | + b->htype = net_dev->type; /* can cause undefined behavior */ | |
10834 | + b->hlen = net_dev->addr_len; | |
10835 | + memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len); | |
10836 | + b->secs = htons(3); /* 3 seconds */ | |
10837 | + | |
10838 | + /* Chain packet down the line... */ | |
10839 | + skb->dev = net_dev; | |
10840 | + skb->protocol = htons(ETH_P_IP); | |
10841 | + if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol), | |
10842 | + net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) || | |
10843 | + dev_queue_xmit(skb) < 0) | |
10844 | + printk(KERN_INFO "E"); | |
10845 | +} | |
10846 | + | |
10847 | +/* ========================================= */ | |
10848 | + | |
10849 | +/* kTOICluster */ | |
10850 | + | |
10851 | +static atomic_t num_cluster_threads; | |
10852 | +static DECLARE_WAIT_QUEUE_HEAD(clusterd_events); | |
10853 | + | |
10854 | +static int kTOICluster(void *data) | |
2380c486 | 10855 | +{ |
7e46296a AM |
10856 | + unsigned long my_id; |
10857 | + | |
10858 | + my_id = atomic_add_return(1, &num_cluster_threads) - 1; | |
10859 | + node_array[my_id].current_message = (unsigned long) data; | |
2380c486 | 10860 | + |
7e46296a | 10861 | + PRINTK("kTOICluster daemon %lu starting.\n", my_id); |
2380c486 | 10862 | + |
7e46296a | 10863 | + current->flags |= PF_NOFREEZE; |
2380c486 | 10864 | + |
7e46296a AM |
10865 | + while (node_array[my_id].current_message) { |
10866 | + toi_send_if(node_array[my_id].current_message, my_id); | |
10867 | + sleep_on_timeout(&clusterd_events, | |
10868 | + cluster_message_timeout); | |
10869 | + PRINTK("Link state %lu is %d.\n", my_id, | |
10870 | + node_array[my_id].current_message); | |
10871 | + } | |
2380c486 | 10872 | + |
7e46296a AM |
10873 | + toi_send_if(MSG_BYE, my_id); |
10874 | + atomic_dec(&num_cluster_threads); | |
10875 | + wake_up(&clusterd_events); | |
2380c486 | 10876 | + |
7e46296a AM |
10877 | + PRINTK("kTOICluster daemon %lu exiting.\n", my_id); |
10878 | + __set_current_state(TASK_RUNNING); | |
2380c486 JR |
10879 | + return 0; |
10880 | +} | |
10881 | + | |
7e46296a | 10882 | +static void kill_clusterd(void) |
2380c486 | 10883 | +{ |
7e46296a AM |
10884 | + int i; |
10885 | + | |
10886 | + for (i = 0; i < num_local_nodes; i++) { | |
10887 | + if (node_array[i].current_message) { | |
10888 | + PRINTK("Seeking to kill clusterd %d.\n", i); | |
10889 | + node_array[i].current_message = 0; | |
2380c486 JR |
10890 | + } |
10891 | + } | |
7e46296a AM |
10892 | + wait_event(clusterd_events, |
10893 | + !atomic_read(&num_cluster_threads)); | |
10894 | + PRINTK("All cluster daemons have exited.\n"); | |
10895 | +} | |
2380c486 | 10896 | + |
7e46296a AM |
10897 | +static int peers_not_in_message(int index, int message, int precise) |
10898 | +{ | |
10899 | + struct cluster_member *this; | |
10900 | + unsigned long flags; | |
10901 | + int result = 0; | |
10902 | + | |
10903 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
10904 | + list_for_each_entry(this, &node_array[index].member_list, list) { | |
10905 | + if (this->ignore) | |
10906 | + continue; | |
10907 | + | |
10908 | + PRINTK("Peer %d.%d.%d.%d sending %s. " | |
10909 | + "Seeking %s.\n", | |
10910 | + NIPQUAD(this->addr), | |
10911 | + str_message(this->message), str_message(message)); | |
10912 | + if ((precise ? this->message : | |
10913 | + this->message & MSG_STATE_MASK) != | |
10914 | + message) | |
10915 | + result++; | |
10916 | + } | |
10917 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
10918 | + PRINTK("%d peers in sought message.\n", result); | |
10919 | + return result; | |
2380c486 JR |
10920 | +} |
10921 | + | |
7e46296a | 10922 | +static void reset_ignored(int index) |
2380c486 | 10923 | +{ |
7e46296a AM |
10924 | + struct cluster_member *this; |
10925 | + unsigned long flags; | |
2380c486 | 10926 | + |
7e46296a AM |
10927 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); |
10928 | + list_for_each_entry(this, &node_array[index].member_list, list) | |
10929 | + this->ignore = 0; | |
10930 | + node_array[index].ignored_peer_count = 0; | |
10931 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
10932 | +} | |
2380c486 | 10933 | + |
7e46296a AM |
10934 | +static int peers_in_message(int index, int message, int precise) |
10935 | +{ | |
10936 | + return node_array[index].peer_count - | |
10937 | + node_array[index].ignored_peer_count - | |
10938 | + peers_not_in_message(index, message, precise); | |
10939 | +} | |
2380c486 | 10940 | + |
7e46296a AM |
10941 | +static int time_to_continue(int index, unsigned long start, int message) |
10942 | +{ | |
10943 | + int first = peers_not_in_message(index, message, 0); | |
10944 | + int second = peers_in_message(index, message, 1); | |
2380c486 | 10945 | + |
7e46296a | 10946 | + PRINTK("First part returns %d, second returns %d.\n", first, second); |
2380c486 | 10947 | + |
7e46296a AM |
10948 | + if (!first && !second) { |
10949 | + PRINTK("All peers answered message %d.\n", | |
10950 | + message); | |
10951 | + return 1; | |
10952 | + } | |
2380c486 | 10953 | + |
7e46296a AM |
10954 | + if (time_after(jiffies, start + continue_delay)) { |
10955 | + PRINTK("Timeout reached.\n"); | |
10956 | + return 1; | |
10957 | + } | |
2380c486 | 10958 | + |
7e46296a AM |
10959 | + PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies, |
10960 | + start + continue_delay); | |
10961 | + return 0; | |
2380c486 JR |
10962 | +} |
10963 | + | |
7e46296a | 10964 | +void toi_initiate_cluster_hibernate(void) |
2380c486 | 10965 | +{ |
7e46296a AM |
10966 | + int result; |
10967 | + unsigned long start; | |
2380c486 | 10968 | + |
7e46296a AM |
10969 | + result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); |
10970 | + if (result) | |
10971 | + return; | |
2380c486 | 10972 | + |
7e46296a | 10973 | + toi_send_if(MSG_HIBERNATE, 0); |
2380c486 | 10974 | + |
7e46296a AM |
10975 | + start = jiffies; |
10976 | + wait_event(node_array[0].member_events, | |
10977 | + time_to_continue(0, start, MSG_HIBERNATE)); | |
2380c486 | 10978 | + |
7e46296a AM |
10979 | + if (test_action_state(TOI_FREEZER_TEST)) { |
10980 | + toi_send_if(MSG_ABORT, 0); | |
2380c486 | 10981 | + |
7e46296a AM |
10982 | + start = jiffies; |
10983 | + wait_event(node_array[0].member_events, | |
10984 | + time_to_continue(0, start, MSG_RUNNING)); | |
10985 | + | |
10986 | + do_toi_step(STEP_QUIET_CLEANUP); | |
10987 | + return; | |
2380c486 | 10988 | + } |
7e46296a AM |
10989 | + |
10990 | + toi_send_if(MSG_IO, 0); | |
10991 | + | |
10992 | + result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); | |
10993 | + if (result) | |
10994 | + return; | |
10995 | + | |
10996 | + /* This code runs at resume time too! */ | |
10997 | + if (toi_in_hibernate) | |
10998 | + result = do_toi_step(STEP_HIBERNATE_POWERDOWN); | |
2380c486 | 10999 | +} |
7e46296a | 11000 | +EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate); |
2380c486 | 11001 | + |
7e46296a | 11002 | +/* toi_cluster_print_debug_stats |
2380c486 | 11003 | + * |
7e46296a AM |
11004 | + * Description: Print information to be recorded for debugging purposes into a |
11005 | + * buffer. | |
11006 | + * Arguments: buffer: Pointer to a buffer into which the debug info will be | |
11007 | + * printed. | |
11008 | + * size: Size of the buffer. | |
11009 | + * Returns: Number of characters written to the buffer. | |
2380c486 | 11010 | + */ |
7e46296a | 11011 | +static int toi_cluster_print_debug_stats(char *buffer, int size) |
2380c486 | 11012 | +{ |
2380c486 JR |
11013 | + int len; |
11014 | + | |
7e46296a AM |
11015 | + if (strlen(toi_cluster_iface)) |
11016 | + len = scnprintf(buffer, size, | |
11017 | + "- Cluster interface is '%s'.\n", | |
11018 | + toi_cluster_iface); | |
2380c486 | 11019 | + else |
7e46296a AM |
11020 | + len = scnprintf(buffer, size, |
11021 | + "- Cluster support is disabled.\n"); | |
2380c486 JR |
11022 | + return len; |
11023 | +} | |
11024 | + | |
7e46296a AM |
11025 | +/* cluster_memory_needed |
11026 | + * | |
11027 | + * Description: Tell the caller how much memory we need to operate during | |
11028 | + * hibernate/resume. | |
11029 | + * Returns: Unsigned long. Maximum number of bytes of memory required for | |
11030 | + * operation. | |
2380c486 | 11031 | + */ |
7e46296a | 11032 | +static int toi_cluster_memory_needed(void) |
2380c486 | 11033 | +{ |
7e46296a | 11034 | + return 0; |
2380c486 JR |
11035 | +} |
11036 | + | |
7e46296a | 11037 | +static int toi_cluster_storage_needed(void) |
2380c486 | 11038 | +{ |
7e46296a | 11039 | + return 1 + strlen(toi_cluster_iface); |
2380c486 JR |
11040 | +} |
11041 | + | |
7e46296a | 11042 | +/* toi_cluster_save_config_info |
2380c486 | 11043 | + * |
7e46296a AM |
11044 | + * Description: Save informaton needed when reloading the image at resume time. |
11045 | + * Arguments: Buffer: Pointer to a buffer of size PAGE_SIZE. | |
11046 | + * Returns: Number of bytes used for saving our data. | |
2380c486 | 11047 | + */ |
7e46296a | 11048 | +static int toi_cluster_save_config_info(char *buffer) |
2380c486 | 11049 | +{ |
7e46296a AM |
11050 | + strcpy(buffer, toi_cluster_iface); |
11051 | + return strlen(toi_cluster_iface + 1); | |
2380c486 JR |
11052 | +} |
11053 | + | |
7e46296a | 11054 | +/* toi_cluster_load_config_info |
2380c486 | 11055 | + * |
7e46296a AM |
11056 | + * Description: Reload information needed for declustering the image at |
11057 | + * resume time. | |
11058 | + * Arguments: Buffer: Pointer to the start of the data. | |
11059 | + * Size: Number of bytes that were saved. | |
2380c486 | 11060 | + */ |
7e46296a | 11061 | +static void toi_cluster_load_config_info(char *buffer, int size) |
2380c486 | 11062 | +{ |
7e46296a | 11063 | + strncpy(toi_cluster_iface, buffer, size); |
2380c486 JR |
11064 | + return; |
11065 | +} | |
11066 | + | |
7e46296a | 11067 | +static void cluster_startup(void) |
2380c486 | 11068 | +{ |
7e46296a AM |
11069 | + int have_image = do_check_can_resume(), i; |
11070 | + unsigned long start = jiffies, initial_message; | |
11071 | + struct task_struct *p; | |
2380c486 | 11072 | + |
7e46296a | 11073 | + initial_message = MSG_IMAGE; |
2380c486 | 11074 | + |
7e46296a | 11075 | + have_image = 1; |
2380c486 | 11076 | + |
7e46296a AM |
11077 | + for (i = 0; i < num_local_nodes; i++) { |
11078 | + PRINTK("Starting ktoiclusterd %d.\n", i); | |
11079 | + p = kthread_create(kTOICluster, (void *) initial_message, | |
11080 | + "ktoiclusterd/%d", i); | |
11081 | + if (IS_ERR(p)) { | |
11082 | + printk(KERN_ERR "Failed to start ktoiclusterd.\n"); | |
11083 | + return; | |
11084 | + } | |
2380c486 | 11085 | + |
7e46296a AM |
11086 | + wake_up_process(p); |
11087 | + } | |
2380c486 | 11088 | + |
7e46296a AM |
11089 | + /* Wait for delay or someone else sending first message */ |
11090 | + wait_event(node_array[0].member_events, time_to_continue(0, start, | |
11091 | + MSG_IMAGE)); | |
2380c486 | 11092 | + |
7e46296a | 11093 | + others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1); |
2380c486 | 11094 | + |
7e46296a AM |
11095 | + printk(KERN_INFO "Continuing. I %shave an image. Peers with image:" |
11096 | + " %d.\n", have_image ? "" : "don't ", others_have_image); | |
2380c486 | 11097 | + |
7e46296a AM |
11098 | + if (have_image) { |
11099 | + int result; | |
2380c486 | 11100 | + |
7e46296a AM |
11101 | + /* Start to resume */ |
11102 | + printk(KERN_INFO " === Starting to resume === \n"); | |
11103 | + node_array[0].current_message = MSG_IO; | |
11104 | + toi_send_if(MSG_IO, 0); | |
2380c486 | 11105 | + |
7e46296a AM |
11106 | + /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */ |
11107 | + result = 0; | |
2380c486 | 11108 | + |
7e46296a AM |
11109 | + if (!result) { |
11110 | + /* | |
11111 | + * Atomic restore - we'll come back in the hibernation | |
11112 | + * path. | |
11113 | + */ | |
2380c486 | 11114 | + |
7e46296a AM |
11115 | + /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */ |
11116 | + result = 0; | |
2380c486 | 11117 | + |
7e46296a AM |
11118 | + /* do_toi_step(STEP_QUIET_CLEANUP); */ |
11119 | + } | |
2380c486 | 11120 | + |
7e46296a AM |
11121 | + node_array[0].current_message |= MSG_NACK; |
11122 | + | |
11123 | + /* For debugging - disable for real life? */ | |
11124 | + wait_event(node_array[0].member_events, | |
11125 | + time_to_continue(0, start, MSG_IO)); | |
2380c486 JR |
11126 | + } |
11127 | + | |
7e46296a AM |
11128 | + if (others_have_image) { |
11129 | + /* Wait for them to resume */ | |
11130 | + printk(KERN_INFO "Waiting for other nodes to resume.\n"); | |
11131 | + start = jiffies; | |
11132 | + wait_event(node_array[0].member_events, | |
11133 | + time_to_continue(0, start, MSG_RUNNING)); | |
11134 | + if (peers_not_in_message(0, MSG_RUNNING, 0)) | |
11135 | + printk(KERN_INFO "Timed out while waiting for other " | |
11136 | + "nodes to resume.\n"); | |
11137 | + } | |
2380c486 | 11138 | + |
7e46296a AM |
11139 | + /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE |
11140 | + * as appropriate. | |
11141 | + * | |
11142 | + * If we don't have an image: | |
11143 | + * - Wait until someone else says they have one, or conditions are met | |
11144 | + * for continuing to boot (n machines or t seconds). | |
11145 | + * - If anyone has an image, wait for them to resume before continuing | |
11146 | + * to boot. | |
11147 | + * | |
11148 | + * If we have an image: | |
11149 | + * - Wait until conditions are met before continuing to resume (n | |
11150 | + * machines or t seconds). Send RESUME_PREP and freeze processes. | |
11151 | + * NACK_PREP if freezing fails (shouldn't) and follow logic for | |
11152 | + * us having no image above. On success, wait for [N]ACK_PREP from | |
11153 | + * other machines. Read image (including atomic restore) until done. | |
11154 | + * Wait for ACK_READ from others (should never fail). Thaw processes | |
11155 | + * and do post-resume. (The section after the atomic restore is done | |
11156 | + * via the code for hibernating). | |
11157 | + */ | |
2380c486 | 11158 | + |
7e46296a AM |
11159 | + node_array[0].current_message = MSG_RUNNING; |
11160 | +} | |
2380c486 | 11161 | + |
7e46296a AM |
11162 | +/* toi_cluster_open_iface |
11163 | + * | |
11164 | + * Description: Prepare to use an interface. | |
11165 | + */ | |
2380c486 | 11166 | + |
7e46296a AM |
11167 | +static int toi_cluster_open_iface(void) |
11168 | +{ | |
11169 | + struct net_device *dev; | |
2380c486 | 11170 | + |
7e46296a | 11171 | + rtnl_lock(); |
2380c486 | 11172 | + |
7e46296a AM |
11173 | + for_each_netdev(&init_net, dev) { |
11174 | + if (/* dev == &init_net.loopback_dev || */ | |
11175 | + strcmp(dev->name, toi_cluster_iface)) | |
11176 | + continue; | |
2380c486 | 11177 | + |
7e46296a AM |
11178 | + net_dev = dev; |
11179 | + break; | |
2380c486 JR |
11180 | + } |
11181 | + | |
7e46296a AM |
11182 | + rtnl_unlock(); |
11183 | + | |
11184 | + if (!net_dev) { | |
11185 | + printk(KERN_ERR MYNAME ": Device %s not found.\n", | |
11186 | + toi_cluster_iface); | |
11187 | + return -ENODEV; | |
2380c486 JR |
11188 | + } |
11189 | + | |
7e46296a AM |
11190 | + dev_add_pack(&toi_cluster_packet_type); |
11191 | + added_pack = 1; | |
2380c486 | 11192 | + |
7e46296a AM |
11193 | + loopback_mode = (net_dev == init_net.loopback_dev); |
11194 | + num_local_nodes = loopback_mode ? 8 : 1; | |
2380c486 | 11195 | + |
7e46296a AM |
11196 | + PRINTK("Loopback mode is %s. Number of local nodes is %d.\n", |
11197 | + loopback_mode ? "on" : "off", num_local_nodes); | |
2380c486 | 11198 | + |
7e46296a | 11199 | + cluster_startup(); |
2380c486 JR |
11200 | + return 0; |
11201 | +} | |
2380c486 | 11202 | + |
7e46296a AM |
11203 | +/* toi_cluster_close_iface |
11204 | + * | |
11205 | + * Description: Stop using an interface. | |
11206 | + */ | |
2380c486 | 11207 | + |
7e46296a AM |
11208 | +static int toi_cluster_close_iface(void) |
11209 | +{ | |
11210 | + kill_clusterd(); | |
11211 | + if (added_pack) { | |
11212 | + dev_remove_pack(&toi_cluster_packet_type); | |
11213 | + added_pack = 0; | |
2380c486 | 11214 | + } |
7e46296a | 11215 | + return 0; |
2380c486 | 11216 | +} |
2380c486 | 11217 | + |
7e46296a | 11218 | +static void write_side_effect(void) |
2380c486 | 11219 | +{ |
7e46296a AM |
11220 | + if (toi_cluster_ops.enabled) { |
11221 | + toi_cluster_open_iface(); | |
11222 | + set_toi_state(TOI_CLUSTER_MODE); | |
11223 | + } else { | |
11224 | + toi_cluster_close_iface(); | |
11225 | + clear_toi_state(TOI_CLUSTER_MODE); | |
2380c486 | 11226 | + } |
2380c486 | 11227 | +} |
2380c486 | 11228 | + |
7e46296a | 11229 | +static void node_write_side_effect(void) |
2380c486 | 11230 | +{ |
7e46296a | 11231 | +} |
2380c486 | 11232 | + |
7e46296a AM |
11233 | +/* |
11234 | + * data for our sysfs entries. | |
11235 | + */ | |
11236 | +static struct toi_sysfs_data sysfs_params[] = { | |
11237 | + SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0, | |
11238 | + NULL), | |
11239 | + SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0, | |
11240 | + write_side_effect), | |
11241 | + SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL), | |
11242 | + SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script, | |
11243 | + 256, 0, NULL), | |
11244 | + SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script, | |
11245 | + 256, 0, STRING), | |
11246 | + SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ, | |
11247 | + 0) | |
11248 | +}; | |
2380c486 | 11249 | + |
7e46296a AM |
11250 | +/* |
11251 | + * Ops structure. | |
11252 | + */ | |
2380c486 | 11253 | + |
7e46296a AM |
11254 | +static struct toi_module_ops toi_cluster_ops = { |
11255 | + .type = FILTER_MODULE, | |
11256 | + .name = "Cluster", | |
11257 | + .directory = "cluster", | |
11258 | + .module = THIS_MODULE, | |
11259 | + .memory_needed = toi_cluster_memory_needed, | |
11260 | + .print_debug_info = toi_cluster_print_debug_stats, | |
11261 | + .save_config_info = toi_cluster_save_config_info, | |
11262 | + .load_config_info = toi_cluster_load_config_info, | |
11263 | + .storage_needed = toi_cluster_storage_needed, | |
2380c486 | 11264 | + |
7e46296a AM |
11265 | + .sysfs_data = sysfs_params, |
11266 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
11267 | + sizeof(struct toi_sysfs_data), | |
11268 | +}; | |
2380c486 | 11269 | + |
7e46296a | 11270 | +/* ---- Registration ---- */ |
2380c486 | 11271 | + |
7e46296a AM |
11272 | +#ifdef MODULE |
11273 | +#define INIT static __init | |
11274 | +#define EXIT static __exit | |
11275 | +#else | |
11276 | +#define INIT | |
11277 | +#define EXIT | |
11278 | +#endif | |
2380c486 | 11279 | + |
7e46296a | 11280 | +INIT int toi_cluster_init(void) |
2380c486 | 11281 | +{ |
7e46296a AM |
11282 | + int temp = toi_register_module(&toi_cluster_ops), i; |
11283 | + struct kobject *kobj = toi_cluster_ops.dir_kobj; | |
2380c486 | 11284 | + |
7e46296a AM |
11285 | + for (i = 0; i < MAX_LOCAL_NODES; i++) { |
11286 | + node_array[i].current_message = 0; | |
11287 | + INIT_LIST_HEAD(&node_array[i].member_list); | |
11288 | + init_waitqueue_head(&node_array[i].member_events); | |
11289 | + spin_lock_init(&node_array[i].member_list_lock); | |
11290 | + spin_lock_init(&node_array[i].receive_lock); | |
2380c486 | 11291 | + |
7e46296a AM |
11292 | + /* Set up sysfs entry */ |
11293 | + node_array[i].sysfs_data.attr.name = toi_kzalloc(8, | |
11294 | + sizeof(node_array[i].sysfs_data.attr.name), | |
11295 | + GFP_KERNEL); | |
11296 | + sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d", | |
11297 | + i); | |
11298 | + node_array[i].sysfs_data.attr.mode = SYSFS_RW; | |
11299 | + node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER; | |
11300 | + node_array[i].sysfs_data.flags = 0; | |
11301 | + node_array[i].sysfs_data.data.integer.variable = | |
11302 | + (int *) &node_array[i].current_message; | |
11303 | + node_array[i].sysfs_data.data.integer.minimum = 0; | |
11304 | + node_array[i].sysfs_data.data.integer.maximum = INT_MAX; | |
11305 | + node_array[i].sysfs_data.write_side_effect = | |
11306 | + node_write_side_effect; | |
11307 | + toi_register_sysfs_file(kobj, &node_array[i].sysfs_data); | |
11308 | + } | |
2380c486 | 11309 | + |
7e46296a | 11310 | + toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0); |
2380c486 | 11311 | + |
7e46296a AM |
11312 | + if (toi_cluster_ops.enabled) |
11313 | + toi_cluster_open_iface(); | |
2380c486 | 11314 | + |
7e46296a | 11315 | + return temp; |
2380c486 | 11316 | +} |
2380c486 | 11317 | + |
7e46296a | 11318 | +EXIT void toi_cluster_exit(void) |
2380c486 | 11319 | +{ |
7e46296a AM |
11320 | + int i; |
11321 | + toi_cluster_close_iface(); | |
2380c486 | 11322 | + |
7e46296a AM |
11323 | + for (i = 0; i < MAX_LOCAL_NODES; i++) |
11324 | + toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj, | |
11325 | + &node_array[i].sysfs_data); | |
11326 | + toi_unregister_module(&toi_cluster_ops); | |
11327 | +} | |
2380c486 | 11328 | + |
7e46296a AM |
11329 | +static int __init toi_cluster_iface_setup(char *iface) |
11330 | +{ | |
11331 | + toi_cluster_ops.enabled = (*iface && | |
11332 | + strcmp(iface, "off")); | |
2380c486 | 11333 | + |
7e46296a AM |
11334 | + if (toi_cluster_ops.enabled) |
11335 | + strncpy(toi_cluster_iface, iface, strlen(iface)); | |
2380c486 | 11336 | +} |
7e46296a AM |
11337 | + |
11338 | +__setup("toi_cluster=", toi_cluster_iface_setup); | |
11339 | + | |
11340 | +#ifdef MODULE | |
11341 | +MODULE_LICENSE("GPL"); | |
11342 | +module_init(toi_cluster_init); | |
11343 | +module_exit(toi_cluster_exit); | |
11344 | +MODULE_AUTHOR("Nigel Cunningham"); | |
11345 | +MODULE_DESCRIPTION("Cluster Support for TuxOnIce"); | |
11346 | +#endif | |
11347 | diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h | |
2380c486 | 11348 | new file mode 100644 |
5dd10c98 | 11349 | index 0000000..051feb3 |
2380c486 | 11350 | --- /dev/null |
7e46296a | 11351 | +++ b/kernel/power/tuxonice_cluster.h |
5dd10c98 | 11352 | @@ -0,0 +1,18 @@ |
2380c486 | 11353 | +/* |
7e46296a | 11354 | + * kernel/power/tuxonice_cluster.h |
2380c486 | 11355 | + * |
5dd10c98 | 11356 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
11357 | + * |
11358 | + * This file is released under the GPLv2. | |
2380c486 JR |
11359 | + */ |
11360 | + | |
7e46296a AM |
11361 | +#ifdef CONFIG_TOI_CLUSTER |
11362 | +extern int toi_cluster_init(void); | |
11363 | +extern void toi_cluster_exit(void); | |
11364 | +extern void toi_initiate_cluster_hibernate(void); | |
11365 | +#else | |
11366 | +static inline int toi_cluster_init(void) { return 0; } | |
11367 | +static inline void toi_cluster_exit(void) { } | |
11368 | +static inline void toi_initiate_cluster_hibernate(void) { } | |
2380c486 | 11369 | +#endif |
7e46296a AM |
11370 | + |
11371 | diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c | |
2380c486 | 11372 | new file mode 100644 |
85eb3c9d | 11373 | index 0000000..2634cc8 |
2380c486 | 11374 | --- /dev/null |
7e46296a | 11375 | +++ b/kernel/power/tuxonice_compress.c |
85eb3c9d | 11376 | @@ -0,0 +1,454 @@ |
2380c486 | 11377 | +/* |
7e46296a | 11378 | + * kernel/power/compression.c |
2380c486 | 11379 | + * |
5dd10c98 | 11380 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 11381 | + * |
7e46296a | 11382 | + * This file is released under the GPLv2. |
2380c486 | 11383 | + * |
7e46296a AM |
11384 | + * This file contains data compression routines for TuxOnIce, |
11385 | + * using cryptoapi. | |
2380c486 JR |
11386 | + */ |
11387 | + | |
11388 | +#include <linux/suspend.h> | |
7e46296a AM |
11389 | +#include <linux/highmem.h> |
11390 | +#include <linux/vmalloc.h> | |
11391 | +#include <linux/crypto.h> | |
2380c486 | 11392 | + |
7e46296a | 11393 | +#include "tuxonice_builtin.h" |
2380c486 | 11394 | +#include "tuxonice.h" |
2380c486 | 11395 | +#include "tuxonice_modules.h" |
7e46296a | 11396 | +#include "tuxonice_sysfs.h" |
2380c486 | 11397 | +#include "tuxonice_io.h" |
7e46296a | 11398 | +#include "tuxonice_ui.h" |
2380c486 | 11399 | +#include "tuxonice_alloc.h" |
2380c486 | 11400 | + |
7e46296a | 11401 | +static int toi_expected_compression; |
2380c486 | 11402 | + |
7e46296a AM |
11403 | +static struct toi_module_ops toi_compression_ops; |
11404 | +static struct toi_module_ops *next_driver; | |
2380c486 | 11405 | + |
7e46296a | 11406 | +static char toi_compressor_name[32] = "lzo"; |
2380c486 | 11407 | + |
7e46296a | 11408 | +static DEFINE_MUTEX(stats_lock); |
2380c486 | 11409 | + |
7e46296a AM |
11410 | +struct cpu_context { |
11411 | + u8 *page_buffer; | |
11412 | + struct crypto_comp *transform; | |
11413 | + unsigned int len; | |
85eb3c9d AM |
11414 | + u8 *buffer_start; |
11415 | + u8 *output_buffer; | |
7e46296a | 11416 | +}; |
2380c486 | 11417 | + |
7e46296a | 11418 | +static DEFINE_PER_CPU(struct cpu_context, contexts); |
2380c486 | 11419 | + |
7e46296a AM |
11420 | +/* |
11421 | + * toi_crypto_prepare | |
11422 | + * | |
11423 | + * Prepare to do some work by allocating buffers and transforms. | |
11424 | + */ | |
11425 | +static int toi_compress_crypto_prepare(void) | |
2380c486 | 11426 | +{ |
7e46296a | 11427 | + int cpu; |
2380c486 | 11428 | + |
7e46296a AM |
11429 | + if (!*toi_compressor_name) { |
11430 | + printk(KERN_INFO "TuxOnIce: Compression enabled but no " | |
11431 | + "compressor name set.\n"); | |
11432 | + return 1; | |
2380c486 JR |
11433 | + } |
11434 | + | |
7e46296a AM |
11435 | + for_each_online_cpu(cpu) { |
11436 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
11437 | + this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0); | |
11438 | + if (IS_ERR(this->transform)) { | |
11439 | + printk(KERN_INFO "TuxOnIce: Failed to initialise the " | |
11440 | + "%s compression transform.\n", | |
11441 | + toi_compressor_name); | |
11442 | + this->transform = NULL; | |
11443 | + return 1; | |
11444 | + } | |
2380c486 | 11445 | + |
7e46296a AM |
11446 | + this->page_buffer = |
11447 | + (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP); | |
2380c486 | 11448 | + |
7e46296a AM |
11449 | + if (!this->page_buffer) { |
11450 | + printk(KERN_ERR | |
11451 | + "Failed to allocate a page buffer for TuxOnIce " | |
11452 | + "compression driver.\n"); | |
11453 | + return -ENOMEM; | |
2380c486 JR |
11454 | + } |
11455 | + | |
7e46296a AM |
11456 | + this->output_buffer = |
11457 | + (char *) vmalloc_32(2 * PAGE_SIZE); | |
2380c486 | 11458 | + |
7e46296a AM |
11459 | + if (!this->output_buffer) { |
11460 | + printk(KERN_ERR | |
11461 | + "Failed to allocate a output buffer for TuxOnIce " | |
11462 | + "compression driver.\n"); | |
11463 | + return -ENOMEM; | |
2380c486 | 11464 | + } |
5dd10c98 AM |
11465 | + } |
11466 | + | |
11467 | + return 0; | |
11468 | +} | |
11469 | + | |
11470 | +static int toi_compress_rw_cleanup(int writing) | |
11471 | +{ | |
11472 | + int cpu; | |
11473 | + | |
11474 | + for_each_online_cpu(cpu) { | |
11475 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
11476 | + if (this->transform) { | |
11477 | + crypto_free_comp(this->transform); | |
11478 | + this->transform = NULL; | |
11479 | + } | |
11480 | + | |
11481 | + if (this->page_buffer) | |
11482 | + toi_free_page(16, (unsigned long) this->page_buffer); | |
11483 | + | |
11484 | + this->page_buffer = NULL; | |
11485 | + | |
11486 | + if (this->output_buffer) | |
11487 | + vfree(this->output_buffer); | |
11488 | + | |
11489 | + this->output_buffer = NULL; | |
2380c486 JR |
11490 | + } |
11491 | + | |
7e46296a | 11492 | + return 0; |
2380c486 JR |
11493 | +} |
11494 | + | |
7e46296a AM |
11495 | +/* |
11496 | + * toi_compress_init | |
11497 | + */ | |
11498 | + | |
11499 | +static int toi_compress_init(int toi_or_resume) | |
2380c486 | 11500 | +{ |
7e46296a AM |
11501 | + if (!toi_or_resume) |
11502 | + return 0; | |
2380c486 | 11503 | + |
7e46296a AM |
11504 | + toi_compress_bytes_in = 0; |
11505 | + toi_compress_bytes_out = 0; | |
2380c486 | 11506 | + |
7e46296a | 11507 | + next_driver = toi_get_next_filter(&toi_compression_ops); |
2380c486 | 11508 | + |
5dd10c98 | 11509 | + return next_driver ? 0 : -ECHILD; |
2380c486 JR |
11510 | +} |
11511 | + | |
7e46296a AM |
11512 | +/* |
11513 | + * toi_compress_rw_init() | |
11514 | + */ | |
11515 | + | |
11516 | +static int toi_compress_rw_init(int rw, int stream_number) | |
2380c486 | 11517 | +{ |
5dd10c98 | 11518 | + if (toi_compress_crypto_prepare()) { |
7e46296a AM |
11519 | + printk(KERN_ERR "Failed to initialise compression " |
11520 | + "algorithm.\n"); | |
11521 | + if (rw == READ) { | |
11522 | + printk(KERN_INFO "Unable to read the image.\n"); | |
11523 | + return -ENODEV; | |
11524 | + } else { | |
11525 | + printk(KERN_INFO "Continuing without " | |
11526 | + "compressing the image.\n"); | |
11527 | + toi_compression_ops.enabled = 0; | |
11528 | + } | |
2380c486 | 11529 | + } |
7e46296a | 11530 | + |
5dd10c98 AM |
11531 | + return 0; |
11532 | +} | |
11533 | + | |
7e46296a AM |
11534 | +/* |
11535 | + * toi_compress_write_page() | |
11536 | + * | |
11537 | + * Compress a page of data, buffering output and passing on filled | |
11538 | + * pages to the next module in the pipeline. | |
11539 | + * | |
11540 | + * Buffer_page: Pointer to a buffer of size PAGE_SIZE, containing | |
11541 | + * data to be compressed. | |
11542 | + * | |
11543 | + * Returns: 0 on success. Otherwise the error is that returned by later | |
11544 | + * modules, -ECHILD if we have a broken pipeline or -EIO if | |
11545 | + * zlib errs. | |
11546 | + */ | |
85eb3c9d AM |
11547 | +static int toi_compress_write_page(unsigned long index, int buf_type, |
11548 | + void *buffer_page, unsigned int buf_size) | |
2380c486 | 11549 | +{ |
7e46296a AM |
11550 | + int ret, cpu = smp_processor_id(); |
11551 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 11552 | + |
7e46296a | 11553 | + if (!ctx->transform) |
85eb3c9d AM |
11554 | + return next_driver->write_page(index, TOI_PAGE, buffer_page, |
11555 | + buf_size); | |
2380c486 | 11556 | + |
85eb3c9d | 11557 | + ctx->buffer_start = TOI_MAP(buf_type, buffer_page); |
2380c486 | 11558 | + |
e876a0dd | 11559 | + ctx->len = PAGE_SIZE; |
92bca44c | 11560 | + |
7e46296a AM |
11561 | + ret = crypto_comp_compress(ctx->transform, |
11562 | + ctx->buffer_start, buf_size, | |
11563 | + ctx->output_buffer, &ctx->len); | |
2380c486 | 11564 | + |
85eb3c9d | 11565 | + TOI_UNMAP(buf_type, buffer_page); |
2380c486 | 11566 | + |
7e46296a AM |
11567 | + mutex_lock(&stats_lock); |
11568 | + toi_compress_bytes_in += buf_size; | |
11569 | + toi_compress_bytes_out += ctx->len; | |
11570 | + mutex_unlock(&stats_lock); | |
2380c486 | 11571 | + |
85eb3c9d AM |
11572 | + toi_message(TOI_COMPRESS, TOI_VERBOSE, 0, |
11573 | + "CPU %d, index %lu: compressed %d bytes into %d.", | |
11574 | + cpu, index, buf_size, ctx->len); | |
5dd10c98 | 11575 | + |
85eb3c9d AM |
11576 | + if (!ret && ctx->len < buf_size) /* some compression */ |
11577 | + return next_driver->write_page(index, TOI_VIRT, | |
11578 | + ctx->output_buffer, ctx->len); | |
11579 | + else | |
11580 | + return next_driver->write_page(index, TOI_PAGE, buffer_page, | |
11581 | + buf_size); | |
7e46296a | 11582 | +} |
2380c486 | 11583 | + |
7e46296a AM |
11584 | +/* |
11585 | + * toi_compress_read_page() | |
11586 | + * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE. | |
11587 | + * | |
11588 | + * Retrieve data from later modules and decompress it until the input buffer | |
11589 | + * is filled. | |
11590 | + * Zero if successful. Error condition from me or from downstream on failure. | |
11591 | + */ | |
85eb3c9d AM |
11592 | +static int toi_compress_read_page(unsigned long *index, int buf_type, |
11593 | + void *buffer_page, unsigned int *buf_size) | |
7e46296a AM |
11594 | +{ |
11595 | + int ret, cpu = smp_processor_id(); | |
11596 | + unsigned int len; | |
11597 | + unsigned int outlen = PAGE_SIZE; | |
11598 | + char *buffer_start; | |
11599 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
2380c486 | 11600 | + |
7e46296a | 11601 | + if (!ctx->transform) |
85eb3c9d AM |
11602 | + return next_driver->read_page(index, TOI_PAGE, buffer_page, |
11603 | + buf_size); | |
2380c486 | 11604 | + |
7e46296a AM |
11605 | + /* |
11606 | + * All our reads must be synchronous - we can't decompress | |
11607 | + * data that hasn't been read yet. | |
11608 | + */ | |
2380c486 | 11609 | + |
85eb3c9d AM |
11610 | + ret = next_driver->read_page(index, TOI_VIRT, ctx->page_buffer, &len); |
11611 | + | |
11612 | + buffer_start = kmap(buffer_page); | |
2380c486 | 11613 | + |
7e46296a | 11614 | + /* Error or uncompressed data */ |
85eb3c9d AM |
11615 | + if (ret || len == PAGE_SIZE) { |
11616 | + memcpy(buffer_start, ctx->page_buffer, len); | |
11617 | + goto out; | |
11618 | + } | |
2380c486 | 11619 | + |
7e46296a AM |
11620 | + ret = crypto_comp_decompress( |
11621 | + ctx->transform, | |
11622 | + ctx->page_buffer, | |
11623 | + len, buffer_start, &outlen); | |
85eb3c9d AM |
11624 | + |
11625 | + toi_message(TOI_COMPRESS, TOI_VERBOSE, 0, | |
11626 | + "CPU %d, index %lu: decompressed %d bytes into %d (result %d).", | |
11627 | + cpu, *index, len, outlen, ret); | |
11628 | + | |
7e46296a AM |
11629 | + if (ret) |
11630 | + abort_hibernate(TOI_FAILED_IO, | |
11631 | + "Compress_read returned %d.\n", ret); | |
11632 | + else if (outlen != PAGE_SIZE) { | |
11633 | + abort_hibernate(TOI_FAILED_IO, | |
11634 | + "Decompression yielded %d bytes instead of %ld.\n", | |
11635 | + outlen, PAGE_SIZE); | |
11636 | + printk(KERN_ERR "Decompression yielded %d bytes instead of " | |
11637 | + "%ld.\n", outlen, PAGE_SIZE); | |
11638 | + ret = -EIO; | |
11639 | + *buf_size = outlen; | |
11640 | + } | |
85eb3c9d AM |
11641 | +out: |
11642 | + TOI_UNMAP(buf_type, buffer_page); | |
7e46296a | 11643 | + return ret; |
2380c486 JR |
11644 | +} |
11645 | + | |
7e46296a AM |
11646 | +/* |
11647 | + * toi_compress_print_debug_stats | |
11648 | + * @buffer: Pointer to a buffer into which the debug info will be printed. | |
11649 | + * @size: Size of the buffer. | |
2380c486 | 11650 | + * |
7e46296a AM |
11651 | + * Print information to be recorded for debugging purposes into a buffer. |
11652 | + * Returns: Number of characters written to the buffer. | |
11653 | + */ | |
2380c486 | 11654 | + |
7e46296a AM |
11655 | +static int toi_compress_print_debug_stats(char *buffer, int size) |
11656 | +{ | |
11657 | + unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT, | |
11658 | + pages_out = toi_compress_bytes_out >> PAGE_SHIFT; | |
11659 | + int len; | |
2380c486 | 11660 | + |
7e46296a AM |
11661 | + /* Output the compression ratio achieved. */ |
11662 | + if (*toi_compressor_name) | |
11663 | + len = scnprintf(buffer, size, "- Compressor is '%s'.\n", | |
11664 | + toi_compressor_name); | |
2380c486 | 11665 | + else |
7e46296a | 11666 | + len = scnprintf(buffer, size, "- Compressor is not set.\n"); |
2380c486 | 11667 | + |
7e46296a AM |
11668 | + if (pages_in) |
11669 | + len += scnprintf(buffer+len, size - len, " Compressed " | |
11670 | + "%lu bytes into %lu (%ld percent compression).\n", | |
11671 | + toi_compress_bytes_in, | |
11672 | + toi_compress_bytes_out, | |
11673 | + (pages_in - pages_out) * 100 / pages_in); | |
11674 | + return len; | |
2380c486 JR |
11675 | +} |
11676 | + | |
7e46296a AM |
11677 | +/* |
11678 | + * toi_compress_compression_memory_needed | |
11679 | + * | |
11680 | + * Tell the caller how much memory we need to operate during hibernate/resume. | |
11681 | + * Returns: Unsigned long. Maximum number of bytes of memory required for | |
11682 | + * operation. | |
11683 | + */ | |
11684 | +static int toi_compress_memory_needed(void) | |
2380c486 | 11685 | +{ |
7e46296a | 11686 | + return 2 * PAGE_SIZE; |
2380c486 JR |
11687 | +} |
11688 | + | |
7e46296a | 11689 | +static int toi_compress_storage_needed(void) |
2380c486 | 11690 | +{ |
85eb3c9d AM |
11691 | + return 2 * sizeof(unsigned long) + 2 * sizeof(int) + |
11692 | + strlen(toi_compressor_name) + 1; | |
2380c486 JR |
11693 | +} |
11694 | + | |
7e46296a AM |
11695 | +/* |
11696 | + * toi_compress_save_config_info | |
11697 | + * @buffer: Pointer to a buffer of size PAGE_SIZE. | |
11698 | + * | |
11699 | + * Save informaton needed when reloading the image at resume time. | |
11700 | + * Returns: Number of bytes used for saving our data. | |
11701 | + */ | |
11702 | +static int toi_compress_save_config_info(char *buffer) | |
2380c486 | 11703 | +{ |
85eb3c9d | 11704 | + int len = strlen(toi_compressor_name) + 1, offset = 0; |
2380c486 | 11705 | + |
7e46296a | 11706 | + *((unsigned long *) buffer) = toi_compress_bytes_in; |
85eb3c9d AM |
11707 | + offset += sizeof(unsigned long); |
11708 | + *((unsigned long *) (buffer + offset)) = toi_compress_bytes_out; | |
11709 | + offset += sizeof(unsigned long); | |
11710 | + *((int *) (buffer + offset)) = toi_expected_compression; | |
11711 | + offset += sizeof(int); | |
11712 | + *((int *) (buffer + offset)) = len; | |
11713 | + offset += sizeof(int); | |
11714 | + strncpy(buffer + offset, toi_compressor_name, len); | |
11715 | + return offset + len; | |
2380c486 JR |
11716 | +} |
11717 | + | |
7e46296a AM |
11718 | +/* toi_compress_load_config_info |
11719 | + * @buffer: Pointer to the start of the data. | |
11720 | + * @size: Number of bytes that were saved. | |
11721 | + * | |
11722 | + * Description: Reload information needed for decompressing the image at | |
11723 | + * resume time. | |
11724 | + */ | |
11725 | +static void toi_compress_load_config_info(char *buffer, int size) | |
2380c486 | 11726 | +{ |
85eb3c9d | 11727 | + int len, offset = 0; |
2380c486 | 11728 | + |
7e46296a | 11729 | + toi_compress_bytes_in = *((unsigned long *) buffer); |
85eb3c9d AM |
11730 | + offset += sizeof(unsigned long); |
11731 | + toi_compress_bytes_out = *((unsigned long *) (buffer + offset)); | |
11732 | + offset += sizeof(unsigned long); | |
11733 | + toi_expected_compression = *((int *) (buffer + offset)); | |
11734 | + offset += sizeof(int); | |
11735 | + len = *((int *) (buffer + offset)); | |
11736 | + offset += sizeof(int); | |
11737 | + strncpy(toi_compressor_name, buffer + offset, len); | |
2380c486 JR |
11738 | +} |
11739 | + | |
5dd10c98 AM |
11740 | +static void toi_compress_pre_atomic_restore(struct toi_boot_kernel_data *bkd) |
11741 | +{ | |
11742 | + bkd->compress_bytes_in = toi_compress_bytes_in; | |
11743 | + bkd->compress_bytes_out = toi_compress_bytes_out; | |
11744 | +} | |
11745 | + | |
11746 | +static void toi_compress_post_atomic_restore(struct toi_boot_kernel_data *bkd) | |
11747 | +{ | |
11748 | + toi_compress_bytes_in = bkd->compress_bytes_in; | |
11749 | + toi_compress_bytes_out = bkd->compress_bytes_out; | |
11750 | +} | |
11751 | + | |
7e46296a AM |
11752 | +/* |
11753 | + * toi_expected_compression_ratio | |
11754 | + * | |
11755 | + * Description: Returns the expected ratio between data passed into this module | |
11756 | + * and the amount of data output when writing. | |
11757 | + * Returns: 100 if the module is disabled. Otherwise the value set by the | |
11758 | + * user via our sysfs entry. | |
11759 | + */ | |
2380c486 | 11760 | + |
7e46296a AM |
11761 | +static int toi_compress_expected_ratio(void) |
11762 | +{ | |
11763 | + if (!toi_compression_ops.enabled) | |
11764 | + return 100; | |
11765 | + else | |
11766 | + return 100 - toi_expected_compression; | |
11767 | +} | |
2380c486 | 11768 | + |
7e46296a AM |
11769 | +/* |
11770 | + * data for our sysfs entries. | |
11771 | + */ | |
11772 | +static struct toi_sysfs_data sysfs_params[] = { | |
11773 | + SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression, | |
11774 | + 0, 99, 0, NULL), | |
11775 | + SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0, | |
11776 | + NULL), | |
11777 | + SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL), | |
11778 | +}; | |
2380c486 | 11779 | + |
7e46296a AM |
11780 | +/* |
11781 | + * Ops structure. | |
11782 | + */ | |
11783 | +static struct toi_module_ops toi_compression_ops = { | |
11784 | + .type = FILTER_MODULE, | |
11785 | + .name = "compression", | |
11786 | + .directory = "compression", | |
11787 | + .module = THIS_MODULE, | |
11788 | + .initialise = toi_compress_init, | |
7e46296a AM |
11789 | + .memory_needed = toi_compress_memory_needed, |
11790 | + .print_debug_info = toi_compress_print_debug_stats, | |
11791 | + .save_config_info = toi_compress_save_config_info, | |
11792 | + .load_config_info = toi_compress_load_config_info, | |
11793 | + .storage_needed = toi_compress_storage_needed, | |
11794 | + .expected_compression = toi_compress_expected_ratio, | |
2380c486 | 11795 | + |
5dd10c98 AM |
11796 | + .pre_atomic_restore = toi_compress_pre_atomic_restore, |
11797 | + .post_atomic_restore = toi_compress_post_atomic_restore, | |
11798 | + | |
7e46296a | 11799 | + .rw_init = toi_compress_rw_init, |
5dd10c98 | 11800 | + .rw_cleanup = toi_compress_rw_cleanup, |
2380c486 | 11801 | + |
7e46296a AM |
11802 | + .write_page = toi_compress_write_page, |
11803 | + .read_page = toi_compress_read_page, | |
2380c486 | 11804 | + |
7e46296a AM |
11805 | + .sysfs_data = sysfs_params, |
11806 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
11807 | + sizeof(struct toi_sysfs_data), | |
11808 | +}; | |
2380c486 | 11809 | + |
7e46296a | 11810 | +/* ---- Registration ---- */ |
2380c486 | 11811 | + |
7e46296a AM |
11812 | +static __init int toi_compress_load(void) |
11813 | +{ | |
11814 | + return toi_register_module(&toi_compression_ops); | |
2380c486 JR |
11815 | +} |
11816 | + | |
7e46296a AM |
11817 | +#ifdef MODULE |
11818 | +static __exit void toi_compress_unload(void) | |
2380c486 | 11819 | +{ |
7e46296a | 11820 | + toi_unregister_module(&toi_compression_ops); |
2380c486 JR |
11821 | +} |
11822 | + | |
7e46296a AM |
11823 | +module_init(toi_compress_load); |
11824 | +module_exit(toi_compress_unload); | |
11825 | +MODULE_LICENSE("GPL"); | |
11826 | +MODULE_AUTHOR("Nigel Cunningham"); | |
11827 | +MODULE_DESCRIPTION("Compression Support for TuxOnIce"); | |
11828 | +#else | |
11829 | +late_initcall(toi_compress_load); | |
11830 | +#endif | |
11831 | diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c | |
11832 | new file mode 100644 | |
5dd10c98 | 11833 | index 0000000..e84572c |
7e46296a AM |
11834 | --- /dev/null |
11835 | +++ b/kernel/power/tuxonice_extent.c | |
11836 | @@ -0,0 +1,123 @@ | |
11837 | +/* | |
11838 | + * kernel/power/tuxonice_extent.c | |
11839 | + * | |
5dd10c98 | 11840 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
11841 | + * |
11842 | + * Distributed under GPLv2. | |
11843 | + * | |
11844 | + * These functions encapsulate the manipulation of storage metadata. | |
11845 | + */ | |
11846 | + | |
11847 | +#include <linux/suspend.h> | |
11848 | +#include "tuxonice_modules.h" | |
11849 | +#include "tuxonice_extent.h" | |
11850 | +#include "tuxonice_alloc.h" | |
11851 | +#include "tuxonice_ui.h" | |
11852 | +#include "tuxonice.h" | |
2380c486 JR |
11853 | + |
11854 | +/** | |
7e46296a | 11855 | + * toi_get_extent - return a free extent |
2380c486 | 11856 | + * |
7e46296a | 11857 | + * May fail, returning NULL instead. |
2380c486 | 11858 | + **/ |
7e46296a | 11859 | +static struct hibernate_extent *toi_get_extent(void) |
2380c486 | 11860 | +{ |
7e46296a AM |
11861 | + return (struct hibernate_extent *) toi_kzalloc(2, |
11862 | + sizeof(struct hibernate_extent), TOI_ATOMIC_GFP); | |
11863 | +} | |
2380c486 | 11864 | + |
7e46296a AM |
11865 | +/** |
11866 | + * toi_put_extent_chain - free a whole chain of extents | |
11867 | + * @chain: Chain to free. | |
11868 | + **/ | |
11869 | +void toi_put_extent_chain(struct hibernate_extent_chain *chain) | |
11870 | +{ | |
11871 | + struct hibernate_extent *this; | |
2380c486 | 11872 | + |
7e46296a | 11873 | + this = chain->first; |
2380c486 | 11874 | + |
7e46296a AM |
11875 | + while (this) { |
11876 | + struct hibernate_extent *next = this->next; | |
11877 | + toi_kfree(2, this, sizeof(*this)); | |
11878 | + chain->num_extents--; | |
11879 | + this = next; | |
2380c486 JR |
11880 | + } |
11881 | + | |
7e46296a AM |
11882 | + chain->first = NULL; |
11883 | + chain->last_touched = NULL; | |
11884 | + chain->current_extent = NULL; | |
11885 | + chain->size = 0; | |
2380c486 | 11886 | +} |
7e46296a | 11887 | +EXPORT_SYMBOL_GPL(toi_put_extent_chain); |
2380c486 JR |
11888 | + |
11889 | +/** | |
7e46296a AM |
11890 | + * toi_add_to_extent_chain - add an extent to an existing chain |
11891 | + * @chain: Chain to which the extend should be added | |
11892 | + * @start: Start of the extent (first physical block) | |
11893 | + * @end: End of the extent (last physical block) | |
2380c486 | 11894 | + * |
7e46296a | 11895 | + * The chain information is updated if the insertion is successful. |
2380c486 | 11896 | + **/ |
7e46296a AM |
11897 | +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, |
11898 | + unsigned long start, unsigned long end) | |
2380c486 | 11899 | +{ |
7e46296a | 11900 | + struct hibernate_extent *new_ext = NULL, *cur_ext = NULL; |
2380c486 | 11901 | + |
7e46296a AM |
11902 | + toi_message(TOI_IO, TOI_VERBOSE, 0, |
11903 | + "Adding extent %lu-%lu to chain %p.\n", start, end, chain); | |
2380c486 | 11904 | + |
7e46296a AM |
11905 | + /* Find the right place in the chain */ |
11906 | + if (chain->last_touched && chain->last_touched->start < start) | |
11907 | + cur_ext = chain->last_touched; | |
11908 | + else if (chain->first && chain->first->start < start) | |
11909 | + cur_ext = chain->first; | |
2380c486 | 11910 | + |
7e46296a AM |
11911 | + if (cur_ext) { |
11912 | + while (cur_ext->next && cur_ext->next->start < start) | |
11913 | + cur_ext = cur_ext->next; | |
2380c486 | 11914 | + |
7e46296a AM |
11915 | + if (cur_ext->end == (start - 1)) { |
11916 | + struct hibernate_extent *next_ext = cur_ext->next; | |
11917 | + cur_ext->end = end; | |
2380c486 | 11918 | + |
7e46296a AM |
11919 | + /* Merge with the following one? */ |
11920 | + if (next_ext && cur_ext->end + 1 == next_ext->start) { | |
11921 | + cur_ext->end = next_ext->end; | |
11922 | + cur_ext->next = next_ext->next; | |
11923 | + toi_kfree(2, next_ext, sizeof(*next_ext)); | |
11924 | + chain->num_extents--; | |
11925 | + } | |
2380c486 | 11926 | + |
7e46296a AM |
11927 | + chain->last_touched = cur_ext; |
11928 | + chain->size += (end - start + 1); | |
2380c486 | 11929 | + |
7e46296a | 11930 | + return 0; |
2380c486 | 11931 | + } |
2380c486 JR |
11932 | + } |
11933 | + | |
7e46296a AM |
11934 | + new_ext = toi_get_extent(); |
11935 | + if (!new_ext) { | |
11936 | + printk(KERN_INFO "Error unable to append a new extent to the " | |
11937 | + "chain.\n"); | |
11938 | + return -ENOMEM; | |
2380c486 JR |
11939 | + } |
11940 | + | |
7e46296a AM |
11941 | + chain->num_extents++; |
11942 | + chain->size += (end - start + 1); | |
11943 | + new_ext->start = start; | |
11944 | + new_ext->end = end; | |
2380c486 | 11945 | + |
7e46296a | 11946 | + chain->last_touched = new_ext; |
2380c486 | 11947 | + |
7e46296a AM |
11948 | + if (cur_ext) { |
11949 | + new_ext->next = cur_ext->next; | |
11950 | + cur_ext->next = new_ext; | |
11951 | + } else { | |
11952 | + if (chain->first) | |
11953 | + new_ext->next = chain->first; | |
11954 | + chain->first = new_ext; | |
2380c486 JR |
11955 | + } |
11956 | + | |
7e46296a AM |
11957 | + return 0; |
11958 | +} | |
11959 | +EXPORT_SYMBOL_GPL(toi_add_to_extent_chain); | |
11960 | diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h | |
11961 | new file mode 100644 | |
5dd10c98 | 11962 | index 0000000..157446c |
7e46296a AM |
11963 | --- /dev/null |
11964 | +++ b/kernel/power/tuxonice_extent.h | |
11965 | @@ -0,0 +1,44 @@ | |
11966 | +/* | |
11967 | + * kernel/power/tuxonice_extent.h | |
11968 | + * | |
5dd10c98 | 11969 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
7e46296a AM |
11970 | + * |
11971 | + * This file is released under the GPLv2. | |
11972 | + * | |
11973 | + * It contains declarations related to extents. Extents are | |
11974 | + * TuxOnIce's method of storing some of the metadata for the image. | |
11975 | + * See tuxonice_extent.c for more info. | |
11976 | + * | |
11977 | + */ | |
11978 | + | |
11979 | +#include "tuxonice_modules.h" | |
2380c486 | 11980 | + |
7e46296a AM |
11981 | +#ifndef EXTENT_H |
11982 | +#define EXTENT_H | |
2380c486 | 11983 | + |
7e46296a AM |
11984 | +struct hibernate_extent { |
11985 | + unsigned long start, end; | |
11986 | + struct hibernate_extent *next; | |
11987 | +}; | |
2380c486 | 11988 | + |
7e46296a AM |
11989 | +struct hibernate_extent_chain { |
11990 | + unsigned long size; /* size of the chain ie sum (max-min+1) */ | |
11991 | + int num_extents; | |
11992 | + struct hibernate_extent *first, *last_touched; | |
11993 | + struct hibernate_extent *current_extent; | |
11994 | + unsigned long current_offset; | |
11995 | +}; | |
11996 | + | |
11997 | +/* Simplify iterating through all the values in an extent chain */ | |
11998 | +#define toi_extent_for_each(extent_chain, extentpointer, value) \ | |
11999 | +if ((extent_chain)->first) \ | |
12000 | + for ((extentpointer) = (extent_chain)->first, (value) = \ | |
12001 | + (extentpointer)->start; \ | |
12002 | + ((extentpointer) && ((extentpointer)->next || (value) <= \ | |
12003 | + (extentpointer)->end)); \ | |
12004 | + (((value) == (extentpointer)->end) ? \ | |
12005 | + ((extentpointer) = (extentpointer)->next, (value) = \ | |
12006 | + ((extentpointer) ? (extentpointer)->start : 0)) : \ | |
12007 | + (value)++)) | |
12008 | + | |
12009 | +#endif | |
12010 | diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c | |
12011 | new file mode 100644 | |
cacc47f8 | 12012 | index 0000000..7a4614a |
7e46296a AM |
12013 | --- /dev/null |
12014 | +++ b/kernel/power/tuxonice_file.c | |
5dd10c98 | 12015 | @@ -0,0 +1,496 @@ |
7e46296a AM |
12016 | +/* |
12017 | + * kernel/power/tuxonice_file.c | |
2380c486 | 12018 | + * |
5dd10c98 | 12019 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 | 12020 | + * |
7e46296a AM |
12021 | + * Distributed under GPLv2. |
12022 | + * | |
12023 | + * This file encapsulates functions for usage of a simple file as a | |
12024 | + * backing store. It is based upon the swapallocator, and shares the | |
12025 | + * same basic working. Here, though, we have nothing to do with | |
12026 | + * swapspace, and only one device to worry about. | |
12027 | + * | |
12028 | + * The user can just | |
12029 | + * | |
12030 | + * echo TuxOnIce > /path/to/my_file | |
12031 | + * | |
12032 | + * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file | |
12033 | + * | |
12034 | + * and | |
12035 | + * | |
12036 | + * echo /path/to/my_file > /sys/power/tuxonice/file/target | |
12037 | + * | |
12038 | + * then put what they find in /sys/power/tuxonice/resume | |
12039 | + * as their resume= parameter in lilo.conf (and rerun lilo if using it). | |
12040 | + * | |
12041 | + * Having done this, they're ready to hibernate and resume. | |
12042 | + * | |
12043 | + * TODO: | |
12044 | + * - File resizing. | |
12045 | + */ | |
2380c486 | 12046 | + |
7e46296a AM |
12047 | +#include <linux/blkdev.h> |
12048 | +#include <linux/mount.h> | |
12049 | +#include <linux/fs.h> | |
cacc47f8 | 12050 | +#include <linux/fs_uuid.h> |
2380c486 | 12051 | + |
7e46296a AM |
12052 | +#include "tuxonice.h" |
12053 | +#include "tuxonice_modules.h" | |
12054 | +#include "tuxonice_bio.h" | |
12055 | +#include "tuxonice_alloc.h" | |
12056 | +#include "tuxonice_builtin.h" | |
12057 | +#include "tuxonice_sysfs.h" | |
12058 | +#include "tuxonice_ui.h" | |
12059 | +#include "tuxonice_io.h" | |
2380c486 | 12060 | + |
7e46296a AM |
12061 | +#define target_is_normal_file() (S_ISREG(target_inode->i_mode)) |
12062 | + | |
12063 | +static struct toi_module_ops toi_fileops; | |
12064 | + | |
12065 | +static struct file *target_file; | |
12066 | +static struct block_device *toi_file_target_bdev; | |
12067 | +static unsigned long pages_available, pages_allocated; | |
12068 | +static char toi_file_target[256]; | |
12069 | +static struct inode *target_inode; | |
12070 | +static int file_target_priority; | |
12071 | +static int used_devt; | |
12072 | +static int target_claim; | |
12073 | +static dev_t toi_file_dev_t; | |
12074 | +static int sig_page_index; | |
12075 | + | |
12076 | +/* For test_toi_file_target */ | |
12077 | +static struct toi_bdev_info *file_chain; | |
12078 | + | |
12079 | +static int has_contiguous_blocks(struct toi_bdev_info *dev_info, int page_num) | |
2380c486 | 12080 | +{ |
7e46296a AM |
12081 | + int j; |
12082 | + sector_t last = 0; | |
12083 | + | |
12084 | + for (j = 0; j < dev_info->blocks_per_page; j++) { | |
12085 | + sector_t this = bmap(target_inode, | |
12086 | + page_num * dev_info->blocks_per_page + j); | |
12087 | + | |
12088 | + if (!this || (last && (last + 1) != this)) | |
12089 | + break; | |
2380c486 | 12090 | + |
7e46296a | 12091 | + last = this; |
2380c486 JR |
12092 | + } |
12093 | + | |
7e46296a AM |
12094 | + return j == dev_info->blocks_per_page; |
12095 | +} | |
2380c486 | 12096 | + |
7e46296a AM |
12097 | +static unsigned long get_usable_pages(struct toi_bdev_info *dev_info) |
12098 | +{ | |
12099 | + unsigned long result = 0; | |
12100 | + struct block_device *bdev = dev_info->bdev; | |
12101 | + int i; | |
2380c486 | 12102 | + |
7e46296a AM |
12103 | + switch (target_inode->i_mode & S_IFMT) { |
12104 | + case S_IFSOCK: | |
12105 | + case S_IFCHR: | |
12106 | + case S_IFIFO: /* Socket, Char, Fifo */ | |
12107 | + return -1; | |
12108 | + case S_IFREG: /* Regular file: current size - holes + free | |
12109 | + space on part */ | |
12110 | + for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) { | |
12111 | + if (has_contiguous_blocks(dev_info, i)) | |
12112 | + result++; | |
12113 | + } | |
12114 | + break; | |
12115 | + case S_IFBLK: /* Block device */ | |
12116 | + if (!bdev->bd_disk) { | |
12117 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
12118 | + "bdev->bd_disk null."); | |
12119 | + return 0; | |
12120 | + } | |
2380c486 | 12121 | + |
7e46296a AM |
12122 | + result = (bdev->bd_part ? |
12123 | + bdev->bd_part->nr_sects : | |
12124 | + get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9); | |
12125 | + } | |
2380c486 | 12126 | + |
2380c486 | 12127 | + |
7e46296a | 12128 | + return result; |
2380c486 JR |
12129 | +} |
12130 | + | |
7e46296a | 12131 | +static int toi_file_register_storage(void) |
2380c486 | 12132 | +{ |
7e46296a | 12133 | + struct toi_bdev_info *devinfo; |
5dd10c98 AM |
12134 | + int result = 0; |
12135 | + struct fs_info *fs_info; | |
7e46296a AM |
12136 | + |
12137 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_file_register_storage."); | |
12138 | + if (!strlen(toi_file_target)) { | |
12139 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Register file storage: " | |
12140 | + "No target filename set."); | |
12141 | + return 0; | |
12142 | + } | |
12143 | + | |
12144 | + target_file = filp_open(toi_file_target, O_RDONLY|O_LARGEFILE, 0); | |
12145 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "filp_open %s returned %p.", | |
12146 | + toi_file_target, target_file); | |
12147 | + | |
12148 | + if (IS_ERR(target_file) || !target_file) { | |
12149 | + target_file = NULL; | |
12150 | + toi_file_dev_t = name_to_dev_t(toi_file_target); | |
12151 | + if (!toi_file_dev_t) { | |
12152 | + struct kstat stat; | |
12153 | + int error = vfs_stat(toi_file_target, &stat); | |
12154 | + printk(KERN_INFO "Open file %s returned %p and " | |
12155 | + "name_to_devt failed.\n", | |
12156 | + toi_file_target, target_file); | |
12157 | + if (error) { | |
12158 | + printk(KERN_INFO "Stating the file also failed." | |
12159 | + " Nothing more we can do.\n"); | |
12160 | + return 0; | |
12161 | + } else | |
12162 | + toi_file_dev_t = stat.rdev; | |
12163 | + } | |
2380c486 | 12164 | + |
5dd10c98 | 12165 | + toi_file_target_bdev = toi_open_by_devnum(toi_file_dev_t); |
7e46296a AM |
12166 | + if (IS_ERR(toi_file_target_bdev)) { |
12167 | + printk(KERN_INFO "Got a dev_num (%lx) but failed to " | |
12168 | + "open it.\n", | |
12169 | + (unsigned long) toi_file_dev_t); | |
12170 | + toi_file_target_bdev = NULL; | |
12171 | + return 0; | |
12172 | + } | |
12173 | + used_devt = 1; | |
12174 | + target_inode = toi_file_target_bdev->bd_inode; | |
12175 | + } else | |
12176 | + target_inode = target_file->f_mapping->host; | |
2380c486 | 12177 | + |
7e46296a AM |
12178 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Succeeded in opening the target."); |
12179 | + if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) || | |
12180 | + S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) { | |
12181 | + printk(KERN_INFO "File support works with regular files," | |
12182 | + " character files and block devices.\n"); | |
12183 | + /* Cleanup routine will undo the above */ | |
2380c486 JR |
12184 | + return 0; |
12185 | + } | |
12186 | + | |
7e46296a AM |
12187 | + if (!used_devt) { |
12188 | + if (S_ISBLK(target_inode->i_mode)) { | |
12189 | + toi_file_target_bdev = I_BDEV(target_inode); | |
12190 | + if (!bd_claim(toi_file_target_bdev, &toi_fileops)) | |
12191 | + target_claim = 1; | |
12192 | + } else | |
12193 | + toi_file_target_bdev = target_inode->i_sb->s_bdev; | |
5dd10c98 AM |
12194 | + if (!toi_file_target_bdev) { |
12195 | + printk(KERN_INFO "%s is not a valid file allocator " | |
12196 | + "target.\n", toi_file_target); | |
12197 | + return 0; | |
12198 | + } | |
7e46296a AM |
12199 | + toi_file_dev_t = toi_file_target_bdev->bd_dev; |
12200 | + } | |
2380c486 | 12201 | + |
7e46296a AM |
12202 | + devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), GFP_ATOMIC); |
12203 | + if (!devinfo) { | |
12204 | + printk("Failed to allocate a toi_bdev_info struct for the file allocator.\n"); | |
12205 | + return -ENOMEM; | |
12206 | + } | |
2380c486 | 12207 | + |
7e46296a AM |
12208 | + devinfo->bdev = toi_file_target_bdev; |
12209 | + devinfo->allocator = &toi_fileops; | |
12210 | + devinfo->allocator_index = 0; | |
2380c486 | 12211 | + |
5dd10c98 AM |
12212 | + fs_info = fs_info_from_block_dev(toi_file_target_bdev); |
12213 | + if (fs_info && !IS_ERR(fs_info)) { | |
12214 | + memcpy(devinfo->uuid, &fs_info->uuid, 16); | |
12215 | + free_fs_info(fs_info); | |
12216 | + } else | |
12217 | + result = (int) PTR_ERR(fs_info); | |
12218 | + | |
12219 | + /* Unlike swap code, only complain if fs_info_from_block_dev returned | |
12220 | + * -ENOMEM. The 'file' might be a full partition, so might validly not | |
12221 | + * have an identifiable type, UUID etc. | |
12222 | + */ | |
7e46296a | 12223 | + if (result) |
5dd10c98 | 12224 | + printk(KERN_DEBUG "Failed to get fs_info for file device (%d).\n", |
7e46296a AM |
12225 | + result); |
12226 | + devinfo->dev_t = toi_file_dev_t; | |
12227 | + devinfo->prio = file_target_priority; | |
12228 | + devinfo->bmap_shift = target_inode->i_blkbits - 9; | |
12229 | + devinfo->blocks_per_page = | |
12230 | + (1 << (PAGE_SHIFT - target_inode->i_blkbits)); | |
5dd10c98 | 12231 | + sprintf(devinfo->name, "file %s", toi_file_target); |
7e46296a AM |
12232 | + file_chain = devinfo; |
12233 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Dev_t is %lx. Prio is %d. Bmap " | |
12234 | + "shift is %d. Blocks per page %d.", | |
12235 | + devinfo->dev_t, devinfo->prio, devinfo->bmap_shift, | |
12236 | + devinfo->blocks_per_page); | |
12237 | + | |
12238 | + /* Keep one aside for the signature */ | |
12239 | + pages_available = get_usable_pages(devinfo) - 1; | |
12240 | + | |
12241 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering file storage, %lu " | |
12242 | + "pages.", pages_available); | |
12243 | + | |
12244 | + toi_bio_ops.register_storage(devinfo); | |
12245 | + return 0; | |
12246 | +} | |
2380c486 | 12247 | + |
7e46296a AM |
12248 | +static unsigned long toi_file_storage_available(void) |
12249 | +{ | |
12250 | + return pages_available; | |
2380c486 JR |
12251 | +} |
12252 | + | |
7e46296a AM |
12253 | +static int toi_file_allocate_storage(struct toi_bdev_info *chain, |
12254 | + unsigned long request) | |
2380c486 | 12255 | +{ |
7e46296a AM |
12256 | + unsigned long available = pages_available - pages_allocated; |
12257 | + unsigned long to_add = min(available, request); | |
2380c486 | 12258 | + |
7e46296a AM |
12259 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Pages available is %lu. Allocated " |
12260 | + "is %lu. Allocating %lu pages from file.", | |
12261 | + pages_available, pages_allocated, to_add); | |
12262 | + pages_allocated += to_add; | |
2380c486 | 12263 | + |
7e46296a | 12264 | + return to_add; |
2380c486 JR |
12265 | +} |
12266 | + | |
12267 | +/** | |
7e46296a AM |
12268 | + * __populate_block_list - add an extent to the chain |
12269 | + * @min: Start of the extent (first physical block = sector) | |
12270 | + * @max: End of the extent (last physical block = sector) | |
2380c486 | 12271 | + * |
7e46296a AM |
12272 | + * If TOI_TEST_BIO is set, print a debug message, outputting the min and max |
12273 | + * fs block numbers. | |
2380c486 | 12274 | + **/ |
7e46296a | 12275 | +static int __populate_block_list(struct toi_bdev_info *chain, int min, int max) |
2380c486 | 12276 | +{ |
7e46296a AM |
12277 | + if (test_action_state(TOI_TEST_BIO)) |
12278 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %d-%d.", | |
12279 | + min << chain->bmap_shift, | |
12280 | + ((max + 1) << chain->bmap_shift) - 1); | |
2380c486 | 12281 | + |
7e46296a AM |
12282 | + return toi_add_to_extent_chain(&chain->blocks, min, max); |
12283 | +} | |
2380c486 | 12284 | + |
7e46296a AM |
12285 | +static int get_main_pool_phys_params(struct toi_bdev_info *chain) |
12286 | +{ | |
12287 | + int i, extent_min = -1, extent_max = -1, result = 0, have_sig_page = 0; | |
12288 | + unsigned long pages_mapped = 0; | |
2380c486 | 12289 | + |
7e46296a | 12290 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Getting file allocator blocks."); |
2380c486 | 12291 | + |
7e46296a AM |
12292 | + if (chain->blocks.first) |
12293 | + toi_put_extent_chain(&chain->blocks); | |
2380c486 | 12294 | + |
7e46296a AM |
12295 | + if (!target_is_normal_file()) { |
12296 | + result = (pages_available > 0) ? | |
12297 | + __populate_block_list(chain, chain->blocks_per_page, | |
12298 | + (pages_allocated + 1) * | |
12299 | + chain->blocks_per_page - 1) : 0; | |
12300 | + return result; | |
2380c486 JR |
12301 | + } |
12302 | + | |
12303 | + /* | |
7e46296a AM |
12304 | + * FIXME: We are assuming the first page is contiguous. Is that |
12305 | + * assumption always right? | |
2380c486 JR |
12306 | + */ |
12307 | + | |
7e46296a AM |
12308 | + for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) { |
12309 | + sector_t new_sector; | |
2380c486 | 12310 | + |
7e46296a AM |
12311 | + if (!has_contiguous_blocks(chain, i)) |
12312 | + continue; | |
2380c486 | 12313 | + |
7e46296a AM |
12314 | + if (!have_sig_page) { |
12315 | + have_sig_page = 1; | |
12316 | + sig_page_index = i; | |
12317 | + continue; | |
2380c486 | 12318 | + } |
2380c486 | 12319 | + |
7e46296a | 12320 | + pages_mapped++; |
2380c486 | 12321 | + |
7e46296a AM |
12322 | + /* Ignore first page - it has the header */ |
12323 | + if (pages_mapped == 1) | |
12324 | + continue; | |
2380c486 | 12325 | + |
7e46296a | 12326 | + new_sector = bmap(target_inode, (i * chain->blocks_per_page)); |
2380c486 | 12327 | + |
7e46296a AM |
12328 | + /* |
12329 | + * I'd love to be able to fill in holes and resize | |
12330 | + * files, but not yet... | |
12331 | + */ | |
2380c486 | 12332 | + |
7e46296a AM |
12333 | + if (new_sector == extent_max + 1) |
12334 | + extent_max += chain->blocks_per_page; | |
12335 | + else { | |
12336 | + if (extent_min > -1) { | |
12337 | + result = __populate_block_list(chain, | |
12338 | + extent_min, extent_max); | |
12339 | + if (result) | |
12340 | + return result; | |
12341 | + } | |
2380c486 | 12342 | + |
7e46296a AM |
12343 | + extent_min = new_sector; |
12344 | + extent_max = extent_min + | |
12345 | + chain->blocks_per_page - 1; | |
12346 | + } | |
2380c486 | 12347 | + |
7e46296a AM |
12348 | + if (pages_mapped == pages_allocated) |
12349 | + break; | |
12350 | + } | |
2380c486 | 12351 | + |
7e46296a AM |
12352 | + if (extent_min > -1) { |
12353 | + result = __populate_block_list(chain, extent_min, extent_max); | |
12354 | + if (result) | |
12355 | + return result; | |
12356 | + } | |
2380c486 | 12357 | + |
7e46296a | 12358 | + return 0; |
2380c486 JR |
12359 | +} |
12360 | + | |
7e46296a | 12361 | +static void toi_file_free_storage(struct toi_bdev_info *chain) |
2380c486 | 12362 | +{ |
7e46296a AM |
12363 | + pages_allocated = 0; |
12364 | + file_chain = NULL; | |
2380c486 JR |
12365 | +} |
12366 | + | |
12367 | +/** | |
7e46296a AM |
12368 | + * toi_file_print_debug_stats - print debug info |
12369 | + * @buffer: Buffer to data to populate | |
12370 | + * @size: Size of the buffer | |
2380c486 | 12371 | + **/ |
7e46296a | 12372 | +static int toi_file_print_debug_stats(char *buffer, int size) |
2380c486 | 12373 | +{ |
7e46296a AM |
12374 | + int len = scnprintf(buffer, size, "- File Allocator active.\n"); |
12375 | + | |
12376 | + len += scnprintf(buffer+len, size-len, " Storage available for " | |
12377 | + "image: %lu pages.\n", pages_available); | |
12378 | + | |
12379 | + return len; | |
2380c486 JR |
12380 | +} |
12381 | + | |
7e46296a | 12382 | +static void toi_file_cleanup(int finishing_cycle) |
2380c486 | 12383 | +{ |
7e46296a AM |
12384 | + if (toi_file_target_bdev) { |
12385 | + if (target_claim) { | |
12386 | + bd_release(toi_file_target_bdev); | |
12387 | + target_claim = 0; | |
12388 | + } | |
2380c486 | 12389 | + |
7e46296a AM |
12390 | + if (used_devt) { |
12391 | + blkdev_put(toi_file_target_bdev, | |
12392 | + FMODE_READ | FMODE_NDELAY); | |
12393 | + used_devt = 0; | |
2380c486 | 12394 | + } |
7e46296a AM |
12395 | + toi_file_target_bdev = NULL; |
12396 | + target_inode = NULL; | |
12397 | + } | |
12398 | + | |
12399 | + if (target_file) { | |
12400 | + filp_close(target_file, NULL); | |
12401 | + target_file = NULL; | |
2380c486 JR |
12402 | + } |
12403 | + | |
7e46296a AM |
12404 | + pages_available = 0; |
12405 | +} | |
2380c486 | 12406 | + |
7e46296a AM |
12407 | +/** |
12408 | + * test_toi_file_target - sysfs callback for /sys/power/tuxonice/file/target | |
12409 | + * | |
12410 | + * Test whether the target file is valid for hibernating. | |
12411 | + **/ | |
12412 | +static void test_toi_file_target(void) | |
12413 | +{ | |
12414 | + int result = toi_file_register_storage(); | |
12415 | + sector_t sector; | |
de6743ae | 12416 | + char buf[50]; |
5dd10c98 | 12417 | + struct fs_info *fs_info; |
7e46296a | 12418 | + |
5dd10c98 | 12419 | + if (result || !file_chain) |
7e46296a AM |
12420 | + return; |
12421 | + | |
12422 | + /* This doesn't mean we're in business. Is any storage available? */ | |
12423 | + if (!pages_available) | |
12424 | + goto out; | |
12425 | + | |
12426 | + toi_file_allocate_storage(file_chain, 1); | |
12427 | + result = get_main_pool_phys_params(file_chain); | |
12428 | + if (result) | |
12429 | + goto out; | |
12430 | + | |
12431 | + | |
12432 | + sector = bmap(target_inode, sig_page_index * | |
12433 | + file_chain->blocks_per_page) << file_chain->bmap_shift; | |
12434 | + | |
12435 | + /* Use the uuid, or the dev_t if that fails */ | |
5dd10c98 AM |
12436 | + fs_info = fs_info_from_block_dev(toi_file_target_bdev); |
12437 | + if (!fs_info || IS_ERR(fs_info)) { | |
7e46296a AM |
12438 | + bdevname(toi_file_target_bdev, buf); |
12439 | + sprintf(resume_file, "/dev/%s:%llu", buf, | |
12440 | + (unsigned long long) sector); | |
12441 | + } else { | |
12442 | + int i; | |
5dd10c98 | 12443 | + hex_dump_to_buffer(fs_info->uuid, 16, 32, 1, buf, 50, 0); |
7e46296a AM |
12444 | + |
12445 | + /* Remove the spaces */ | |
12446 | + for (i = 1; i < 16; i++) { | |
12447 | + buf[2 * i] = buf[3 * i]; | |
12448 | + buf[2 * i + 1] = buf[3 * i + 1]; | |
12449 | + } | |
12450 | + buf[32] = 0; | |
5dd10c98 | 12451 | + sprintf(resume_file, "UUID=%s:0x%llx", buf, |
7e46296a | 12452 | + (unsigned long long) sector); |
5dd10c98 | 12453 | + free_fs_info(fs_info); |
2380c486 JR |
12454 | + } |
12455 | + | |
7e46296a AM |
12456 | + toi_attempt_to_parse_resume_device(0); |
12457 | +out: | |
12458 | + toi_file_free_storage(file_chain); | |
12459 | + toi_bio_ops.free_storage(); | |
2380c486 JR |
12460 | +} |
12461 | + | |
12462 | +static struct toi_sysfs_data sysfs_params[] = { | |
2380c486 JR |
12463 | + SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256, |
12464 | + SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target), | |
7e46296a AM |
12465 | + SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, NULL), |
12466 | + SYSFS_INT("priority", SYSFS_RW, &file_target_priority, -4095, | |
12467 | + 4096, 0, NULL), | |
12468 | +}; | |
12469 | + | |
12470 | +static struct toi_bio_allocator_ops toi_bio_fileops = { | |
12471 | + .register_storage = toi_file_register_storage, | |
12472 | + .storage_available = toi_file_storage_available, | |
12473 | + .allocate_storage = toi_file_allocate_storage, | |
12474 | + .bmap = get_main_pool_phys_params, | |
12475 | + .free_storage = toi_file_free_storage, | |
2380c486 JR |
12476 | +}; |
12477 | + | |
12478 | +static struct toi_module_ops toi_fileops = { | |
7e46296a | 12479 | + .type = BIO_ALLOCATOR_MODULE, |
2380c486 JR |
12480 | + .name = "file storage", |
12481 | + .directory = "file", | |
12482 | + .module = THIS_MODULE, | |
12483 | + .print_debug_info = toi_file_print_debug_stats, | |
2380c486 | 12484 | + .cleanup = toi_file_cleanup, |
7e46296a | 12485 | + .bio_allocator_ops = &toi_bio_fileops, |
2380c486 JR |
12486 | + |
12487 | + .sysfs_data = sysfs_params, | |
12488 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
12489 | + sizeof(struct toi_sysfs_data), | |
12490 | +}; | |
12491 | + | |
12492 | +/* ---- Registration ---- */ | |
12493 | +static __init int toi_file_load(void) | |
12494 | +{ | |
2380c486 JR |
12495 | + return toi_register_module(&toi_fileops); |
12496 | +} | |
12497 | + | |
12498 | +#ifdef MODULE | |
12499 | +static __exit void toi_file_unload(void) | |
12500 | +{ | |
12501 | + toi_unregister_module(&toi_fileops); | |
12502 | +} | |
12503 | + | |
12504 | +module_init(toi_file_load); | |
12505 | +module_exit(toi_file_unload); | |
12506 | +MODULE_LICENSE("GPL"); | |
12507 | +MODULE_AUTHOR("Nigel Cunningham"); | |
12508 | +MODULE_DESCRIPTION("TuxOnIce FileAllocator"); | |
12509 | +#else | |
12510 | +late_initcall(toi_file_load); | |
12511 | +#endif | |
12512 | diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c | |
12513 | new file mode 100644 | |
85eb3c9d | 12514 | index 0000000..d2a53b2 |
2380c486 JR |
12515 | --- /dev/null |
12516 | +++ b/kernel/power/tuxonice_highlevel.c | |
85eb3c9d | 12517 | @@ -0,0 +1,1311 @@ |
2380c486 JR |
12518 | +/* |
12519 | + * kernel/power/tuxonice_highlevel.c | |
12520 | + */ | |
12521 | +/** \mainpage TuxOnIce. | |
12522 | + * | |
12523 | + * TuxOnIce provides support for saving and restoring an image of | |
12524 | + * system memory to an arbitrary storage device, either on the local computer, | |
12525 | + * or across some network. The support is entirely OS based, so TuxOnIce | |
12526 | + * works without requiring BIOS, APM or ACPI support. The vast majority of the | |
12527 | + * code is also architecture independent, so it should be very easy to port | |
12528 | + * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem | |
12529 | + * and preemption. Initramfses and initrds are also supported. | |
12530 | + * | |
12531 | + * TuxOnIce uses a modular design, in which the method of storing the image is | |
12532 | + * completely abstracted from the core code, as are transformations on the data | |
12533 | + * such as compression and/or encryption (multiple 'modules' can be used to | |
12534 | + * provide arbitrary combinations of functionality). The user interface is also | |
12535 | + * modular, so that arbitrarily simple or complex interfaces can be used to | |
12536 | + * provide anything from debugging information through to eye candy. | |
12537 | + * | |
12538 | + * \section Copyright | |
12539 | + * | |
12540 | + * TuxOnIce is released under the GPLv2. | |
12541 | + * | |
12542 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR> | |
12543 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR> | |
12544 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR> | |
5dd10c98 | 12545 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net)<BR> |
2380c486 JR |
12546 | + * |
12547 | + * \section Credits | |
12548 | + * | |
12549 | + * Nigel would like to thank the following people for their work: | |
12550 | + * | |
12551 | + * Bernard Blackham <bernard@blackham.com.au><BR> | |
12552 | + * Web page & Wiki administration, some coding. A person without whom | |
12553 | + * TuxOnIce would not be where it is. | |
12554 | + * | |
12555 | + * Michael Frank <mhf@linuxmail.org><BR> | |
12556 | + * Extensive testing and help with improving stability. I was constantly | |
12557 | + * amazed by the quality and quantity of Michael's help. | |
12558 | + * | |
12559 | + * Pavel Machek <pavel@ucw.cz><BR> | |
12560 | + * Modifications, defectiveness pointing, being with Gabor at the very | |
12561 | + * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and | |
12562 | + * 2.5.17. Even though Pavel and I disagree on the direction suspend to | |
12563 | + * disk should take, I appreciate the valuable work he did in helping Gabor | |
12564 | + * get the concept working. | |
12565 | + * | |
12566 | + * ..and of course the myriads of TuxOnIce users who have helped diagnose | |
12567 | + * and fix bugs, made suggestions on how to improve the code, proofread | |
12568 | + * documentation, and donated time and money. | |
12569 | + * | |
12570 | + * Thanks also to corporate sponsors: | |
12571 | + * | |
12572 | + * <B>Redhat.</B>Sometime employer from May 2006 (my fault, not Redhat's!). | |
12573 | + * | |
12574 | + * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who | |
12575 | + * allowed him to work on TuxOnIce and PM related issues on company time. | |
12576 | + * | |
12577 | + * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct | |
12578 | + * 2003 to Jan 2004. | |
12579 | + * | |
12580 | + * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing | |
12581 | + * maintenance of SMP and Highmem support. | |
12582 | + * | |
12583 | + * <B>OSDL.</B> Provided access to various hardware configurations, make | |
12584 | + * occasional small donations to the project. | |
12585 | + */ | |
12586 | + | |
12587 | +#include <linux/suspend.h> | |
2380c486 | 12588 | +#include <linux/freezer.h> |
5dd10c98 | 12589 | +#include <generated/utsrelease.h> |
2380c486 JR |
12590 | +#include <linux/cpu.h> |
12591 | +#include <linux/console.h> | |
12592 | +#include <linux/writeback.h> | |
12593 | +#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */ | |
7e46296a | 12594 | +#include <linux/bio.h> |
2380c486 JR |
12595 | + |
12596 | +#include "tuxonice.h" | |
12597 | +#include "tuxonice_modules.h" | |
12598 | +#include "tuxonice_sysfs.h" | |
12599 | +#include "tuxonice_prepare_image.h" | |
12600 | +#include "tuxonice_io.h" | |
12601 | +#include "tuxonice_ui.h" | |
12602 | +#include "tuxonice_power_off.h" | |
12603 | +#include "tuxonice_storage.h" | |
12604 | +#include "tuxonice_checksum.h" | |
12605 | +#include "tuxonice_builtin.h" | |
12606 | +#include "tuxonice_atomic_copy.h" | |
12607 | +#include "tuxonice_alloc.h" | |
12608 | +#include "tuxonice_cluster.h" | |
12609 | + | |
12610 | +/*! Pageset metadata. */ | |
12611 | +struct pagedir pagedir2 = {2}; | |
12612 | +EXPORT_SYMBOL_GPL(pagedir2); | |
12613 | + | |
12614 | +static mm_segment_t oldfs; | |
12615 | +static DEFINE_MUTEX(tuxonice_in_use); | |
12616 | +static int block_dump_save; | |
2380c486 JR |
12617 | + |
12618 | +/* Binary signature if an image is present */ | |
7e46296a | 12619 | +char tuxonice_signature[9] = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c"; |
2380c486 JR |
12620 | +EXPORT_SYMBOL_GPL(tuxonice_signature); |
12621 | + | |
2380c486 JR |
12622 | +unsigned long boot_kernel_data_buffer; |
12623 | + | |
12624 | +static char *result_strings[] = { | |
5dd10c98 | 12625 | + "Hibernation was aborted", |
2380c486 JR |
12626 | + "The user requested that we cancel the hibernation", |
12627 | + "No storage was available", | |
12628 | + "Insufficient storage was available", | |
12629 | + "Freezing filesystems and/or tasks failed", | |
12630 | + "A pre-existing image was used", | |
12631 | + "We would free memory, but image size limit doesn't allow this", | |
12632 | + "Unable to free enough memory to hibernate", | |
12633 | + "Unable to obtain the Power Management Semaphore", | |
12634 | + "A device suspend/resume returned an error", | |
12635 | + "A system device suspend/resume returned an error", | |
12636 | + "The extra pages allowance is too small", | |
12637 | + "We were unable to successfully prepare an image", | |
12638 | + "TuxOnIce module initialisation failed", | |
12639 | + "TuxOnIce module cleanup failed", | |
12640 | + "I/O errors were encountered", | |
12641 | + "Ran out of memory", | |
12642 | + "An error was encountered while reading the image", | |
12643 | + "Platform preparation failed", | |
12644 | + "CPU Hotplugging failed", | |
12645 | + "Architecture specific preparation failed", | |
12646 | + "Pages needed resaving, but we were told to abort if this happens", | |
12647 | + "We can't hibernate at the moment (invalid resume= or filewriter " | |
12648 | + "target?)", | |
12649 | + "A hibernation preparation notifier chain member cancelled the " | |
12650 | + "hibernation", | |
12651 | + "Pre-snapshot preparation failed", | |
12652 | + "Pre-restore preparation failed", | |
12653 | + "Failed to disable usermode helpers", | |
12654 | + "Can't resume from alternate image", | |
0ada99ac | 12655 | + "Header reservation too small", |
2380c486 JR |
12656 | +}; |
12657 | + | |
12658 | +/** | |
12659 | + * toi_finish_anything - cleanup after doing anything | |
12660 | + * @hibernate_or_resume: Whether finishing a cycle or attempt at | |
12661 | + * resuming. | |
12662 | + * | |
12663 | + * This is our basic clean-up routine, matching start_anything below. We | |
12664 | + * call cleanup routines, drop module references and restore process fs and | |
12665 | + * cpus allowed masks, together with the global block_dump variable's value. | |
12666 | + **/ | |
12667 | +void toi_finish_anything(int hibernate_or_resume) | |
12668 | +{ | |
12669 | + toi_cleanup_modules(hibernate_or_resume); | |
12670 | + toi_put_modules(); | |
12671 | + if (hibernate_or_resume) { | |
12672 | + block_dump = block_dump_save; | |
7e46296a | 12673 | + set_cpus_allowed_ptr(current, cpu_all_mask); |
2380c486 | 12674 | + toi_alloc_print_debug_stats(); |
2380c486 JR |
12675 | + atomic_inc(&snapshot_device_available); |
12676 | + mutex_unlock(&pm_mutex); | |
12677 | + } | |
12678 | + | |
12679 | + set_fs(oldfs); | |
12680 | + mutex_unlock(&tuxonice_in_use); | |
12681 | +} | |
12682 | + | |
12683 | +/** | |
12684 | + * toi_start_anything - basic initialisation for TuxOnIce | |
12685 | + * @toi_or_resume: Whether starting a cycle or attempt at resuming. | |
12686 | + * | |
12687 | + * Our basic initialisation routine. Take references on modules, use the | |
12688 | + * kernel segment, recheck resume= if no active allocator is set, initialise | |
12689 | + * modules, save and reset block_dump and ensure we're running on CPU0. | |
12690 | + **/ | |
12691 | +int toi_start_anything(int hibernate_or_resume) | |
12692 | +{ | |
2380c486 JR |
12693 | + mutex_lock(&tuxonice_in_use); |
12694 | + | |
12695 | + oldfs = get_fs(); | |
12696 | + set_fs(KERNEL_DS); | |
12697 | + | |
12698 | + if (hibernate_or_resume) { | |
12699 | + mutex_lock(&pm_mutex); | |
12700 | + | |
12701 | + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) | |
12702 | + goto snapshotdevice_unavailable; | |
12703 | + } | |
12704 | + | |
2380c486 JR |
12705 | + if (hibernate_or_resume == SYSFS_HIBERNATE) |
12706 | + toi_print_modules(); | |
12707 | + | |
12708 | + if (toi_get_modules()) { | |
12709 | + printk(KERN_INFO "TuxOnIce: Get modules failed!\n"); | |
12710 | + goto prehibernate_err; | |
12711 | + } | |
12712 | + | |
12713 | + if (hibernate_or_resume) { | |
12714 | + block_dump_save = block_dump; | |
12715 | + block_dump = 0; | |
7e46296a AM |
12716 | + set_cpus_allowed_ptr(current, |
12717 | + &cpumask_of_cpu(first_cpu(cpu_online_map))); | |
2380c486 JR |
12718 | + } |
12719 | + | |
12720 | + if (toi_initialise_modules_early(hibernate_or_resume)) | |
12721 | + goto early_init_err; | |
12722 | + | |
12723 | + if (!toiActiveAllocator) | |
12724 | + toi_attempt_to_parse_resume_device(!hibernate_or_resume); | |
12725 | + | |
12726 | + if (!toi_initialise_modules_late(hibernate_or_resume)) | |
12727 | + return 0; | |
12728 | + | |
12729 | + toi_cleanup_modules(hibernate_or_resume); | |
12730 | +early_init_err: | |
12731 | + if (hibernate_or_resume) { | |
12732 | + block_dump_save = block_dump; | |
7e46296a | 12733 | + set_cpus_allowed_ptr(current, cpu_all_mask); |
2380c486 | 12734 | + } |
7e46296a | 12735 | + toi_put_modules(); |
2380c486 JR |
12736 | +prehibernate_err: |
12737 | + if (hibernate_or_resume) | |
12738 | + atomic_inc(&snapshot_device_available); | |
12739 | +snapshotdevice_unavailable: | |
12740 | + if (hibernate_or_resume) | |
12741 | + mutex_unlock(&pm_mutex); | |
12742 | + set_fs(oldfs); | |
12743 | + mutex_unlock(&tuxonice_in_use); | |
12744 | + return -EBUSY; | |
12745 | +} | |
12746 | + | |
12747 | +/* | |
12748 | + * Nosave page tracking. | |
12749 | + * | |
12750 | + * Here rather than in prepare_image because we want to do it once only at the | |
12751 | + * start of a cycle. | |
12752 | + */ | |
12753 | + | |
12754 | +/** | |
12755 | + * mark_nosave_pages - set up our Nosave bitmap | |
12756 | + * | |
12757 | + * Build a bitmap of Nosave pages from the list. The bitmap allows faster | |
12758 | + * use when preparing the image. | |
12759 | + **/ | |
12760 | +static void mark_nosave_pages(void) | |
12761 | +{ | |
12762 | + struct nosave_region *region; | |
12763 | + | |
12764 | + list_for_each_entry(region, &nosave_regions, list) { | |
12765 | + unsigned long pfn; | |
12766 | + | |
12767 | + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) | |
12768 | + if (pfn_valid(pfn)) | |
12769 | + SetPageNosave(pfn_to_page(pfn)); | |
12770 | + } | |
12771 | +} | |
12772 | + | |
85eb3c9d | 12773 | +static int toi_alloc_bitmap(struct memory_bitmap **bm) |
2380c486 JR |
12774 | +{ |
12775 | + int result = 0; | |
12776 | + | |
12777 | + *bm = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); | |
12778 | + if (!*bm) { | |
12779 | + printk(KERN_ERR "Failed to kzalloc memory for a bitmap.\n"); | |
12780 | + return -ENOMEM; | |
12781 | + } | |
12782 | + | |
12783 | + result = memory_bm_create(*bm, GFP_KERNEL, 0); | |
12784 | + | |
12785 | + if (result) { | |
12786 | + printk(KERN_ERR "Failed to create a bitmap.\n"); | |
12787 | + kfree(*bm); | |
12788 | + } | |
12789 | + | |
12790 | + return result; | |
12791 | +} | |
12792 | + | |
12793 | +/** | |
12794 | + * allocate_bitmaps - allocate bitmaps used to record page states | |
12795 | + * | |
12796 | + * Allocate the bitmaps we use to record the various TuxOnIce related | |
12797 | + * page states. | |
12798 | + **/ | |
12799 | +static int allocate_bitmaps(void) | |
12800 | +{ | |
85eb3c9d AM |
12801 | + if (toi_alloc_bitmap(&pageset1_map) || |
12802 | + toi_alloc_bitmap(&pageset1_copy_map) || | |
12803 | + toi_alloc_bitmap(&pageset2_map) || | |
12804 | + toi_alloc_bitmap(&io_map) || | |
12805 | + toi_alloc_bitmap(&nosave_map) || | |
12806 | + toi_alloc_bitmap(&free_map) || | |
12807 | + toi_alloc_bitmap(&page_resave_map)) | |
2380c486 JR |
12808 | + return 1; |
12809 | + | |
12810 | + return 0; | |
12811 | +} | |
12812 | + | |
85eb3c9d | 12813 | +static void toi_free_bitmap(struct memory_bitmap **bm) |
2380c486 JR |
12814 | +{ |
12815 | + if (!*bm) | |
12816 | + return; | |
12817 | + | |
12818 | + memory_bm_free(*bm, 0); | |
12819 | + kfree(*bm); | |
12820 | + *bm = NULL; | |
12821 | +} | |
12822 | + | |
12823 | +/** | |
12824 | + * free_bitmaps - free the bitmaps used to record page states | |
12825 | + * | |
12826 | + * Free the bitmaps allocated above. It is not an error to call | |
12827 | + * memory_bm_free on a bitmap that isn't currently allocated. | |
12828 | + **/ | |
12829 | +static void free_bitmaps(void) | |
12830 | +{ | |
85eb3c9d AM |
12831 | + toi_free_bitmap(&pageset1_map); |
12832 | + toi_free_bitmap(&pageset1_copy_map); | |
12833 | + toi_free_bitmap(&pageset2_map); | |
12834 | + toi_free_bitmap(&io_map); | |
12835 | + toi_free_bitmap(&nosave_map); | |
12836 | + toi_free_bitmap(&free_map); | |
12837 | + toi_free_bitmap(&page_resave_map); | |
2380c486 JR |
12838 | +} |
12839 | + | |
12840 | +/** | |
12841 | + * io_MB_per_second - return the number of MB/s read or written | |
12842 | + * @write: Whether to return the speed at which we wrote. | |
12843 | + * | |
12844 | + * Calculate the number of megabytes per second that were read or written. | |
12845 | + **/ | |
12846 | +static int io_MB_per_second(int write) | |
12847 | +{ | |
12848 | + return (toi_bkd.toi_io_time[write][1]) ? | |
12849 | + MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ / | |
12850 | + toi_bkd.toi_io_time[write][1] : 0; | |
12851 | +} | |
12852 | + | |
12853 | +#define SNPRINTF(a...) do { len += scnprintf(((char *) buffer) + len, \ | |
12854 | + count - len - 1, ## a); } while (0) | |
12855 | + | |
12856 | +/** | |
12857 | + * get_debug_info - fill a buffer with debugging information | |
12858 | + * @buffer: The buffer to be filled. | |
12859 | + * @count: The size of the buffer, in bytes. | |
12860 | + * | |
12861 | + * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will | |
12862 | + * either printk or return via sysfs. | |
12863 | + **/ | |
12864 | +static int get_toi_debug_info(const char *buffer, int count) | |
12865 | +{ | |
12866 | + int len = 0, i, first_result = 1; | |
12867 | + | |
12868 | + SNPRINTF("TuxOnIce debugging info:\n"); | |
12869 | + SNPRINTF("- TuxOnIce core : " TOI_CORE_VERSION "\n"); | |
12870 | + SNPRINTF("- Kernel Version : " UTS_RELEASE "\n"); | |
12871 | + SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__); | |
12872 | + SNPRINTF("- Attempt number : %d\n", nr_hibernates); | |
7e46296a | 12873 | + SNPRINTF("- Parameters : %ld %ld %ld %d %ld %ld\n", |
2380c486 JR |
12874 | + toi_result, |
12875 | + toi_bkd.toi_action, | |
12876 | + toi_bkd.toi_debug_state, | |
12877 | + toi_bkd.toi_default_console_level, | |
12878 | + image_size_limit, | |
12879 | + toi_poweroff_method); | |
12880 | + SNPRINTF("- Overall expected compression percentage: %d.\n", | |
12881 | + 100 - toi_expected_compression_ratio()); | |
12882 | + len += toi_print_module_debug_info(((char *) buffer) + len, | |
12883 | + count - len - 1); | |
12884 | + if (toi_bkd.toi_io_time[0][1]) { | |
12885 | + if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) { | |
12886 | + SNPRINTF("- I/O speed: Write %ld KB/s", | |
12887 | + (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / | |
12888 | + toi_bkd.toi_io_time[0][1])); | |
12889 | + if (toi_bkd.toi_io_time[1][1]) | |
12890 | + SNPRINTF(", Read %ld KB/s", | |
12891 | + (KB((unsigned long) | |
12892 | + toi_bkd.toi_io_time[1][0]) * HZ / | |
12893 | + toi_bkd.toi_io_time[1][1])); | |
12894 | + } else { | |
12895 | + SNPRINTF("- I/O speed: Write %ld MB/s", | |
12896 | + (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / | |
12897 | + toi_bkd.toi_io_time[0][1])); | |
12898 | + if (toi_bkd.toi_io_time[1][1]) | |
12899 | + SNPRINTF(", Read %ld MB/s", | |
12900 | + (MB((unsigned long) | |
12901 | + toi_bkd.toi_io_time[1][0]) * HZ / | |
12902 | + toi_bkd.toi_io_time[1][1])); | |
12903 | + } | |
12904 | + SNPRINTF(".\n"); | |
12905 | + } else | |
12906 | + SNPRINTF("- No I/O speed stats available.\n"); | |
92bca44c | 12907 | + SNPRINTF("- Extra pages : %lu used/%lu.\n", |
2380c486 JR |
12908 | + extra_pd1_pages_used, extra_pd1_pages_allowance); |
12909 | + | |
12910 | + for (i = 0; i < TOI_NUM_RESULT_STATES; i++) | |
12911 | + if (test_result_state(i)) { | |
12912 | + SNPRINTF("%s: %s.\n", first_result ? | |
12913 | + "- Result " : | |
12914 | + " ", | |
12915 | + result_strings[i]); | |
12916 | + first_result = 0; | |
12917 | + } | |
12918 | + if (first_result) | |
12919 | + SNPRINTF("- Result : %s.\n", nr_hibernates ? | |
12920 | + "Succeeded" : | |
12921 | + "No hibernation attempts so far"); | |
12922 | + return len; | |
12923 | +} | |
12924 | + | |
12925 | +/** | |
12926 | + * do_cleanup - cleanup after attempting to hibernate or resume | |
12927 | + * @get_debug_info: Whether to allocate and return debugging info. | |
12928 | + * | |
12929 | + * Cleanup after attempting to hibernate or resume, possibly getting | |
12930 | + * debugging info as we do so. | |
12931 | + **/ | |
e999739a | 12932 | +static void do_cleanup(int get_debug_info, int restarting) |
2380c486 JR |
12933 | +{ |
12934 | + int i = 0; | |
12935 | + char *buffer = NULL; | |
12936 | + | |
7e46296a AM |
12937 | + trap_non_toi_io = 0; |
12938 | + | |
2380c486 JR |
12939 | + if (get_debug_info) |
12940 | + toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up..."); | |
12941 | + | |
12942 | + free_checksum_pages(); | |
12943 | + | |
12944 | + if (get_debug_info) | |
12945 | + buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP); | |
12946 | + | |
12947 | + if (buffer) | |
12948 | + i = get_toi_debug_info(buffer, PAGE_SIZE); | |
12949 | + | |
12950 | + toi_free_extra_pagedir_memory(); | |
12951 | + | |
12952 | + pagedir1.size = 0; | |
12953 | + pagedir2.size = 0; | |
12954 | + set_highmem_size(pagedir1, 0); | |
12955 | + set_highmem_size(pagedir2, 0); | |
12956 | + | |
12957 | + if (boot_kernel_data_buffer) { | |
12958 | + if (!test_toi_state(TOI_BOOT_KERNEL)) | |
12959 | + toi_free_page(37, boot_kernel_data_buffer); | |
12960 | + boot_kernel_data_buffer = 0; | |
12961 | + } | |
12962 | + | |
12963 | + clear_toi_state(TOI_BOOT_KERNEL); | |
12964 | + thaw_processes(); | |
12965 | + | |
2380c486 JR |
12966 | + if (test_action_state(TOI_KEEP_IMAGE) && |
12967 | + !test_result_state(TOI_ABORTED)) { | |
12968 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, | |
12969 | + "TuxOnIce: Not invalidating the image due " | |
7e46296a | 12970 | + "to Keep Image being enabled."); |
2380c486 JR |
12971 | + set_result_state(TOI_KEPT_IMAGE); |
12972 | + } else | |
2380c486 JR |
12973 | + if (toiActiveAllocator) |
12974 | + toiActiveAllocator->remove_image(); | |
12975 | + | |
12976 | + free_bitmaps(); | |
12977 | + usermodehelper_enable(); | |
12978 | + | |
12979 | + if (test_toi_state(TOI_NOTIFIERS_PREPARE)) { | |
12980 | + pm_notifier_call_chain(PM_POST_HIBERNATION); | |
12981 | + clear_toi_state(TOI_NOTIFIERS_PREPARE); | |
12982 | + } | |
12983 | + | |
12984 | + if (buffer && i) { | |
12985 | + /* Printk can only handle 1023 bytes, including | |
12986 | + * its level mangling. */ | |
12987 | + for (i = 0; i < 3; i++) | |
9474138d | 12988 | + printk(KERN_ERR "%s", buffer + (1023 * i)); |
2380c486 JR |
12989 | + toi_free_page(20, (unsigned long) buffer); |
12990 | + } | |
12991 | + | |
12992 | + if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) | |
12993 | + enable_nonboot_cpus(); | |
e999739a | 12994 | + |
12995 | + if (!restarting) | |
12996 | + toi_cleanup_console(); | |
2380c486 JR |
12997 | + |
12998 | + free_attention_list(); | |
12999 | + | |
e999739a | 13000 | + if (!restarting) |
13001 | + toi_deactivate_storage(0); | |
2380c486 JR |
13002 | + |
13003 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); | |
13004 | + clear_toi_state(TOI_TRYING_TO_RESUME); | |
13005 | + clear_toi_state(TOI_NOW_RESUMING); | |
13006 | +} | |
13007 | + | |
13008 | +/** | |
13009 | + * check_still_keeping_image - we kept an image; check whether to reuse it. | |
13010 | + * | |
13011 | + * We enter this routine when we have kept an image. If the user has said they | |
13012 | + * want to still keep it, all we need to do is powerdown. If powering down | |
13013 | + * means hibernating to ram and the power doesn't run out, we'll return 1. | |
13014 | + * If we do power off properly or the battery runs out, we'll resume via the | |
13015 | + * normal paths. | |
13016 | + * | |
13017 | + * If the user has said they want to remove the previously kept image, we | |
13018 | + * remove it, and return 0. We'll then store a new image. | |
13019 | + **/ | |
13020 | +static int check_still_keeping_image(void) | |
13021 | +{ | |
13022 | + if (test_action_state(TOI_KEEP_IMAGE)) { | |
e999739a | 13023 | + printk(KERN_INFO "Image already stored: powering down " |
13024 | + "immediately."); | |
2380c486 JR |
13025 | + do_toi_step(STEP_HIBERNATE_POWERDOWN); |
13026 | + return 1; /* Just in case we're using S3 */ | |
13027 | + } | |
13028 | + | |
e999739a | 13029 | + printk(KERN_INFO "Invalidating previous image.\n"); |
2380c486 JR |
13030 | + toiActiveAllocator->remove_image(); |
13031 | + | |
13032 | + return 0; | |
13033 | +} | |
13034 | + | |
13035 | +/** | |
13036 | + * toi_init - prepare to hibernate to disk | |
13037 | + * | |
13038 | + * Initialise variables & data structures, in preparation for | |
13039 | + * hibernating to disk. | |
13040 | + **/ | |
e999739a | 13041 | +static int toi_init(int restarting) |
2380c486 JR |
13042 | +{ |
13043 | + int result, i, j; | |
13044 | + | |
13045 | + toi_result = 0; | |
13046 | + | |
13047 | + printk(KERN_INFO "Initiating a hibernation cycle.\n"); | |
13048 | + | |
13049 | + nr_hibernates++; | |
13050 | + | |
13051 | + for (i = 0; i < 2; i++) | |
13052 | + for (j = 0; j < 2; j++) | |
13053 | + toi_bkd.toi_io_time[i][j] = 0; | |
13054 | + | |
13055 | + if (!test_toi_state(TOI_CAN_HIBERNATE) || | |
13056 | + allocate_bitmaps()) | |
13057 | + return 1; | |
13058 | + | |
13059 | + mark_nosave_pages(); | |
13060 | + | |
e999739a | 13061 | + if (!restarting) |
13062 | + toi_prepare_console(); | |
2380c486 JR |
13063 | + |
13064 | + result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); | |
13065 | + if (result) { | |
13066 | + set_result_state(TOI_NOTIFIERS_PREPARE_FAILED); | |
13067 | + return 1; | |
13068 | + } | |
13069 | + set_toi_state(TOI_NOTIFIERS_PREPARE); | |
13070 | + | |
13071 | + result = usermodehelper_disable(); | |
13072 | + if (result) { | |
13073 | + printk(KERN_ERR "TuxOnIce: Failed to disable usermode " | |
13074 | + "helpers\n"); | |
13075 | + set_result_state(TOI_USERMODE_HELPERS_ERR); | |
13076 | + return 1; | |
13077 | + } | |
13078 | + | |
13079 | + boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP); | |
13080 | + if (!boot_kernel_data_buffer) { | |
13081 | + printk(KERN_ERR "TuxOnIce: Failed to allocate " | |
13082 | + "boot_kernel_data_buffer.\n"); | |
13083 | + set_result_state(TOI_OUT_OF_MEMORY); | |
13084 | + return 1; | |
13085 | + } | |
13086 | + | |
13087 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG) || | |
13088 | + !disable_nonboot_cpus()) | |
13089 | + return 1; | |
13090 | + | |
13091 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
13092 | + return 0; | |
13093 | +} | |
13094 | + | |
13095 | +/** | |
13096 | + * can_hibernate - perform basic 'Can we hibernate?' tests | |
13097 | + * | |
13098 | + * Perform basic tests that must pass if we're going to be able to hibernate: | |
13099 | + * Can we get the pm_mutex? Is resume= valid (we need to know where to write | |
13100 | + * the image header). | |
13101 | + **/ | |
13102 | +static int can_hibernate(void) | |
13103 | +{ | |
13104 | + if (!test_toi_state(TOI_CAN_HIBERNATE)) | |
13105 | + toi_attempt_to_parse_resume_device(0); | |
13106 | + | |
13107 | + if (!test_toi_state(TOI_CAN_HIBERNATE)) { | |
13108 | + printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n" | |
13109 | + "This may be because you haven't put something along " | |
13110 | + "the lines of\n\nresume=swap:/dev/hda1\n\n" | |
13111 | + "in lilo.conf or equivalent. (Where /dev/hda1 is your " | |
13112 | + "swap partition).\n"); | |
13113 | + set_abort_result(TOI_CANT_SUSPEND); | |
13114 | + return 0; | |
13115 | + } | |
13116 | + | |
13117 | + if (strlen(alt_resume_param)) { | |
13118 | + attempt_to_parse_alt_resume_param(); | |
13119 | + | |
13120 | + if (!strlen(alt_resume_param)) { | |
13121 | + printk(KERN_INFO "Alternate resume parameter now " | |
13122 | + "invalid. Aborting.\n"); | |
13123 | + set_abort_result(TOI_CANT_USE_ALT_RESUME); | |
13124 | + return 0; | |
13125 | + } | |
13126 | + } | |
13127 | + | |
13128 | + return 1; | |
13129 | +} | |
13130 | + | |
13131 | +/** | |
13132 | + * do_post_image_write - having written an image, figure out what to do next | |
13133 | + * | |
13134 | + * After writing an image, we might load an alternate image or power down. | |
13135 | + * Powering down might involve hibernating to ram, in which case we also | |
13136 | + * need to handle reloading pageset2. | |
13137 | + **/ | |
13138 | +static int do_post_image_write(void) | |
13139 | +{ | |
13140 | + /* If switching images fails, do normal powerdown */ | |
13141 | + if (alt_resume_param[0]) | |
13142 | + do_toi_step(STEP_RESUME_ALT_IMAGE); | |
13143 | + | |
13144 | + toi_power_down(); | |
13145 | + | |
13146 | + barrier(); | |
13147 | + mb(); | |
13148 | + return 0; | |
13149 | +} | |
13150 | + | |
13151 | +/** | |
13152 | + * __save_image - do the hard work of saving the image | |
13153 | + * | |
13154 | + * High level routine for getting the image saved. The key assumptions made | |
13155 | + * are that processes have been frozen and sufficient memory is available. | |
13156 | + * | |
13157 | + * We also exit through here at resume time, coming back from toi_hibernate | |
13158 | + * after the atomic restore. This is the reason for the toi_in_hibernate | |
13159 | + * test. | |
13160 | + **/ | |
13161 | +static int __save_image(void) | |
13162 | +{ | |
13163 | + int temp_result, did_copy = 0; | |
13164 | + | |
13165 | + toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image.."); | |
13166 | + | |
13167 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, | |
7e46296a | 13168 | + " - Final values: %d and %d.", |
2380c486 JR |
13169 | + pagedir1.size, pagedir2.size); |
13170 | + | |
13171 | + toi_cond_pause(1, "About to write pagedir2."); | |
13172 | + | |
13173 | + temp_result = write_pageset(&pagedir2); | |
13174 | + | |
13175 | + if (temp_result == -1 || test_result_state(TOI_ABORTED)) | |
13176 | + return 1; | |
13177 | + | |
13178 | + toi_cond_pause(1, "About to copy pageset 1."); | |
13179 | + | |
13180 | + if (test_result_state(TOI_ABORTED)) | |
13181 | + return 1; | |
13182 | + | |
13183 | + toi_deactivate_storage(1); | |
13184 | + | |
13185 | + toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); | |
13186 | + | |
13187 | + toi_in_hibernate = 1; | |
13188 | + | |
13189 | + if (toi_go_atomic(PMSG_FREEZE, 1)) | |
13190 | + goto Failed; | |
13191 | + | |
13192 | + temp_result = toi_hibernate(); | |
13193 | + if (!temp_result) | |
13194 | + did_copy = 1; | |
13195 | + | |
13196 | + /* We return here at resume time too! */ | |
13197 | + toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result); | |
13198 | + | |
13199 | +Failed: | |
13200 | + if (toi_activate_storage(1)) | |
13201 | + panic("Failed to reactivate our storage."); | |
13202 | + | |
13203 | + /* Resume time? */ | |
13204 | + if (!toi_in_hibernate) { | |
13205 | + copyback_post(); | |
13206 | + return 0; | |
13207 | + } | |
13208 | + | |
13209 | + /* Nope. Hibernating. So, see if we can save the image... */ | |
13210 | + | |
13211 | + if (temp_result || test_result_state(TOI_ABORTED)) { | |
13212 | + if (did_copy) | |
13213 | + goto abort_reloading_pagedir_two; | |
13214 | + else | |
13215 | + return 1; | |
13216 | + } | |
13217 | + | |
13218 | + toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size, | |
13219 | + NULL); | |
13220 | + | |
13221 | + if (test_result_state(TOI_ABORTED)) | |
13222 | + goto abort_reloading_pagedir_two; | |
13223 | + | |
13224 | + toi_cond_pause(1, "About to write pageset1."); | |
13225 | + | |
7e46296a | 13226 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1"); |
2380c486 JR |
13227 | + |
13228 | + temp_result = write_pageset(&pagedir1); | |
13229 | + | |
13230 | + /* We didn't overwrite any memory, so no reread needs to be done. */ | |
85eb3c9d AM |
13231 | + if (test_action_state(TOI_TEST_FILTER_SPEED) || |
13232 | + test_action_state(TOI_TEST_BIO)) | |
2380c486 JR |
13233 | + return 1; |
13234 | + | |
13235 | + if (temp_result == 1 || test_result_state(TOI_ABORTED)) | |
13236 | + goto abort_reloading_pagedir_two; | |
13237 | + | |
13238 | + toi_cond_pause(1, "About to write header."); | |
13239 | + | |
13240 | + if (test_result_state(TOI_ABORTED)) | |
13241 | + goto abort_reloading_pagedir_two; | |
13242 | + | |
13243 | + temp_result = write_image_header(); | |
13244 | + | |
2380c486 JR |
13245 | + if (!temp_result && !test_result_state(TOI_ABORTED)) |
13246 | + return 0; | |
13247 | + | |
13248 | +abort_reloading_pagedir_two: | |
13249 | + temp_result = read_pageset2(1); | |
13250 | + | |
13251 | + /* If that failed, we're sunk. Panic! */ | |
13252 | + if (temp_result) | |
13253 | + panic("Attempt to reload pagedir 2 while aborting " | |
13254 | + "a hibernate failed."); | |
13255 | + | |
13256 | + return 1; | |
13257 | +} | |
13258 | + | |
13259 | +static void map_ps2_pages(int enable) | |
13260 | +{ | |
13261 | + unsigned long pfn = 0; | |
13262 | + | |
13263 | + pfn = memory_bm_next_pfn(pageset2_map); | |
13264 | + | |
13265 | + while (pfn != BM_END_OF_MAP) { | |
13266 | + struct page *page = pfn_to_page(pfn); | |
13267 | + kernel_map_pages(page, 1, enable); | |
13268 | + pfn = memory_bm_next_pfn(pageset2_map); | |
13269 | + } | |
13270 | +} | |
13271 | + | |
13272 | +/** | |
13273 | + * do_save_image - save the image and handle the result | |
13274 | + * | |
13275 | + * Save the prepared image. If we fail or we're in the path returning | |
13276 | + * from the atomic restore, cleanup. | |
13277 | + **/ | |
13278 | +static int do_save_image(void) | |
13279 | +{ | |
13280 | + int result; | |
13281 | + map_ps2_pages(0); | |
13282 | + result = __save_image(); | |
13283 | + map_ps2_pages(1); | |
13284 | + return result; | |
13285 | +} | |
13286 | + | |
13287 | +/** | |
13288 | + * do_prepare_image - try to prepare an image | |
13289 | + * | |
13290 | + * Seek to initialise and prepare an image to be saved. On failure, | |
13291 | + * cleanup. | |
13292 | + **/ | |
13293 | +static int do_prepare_image(void) | |
13294 | +{ | |
e999739a | 13295 | + int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); |
13296 | + | |
13297 | + if (!restarting && toi_activate_storage(0)) | |
2380c486 JR |
13298 | + return 1; |
13299 | + | |
13300 | + /* | |
13301 | + * If kept image and still keeping image and hibernating to RAM, we will | |
13302 | + * return 1 after hibernating and resuming (provided the power doesn't | |
13303 | + * run out. In that case, we skip directly to cleaning up and exiting. | |
13304 | + */ | |
13305 | + | |
13306 | + if (!can_hibernate() || | |
13307 | + (test_result_state(TOI_KEPT_IMAGE) && | |
13308 | + check_still_keeping_image())) | |
13309 | + return 1; | |
13310 | + | |
e999739a | 13311 | + if (toi_init(restarting) && !toi_prepare_image() && |
2380c486 JR |
13312 | + !test_result_state(TOI_ABORTED)) |
13313 | + return 0; | |
13314 | + | |
7e46296a AM |
13315 | + trap_non_toi_io = 1; |
13316 | + | |
2380c486 JR |
13317 | + return 1; |
13318 | +} | |
13319 | + | |
13320 | +/** | |
13321 | + * do_check_can_resume - find out whether an image has been stored | |
13322 | + * | |
13323 | + * Read whether an image exists. We use the same routine as the | |
13324 | + * image_exists sysfs entry, and just look to see whether the | |
13325 | + * first character in the resulting buffer is a '1'. | |
13326 | + **/ | |
13327 | +int do_check_can_resume(void) | |
13328 | +{ | |
7e46296a | 13329 | + int result = -1; |
2380c486 | 13330 | + |
7e46296a AM |
13331 | + if (toi_activate_storage(0)) |
13332 | + return -1; | |
2380c486 | 13333 | + |
7e46296a AM |
13334 | + if (!test_toi_state(TOI_RESUME_DEVICE_OK)) |
13335 | + toi_attempt_to_parse_resume_device(1); | |
2380c486 | 13336 | + |
7e46296a AM |
13337 | + if (toiActiveAllocator) |
13338 | + result = toiActiveAllocator->image_exists(1); | |
2380c486 | 13339 | + |
7e46296a | 13340 | + toi_deactivate_storage(0); |
2380c486 JR |
13341 | + return result; |
13342 | +} | |
13343 | +EXPORT_SYMBOL_GPL(do_check_can_resume); | |
13344 | + | |
13345 | +/** | |
13346 | + * do_load_atomic_copy - load the first part of an image, if it exists | |
13347 | + * | |
13348 | + * Check whether we have an image. If one exists, do sanity checking | |
13349 | + * (possibly invalidating the image or even rebooting if the user | |
13350 | + * requests that) before loading it into memory in preparation for the | |
13351 | + * atomic restore. | |
13352 | + * | |
13353 | + * If and only if we have an image loaded and ready to restore, we return 1. | |
13354 | + **/ | |
13355 | +static int do_load_atomic_copy(void) | |
13356 | +{ | |
13357 | + int read_image_result = 0; | |
13358 | + | |
13359 | + if (sizeof(swp_entry_t) != sizeof(long)) { | |
13360 | + printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size" | |
13361 | + " of long. Please report this!\n"); | |
13362 | + return 1; | |
13363 | + } | |
13364 | + | |
13365 | + if (!resume_file[0]) | |
13366 | + printk(KERN_WARNING "TuxOnIce: " | |
13367 | + "You need to use a resume= command line parameter to " | |
13368 | + "tell TuxOnIce where to look for an image.\n"); | |
13369 | + | |
13370 | + toi_activate_storage(0); | |
13371 | + | |
13372 | + if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) && | |
13373 | + !toi_attempt_to_parse_resume_device(0)) { | |
13374 | + /* | |
13375 | + * Without a usable storage device we can do nothing - | |
13376 | + * even if noresume is given | |
13377 | + */ | |
13378 | + | |
13379 | + if (!toiNumAllocators) | |
13380 | + printk(KERN_ALERT "TuxOnIce: " | |
13381 | + "No storage allocators have been registered.\n"); | |
13382 | + else | |
13383 | + printk(KERN_ALERT "TuxOnIce: " | |
13384 | + "Missing or invalid storage location " | |
13385 | + "(resume= parameter). Please correct and " | |
13386 | + "rerun lilo (or equivalent) before " | |
13387 | + "hibernating.\n"); | |
13388 | + toi_deactivate_storage(0); | |
13389 | + return 1; | |
13390 | + } | |
13391 | + | |
13392 | + if (allocate_bitmaps()) | |
13393 | + return 1; | |
13394 | + | |
13395 | + read_image_result = read_pageset1(); /* non fatal error ignored */ | |
13396 | + | |
13397 | + if (test_toi_state(TOI_NORESUME_SPECIFIED)) | |
13398 | + clear_toi_state(TOI_NORESUME_SPECIFIED); | |
13399 | + | |
13400 | + toi_deactivate_storage(0); | |
13401 | + | |
13402 | + if (read_image_result) | |
13403 | + return 1; | |
13404 | + | |
13405 | + return 0; | |
13406 | +} | |
13407 | + | |
13408 | +/** | |
13409 | + * prepare_restore_load_alt_image - save & restore alt image variables | |
13410 | + * | |
13411 | + * Save and restore the pageset1 maps, when loading an alternate image. | |
13412 | + **/ | |
13413 | +static void prepare_restore_load_alt_image(int prepare) | |
13414 | +{ | |
13415 | + static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save; | |
13416 | + | |
13417 | + if (prepare) { | |
13418 | + pageset1_map_save = pageset1_map; | |
13419 | + pageset1_map = NULL; | |
13420 | + pageset1_copy_map_save = pageset1_copy_map; | |
13421 | + pageset1_copy_map = NULL; | |
13422 | + set_toi_state(TOI_LOADING_ALT_IMAGE); | |
13423 | + toi_reset_alt_image_pageset2_pfn(); | |
13424 | + } else { | |
13425 | + memory_bm_free(pageset1_map, 0); | |
13426 | + pageset1_map = pageset1_map_save; | |
13427 | + memory_bm_free(pageset1_copy_map, 0); | |
13428 | + pageset1_copy_map = pageset1_copy_map_save; | |
13429 | + clear_toi_state(TOI_NOW_RESUMING); | |
13430 | + clear_toi_state(TOI_LOADING_ALT_IMAGE); | |
13431 | + } | |
13432 | +} | |
13433 | + | |
13434 | +/** | |
13435 | + * do_toi_step - perform a step in hibernating or resuming | |
13436 | + * | |
13437 | + * Perform a step in hibernating or resuming an image. This abstraction | |
13438 | + * is in preparation for implementing cluster support, and perhaps replacing | |
13439 | + * uswsusp too (haven't looked whether that's possible yet). | |
13440 | + **/ | |
13441 | +int do_toi_step(int step) | |
13442 | +{ | |
13443 | + switch (step) { | |
13444 | + case STEP_HIBERNATE_PREPARE_IMAGE: | |
13445 | + return do_prepare_image(); | |
13446 | + case STEP_HIBERNATE_SAVE_IMAGE: | |
13447 | + return do_save_image(); | |
13448 | + case STEP_HIBERNATE_POWERDOWN: | |
13449 | + return do_post_image_write(); | |
13450 | + case STEP_RESUME_CAN_RESUME: | |
13451 | + return do_check_can_resume(); | |
13452 | + case STEP_RESUME_LOAD_PS1: | |
13453 | + return do_load_atomic_copy(); | |
13454 | + case STEP_RESUME_DO_RESTORE: | |
13455 | + /* | |
13456 | + * If we succeed, this doesn't return. | |
13457 | + * Instead, we return from do_save_image() in the | |
13458 | + * hibernated kernel. | |
13459 | + */ | |
13460 | + return toi_atomic_restore(); | |
13461 | + case STEP_RESUME_ALT_IMAGE: | |
13462 | + printk(KERN_INFO "Trying to resume alternate image.\n"); | |
13463 | + toi_in_hibernate = 0; | |
13464 | + save_restore_alt_param(SAVE, NOQUIET); | |
13465 | + prepare_restore_load_alt_image(1); | |
13466 | + if (!do_check_can_resume()) { | |
13467 | + printk(KERN_INFO "Nothing to resume from.\n"); | |
13468 | + goto out; | |
13469 | + } | |
13470 | + if (!do_load_atomic_copy()) | |
13471 | + toi_atomic_restore(); | |
13472 | + | |
13473 | + printk(KERN_INFO "Failed to load image.\n"); | |
13474 | +out: | |
13475 | + prepare_restore_load_alt_image(0); | |
13476 | + save_restore_alt_param(RESTORE, NOQUIET); | |
13477 | + break; | |
13478 | + case STEP_CLEANUP: | |
e999739a | 13479 | + do_cleanup(1, 0); |
2380c486 JR |
13480 | + break; |
13481 | + case STEP_QUIET_CLEANUP: | |
e999739a | 13482 | + do_cleanup(0, 0); |
2380c486 JR |
13483 | + break; |
13484 | + } | |
13485 | + | |
13486 | + return 0; | |
13487 | +} | |
13488 | +EXPORT_SYMBOL_GPL(do_toi_step); | |
13489 | + | |
13490 | +/* -- Functions for kickstarting a hibernate or resume --- */ | |
13491 | + | |
13492 | +/** | |
9474138d | 13493 | + * toi_try_resume - try to do the steps in resuming |
2380c486 JR |
13494 | + * |
13495 | + * Check if we have an image and if so try to resume. Clear the status | |
13496 | + * flags too. | |
13497 | + **/ | |
9474138d | 13498 | +void toi_try_resume(void) |
2380c486 JR |
13499 | +{ |
13500 | + set_toi_state(TOI_TRYING_TO_RESUME); | |
13501 | + resume_attempted = 1; | |
13502 | + | |
13503 | + current->flags |= PF_MEMALLOC; | |
13504 | + | |
13505 | + if (do_toi_step(STEP_RESUME_CAN_RESUME) && | |
13506 | + !do_toi_step(STEP_RESUME_LOAD_PS1)) | |
13507 | + do_toi_step(STEP_RESUME_DO_RESTORE); | |
13508 | + | |
e999739a | 13509 | + do_cleanup(0, 0); |
2380c486 JR |
13510 | + |
13511 | + current->flags &= ~PF_MEMALLOC; | |
13512 | + | |
13513 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); | |
13514 | + clear_toi_state(TOI_TRYING_TO_RESUME); | |
13515 | + clear_toi_state(TOI_NOW_RESUMING); | |
13516 | +} | |
13517 | + | |
13518 | +/** | |
9474138d | 13519 | + * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume |
2380c486 | 13520 | + * |
9474138d | 13521 | + * Wrapper for when __toi_try_resume is called from swsusp resume path, |
2380c486 JR |
13522 | + * rather than from echo > /sys/power/tuxonice/do_resume. |
13523 | + **/ | |
9474138d | 13524 | +static void toi_sys_power_disk_try_resume(void) |
2380c486 JR |
13525 | +{ |
13526 | + resume_attempted = 1; | |
13527 | + | |
13528 | + /* | |
13529 | + * There's a comment in kernel/power/disk.c that indicates | |
13530 | + * we should be able to use mutex_lock_nested below. That | |
13531 | + * doesn't seem to cut it, though, so let's just turn lockdep | |
13532 | + * off for now. | |
13533 | + */ | |
13534 | + lockdep_off(); | |
13535 | + | |
13536 | + if (toi_start_anything(SYSFS_RESUMING)) | |
13537 | + goto out; | |
13538 | + | |
9474138d | 13539 | + toi_try_resume(); |
2380c486 JR |
13540 | + |
13541 | + /* | |
13542 | + * For initramfs, we have to clear the boot time | |
13543 | + * flag after trying to resume | |
13544 | + */ | |
13545 | + clear_toi_state(TOI_BOOT_TIME); | |
13546 | + | |
13547 | + toi_finish_anything(SYSFS_RESUMING); | |
13548 | +out: | |
13549 | + lockdep_on(); | |
13550 | +} | |
13551 | + | |
13552 | +/** | |
9474138d | 13553 | + * toi_try_hibernate - try to start a hibernation cycle |
2380c486 JR |
13554 | + * |
13555 | + * Start a hibernation cycle, coming in from either | |
13556 | + * echo > /sys/power/tuxonice/do_suspend | |
13557 | + * | |
13558 | + * or | |
13559 | + * | |
13560 | + * echo disk > /sys/power/state | |
13561 | + * | |
13562 | + * In the later case, we come in without pm_sem taken; in the | |
13563 | + * former, it has been taken. | |
13564 | + **/ | |
9474138d | 13565 | +int toi_try_hibernate(void) |
2380c486 JR |
13566 | +{ |
13567 | + int result = 0, sys_power_disk = 0, retries = 0; | |
13568 | + | |
13569 | + if (!mutex_is_locked(&tuxonice_in_use)) { | |
13570 | + /* Came in via /sys/power/disk */ | |
13571 | + if (toi_start_anything(SYSFS_HIBERNATING)) | |
13572 | + return -EBUSY; | |
13573 | + sys_power_disk = 1; | |
13574 | + } | |
13575 | + | |
13576 | + current->flags |= PF_MEMALLOC; | |
13577 | + | |
13578 | + if (test_toi_state(TOI_CLUSTER_MODE)) { | |
13579 | + toi_initiate_cluster_hibernate(); | |
13580 | + goto out; | |
13581 | + } | |
13582 | + | |
13583 | +prepare: | |
13584 | + result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); | |
13585 | + | |
13586 | + if (result || test_action_state(TOI_FREEZER_TEST)) | |
13587 | + goto out; | |
13588 | + | |
13589 | + result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); | |
13590 | + | |
13591 | + if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) { | |
13592 | + if (retries < 2) { | |
e999739a | 13593 | + do_cleanup(0, 1); |
2380c486 | 13594 | + retries++; |
e999739a | 13595 | + clear_result_state(TOI_ABORTED); |
2380c486 JR |
13596 | + extra_pd1_pages_allowance = extra_pd1_pages_used + 500; |
13597 | + printk(KERN_INFO "Automatically adjusting the extra" | |
13598 | + " pages allowance to %ld and restarting.\n", | |
13599 | + extra_pd1_pages_allowance); | |
13600 | + goto prepare; | |
13601 | + } | |
13602 | + | |
13603 | + printk(KERN_INFO "Adjusted extra pages allowance twice and " | |
13604 | + "still couldn't hibernate successfully. Giving up."); | |
13605 | + } | |
13606 | + | |
13607 | + /* This code runs at resume time too! */ | |
13608 | + if (!result && toi_in_hibernate) | |
13609 | + result = do_toi_step(STEP_HIBERNATE_POWERDOWN); | |
13610 | +out: | |
e999739a | 13611 | + do_cleanup(1, 0); |
2380c486 JR |
13612 | + current->flags &= ~PF_MEMALLOC; |
13613 | + | |
13614 | + if (sys_power_disk) | |
13615 | + toi_finish_anything(SYSFS_HIBERNATING); | |
13616 | + | |
13617 | + return result; | |
13618 | +} | |
13619 | + | |
13620 | +/* | |
13621 | + * channel_no: If !0, -c <channel_no> is added to args (userui). | |
13622 | + */ | |
13623 | +int toi_launch_userspace_program(char *command, int channel_no, | |
13624 | + enum umh_wait wait, int debug) | |
13625 | +{ | |
13626 | + int retval; | |
13627 | + static char *envp[] = { | |
13628 | + "HOME=/", | |
13629 | + "TERM=linux", | |
13630 | + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | |
13631 | + NULL }; | |
5dd10c98 AM |
13632 | + static char *argv[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL |
13633 | + }; | |
2380c486 JR |
13634 | + char *channel = NULL; |
13635 | + int arg = 0, size; | |
13636 | + char test_read[255]; | |
13637 | + char *orig_posn = command; | |
13638 | + | |
13639 | + if (!strlen(orig_posn)) | |
13640 | + return 1; | |
13641 | + | |
13642 | + if (channel_no) { | |
13643 | + channel = toi_kzalloc(4, 6, GFP_KERNEL); | |
13644 | + if (!channel) { | |
13645 | + printk(KERN_INFO "Failed to allocate memory in " | |
13646 | + "preparing to launch userspace program.\n"); | |
13647 | + return 1; | |
13648 | + } | |
13649 | + } | |
13650 | + | |
13651 | + /* Up to 6 args supported */ | |
13652 | + while (arg < 6) { | |
13653 | + sscanf(orig_posn, "%s", test_read); | |
13654 | + size = strlen(test_read); | |
13655 | + if (!(size)) | |
13656 | + break; | |
13657 | + argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP); | |
13658 | + strcpy(argv[arg], test_read); | |
13659 | + orig_posn += size + 1; | |
13660 | + *test_read = 0; | |
13661 | + arg++; | |
13662 | + } | |
13663 | + | |
13664 | + if (channel_no) { | |
13665 | + sprintf(channel, "-c%d", channel_no); | |
13666 | + argv[arg] = channel; | |
13667 | + } else | |
13668 | + arg--; | |
13669 | + | |
13670 | + if (debug) { | |
13671 | + argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP); | |
13672 | + strcpy(argv[arg], "--debug"); | |
13673 | + } | |
13674 | + | |
13675 | + retval = call_usermodehelper(argv[0], argv, envp, wait); | |
13676 | + | |
13677 | + /* | |
13678 | + * If the program reports an error, retval = 256. Don't complain | |
13679 | + * about that here. | |
13680 | + */ | |
13681 | + if (retval && retval != 256) | |
e999739a | 13682 | + printk(KERN_ERR "Failed to launch userspace program '%s': " |
13683 | + "Error %d\n", command, retval); | |
2380c486 JR |
13684 | + |
13685 | + { | |
13686 | + int i; | |
13687 | + for (i = 0; i < arg; i++) | |
13688 | + if (argv[i] && argv[i] != channel) | |
7e46296a | 13689 | + toi_kfree(5, argv[i], sizeof(*argv[i])); |
2380c486 JR |
13690 | + } |
13691 | + | |
9474138d | 13692 | + toi_kfree(4, channel, sizeof(*channel)); |
2380c486 JR |
13693 | + |
13694 | + return retval; | |
13695 | +} | |
13696 | + | |
13697 | +/* | |
13698 | + * This array contains entries that are automatically registered at | |
13699 | + * boot. Modules and the console code register their own entries separately. | |
13700 | + */ | |
13701 | +static struct toi_sysfs_data sysfs_params[] = { | |
92bca44c | 13702 | + SYSFS_INT("freezer_sync", SYSFS_RW, &freezer_sync, 0, 1, 0, NULL), |
2380c486 JR |
13703 | + SYSFS_LONG("extra_pages_allowance", SYSFS_RW, |
13704 | + &extra_pd1_pages_allowance, 0, LONG_MAX, 0), | |
13705 | + SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read, | |
13706 | + image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL), | |
13707 | + SYSFS_STRING("resume", SYSFS_RW, resume_file, 255, | |
13708 | + SYSFS_NEEDS_SM_FOR_WRITE, | |
13709 | + attempt_to_parse_resume_device2), | |
13710 | + SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255, | |
13711 | + SYSFS_NEEDS_SM_FOR_WRITE, | |
13712 | + attempt_to_parse_alt_resume_param), | |
13713 | + SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0, | |
13714 | + NULL), | |
13715 | + SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action, | |
13716 | + TOI_IGNORE_ROOTFS, 0), | |
7e46296a AM |
13717 | + SYSFS_LONG("image_size_limit", SYSFS_RW, &image_size_limit, -2, |
13718 | + INT_MAX, 0), | |
2380c486 JR |
13719 | + SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0), |
13720 | + SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action, | |
13721 | + TOI_NO_MULTITHREADED_IO, 0), | |
13722 | + SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action, | |
13723 | + TOI_NO_FLUSHER_THREAD, 0), | |
13724 | + SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action, | |
13725 | + TOI_PAGESET2_FULL, 0), | |
13726 | + SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0), | |
13727 | + SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action, | |
13728 | + TOI_REPLACE_SWSUSP, 0), | |
13729 | + SYSFS_STRING("resume_commandline", SYSFS_RW, | |
13730 | + toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0, | |
13731 | + NULL), | |
13732 | + SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL), | |
2380c486 JR |
13733 | + SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action, |
13734 | + TOI_FREEZER_TEST, 0), | |
13735 | + SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0), | |
13736 | + SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action, | |
13737 | + TOI_TEST_FILTER_SPEED, 0), | |
13738 | + SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action, | |
13739 | + TOI_NO_PAGESET2, 0), | |
13740 | + SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action, | |
13741 | + TOI_NO_PS2_IF_UNNEEDED, 0), | |
13742 | + SYSFS_BIT("late_cpu_hotplug", SYSFS_RW, &toi_bkd.toi_action, | |
13743 | + TOI_LATE_CPU_HOTPLUG, 0), | |
7e46296a AM |
13744 | + SYSFS_STRING("binary_signature", SYSFS_READONLY, |
13745 | + tuxonice_signature, 9, 0, NULL), | |
5dd10c98 AM |
13746 | + SYSFS_INT("max_workers", SYSFS_RW, &toi_max_workers, 0, NR_CPUS, 0, |
13747 | + NULL), | |
2380c486 JR |
13748 | +#ifdef CONFIG_TOI_KEEP_IMAGE |
13749 | + SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE, | |
13750 | + 0), | |
13751 | +#endif | |
13752 | +}; | |
13753 | + | |
13754 | +static struct toi_core_fns my_fns = { | |
13755 | + .get_nonconflicting_page = __toi_get_nonconflicting_page, | |
13756 | + .post_context_save = __toi_post_context_save, | |
9474138d AM |
13757 | + .try_hibernate = toi_try_hibernate, |
13758 | + .try_resume = toi_sys_power_disk_try_resume, | |
2380c486 JR |
13759 | +}; |
13760 | + | |
13761 | +/** | |
13762 | + * core_load - initialisation of TuxOnIce core | |
13763 | + * | |
13764 | + * Initialise the core, beginning with sysfs. Checksum and so on are part of | |
13765 | + * the core, but have their own initialisation routines because they either | |
13766 | + * aren't compiled in all the time or have their own subdirectories. | |
13767 | + **/ | |
13768 | +static __init int core_load(void) | |
13769 | +{ | |
13770 | + int i, | |
13771 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
13772 | + | |
13773 | + printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION | |
13774 | + " (http://tuxonice.net)\n"); | |
2380c486 JR |
13775 | + |
13776 | + if (toi_sysfs_init()) | |
13777 | + return 1; | |
13778 | + | |
13779 | + for (i = 0; i < numfiles; i++) | |
13780 | + toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
13781 | + | |
13782 | + toi_core_fns = &my_fns; | |
13783 | + | |
13784 | + if (toi_alloc_init()) | |
13785 | + return 1; | |
13786 | + if (toi_checksum_init()) | |
13787 | + return 1; | |
13788 | + if (toi_usm_init()) | |
13789 | + return 1; | |
13790 | + if (toi_ui_init()) | |
13791 | + return 1; | |
13792 | + if (toi_poweroff_init()) | |
13793 | + return 1; | |
13794 | + if (toi_cluster_init()) | |
13795 | + return 1; | |
13796 | + | |
13797 | + return 0; | |
13798 | +} | |
13799 | + | |
13800 | +#ifdef MODULE | |
13801 | +/** | |
13802 | + * core_unload: Prepare to unload the core code. | |
13803 | + **/ | |
13804 | +static __exit void core_unload(void) | |
13805 | +{ | |
13806 | + int i, | |
13807 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
13808 | + | |
13809 | + toi_alloc_exit(); | |
13810 | + toi_checksum_exit(); | |
13811 | + toi_poweroff_exit(); | |
13812 | + toi_ui_exit(); | |
13813 | + toi_usm_exit(); | |
13814 | + toi_cluster_exit(); | |
13815 | + | |
13816 | + for (i = 0; i < numfiles; i++) | |
13817 | + toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
13818 | + | |
13819 | + toi_core_fns = NULL; | |
13820 | + | |
13821 | + toi_sysfs_exit(); | |
13822 | +} | |
13823 | +MODULE_LICENSE("GPL"); | |
13824 | +module_init(core_load); | |
13825 | +module_exit(core_unload); | |
13826 | +#else | |
13827 | +late_initcall(core_load); | |
13828 | +#endif | |
13829 | diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c | |
13830 | new file mode 100644 | |
85eb3c9d | 13831 | index 0000000..29b4988 |
2380c486 JR |
13832 | --- /dev/null |
13833 | +++ b/kernel/power/tuxonice_io.c | |
85eb3c9d | 13834 | @@ -0,0 +1,1862 @@ |
2380c486 JR |
13835 | +/* |
13836 | + * kernel/power/tuxonice_io.c | |
13837 | + * | |
13838 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
13839 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
13840 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
5dd10c98 | 13841 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
13842 | + * |
13843 | + * This file is released under the GPLv2. | |
13844 | + * | |
13845 | + * It contains high level IO routines for hibernating. | |
13846 | + * | |
13847 | + */ | |
13848 | + | |
13849 | +#include <linux/suspend.h> | |
13850 | +#include <linux/version.h> | |
13851 | +#include <linux/utsname.h> | |
13852 | +#include <linux/mount.h> | |
13853 | +#include <linux/highmem.h> | |
2380c486 JR |
13854 | +#include <linux/kthread.h> |
13855 | +#include <linux/cpu.h> | |
9474138d | 13856 | +#include <linux/fs_struct.h> |
7e46296a | 13857 | +#include <linux/bio.h> |
cacc47f8 | 13858 | +#include <linux/fs_uuid.h> |
2380c486 JR |
13859 | +#include <asm/tlbflush.h> |
13860 | + | |
13861 | +#include "tuxonice.h" | |
13862 | +#include "tuxonice_modules.h" | |
13863 | +#include "tuxonice_pageflags.h" | |
13864 | +#include "tuxonice_io.h" | |
13865 | +#include "tuxonice_ui.h" | |
13866 | +#include "tuxonice_storage.h" | |
13867 | +#include "tuxonice_prepare_image.h" | |
13868 | +#include "tuxonice_extent.h" | |
13869 | +#include "tuxonice_sysfs.h" | |
13870 | +#include "tuxonice_builtin.h" | |
13871 | +#include "tuxonice_checksum.h" | |
13872 | +#include "tuxonice_alloc.h" | |
13873 | +char alt_resume_param[256]; | |
13874 | + | |
5dd10c98 AM |
13875 | +/* Version read from image header at resume */ |
13876 | +static int toi_image_header_version; | |
13877 | + | |
cacc47f8 | 13878 | +#define read_if_version(VERS, VAR, DESC, ERR_ACT) do { \ |
5dd10c98 AM |
13879 | + if (likely(toi_image_header_version >= VERS)) \ |
13880 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, \ | |
13881 | + (char *) &VAR, sizeof(VAR))) { \ | |
13882 | + abort_hibernate(TOI_FAILED_IO, "Failed to read DESC."); \ | |
cacc47f8 | 13883 | + ERR_ACT; \ |
5dd10c98 AM |
13884 | + } \ |
13885 | +} while(0) \ | |
13886 | + | |
2380c486 JR |
13887 | +/* Variables shared between threads and updated under the mutex */ |
13888 | +static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result; | |
13889 | +static int io_index, io_nextupdate, io_pc, io_pc_step; | |
13890 | +static DEFINE_MUTEX(io_mutex); | |
13891 | +static DEFINE_PER_CPU(struct page *, last_sought); | |
13892 | +static DEFINE_PER_CPU(struct page *, last_high_page); | |
13893 | +static DEFINE_PER_CPU(char *, checksum_locn); | |
13894 | +static DEFINE_PER_CPU(struct pbe *, last_low_page); | |
13895 | +static atomic_t io_count; | |
13896 | +atomic_t toi_io_workers; | |
13897 | +EXPORT_SYMBOL_GPL(toi_io_workers); | |
13898 | + | |
13899 | +DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher); | |
13900 | +EXPORT_SYMBOL_GPL(toi_io_queue_flusher); | |
13901 | + | |
13902 | +int toi_bio_queue_flusher_should_finish; | |
13903 | +EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish); | |
13904 | + | |
13905 | +/* Indicates that this thread should be used for checking throughput */ | |
13906 | +#define MONITOR ((void *) 1) | |
13907 | + | |
5dd10c98 AM |
13908 | +int toi_max_workers; |
13909 | + | |
13910 | +static char *image_version_error = "The image header version is newer than " \ | |
13911 | + "this kernel supports."; | |
13912 | + | |
85eb3c9d AM |
13913 | +struct toi_module_ops *first_filter; |
13914 | + | |
2380c486 JR |
13915 | +/** |
13916 | + * toi_attempt_to_parse_resume_device - determine if we can hibernate | |
13917 | + * | |
13918 | + * Can we hibernate, using the current resume= parameter? | |
13919 | + **/ | |
13920 | +int toi_attempt_to_parse_resume_device(int quiet) | |
13921 | +{ | |
13922 | + struct list_head *Allocator; | |
13923 | + struct toi_module_ops *thisAllocator; | |
13924 | + int result, returning = 0; | |
13925 | + | |
13926 | + if (toi_activate_storage(0)) | |
13927 | + return 0; | |
13928 | + | |
13929 | + toiActiveAllocator = NULL; | |
13930 | + clear_toi_state(TOI_RESUME_DEVICE_OK); | |
13931 | + clear_toi_state(TOI_CAN_RESUME); | |
13932 | + clear_result_state(TOI_ABORTED); | |
13933 | + | |
13934 | + if (!toiNumAllocators) { | |
13935 | + if (!quiet) | |
13936 | + printk(KERN_INFO "TuxOnIce: No storage allocators have " | |
13937 | + "been registered. Hibernating will be " | |
13938 | + "disabled.\n"); | |
13939 | + goto cleanup; | |
13940 | + } | |
13941 | + | |
2380c486 JR |
13942 | + list_for_each(Allocator, &toiAllocators) { |
13943 | + thisAllocator = list_entry(Allocator, struct toi_module_ops, | |
13944 | + type_list); | |
13945 | + | |
13946 | + /* | |
13947 | + * Not sure why you'd want to disable an allocator, but | |
13948 | + * we should honour the flag if we're providing it | |
13949 | + */ | |
13950 | + if (!thisAllocator->enabled) | |
13951 | + continue; | |
13952 | + | |
13953 | + result = thisAllocator->parse_sig_location( | |
13954 | + resume_file, (toiNumAllocators == 1), | |
13955 | + quiet); | |
13956 | + | |
13957 | + switch (result) { | |
13958 | + case -EINVAL: | |
13959 | + /* For this allocator, but not a valid | |
13960 | + * configuration. Error already printed. */ | |
13961 | + goto cleanup; | |
13962 | + | |
13963 | + case 0: | |
13964 | + /* For this allocator and valid. */ | |
13965 | + toiActiveAllocator = thisAllocator; | |
13966 | + | |
13967 | + set_toi_state(TOI_RESUME_DEVICE_OK); | |
13968 | + set_toi_state(TOI_CAN_RESUME); | |
13969 | + returning = 1; | |
13970 | + goto cleanup; | |
13971 | + } | |
13972 | + } | |
13973 | + if (!quiet) | |
e999739a | 13974 | + printk(KERN_INFO "TuxOnIce: No matching enabled allocator " |
13975 | + "found. Resuming disabled.\n"); | |
2380c486 JR |
13976 | +cleanup: |
13977 | + toi_deactivate_storage(0); | |
13978 | + return returning; | |
13979 | +} | |
13980 | +EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device); | |
13981 | + | |
13982 | +void attempt_to_parse_resume_device2(void) | |
13983 | +{ | |
13984 | + toi_prepare_usm(); | |
13985 | + toi_attempt_to_parse_resume_device(0); | |
13986 | + toi_cleanup_usm(); | |
13987 | +} | |
13988 | +EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2); | |
13989 | + | |
13990 | +void save_restore_alt_param(int replace, int quiet) | |
13991 | +{ | |
13992 | + static char resume_param_save[255]; | |
13993 | + static unsigned long toi_state_save; | |
13994 | + | |
13995 | + if (replace) { | |
13996 | + toi_state_save = toi_state; | |
13997 | + strcpy(resume_param_save, resume_file); | |
13998 | + strcpy(resume_file, alt_resume_param); | |
13999 | + } else { | |
14000 | + strcpy(resume_file, resume_param_save); | |
14001 | + toi_state = toi_state_save; | |
14002 | + } | |
14003 | + toi_attempt_to_parse_resume_device(quiet); | |
14004 | +} | |
14005 | + | |
14006 | +void attempt_to_parse_alt_resume_param(void) | |
14007 | +{ | |
14008 | + int ok = 0; | |
14009 | + | |
14010 | + /* Temporarily set resume_param to the poweroff value */ | |
14011 | + if (!strlen(alt_resume_param)) | |
14012 | + return; | |
14013 | + | |
e999739a | 14014 | + printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n"); |
2380c486 JR |
14015 | + save_restore_alt_param(SAVE, NOQUIET); |
14016 | + if (test_toi_state(TOI_CAN_RESUME)) | |
14017 | + ok = 1; | |
14018 | + | |
14019 | + printk(KERN_INFO "=== Done ===\n"); | |
14020 | + save_restore_alt_param(RESTORE, QUIET); | |
14021 | + | |
14022 | + /* If not ok, clear the string */ | |
14023 | + if (ok) | |
14024 | + return; | |
14025 | + | |
14026 | + printk(KERN_INFO "Can't resume from that location; clearing " | |
14027 | + "alt_resume_param.\n"); | |
14028 | + alt_resume_param[0] = '\0'; | |
14029 | +} | |
14030 | + | |
14031 | +/** | |
14032 | + * noresume_reset_modules - reset data structures in case of non resuming | |
14033 | + * | |
14034 | + * When we read the start of an image, modules (and especially the | |
14035 | + * active allocator) might need to reset data structures if we | |
14036 | + * decide to remove the image rather than resuming from it. | |
14037 | + **/ | |
14038 | +static void noresume_reset_modules(void) | |
14039 | +{ | |
14040 | + struct toi_module_ops *this_filter; | |
14041 | + | |
14042 | + list_for_each_entry(this_filter, &toi_filters, type_list) | |
14043 | + if (this_filter->noresume_reset) | |
14044 | + this_filter->noresume_reset(); | |
14045 | + | |
14046 | + if (toiActiveAllocator && toiActiveAllocator->noresume_reset) | |
14047 | + toiActiveAllocator->noresume_reset(); | |
14048 | +} | |
14049 | + | |
14050 | +/** | |
14051 | + * fill_toi_header - fill the hibernate header structure | |
14052 | + * @struct toi_header: Header data structure to be filled. | |
14053 | + **/ | |
14054 | +static int fill_toi_header(struct toi_header *sh) | |
14055 | +{ | |
14056 | + int i, error; | |
14057 | + | |
e999739a | 14058 | + error = init_header((struct swsusp_info *) sh); |
2380c486 JR |
14059 | + if (error) |
14060 | + return error; | |
14061 | + | |
14062 | + sh->pagedir = pagedir1; | |
14063 | + sh->pageset_2_size = pagedir2.size; | |
14064 | + sh->param0 = toi_result; | |
14065 | + sh->param1 = toi_bkd.toi_action; | |
14066 | + sh->param2 = toi_bkd.toi_debug_state; | |
14067 | + sh->param3 = toi_bkd.toi_default_console_level; | |
14068 | + sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev; | |
14069 | + for (i = 0; i < 4; i++) | |
14070 | + sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2]; | |
14071 | + sh->bkd = boot_kernel_data_buffer; | |
14072 | + return 0; | |
14073 | +} | |
14074 | + | |
14075 | +/** | |
14076 | + * rw_init_modules - initialize modules | |
14077 | + * @rw: Whether we are reading of writing an image. | |
14078 | + * @which: Section of the image being processed. | |
14079 | + * | |
14080 | + * Iterate over modules, preparing the ones that will be used to read or write | |
14081 | + * data. | |
14082 | + **/ | |
14083 | +static int rw_init_modules(int rw, int which) | |
14084 | +{ | |
14085 | + struct toi_module_ops *this_module; | |
14086 | + /* Initialise page transformers */ | |
14087 | + list_for_each_entry(this_module, &toi_filters, type_list) { | |
14088 | + if (!this_module->enabled) | |
14089 | + continue; | |
14090 | + if (this_module->rw_init && this_module->rw_init(rw, which)) { | |
14091 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
14092 | + "Failed to initialize the %s filter.", | |
14093 | + this_module->name); | |
14094 | + return 1; | |
14095 | + } | |
14096 | + } | |
14097 | + | |
14098 | + /* Initialise allocator */ | |
14099 | + if (toiActiveAllocator->rw_init(rw, which)) { | |
14100 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
14101 | + "Failed to initialise the allocator."); | |
14102 | + return 1; | |
14103 | + } | |
14104 | + | |
14105 | + /* Initialise other modules */ | |
14106 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
14107 | + if (!this_module->enabled || | |
14108 | + this_module->type == FILTER_MODULE || | |
14109 | + this_module->type == WRITER_MODULE) | |
14110 | + continue; | |
14111 | + if (this_module->rw_init && this_module->rw_init(rw, which)) { | |
14112 | + set_abort_result(TOI_FAILED_MODULE_INIT); | |
14113 | + printk(KERN_INFO "Setting aborted flag due to module " | |
14114 | + "init failure.\n"); | |
14115 | + return 1; | |
14116 | + } | |
14117 | + } | |
14118 | + | |
14119 | + return 0; | |
14120 | +} | |
14121 | + | |
14122 | +/** | |
14123 | + * rw_cleanup_modules - cleanup modules | |
14124 | + * @rw: Whether we are reading of writing an image. | |
14125 | + * | |
14126 | + * Cleanup components after reading or writing a set of pages. | |
14127 | + * Only the allocator may fail. | |
14128 | + **/ | |
14129 | +static int rw_cleanup_modules(int rw) | |
14130 | +{ | |
14131 | + struct toi_module_ops *this_module; | |
14132 | + int result = 0; | |
14133 | + | |
14134 | + /* Cleanup other modules */ | |
14135 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
14136 | + if (!this_module->enabled || | |
14137 | + this_module->type == FILTER_MODULE || | |
14138 | + this_module->type == WRITER_MODULE) | |
14139 | + continue; | |
14140 | + if (this_module->rw_cleanup) | |
14141 | + result |= this_module->rw_cleanup(rw); | |
14142 | + } | |
14143 | + | |
14144 | + /* Flush data and cleanup */ | |
14145 | + list_for_each_entry(this_module, &toi_filters, type_list) { | |
14146 | + if (!this_module->enabled) | |
14147 | + continue; | |
14148 | + if (this_module->rw_cleanup) | |
14149 | + result |= this_module->rw_cleanup(rw); | |
14150 | + } | |
14151 | + | |
14152 | + result |= toiActiveAllocator->rw_cleanup(rw); | |
14153 | + | |
14154 | + return result; | |
14155 | +} | |
14156 | + | |
14157 | +static struct page *copy_page_from_orig_page(struct page *orig_page) | |
14158 | +{ | |
14159 | + int is_high = PageHighMem(orig_page), index, min, max; | |
14160 | + struct page *high_page = NULL, | |
14161 | + **my_last_high_page = &__get_cpu_var(last_high_page), | |
14162 | + **my_last_sought = &__get_cpu_var(last_sought); | |
14163 | + struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page); | |
14164 | + void *compare; | |
14165 | + | |
14166 | + if (is_high) { | |
14167 | + if (*my_last_sought && *my_last_high_page && | |
14168 | + *my_last_sought < orig_page) | |
14169 | + high_page = *my_last_high_page; | |
14170 | + else | |
14171 | + high_page = (struct page *) restore_highmem_pblist; | |
14172 | + this = (struct pbe *) kmap(high_page); | |
14173 | + compare = orig_page; | |
14174 | + } else { | |
14175 | + if (*my_last_sought && *my_last_low_page && | |
14176 | + *my_last_sought < orig_page) | |
14177 | + this = *my_last_low_page; | |
14178 | + else | |
14179 | + this = restore_pblist; | |
14180 | + compare = page_address(orig_page); | |
14181 | + } | |
14182 | + | |
14183 | + *my_last_sought = orig_page; | |
14184 | + | |
14185 | + /* Locate page containing pbe */ | |
14186 | + while (this[PBES_PER_PAGE - 1].next && | |
14187 | + this[PBES_PER_PAGE - 1].orig_address < compare) { | |
14188 | + if (is_high) { | |
14189 | + struct page *next_high_page = (struct page *) | |
14190 | + this[PBES_PER_PAGE - 1].next; | |
14191 | + kunmap(high_page); | |
14192 | + this = kmap(next_high_page); | |
14193 | + high_page = next_high_page; | |
14194 | + } else | |
14195 | + this = this[PBES_PER_PAGE - 1].next; | |
14196 | + } | |
14197 | + | |
14198 | + /* Do a binary search within the page */ | |
14199 | + min = 0; | |
14200 | + max = PBES_PER_PAGE; | |
14201 | + index = PBES_PER_PAGE / 2; | |
14202 | + while (max - min) { | |
14203 | + if (!this[index].orig_address || | |
14204 | + this[index].orig_address > compare) | |
14205 | + max = index; | |
14206 | + else if (this[index].orig_address == compare) { | |
14207 | + if (is_high) { | |
14208 | + struct page *page = this[index].address; | |
14209 | + *my_last_high_page = high_page; | |
14210 | + kunmap(high_page); | |
14211 | + return page; | |
14212 | + } | |
14213 | + *my_last_low_page = this; | |
14214 | + return virt_to_page(this[index].address); | |
14215 | + } else | |
14216 | + min = index; | |
14217 | + index = ((max + min) / 2); | |
14218 | + }; | |
14219 | + | |
14220 | + if (is_high) | |
14221 | + kunmap(high_page); | |
14222 | + | |
14223 | + abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for" | |
14224 | + " orig page %p. This[min].orig_address=%p.\n", orig_page, | |
14225 | + this[index].orig_address); | |
14226 | + return NULL; | |
14227 | +} | |
14228 | + | |
14229 | +/** | |
9474138d AM |
14230 | + * write_next_page - write the next page in a pageset |
14231 | + * @data_pfn: The pfn where the next data to write is located. | |
14232 | + * @my_io_index: The index of the page in the pageset. | |
14233 | + * @write_pfn: The pfn number to write in the image (where the data belongs). | |
9474138d AM |
14234 | + * |
14235 | + * Get the pfn of the next page to write, map the page if necessary and do the | |
14236 | + * write. | |
14237 | + **/ | |
14238 | +static int write_next_page(unsigned long *data_pfn, int *my_io_index, | |
85eb3c9d | 14239 | + unsigned long *write_pfn) |
9474138d AM |
14240 | +{ |
14241 | + struct page *page; | |
14242 | + char **my_checksum_locn = &__get_cpu_var(checksum_locn); | |
14243 | + int result = 0, was_present; | |
14244 | + | |
14245 | + *data_pfn = memory_bm_next_pfn(io_map); | |
14246 | + | |
14247 | + /* Another thread could have beaten us to it. */ | |
14248 | + if (*data_pfn == BM_END_OF_MAP) { | |
14249 | + if (atomic_read(&io_count)) { | |
14250 | + printk(KERN_INFO "Ran out of pfns but io_count is " | |
14251 | + "still %d.\n", atomic_read(&io_count)); | |
14252 | + BUG(); | |
14253 | + } | |
e876a0dd | 14254 | + mutex_unlock(&io_mutex); |
9474138d AM |
14255 | + return -ENODATA; |
14256 | + } | |
14257 | + | |
14258 | + *my_io_index = io_finish_at - atomic_sub_return(1, &io_count); | |
14259 | + | |
14260 | + memory_bm_clear_bit(io_map, *data_pfn); | |
14261 | + page = pfn_to_page(*data_pfn); | |
14262 | + | |
14263 | + was_present = kernel_page_present(page); | |
14264 | + if (!was_present) | |
14265 | + kernel_map_pages(page, 1, 1); | |
14266 | + | |
14267 | + if (io_pageset == 1) | |
14268 | + *write_pfn = memory_bm_next_pfn(pageset1_map); | |
14269 | + else { | |
14270 | + *write_pfn = *data_pfn; | |
14271 | + *my_checksum_locn = tuxonice_get_next_checksum(); | |
14272 | + } | |
14273 | + | |
14274 | + mutex_unlock(&io_mutex); | |
14275 | + | |
14276 | + if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn)) | |
14277 | + return 1; | |
14278 | + | |
85eb3c9d AM |
14279 | + result = first_filter->write_page(*write_pfn, TOI_PAGE, page, |
14280 | + PAGE_SIZE); | |
9474138d AM |
14281 | + |
14282 | + if (!was_present) | |
14283 | + kernel_map_pages(page, 1, 0); | |
14284 | + | |
14285 | + return result; | |
14286 | +} | |
14287 | + | |
14288 | +/** | |
14289 | + * read_next_page - read the next page in a pageset | |
14290 | + * @my_io_index: The index of the page in the pageset. | |
14291 | + * @write_pfn: The pfn in which the data belongs. | |
14292 | + * | |
e876a0dd AM |
14293 | + * Read a page of the image into our buffer. It can happen (here and in the |
14294 | + * write routine) that threads don't get run until after other CPUs have done | |
14295 | + * all the work. This was the cause of the long standing issue with | |
14296 | + * occasionally getting -ENODATA errors at the end of reading the image. We | |
14297 | + * therefore need to check there's actually a page to read before trying to | |
14298 | + * retrieve one. | |
9474138d AM |
14299 | + **/ |
14300 | + | |
14301 | +static int read_next_page(int *my_io_index, unsigned long *write_pfn, | |
85eb3c9d | 14302 | + struct page *buffer) |
9474138d | 14303 | +{ |
92bca44c | 14304 | + unsigned int buf_size = PAGE_SIZE; |
e876a0dd AM |
14305 | + unsigned long left = atomic_read(&io_count); |
14306 | + | |
14307 | + if (left) | |
85eb3c9d | 14308 | + *my_io_index = io_finish_at - left; |
9474138d | 14309 | + |
9474138d AM |
14310 | + mutex_unlock(&io_mutex); |
14311 | + | |
14312 | + /* | |
14313 | + * Are we aborting? If so, don't submit any more I/O as | |
14314 | + * resetting the resume_attempted flag (from ui.c) will | |
14315 | + * clear the bdev flags, making this thread oops. | |
14316 | + */ | |
14317 | + if (unlikely(test_toi_state(TOI_STOP_RESUME))) { | |
14318 | + atomic_dec(&toi_io_workers); | |
5dd10c98 AM |
14319 | + if (!atomic_read(&toi_io_workers)) { |
14320 | + /* | |
14321 | + * So we can be sure we'll have memory for | |
14322 | + * marking that we haven't resumed. | |
14323 | + */ | |
14324 | + rw_cleanup_modules(READ); | |
9474138d | 14325 | + set_toi_state(TOI_IO_STOPPED); |
5dd10c98 | 14326 | + } |
9474138d AM |
14327 | + while (1) |
14328 | + schedule(); | |
14329 | + } | |
14330 | + | |
e876a0dd AM |
14331 | + if (!left) |
14332 | + return -ENODATA; | |
14333 | + | |
92bca44c | 14334 | + /* |
7e46296a | 14335 | + * See toi_bio_read_page in tuxonice_bio.c: |
9474138d AM |
14336 | + * read the next page in the image. |
14337 | + */ | |
85eb3c9d | 14338 | + return first_filter->read_page(write_pfn, TOI_PAGE, buffer, &buf_size); |
9474138d AM |
14339 | +} |
14340 | + | |
9474138d AM |
14341 | +static void use_read_page(unsigned long write_pfn, struct page *buffer) |
14342 | +{ | |
14343 | + struct page *final_page = pfn_to_page(write_pfn), | |
14344 | + *copy_page = final_page; | |
14345 | + char *virt, *buffer_virt; | |
85eb3c9d | 14346 | + int was_present, cpu = smp_processor_id(); |
9474138d | 14347 | + |
85eb3c9d AM |
14348 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Seeking to use pfn %ld.", write_pfn); |
14349 | + if (io_pageset == 1 && (!pageset1_copy_map || | |
14350 | + !memory_bm_test_bit_index(pageset1_copy_map, write_pfn, cpu))) { | |
9474138d AM |
14351 | + copy_page = copy_page_from_orig_page(final_page); |
14352 | + BUG_ON(!copy_page); | |
14353 | + } | |
14354 | + | |
85eb3c9d AM |
14355 | + if (!memory_bm_test_bit_index(io_map, write_pfn, cpu)) { |
14356 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Ignoring read of pfn %ld.", write_pfn); | |
14357 | + return; | |
9474138d | 14358 | + } |
85eb3c9d AM |
14359 | + |
14360 | + virt = kmap(copy_page); | |
14361 | + buffer_virt = kmap(buffer); | |
14362 | + was_present = kernel_page_present(copy_page); | |
14363 | + if (!was_present) | |
14364 | + kernel_map_pages(copy_page, 1, 1); | |
14365 | + memcpy(virt, buffer_virt, PAGE_SIZE); | |
14366 | + if (!was_present) | |
14367 | + kernel_map_pages(copy_page, 1, 0); | |
14368 | + kunmap(copy_page); | |
14369 | + kunmap(buffer); | |
14370 | + memory_bm_clear_bit_index(io_map, write_pfn, cpu); | |
14371 | + atomic_dec(&io_count); | |
9474138d AM |
14372 | +} |
14373 | + | |
5dd10c98 AM |
14374 | +static unsigned long status_update(int writing, unsigned long done, |
14375 | + unsigned long ticks) | |
14376 | +{ | |
14377 | + int cs_index = writing ? 0 : 1; | |
14378 | + unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks; | |
14379 | + unsigned long msec = jiffies_to_msecs(abs(ticks_so_far)); | |
14380 | + unsigned long pgs_per_s, estimate = 0, pages_left; | |
14381 | + | |
14382 | + if (msec) { | |
14383 | + pages_left = io_barmax - done; | |
14384 | + pgs_per_s = 1000 * done / msec; | |
14385 | + if (pgs_per_s) | |
14386 | + estimate = pages_left / pgs_per_s; | |
14387 | + } | |
14388 | + | |
14389 | + if (estimate && ticks > HZ / 2) | |
14390 | + return toi_update_status(done, io_barmax, | |
14391 | + " %d/%d MB (%lu sec left)", | |
14392 | + MB(done+1), MB(io_barmax), estimate); | |
14393 | + | |
14394 | + return toi_update_status(done, io_barmax, " %d/%d MB", | |
14395 | + MB(done+1), MB(io_barmax)); | |
14396 | +} | |
14397 | + | |
9474138d | 14398 | +/** |
2380c486 JR |
14399 | + * worker_rw_loop - main loop to read/write pages |
14400 | + * | |
14401 | + * The main I/O loop for reading or writing pages. The io_map bitmap is used to | |
14402 | + * track the pages to read/write. | |
14403 | + * If we are reading, the pages are loaded to their final (mapped) pfn. | |
14404 | + **/ | |
14405 | +static int worker_rw_loop(void *data) | |
14406 | +{ | |
5dd10c98 AM |
14407 | + unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4, |
14408 | + jif_index = 1, start_time = jiffies; | |
0ada99ac | 14409 | + int result = 0, my_io_index = 0, last_worker; |
2380c486 JR |
14410 | + struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP); |
14411 | + | |
14412 | + current->flags |= PF_NOFREEZE; | |
14413 | + | |
2380c486 JR |
14414 | + mutex_lock(&io_mutex); |
14415 | + | |
14416 | + do { | |
2380c486 | 14417 | + if (data && jiffies > next_jiffies) { |
5dd10c98 | 14418 | + next_jiffies += HZ / 4; |
2380c486 JR |
14419 | + if (toiActiveAllocator->update_throughput_throttle) |
14420 | + toiActiveAllocator->update_throughput_throttle( | |
14421 | + jif_index); | |
14422 | + jif_index++; | |
14423 | + } | |
14424 | + | |
14425 | + /* | |
14426 | + * What page to use? If reading, don't know yet which page's | |
14427 | + * data will be read, so always use the buffer. If writing, | |
14428 | + * use the copy (Pageset1) or original page (Pageset2), but | |
14429 | + * always write the pfn of the original page. | |
14430 | + */ | |
9474138d AM |
14431 | + if (io_write) |
14432 | + result = write_next_page(&data_pfn, &my_io_index, | |
85eb3c9d | 14433 | + &write_pfn); |
9474138d AM |
14434 | + else /* Reading */ |
14435 | + result = read_next_page(&my_io_index, &write_pfn, | |
85eb3c9d | 14436 | + buffer); |
9474138d | 14437 | + |
2380c486 | 14438 | + if (result) { |
92bca44c | 14439 | + mutex_lock(&io_mutex); |
e876a0dd | 14440 | + /* Nothing to do? */ |
85eb3c9d AM |
14441 | + if (result == -ENODATA) { |
14442 | + toi_message(TOI_IO, TOI_VERBOSE, 0, | |
14443 | + "Thread %d has no more work.", | |
14444 | + smp_processor_id()); | |
e876a0dd | 14445 | + break; |
85eb3c9d | 14446 | + } |
e876a0dd AM |
14447 | + |
14448 | + io_result = result; | |
92bca44c | 14449 | + |
2380c486 JR |
14450 | + if (io_write) { |
14451 | + printk(KERN_INFO "Write chunk returned %d.\n", | |
14452 | + result); | |
14453 | + abort_hibernate(TOI_FAILED_IO, | |
14454 | + "Failed to write a chunk of the " | |
14455 | + "image."); | |
92bca44c AM |
14456 | + break; |
14457 | + } | |
14458 | + | |
14459 | + if (io_pageset == 1) { | |
14460 | + printk(KERN_ERR "\nBreaking out of I/O loop " | |
14461 | + "because of result code %d.\n", result); | |
2380c486 JR |
14462 | + break; |
14463 | + } | |
14464 | + panic("Read chunk returned (%d)", result); | |
14465 | + } | |
14466 | + | |
14467 | + /* | |
14468 | + * Discard reads of resaved pages while reading ps2 | |
14469 | + * and unwanted pages while rereading ps2 when aborting. | |
14470 | + */ | |
9474138d AM |
14471 | + if (!io_write && !PageResave(pfn_to_page(write_pfn))) |
14472 | + use_read_page(write_pfn, buffer); | |
2380c486 | 14473 | + |
85eb3c9d AM |
14474 | + if (data) { |
14475 | + if(my_io_index + io_base > io_nextupdate) | |
14476 | + io_nextupdate = status_update(io_write, | |
14477 | + my_io_index + io_base, | |
14478 | + jiffies - start_time); | |
2380c486 | 14479 | + |
85eb3c9d AM |
14480 | + if (my_io_index > io_pc) { |
14481 | + printk(KERN_CONT "...%d%%", 20 * io_pc_step); | |
14482 | + io_pc_step++; | |
14483 | + io_pc = io_finish_at * io_pc_step / 5; | |
14484 | + } | |
2380c486 JR |
14485 | + } |
14486 | + | |
14487 | + toi_cond_pause(0, NULL); | |
14488 | + | |
14489 | + /* | |
14490 | + * Subtle: If there's less I/O still to be done than threads | |
14491 | + * running, quit. This stops us doing I/O beyond the end of | |
14492 | + * the image when reading. | |
14493 | + * | |
14494 | + * Possible race condition. Two threads could do the test at | |
14495 | + * the same time; one should exit and one should continue. | |
14496 | + * Therefore we take the mutex before comparing and exiting. | |
14497 | + */ | |
14498 | + | |
14499 | + mutex_lock(&io_mutex); | |
85eb3c9d AM |
14500 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d pages still to do, %d workers running.", |
14501 | + atomic_read(&io_count), atomic_read(&toi_io_workers)); | |
2380c486 JR |
14502 | + |
14503 | + } while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) && | |
14504 | + !(io_write && test_result_state(TOI_ABORTED))); | |
14505 | + | |
14506 | + last_worker = atomic_dec_and_test(&toi_io_workers); | |
85eb3c9d | 14507 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "%d workers left.", atomic_read(&toi_io_workers)); |
2380c486 JR |
14508 | + mutex_unlock(&io_mutex); |
14509 | + | |
2380c486 JR |
14510 | + toi__free_page(28, buffer); |
14511 | + | |
0ada99ac | 14512 | + return result; |
2380c486 JR |
14513 | +} |
14514 | + | |
14515 | +static int start_other_threads(void) | |
14516 | +{ | |
14517 | + int cpu, num_started = 0; | |
14518 | + struct task_struct *p; | |
e876a0dd AM |
14519 | + int to_start = (toi_max_workers ? toi_max_workers : num_online_cpus()) - 1; |
14520 | + | |
14521 | + atomic_set(&toi_io_workers, to_start); | |
2380c486 JR |
14522 | + |
14523 | + for_each_online_cpu(cpu) { | |
e876a0dd | 14524 | + if (num_started == to_start) |
5dd10c98 AM |
14525 | + break; |
14526 | + | |
2380c486 JR |
14527 | + if (cpu == smp_processor_id()) |
14528 | + continue; | |
14529 | + | |
14530 | + p = kthread_create(worker_rw_loop, num_started ? NULL : MONITOR, | |
14531 | + "ktoi_io/%d", cpu); | |
14532 | + if (IS_ERR(p)) { | |
e999739a | 14533 | + printk(KERN_ERR "ktoi_io for %i failed\n", cpu); |
e876a0dd | 14534 | + atomic_dec(&toi_io_workers); |
2380c486 JR |
14535 | + continue; |
14536 | + } | |
14537 | + kthread_bind(p, cpu); | |
14538 | + p->flags |= PF_MEMALLOC; | |
14539 | + wake_up_process(p); | |
14540 | + num_started++; | |
14541 | + } | |
14542 | + | |
14543 | + return num_started; | |
14544 | +} | |
14545 | + | |
14546 | +/** | |
14547 | + * do_rw_loop - main highlevel function for reading or writing pages | |
14548 | + * | |
14549 | + * Create the io_map bitmap and call worker_rw_loop to perform I/O operations. | |
14550 | + **/ | |
14551 | +static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags, | |
14552 | + int base, int barmax, int pageset) | |
14553 | +{ | |
85eb3c9d AM |
14554 | + int index = 0, cpu, num_other_threads = 0, result = 0, flusher = 0; |
14555 | + int workers_started = 0; | |
2380c486 JR |
14556 | + unsigned long pfn; |
14557 | + | |
85eb3c9d AM |
14558 | + first_filter = toi_get_next_filter(NULL); |
14559 | + | |
2380c486 JR |
14560 | + if (!finish_at) |
14561 | + return 0; | |
14562 | + | |
14563 | + io_write = write; | |
14564 | + io_finish_at = finish_at; | |
14565 | + io_base = base; | |
14566 | + io_barmax = barmax; | |
14567 | + io_pageset = pageset; | |
14568 | + io_index = 0; | |
14569 | + io_pc = io_finish_at / 5; | |
14570 | + io_pc_step = 1; | |
14571 | + io_result = 0; | |
14572 | + io_nextupdate = base + 1; | |
14573 | + toi_bio_queue_flusher_should_finish = 0; | |
14574 | + | |
14575 | + for_each_online_cpu(cpu) { | |
14576 | + per_cpu(last_sought, cpu) = NULL; | |
14577 | + per_cpu(last_low_page, cpu) = NULL; | |
14578 | + per_cpu(last_high_page, cpu) = NULL; | |
14579 | + } | |
14580 | + | |
14581 | + /* Ensure all bits clear */ | |
14582 | + memory_bm_clear(io_map); | |
14583 | + | |
14584 | + /* Set the bits for the pages to write */ | |
14585 | + memory_bm_position_reset(pageflags); | |
14586 | + | |
14587 | + pfn = memory_bm_next_pfn(pageflags); | |
14588 | + | |
14589 | + while (pfn != BM_END_OF_MAP && index < finish_at) { | |
14590 | + memory_bm_set_bit(io_map, pfn); | |
14591 | + pfn = memory_bm_next_pfn(pageflags); | |
14592 | + index++; | |
14593 | + } | |
14594 | + | |
14595 | + BUG_ON(index < finish_at); | |
14596 | + | |
14597 | + atomic_set(&io_count, finish_at); | |
14598 | + | |
14599 | + memory_bm_position_reset(pageset1_map); | |
14600 | + | |
85eb3c9d AM |
14601 | + mutex_lock(&io_mutex); |
14602 | + | |
2380c486 | 14603 | + clear_toi_state(TOI_IO_STOPPED); |
2380c486 | 14604 | + |
7e46296a AM |
14605 | + if (!test_action_state(TOI_NO_MULTITHREADED_IO) && |
14606 | + (write || !toi_force_no_multithreaded)) | |
2380c486 JR |
14607 | + num_other_threads = start_other_threads(); |
14608 | + | |
14609 | + if (!num_other_threads || !toiActiveAllocator->io_flusher || | |
e876a0dd AM |
14610 | + test_action_state(TOI_NO_FLUSHER_THREAD)) { |
14611 | + atomic_inc(&toi_io_workers); | |
e876a0dd | 14612 | + } else |
85eb3c9d AM |
14613 | + flusher = 1; |
14614 | + | |
14615 | + workers_started = atomic_read(&toi_io_workers); | |
14616 | + | |
14617 | + memory_bm_set_iterators(io_map, workers_started); | |
14618 | + memory_bm_position_reset(io_map); | |
14619 | + | |
14620 | + memory_bm_set_iterators(pageset1_copy_map, workers_started); | |
14621 | + memory_bm_position_reset(pageset1_copy_map); | |
14622 | + | |
14623 | + mutex_unlock(&io_mutex); | |
14624 | + | |
14625 | + if (flusher) | |
0ada99ac | 14626 | + result = toiActiveAllocator->io_flusher(write); |
85eb3c9d AM |
14627 | + else |
14628 | + worker_rw_loop(num_other_threads ? NULL : MONITOR); | |
2380c486 JR |
14629 | + |
14630 | + while (atomic_read(&toi_io_workers)) | |
14631 | + schedule(); | |
14632 | + | |
85eb3c9d AM |
14633 | + printk(KERN_CONT "\n"); |
14634 | + | |
2380c486 | 14635 | + if (unlikely(test_toi_state(TOI_STOP_RESUME))) { |
5dd10c98 AM |
14636 | + if (!atomic_read(&toi_io_workers)) { |
14637 | + rw_cleanup_modules(READ); | |
14638 | + set_toi_state(TOI_IO_STOPPED); | |
14639 | + } | |
2380c486 JR |
14640 | + while (1) |
14641 | + schedule(); | |
14642 | + } | |
5dd10c98 | 14643 | + set_toi_state(TOI_IO_STOPPED); |
2380c486 | 14644 | + |
0ada99ac | 14645 | + if (!io_result && !result && !test_result_state(TOI_ABORTED)) { |
2380c486 JR |
14646 | + unsigned long next; |
14647 | + | |
2380c486 JR |
14648 | + toi_update_status(io_base + io_finish_at, io_barmax, |
14649 | + " %d/%d MB ", | |
14650 | + MB(io_base + io_finish_at), MB(io_barmax)); | |
14651 | + | |
14652 | + memory_bm_position_reset(io_map); | |
14653 | + next = memory_bm_next_pfn(io_map); | |
14654 | + if (next != BM_END_OF_MAP) { | |
14655 | + printk(KERN_INFO "Finished I/O loop but still work to " | |
14656 | + "do?\nFinish at = %d. io_count = %d.\n", | |
14657 | + finish_at, atomic_read(&io_count)); | |
14658 | + printk(KERN_INFO "I/O bitmap still records work to do." | |
14659 | + "%ld.\n", next); | |
85eb3c9d | 14660 | + BUG(); |
92bca44c AM |
14661 | + do { |
14662 | + cpu_relax(); | |
7e46296a | 14663 | + } while (0); |
2380c486 JR |
14664 | + } |
14665 | + } | |
14666 | + | |
0ada99ac | 14667 | + return io_result ? io_result : result; |
2380c486 JR |
14668 | +} |
14669 | + | |
14670 | +/** | |
14671 | + * write_pageset - write a pageset to disk. | |
14672 | + * @pagedir: Which pagedir to write. | |
14673 | + * | |
14674 | + * Returns: | |
14675 | + * Zero on success or -1 on failure. | |
14676 | + **/ | |
14677 | +int write_pageset(struct pagedir *pagedir) | |
14678 | +{ | |
5dd10c98 | 14679 | + int finish_at, base = 0; |
2380c486 JR |
14680 | + int barmax = pagedir1.size + pagedir2.size; |
14681 | + long error = 0; | |
14682 | + struct memory_bitmap *pageflags; | |
5dd10c98 | 14683 | + unsigned long start_time, end_time; |
2380c486 JR |
14684 | + |
14685 | + /* | |
14686 | + * Even if there is nothing to read or write, the allocator | |
14687 | + * may need the init/cleanup for it's housekeeping. (eg: | |
14688 | + * Pageset1 may start where pageset2 ends when writing). | |
14689 | + */ | |
14690 | + finish_at = pagedir->size; | |
14691 | + | |
14692 | + if (pagedir->id == 1) { | |
14693 | + toi_prepare_status(DONT_CLEAR_BAR, | |
14694 | + "Writing kernel & process data..."); | |
14695 | + base = pagedir2.size; | |
14696 | + if (test_action_state(TOI_TEST_FILTER_SPEED) || | |
14697 | + test_action_state(TOI_TEST_BIO)) | |
14698 | + pageflags = pageset1_map; | |
14699 | + else | |
14700 | + pageflags = pageset1_copy_map; | |
14701 | + } else { | |
14702 | + toi_prepare_status(DONT_CLEAR_BAR, "Writing caches..."); | |
14703 | + pageflags = pageset2_map; | |
14704 | + } | |
14705 | + | |
14706 | + start_time = jiffies; | |
14707 | + | |
14708 | + if (rw_init_modules(1, pagedir->id)) { | |
14709 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
14710 | + "Failed to initialise modules for writing."); | |
14711 | + error = 1; | |
14712 | + } | |
14713 | + | |
14714 | + if (!error) | |
14715 | + error = do_rw_loop(1, finish_at, pageflags, base, barmax, | |
14716 | + pagedir->id); | |
14717 | + | |
14718 | + if (rw_cleanup_modules(WRITE) && !error) { | |
14719 | + abort_hibernate(TOI_FAILED_MODULE_CLEANUP, | |
14720 | + "Failed to cleanup after writing."); | |
14721 | + error = 1; | |
14722 | + } | |
14723 | + | |
14724 | + end_time = jiffies; | |
14725 | + | |
14726 | + if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { | |
14727 | + toi_bkd.toi_io_time[0][0] += finish_at, | |
14728 | + toi_bkd.toi_io_time[0][1] += (end_time - start_time); | |
14729 | + } | |
14730 | + | |
14731 | + return error; | |
14732 | +} | |
14733 | + | |
14734 | +/** | |
14735 | + * read_pageset - highlevel function to read a pageset from disk | |
14736 | + * @pagedir: pageset to read | |
14737 | + * @overwrittenpagesonly: Whether to read the whole pageset or | |
14738 | + * only part of it. | |
14739 | + * | |
14740 | + * Returns: | |
14741 | + * Zero on success or -1 on failure. | |
14742 | + **/ | |
14743 | +static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly) | |
14744 | +{ | |
5dd10c98 | 14745 | + int result = 0, base = 0; |
2380c486 JR |
14746 | + int finish_at = pagedir->size; |
14747 | + int barmax = pagedir1.size + pagedir2.size; | |
14748 | + struct memory_bitmap *pageflags; | |
5dd10c98 | 14749 | + unsigned long start_time, end_time; |
2380c486 JR |
14750 | + |
14751 | + if (pagedir->id == 1) { | |
14752 | + toi_prepare_status(DONT_CLEAR_BAR, | |
14753 | + "Reading kernel & process data..."); | |
14754 | + pageflags = pageset1_map; | |
14755 | + } else { | |
14756 | + toi_prepare_status(DONT_CLEAR_BAR, "Reading caches..."); | |
14757 | + if (overwrittenpagesonly) { | |
14758 | + barmax = min(pagedir1.size, pagedir2.size); | |
14759 | + finish_at = min(pagedir1.size, pagedir2.size); | |
14760 | + } else | |
14761 | + base = pagedir1.size; | |
14762 | + pageflags = pageset2_map; | |
14763 | + } | |
14764 | + | |
14765 | + start_time = jiffies; | |
14766 | + | |
14767 | + if (rw_init_modules(0, pagedir->id)) { | |
14768 | + toiActiveAllocator->remove_image(); | |
14769 | + result = 1; | |
14770 | + } else | |
14771 | + result = do_rw_loop(0, finish_at, pageflags, base, barmax, | |
14772 | + pagedir->id); | |
14773 | + | |
14774 | + if (rw_cleanup_modules(READ) && !result) { | |
14775 | + abort_hibernate(TOI_FAILED_MODULE_CLEANUP, | |
14776 | + "Failed to cleanup after reading."); | |
14777 | + result = 1; | |
14778 | + } | |
14779 | + | |
14780 | + /* Statistics */ | |
14781 | + end_time = jiffies; | |
14782 | + | |
14783 | + if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { | |
14784 | + toi_bkd.toi_io_time[1][0] += finish_at, | |
14785 | + toi_bkd.toi_io_time[1][1] += (end_time - start_time); | |
14786 | + } | |
14787 | + | |
14788 | + return result; | |
14789 | +} | |
14790 | + | |
14791 | +/** | |
14792 | + * write_module_configs - store the modules configuration | |
14793 | + * | |
14794 | + * The configuration for each module is stored in the image header. | |
14795 | + * Returns: Int | |
14796 | + * Zero on success, Error value otherwise. | |
14797 | + **/ | |
14798 | +static int write_module_configs(void) | |
14799 | +{ | |
14800 | + struct toi_module_ops *this_module; | |
14801 | + char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP); | |
14802 | + int len, index = 1; | |
14803 | + struct toi_module_header toi_module_header; | |
14804 | + | |
14805 | + if (!buffer) { | |
14806 | + printk(KERN_INFO "Failed to allocate a buffer for saving " | |
14807 | + "module configuration info.\n"); | |
14808 | + return -ENOMEM; | |
14809 | + } | |
14810 | + | |
14811 | + /* | |
14812 | + * We have to know which data goes with which module, so we at | |
14813 | + * least write a length of zero for a module. Note that we are | |
14814 | + * also assuming every module's config data takes <= PAGE_SIZE. | |
14815 | + */ | |
14816 | + | |
14817 | + /* For each module (in registration order) */ | |
14818 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
14819 | + if (!this_module->enabled || !this_module->storage_needed || | |
14820 | + (this_module->type == WRITER_MODULE && | |
14821 | + toiActiveAllocator != this_module)) | |
14822 | + continue; | |
14823 | + | |
14824 | + /* Get the data from the module */ | |
14825 | + len = 0; | |
14826 | + if (this_module->save_config_info) | |
14827 | + len = this_module->save_config_info(buffer); | |
14828 | + | |
14829 | + /* Save the details of the module */ | |
14830 | + toi_module_header.enabled = this_module->enabled; | |
14831 | + toi_module_header.type = this_module->type; | |
14832 | + toi_module_header.index = index++; | |
14833 | + strncpy(toi_module_header.name, this_module->name, | |
14834 | + sizeof(toi_module_header.name)); | |
14835 | + toiActiveAllocator->rw_header_chunk(WRITE, | |
14836 | + this_module, | |
14837 | + (char *) &toi_module_header, | |
14838 | + sizeof(toi_module_header)); | |
14839 | + | |
14840 | + /* Save the size of the data and any data returned */ | |
14841 | + toiActiveAllocator->rw_header_chunk(WRITE, | |
14842 | + this_module, | |
14843 | + (char *) &len, sizeof(int)); | |
14844 | + if (len) | |
14845 | + toiActiveAllocator->rw_header_chunk( | |
14846 | + WRITE, this_module, buffer, len); | |
14847 | + } | |
14848 | + | |
14849 | + /* Write a blank header to terminate the list */ | |
14850 | + toi_module_header.name[0] = '\0'; | |
14851 | + toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
14852 | + (char *) &toi_module_header, sizeof(toi_module_header)); | |
14853 | + | |
14854 | + toi_free_page(22, (unsigned long) buffer); | |
14855 | + return 0; | |
14856 | +} | |
14857 | + | |
14858 | +/** | |
14859 | + * read_one_module_config - read and configure one module | |
14860 | + * | |
14861 | + * Read the configuration for one module, and configure the module | |
14862 | + * to match if it is loaded. | |
14863 | + * | |
14864 | + * Returns: Int | |
14865 | + * Zero on success, Error value otherwise. | |
14866 | + **/ | |
14867 | +static int read_one_module_config(struct toi_module_header *header) | |
14868 | +{ | |
14869 | + struct toi_module_ops *this_module; | |
14870 | + int result, len; | |
14871 | + char *buffer; | |
14872 | + | |
14873 | + /* Find the module */ | |
14874 | + this_module = toi_find_module_given_name(header->name); | |
14875 | + | |
14876 | + if (!this_module) { | |
14877 | + if (header->enabled) { | |
14878 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
14879 | + "It looks like we need module %s for reading " | |
14880 | + "the image but it hasn't been registered.\n", | |
14881 | + header->name); | |
14882 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) | |
14883 | + return -EINVAL; | |
14884 | + } else | |
14885 | + printk(KERN_INFO "Module %s configuration data found, " | |
14886 | + "but the module hasn't registered. Looks like " | |
14887 | + "it was disabled, so we're ignoring its data.", | |
14888 | + header->name); | |
14889 | + } | |
14890 | + | |
14891 | + /* Get the length of the data (if any) */ | |
14892 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len, | |
14893 | + sizeof(int)); | |
14894 | + if (result) { | |
e999739a | 14895 | + printk(KERN_ERR "Failed to read the length of the module %s's" |
2380c486 JR |
14896 | + " configuration data.\n", |
14897 | + header->name); | |
14898 | + return -EINVAL; | |
14899 | + } | |
14900 | + | |
14901 | + /* Read any data and pass to the module (if we found one) */ | |
14902 | + if (!len) | |
14903 | + return 0; | |
14904 | + | |
14905 | + buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP); | |
14906 | + | |
14907 | + if (!buffer) { | |
e999739a | 14908 | + printk(KERN_ERR "Failed to allocate a buffer for reloading " |
14909 | + "module configuration info.\n"); | |
2380c486 JR |
14910 | + return -ENOMEM; |
14911 | + } | |
14912 | + | |
14913 | + toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len); | |
14914 | + | |
14915 | + if (!this_module) | |
14916 | + goto out; | |
14917 | + | |
14918 | + if (!this_module->save_config_info) | |
e999739a | 14919 | + printk(KERN_ERR "Huh? Module %s appears to have a " |
14920 | + "save_config_info, but not a load_config_info " | |
14921 | + "function!\n", this_module->name); | |
2380c486 JR |
14922 | + else |
14923 | + this_module->load_config_info(buffer, len); | |
14924 | + | |
14925 | + /* | |
14926 | + * Now move this module to the tail of its lists. This will put it in | |
14927 | + * order. Any new modules will end up at the top of the lists. They | |
14928 | + * should have been set to disabled when loaded (people will | |
14929 | + * normally not edit an initrd to load a new module and then hibernate | |
14930 | + * without using it!). | |
14931 | + */ | |
14932 | + | |
14933 | + toi_move_module_tail(this_module); | |
14934 | + | |
14935 | + this_module->enabled = header->enabled; | |
14936 | + | |
14937 | +out: | |
14938 | + toi_free_page(23, (unsigned long) buffer); | |
14939 | + return 0; | |
14940 | +} | |
14941 | + | |
14942 | +/** | |
14943 | + * read_module_configs - reload module configurations from the image header. | |
14944 | + * | |
14945 | + * Returns: Int | |
14946 | + * Zero on success or an error code. | |
14947 | + **/ | |
14948 | +static int read_module_configs(void) | |
14949 | +{ | |
14950 | + int result = 0; | |
14951 | + struct toi_module_header toi_module_header; | |
14952 | + struct toi_module_ops *this_module; | |
14953 | + | |
14954 | + /* All modules are initially disabled. That way, if we have a module | |
14955 | + * loaded now that wasn't loaded when we hibernated, it won't be used | |
14956 | + * in trying to read the data. | |
14957 | + */ | |
14958 | + list_for_each_entry(this_module, &toi_modules, module_list) | |
14959 | + this_module->enabled = 0; | |
14960 | + | |
14961 | + /* Get the first module header */ | |
14962 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
14963 | + (char *) &toi_module_header, | |
14964 | + sizeof(toi_module_header)); | |
14965 | + if (result) { | |
14966 | + printk(KERN_ERR "Failed to read the next module header.\n"); | |
14967 | + return -EINVAL; | |
14968 | + } | |
14969 | + | |
14970 | + /* For each module (in registration order) */ | |
14971 | + while (toi_module_header.name[0]) { | |
14972 | + result = read_one_module_config(&toi_module_header); | |
14973 | + | |
14974 | + if (result) | |
14975 | + return -EINVAL; | |
14976 | + | |
14977 | + /* Get the next module header */ | |
14978 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
14979 | + (char *) &toi_module_header, | |
14980 | + sizeof(toi_module_header)); | |
14981 | + | |
14982 | + if (result) { | |
14983 | + printk(KERN_ERR "Failed to read the next module " | |
14984 | + "header.\n"); | |
14985 | + return -EINVAL; | |
14986 | + } | |
14987 | + } | |
14988 | + | |
14989 | + return 0; | |
14990 | +} | |
14991 | + | |
5dd10c98 AM |
14992 | +static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev) |
14993 | +{ | |
14994 | + return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 0 : 1; | |
14995 | +} | |
14996 | + | |
14997 | +int fs_info_space_needed(void) | |
14998 | +{ | |
14999 | + const struct super_block *sb; | |
15000 | + int result = sizeof(int); | |
15001 | + | |
15002 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
15003 | + struct fs_info *fs; | |
15004 | + | |
15005 | + if (!sb->s_bdev) | |
15006 | + continue; | |
15007 | + | |
15008 | + fs = fs_info_from_block_dev(sb->s_bdev); | |
15009 | + if (save_fs_info(fs, sb->s_bdev)) | |
cacc47f8 AM |
15010 | + result += 16 + sizeof(dev_t) + sizeof(int) + |
15011 | + fs->last_mount_size; | |
5dd10c98 AM |
15012 | + free_fs_info(fs); |
15013 | + } | |
15014 | + return result; | |
15015 | +} | |
15016 | + | |
15017 | +static int fs_info_num_to_save(void) | |
15018 | +{ | |
15019 | + const struct super_block *sb; | |
15020 | + int to_save = 0; | |
15021 | + | |
15022 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
15023 | + struct fs_info *fs; | |
15024 | + | |
15025 | + if (!sb->s_bdev) | |
15026 | + continue; | |
15027 | + | |
15028 | + fs = fs_info_from_block_dev(sb->s_bdev); | |
15029 | + if (save_fs_info(fs, sb->s_bdev)) | |
15030 | + to_save++; | |
15031 | + free_fs_info(fs); | |
15032 | + } | |
15033 | + | |
15034 | + return to_save; | |
15035 | +} | |
15036 | + | |
15037 | +static int fs_info_save(void) | |
15038 | +{ | |
15039 | + const struct super_block *sb; | |
15040 | + int to_save = fs_info_num_to_save(); | |
15041 | + | |
15042 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *) &to_save, | |
15043 | + sizeof(int))) { | |
15044 | + abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info" | |
15045 | + " to save."); | |
15046 | + return -EIO; | |
15047 | + } | |
15048 | + | |
15049 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
15050 | + struct fs_info *fs; | |
15051 | + | |
15052 | + if (!sb->s_bdev) | |
15053 | + continue; | |
15054 | + | |
15055 | + fs = fs_info_from_block_dev(sb->s_bdev); | |
15056 | + if (save_fs_info(fs, sb->s_bdev)) { | |
15057 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
15058 | + &fs->uuid[0], 16)) { | |
15059 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
15060 | + "write uuid."); | |
15061 | + return -EIO; | |
15062 | + } | |
15063 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
cacc47f8 AM |
15064 | + (char *) &fs->dev_t, sizeof(dev_t))) { |
15065 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
15066 | + "write dev_t."); | |
15067 | + return -EIO; | |
15068 | + } | |
15069 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
5dd10c98 AM |
15070 | + (char *) &fs->last_mount_size, sizeof(int))) { |
15071 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
15072 | + "write last mount length."); | |
15073 | + return -EIO; | |
15074 | + } | |
15075 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
15076 | + fs->last_mount, fs->last_mount_size)) { | |
15077 | + abort_hibernate(TOI_FAILED_IO, "Failed to " | |
15078 | + "write uuid."); | |
15079 | + return -EIO; | |
15080 | + } | |
15081 | + } | |
15082 | + free_fs_info(fs); | |
15083 | + } | |
15084 | + return 0; | |
15085 | +} | |
15086 | + | |
15087 | +static int fs_info_load_and_check_one(void) | |
15088 | +{ | |
15089 | + char uuid[16], *last_mount; | |
15090 | + int result = 0, ln; | |
15091 | + dev_t dev_t; | |
15092 | + struct block_device *dev; | |
cacc47f8 | 15093 | + struct fs_info *fs_info, seek; |
5dd10c98 AM |
15094 | + |
15095 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) { | |
15096 | + abort_hibernate(TOI_FAILED_IO, "Failed to read uuid."); | |
15097 | + return -EIO; | |
15098 | + } | |
15099 | + | |
cacc47f8 AM |
15100 | + read_if_version(3, dev_t, "uuid dev_t field", return -EIO); |
15101 | + | |
5dd10c98 AM |
15102 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &ln, |
15103 | + sizeof(int))) { | |
15104 | + abort_hibernate(TOI_FAILED_IO, | |
15105 | + "Failed to read last mount size."); | |
15106 | + return -EIO; | |
15107 | + } | |
15108 | + | |
15109 | + last_mount = kzalloc(ln, GFP_KERNEL); | |
15110 | + | |
15111 | + if (!last_mount) | |
15112 | + return -ENOMEM; | |
15113 | + | |
15114 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount, ln)) { | |
15115 | + abort_hibernate(TOI_FAILED_IO, | |
15116 | + "Failed to read last mount timestamp."); | |
15117 | + result = -EIO; | |
15118 | + goto out_lmt; | |
15119 | + } | |
15120 | + | |
cacc47f8 AM |
15121 | + strncpy((char *) &seek.uuid, uuid, 16); |
15122 | + seek.dev_t = dev_t; | |
15123 | + seek.last_mount_size = ln; | |
15124 | + seek.last_mount = last_mount; | |
15125 | + dev_t = blk_lookup_fs_info(&seek); | |
5dd10c98 AM |
15126 | + if (!dev_t) |
15127 | + goto out_lmt; | |
15128 | + | |
15129 | + dev = toi_open_by_devnum(dev_t); | |
15130 | + | |
15131 | + fs_info = fs_info_from_block_dev(dev); | |
15132 | + if (fs_info && !IS_ERR(fs_info)) { | |
15133 | + if (ln != fs_info->last_mount_size) { | |
15134 | + printk(KERN_EMERG "Found matching uuid but last mount " | |
15135 | + "time lengths differ?! " | |
15136 | + "(%d vs %d).\n", ln, | |
15137 | + fs_info->last_mount_size); | |
15138 | + result = -EINVAL; | |
15139 | + } else { | |
15140 | + char buf[BDEVNAME_SIZE]; | |
15141 | + result = !!memcmp(fs_info->last_mount, last_mount, ln); | |
15142 | + if (result) | |
15143 | + printk(KERN_EMERG "Last mount time for %s has " | |
15144 | + "changed!\n", bdevname(dev, buf)); | |
15145 | + } | |
15146 | + } | |
15147 | + toi_close_bdev(dev); | |
15148 | + free_fs_info(fs_info); | |
15149 | +out_lmt: | |
15150 | + kfree(last_mount); | |
15151 | + return result; | |
15152 | +} | |
15153 | + | |
15154 | +static int fs_info_load_and_check(void) | |
15155 | +{ | |
de6743ae | 15156 | + int to_do, result = 0; |
5dd10c98 AM |
15157 | + |
15158 | + if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &to_do, | |
15159 | + sizeof(int))) { | |
15160 | + abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info " | |
15161 | + "to load."); | |
15162 | + return -EIO; | |
15163 | + } | |
15164 | + | |
15165 | + while(to_do--) | |
15166 | + result |= fs_info_load_and_check_one(); | |
15167 | + | |
15168 | + return result; | |
15169 | +} | |
15170 | + | |
2380c486 JR |
15171 | +/** |
15172 | + * write_image_header - write the image header after write the image proper | |
15173 | + * | |
15174 | + * Returns: Int | |
15175 | + * Zero on success, error value otherwise. | |
15176 | + **/ | |
15177 | +int write_image_header(void) | |
15178 | +{ | |
15179 | + int ret; | |
15180 | + int total = pagedir1.size + pagedir2.size+2; | |
15181 | + char *header_buffer = NULL; | |
15182 | + | |
15183 | + /* Now prepare to write the header */ | |
15184 | + ret = toiActiveAllocator->write_header_init(); | |
15185 | + if (ret) { | |
15186 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
15187 | + "Active allocator's write_header_init" | |
15188 | + " function failed."); | |
15189 | + goto write_image_header_abort; | |
15190 | + } | |
15191 | + | |
15192 | + /* Get a buffer */ | |
15193 | + header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP); | |
15194 | + if (!header_buffer) { | |
15195 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15196 | + "Out of memory when trying to get page for header!"); | |
15197 | + goto write_image_header_abort; | |
15198 | + } | |
15199 | + | |
15200 | + /* Write hibernate header */ | |
15201 | + if (fill_toi_header((struct toi_header *) header_buffer)) { | |
15202 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15203 | + "Failure to fill header information!"); | |
15204 | + goto write_image_header_abort; | |
15205 | + } | |
2380c486 | 15206 | + |
5dd10c98 AM |
15207 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, |
15208 | + header_buffer, sizeof(struct toi_header))) { | |
15209 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15210 | + "Failure to write header info."); | |
15211 | + goto write_image_header_abort; | |
15212 | + } | |
15213 | + | |
15214 | + if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
15215 | + (char *) &toi_max_workers, sizeof(toi_max_workers))) { | |
15216 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
15217 | + "Failure to number of workers to use."); | |
15218 | + goto write_image_header_abort; | |
15219 | + } | |
15220 | + | |
15221 | + /* Write filesystem info */ | |
15222 | + if (fs_info_save()) | |
15223 | + goto write_image_header_abort; | |
2380c486 JR |
15224 | + |
15225 | + /* Write module configurations */ | |
15226 | + ret = write_module_configs(); | |
15227 | + if (ret) { | |
15228 | + abort_hibernate(TOI_FAILED_IO, | |
15229 | + "Failed to write module configs."); | |
15230 | + goto write_image_header_abort; | |
15231 | + } | |
15232 | + | |
5dd10c98 AM |
15233 | + if (memory_bm_write(pageset1_map, |
15234 | + toiActiveAllocator->rw_header_chunk)) { | |
15235 | + abort_hibernate(TOI_FAILED_IO, | |
15236 | + "Failed to write bitmaps."); | |
15237 | + goto write_image_header_abort; | |
15238 | + } | |
2380c486 JR |
15239 | + |
15240 | + /* Flush data and let allocator cleanup */ | |
15241 | + if (toiActiveAllocator->write_header_cleanup()) { | |
15242 | + abort_hibernate(TOI_FAILED_IO, | |
15243 | + "Failed to cleanup writing header."); | |
15244 | + goto write_image_header_abort_no_cleanup; | |
15245 | + } | |
15246 | + | |
15247 | + if (test_result_state(TOI_ABORTED)) | |
15248 | + goto write_image_header_abort_no_cleanup; | |
15249 | + | |
15250 | + toi_update_status(total, total, NULL); | |
15251 | + | |
5dd10c98 AM |
15252 | +out: |
15253 | + if (header_buffer) | |
15254 | + toi_free_page(24, (unsigned long) header_buffer); | |
15255 | + return ret; | |
2380c486 JR |
15256 | + |
15257 | +write_image_header_abort: | |
15258 | + toiActiveAllocator->write_header_cleanup(); | |
15259 | +write_image_header_abort_no_cleanup: | |
5dd10c98 AM |
15260 | + ret = -1; |
15261 | + goto out; | |
2380c486 JR |
15262 | +} |
15263 | + | |
15264 | +/** | |
15265 | + * sanity_check - check the header | |
15266 | + * @sh: the header which was saved at hibernate time. | |
15267 | + * | |
15268 | + * Perform a few checks, seeking to ensure that the kernel being | |
15269 | + * booted matches the one hibernated. They need to match so we can | |
15270 | + * be _sure_ things will work. It is not absolutely impossible for | |
15271 | + * resuming from a different kernel to work, just not assured. | |
15272 | + **/ | |
15273 | +static char *sanity_check(struct toi_header *sh) | |
15274 | +{ | |
e999739a | 15275 | + char *reason = check_image_kernel((struct swsusp_info *) sh); |
2380c486 JR |
15276 | + |
15277 | + if (reason) | |
15278 | + return reason; | |
15279 | + | |
15280 | + if (!test_action_state(TOI_IGNORE_ROOTFS)) { | |
15281 | + const struct super_block *sb; | |
15282 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
15283 | + if ((!(sb->s_flags & MS_RDONLY)) && | |
15284 | + (sb->s_type->fs_flags & FS_REQUIRES_DEV)) | |
15285 | + return "Device backed fs has been mounted " | |
15286 | + "rw prior to resume or initrd/ramfs " | |
15287 | + "is mounted rw."; | |
15288 | + } | |
15289 | + } | |
15290 | + | |
15291 | + return NULL; | |
15292 | +} | |
15293 | + | |
15294 | +static DECLARE_WAIT_QUEUE_HEAD(freeze_wait); | |
15295 | + | |
15296 | +#define FREEZE_IN_PROGRESS (~0) | |
15297 | + | |
15298 | +static int freeze_result; | |
15299 | + | |
15300 | +static void do_freeze(struct work_struct *dummy) | |
15301 | +{ | |
15302 | + freeze_result = freeze_processes(); | |
15303 | + wake_up(&freeze_wait); | |
7e46296a | 15304 | + trap_non_toi_io = 1; |
2380c486 JR |
15305 | +} |
15306 | + | |
15307 | +static DECLARE_WORK(freeze_work, do_freeze); | |
15308 | + | |
15309 | +/** | |
15310 | + * __read_pageset1 - test for the existence of an image and attempt to load it | |
15311 | + * | |
15312 | + * Returns: Int | |
15313 | + * Zero if image found and pageset1 successfully loaded. | |
15314 | + * Error if no image found or loaded. | |
15315 | + **/ | |
15316 | +static int __read_pageset1(void) | |
15317 | +{ | |
15318 | + int i, result = 0; | |
15319 | + char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP), | |
15320 | + *sanity_error = NULL; | |
15321 | + struct toi_header *toi_header; | |
15322 | + | |
15323 | + if (!header_buffer) { | |
15324 | + printk(KERN_INFO "Unable to allocate a page for reading the " | |
15325 | + "signature.\n"); | |
15326 | + return -ENOMEM; | |
15327 | + } | |
15328 | + | |
15329 | + /* Check for an image */ | |
15330 | + result = toiActiveAllocator->image_exists(1); | |
5dd10c98 AM |
15331 | + if (result == 3) { |
15332 | + result = -ENODATA; | |
15333 | + toi_early_boot_message(1, 0, "The signature from an older " | |
15334 | + "version of TuxOnIce has been detected."); | |
15335 | + goto out_remove_image; | |
15336 | + } | |
15337 | + | |
7e46296a | 15338 | + if (result != 1) { |
2380c486 JR |
15339 | + result = -ENODATA; |
15340 | + noresume_reset_modules(); | |
15341 | + printk(KERN_INFO "TuxOnIce: No image found.\n"); | |
15342 | + goto out; | |
15343 | + } | |
15344 | + | |
15345 | + /* | |
15346 | + * Prepare the active allocator for reading the image header. The | |
15347 | + * activate allocator might read its own configuration. | |
15348 | + * | |
15349 | + * NB: This call may never return because there might be a signature | |
15350 | + * for a different image such that we warn the user and they choose | |
15351 | + * to reboot. (If the device ids look erroneous (2.4 vs 2.6) or the | |
15352 | + * location of the image might be unavailable if it was stored on a | |
15353 | + * network connection). | |
15354 | + */ | |
15355 | + | |
15356 | + result = toiActiveAllocator->read_header_init(); | |
15357 | + if (result) { | |
15358 | + printk(KERN_INFO "TuxOnIce: Failed to initialise, reading the " | |
15359 | + "image header.\n"); | |
15360 | + goto out_remove_image; | |
15361 | + } | |
15362 | + | |
15363 | + /* Check for noresume command line option */ | |
15364 | + if (test_toi_state(TOI_NORESUME_SPECIFIED)) { | |
15365 | + printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed " | |
15366 | + "image.\n"); | |
15367 | + goto out_remove_image; | |
15368 | + } | |
15369 | + | |
15370 | + /* Check whether we've resumed before */ | |
15371 | + if (test_toi_state(TOI_RESUMED_BEFORE)) { | |
15372 | + toi_early_boot_message(1, 0, NULL); | |
15373 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) { | |
15374 | + printk(KERN_INFO "TuxOnIce: Tried to resume before: " | |
15375 | + "Invalidated image.\n"); | |
15376 | + goto out_remove_image; | |
15377 | + } | |
15378 | + } | |
15379 | + | |
15380 | + clear_toi_state(TOI_CONTINUE_REQ); | |
15381 | + | |
5dd10c98 AM |
15382 | + toi_image_header_version = toiActiveAllocator->get_header_version(); |
15383 | + | |
15384 | + if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) { | |
15385 | + toi_early_boot_message(1, 0, image_version_error); | |
15386 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) { | |
15387 | + printk(KERN_INFO "TuxOnIce: Header version too new: " | |
15388 | + "Invalidated image.\n"); | |
15389 | + goto out_remove_image; | |
15390 | + } | |
15391 | + } | |
15392 | + | |
2380c486 JR |
15393 | + /* Read hibernate header */ |
15394 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
15395 | + header_buffer, sizeof(struct toi_header)); | |
15396 | + if (result < 0) { | |
e999739a | 15397 | + printk(KERN_ERR "TuxOnIce: Failed to read the image " |
15398 | + "signature.\n"); | |
2380c486 JR |
15399 | + goto out_remove_image; |
15400 | + } | |
15401 | + | |
15402 | + toi_header = (struct toi_header *) header_buffer; | |
15403 | + | |
15404 | + /* | |
15405 | + * NB: This call may also result in a reboot rather than returning. | |
15406 | + */ | |
15407 | + | |
15408 | + sanity_error = sanity_check(toi_header); | |
15409 | + if (sanity_error) { | |
15410 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
15411 | + sanity_error); | |
15412 | + printk(KERN_INFO "TuxOnIce: Sanity check failed.\n"); | |
15413 | + goto out_remove_image; | |
15414 | + } | |
15415 | + | |
15416 | + /* | |
15417 | + * We have an image and it looks like it will load okay. | |
15418 | + * | |
15419 | + * Get metadata from header. Don't override commandline parameters. | |
15420 | + * | |
15421 | + * We don't need to save the image size limit because it's not used | |
15422 | + * during resume and will be restored with the image anyway. | |
15423 | + */ | |
15424 | + | |
15425 | + memcpy((char *) &pagedir1, | |
15426 | + (char *) &toi_header->pagedir, sizeof(pagedir1)); | |
15427 | + toi_result = toi_header->param0; | |
7e46296a AM |
15428 | + if (!toi_bkd.toi_debug_state) { |
15429 | + toi_bkd.toi_action = toi_header->param1; | |
15430 | + toi_bkd.toi_debug_state = toi_header->param2; | |
15431 | + toi_bkd.toi_default_console_level = toi_header->param3; | |
15432 | + } | |
2380c486 JR |
15433 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); |
15434 | + pagedir2.size = toi_header->pageset_2_size; | |
15435 | + for (i = 0; i < 4; i++) | |
15436 | + toi_bkd.toi_io_time[i/2][i%2] = | |
15437 | + toi_header->io_time[i/2][i%2]; | |
15438 | + | |
15439 | + set_toi_state(TOI_BOOT_KERNEL); | |
15440 | + boot_kernel_data_buffer = toi_header->bkd; | |
15441 | + | |
cacc47f8 AM |
15442 | + read_if_version(1, toi_max_workers, "TuxOnIce max workers", |
15443 | + goto out_remove_image); | |
5dd10c98 AM |
15444 | + |
15445 | + /* Read filesystem info */ | |
15446 | + if (fs_info_load_and_check()) { | |
15447 | + printk(KERN_EMERG "TuxOnIce: File system mount time checks " | |
15448 | + "failed. Refusing to corrupt your filesystems!\n"); | |
15449 | + goto out_remove_image; | |
15450 | + } | |
15451 | + | |
2380c486 JR |
15452 | + /* Read module configurations */ |
15453 | + result = read_module_configs(); | |
15454 | + if (result) { | |
15455 | + pagedir1.size = 0; | |
15456 | + pagedir2.size = 0; | |
15457 | + printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module " | |
15458 | + "configurations.\n"); | |
15459 | + clear_action_state(TOI_KEEP_IMAGE); | |
15460 | + goto out_remove_image; | |
15461 | + } | |
15462 | + | |
15463 | + toi_prepare_console(); | |
15464 | + | |
15465 | + set_toi_state(TOI_NOW_RESUMING); | |
15466 | + | |
15467 | + if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) { | |
15468 | + toi_prepare_status(DONT_CLEAR_BAR, "Disable nonboot cpus."); | |
15469 | + if (disable_nonboot_cpus()) { | |
15470 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
15471 | + goto out_reset_console; | |
15472 | + } | |
15473 | + } | |
15474 | + | |
15475 | + if (usermodehelper_disable()) | |
15476 | + goto out_enable_nonboot_cpus; | |
15477 | + | |
15478 | + current->flags |= PF_NOFREEZE; | |
15479 | + freeze_result = FREEZE_IN_PROGRESS; | |
15480 | + | |
15481 | + schedule_work_on(first_cpu(cpu_online_map), &freeze_work); | |
15482 | + | |
15483 | + toi_cond_pause(1, "About to read original pageset1 locations."); | |
15484 | + | |
15485 | + /* | |
7e46296a | 15486 | + * See _toi_rw_header_chunk in tuxonice_bio.c: |
2380c486 JR |
15487 | + * Initialize pageset1_map by reading the map from the image. |
15488 | + */ | |
15489 | + if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk)) | |
15490 | + goto out_thaw; | |
15491 | + | |
15492 | + /* | |
7e46296a | 15493 | + * See toi_rw_cleanup in tuxonice_bio.c: |
2380c486 JR |
15494 | + * Clean up after reading the header. |
15495 | + */ | |
15496 | + result = toiActiveAllocator->read_header_cleanup(); | |
15497 | + if (result) { | |
15498 | + printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the " | |
15499 | + "image header.\n"); | |
15500 | + goto out_thaw; | |
15501 | + } | |
15502 | + | |
15503 | + toi_cond_pause(1, "About to read pagedir."); | |
15504 | + | |
15505 | + /* | |
15506 | + * Get the addresses of pages into which we will load the kernel to | |
15507 | + * be copied back and check if they conflict with the ones we are using. | |
15508 | + */ | |
15509 | + if (toi_get_pageset1_load_addresses()) { | |
15510 | + printk(KERN_INFO "TuxOnIce: Failed to get load addresses for " | |
15511 | + "pageset1.\n"); | |
15512 | + goto out_thaw; | |
15513 | + } | |
15514 | + | |
15515 | + /* Read the original kernel back */ | |
15516 | + toi_cond_pause(1, "About to read pageset 1."); | |
15517 | + | |
15518 | + /* Given the pagemap, read back the data from disk */ | |
15519 | + if (read_pageset(&pagedir1, 0)) { | |
15520 | + toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1."); | |
15521 | + result = -EIO; | |
15522 | + goto out_thaw; | |
15523 | + } | |
15524 | + | |
15525 | + toi_cond_pause(1, "About to restore original kernel."); | |
15526 | + result = 0; | |
15527 | + | |
15528 | + if (!test_action_state(TOI_KEEP_IMAGE) && | |
15529 | + toiActiveAllocator->mark_resume_attempted) | |
15530 | + toiActiveAllocator->mark_resume_attempted(1); | |
15531 | + | |
15532 | + wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); | |
15533 | +out: | |
15534 | + current->flags &= ~PF_NOFREEZE; | |
15535 | + toi_free_page(25, (unsigned long) header_buffer); | |
15536 | + return result; | |
15537 | + | |
15538 | +out_thaw: | |
15539 | + wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); | |
7e46296a | 15540 | + trap_non_toi_io = 0; |
2380c486 JR |
15541 | + thaw_processes(); |
15542 | + usermodehelper_enable(); | |
15543 | +out_enable_nonboot_cpus: | |
15544 | + enable_nonboot_cpus(); | |
15545 | +out_reset_console: | |
15546 | + toi_cleanup_console(); | |
15547 | +out_remove_image: | |
15548 | + result = -EINVAL; | |
15549 | + if (!test_action_state(TOI_KEEP_IMAGE)) | |
15550 | + toiActiveAllocator->remove_image(); | |
15551 | + toiActiveAllocator->read_header_cleanup(); | |
15552 | + noresume_reset_modules(); | |
15553 | + goto out; | |
15554 | +} | |
15555 | + | |
15556 | +/** | |
15557 | + * read_pageset1 - highlevel function to read the saved pages | |
15558 | + * | |
15559 | + * Attempt to read the header and pageset1 of a hibernate image. | |
15560 | + * Handle the outcome, complaining where appropriate. | |
15561 | + **/ | |
15562 | +int read_pageset1(void) | |
15563 | +{ | |
15564 | + int error; | |
15565 | + | |
15566 | + error = __read_pageset1(); | |
15567 | + | |
15568 | + if (error && error != -ENODATA && error != -EINVAL && | |
15569 | + !test_result_state(TOI_ABORTED)) | |
15570 | + abort_hibernate(TOI_IMAGE_ERROR, | |
15571 | + "TuxOnIce: Error %d resuming\n", error); | |
15572 | + | |
15573 | + return error; | |
15574 | +} | |
15575 | + | |
15576 | +/** | |
15577 | + * get_have_image_data - check the image header | |
15578 | + **/ | |
15579 | +static char *get_have_image_data(void) | |
15580 | +{ | |
15581 | + char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP); | |
15582 | + struct toi_header *toi_header; | |
15583 | + | |
15584 | + if (!output_buffer) { | |
15585 | + printk(KERN_INFO "Output buffer null.\n"); | |
15586 | + return NULL; | |
15587 | + } | |
15588 | + | |
15589 | + /* Check for an image */ | |
15590 | + if (!toiActiveAllocator->image_exists(1) || | |
15591 | + toiActiveAllocator->read_header_init() || | |
15592 | + toiActiveAllocator->rw_header_chunk(READ, NULL, | |
15593 | + output_buffer, sizeof(struct toi_header))) { | |
15594 | + sprintf(output_buffer, "0\n"); | |
15595 | + /* | |
15596 | + * From an initrd/ramfs, catting have_image and | |
15597 | + * getting a result of 0 is sufficient. | |
15598 | + */ | |
15599 | + clear_toi_state(TOI_BOOT_TIME); | |
15600 | + goto out; | |
15601 | + } | |
15602 | + | |
15603 | + toi_header = (struct toi_header *) output_buffer; | |
15604 | + | |
15605 | + sprintf(output_buffer, "1\n%s\n%s\n", | |
15606 | + toi_header->uts.machine, | |
15607 | + toi_header->uts.version); | |
15608 | + | |
15609 | + /* Check whether we've resumed before */ | |
15610 | + if (test_toi_state(TOI_RESUMED_BEFORE)) | |
15611 | + strcat(output_buffer, "Resumed before.\n"); | |
15612 | + | |
15613 | +out: | |
15614 | + noresume_reset_modules(); | |
15615 | + return output_buffer; | |
15616 | +} | |
15617 | + | |
15618 | +/** | |
15619 | + * read_pageset2 - read second part of the image | |
15620 | + * @overwrittenpagesonly: Read only pages which would have been | |
15621 | + * verwritten by pageset1? | |
15622 | + * | |
15623 | + * Read in part or all of pageset2 of an image, depending upon | |
15624 | + * whether we are hibernating and have only overwritten a portion | |
15625 | + * with pageset1 pages, or are resuming and need to read them | |
15626 | + * all. | |
15627 | + * | |
15628 | + * Returns: Int | |
15629 | + * Zero if no error, otherwise the error value. | |
15630 | + **/ | |
15631 | +int read_pageset2(int overwrittenpagesonly) | |
15632 | +{ | |
15633 | + int result = 0; | |
15634 | + | |
15635 | + if (!pagedir2.size) | |
15636 | + return 0; | |
15637 | + | |
15638 | + result = read_pageset(&pagedir2, overwrittenpagesonly); | |
15639 | + | |
15640 | + toi_cond_pause(1, "Pagedir 2 read."); | |
15641 | + | |
15642 | + return result; | |
15643 | +} | |
15644 | + | |
15645 | +/** | |
15646 | + * image_exists_read - has an image been found? | |
15647 | + * @page: Output buffer | |
15648 | + * | |
15649 | + * Store 0 or 1 in page, depending on whether an image is found. | |
15650 | + * Incoming buffer is PAGE_SIZE and result is guaranteed | |
15651 | + * to be far less than that, so we don't worry about | |
15652 | + * overflow. | |
15653 | + **/ | |
15654 | +int image_exists_read(const char *page, int count) | |
15655 | +{ | |
15656 | + int len = 0; | |
15657 | + char *result; | |
15658 | + | |
15659 | + if (toi_activate_storage(0)) | |
15660 | + return count; | |
15661 | + | |
15662 | + if (!test_toi_state(TOI_RESUME_DEVICE_OK)) | |
15663 | + toi_attempt_to_parse_resume_device(0); | |
15664 | + | |
15665 | + if (!toiActiveAllocator) { | |
15666 | + len = sprintf((char *) page, "-1\n"); | |
15667 | + } else { | |
15668 | + result = get_have_image_data(); | |
15669 | + if (result) { | |
15670 | + len = sprintf((char *) page, "%s", result); | |
15671 | + toi_free_page(26, (unsigned long) result); | |
15672 | + } | |
15673 | + } | |
15674 | + | |
15675 | + toi_deactivate_storage(0); | |
15676 | + | |
15677 | + return len; | |
15678 | +} | |
15679 | + | |
15680 | +/** | |
15681 | + * image_exists_write - invalidate an image if one exists | |
15682 | + **/ | |
15683 | +int image_exists_write(const char *buffer, int count) | |
15684 | +{ | |
15685 | + if (toi_activate_storage(0)) | |
15686 | + return count; | |
15687 | + | |
15688 | + if (toiActiveAllocator && toiActiveAllocator->image_exists(1)) | |
15689 | + toiActiveAllocator->remove_image(); | |
15690 | + | |
15691 | + toi_deactivate_storage(0); | |
15692 | + | |
15693 | + clear_result_state(TOI_KEPT_IMAGE); | |
15694 | + | |
15695 | + return count; | |
15696 | +} | |
15697 | diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h | |
15698 | new file mode 100644 | |
5dd10c98 | 15699 | index 0000000..fe37713 |
2380c486 JR |
15700 | --- /dev/null |
15701 | +++ b/kernel/power/tuxonice_io.h | |
5dd10c98 | 15702 | @@ -0,0 +1,74 @@ |
2380c486 JR |
15703 | +/* |
15704 | + * kernel/power/tuxonice_io.h | |
15705 | + * | |
5dd10c98 | 15706 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
15707 | + * |
15708 | + * This file is released under the GPLv2. | |
15709 | + * | |
15710 | + * It contains high level IO routines for hibernating. | |
15711 | + * | |
15712 | + */ | |
15713 | + | |
15714 | +#include <linux/utsname.h> | |
15715 | +#include "tuxonice_pagedir.h" | |
2380c486 JR |
15716 | + |
15717 | +/* Non-module data saved in our image header */ | |
15718 | +struct toi_header { | |
15719 | + /* | |
15720 | + * Mirror struct swsusp_info, but without | |
15721 | + * the page aligned attribute | |
15722 | + */ | |
15723 | + struct new_utsname uts; | |
15724 | + u32 version_code; | |
15725 | + unsigned long num_physpages; | |
15726 | + int cpus; | |
15727 | + unsigned long image_pages; | |
15728 | + unsigned long pages; | |
15729 | + unsigned long size; | |
15730 | + | |
15731 | + /* Our own data */ | |
15732 | + unsigned long orig_mem_free; | |
15733 | + int page_size; | |
15734 | + int pageset_2_size; | |
15735 | + int param0; | |
15736 | + int param1; | |
15737 | + int param2; | |
15738 | + int param3; | |
15739 | + int progress0; | |
15740 | + int progress1; | |
15741 | + int progress2; | |
15742 | + int progress3; | |
15743 | + int io_time[2][2]; | |
15744 | + struct pagedir pagedir; | |
15745 | + dev_t root_fs; | |
15746 | + unsigned long bkd; /* Boot kernel data locn */ | |
15747 | +}; | |
15748 | + | |
15749 | +extern int write_pageset(struct pagedir *pagedir); | |
15750 | +extern int write_image_header(void); | |
15751 | +extern int read_pageset1(void); | |
15752 | +extern int read_pageset2(int overwrittenpagesonly); | |
15753 | + | |
15754 | +extern int toi_attempt_to_parse_resume_device(int quiet); | |
15755 | +extern void attempt_to_parse_resume_device2(void); | |
15756 | +extern void attempt_to_parse_alt_resume_param(void); | |
15757 | +int image_exists_read(const char *page, int count); | |
15758 | +int image_exists_write(const char *buffer, int count); | |
15759 | +extern void save_restore_alt_param(int replace, int quiet); | |
15760 | +extern atomic_t toi_io_workers; | |
15761 | + | |
15762 | +/* Args to save_restore_alt_param */ | |
15763 | +#define RESTORE 0 | |
15764 | +#define SAVE 1 | |
15765 | + | |
15766 | +#define NOQUIET 0 | |
15767 | +#define QUIET 1 | |
15768 | + | |
15769 | +extern dev_t name_to_dev_t(char *line); | |
15770 | + | |
15771 | +extern wait_queue_head_t toi_io_queue_flusher; | |
15772 | +extern int toi_bio_queue_flusher_should_finish; | |
5dd10c98 AM |
15773 | + |
15774 | +int fs_info_space_needed(void); | |
15775 | + | |
15776 | +extern int toi_max_workers; | |
2380c486 JR |
15777 | diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c |
15778 | new file mode 100644 | |
5dd10c98 | 15779 | index 0000000..4cc24a9 |
2380c486 JR |
15780 | --- /dev/null |
15781 | +++ b/kernel/power/tuxonice_modules.c | |
5dd10c98 | 15782 | @@ -0,0 +1,522 @@ |
2380c486 JR |
15783 | +/* |
15784 | + * kernel/power/tuxonice_modules.c | |
15785 | + * | |
5dd10c98 | 15786 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
15787 | + * |
15788 | + */ | |
15789 | + | |
15790 | +#include <linux/suspend.h> | |
2380c486 JR |
15791 | +#include "tuxonice.h" |
15792 | +#include "tuxonice_modules.h" | |
15793 | +#include "tuxonice_sysfs.h" | |
15794 | +#include "tuxonice_ui.h" | |
15795 | + | |
15796 | +LIST_HEAD(toi_filters); | |
15797 | +LIST_HEAD(toiAllocators); | |
7e46296a | 15798 | + |
2380c486 | 15799 | +LIST_HEAD(toi_modules); |
7e46296a | 15800 | +EXPORT_SYMBOL_GPL(toi_modules); |
2380c486 JR |
15801 | + |
15802 | +struct toi_module_ops *toiActiveAllocator; | |
15803 | +EXPORT_SYMBOL_GPL(toiActiveAllocator); | |
15804 | + | |
15805 | +static int toi_num_filters; | |
15806 | +int toiNumAllocators, toi_num_modules; | |
15807 | + | |
15808 | +/* | |
15809 | + * toi_header_storage_for_modules | |
15810 | + * | |
15811 | + * Returns the amount of space needed to store configuration | |
15812 | + * data needed by the modules prior to copying back the original | |
15813 | + * kernel. We can exclude data for pageset2 because it will be | |
15814 | + * available anyway once the kernel is copied back. | |
15815 | + */ | |
15816 | +long toi_header_storage_for_modules(void) | |
15817 | +{ | |
15818 | + struct toi_module_ops *this_module; | |
15819 | + int bytes = 0; | |
15820 | + | |
15821 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15822 | + if (!this_module->enabled || | |
15823 | + (this_module->type == WRITER_MODULE && | |
15824 | + toiActiveAllocator != this_module)) | |
15825 | + continue; | |
15826 | + if (this_module->storage_needed) { | |
15827 | + int this = this_module->storage_needed() + | |
15828 | + sizeof(struct toi_module_header) + | |
15829 | + sizeof(int); | |
15830 | + this_module->header_requested = this; | |
15831 | + bytes += this; | |
15832 | + } | |
15833 | + } | |
15834 | + | |
15835 | + /* One more for the empty terminator */ | |
15836 | + return bytes + sizeof(struct toi_module_header); | |
15837 | +} | |
15838 | + | |
0ada99ac | 15839 | +void print_toi_header_storage_for_modules(void) |
15840 | +{ | |
15841 | + struct toi_module_ops *this_module; | |
15842 | + int bytes = 0; | |
15843 | + | |
15844 | + printk(KERN_DEBUG "Header storage:\n"); | |
15845 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15846 | + if (!this_module->enabled || | |
15847 | + (this_module->type == WRITER_MODULE && | |
15848 | + toiActiveAllocator != this_module)) | |
15849 | + continue; | |
15850 | + if (this_module->storage_needed) { | |
15851 | + int this = this_module->storage_needed() + | |
15852 | + sizeof(struct toi_module_header) + | |
15853 | + sizeof(int); | |
15854 | + this_module->header_requested = this; | |
15855 | + bytes += this; | |
15856 | + printk(KERN_DEBUG "+ %16s : %-4d/%d.\n", | |
15857 | + this_module->name, | |
15858 | + this_module->header_used, this); | |
15859 | + } | |
15860 | + } | |
15861 | + | |
5dd10c98 | 15862 | + printk(KERN_DEBUG "+ empty terminator : %zu.\n", |
0ada99ac | 15863 | + sizeof(struct toi_module_header)); |
15864 | + printk(KERN_DEBUG " ====\n"); | |
5dd10c98 | 15865 | + printk(KERN_DEBUG " %zu\n", |
0ada99ac | 15866 | + bytes + sizeof(struct toi_module_header)); |
15867 | +} | |
9474138d | 15868 | +EXPORT_SYMBOL_GPL(print_toi_header_storage_for_modules); |
0ada99ac | 15869 | + |
2380c486 JR |
15870 | +/* |
15871 | + * toi_memory_for_modules | |
15872 | + * | |
15873 | + * Returns the amount of memory requested by modules for | |
15874 | + * doing their work during the cycle. | |
15875 | + */ | |
15876 | + | |
15877 | +long toi_memory_for_modules(int print_parts) | |
15878 | +{ | |
15879 | + long bytes = 0, result; | |
15880 | + struct toi_module_ops *this_module; | |
15881 | + | |
15882 | + if (print_parts) | |
15883 | + printk(KERN_INFO "Memory for modules:\n===================\n"); | |
15884 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15885 | + int this; | |
15886 | + if (!this_module->enabled) | |
15887 | + continue; | |
15888 | + if (this_module->memory_needed) { | |
15889 | + this = this_module->memory_needed(); | |
15890 | + if (print_parts) | |
15891 | + printk(KERN_INFO "%10d bytes (%5ld pages) for " | |
15892 | + "module '%s'.\n", this, | |
15893 | + DIV_ROUND_UP(this, PAGE_SIZE), | |
15894 | + this_module->name); | |
15895 | + bytes += this; | |
15896 | + } | |
15897 | + } | |
15898 | + | |
15899 | + result = DIV_ROUND_UP(bytes, PAGE_SIZE); | |
15900 | + if (print_parts) | |
15901 | + printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result); | |
15902 | + | |
15903 | + return result; | |
15904 | +} | |
15905 | + | |
15906 | +/* | |
15907 | + * toi_expected_compression_ratio | |
15908 | + * | |
15909 | + * Returns the compression ratio expected when saving the image. | |
15910 | + */ | |
15911 | + | |
15912 | +int toi_expected_compression_ratio(void) | |
15913 | +{ | |
15914 | + int ratio = 100; | |
15915 | + struct toi_module_ops *this_module; | |
15916 | + | |
15917 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15918 | + if (!this_module->enabled) | |
15919 | + continue; | |
15920 | + if (this_module->expected_compression) | |
15921 | + ratio = ratio * this_module->expected_compression() | |
15922 | + / 100; | |
15923 | + } | |
15924 | + | |
15925 | + return ratio; | |
15926 | +} | |
15927 | + | |
15928 | +/* toi_find_module_given_dir | |
15929 | + * Functionality : Return a module (if found), given a pointer | |
15930 | + * to its directory name | |
15931 | + */ | |
15932 | + | |
15933 | +static struct toi_module_ops *toi_find_module_given_dir(char *name) | |
15934 | +{ | |
15935 | + struct toi_module_ops *this_module, *found_module = NULL; | |
15936 | + | |
15937 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15938 | + if (!strcmp(name, this_module->directory)) { | |
15939 | + found_module = this_module; | |
15940 | + break; | |
15941 | + } | |
15942 | + } | |
15943 | + | |
15944 | + return found_module; | |
15945 | +} | |
15946 | + | |
15947 | +/* toi_find_module_given_name | |
15948 | + * Functionality : Return a module (if found), given a pointer | |
15949 | + * to its name | |
15950 | + */ | |
15951 | + | |
15952 | +struct toi_module_ops *toi_find_module_given_name(char *name) | |
15953 | +{ | |
15954 | + struct toi_module_ops *this_module, *found_module = NULL; | |
15955 | + | |
15956 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15957 | + if (!strcmp(name, this_module->name)) { | |
15958 | + found_module = this_module; | |
15959 | + break; | |
15960 | + } | |
15961 | + } | |
15962 | + | |
15963 | + return found_module; | |
15964 | +} | |
15965 | + | |
15966 | +/* | |
15967 | + * toi_print_module_debug_info | |
15968 | + * Functionality : Get debugging info from modules into a buffer. | |
15969 | + */ | |
15970 | +int toi_print_module_debug_info(char *buffer, int buffer_size) | |
15971 | +{ | |
15972 | + struct toi_module_ops *this_module; | |
15973 | + int len = 0; | |
15974 | + | |
15975 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
15976 | + if (!this_module->enabled) | |
15977 | + continue; | |
15978 | + if (this_module->print_debug_info) { | |
15979 | + int result; | |
15980 | + result = this_module->print_debug_info(buffer + len, | |
15981 | + buffer_size - len); | |
15982 | + len += result; | |
15983 | + } | |
15984 | + } | |
15985 | + | |
15986 | + /* Ensure null terminated */ | |
15987 | + buffer[buffer_size] = 0; | |
15988 | + | |
15989 | + return len; | |
15990 | +} | |
15991 | + | |
15992 | +/* | |
15993 | + * toi_register_module | |
15994 | + * | |
15995 | + * Register a module. | |
15996 | + */ | |
15997 | +int toi_register_module(struct toi_module_ops *module) | |
15998 | +{ | |
15999 | + int i; | |
16000 | + struct kobject *kobj; | |
16001 | + | |
16002 | + module->enabled = 1; | |
16003 | + | |
16004 | + if (toi_find_module_given_name(module->name)) { | |
16005 | + printk(KERN_INFO "TuxOnIce: Trying to load module %s," | |
16006 | + " which is already registered.\n", | |
16007 | + module->name); | |
16008 | + return -EBUSY; | |
16009 | + } | |
16010 | + | |
16011 | + switch (module->type) { | |
16012 | + case FILTER_MODULE: | |
16013 | + list_add_tail(&module->type_list, &toi_filters); | |
16014 | + toi_num_filters++; | |
16015 | + break; | |
16016 | + case WRITER_MODULE: | |
16017 | + list_add_tail(&module->type_list, &toiAllocators); | |
16018 | + toiNumAllocators++; | |
16019 | + break; | |
16020 | + case MISC_MODULE: | |
16021 | + case MISC_HIDDEN_MODULE: | |
7e46296a | 16022 | + case BIO_ALLOCATOR_MODULE: |
2380c486 JR |
16023 | + break; |
16024 | + default: | |
e999739a | 16025 | + printk(KERN_ERR "Hmmm. Module '%s' has an invalid type." |
2380c486 JR |
16026 | + " It has been ignored.\n", module->name); |
16027 | + return -EINVAL; | |
16028 | + } | |
16029 | + list_add_tail(&module->module_list, &toi_modules); | |
16030 | + toi_num_modules++; | |
16031 | + | |
16032 | + if ((!module->directory && !module->shared_directory) || | |
16033 | + !module->sysfs_data || !module->num_sysfs_entries) | |
16034 | + return 0; | |
16035 | + | |
16036 | + /* | |
16037 | + * Modules may share a directory, but those with shared_dir | |
16038 | + * set must be loaded (via symbol dependencies) after parents | |
16039 | + * and unloaded beforehand. | |
16040 | + */ | |
16041 | + if (module->shared_directory) { | |
16042 | + struct toi_module_ops *shared = | |
16043 | + toi_find_module_given_dir(module->shared_directory); | |
16044 | + if (!shared) { | |
e999739a | 16045 | + printk(KERN_ERR "TuxOnIce: Module %s wants to share " |
16046 | + "%s's directory but %s isn't loaded.\n", | |
2380c486 JR |
16047 | + module->name, module->shared_directory, |
16048 | + module->shared_directory); | |
16049 | + toi_unregister_module(module); | |
16050 | + return -ENODEV; | |
16051 | + } | |
16052 | + kobj = shared->dir_kobj; | |
16053 | + } else { | |
16054 | + if (!strncmp(module->directory, "[ROOT]", 6)) | |
16055 | + kobj = tuxonice_kobj; | |
16056 | + else | |
16057 | + kobj = make_toi_sysdir(module->directory); | |
16058 | + } | |
16059 | + module->dir_kobj = kobj; | |
16060 | + for (i = 0; i < module->num_sysfs_entries; i++) { | |
16061 | + int result = toi_register_sysfs_file(kobj, | |
16062 | + &module->sysfs_data[i]); | |
16063 | + if (result) | |
16064 | + return result; | |
16065 | + } | |
16066 | + return 0; | |
16067 | +} | |
16068 | +EXPORT_SYMBOL_GPL(toi_register_module); | |
16069 | + | |
16070 | +/* | |
16071 | + * toi_unregister_module | |
16072 | + * | |
16073 | + * Remove a module. | |
16074 | + */ | |
16075 | +void toi_unregister_module(struct toi_module_ops *module) | |
16076 | +{ | |
16077 | + int i; | |
16078 | + | |
16079 | + if (module->dir_kobj) | |
16080 | + for (i = 0; i < module->num_sysfs_entries; i++) | |
16081 | + toi_unregister_sysfs_file(module->dir_kobj, | |
16082 | + &module->sysfs_data[i]); | |
16083 | + | |
16084 | + if (!module->shared_directory && module->directory && | |
16085 | + strncmp(module->directory, "[ROOT]", 6)) | |
16086 | + remove_toi_sysdir(module->dir_kobj); | |
16087 | + | |
16088 | + switch (module->type) { | |
16089 | + case FILTER_MODULE: | |
16090 | + list_del(&module->type_list); | |
16091 | + toi_num_filters--; | |
16092 | + break; | |
16093 | + case WRITER_MODULE: | |
16094 | + list_del(&module->type_list); | |
16095 | + toiNumAllocators--; | |
16096 | + if (toiActiveAllocator == module) { | |
16097 | + toiActiveAllocator = NULL; | |
16098 | + clear_toi_state(TOI_CAN_RESUME); | |
16099 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
16100 | + } | |
16101 | + break; | |
16102 | + case MISC_MODULE: | |
16103 | + case MISC_HIDDEN_MODULE: | |
7e46296a | 16104 | + case BIO_ALLOCATOR_MODULE: |
2380c486 JR |
16105 | + break; |
16106 | + default: | |
e999739a | 16107 | + printk(KERN_ERR "Module '%s' has an invalid type." |
2380c486 JR |
16108 | + " It has been ignored.\n", module->name); |
16109 | + return; | |
16110 | + } | |
16111 | + list_del(&module->module_list); | |
16112 | + toi_num_modules--; | |
16113 | +} | |
16114 | +EXPORT_SYMBOL_GPL(toi_unregister_module); | |
16115 | + | |
16116 | +/* | |
16117 | + * toi_move_module_tail | |
16118 | + * | |
16119 | + * Rearrange modules when reloading the config. | |
16120 | + */ | |
16121 | +void toi_move_module_tail(struct toi_module_ops *module) | |
16122 | +{ | |
16123 | + switch (module->type) { | |
16124 | + case FILTER_MODULE: | |
16125 | + if (toi_num_filters > 1) | |
16126 | + list_move_tail(&module->type_list, &toi_filters); | |
16127 | + break; | |
16128 | + case WRITER_MODULE: | |
16129 | + if (toiNumAllocators > 1) | |
16130 | + list_move_tail(&module->type_list, &toiAllocators); | |
16131 | + break; | |
16132 | + case MISC_MODULE: | |
16133 | + case MISC_HIDDEN_MODULE: | |
7e46296a | 16134 | + case BIO_ALLOCATOR_MODULE: |
2380c486 JR |
16135 | + break; |
16136 | + default: | |
e999739a | 16137 | + printk(KERN_ERR "Module '%s' has an invalid type." |
2380c486 JR |
16138 | + " It has been ignored.\n", module->name); |
16139 | + return; | |
16140 | + } | |
16141 | + if ((toi_num_filters + toiNumAllocators) > 1) | |
16142 | + list_move_tail(&module->module_list, &toi_modules); | |
16143 | +} | |
16144 | + | |
16145 | +/* | |
16146 | + * toi_initialise_modules | |
16147 | + * | |
16148 | + * Get ready to do some work! | |
16149 | + */ | |
16150 | +int toi_initialise_modules(int starting_cycle, int early) | |
16151 | +{ | |
16152 | + struct toi_module_ops *this_module; | |
16153 | + int result; | |
16154 | + | |
16155 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16156 | + this_module->header_requested = 0; | |
16157 | + this_module->header_used = 0; | |
16158 | + if (!this_module->enabled) | |
16159 | + continue; | |
16160 | + if (this_module->early != early) | |
16161 | + continue; | |
16162 | + if (this_module->initialise) { | |
2380c486 JR |
16163 | + result = this_module->initialise(starting_cycle); |
16164 | + if (result) { | |
16165 | + toi_cleanup_modules(starting_cycle); | |
16166 | + return result; | |
16167 | + } | |
16168 | + this_module->initialised = 1; | |
16169 | + } | |
16170 | + } | |
16171 | + | |
16172 | + return 0; | |
16173 | +} | |
16174 | + | |
16175 | +/* | |
16176 | + * toi_cleanup_modules | |
16177 | + * | |
16178 | + * Tell modules the work is done. | |
16179 | + */ | |
16180 | +void toi_cleanup_modules(int finishing_cycle) | |
16181 | +{ | |
16182 | + struct toi_module_ops *this_module; | |
16183 | + | |
16184 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16185 | + if (!this_module->enabled || !this_module->initialised) | |
16186 | + continue; | |
7e46296a | 16187 | + if (this_module->cleanup) |
2380c486 | 16188 | + this_module->cleanup(finishing_cycle); |
2380c486 JR |
16189 | + this_module->initialised = 0; |
16190 | + } | |
16191 | +} | |
16192 | + | |
16193 | +/* | |
5dd10c98 AM |
16194 | + * toi_pre_atomic_restore_modules |
16195 | + * | |
16196 | + * Get ready to do some work! | |
16197 | + */ | |
16198 | +void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd) | |
16199 | +{ | |
16200 | + struct toi_module_ops *this_module; | |
16201 | + | |
16202 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16203 | + if (this_module->enabled && this_module->pre_atomic_restore) | |
16204 | + this_module->pre_atomic_restore(bkd); | |
16205 | + } | |
16206 | +} | |
16207 | + | |
16208 | +/* | |
16209 | + * toi_post_atomic_restore_modules | |
16210 | + * | |
16211 | + * Get ready to do some work! | |
16212 | + */ | |
16213 | +void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd) | |
16214 | +{ | |
16215 | + struct toi_module_ops *this_module; | |
16216 | + | |
16217 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16218 | + if (this_module->enabled && this_module->post_atomic_restore) | |
16219 | + this_module->post_atomic_restore(bkd); | |
16220 | + } | |
16221 | +} | |
16222 | + | |
16223 | +/* | |
2380c486 JR |
16224 | + * toi_get_next_filter |
16225 | + * | |
16226 | + * Get the next filter in the pipeline. | |
16227 | + */ | |
16228 | +struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought) | |
16229 | +{ | |
16230 | + struct toi_module_ops *last_filter = NULL, *this_filter = NULL; | |
16231 | + | |
16232 | + list_for_each_entry(this_filter, &toi_filters, type_list) { | |
16233 | + if (!this_filter->enabled) | |
16234 | + continue; | |
16235 | + if ((last_filter == filter_sought) || (!filter_sought)) | |
16236 | + return this_filter; | |
16237 | + last_filter = this_filter; | |
16238 | + } | |
16239 | + | |
16240 | + return toiActiveAllocator; | |
16241 | +} | |
16242 | +EXPORT_SYMBOL_GPL(toi_get_next_filter); | |
16243 | + | |
16244 | +/** | |
16245 | + * toi_show_modules: Printk what support is loaded. | |
16246 | + */ | |
16247 | +void toi_print_modules(void) | |
16248 | +{ | |
16249 | + struct toi_module_ops *this_module; | |
16250 | + int prev = 0; | |
16251 | + | |
e999739a | 16252 | + printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for"); |
2380c486 JR |
16253 | + |
16254 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16255 | + if (this_module->type == MISC_HIDDEN_MODULE) | |
16256 | + continue; | |
16257 | + printk("%s %s%s%s", prev ? "," : "", | |
16258 | + this_module->enabled ? "" : "[", | |
16259 | + this_module->name, | |
16260 | + this_module->enabled ? "" : "]"); | |
16261 | + prev = 1; | |
16262 | + } | |
16263 | + | |
16264 | + printk(".\n"); | |
16265 | +} | |
16266 | + | |
16267 | +/* toi_get_modules | |
16268 | + * | |
16269 | + * Take a reference to modules so they can't go away under us. | |
16270 | + */ | |
16271 | + | |
16272 | +int toi_get_modules(void) | |
16273 | +{ | |
16274 | + struct toi_module_ops *this_module; | |
16275 | + | |
16276 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
16277 | + struct toi_module_ops *this_module2; | |
16278 | + | |
16279 | + if (try_module_get(this_module->module)) | |
16280 | + continue; | |
16281 | + | |
16282 | + /* Failed! Reverse gets and return error */ | |
16283 | + list_for_each_entry(this_module2, &toi_modules, | |
16284 | + module_list) { | |
16285 | + if (this_module == this_module2) | |
16286 | + return -EINVAL; | |
16287 | + module_put(this_module2->module); | |
16288 | + } | |
16289 | + } | |
16290 | + return 0; | |
16291 | +} | |
16292 | + | |
16293 | +/* toi_put_modules | |
16294 | + * | |
16295 | + * Release our references to modules we used. | |
16296 | + */ | |
16297 | + | |
16298 | +void toi_put_modules(void) | |
16299 | +{ | |
16300 | + struct toi_module_ops *this_module; | |
16301 | + | |
16302 | + list_for_each_entry(this_module, &toi_modules, module_list) | |
16303 | + module_put(this_module->module); | |
16304 | +} | |
16305 | diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h | |
16306 | new file mode 100644 | |
85eb3c9d | 16307 | index 0000000..bf5d749 |
2380c486 JR |
16308 | --- /dev/null |
16309 | +++ b/kernel/power/tuxonice_modules.h | |
85eb3c9d | 16310 | @@ -0,0 +1,211 @@ |
2380c486 JR |
16311 | +/* |
16312 | + * kernel/power/tuxonice_modules.h | |
16313 | + * | |
5dd10c98 | 16314 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16315 | + * |
16316 | + * This file is released under the GPLv2. | |
16317 | + * | |
16318 | + * It contains declarations for modules. Modules are additions to | |
16319 | + * TuxOnIce that provide facilities such as image compression or | |
16320 | + * encryption, backends for storage of the image and user interfaces. | |
16321 | + * | |
16322 | + */ | |
16323 | + | |
16324 | +#ifndef TOI_MODULES_H | |
16325 | +#define TOI_MODULES_H | |
16326 | + | |
16327 | +/* This is the maximum size we store in the image header for a module name */ | |
16328 | +#define TOI_MAX_MODULE_NAME_LENGTH 30 | |
16329 | + | |
5dd10c98 AM |
16330 | +struct toi_boot_kernel_data; |
16331 | + | |
2380c486 JR |
16332 | +/* Per-module metadata */ |
16333 | +struct toi_module_header { | |
16334 | + char name[TOI_MAX_MODULE_NAME_LENGTH]; | |
16335 | + int enabled; | |
16336 | + int type; | |
16337 | + int index; | |
16338 | + int data_length; | |
16339 | + unsigned long signature; | |
16340 | +}; | |
16341 | + | |
16342 | +enum { | |
16343 | + FILTER_MODULE, | |
16344 | + WRITER_MODULE, | |
7e46296a AM |
16345 | + BIO_ALLOCATOR_MODULE, |
16346 | + MISC_MODULE, | |
2380c486 JR |
16347 | + MISC_HIDDEN_MODULE, |
16348 | +}; | |
16349 | + | |
16350 | +enum { | |
16351 | + TOI_ASYNC, | |
16352 | + TOI_SYNC | |
16353 | +}; | |
16354 | + | |
85eb3c9d AM |
16355 | +enum { |
16356 | + TOI_VIRT, | |
16357 | + TOI_PAGE, | |
16358 | +}; | |
16359 | + | |
16360 | +#define TOI_MAP(type, addr) \ | |
16361 | + (type == TOI_PAGE ? kmap(addr) : addr) | |
16362 | + | |
16363 | +#define TOI_UNMAP(type, addr) \ | |
16364 | + do { \ | |
16365 | + if (type == TOI_PAGE) \ | |
16366 | + kunmap(addr); \ | |
16367 | + } while(0) | |
16368 | + | |
2380c486 JR |
16369 | +struct toi_module_ops { |
16370 | + /* Functions common to all modules */ | |
16371 | + int type; | |
16372 | + char *name; | |
16373 | + char *directory; | |
16374 | + char *shared_directory; | |
16375 | + struct kobject *dir_kobj; | |
16376 | + struct module *module; | |
16377 | + int enabled, early, initialised; | |
16378 | + struct list_head module_list; | |
16379 | + | |
16380 | + /* List of filters or allocators */ | |
16381 | + struct list_head list, type_list; | |
16382 | + | |
16383 | + /* | |
16384 | + * Requirements for memory and storage in | |
16385 | + * the image header.. | |
16386 | + */ | |
16387 | + int (*memory_needed) (void); | |
16388 | + int (*storage_needed) (void); | |
16389 | + | |
16390 | + int header_requested, header_used; | |
16391 | + | |
16392 | + int (*expected_compression) (void); | |
16393 | + | |
16394 | + /* | |
16395 | + * Debug info | |
16396 | + */ | |
16397 | + int (*print_debug_info) (char *buffer, int size); | |
16398 | + int (*save_config_info) (char *buffer); | |
16399 | + void (*load_config_info) (char *buffer, int len); | |
16400 | + | |
16401 | + /* | |
16402 | + * Initialise & cleanup - general routines called | |
16403 | + * at the start and end of a cycle. | |
16404 | + */ | |
16405 | + int (*initialise) (int starting_cycle); | |
16406 | + void (*cleanup) (int finishing_cycle); | |
16407 | + | |
5dd10c98 AM |
16408 | + void (*pre_atomic_restore) (struct toi_boot_kernel_data *bkd); |
16409 | + void (*post_atomic_restore) (struct toi_boot_kernel_data *bkd); | |
16410 | + | |
2380c486 JR |
16411 | + /* |
16412 | + * Calls for allocating storage (allocators only). | |
16413 | + * | |
0ada99ac | 16414 | + * Header space is requested separately and cannot fail, but the |
16415 | + * reservation is only applied when main storage is allocated. | |
16416 | + * The header space reservation is thus always set prior to | |
16417 | + * requesting the allocation of storage - and prior to querying | |
16418 | + * how much storage is available. | |
2380c486 JR |
16419 | + */ |
16420 | + | |
92bca44c AM |
16421 | + unsigned long (*storage_available) (void); |
16422 | + void (*reserve_header_space) (unsigned long space_requested); | |
7e46296a | 16423 | + int (*register_storage) (void); |
92bca44c AM |
16424 | + int (*allocate_storage) (unsigned long space_requested); |
16425 | + unsigned long (*storage_allocated) (void); | |
2380c486 JR |
16426 | + |
16427 | + /* | |
16428 | + * Routines used in image I/O. | |
16429 | + */ | |
16430 | + int (*rw_init) (int rw, int stream_number); | |
16431 | + int (*rw_cleanup) (int rw); | |
85eb3c9d | 16432 | + int (*write_page) (unsigned long index, int buf_type, void *buf, |
2380c486 | 16433 | + unsigned int buf_size); |
85eb3c9d | 16434 | + int (*read_page) (unsigned long *index, int buf_type, void *buf, |
2380c486 | 16435 | + unsigned int *buf_size); |
0ada99ac | 16436 | + int (*io_flusher) (int rw); |
2380c486 JR |
16437 | + |
16438 | + /* Reset module if image exists but reading aborted */ | |
16439 | + void (*noresume_reset) (void); | |
16440 | + | |
16441 | + /* Read and write the metadata */ | |
16442 | + int (*write_header_init) (void); | |
16443 | + int (*write_header_cleanup) (void); | |
16444 | + | |
16445 | + int (*read_header_init) (void); | |
16446 | + int (*read_header_cleanup) (void); | |
16447 | + | |
5dd10c98 AM |
16448 | + /* To be called after read_header_init */ |
16449 | + int (*get_header_version) (void); | |
16450 | + | |
2380c486 JR |
16451 | + int (*rw_header_chunk) (int rw, struct toi_module_ops *owner, |
16452 | + char *buffer_start, int buffer_size); | |
16453 | + | |
16454 | + int (*rw_header_chunk_noreadahead) (int rw, | |
16455 | + struct toi_module_ops *owner, char *buffer_start, | |
16456 | + int buffer_size); | |
16457 | + | |
16458 | + /* Attempt to parse an image location */ | |
16459 | + int (*parse_sig_location) (char *buffer, int only_writer, int quiet); | |
16460 | + | |
16461 | + /* Throttle I/O according to throughput */ | |
16462 | + void (*update_throughput_throttle) (int jif_index); | |
16463 | + | |
16464 | + /* Flush outstanding I/O */ | |
0ada99ac | 16465 | + int (*finish_all_io) (void); |
2380c486 JR |
16466 | + |
16467 | + /* Determine whether image exists that we can restore */ | |
16468 | + int (*image_exists) (int quiet); | |
16469 | + | |
16470 | + /* Mark the image as having tried to resume */ | |
16471 | + int (*mark_resume_attempted) (int); | |
16472 | + | |
16473 | + /* Destroy image if one exists */ | |
16474 | + int (*remove_image) (void); | |
16475 | + | |
16476 | + /* Sysfs Data */ | |
16477 | + struct toi_sysfs_data *sysfs_data; | |
16478 | + int num_sysfs_entries; | |
7e46296a AM |
16479 | + |
16480 | + /* Block I/O allocator */ | |
16481 | + struct toi_bio_allocator_ops *bio_allocator_ops; | |
2380c486 JR |
16482 | +}; |
16483 | + | |
16484 | +extern int toi_num_modules, toiNumAllocators; | |
16485 | + | |
16486 | +extern struct toi_module_ops *toiActiveAllocator; | |
16487 | +extern struct list_head toi_filters, toiAllocators, toi_modules; | |
16488 | + | |
16489 | +extern void toi_prepare_console_modules(void); | |
16490 | +extern void toi_cleanup_console_modules(void); | |
16491 | + | |
16492 | +extern struct toi_module_ops *toi_find_module_given_name(char *name); | |
16493 | +extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *); | |
16494 | + | |
16495 | +extern int toi_register_module(struct toi_module_ops *module); | |
16496 | +extern void toi_move_module_tail(struct toi_module_ops *module); | |
16497 | + | |
16498 | +extern long toi_header_storage_for_modules(void); | |
16499 | +extern long toi_memory_for_modules(int print_parts); | |
0ada99ac | 16500 | +extern void print_toi_header_storage_for_modules(void); |
2380c486 JR |
16501 | +extern int toi_expected_compression_ratio(void); |
16502 | + | |
16503 | +extern int toi_print_module_debug_info(char *buffer, int buffer_size); | |
16504 | +extern int toi_register_module(struct toi_module_ops *module); | |
16505 | +extern void toi_unregister_module(struct toi_module_ops *module); | |
16506 | + | |
16507 | +extern int toi_initialise_modules(int starting_cycle, int early); | |
16508 | +#define toi_initialise_modules_early(starting) \ | |
16509 | + toi_initialise_modules(starting, 1) | |
16510 | +#define toi_initialise_modules_late(starting) \ | |
16511 | + toi_initialise_modules(starting, 0) | |
16512 | +extern void toi_cleanup_modules(int finishing_cycle); | |
16513 | + | |
5dd10c98 AM |
16514 | +extern void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd); |
16515 | +extern void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd); | |
16516 | + | |
2380c486 JR |
16517 | +extern void toi_print_modules(void); |
16518 | + | |
16519 | +int toi_get_modules(void); | |
16520 | +void toi_put_modules(void); | |
16521 | +#endif | |
16522 | diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c | |
16523 | new file mode 100644 | |
cacc47f8 | 16524 | index 0000000..c5208ee |
2380c486 JR |
16525 | --- /dev/null |
16526 | +++ b/kernel/power/tuxonice_netlink.c | |
cacc47f8 | 16527 | @@ -0,0 +1,345 @@ |
2380c486 JR |
16528 | +/* |
16529 | + * kernel/power/tuxonice_netlink.c | |
16530 | + * | |
5dd10c98 | 16531 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16532 | + * |
16533 | + * This file is released under the GPLv2. | |
16534 | + * | |
16535 | + * Functions for communicating with a userspace helper via netlink. | |
16536 | + */ | |
16537 | + | |
16538 | + | |
16539 | +#include <linux/suspend.h> | |
92bca44c | 16540 | +#include <linux/sched.h> |
2380c486 JR |
16541 | +#include "tuxonice_netlink.h" |
16542 | +#include "tuxonice.h" | |
16543 | +#include "tuxonice_modules.h" | |
16544 | +#include "tuxonice_alloc.h" | |
cacc47f8 | 16545 | +#include "tuxonice_builtin.h" |
2380c486 JR |
16546 | + |
16547 | +static struct user_helper_data *uhd_list; | |
16548 | + | |
16549 | +/* | |
16550 | + * Refill our pool of SKBs for use in emergencies (eg, when eating memory and | |
16551 | + * none can be allocated). | |
16552 | + */ | |
16553 | +static void toi_fill_skb_pool(struct user_helper_data *uhd) | |
16554 | +{ | |
16555 | + while (uhd->pool_level < uhd->pool_limit) { | |
16556 | + struct sk_buff *new_skb = | |
16557 | + alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); | |
16558 | + | |
16559 | + if (!new_skb) | |
16560 | + break; | |
16561 | + | |
16562 | + new_skb->next = uhd->emerg_skbs; | |
16563 | + uhd->emerg_skbs = new_skb; | |
16564 | + uhd->pool_level++; | |
16565 | + } | |
16566 | +} | |
16567 | + | |
16568 | +/* | |
16569 | + * Try to allocate a single skb. If we can't get one, try to use one from | |
16570 | + * our pool. | |
16571 | + */ | |
16572 | +static struct sk_buff *toi_get_skb(struct user_helper_data *uhd) | |
16573 | +{ | |
16574 | + struct sk_buff *skb = | |
16575 | + alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); | |
16576 | + | |
16577 | + if (skb) | |
16578 | + return skb; | |
16579 | + | |
16580 | + skb = uhd->emerg_skbs; | |
16581 | + if (skb) { | |
16582 | + uhd->pool_level--; | |
16583 | + uhd->emerg_skbs = skb->next; | |
16584 | + skb->next = NULL; | |
16585 | + } | |
16586 | + | |
16587 | + return skb; | |
16588 | +} | |
16589 | + | |
16590 | +static void put_skb(struct user_helper_data *uhd, struct sk_buff *skb) | |
16591 | +{ | |
16592 | + if (uhd->pool_level < uhd->pool_limit) { | |
16593 | + skb->next = uhd->emerg_skbs; | |
16594 | + uhd->emerg_skbs = skb; | |
16595 | + } else | |
16596 | + kfree_skb(skb); | |
16597 | +} | |
16598 | + | |
16599 | +void toi_send_netlink_message(struct user_helper_data *uhd, | |
16600 | + int type, void *params, size_t len) | |
16601 | +{ | |
16602 | + struct sk_buff *skb; | |
16603 | + struct nlmsghdr *nlh; | |
16604 | + void *dest; | |
16605 | + struct task_struct *t; | |
16606 | + | |
16607 | + if (uhd->pid == -1) | |
16608 | + return; | |
16609 | + | |
16610 | + if (uhd->debug) | |
16611 | + printk(KERN_ERR "toi_send_netlink_message: Send " | |
16612 | + "message type %d.\n", type); | |
16613 | + | |
16614 | + skb = toi_get_skb(uhd); | |
16615 | + if (!skb) { | |
16616 | + printk(KERN_INFO "toi_netlink: Can't allocate skb!\n"); | |
16617 | + return; | |
16618 | + } | |
16619 | + | |
16620 | + /* NLMSG_PUT contains a hidden goto nlmsg_failure */ | |
16621 | + nlh = NLMSG_PUT(skb, 0, uhd->sock_seq, type, len); | |
16622 | + uhd->sock_seq++; | |
16623 | + | |
16624 | + dest = NLMSG_DATA(nlh); | |
16625 | + if (params && len > 0) | |
16626 | + memcpy(dest, params, len); | |
16627 | + | |
16628 | + netlink_unicast(uhd->nl, skb, uhd->pid, 0); | |
16629 | + | |
cacc47f8 | 16630 | + toi_read_lock_tasklist(); |
92bca44c | 16631 | + t = find_task_by_pid_ns(uhd->pid, &init_pid_ns); |
2380c486 | 16632 | + if (!t) { |
cacc47f8 | 16633 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16634 | + if (uhd->pid > -1) |
16635 | + printk(KERN_INFO "Hmm. Can't find the userspace task" | |
16636 | + " %d.\n", uhd->pid); | |
16637 | + return; | |
16638 | + } | |
16639 | + wake_up_process(t); | |
cacc47f8 | 16640 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16641 | + |
16642 | + yield(); | |
16643 | + | |
16644 | + return; | |
16645 | + | |
16646 | +nlmsg_failure: | |
16647 | + if (skb) | |
16648 | + put_skb(uhd, skb); | |
16649 | + | |
16650 | + if (uhd->debug) | |
16651 | + printk(KERN_ERR "toi_send_netlink_message: Failed to send " | |
16652 | + "message type %d.\n", type); | |
16653 | +} | |
16654 | +EXPORT_SYMBOL_GPL(toi_send_netlink_message); | |
16655 | + | |
16656 | +static void send_whether_debugging(struct user_helper_data *uhd) | |
16657 | +{ | |
16658 | + static u8 is_debugging = 1; | |
16659 | + | |
16660 | + toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING, | |
16661 | + &is_debugging, sizeof(u8)); | |
16662 | +} | |
16663 | + | |
16664 | +/* | |
16665 | + * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we | |
16666 | + * are hibernating. | |
16667 | + */ | |
16668 | +static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid) | |
16669 | +{ | |
16670 | + struct task_struct *t; | |
16671 | + | |
16672 | + if (uhd->debug) | |
16673 | + printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid); | |
16674 | + | |
cacc47f8 | 16675 | + toi_read_lock_tasklist(); |
92bca44c | 16676 | + t = find_task_by_pid_ns(pid, &init_pid_ns); |
2380c486 | 16677 | + if (!t) { |
cacc47f8 | 16678 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16679 | + printk(KERN_INFO "Strange. Can't find the userspace task %d.\n", |
16680 | + pid); | |
16681 | + return -EINVAL; | |
16682 | + } | |
16683 | + | |
16684 | + t->flags |= PF_NOFREEZE; | |
16685 | + | |
cacc47f8 | 16686 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16687 | + uhd->pid = pid; |
16688 | + | |
16689 | + toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0); | |
16690 | + | |
16691 | + return 0; | |
16692 | +} | |
16693 | + | |
16694 | +/* | |
16695 | + * Called when the userspace process has informed us that it's ready to roll. | |
16696 | + */ | |
16697 | +static int nl_ready(struct user_helper_data *uhd, u32 version) | |
16698 | +{ | |
16699 | + if (version != uhd->interface_version) { | |
16700 | + printk(KERN_INFO "%s userspace process using invalid interface" | |
16701 | + " version (%d - kernel wants %d). Trying to " | |
16702 | + "continue without it.\n", | |
16703 | + uhd->name, version, uhd->interface_version); | |
16704 | + if (uhd->not_ready) | |
16705 | + uhd->not_ready(); | |
16706 | + return -EINVAL; | |
16707 | + } | |
16708 | + | |
16709 | + complete(&uhd->wait_for_process); | |
16710 | + | |
16711 | + return 0; | |
16712 | +} | |
16713 | + | |
16714 | +void toi_netlink_close_complete(struct user_helper_data *uhd) | |
16715 | +{ | |
16716 | + if (uhd->nl) { | |
16717 | + netlink_kernel_release(uhd->nl); | |
16718 | + uhd->nl = NULL; | |
16719 | + } | |
16720 | + | |
16721 | + while (uhd->emerg_skbs) { | |
16722 | + struct sk_buff *next = uhd->emerg_skbs->next; | |
16723 | + kfree_skb(uhd->emerg_skbs); | |
16724 | + uhd->emerg_skbs = next; | |
16725 | + } | |
16726 | + | |
16727 | + uhd->pid = -1; | |
16728 | +} | |
16729 | +EXPORT_SYMBOL_GPL(toi_netlink_close_complete); | |
16730 | + | |
16731 | +static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd, | |
16732 | + struct sk_buff *skb, struct nlmsghdr *nlh) | |
16733 | +{ | |
16734 | + int type = nlh->nlmsg_type; | |
16735 | + int *data; | |
16736 | + int err; | |
16737 | + | |
16738 | + if (uhd->debug) | |
16739 | + printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n", | |
16740 | + type); | |
16741 | + | |
16742 | + /* Let the more specific handler go first. It returns | |
16743 | + * 1 for valid messages that it doesn't know. */ | |
16744 | + err = uhd->rcv_msg(skb, nlh); | |
16745 | + if (err != 1) | |
16746 | + return err; | |
16747 | + | |
16748 | + /* Only allow one task to receive NOFREEZE privileges */ | |
16749 | + if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) { | |
16750 | + printk(KERN_INFO "Received extra nofreeze me requests.\n"); | |
16751 | + return -EBUSY; | |
16752 | + } | |
16753 | + | |
16754 | + data = NLMSG_DATA(nlh); | |
16755 | + | |
16756 | + switch (type) { | |
16757 | + case NETLINK_MSG_NOFREEZE_ME: | |
16758 | + return nl_set_nofreeze(uhd, nlh->nlmsg_pid); | |
16759 | + case NETLINK_MSG_GET_DEBUGGING: | |
16760 | + send_whether_debugging(uhd); | |
16761 | + return 0; | |
16762 | + case NETLINK_MSG_READY: | |
16763 | + if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) { | |
16764 | + printk(KERN_INFO "Invalid ready mesage.\n"); | |
16765 | + if (uhd->not_ready) | |
16766 | + uhd->not_ready(); | |
16767 | + return -EINVAL; | |
16768 | + } | |
16769 | + return nl_ready(uhd, (u32) *data); | |
16770 | + case NETLINK_MSG_CLEANUP: | |
16771 | + toi_netlink_close_complete(uhd); | |
16772 | + return 0; | |
16773 | + } | |
16774 | + | |
16775 | + return -EINVAL; | |
16776 | +} | |
16777 | + | |
16778 | +static void toi_user_rcv_skb(struct sk_buff *skb) | |
16779 | +{ | |
16780 | + int err; | |
16781 | + struct nlmsghdr *nlh; | |
16782 | + struct user_helper_data *uhd = uhd_list; | |
16783 | + | |
16784 | + while (uhd && uhd->netlink_id != skb->sk->sk_protocol) | |
16785 | + uhd = uhd->next; | |
16786 | + | |
16787 | + if (!uhd) | |
16788 | + return; | |
16789 | + | |
16790 | + while (skb->len >= NLMSG_SPACE(0)) { | |
16791 | + u32 rlen; | |
16792 | + | |
16793 | + nlh = (struct nlmsghdr *) skb->data; | |
16794 | + if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | |
16795 | + return; | |
16796 | + | |
16797 | + rlen = NLMSG_ALIGN(nlh->nlmsg_len); | |
16798 | + if (rlen > skb->len) | |
16799 | + rlen = skb->len; | |
16800 | + | |
16801 | + err = toi_nl_gen_rcv_msg(uhd, skb, nlh); | |
16802 | + if (err) | |
16803 | + netlink_ack(skb, nlh, err); | |
16804 | + else if (nlh->nlmsg_flags & NLM_F_ACK) | |
16805 | + netlink_ack(skb, nlh, 0); | |
16806 | + skb_pull(skb, rlen); | |
16807 | + } | |
16808 | +} | |
16809 | + | |
16810 | +static int netlink_prepare(struct user_helper_data *uhd) | |
16811 | +{ | |
16812 | + uhd->next = uhd_list; | |
16813 | + uhd_list = uhd; | |
16814 | + | |
16815 | + uhd->sock_seq = 0x42c0ffee; | |
16816 | + uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, 0, | |
16817 | + toi_user_rcv_skb, NULL, THIS_MODULE); | |
16818 | + if (!uhd->nl) { | |
16819 | + printk(KERN_INFO "Failed to allocate netlink socket for %s.\n", | |
16820 | + uhd->name); | |
16821 | + return -ENOMEM; | |
16822 | + } | |
16823 | + | |
16824 | + toi_fill_skb_pool(uhd); | |
16825 | + | |
16826 | + return 0; | |
16827 | +} | |
16828 | + | |
16829 | +void toi_netlink_close(struct user_helper_data *uhd) | |
16830 | +{ | |
16831 | + struct task_struct *t; | |
16832 | + | |
cacc47f8 | 16833 | + toi_read_lock_tasklist(); |
92bca44c | 16834 | + t = find_task_by_pid_ns(uhd->pid, &init_pid_ns); |
2380c486 JR |
16835 | + if (t) |
16836 | + t->flags &= ~PF_NOFREEZE; | |
cacc47f8 | 16837 | + toi_read_unlock_tasklist(); |
2380c486 JR |
16838 | + |
16839 | + toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0); | |
16840 | +} | |
16841 | +EXPORT_SYMBOL_GPL(toi_netlink_close); | |
16842 | + | |
16843 | +int toi_netlink_setup(struct user_helper_data *uhd) | |
16844 | +{ | |
16845 | + /* In case userui didn't cleanup properly on us */ | |
16846 | + toi_netlink_close_complete(uhd); | |
16847 | + | |
16848 | + if (netlink_prepare(uhd) < 0) { | |
16849 | + printk(KERN_INFO "Netlink prepare failed.\n"); | |
16850 | + return 1; | |
16851 | + } | |
16852 | + | |
16853 | + if (toi_launch_userspace_program(uhd->program, uhd->netlink_id, | |
16854 | + UMH_WAIT_EXEC, uhd->debug) < 0) { | |
16855 | + printk(KERN_INFO "Launch userspace program failed.\n"); | |
16856 | + toi_netlink_close_complete(uhd); | |
16857 | + return 1; | |
16858 | + } | |
16859 | + | |
16860 | + /* Wait 2 seconds for the userspace process to make contact */ | |
16861 | + wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ); | |
16862 | + | |
16863 | + if (uhd->pid == -1) { | |
16864 | + printk(KERN_INFO "%s: Failed to contact userspace process.\n", | |
16865 | + uhd->name); | |
16866 | + toi_netlink_close_complete(uhd); | |
16867 | + return 1; | |
16868 | + } | |
16869 | + | |
16870 | + return 0; | |
16871 | +} | |
16872 | +EXPORT_SYMBOL_GPL(toi_netlink_setup); | |
16873 | diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h | |
16874 | new file mode 100644 | |
5dd10c98 | 16875 | index 0000000..b8ef06e |
2380c486 JR |
16876 | --- /dev/null |
16877 | +++ b/kernel/power/tuxonice_netlink.h | |
16878 | @@ -0,0 +1,62 @@ | |
16879 | +/* | |
16880 | + * kernel/power/tuxonice_netlink.h | |
16881 | + * | |
5dd10c98 | 16882 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16883 | + * |
16884 | + * This file is released under the GPLv2. | |
16885 | + * | |
16886 | + * Declarations for functions for communicating with a userspace helper | |
16887 | + * via netlink. | |
16888 | + */ | |
16889 | + | |
16890 | +#include <linux/netlink.h> | |
16891 | +#include <net/sock.h> | |
16892 | + | |
16893 | +#define NETLINK_MSG_BASE 0x10 | |
16894 | + | |
16895 | +#define NETLINK_MSG_READY 0x10 | |
16896 | +#define NETLINK_MSG_NOFREEZE_ME 0x16 | |
16897 | +#define NETLINK_MSG_GET_DEBUGGING 0x19 | |
16898 | +#define NETLINK_MSG_CLEANUP 0x24 | |
16899 | +#define NETLINK_MSG_NOFREEZE_ACK 0x27 | |
16900 | +#define NETLINK_MSG_IS_DEBUGGING 0x28 | |
16901 | + | |
16902 | +struct user_helper_data { | |
16903 | + int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh); | |
16904 | + void (*not_ready) (void); | |
16905 | + struct sock *nl; | |
16906 | + u32 sock_seq; | |
16907 | + pid_t pid; | |
16908 | + char *comm; | |
16909 | + char program[256]; | |
16910 | + int pool_level; | |
16911 | + int pool_limit; | |
16912 | + struct sk_buff *emerg_skbs; | |
16913 | + int skb_size; | |
16914 | + int netlink_id; | |
16915 | + char *name; | |
16916 | + struct user_helper_data *next; | |
16917 | + struct completion wait_for_process; | |
16918 | + u32 interface_version; | |
16919 | + int must_init; | |
16920 | + int debug; | |
16921 | +}; | |
16922 | + | |
16923 | +#ifdef CONFIG_NET | |
16924 | +int toi_netlink_setup(struct user_helper_data *uhd); | |
16925 | +void toi_netlink_close(struct user_helper_data *uhd); | |
16926 | +void toi_send_netlink_message(struct user_helper_data *uhd, | |
16927 | + int type, void *params, size_t len); | |
16928 | +void toi_netlink_close_complete(struct user_helper_data *uhd); | |
16929 | +#else | |
16930 | +static inline int toi_netlink_setup(struct user_helper_data *uhd) | |
16931 | +{ | |
16932 | + return 0; | |
16933 | +} | |
16934 | + | |
16935 | +static inline void toi_netlink_close(struct user_helper_data *uhd) { }; | |
16936 | +static inline void toi_send_netlink_message(struct user_helper_data *uhd, | |
16937 | + int type, void *params, size_t len) { }; | |
16938 | +static inline void toi_netlink_close_complete(struct user_helper_data *uhd) | |
16939 | + { }; | |
16940 | +#endif | |
16941 | diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c | |
16942 | new file mode 100644 | |
85eb3c9d | 16943 | index 0000000..d0706bd |
2380c486 JR |
16944 | --- /dev/null |
16945 | +++ b/kernel/power/tuxonice_pagedir.c | |
85eb3c9d | 16946 | @@ -0,0 +1,346 @@ |
2380c486 JR |
16947 | +/* |
16948 | + * kernel/power/tuxonice_pagedir.c | |
16949 | + * | |
16950 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
16951 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
16952 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
5dd10c98 | 16953 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
16954 | + * |
16955 | + * This file is released under the GPLv2. | |
16956 | + * | |
16957 | + * Routines for handling pagesets. | |
16958 | + * Note that pbes aren't actually stored as such. They're stored as | |
16959 | + * bitmaps and extents. | |
16960 | + */ | |
16961 | + | |
16962 | +#include <linux/suspend.h> | |
16963 | +#include <linux/highmem.h> | |
16964 | +#include <linux/bootmem.h> | |
16965 | +#include <linux/hardirq.h> | |
16966 | +#include <linux/sched.h> | |
e999739a | 16967 | +#include <linux/cpu.h> |
2380c486 JR |
16968 | +#include <asm/tlbflush.h> |
16969 | + | |
16970 | +#include "tuxonice_pageflags.h" | |
16971 | +#include "tuxonice_ui.h" | |
16972 | +#include "tuxonice_pagedir.h" | |
16973 | +#include "tuxonice_prepare_image.h" | |
16974 | +#include "tuxonice.h" | |
2380c486 JR |
16975 | +#include "tuxonice_builtin.h" |
16976 | +#include "tuxonice_alloc.h" | |
16977 | + | |
16978 | +static int ptoi_pfn; | |
16979 | +static struct pbe *this_low_pbe; | |
16980 | +static struct pbe **last_low_pbe_ptr; | |
2380c486 JR |
16981 | + |
16982 | +void toi_reset_alt_image_pageset2_pfn(void) | |
16983 | +{ | |
16984 | + memory_bm_position_reset(pageset2_map); | |
16985 | +} | |
16986 | + | |
16987 | +static struct page *first_conflicting_page; | |
16988 | + | |
16989 | +/* | |
16990 | + * free_conflicting_pages | |
16991 | + */ | |
16992 | + | |
16993 | +static void free_conflicting_pages(void) | |
16994 | +{ | |
16995 | + while (first_conflicting_page) { | |
16996 | + struct page *next = | |
16997 | + *((struct page **) kmap(first_conflicting_page)); | |
16998 | + kunmap(first_conflicting_page); | |
16999 | + toi__free_page(29, first_conflicting_page); | |
17000 | + first_conflicting_page = next; | |
17001 | + } | |
17002 | +} | |
17003 | + | |
17004 | +/* __toi_get_nonconflicting_page | |
17005 | + * | |
17006 | + * Description: Gets order zero pages that won't be overwritten | |
17007 | + * while copying the original pages. | |
17008 | + */ | |
17009 | + | |
17010 | +struct page *___toi_get_nonconflicting_page(int can_be_highmem) | |
17011 | +{ | |
17012 | + struct page *page; | |
17013 | + gfp_t flags = TOI_ATOMIC_GFP; | |
17014 | + if (can_be_highmem) | |
17015 | + flags |= __GFP_HIGHMEM; | |
17016 | + | |
17017 | + | |
17018 | + if (test_toi_state(TOI_LOADING_ALT_IMAGE) && | |
17019 | + pageset2_map && | |
17020 | + (ptoi_pfn != BM_END_OF_MAP)) { | |
17021 | + do { | |
17022 | + ptoi_pfn = memory_bm_next_pfn(pageset2_map); | |
17023 | + if (ptoi_pfn != BM_END_OF_MAP) { | |
17024 | + page = pfn_to_page(ptoi_pfn); | |
17025 | + if (!PagePageset1(page) && | |
17026 | + (can_be_highmem || !PageHighMem(page))) | |
17027 | + return page; | |
17028 | + } | |
17029 | + } while (ptoi_pfn != BM_END_OF_MAP); | |
17030 | + } | |
17031 | + | |
17032 | + do { | |
17033 | + page = toi_alloc_page(29, flags); | |
17034 | + if (!page) { | |
17035 | + printk(KERN_INFO "Failed to get nonconflicting " | |
17036 | + "page.\n"); | |
17037 | + return NULL; | |
17038 | + } | |
17039 | + if (PagePageset1(page)) { | |
17040 | + struct page **next = (struct page **) kmap(page); | |
17041 | + *next = first_conflicting_page; | |
17042 | + first_conflicting_page = page; | |
17043 | + kunmap(page); | |
17044 | + } | |
17045 | + } while (PagePageset1(page)); | |
17046 | + | |
17047 | + return page; | |
17048 | +} | |
17049 | + | |
17050 | +unsigned long __toi_get_nonconflicting_page(void) | |
17051 | +{ | |
17052 | + struct page *page = ___toi_get_nonconflicting_page(0); | |
17053 | + return page ? (unsigned long) page_address(page) : 0; | |
17054 | +} | |
17055 | + | |
17056 | +static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe, | |
17057 | + int highmem) | |
17058 | +{ | |
17059 | + if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1)) | |
17060 | + + 2 * sizeof(struct pbe)) > PAGE_SIZE) { | |
17061 | + struct page *new_page = | |
17062 | + ___toi_get_nonconflicting_page(highmem); | |
17063 | + if (!new_page) | |
17064 | + return ERR_PTR(-ENOMEM); | |
17065 | + this_pbe = (struct pbe *) kmap(new_page); | |
17066 | + memset(this_pbe, 0, PAGE_SIZE); | |
17067 | + *page_ptr = new_page; | |
17068 | + } else | |
17069 | + this_pbe++; | |
17070 | + | |
17071 | + return this_pbe; | |
17072 | +} | |
17073 | + | |
17074 | +/** | |
17075 | + * get_pageset1_load_addresses - generate pbes for conflicting pages | |
17076 | + * | |
17077 | + * We check here that pagedir & pages it points to won't collide | |
17078 | + * with pages where we're going to restore from the loaded pages | |
17079 | + * later. | |
17080 | + * | |
17081 | + * Returns: | |
17082 | + * Zero on success, one if couldn't find enough pages (shouldn't | |
17083 | + * happen). | |
17084 | + **/ | |
17085 | +int toi_get_pageset1_load_addresses(void) | |
17086 | +{ | |
17087 | + int pfn, highallocd = 0, lowallocd = 0; | |
17088 | + int low_needed = pagedir1.size - get_highmem_size(pagedir1); | |
17089 | + int high_needed = get_highmem_size(pagedir1); | |
17090 | + int low_pages_for_highmem = 0; | |
17091 | + gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM; | |
17092 | + struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL, | |
85eb3c9d | 17093 | + *low_pbe_page, *last_low_pbe_page = NULL; |
2380c486 JR |
17094 | + struct pbe **last_high_pbe_ptr = &restore_highmem_pblist, |
17095 | + *this_high_pbe = NULL; | |
17096 | + int orig_low_pfn, orig_high_pfn; | |
17097 | + int high_pbes_done = 0, low_pbes_done = 0; | |
5dd10c98 | 17098 | + int low_direct = 0, high_direct = 0, result = 0, i; |
85eb3c9d | 17099 | + int high_page = 1, high_offset = 0, low_page = 1, low_offset = 0; |
e999739a | 17100 | + |
85eb3c9d | 17101 | + memory_bm_set_iterators(pageset1_map, 3); |
2380c486 | 17102 | + memory_bm_position_reset(pageset1_map); |
85eb3c9d AM |
17103 | + |
17104 | + memory_bm_set_iterators(pageset1_copy_map, 2); | |
17105 | + memory_bm_position_reset(pageset1_copy_map); | |
2380c486 JR |
17106 | + |
17107 | + last_low_pbe_ptr = &restore_pblist; | |
17108 | + | |
17109 | + /* First, allocate pages for the start of our pbe lists. */ | |
17110 | + if (high_needed) { | |
17111 | + high_pbe_page = ___toi_get_nonconflicting_page(1); | |
17112 | + if (!high_pbe_page) { | |
17113 | + result = -ENOMEM; | |
17114 | + goto out; | |
17115 | + } | |
17116 | + this_high_pbe = (struct pbe *) kmap(high_pbe_page); | |
17117 | + memset(this_high_pbe, 0, PAGE_SIZE); | |
17118 | + } | |
17119 | + | |
17120 | + low_pbe_page = ___toi_get_nonconflicting_page(0); | |
17121 | + if (!low_pbe_page) { | |
17122 | + result = -ENOMEM; | |
17123 | + goto out; | |
17124 | + } | |
17125 | + this_low_pbe = (struct pbe *) page_address(low_pbe_page); | |
17126 | + | |
17127 | + /* | |
5dd10c98 | 17128 | + * Next, allocate the number of pages we need. |
2380c486 JR |
17129 | + */ |
17130 | + | |
5dd10c98 AM |
17131 | + i = low_needed + high_needed; |
17132 | + | |
2380c486 | 17133 | + do { |
5dd10c98 AM |
17134 | + int is_high; |
17135 | + | |
17136 | + if (i == low_needed) | |
17137 | + flags &= ~__GFP_HIGHMEM; | |
17138 | + | |
2380c486 | 17139 | + page = toi_alloc_page(30, flags); |
5dd10c98 | 17140 | + BUG_ON(!page); |
2380c486 | 17141 | + |
5dd10c98 | 17142 | + SetPagePageset1Copy(page); |
2380c486 JR |
17143 | + is_high = PageHighMem(page); |
17144 | + | |
17145 | + if (PagePageset1(page)) { | |
5dd10c98 AM |
17146 | + if (is_high) |
17147 | + high_direct++; | |
17148 | + else | |
17149 | + low_direct++; | |
2380c486 JR |
17150 | + } else { |
17151 | + if (is_high) | |
17152 | + highallocd++; | |
17153 | + else | |
17154 | + lowallocd++; | |
17155 | + } | |
5dd10c98 | 17156 | + } while (--i); |
2380c486 JR |
17157 | + |
17158 | + high_needed -= high_direct; | |
17159 | + low_needed -= low_direct; | |
17160 | + | |
17161 | + /* | |
17162 | + * Do we need to use some lowmem pages for the copies of highmem | |
17163 | + * pages? | |
17164 | + */ | |
17165 | + if (high_needed > highallocd) { | |
17166 | + low_pages_for_highmem = high_needed - highallocd; | |
17167 | + high_needed -= low_pages_for_highmem; | |
17168 | + low_needed += low_pages_for_highmem; | |
17169 | + } | |
17170 | + | |
2380c486 JR |
17171 | + /* |
17172 | + * Now generate our pbes (which will be used for the atomic restore), | |
17173 | + * and free unneeded pages. | |
17174 | + */ | |
17175 | + memory_bm_position_reset(pageset1_copy_map); | |
85eb3c9d AM |
17176 | + for (pfn = memory_bm_next_pfn_index(pageset1_copy_map, 1); pfn != BM_END_OF_MAP; |
17177 | + pfn = memory_bm_next_pfn_index(pageset1_copy_map, 1)) { | |
2380c486 JR |
17178 | + int is_high; |
17179 | + page = pfn_to_page(pfn); | |
17180 | + is_high = PageHighMem(page); | |
17181 | + | |
17182 | + if (PagePageset1(page)) | |
17183 | + continue; | |
17184 | + | |
2380c486 JR |
17185 | + /* Nope. We're going to use this page. Add a pbe. */ |
17186 | + if (is_high || low_pages_for_highmem) { | |
17187 | + struct page *orig_page; | |
17188 | + high_pbes_done++; | |
17189 | + if (!is_high) | |
17190 | + low_pages_for_highmem--; | |
17191 | + do { | |
85eb3c9d | 17192 | + orig_high_pfn = memory_bm_next_pfn_index(pageset1_map, 1); |
2380c486 JR |
17193 | + BUG_ON(orig_high_pfn == BM_END_OF_MAP); |
17194 | + orig_page = pfn_to_page(orig_high_pfn); | |
17195 | + } while (!PageHighMem(orig_page) || | |
5dd10c98 | 17196 | + PagePageset1Copy(orig_page)); |
2380c486 JR |
17197 | + |
17198 | + this_high_pbe->orig_address = orig_page; | |
17199 | + this_high_pbe->address = page; | |
17200 | + this_high_pbe->next = NULL; | |
85eb3c9d AM |
17201 | + toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "High pbe %d/%d: %p(%d)=>%p", |
17202 | + high_page, high_offset, page, orig_high_pfn, orig_page); | |
2380c486 JR |
17203 | + if (last_high_pbe_page != high_pbe_page) { |
17204 | + *last_high_pbe_ptr = | |
17205 | + (struct pbe *) high_pbe_page; | |
85eb3c9d AM |
17206 | + if (last_high_pbe_page) { |
17207 | + kunmap(last_high_pbe_page); | |
17208 | + high_page++; | |
17209 | + high_offset = 0; | |
17210 | + } else | |
17211 | + high_offset++; | |
2380c486 | 17212 | + last_high_pbe_page = high_pbe_page; |
85eb3c9d AM |
17213 | + } else { |
17214 | + *last_high_pbe_ptr = this_high_pbe; | |
17215 | + high_offset++; | |
2380c486 | 17216 | + } |
85eb3c9d | 17217 | + last_high_pbe_ptr = &this_high_pbe->next; |
2380c486 JR |
17218 | + this_high_pbe = get_next_pbe(&high_pbe_page, |
17219 | + this_high_pbe, 1); | |
17220 | + if (IS_ERR(this_high_pbe)) { | |
17221 | + printk(KERN_INFO | |
17222 | + "This high pbe is an error.\n"); | |
17223 | + return -ENOMEM; | |
17224 | + } | |
17225 | + } else { | |
17226 | + struct page *orig_page; | |
17227 | + low_pbes_done++; | |
17228 | + do { | |
85eb3c9d | 17229 | + orig_low_pfn = memory_bm_next_pfn_index(pageset1_map, 2); |
2380c486 JR |
17230 | + BUG_ON(orig_low_pfn == BM_END_OF_MAP); |
17231 | + orig_page = pfn_to_page(orig_low_pfn); | |
17232 | + } while (PageHighMem(orig_page) || | |
5dd10c98 | 17233 | + PagePageset1Copy(orig_page)); |
2380c486 JR |
17234 | + |
17235 | + this_low_pbe->orig_address = page_address(orig_page); | |
17236 | + this_low_pbe->address = page_address(page); | |
17237 | + this_low_pbe->next = NULL; | |
85eb3c9d AM |
17238 | + toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "Low pbe %d/%d: %p(%d)=>%p", |
17239 | + low_page, low_offset, this_low_pbe->orig_address, | |
17240 | + orig_low_pfn, this_low_pbe->address); | |
2380c486 JR |
17241 | + *last_low_pbe_ptr = this_low_pbe; |
17242 | + last_low_pbe_ptr = &this_low_pbe->next; | |
17243 | + this_low_pbe = get_next_pbe(&low_pbe_page, | |
17244 | + this_low_pbe, 0); | |
85eb3c9d AM |
17245 | + if (low_pbe_page != last_low_pbe_page) { |
17246 | + if (last_low_pbe_page) { | |
17247 | + low_page++; | |
17248 | + low_offset = 0; | |
17249 | + } | |
17250 | + last_low_pbe_page = low_pbe_page; | |
17251 | + } else | |
17252 | + low_offset++; | |
2380c486 JR |
17253 | + if (IS_ERR(this_low_pbe)) { |
17254 | + printk(KERN_INFO "this_low_pbe is an error.\n"); | |
17255 | + return -ENOMEM; | |
17256 | + } | |
17257 | + } | |
17258 | + } | |
17259 | + | |
17260 | + if (high_pbe_page) | |
17261 | + kunmap(high_pbe_page); | |
17262 | + | |
17263 | + if (last_high_pbe_page != high_pbe_page) { | |
17264 | + if (last_high_pbe_page) | |
17265 | + kunmap(last_high_pbe_page); | |
17266 | + toi__free_page(29, high_pbe_page); | |
17267 | + } | |
17268 | + | |
17269 | + free_conflicting_pages(); | |
17270 | + | |
17271 | +out: | |
85eb3c9d AM |
17272 | + memory_bm_set_iterators(pageset1_map, 1); |
17273 | + memory_bm_set_iterators(pageset1_copy_map, 1); | |
2380c486 JR |
17274 | + return result; |
17275 | +} | |
17276 | + | |
17277 | +int add_boot_kernel_data_pbe(void) | |
17278 | +{ | |
17279 | + this_low_pbe->address = (char *) __toi_get_nonconflicting_page(); | |
17280 | + if (!this_low_pbe->address) { | |
17281 | + printk(KERN_INFO "Failed to get bkd atomic restore buffer."); | |
17282 | + return -ENOMEM; | |
17283 | + } | |
17284 | + | |
17285 | + toi_bkd.size = sizeof(toi_bkd); | |
17286 | + memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd)); | |
17287 | + | |
17288 | + *last_low_pbe_ptr = this_low_pbe; | |
17289 | + this_low_pbe->orig_address = (char *) boot_kernel_data_buffer; | |
17290 | + this_low_pbe->next = NULL; | |
17291 | + return 0; | |
17292 | +} | |
17293 | diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h | |
17294 | new file mode 100644 | |
5dd10c98 | 17295 | index 0000000..d08e4b1 |
2380c486 JR |
17296 | --- /dev/null |
17297 | +++ b/kernel/power/tuxonice_pagedir.h | |
17298 | @@ -0,0 +1,50 @@ | |
17299 | +/* | |
17300 | + * kernel/power/tuxonice_pagedir.h | |
17301 | + * | |
5dd10c98 | 17302 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17303 | + * |
17304 | + * This file is released under the GPLv2. | |
17305 | + * | |
17306 | + * Declarations for routines for handling pagesets. | |
17307 | + */ | |
17308 | + | |
17309 | +#ifndef KERNEL_POWER_PAGEDIR_H | |
17310 | +#define KERNEL_POWER_PAGEDIR_H | |
17311 | + | |
17312 | +/* Pagedir | |
17313 | + * | |
17314 | + * Contains the metadata for a set of pages saved in the image. | |
17315 | + */ | |
17316 | + | |
17317 | +struct pagedir { | |
17318 | + int id; | |
92bca44c | 17319 | + unsigned long size; |
2380c486 | 17320 | +#ifdef CONFIG_HIGHMEM |
92bca44c | 17321 | + unsigned long size_high; |
2380c486 JR |
17322 | +#endif |
17323 | +}; | |
17324 | + | |
17325 | +#ifdef CONFIG_HIGHMEM | |
17326 | +#define get_highmem_size(pagedir) (pagedir.size_high) | |
17327 | +#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0) | |
17328 | +#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0) | |
17329 | +#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high) | |
17330 | +#else | |
17331 | +#define get_highmem_size(pagedir) (0) | |
17332 | +#define set_highmem_size(pagedir, sz) do { } while (0) | |
17333 | +#define inc_highmem_size(pagedir) do { } while (0) | |
17334 | +#define get_lowmem_size(pagedir) (pagedir.size) | |
17335 | +#endif | |
17336 | + | |
17337 | +extern struct pagedir pagedir1, pagedir2; | |
17338 | + | |
17339 | +extern void toi_copy_pageset1(void); | |
17340 | + | |
17341 | +extern int toi_get_pageset1_load_addresses(void); | |
17342 | + | |
17343 | +extern unsigned long __toi_get_nonconflicting_page(void); | |
17344 | +struct page *___toi_get_nonconflicting_page(int can_be_highmem); | |
17345 | + | |
17346 | +extern void toi_reset_alt_image_pageset2_pfn(void); | |
17347 | +extern int add_boot_kernel_data_pbe(void); | |
17348 | +#endif | |
17349 | diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c | |
17350 | new file mode 100644 | |
5dd10c98 | 17351 | index 0000000..e9ec5b5 |
2380c486 JR |
17352 | --- /dev/null |
17353 | +++ b/kernel/power/tuxonice_pageflags.c | |
9474138d | 17354 | @@ -0,0 +1,28 @@ |
2380c486 JR |
17355 | +/* |
17356 | + * kernel/power/tuxonice_pageflags.c | |
17357 | + * | |
5dd10c98 | 17358 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17359 | + * |
17360 | + * This file is released under the GPLv2. | |
17361 | + * | |
17362 | + * Routines for serialising and relocating pageflags in which we | |
17363 | + * store our image metadata. | |
17364 | + */ | |
17365 | + | |
17366 | +#include <linux/list.h> | |
17367 | +#include "tuxonice_pageflags.h" | |
17368 | +#include "power.h" | |
17369 | + | |
17370 | +int toi_pageflags_space_needed(void) | |
17371 | +{ | |
17372 | + int total = 0; | |
17373 | + struct bm_block *bb; | |
17374 | + | |
17375 | + total = sizeof(unsigned int); | |
17376 | + | |
17377 | + list_for_each_entry(bb, &pageset1_map->blocks, hook) | |
17378 | + total += 2 * sizeof(unsigned long) + PAGE_SIZE; | |
17379 | + | |
17380 | + return total; | |
17381 | +} | |
9474138d | 17382 | +EXPORT_SYMBOL_GPL(toi_pageflags_space_needed); |
2380c486 JR |
17383 | diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h |
17384 | new file mode 100644 | |
5dd10c98 | 17385 | index 0000000..d5aa7b1 |
2380c486 JR |
17386 | --- /dev/null |
17387 | +++ b/kernel/power/tuxonice_pageflags.h | |
9474138d | 17388 | @@ -0,0 +1,72 @@ |
2380c486 JR |
17389 | +/* |
17390 | + * kernel/power/tuxonice_pageflags.h | |
17391 | + * | |
5dd10c98 | 17392 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17393 | + * |
17394 | + * This file is released under the GPLv2. | |
17395 | + */ | |
17396 | + | |
17397 | +#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H | |
17398 | +#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H | |
17399 | + | |
2380c486 JR |
17400 | +extern struct memory_bitmap *pageset1_map; |
17401 | +extern struct memory_bitmap *pageset1_copy_map; | |
17402 | +extern struct memory_bitmap *pageset2_map; | |
17403 | +extern struct memory_bitmap *page_resave_map; | |
17404 | +extern struct memory_bitmap *io_map; | |
17405 | +extern struct memory_bitmap *nosave_map; | |
17406 | +extern struct memory_bitmap *free_map; | |
17407 | + | |
17408 | +#define PagePageset1(page) \ | |
17409 | + (memory_bm_test_bit(pageset1_map, page_to_pfn(page))) | |
17410 | +#define SetPagePageset1(page) \ | |
17411 | + (memory_bm_set_bit(pageset1_map, page_to_pfn(page))) | |
17412 | +#define ClearPagePageset1(page) \ | |
17413 | + (memory_bm_clear_bit(pageset1_map, page_to_pfn(page))) | |
17414 | + | |
17415 | +#define PagePageset1Copy(page) \ | |
17416 | + (memory_bm_test_bit(pageset1_copy_map, page_to_pfn(page))) | |
17417 | +#define SetPagePageset1Copy(page) \ | |
17418 | + (memory_bm_set_bit(pageset1_copy_map, page_to_pfn(page))) | |
17419 | +#define ClearPagePageset1Copy(page) \ | |
17420 | + (memory_bm_clear_bit(pageset1_copy_map, page_to_pfn(page))) | |
17421 | + | |
17422 | +#define PagePageset2(page) \ | |
17423 | + (memory_bm_test_bit(pageset2_map, page_to_pfn(page))) | |
17424 | +#define SetPagePageset2(page) \ | |
17425 | + (memory_bm_set_bit(pageset2_map, page_to_pfn(page))) | |
17426 | +#define ClearPagePageset2(page) \ | |
17427 | + (memory_bm_clear_bit(pageset2_map, page_to_pfn(page))) | |
17428 | + | |
17429 | +#define PageWasRW(page) \ | |
17430 | + (memory_bm_test_bit(pageset2_map, page_to_pfn(page))) | |
17431 | +#define SetPageWasRW(page) \ | |
17432 | + (memory_bm_set_bit(pageset2_map, page_to_pfn(page))) | |
17433 | +#define ClearPageWasRW(page) \ | |
17434 | + (memory_bm_clear_bit(pageset2_map, page_to_pfn(page))) | |
17435 | + | |
17436 | +#define PageResave(page) (page_resave_map ? \ | |
17437 | + memory_bm_test_bit(page_resave_map, page_to_pfn(page)) : 0) | |
17438 | +#define SetPageResave(page) \ | |
17439 | + (memory_bm_set_bit(page_resave_map, page_to_pfn(page))) | |
17440 | +#define ClearPageResave(page) \ | |
17441 | + (memory_bm_clear_bit(page_resave_map, page_to_pfn(page))) | |
17442 | + | |
17443 | +#define PageNosave(page) (nosave_map ? \ | |
17444 | + memory_bm_test_bit(nosave_map, page_to_pfn(page)) : 0) | |
17445 | +#define SetPageNosave(page) \ | |
17446 | + (memory_bm_set_bit(nosave_map, page_to_pfn(page))) | |
17447 | +#define ClearPageNosave(page) \ | |
17448 | + (memory_bm_clear_bit(nosave_map, page_to_pfn(page))) | |
17449 | + | |
17450 | +#define PageNosaveFree(page) (free_map ? \ | |
17451 | + memory_bm_test_bit(free_map, page_to_pfn(page)) : 0) | |
17452 | +#define SetPageNosaveFree(page) \ | |
17453 | + (memory_bm_set_bit(free_map, page_to_pfn(page))) | |
17454 | +#define ClearPageNosaveFree(page) \ | |
17455 | + (memory_bm_clear_bit(free_map, page_to_pfn(page))) | |
17456 | + | |
17457 | +extern void save_pageflags(struct memory_bitmap *pagemap); | |
17458 | +extern int load_pageflags(struct memory_bitmap *pagemap); | |
17459 | +extern int toi_pageflags_space_needed(void); | |
17460 | +#endif | |
17461 | diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c | |
17462 | new file mode 100644 | |
5dd10c98 | 17463 | index 0000000..07e39c0 |
2380c486 JR |
17464 | --- /dev/null |
17465 | +++ b/kernel/power/tuxonice_power_off.c | |
7e46296a | 17466 | @@ -0,0 +1,285 @@ |
2380c486 JR |
17467 | +/* |
17468 | + * kernel/power/tuxonice_power_off.c | |
17469 | + * | |
5dd10c98 | 17470 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17471 | + * |
17472 | + * This file is released under the GPLv2. | |
17473 | + * | |
17474 | + * Support for powering down. | |
17475 | + */ | |
17476 | + | |
17477 | +#include <linux/device.h> | |
17478 | +#include <linux/suspend.h> | |
17479 | +#include <linux/mm.h> | |
17480 | +#include <linux/pm.h> | |
17481 | +#include <linux/reboot.h> | |
17482 | +#include <linux/cpu.h> | |
17483 | +#include <linux/console.h> | |
17484 | +#include <linux/fs.h> | |
17485 | +#include "tuxonice.h" | |
17486 | +#include "tuxonice_ui.h" | |
17487 | +#include "tuxonice_power_off.h" | |
17488 | +#include "tuxonice_sysfs.h" | |
17489 | +#include "tuxonice_modules.h" | |
17490 | +#include "tuxonice_io.h" | |
17491 | + | |
17492 | +unsigned long toi_poweroff_method; /* 0 - Kernel power off */ | |
17493 | +EXPORT_SYMBOL_GPL(toi_poweroff_method); | |
17494 | + | |
17495 | +static int wake_delay; | |
17496 | +static char lid_state_file[256], wake_alarm_dir[256]; | |
17497 | +static struct file *lid_file, *alarm_file, *epoch_file; | |
17498 | +static int post_wake_state = -1; | |
17499 | + | |
17500 | +static int did_suspend_to_both; | |
17501 | + | |
17502 | +/* | |
17503 | + * __toi_power_down | |
17504 | + * Functionality : Powers down or reboots the computer once the image | |
17505 | + * has been written to disk. | |
17506 | + * Key Assumptions : Able to reboot/power down via code called or that | |
17507 | + * the warning emitted if the calls fail will be visible | |
17508 | + * to the user (ie printk resumes devices). | |
17509 | + */ | |
17510 | + | |
17511 | +static void __toi_power_down(int method) | |
17512 | +{ | |
17513 | + int error; | |
17514 | + | |
17515 | + toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." : | |
17516 | + "Powering down."); | |
17517 | + | |
17518 | + if (test_result_state(TOI_ABORTED)) | |
17519 | + goto out; | |
17520 | + | |
17521 | + if (test_action_state(TOI_REBOOT)) | |
17522 | + kernel_restart(NULL); | |
17523 | + | |
17524 | + switch (method) { | |
17525 | + case 0: | |
17526 | + break; | |
17527 | + case 3: | |
17528 | + /* | |
17529 | + * Re-read the overwritten part of pageset2 to make post-resume | |
17530 | + * faster. | |
17531 | + */ | |
17532 | + if (read_pageset2(1)) | |
e999739a | 17533 | + panic("Attempt to reload pagedir 2 failed. " |
17534 | + "Try rebooting."); | |
2380c486 | 17535 | + |
7e46296a AM |
17536 | + pm_prepare_console(); |
17537 | + | |
2380c486 JR |
17538 | + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); |
17539 | + if (!error) { | |
17540 | + error = suspend_devices_and_enter(PM_SUSPEND_MEM); | |
17541 | + if (!error) | |
17542 | + did_suspend_to_both = 1; | |
17543 | + } | |
17544 | + pm_notifier_call_chain(PM_POST_SUSPEND); | |
7e46296a | 17545 | + pm_restore_console(); |
2380c486 JR |
17546 | + |
17547 | + /* Success - we're now post-resume-from-ram */ | |
17548 | + if (did_suspend_to_both) | |
17549 | + return; | |
17550 | + | |
17551 | + /* Failed to suspend to ram - do normal power off */ | |
17552 | + break; | |
17553 | + case 4: | |
17554 | + /* | |
17555 | + * If succeeds, doesn't return. If fails, do a simple | |
17556 | + * powerdown. | |
17557 | + */ | |
17558 | + hibernation_platform_enter(); | |
17559 | + break; | |
17560 | + case 5: | |
17561 | + /* Historic entry only now */ | |
17562 | + break; | |
17563 | + } | |
17564 | + | |
17565 | + if (method && method != 5) | |
17566 | + toi_cond_pause(1, | |
17567 | + "Falling back to alternate power off method."); | |
17568 | + | |
17569 | + if (test_result_state(TOI_ABORTED)) | |
17570 | + goto out; | |
17571 | + | |
17572 | + kernel_power_off(); | |
17573 | + kernel_halt(); | |
17574 | + toi_cond_pause(1, "Powerdown failed."); | |
17575 | + while (1) | |
17576 | + cpu_relax(); | |
17577 | + | |
17578 | +out: | |
17579 | + if (read_pageset2(1)) | |
17580 | + panic("Attempt to reload pagedir 2 failed. Try rebooting."); | |
17581 | + return; | |
17582 | +} | |
17583 | + | |
17584 | +#define CLOSE_FILE(file) \ | |
17585 | + if (file) { \ | |
17586 | + filp_close(file, NULL); file = NULL; \ | |
17587 | + } | |
17588 | + | |
17589 | +static void powerdown_cleanup(int toi_or_resume) | |
17590 | +{ | |
17591 | + if (!toi_or_resume) | |
17592 | + return; | |
17593 | + | |
17594 | + CLOSE_FILE(lid_file); | |
17595 | + CLOSE_FILE(alarm_file); | |
17596 | + CLOSE_FILE(epoch_file); | |
17597 | +} | |
17598 | + | |
17599 | +static void open_file(char *format, char *arg, struct file **var, int mode, | |
17600 | + char *desc) | |
17601 | +{ | |
17602 | + char buf[256]; | |
17603 | + | |
17604 | + if (strlen(arg)) { | |
17605 | + sprintf(buf, format, arg); | |
17606 | + *var = filp_open(buf, mode, 0); | |
17607 | + if (IS_ERR(*var) || !*var) { | |
17608 | + printk(KERN_INFO "Failed to open %s file '%s' (%p).\n", | |
17609 | + desc, buf, *var); | |
17610 | + *var = NULL; | |
17611 | + } | |
17612 | + } | |
17613 | +} | |
17614 | + | |
17615 | +static int powerdown_init(int toi_or_resume) | |
17616 | +{ | |
17617 | + if (!toi_or_resume) | |
17618 | + return 0; | |
17619 | + | |
17620 | + did_suspend_to_both = 0; | |
17621 | + | |
17622 | + open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file, | |
17623 | + O_RDONLY, "lid"); | |
17624 | + | |
17625 | + if (strlen(wake_alarm_dir)) { | |
17626 | + open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir, | |
17627 | + &alarm_file, O_WRONLY, "alarm"); | |
17628 | + | |
17629 | + open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir, | |
17630 | + &epoch_file, O_RDONLY, "epoch"); | |
17631 | + } | |
17632 | + | |
17633 | + return 0; | |
17634 | +} | |
17635 | + | |
17636 | +static int lid_closed(void) | |
17637 | +{ | |
17638 | + char array[25]; | |
17639 | + ssize_t size; | |
17640 | + loff_t pos = 0; | |
17641 | + | |
17642 | + if (!lid_file) | |
17643 | + return 0; | |
17644 | + | |
17645 | + size = vfs_read(lid_file, (char __user *) array, 25, &pos); | |
17646 | + if ((int) size < 1) { | |
17647 | + printk(KERN_INFO "Failed to read lid state file (%d).\n", | |
17648 | + (int) size); | |
17649 | + return 0; | |
17650 | + } | |
17651 | + | |
17652 | + if (!strcmp(array, "state: closed\n")) | |
17653 | + return 1; | |
17654 | + | |
17655 | + return 0; | |
17656 | +} | |
17657 | + | |
17658 | +static void write_alarm_file(int value) | |
17659 | +{ | |
17660 | + ssize_t size; | |
17661 | + char buf[40]; | |
17662 | + loff_t pos = 0; | |
17663 | + | |
17664 | + if (!alarm_file) | |
17665 | + return; | |
17666 | + | |
17667 | + sprintf(buf, "%d\n", value); | |
17668 | + | |
17669 | + size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos); | |
17670 | + | |
17671 | + if (size < 0) | |
17672 | + printk(KERN_INFO "Error %d writing alarm value %s.\n", | |
17673 | + (int) size, buf); | |
17674 | +} | |
17675 | + | |
17676 | +/** | |
17677 | + * toi_check_resleep: See whether to powerdown again after waking. | |
17678 | + * | |
17679 | + * After waking, check whether we should powerdown again in a (usually | |
17680 | + * different) way. We only do this if the lid switch is still closed. | |
17681 | + */ | |
17682 | +void toi_check_resleep(void) | |
17683 | +{ | |
17684 | + /* We only return if we suspended to ram and woke. */ | |
17685 | + if (lid_closed() && post_wake_state >= 0) | |
17686 | + __toi_power_down(post_wake_state); | |
17687 | +} | |
17688 | + | |
17689 | +void toi_power_down(void) | |
17690 | +{ | |
17691 | + if (alarm_file && wake_delay) { | |
17692 | + char array[25]; | |
17693 | + loff_t pos = 0; | |
17694 | + size_t size = vfs_read(epoch_file, (char __user *) array, 25, | |
17695 | + &pos); | |
17696 | + | |
17697 | + if (((int) size) < 1) | |
17698 | + printk(KERN_INFO "Failed to read epoch file (%d).\n", | |
17699 | + (int) size); | |
17700 | + else { | |
9474138d AM |
17701 | + unsigned long since_epoch; |
17702 | + if (!strict_strtoul(array, 0, &since_epoch)) { | |
17703 | + /* Clear any wakeup time. */ | |
17704 | + write_alarm_file(0); | |
2380c486 | 17705 | + |
9474138d AM |
17706 | + /* Set new wakeup time. */ |
17707 | + write_alarm_file(since_epoch + wake_delay); | |
17708 | + } | |
2380c486 JR |
17709 | + } |
17710 | + } | |
17711 | + | |
17712 | + __toi_power_down(toi_poweroff_method); | |
17713 | + | |
17714 | + toi_check_resleep(); | |
17715 | +} | |
17716 | +EXPORT_SYMBOL_GPL(toi_power_down); | |
17717 | + | |
17718 | +static struct toi_sysfs_data sysfs_params[] = { | |
17719 | +#if defined(CONFIG_ACPI) | |
17720 | + SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL), | |
17721 | + SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL), | |
17722 | + SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL), | |
17723 | + SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0, | |
17724 | + NULL), | |
17725 | + SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0), | |
17726 | + SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both, | |
17727 | + 0, 0, 0, NULL) | |
17728 | +#endif | |
17729 | +}; | |
17730 | + | |
17731 | +static struct toi_module_ops powerdown_ops = { | |
17732 | + .type = MISC_HIDDEN_MODULE, | |
17733 | + .name = "poweroff", | |
17734 | + .initialise = powerdown_init, | |
17735 | + .cleanup = powerdown_cleanup, | |
17736 | + .directory = "[ROOT]", | |
17737 | + .module = THIS_MODULE, | |
17738 | + .sysfs_data = sysfs_params, | |
17739 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
17740 | + sizeof(struct toi_sysfs_data), | |
17741 | +}; | |
17742 | + | |
17743 | +int toi_poweroff_init(void) | |
17744 | +{ | |
17745 | + return toi_register_module(&powerdown_ops); | |
17746 | +} | |
17747 | + | |
17748 | +void toi_poweroff_exit(void) | |
17749 | +{ | |
17750 | + toi_unregister_module(&powerdown_ops); | |
17751 | +} | |
17752 | diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h | |
17753 | new file mode 100644 | |
5dd10c98 | 17754 | index 0000000..9aa0ea8 |
2380c486 JR |
17755 | --- /dev/null |
17756 | +++ b/kernel/power/tuxonice_power_off.h | |
17757 | @@ -0,0 +1,24 @@ | |
17758 | +/* | |
17759 | + * kernel/power/tuxonice_power_off.h | |
17760 | + * | |
5dd10c98 | 17761 | + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17762 | + * |
17763 | + * This file is released under the GPLv2. | |
17764 | + * | |
17765 | + * Support for the powering down. | |
17766 | + */ | |
17767 | + | |
17768 | +int toi_pm_state_finish(void); | |
17769 | +void toi_power_down(void); | |
17770 | +extern unsigned long toi_poweroff_method; | |
17771 | +int toi_poweroff_init(void); | |
17772 | +void toi_poweroff_exit(void); | |
17773 | +void toi_check_resleep(void); | |
17774 | + | |
17775 | +extern int platform_begin(int platform_mode); | |
17776 | +extern int platform_pre_snapshot(int platform_mode); | |
17777 | +extern void platform_leave(int platform_mode); | |
17778 | +extern void platform_end(int platform_mode); | |
17779 | +extern void platform_finish(int platform_mode); | |
17780 | +extern int platform_pre_restore(int platform_mode); | |
17781 | +extern void platform_restore_cleanup(int platform_mode); | |
17782 | diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c | |
17783 | new file mode 100644 | |
85eb3c9d | 17784 | index 0000000..1d176a5 |
2380c486 JR |
17785 | --- /dev/null |
17786 | +++ b/kernel/power/tuxonice_prepare_image.c | |
85eb3c9d | 17787 | @@ -0,0 +1,1114 @@ |
2380c486 JR |
17788 | +/* |
17789 | + * kernel/power/tuxonice_prepare_image.c | |
17790 | + * | |
5dd10c98 | 17791 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
17792 | + * |
17793 | + * This file is released under the GPLv2. | |
17794 | + * | |
17795 | + * We need to eat memory until we can: | |
17796 | + * 1. Perform the save without changing anything (RAM_NEEDED < #pages) | |
17797 | + * 2. Fit it all in available space (toiActiveAllocator->available_space() >= | |
17798 | + * main_storage_needed()) | |
17799 | + * 3. Reload the pagedir and pageset1 to places that don't collide with their | |
17800 | + * final destinations, not knowing to what extent the resumed kernel will | |
17801 | + * overlap with the one loaded at boot time. I think the resumed kernel | |
17802 | + * should overlap completely, but I don't want to rely on this as it is | |
17803 | + * an unproven assumption. We therefore assume there will be no overlap at | |
17804 | + * all (worse case). | |
17805 | + * 4. Meet the user's requested limit (if any) on the size of the image. | |
17806 | + * The limit is in MB, so pages/256 (assuming 4K pages). | |
17807 | + * | |
17808 | + */ | |
17809 | + | |
2380c486 JR |
17810 | +#include <linux/highmem.h> |
17811 | +#include <linux/freezer.h> | |
17812 | +#include <linux/hardirq.h> | |
17813 | +#include <linux/mmzone.h> | |
17814 | +#include <linux/console.h> | |
17815 | + | |
17816 | +#include "tuxonice_pageflags.h" | |
17817 | +#include "tuxonice_modules.h" | |
17818 | +#include "tuxonice_io.h" | |
17819 | +#include "tuxonice_ui.h" | |
2380c486 | 17820 | +#include "tuxonice_prepare_image.h" |
2380c486 | 17821 | +#include "tuxonice.h" |
7e46296a | 17822 | +#include "tuxonice_extent.h" |
2380c486 JR |
17823 | +#include "tuxonice_checksum.h" |
17824 | +#include "tuxonice_sysfs.h" | |
17825 | +#include "tuxonice_alloc.h" | |
17826 | +#include "tuxonice_atomic_copy.h" | |
cacc47f8 | 17827 | +#include "tuxonice_builtin.h" |
2380c486 | 17828 | + |
92bca44c | 17829 | +static unsigned long num_nosave, main_storage_allocated, storage_limit, |
0ada99ac | 17830 | + header_storage_needed; |
7e46296a AM |
17831 | +unsigned long extra_pd1_pages_allowance = |
17832 | + CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE; | |
17833 | +long image_size_limit; | |
2380c486 JR |
17834 | +static int no_ps2_needed; |
17835 | + | |
17836 | +struct attention_list { | |
17837 | + struct task_struct *task; | |
17838 | + struct attention_list *next; | |
17839 | +}; | |
17840 | + | |
17841 | +static struct attention_list *attention_list; | |
17842 | + | |
17843 | +#define PAGESET1 0 | |
17844 | +#define PAGESET2 1 | |
17845 | + | |
17846 | +void free_attention_list(void) | |
17847 | +{ | |
17848 | + struct attention_list *last = NULL; | |
17849 | + | |
17850 | + while (attention_list) { | |
17851 | + last = attention_list; | |
17852 | + attention_list = attention_list->next; | |
9474138d | 17853 | + toi_kfree(6, last, sizeof(*last)); |
2380c486 JR |
17854 | + } |
17855 | +} | |
17856 | + | |
17857 | +static int build_attention_list(void) | |
17858 | +{ | |
17859 | + int i, task_count = 0; | |
17860 | + struct task_struct *p; | |
17861 | + struct attention_list *next; | |
17862 | + | |
17863 | + /* | |
17864 | + * Count all userspace process (with task->mm) marked PF_NOFREEZE. | |
17865 | + */ | |
cacc47f8 | 17866 | + toi_read_lock_tasklist(); |
2380c486 JR |
17867 | + for_each_process(p) |
17868 | + if ((p->flags & PF_NOFREEZE) || p == current) | |
17869 | + task_count++; | |
cacc47f8 | 17870 | + toi_read_unlock_tasklist(); |
2380c486 JR |
17871 | + |
17872 | + /* | |
17873 | + * Allocate attention list structs. | |
17874 | + */ | |
17875 | + for (i = 0; i < task_count; i++) { | |
17876 | + struct attention_list *this = | |
17877 | + toi_kzalloc(6, sizeof(struct attention_list), | |
17878 | + TOI_WAIT_GFP); | |
17879 | + if (!this) { | |
17880 | + printk(KERN_INFO "Failed to allocate slab for " | |
17881 | + "attention list.\n"); | |
17882 | + free_attention_list(); | |
17883 | + return 1; | |
17884 | + } | |
17885 | + this->next = NULL; | |
17886 | + if (attention_list) | |
17887 | + this->next = attention_list; | |
17888 | + attention_list = this; | |
17889 | + } | |
17890 | + | |
17891 | + next = attention_list; | |
cacc47f8 | 17892 | + toi_read_lock_tasklist(); |
2380c486 JR |
17893 | + for_each_process(p) |
17894 | + if ((p->flags & PF_NOFREEZE) || p == current) { | |
17895 | + next->task = p; | |
17896 | + next = next->next; | |
17897 | + } | |
cacc47f8 | 17898 | + toi_read_unlock_tasklist(); |
2380c486 JR |
17899 | + return 0; |
17900 | +} | |
17901 | + | |
17902 | +static void pageset2_full(void) | |
17903 | +{ | |
17904 | + struct zone *zone; | |
17905 | + struct page *page; | |
17906 | + unsigned long flags; | |
17907 | + int i; | |
17908 | + | |
92bca44c | 17909 | + for_each_populated_zone(zone) { |
2380c486 JR |
17910 | + spin_lock_irqsave(&zone->lru_lock, flags); |
17911 | + for_each_lru(i) { | |
17912 | + if (!zone_page_state(zone, NR_LRU_BASE + i)) | |
17913 | + continue; | |
17914 | + | |
e999739a | 17915 | + list_for_each_entry(page, &zone->lru[i].list, lru) { |
17916 | + struct address_space *mapping; | |
17917 | + | |
17918 | + mapping = page_mapping(page); | |
17919 | + if (!mapping || !mapping->host || | |
17920 | + !(mapping->host->i_flags & S_ATOMIC_COPY)) | |
17921 | + SetPagePageset2(page); | |
17922 | + } | |
2380c486 JR |
17923 | + } |
17924 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
17925 | + } | |
17926 | +} | |
17927 | + | |
17928 | +/* | |
17929 | + * toi_mark_task_as_pageset | |
17930 | + * Functionality : Marks all the saveable pages belonging to a given process | |
17931 | + * as belonging to a particular pageset. | |
17932 | + */ | |
17933 | + | |
17934 | +static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2) | |
17935 | +{ | |
17936 | + struct vm_area_struct *vma; | |
17937 | + struct mm_struct *mm; | |
17938 | + | |
17939 | + mm = t->active_mm; | |
17940 | + | |
17941 | + if (!mm || !mm->mmap) | |
17942 | + return; | |
17943 | + | |
17944 | + if (!irqs_disabled()) | |
17945 | + down_read(&mm->mmap_sem); | |
17946 | + | |
17947 | + for (vma = mm->mmap; vma; vma = vma->vm_next) { | |
17948 | + unsigned long posn; | |
17949 | + | |
92bca44c AM |
17950 | + if (!vma->vm_start || |
17951 | + vma->vm_flags & (VM_IO | VM_RESERVED | VM_PFNMAP)) | |
2380c486 JR |
17952 | + continue; |
17953 | + | |
17954 | + for (posn = vma->vm_start; posn < vma->vm_end; | |
17955 | + posn += PAGE_SIZE) { | |
17956 | + struct page *page = follow_page(vma, posn, 0); | |
e999739a | 17957 | + struct address_space *mapping; |
17958 | + | |
17959 | + if (!page || !pfn_valid(page_to_pfn(page))) | |
17960 | + continue; | |
17961 | + | |
17962 | + mapping = page_mapping(page); | |
17963 | + if (mapping && mapping->host && | |
17964 | + mapping->host->i_flags & S_ATOMIC_COPY) | |
2380c486 JR |
17965 | + continue; |
17966 | + | |
17967 | + if (pageset2) | |
17968 | + SetPagePageset2(page); | |
17969 | + else { | |
17970 | + ClearPagePageset2(page); | |
17971 | + SetPagePageset1(page); | |
17972 | + } | |
17973 | + } | |
17974 | + } | |
17975 | + | |
17976 | + if (!irqs_disabled()) | |
17977 | + up_read(&mm->mmap_sem); | |
17978 | +} | |
17979 | + | |
e999739a | 17980 | +static void mark_tasks(int pageset) |
17981 | +{ | |
17982 | + struct task_struct *p; | |
17983 | + | |
cacc47f8 | 17984 | + toi_read_lock_tasklist(); |
e999739a | 17985 | + for_each_process(p) { |
17986 | + if (!p->mm) | |
17987 | + continue; | |
17988 | + | |
17989 | + if (p->flags & PF_KTHREAD) | |
17990 | + continue; | |
17991 | + | |
17992 | + toi_mark_task_as_pageset(p, pageset); | |
17993 | + } | |
cacc47f8 | 17994 | + toi_read_unlock_tasklist(); |
e999739a | 17995 | + |
17996 | +} | |
17997 | + | |
2380c486 JR |
17998 | +/* mark_pages_for_pageset2 |
17999 | + * | |
18000 | + * Description: Mark unshared pages in processes not needed for hibernate as | |
18001 | + * being able to be written out in a separate pagedir. | |
18002 | + * HighMem pages are simply marked as pageset2. They won't be | |
18003 | + * needed during hibernate. | |
18004 | + */ | |
18005 | + | |
18006 | +static void toi_mark_pages_for_pageset2(void) | |
18007 | +{ | |
2380c486 JR |
18008 | + struct attention_list *this = attention_list; |
18009 | + | |
18010 | + memory_bm_clear(pageset2_map); | |
18011 | + | |
18012 | + if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed) | |
18013 | + return; | |
18014 | + | |
18015 | + if (test_action_state(TOI_PAGESET2_FULL)) | |
18016 | + pageset2_full(); | |
e999739a | 18017 | + else |
18018 | + mark_tasks(PAGESET2); | |
2380c486 JR |
18019 | + |
18020 | + /* | |
18021 | + * Because the tasks in attention_list are ones related to hibernating, | |
18022 | + * we know that they won't go away under us. | |
18023 | + */ | |
18024 | + | |
18025 | + while (this) { | |
18026 | + if (!test_result_state(TOI_ABORTED)) | |
18027 | + toi_mark_task_as_pageset(this->task, PAGESET1); | |
18028 | + this = this->next; | |
18029 | + } | |
18030 | +} | |
18031 | + | |
18032 | +/* | |
18033 | + * The atomic copy of pageset1 is stored in pageset2 pages. | |
18034 | + * But if pageset1 is larger (normally only just after boot), | |
18035 | + * we need to allocate extra pages to store the atomic copy. | |
18036 | + * The following data struct and functions are used to handle | |
18037 | + * the allocation and freeing of that memory. | |
18038 | + */ | |
18039 | + | |
92bca44c | 18040 | +static unsigned long extra_pages_allocated; |
2380c486 JR |
18041 | + |
18042 | +struct extras { | |
18043 | + struct page *page; | |
18044 | + int order; | |
18045 | + struct extras *next; | |
18046 | +}; | |
18047 | + | |
18048 | +static struct extras *extras_list; | |
18049 | + | |
18050 | +/* toi_free_extra_pagedir_memory | |
18051 | + * | |
18052 | + * Description: Free previously allocated extra pagedir memory. | |
18053 | + */ | |
18054 | +void toi_free_extra_pagedir_memory(void) | |
18055 | +{ | |
18056 | + /* Free allocated pages */ | |
18057 | + while (extras_list) { | |
18058 | + struct extras *this = extras_list; | |
18059 | + int i; | |
18060 | + | |
18061 | + extras_list = this->next; | |
18062 | + | |
18063 | + for (i = 0; i < (1 << this->order); i++) | |
18064 | + ClearPageNosave(this->page + i); | |
18065 | + | |
18066 | + toi_free_pages(9, this->page, this->order); | |
9474138d | 18067 | + toi_kfree(7, this, sizeof(*this)); |
2380c486 JR |
18068 | + } |
18069 | + | |
18070 | + extra_pages_allocated = 0; | |
18071 | +} | |
18072 | + | |
18073 | +/* toi_allocate_extra_pagedir_memory | |
18074 | + * | |
18075 | + * Description: Allocate memory for making the atomic copy of pagedir1 in the | |
18076 | + * case where it is bigger than pagedir2. | |
18077 | + * Arguments: int num_to_alloc: Number of extra pages needed. | |
18078 | + * Result: int. Number of extra pages we now have allocated. | |
18079 | + */ | |
18080 | +static int toi_allocate_extra_pagedir_memory(int extra_pages_needed) | |
18081 | +{ | |
18082 | + int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated; | |
18083 | + gfp_t flags = TOI_ATOMIC_GFP; | |
18084 | + | |
18085 | + if (num_to_alloc < 1) | |
18086 | + return 0; | |
18087 | + | |
18088 | + order = fls(num_to_alloc); | |
18089 | + if (order >= MAX_ORDER) | |
18090 | + order = MAX_ORDER - 1; | |
18091 | + | |
18092 | + while (num_to_alloc) { | |
18093 | + struct page *newpage; | |
18094 | + unsigned long virt; | |
18095 | + struct extras *extras_entry; | |
18096 | + | |
18097 | + while ((1 << order) > num_to_alloc) | |
18098 | + order--; | |
18099 | + | |
18100 | + extras_entry = (struct extras *) toi_kzalloc(7, | |
18101 | + sizeof(struct extras), TOI_ATOMIC_GFP); | |
18102 | + | |
18103 | + if (!extras_entry) | |
18104 | + return extra_pages_allocated; | |
18105 | + | |
18106 | + virt = toi_get_free_pages(9, flags, order); | |
18107 | + while (!virt && order) { | |
18108 | + order--; | |
18109 | + virt = toi_get_free_pages(9, flags, order); | |
18110 | + } | |
18111 | + | |
18112 | + if (!virt) { | |
9474138d | 18113 | + toi_kfree(7, extras_entry, sizeof(*extras_entry)); |
2380c486 JR |
18114 | + return extra_pages_allocated; |
18115 | + } | |
18116 | + | |
18117 | + newpage = virt_to_page(virt); | |
18118 | + | |
18119 | + extras_entry->page = newpage; | |
18120 | + extras_entry->order = order; | |
18121 | + extras_entry->next = NULL; | |
18122 | + | |
18123 | + if (extras_list) | |
18124 | + extras_entry->next = extras_list; | |
18125 | + | |
18126 | + extras_list = extras_entry; | |
18127 | + | |
18128 | + for (j = 0; j < (1 << order); j++) { | |
18129 | + SetPageNosave(newpage + j); | |
18130 | + SetPagePageset1Copy(newpage + j); | |
18131 | + } | |
18132 | + | |
18133 | + extra_pages_allocated += (1 << order); | |
18134 | + num_to_alloc -= (1 << order); | |
18135 | + } | |
18136 | + | |
18137 | + return extra_pages_allocated; | |
18138 | +} | |
18139 | + | |
18140 | +/* | |
18141 | + * real_nr_free_pages: Count pcp pages for a zone type or all zones | |
18142 | + * (-1 for all, otherwise zone_idx() result desired). | |
18143 | + */ | |
92bca44c | 18144 | +unsigned long real_nr_free_pages(unsigned long zone_idx_mask) |
2380c486 JR |
18145 | +{ |
18146 | + struct zone *zone; | |
18147 | + int result = 0, cpu; | |
18148 | + | |
18149 | + /* PCP lists */ | |
9474138d | 18150 | + for_each_populated_zone(zone) { |
2380c486 JR |
18151 | + if (!(zone_idx_mask & (1 << zone_idx(zone)))) |
18152 | + continue; | |
18153 | + | |
18154 | + for_each_online_cpu(cpu) { | |
de6743ae AM |
18155 | + struct per_cpu_pageset *pset = |
18156 | + per_cpu_ptr(zone->pageset, cpu); | |
2380c486 JR |
18157 | + struct per_cpu_pages *pcp = &pset->pcp; |
18158 | + result += pcp->count; | |
18159 | + } | |
18160 | + | |
18161 | + result += zone_page_state(zone, NR_FREE_PAGES); | |
18162 | + } | |
18163 | + return result; | |
18164 | +} | |
18165 | +EXPORT_SYMBOL_GPL(real_nr_free_pages); | |
18166 | + | |
18167 | +/* | |
18168 | + * Discover how much extra memory will be required by the drivers | |
18169 | + * when they're asked to hibernate. We can then ensure that amount | |
18170 | + * of memory is available when we really want it. | |
18171 | + */ | |
18172 | +static void get_extra_pd1_allowance(void) | |
18173 | +{ | |
92bca44c | 18174 | + unsigned long orig_num_free = real_nr_free_pages(all_zones_mask), final; |
2380c486 JR |
18175 | + |
18176 | + toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers."); | |
18177 | + | |
92bca44c AM |
18178 | + if (toi_go_atomic(PMSG_FREEZE, 1)) |
18179 | + return; | |
2380c486 | 18180 | + |
92bca44c AM |
18181 | + final = real_nr_free_pages(all_zones_mask); |
18182 | + toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0); | |
18183 | + | |
18184 | + extra_pd1_pages_allowance = (orig_num_free > final) ? | |
18185 | + orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE : | |
18186 | + MIN_EXTRA_PAGES_ALLOWANCE; | |
2380c486 JR |
18187 | +} |
18188 | + | |
18189 | +/* | |
18190 | + * Amount of storage needed, possibly taking into account the | |
18191 | + * expected compression ratio and possibly also ignoring our | |
18192 | + * allowance for extra pages. | |
18193 | + */ | |
92bca44c | 18194 | +static unsigned long main_storage_needed(int use_ecr, |
2380c486 JR |
18195 | + int ignore_extra_pd1_allow) |
18196 | +{ | |
18197 | + return (pagedir1.size + pagedir2.size + | |
18198 | + (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) * | |
18199 | + (use_ecr ? toi_expected_compression_ratio() : 100) / 100; | |
18200 | +} | |
18201 | + | |
18202 | +/* | |
18203 | + * Storage needed for the image header, in bytes until the return. | |
18204 | + */ | |
92bca44c | 18205 | +unsigned long get_header_storage_needed(void) |
2380c486 | 18206 | +{ |
92bca44c | 18207 | + unsigned long bytes = sizeof(struct toi_header) + |
2380c486 | 18208 | + toi_header_storage_for_modules() + |
5dd10c98 AM |
18209 | + toi_pageflags_space_needed() + |
18210 | + fs_info_space_needed(); | |
2380c486 JR |
18211 | + |
18212 | + return DIV_ROUND_UP(bytes, PAGE_SIZE); | |
18213 | +} | |
9474138d | 18214 | +EXPORT_SYMBOL_GPL(get_header_storage_needed); |
2380c486 JR |
18215 | + |
18216 | +/* | |
18217 | + * When freeing memory, pages from either pageset might be freed. | |
18218 | + * | |
18219 | + * When seeking to free memory to be able to hibernate, for every ps1 page | |
18220 | + * freed, we need 2 less pages for the atomic copy because there is one less | |
18221 | + * page to copy and one more page into which data can be copied. | |
18222 | + * | |
18223 | + * Freeing ps2 pages saves us nothing directly. No more memory is available | |
18224 | + * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but | |
18225 | + * that's too much work to figure out. | |
18226 | + * | |
18227 | + * => ps1_to_free functions | |
18228 | + * | |
18229 | + * Of course if we just want to reduce the image size, because of storage | |
18230 | + * limitations or an image size limit either ps will do. | |
18231 | + * | |
18232 | + * => any_to_free function | |
18233 | + */ | |
18234 | + | |
92bca44c | 18235 | +static unsigned long lowpages_usable_for_highmem_copy(void) |
2380c486 | 18236 | +{ |
92bca44c AM |
18237 | + unsigned long needed = get_lowmem_size(pagedir1) + |
18238 | + extra_pd1_pages_allowance + MIN_FREE_RAM + | |
18239 | + toi_memory_for_modules(0), | |
18240 | + available = get_lowmem_size(pagedir2) + | |
18241 | + real_nr_free_low_pages() + extra_pages_allocated; | |
18242 | + | |
18243 | + return available > needed ? available - needed : 0; | |
2380c486 JR |
18244 | +} |
18245 | + | |
92bca44c | 18246 | +static unsigned long highpages_ps1_to_free(void) |
2380c486 | 18247 | +{ |
92bca44c AM |
18248 | + unsigned long need = get_highmem_size(pagedir1), |
18249 | + available = get_highmem_size(pagedir2) + | |
18250 | + real_nr_free_high_pages() + | |
18251 | + lowpages_usable_for_highmem_copy(); | |
18252 | + | |
18253 | + return need > available ? DIV_ROUND_UP(need - available, 2) : 0; | |
2380c486 JR |
18254 | +} |
18255 | + | |
92bca44c AM |
18256 | +static unsigned long lowpages_ps1_to_free(void) |
18257 | +{ | |
18258 | + unsigned long needed = get_lowmem_size(pagedir1) + | |
18259 | + extra_pd1_pages_allowance + MIN_FREE_RAM + | |
18260 | + toi_memory_for_modules(0), | |
18261 | + available = get_lowmem_size(pagedir2) + | |
18262 | + real_nr_free_low_pages() + extra_pages_allocated; | |
18263 | + | |
18264 | + return needed > available ? DIV_ROUND_UP(needed - available, 2) : 0; | |
18265 | +} | |
18266 | + | |
18267 | +static unsigned long current_image_size(void) | |
2380c486 | 18268 | +{ |
0ada99ac | 18269 | + return pagedir1.size + pagedir2.size + header_storage_needed; |
2380c486 JR |
18270 | +} |
18271 | + | |
92bca44c | 18272 | +static unsigned long storage_still_required(void) |
2380c486 | 18273 | +{ |
92bca44c AM |
18274 | + unsigned long needed = main_storage_needed(1, 1); |
18275 | + return needed > storage_limit ? needed - storage_limit : 0; | |
2380c486 JR |
18276 | +} |
18277 | + | |
92bca44c | 18278 | +static unsigned long ram_still_required(void) |
2380c486 | 18279 | +{ |
92bca44c AM |
18280 | + unsigned long needed = MIN_FREE_RAM + toi_memory_for_modules(0) + |
18281 | + 2 * extra_pd1_pages_allowance, | |
de6743ae | 18282 | + available = real_nr_free_low_pages() + extra_pages_allocated; |
92bca44c | 18283 | + return needed > available ? needed - available : 0; |
2380c486 JR |
18284 | +} |
18285 | + | |
85eb3c9d | 18286 | +unsigned long any_to_free(int use_image_size_limit) |
2380c486 | 18287 | +{ |
92bca44c AM |
18288 | + int use_soft_limit = use_image_size_limit && image_size_limit > 0; |
18289 | + unsigned long current_size = current_image_size(), | |
18290 | + soft_limit = use_soft_limit ? (image_size_limit << 8) : 0, | |
7e46296a AM |
18291 | + to_free = use_soft_limit ? (current_size > soft_limit ? |
18292 | + current_size - soft_limit : 0) : 0, | |
92bca44c AM |
18293 | + storage_limit = storage_still_required(), |
18294 | + ram_limit = ram_still_required(), | |
7e46296a | 18295 | + first_max = max(to_free, storage_limit); |
2380c486 JR |
18296 | + |
18297 | + return max(first_max, ram_limit); | |
18298 | +} | |
18299 | + | |
18300 | +static int need_pageset2(void) | |
18301 | +{ | |
18302 | + return (real_nr_free_low_pages() + extra_pages_allocated - | |
18303 | + 2 * extra_pd1_pages_allowance - MIN_FREE_RAM - | |
18304 | + toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size; | |
18305 | +} | |
18306 | + | |
18307 | +/* amount_needed | |
18308 | + * | |
18309 | + * Calculates the amount by which the image size needs to be reduced to meet | |
18310 | + * our constraints. | |
18311 | + */ | |
92bca44c | 18312 | +static unsigned long amount_needed(int use_image_size_limit) |
2380c486 JR |
18313 | +{ |
18314 | + return max(highpages_ps1_to_free() + lowpages_ps1_to_free(), | |
18315 | + any_to_free(use_image_size_limit)); | |
18316 | +} | |
18317 | + | |
92bca44c | 18318 | +static int image_not_ready(int use_image_size_limit) |
2380c486 JR |
18319 | +{ |
18320 | + toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, | |
7e46296a AM |
18321 | + "Amount still needed (%lu) > 0:%u," |
18322 | + " Storage allocd: %lu < %lu: %u.\n", | |
2380c486 JR |
18323 | + amount_needed(use_image_size_limit), |
18324 | + (amount_needed(use_image_size_limit) > 0), | |
2380c486 JR |
18325 | + main_storage_allocated, |
18326 | + main_storage_needed(1, 1), | |
18327 | + main_storage_allocated < main_storage_needed(1, 1)); | |
18328 | + | |
18329 | + toi_cond_pause(0, NULL); | |
18330 | + | |
18331 | + return (amount_needed(use_image_size_limit) > 0) || | |
2380c486 JR |
18332 | + main_storage_allocated < main_storage_needed(1, 1); |
18333 | +} | |
18334 | + | |
18335 | +static void display_failure_reason(int tries_exceeded) | |
18336 | +{ | |
92bca44c | 18337 | + unsigned long storage_required = storage_still_required(), |
2380c486 JR |
18338 | + ram_required = ram_still_required(), |
18339 | + high_ps1 = highpages_ps1_to_free(), | |
18340 | + low_ps1 = lowpages_ps1_to_free(); | |
18341 | + | |
18342 | + printk(KERN_INFO "Failed to prepare the image because...\n"); | |
18343 | + | |
92bca44c | 18344 | + if (!storage_limit) { |
2380c486 JR |
18345 | + printk(KERN_INFO "- You need some storage available to be " |
18346 | + "able to hibernate.\n"); | |
18347 | + return; | |
18348 | + } | |
18349 | + | |
18350 | + if (tries_exceeded) | |
18351 | + printk(KERN_INFO "- The maximum number of iterations was " | |
18352 | + "reached without successfully preparing the " | |
18353 | + "image.\n"); | |
18354 | + | |
2380c486 | 18355 | + if (storage_required) { |
92bca44c AM |
18356 | + printk(KERN_INFO " - We need at least %lu pages of storage " |
18357 | + "(ignoring the header), but only have %lu.\n", | |
2380c486 JR |
18358 | + main_storage_needed(1, 1), |
18359 | + main_storage_allocated); | |
18360 | + set_abort_result(TOI_INSUFFICIENT_STORAGE); | |
18361 | + } | |
18362 | + | |
18363 | + if (ram_required) { | |
92bca44c | 18364 | + printk(KERN_INFO " - We need %lu more free pages of low " |
2380c486 JR |
18365 | + "memory.\n", ram_required); |
18366 | + printk(KERN_INFO " Minimum free : %8d\n", MIN_FREE_RAM); | |
92bca44c | 18367 | + printk(KERN_INFO " + Reqd. by modules : %8lu\n", |
2380c486 | 18368 | + toi_memory_for_modules(0)); |
92bca44c | 18369 | + printk(KERN_INFO " + 2 * extra allow : %8lu\n", |
2380c486 | 18370 | + 2 * extra_pd1_pages_allowance); |
92bca44c | 18371 | + printk(KERN_INFO " - Currently free : %8lu\n", |
2380c486 | 18372 | + real_nr_free_low_pages()); |
de6743ae AM |
18373 | + printk(KERN_INFO " - Pages allocd : %8lu\n", |
18374 | + extra_pages_allocated); | |
2380c486 | 18375 | + printk(KERN_INFO " : ========\n"); |
92bca44c | 18376 | + printk(KERN_INFO " Still needed : %8lu\n", |
2380c486 JR |
18377 | + ram_required); |
18378 | + | |
18379 | + /* Print breakdown of memory needed for modules */ | |
18380 | + toi_memory_for_modules(1); | |
18381 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
18382 | + } | |
18383 | + | |
18384 | + if (high_ps1) { | |
92bca44c | 18385 | + printk(KERN_INFO "- We need to free %lu highmem pageset 1 " |
2380c486 JR |
18386 | + "pages.\n", high_ps1); |
18387 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
18388 | + } | |
18389 | + | |
18390 | + if (low_ps1) { | |
18391 | + printk(KERN_INFO " - We need to free %ld lowmem pageset 1 " | |
18392 | + "pages.\n", low_ps1); | |
18393 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
18394 | + } | |
18395 | +} | |
18396 | + | |
18397 | +static void display_stats(int always, int sub_extra_pd1_allow) | |
18398 | +{ | |
18399 | + char buffer[255]; | |
18400 | + snprintf(buffer, 254, | |
7e46296a AM |
18401 | + "Free:%lu(%lu). Sets:%lu(%lu),%lu(%lu). " |
18402 | + "Nosave:%lu-%lu=%lu. Storage:%lu/%lu(%lu=>%lu). " | |
18403 | + "Needed:%lu,%lu,%lu(%u,%lu,%lu,%ld) (PS2:%s)\n", | |
2380c486 JR |
18404 | + |
18405 | + /* Free */ | |
18406 | + real_nr_free_pages(all_zones_mask), | |
18407 | + real_nr_free_low_pages(), | |
18408 | + | |
18409 | + /* Sets */ | |
18410 | + pagedir1.size, pagedir1.size - get_highmem_size(pagedir1), | |
18411 | + pagedir2.size, pagedir2.size - get_highmem_size(pagedir2), | |
18412 | + | |
2380c486 JR |
18413 | + /* Nosave */ |
18414 | + num_nosave, extra_pages_allocated, | |
18415 | + num_nosave - extra_pages_allocated, | |
18416 | + | |
18417 | + /* Storage */ | |
18418 | + main_storage_allocated, | |
92bca44c | 18419 | + storage_limit, |
2380c486 JR |
18420 | + main_storage_needed(1, sub_extra_pd1_allow), |
18421 | + main_storage_needed(1, 1), | |
18422 | + | |
18423 | + /* Needed */ | |
18424 | + lowpages_ps1_to_free(), highpages_ps1_to_free(), | |
18425 | + any_to_free(1), | |
18426 | + MIN_FREE_RAM, toi_memory_for_modules(0), | |
92bca44c | 18427 | + extra_pd1_pages_allowance, |
7e46296a | 18428 | + image_size_limit, |
2380c486 JR |
18429 | + |
18430 | + need_pageset2() ? "yes" : "no"); | |
18431 | + | |
18432 | + if (always) | |
18433 | + printk("%s", buffer); | |
18434 | + else | |
18435 | + toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer); | |
18436 | +} | |
18437 | + | |
18438 | +/* generate_free_page_map | |
18439 | + * | |
18440 | + * Description: This routine generates a bitmap of free pages from the | |
18441 | + * lists used by the memory manager. We then use the bitmap | |
18442 | + * to quickly calculate which pages to save and in which | |
18443 | + * pagesets. | |
18444 | + */ | |
18445 | +static void generate_free_page_map(void) | |
18446 | +{ | |
5dd10c98 | 18447 | + int order, cpu, t; |
2380c486 JR |
18448 | + unsigned long flags, i; |
18449 | + struct zone *zone; | |
18450 | + struct list_head *curr; | |
5dd10c98 AM |
18451 | + unsigned long pfn; |
18452 | + struct page *page; | |
2380c486 | 18453 | + |
9474138d | 18454 | + for_each_populated_zone(zone) { |
5dd10c98 AM |
18455 | + |
18456 | + if (!zone->spanned_pages) | |
18457 | + continue; | |
18458 | + | |
2380c486 JR |
18459 | + spin_lock_irqsave(&zone->lock, flags); |
18460 | + | |
5dd10c98 AM |
18461 | + for (i = 0; i < zone->spanned_pages; i++) { |
18462 | + pfn = ZONE_START(zone) + i; | |
18463 | + | |
18464 | + if (!pfn_valid(pfn)) | |
18465 | + continue; | |
18466 | + | |
18467 | + page = pfn_to_page(pfn); | |
18468 | + | |
18469 | + ClearPageNosaveFree(page); | |
18470 | + } | |
2380c486 JR |
18471 | + |
18472 | + for_each_migratetype_order(order, t) { | |
18473 | + list_for_each(curr, | |
18474 | + &zone->free_area[order].free_list[t]) { | |
18475 | + unsigned long j; | |
18476 | + | |
18477 | + pfn = page_to_pfn(list_entry(curr, struct page, | |
18478 | + lru)); | |
18479 | + for (j = 0; j < (1UL << order); j++) | |
18480 | + SetPageNosaveFree(pfn_to_page(pfn + j)); | |
18481 | + } | |
18482 | + } | |
18483 | + | |
18484 | + for_each_online_cpu(cpu) { | |
de6743ae AM |
18485 | + struct per_cpu_pageset *pset = |
18486 | + per_cpu_ptr(zone->pageset, cpu); | |
2380c486 JR |
18487 | + struct per_cpu_pages *pcp = &pset->pcp; |
18488 | + struct page *page; | |
7e46296a | 18489 | + int t; |
2380c486 | 18490 | + |
7e46296a AM |
18491 | + for (t = 0; t < MIGRATE_PCPTYPES; t++) |
18492 | + list_for_each_entry(page, &pcp->lists[t], lru) | |
18493 | + SetPageNosaveFree(page); | |
2380c486 JR |
18494 | + } |
18495 | + | |
18496 | + spin_unlock_irqrestore(&zone->lock, flags); | |
18497 | + } | |
18498 | +} | |
18499 | + | |
18500 | +/* size_of_free_region | |
18501 | + * | |
18502 | + * Description: Return the number of pages that are free, beginning with and | |
18503 | + * including this one. | |
18504 | + */ | |
18505 | +static int size_of_free_region(struct zone *zone, unsigned long start_pfn) | |
18506 | +{ | |
18507 | + unsigned long this_pfn = start_pfn, | |
18508 | + end_pfn = ZONE_START(zone) + zone->spanned_pages - 1; | |
18509 | + | |
18510 | + while (this_pfn <= end_pfn && PageNosaveFree(pfn_to_page(this_pfn))) | |
18511 | + this_pfn++; | |
18512 | + | |
18513 | + return this_pfn - start_pfn; | |
18514 | +} | |
18515 | + | |
18516 | +/* flag_image_pages | |
18517 | + * | |
18518 | + * This routine generates our lists of pages to be stored in each | |
18519 | + * pageset. Since we store the data using extents, and adding new | |
18520 | + * extents might allocate a new extent page, this routine may well | |
18521 | + * be called more than once. | |
18522 | + */ | |
18523 | +static void flag_image_pages(int atomic_copy) | |
18524 | +{ | |
18525 | + int num_free = 0; | |
18526 | + unsigned long loop; | |
18527 | + struct zone *zone; | |
18528 | + | |
18529 | + pagedir1.size = 0; | |
18530 | + pagedir2.size = 0; | |
18531 | + | |
18532 | + set_highmem_size(pagedir1, 0); | |
18533 | + set_highmem_size(pagedir2, 0); | |
18534 | + | |
18535 | + num_nosave = 0; | |
18536 | + | |
18537 | + memory_bm_clear(pageset1_map); | |
18538 | + | |
18539 | + generate_free_page_map(); | |
18540 | + | |
18541 | + /* | |
18542 | + * Pages not to be saved are marked Nosave irrespective of being | |
18543 | + * reserved. | |
18544 | + */ | |
9474138d | 18545 | + for_each_populated_zone(zone) { |
2380c486 JR |
18546 | + int highmem = is_highmem(zone); |
18547 | + | |
2380c486 JR |
18548 | + for (loop = 0; loop < zone->spanned_pages; loop++) { |
18549 | + unsigned long pfn = ZONE_START(zone) + loop; | |
18550 | + struct page *page; | |
18551 | + int chunk_size; | |
18552 | + | |
18553 | + if (!pfn_valid(pfn)) | |
18554 | + continue; | |
18555 | + | |
18556 | + chunk_size = size_of_free_region(zone, pfn); | |
18557 | + if (chunk_size) { | |
18558 | + num_free += chunk_size; | |
18559 | + loop += chunk_size - 1; | |
18560 | + continue; | |
18561 | + } | |
18562 | + | |
18563 | + page = pfn_to_page(pfn); | |
18564 | + | |
18565 | + if (PageNosave(page)) { | |
18566 | + num_nosave++; | |
18567 | + continue; | |
18568 | + } | |
18569 | + | |
18570 | + page = highmem ? saveable_highmem_page(zone, pfn) : | |
18571 | + saveable_page(zone, pfn); | |
18572 | + | |
18573 | + if (!page) { | |
18574 | + num_nosave++; | |
18575 | + continue; | |
18576 | + } | |
18577 | + | |
18578 | + if (PagePageset2(page)) { | |
18579 | + pagedir2.size++; | |
18580 | + if (PageHighMem(page)) | |
18581 | + inc_highmem_size(pagedir2); | |
18582 | + else | |
18583 | + SetPagePageset1Copy(page); | |
18584 | + if (PageResave(page)) { | |
18585 | + SetPagePageset1(page); | |
18586 | + ClearPagePageset1Copy(page); | |
18587 | + pagedir1.size++; | |
18588 | + if (PageHighMem(page)) | |
18589 | + inc_highmem_size(pagedir1); | |
18590 | + } | |
18591 | + } else { | |
18592 | + pagedir1.size++; | |
18593 | + SetPagePageset1(page); | |
18594 | + if (PageHighMem(page)) | |
18595 | + inc_highmem_size(pagedir1); | |
18596 | + } | |
18597 | + } | |
18598 | + } | |
18599 | + | |
18600 | + if (!atomic_copy) | |
18601 | + toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0, | |
18602 | + "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)" | |
18603 | + " + NumFree (%d) = %d.\n", | |
18604 | + pagedir1.size, pagedir2.size, num_nosave, num_free, | |
18605 | + pagedir1.size + pagedir2.size + num_nosave + num_free); | |
18606 | +} | |
18607 | + | |
18608 | +void toi_recalculate_image_contents(int atomic_copy) | |
18609 | +{ | |
18610 | + memory_bm_clear(pageset1_map); | |
18611 | + if (!atomic_copy) { | |
18612 | + unsigned long pfn; | |
18613 | + memory_bm_position_reset(pageset2_map); | |
18614 | + for (pfn = memory_bm_next_pfn(pageset2_map); | |
18615 | + pfn != BM_END_OF_MAP; | |
18616 | + pfn = memory_bm_next_pfn(pageset2_map)) | |
18617 | + ClearPagePageset1Copy(pfn_to_page(pfn)); | |
18618 | + /* Need to call this before getting pageset1_size! */ | |
18619 | + toi_mark_pages_for_pageset2(); | |
18620 | + } | |
18621 | + flag_image_pages(atomic_copy); | |
18622 | + | |
18623 | + if (!atomic_copy) { | |
92bca44c | 18624 | + storage_limit = toiActiveAllocator->storage_available(); |
2380c486 JR |
18625 | + display_stats(0, 0); |
18626 | + } | |
18627 | +} | |
18628 | + | |
85eb3c9d | 18629 | +int try_allocate_extra_memory(void) |
2380c486 | 18630 | +{ |
85eb3c9d | 18631 | + unsigned long wanted = pagedir1.size + extra_pd1_pages_allowance - |
2380c486 JR |
18632 | + get_lowmem_size(pagedir2); |
18633 | + if (wanted > extra_pages_allocated) { | |
85eb3c9d | 18634 | + unsigned long got = toi_allocate_extra_pagedir_memory(wanted); |
2380c486 JR |
18635 | + if (wanted < got) { |
18636 | + toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, | |
18637 | + "Want %d extra pages for pageset1, got %d.\n", | |
18638 | + wanted, got); | |
85eb3c9d | 18639 | + return 1; |
2380c486 JR |
18640 | + } |
18641 | + } | |
85eb3c9d AM |
18642 | + return 0; |
18643 | +} | |
18644 | + | |
18645 | + | |
18646 | +/* update_image | |
18647 | + * | |
18648 | + * Allocate [more] memory and storage for the image. | |
18649 | + */ | |
18650 | +static void update_image(int ps2_recalc) | |
18651 | +{ | |
18652 | + int old_header_req; | |
18653 | + unsigned long seek; | |
18654 | + | |
18655 | + if (try_allocate_extra_memory()) | |
18656 | + return; | |
2380c486 JR |
18657 | + |
18658 | + if (ps2_recalc) | |
18659 | + goto recalc; | |
18660 | + | |
18661 | + thaw_kernel_threads(); | |
18662 | + | |
18663 | + /* | |
18664 | + * Allocate remaining storage space, if possible, up to the | |
18665 | + * maximum we know we'll need. It's okay to allocate the | |
18666 | + * maximum if the writer is the swapwriter, but | |
18667 | + * we don't want to grab all available space on an NFS share. | |
18668 | + * We therefore ignore the expected compression ratio here, | |
18669 | + * thereby trying to allocate the maximum image size we could | |
18670 | + * need (assuming compression doesn't expand the image), but | |
18671 | + * don't complain if we can't get the full amount we're after. | |
18672 | + */ | |
18673 | + | |
0ada99ac | 18674 | + do { |
92bca44c AM |
18675 | + int result; |
18676 | + | |
0ada99ac | 18677 | + old_header_req = header_storage_needed; |
18678 | + toiActiveAllocator->reserve_header_space(header_storage_needed); | |
2380c486 | 18679 | + |
0ada99ac | 18680 | + /* How much storage is free with the reservation applied? */ |
92bca44c AM |
18681 | + storage_limit = toiActiveAllocator->storage_available(); |
18682 | + seek = min(storage_limit, main_storage_needed(0, 0)); | |
2380c486 | 18683 | + |
7e46296a | 18684 | + result = toiActiveAllocator->allocate_storage(seek); |
92bca44c AM |
18685 | + if (result) |
18686 | + printk("Failed to allocate storage (%d).\n", result); | |
2380c486 | 18687 | + |
0ada99ac | 18688 | + main_storage_allocated = |
18689 | + toiActiveAllocator->storage_allocated(); | |
2380c486 | 18690 | + |
0ada99ac | 18691 | + /* Need more header because more storage allocated? */ |
18692 | + header_storage_needed = get_header_storage_needed(); | |
2380c486 | 18693 | + |
0ada99ac | 18694 | + } while (header_storage_needed > old_header_req); |
2380c486 JR |
18695 | + |
18696 | + if (freeze_processes()) | |
18697 | + set_abort_result(TOI_FREEZING_FAILED); | |
18698 | + | |
18699 | +recalc: | |
18700 | + toi_recalculate_image_contents(0); | |
18701 | +} | |
18702 | + | |
18703 | +/* attempt_to_freeze | |
18704 | + * | |
18705 | + * Try to freeze processes. | |
18706 | + */ | |
18707 | + | |
18708 | +static int attempt_to_freeze(void) | |
18709 | +{ | |
18710 | + int result; | |
18711 | + | |
18712 | + /* Stop processes before checking again */ | |
18713 | + thaw_processes(); | |
18714 | + toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing " | |
18715 | + "filesystems."); | |
18716 | + result = freeze_processes(); | |
18717 | + | |
18718 | + if (result) | |
18719 | + set_abort_result(TOI_FREEZING_FAILED); | |
18720 | + | |
18721 | + return result; | |
18722 | +} | |
18723 | + | |
18724 | +/* eat_memory | |
18725 | + * | |
18726 | + * Try to free some memory, either to meet hard or soft constraints on the image | |
18727 | + * characteristics. | |
18728 | + * | |
18729 | + * Hard constraints: | |
18730 | + * - Pageset1 must be < half of memory; | |
18731 | + * - We must have enough memory free at resume time to have pageset1 | |
18732 | + * be able to be loaded in pages that don't conflict with where it has to | |
18733 | + * be restored. | |
18734 | + * Soft constraints | |
18735 | + * - User specificied image size limit. | |
18736 | + */ | |
18737 | +static void eat_memory(void) | |
18738 | +{ | |
92bca44c | 18739 | + unsigned long amount_wanted = 0; |
2380c486 JR |
18740 | + int did_eat_memory = 0; |
18741 | + | |
18742 | + /* | |
18743 | + * Note that if we have enough storage space and enough free memory, we | |
18744 | + * may exit without eating anything. We give up when the last 10 | |
18745 | + * iterations ate no extra pages because we're not going to get much | |
18746 | + * more anyway, but the few pages we get will take a lot of time. | |
18747 | + * | |
18748 | + * We freeze processes before beginning, and then unfreeze them if we | |
18749 | + * need to eat memory until we think we have enough. If our attempts | |
18750 | + * to freeze fail, we give up and abort. | |
18751 | + */ | |
18752 | + | |
2380c486 JR |
18753 | + amount_wanted = amount_needed(1); |
18754 | + | |
18755 | + switch (image_size_limit) { | |
18756 | + case -1: /* Don't eat any memory */ | |
18757 | + if (amount_wanted > 0) { | |
18758 | + set_abort_result(TOI_WOULD_EAT_MEMORY); | |
18759 | + return; | |
18760 | + } | |
18761 | + break; | |
18762 | + case -2: /* Free caches only */ | |
18763 | + drop_pagecache(); | |
18764 | + toi_recalculate_image_contents(0); | |
18765 | + amount_wanted = amount_needed(1); | |
2380c486 JR |
18766 | + break; |
18767 | + default: | |
18768 | + break; | |
18769 | + } | |
18770 | + | |
18771 | + if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) && | |
18772 | + image_size_limit != -1) { | |
de6743ae AM |
18773 | + unsigned long request = amount_wanted; |
18774 | + unsigned long high_req = max(highpages_ps1_to_free(), | |
18775 | + any_to_free(1)); | |
18776 | + unsigned long low_req = lowpages_ps1_to_free(); | |
18777 | + unsigned long got = 0; | |
2380c486 JR |
18778 | + |
18779 | + toi_prepare_status(CLEAR_BAR, | |
18780 | + "Seeking to free %ldMB of memory.", | |
18781 | + MB(amount_wanted)); | |
18782 | + | |
18783 | + thaw_kernel_threads(); | |
18784 | + | |
18785 | + /* | |
18786 | + * Ask for too many because shrink_all_memory doesn't | |
18787 | + * currently return enough most of the time. | |
18788 | + */ | |
de6743ae AM |
18789 | + |
18790 | + if (low_req) | |
18791 | + got = shrink_memory_mask(low_req, GFP_KERNEL); | |
18792 | + if (high_req) | |
18793 | + shrink_memory_mask(high_req - got, GFP_HIGHUSER); | |
2380c486 JR |
18794 | + |
18795 | + did_eat_memory = 1; | |
18796 | + | |
18797 | + toi_recalculate_image_contents(0); | |
18798 | + | |
18799 | + amount_wanted = amount_needed(1); | |
18800 | + | |
de6743ae AM |
18801 | + printk(KERN_DEBUG "Asked shrink_all_memory for %ld low pages &" |
18802 | + " %ld pages from anywhere, got %ld.\n", | |
18803 | + high_req, low_req, | |
7e46296a | 18804 | + request - amount_wanted); |
2380c486 JR |
18805 | + |
18806 | + toi_cond_pause(0, NULL); | |
18807 | + | |
18808 | + if (freeze_processes()) | |
18809 | + set_abort_result(TOI_FREEZING_FAILED); | |
18810 | + } | |
18811 | + | |
18812 | + if (did_eat_memory) | |
18813 | + toi_recalculate_image_contents(0); | |
18814 | +} | |
18815 | + | |
18816 | +/* toi_prepare_image | |
18817 | + * | |
18818 | + * Entry point to the whole image preparation section. | |
18819 | + * | |
18820 | + * We do four things: | |
18821 | + * - Freeze processes; | |
18822 | + * - Ensure image size constraints are met; | |
18823 | + * - Complete all the preparation for saving the image, | |
18824 | + * including allocation of storage. The only memory | |
18825 | + * that should be needed when we're finished is that | |
18826 | + * for actually storing the image (and we know how | |
18827 | + * much is needed for that because the modules tell | |
18828 | + * us). | |
18829 | + * - Make sure that all dirty buffers are written out. | |
18830 | + */ | |
18831 | +#define MAX_TRIES 2 | |
18832 | +int toi_prepare_image(void) | |
18833 | +{ | |
18834 | + int result = 1, tries = 1; | |
18835 | + | |
2380c486 JR |
18836 | + main_storage_allocated = 0; |
18837 | + no_ps2_needed = 0; | |
18838 | + | |
18839 | + if (attempt_to_freeze()) | |
18840 | + return 1; | |
18841 | + | |
18842 | + if (!extra_pd1_pages_allowance) | |
18843 | + get_extra_pd1_allowance(); | |
18844 | + | |
92bca44c | 18845 | + storage_limit = toiActiveAllocator->storage_available(); |
2380c486 | 18846 | + |
92bca44c | 18847 | + if (!storage_limit) { |
2380c486 JR |
18848 | + printk(KERN_INFO "No storage available. Didn't try to prepare " |
18849 | + "an image.\n"); | |
18850 | + display_failure_reason(0); | |
18851 | + set_abort_result(TOI_NOSTORAGE_AVAILABLE); | |
18852 | + return 1; | |
18853 | + } | |
18854 | + | |
18855 | + if (build_attention_list()) { | |
18856 | + abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, | |
18857 | + "Unable to successfully prepare the image.\n"); | |
18858 | + return 1; | |
18859 | + } | |
18860 | + | |
9474138d AM |
18861 | + toi_recalculate_image_contents(0); |
18862 | + | |
2380c486 JR |
18863 | + do { |
18864 | + toi_prepare_status(CLEAR_BAR, | |
18865 | + "Preparing Image. Try %d.", tries); | |
18866 | + | |
18867 | + eat_memory(); | |
18868 | + | |
18869 | + if (test_result_state(TOI_ABORTED)) | |
18870 | + break; | |
18871 | + | |
18872 | + update_image(0); | |
18873 | + | |
18874 | + tries++; | |
18875 | + | |
18876 | + } while (image_not_ready(1) && tries <= MAX_TRIES && | |
18877 | + !test_result_state(TOI_ABORTED)); | |
18878 | + | |
18879 | + result = image_not_ready(0); | |
18880 | + | |
18881 | + if (!test_result_state(TOI_ABORTED)) { | |
18882 | + if (result) { | |
18883 | + display_stats(1, 0); | |
18884 | + display_failure_reason(tries > MAX_TRIES); | |
18885 | + abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, | |
18886 | + "Unable to successfully prepare the image.\n"); | |
18887 | + } else { | |
18888 | + /* Pageset 2 needed? */ | |
18889 | + if (!need_pageset2() && | |
18890 | + test_action_state(TOI_NO_PS2_IF_UNNEEDED)) { | |
18891 | + no_ps2_needed = 1; | |
9474138d | 18892 | + toi_recalculate_image_contents(0); |
2380c486 JR |
18893 | + update_image(1); |
18894 | + } | |
18895 | + | |
18896 | + toi_cond_pause(1, "Image preparation complete."); | |
18897 | + } | |
18898 | + } | |
18899 | + | |
18900 | + return result ? result : allocate_checksum_pages(); | |
18901 | +} | |
18902 | diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h | |
18903 | new file mode 100644 | |
85eb3c9d | 18904 | index 0000000..2a2ca0b |
2380c486 JR |
18905 | --- /dev/null |
18906 | +++ b/kernel/power/tuxonice_prepare_image.h | |
85eb3c9d | 18907 | @@ -0,0 +1,38 @@ |
2380c486 JR |
18908 | +/* |
18909 | + * kernel/power/tuxonice_prepare_image.h | |
18910 | + * | |
5dd10c98 | 18911 | + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
18912 | + * |
18913 | + * This file is released under the GPLv2. | |
18914 | + * | |
18915 | + */ | |
18916 | + | |
18917 | +#include <asm/sections.h> | |
18918 | + | |
18919 | +extern int toi_prepare_image(void); | |
18920 | +extern void toi_recalculate_image_contents(int storage_available); | |
92bca44c | 18921 | +extern unsigned long real_nr_free_pages(unsigned long zone_idx_mask); |
7e46296a | 18922 | +extern long image_size_limit; |
2380c486 | 18923 | +extern void toi_free_extra_pagedir_memory(void); |
92bca44c | 18924 | +extern unsigned long extra_pd1_pages_allowance; |
2380c486 JR |
18925 | +extern void free_attention_list(void); |
18926 | + | |
18927 | +#define MIN_FREE_RAM 100 | |
18928 | +#define MIN_EXTRA_PAGES_ALLOWANCE 500 | |
18929 | + | |
18930 | +#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1)) | |
18931 | +#ifdef CONFIG_HIGHMEM | |
18932 | +#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM)) | |
18933 | +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \ | |
18934 | + (1 << ZONE_HIGHMEM))) | |
18935 | +#else | |
18936 | +#define real_nr_free_high_pages() (0) | |
18937 | +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask)) | |
18938 | + | |
18939 | +/* For eat_memory function */ | |
18940 | +#define ZONE_HIGHMEM (MAX_NR_ZONES + 1) | |
18941 | +#endif | |
18942 | + | |
92bca44c | 18943 | +unsigned long get_header_storage_needed(void); |
85eb3c9d AM |
18944 | +unsigned long any_to_free(int use_image_size_limit); |
18945 | +int try_allocate_extra_memory(void); | |
2380c486 JR |
18946 | diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c |
18947 | new file mode 100644 | |
85eb3c9d | 18948 | index 0000000..82c337a |
2380c486 JR |
18949 | --- /dev/null |
18950 | +++ b/kernel/power/tuxonice_storage.c | |
18951 | @@ -0,0 +1,282 @@ | |
18952 | +/* | |
18953 | + * kernel/power/tuxonice_storage.c | |
18954 | + * | |
5dd10c98 | 18955 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
18956 | + * |
18957 | + * This file is released under the GPLv2. | |
18958 | + * | |
18959 | + * Routines for talking to a userspace program that manages storage. | |
18960 | + * | |
18961 | + * The kernel side: | |
18962 | + * - starts the userspace program; | |
18963 | + * - sends messages telling it when to open and close the connection; | |
18964 | + * - tells it when to quit; | |
18965 | + * | |
18966 | + * The user space side: | |
18967 | + * - passes messages regarding status; | |
18968 | + * | |
18969 | + */ | |
18970 | + | |
18971 | +#include <linux/suspend.h> | |
18972 | +#include <linux/freezer.h> | |
18973 | + | |
18974 | +#include "tuxonice_sysfs.h" | |
18975 | +#include "tuxonice_modules.h" | |
18976 | +#include "tuxonice_netlink.h" | |
18977 | +#include "tuxonice_storage.h" | |
18978 | +#include "tuxonice_ui.h" | |
18979 | + | |
18980 | +static struct user_helper_data usm_helper_data; | |
18981 | +static struct toi_module_ops usm_ops; | |
18982 | +static int message_received, usm_prepare_count; | |
18983 | +static int storage_manager_last_action, storage_manager_action; | |
18984 | + | |
18985 | +static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |
18986 | +{ | |
18987 | + int type; | |
18988 | + int *data; | |
18989 | + | |
18990 | + type = nlh->nlmsg_type; | |
18991 | + | |
18992 | + /* A control message: ignore them */ | |
18993 | + if (type < NETLINK_MSG_BASE) | |
18994 | + return 0; | |
18995 | + | |
18996 | + /* Unknown message: reply with EINVAL */ | |
18997 | + if (type >= USM_MSG_MAX) | |
18998 | + return -EINVAL; | |
18999 | + | |
19000 | + /* All operations require privileges, even GET */ | |
19001 | + if (security_netlink_recv(skb, CAP_NET_ADMIN)) | |
19002 | + return -EPERM; | |
19003 | + | |
19004 | + /* Only allow one task to receive NOFREEZE privileges */ | |
19005 | + if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1) | |
19006 | + return -EBUSY; | |
19007 | + | |
19008 | + data = (int *) NLMSG_DATA(nlh); | |
19009 | + | |
19010 | + switch (type) { | |
19011 | + case USM_MSG_SUCCESS: | |
19012 | + case USM_MSG_FAILED: | |
19013 | + message_received = type; | |
19014 | + complete(&usm_helper_data.wait_for_process); | |
19015 | + break; | |
19016 | + default: | |
19017 | + printk(KERN_INFO "Storage manager doesn't recognise " | |
19018 | + "message %d.\n", type); | |
19019 | + } | |
19020 | + | |
19021 | + return 1; | |
19022 | +} | |
19023 | + | |
19024 | +#ifdef CONFIG_NET | |
19025 | +static int activations; | |
19026 | + | |
19027 | +int toi_activate_storage(int force) | |
19028 | +{ | |
19029 | + int tries = 1; | |
19030 | + | |
19031 | + if (usm_helper_data.pid == -1 || !usm_ops.enabled) | |
19032 | + return 0; | |
19033 | + | |
19034 | + message_received = 0; | |
19035 | + activations++; | |
19036 | + | |
19037 | + if (activations > 1 && !force) | |
19038 | + return 0; | |
19039 | + | |
19040 | + while ((!message_received || message_received == USM_MSG_FAILED) && | |
19041 | + tries < 2) { | |
19042 | + toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt " | |
19043 | + "%d.\n", tries); | |
19044 | + | |
19045 | + init_completion(&usm_helper_data.wait_for_process); | |
19046 | + | |
19047 | + toi_send_netlink_message(&usm_helper_data, | |
19048 | + USM_MSG_CONNECT, | |
19049 | + NULL, 0); | |
19050 | + | |
19051 | + /* Wait 2 seconds for the userspace process to make contact */ | |
19052 | + wait_for_completion_timeout(&usm_helper_data.wait_for_process, | |
19053 | + 2*HZ); | |
19054 | + | |
19055 | + tries++; | |
19056 | + } | |
19057 | + | |
19058 | + return 0; | |
19059 | +} | |
19060 | + | |
19061 | +int toi_deactivate_storage(int force) | |
19062 | +{ | |
19063 | + if (usm_helper_data.pid == -1 || !usm_ops.enabled) | |
19064 | + return 0; | |
19065 | + | |
19066 | + message_received = 0; | |
19067 | + activations--; | |
19068 | + | |
19069 | + if (activations && !force) | |
19070 | + return 0; | |
19071 | + | |
19072 | + init_completion(&usm_helper_data.wait_for_process); | |
19073 | + | |
19074 | + toi_send_netlink_message(&usm_helper_data, | |
19075 | + USM_MSG_DISCONNECT, | |
19076 | + NULL, 0); | |
19077 | + | |
19078 | + wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ); | |
19079 | + | |
19080 | + if (!message_received || message_received == USM_MSG_FAILED) { | |
19081 | + printk(KERN_INFO "Returning failure disconnecting storage.\n"); | |
19082 | + return 1; | |
19083 | + } | |
19084 | + | |
19085 | + return 0; | |
19086 | +} | |
19087 | +#endif | |
19088 | + | |
19089 | +static void storage_manager_simulate(void) | |
19090 | +{ | |
19091 | + printk(KERN_INFO "--- Storage manager simulate ---\n"); | |
19092 | + toi_prepare_usm(); | |
19093 | + schedule(); | |
19094 | + printk(KERN_INFO "--- Activate storage 1 ---\n"); | |
19095 | + toi_activate_storage(1); | |
19096 | + schedule(); | |
19097 | + printk(KERN_INFO "--- Deactivate storage 1 ---\n"); | |
19098 | + toi_deactivate_storage(1); | |
19099 | + schedule(); | |
19100 | + printk(KERN_INFO "--- Cleanup usm ---\n"); | |
19101 | + toi_cleanup_usm(); | |
19102 | + schedule(); | |
19103 | + printk(KERN_INFO "--- Storage manager simulate ends ---\n"); | |
19104 | +} | |
19105 | + | |
19106 | +static int usm_storage_needed(void) | |
19107 | +{ | |
85eb3c9d | 19108 | + return sizeof(int) + strlen(usm_helper_data.program) + 1; |
2380c486 JR |
19109 | +} |
19110 | + | |
19111 | +static int usm_save_config_info(char *buf) | |
19112 | +{ | |
19113 | + int len = strlen(usm_helper_data.program); | |
85eb3c9d AM |
19114 | + memcpy(buf, usm_helper_data.program, len + 1); |
19115 | + return sizeof(int) + len + 1; | |
2380c486 JR |
19116 | +} |
19117 | + | |
19118 | +static void usm_load_config_info(char *buf, int size) | |
19119 | +{ | |
19120 | + /* Don't load the saved path if one has already been set */ | |
19121 | + if (usm_helper_data.program[0]) | |
19122 | + return; | |
19123 | + | |
85eb3c9d | 19124 | + memcpy(usm_helper_data.program, buf + sizeof(int), *((int *) buf)); |
2380c486 JR |
19125 | +} |
19126 | + | |
19127 | +static int usm_memory_needed(void) | |
19128 | +{ | |
19129 | + /* ball park figure of 32 pages */ | |
19130 | + return 32 * PAGE_SIZE; | |
19131 | +} | |
19132 | + | |
19133 | +/* toi_prepare_usm | |
19134 | + */ | |
19135 | +int toi_prepare_usm(void) | |
19136 | +{ | |
19137 | + usm_prepare_count++; | |
19138 | + | |
19139 | + if (usm_prepare_count > 1 || !usm_ops.enabled) | |
19140 | + return 0; | |
19141 | + | |
19142 | + usm_helper_data.pid = -1; | |
19143 | + | |
19144 | + if (!*usm_helper_data.program) | |
19145 | + return 0; | |
19146 | + | |
19147 | + toi_netlink_setup(&usm_helper_data); | |
19148 | + | |
19149 | + if (usm_helper_data.pid == -1) | |
19150 | + printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't" | |
19151 | + " start it.\n"); | |
19152 | + | |
19153 | + toi_activate_storage(0); | |
19154 | + | |
19155 | + return usm_helper_data.pid != -1; | |
19156 | +} | |
19157 | + | |
19158 | +void toi_cleanup_usm(void) | |
19159 | +{ | |
19160 | + usm_prepare_count--; | |
19161 | + | |
19162 | + if (usm_helper_data.pid > -1 && !usm_prepare_count) { | |
19163 | + toi_deactivate_storage(0); | |
19164 | + toi_netlink_close(&usm_helper_data); | |
19165 | + } | |
19166 | +} | |
19167 | + | |
19168 | +static void storage_manager_activate(void) | |
19169 | +{ | |
19170 | + if (storage_manager_action == storage_manager_last_action) | |
19171 | + return; | |
19172 | + | |
19173 | + if (storage_manager_action) | |
19174 | + toi_prepare_usm(); | |
19175 | + else | |
19176 | + toi_cleanup_usm(); | |
19177 | + | |
19178 | + storage_manager_last_action = storage_manager_action; | |
19179 | +} | |
19180 | + | |
19181 | +/* | |
19182 | + * User interface specific /sys/power/tuxonice entries. | |
19183 | + */ | |
19184 | + | |
19185 | +static struct toi_sysfs_data sysfs_params[] = { | |
19186 | + SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate), | |
19187 | + SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL), | |
19188 | + SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0, | |
19189 | + NULL), | |
19190 | + SYSFS_INT("activate_storage", SYSFS_RW , &storage_manager_action, 0, 1, | |
19191 | + 0, storage_manager_activate) | |
19192 | +}; | |
19193 | + | |
19194 | +static struct toi_module_ops usm_ops = { | |
19195 | + .type = MISC_MODULE, | |
19196 | + .name = "usm", | |
19197 | + .directory = "storage_manager", | |
19198 | + .module = THIS_MODULE, | |
19199 | + .storage_needed = usm_storage_needed, | |
19200 | + .save_config_info = usm_save_config_info, | |
19201 | + .load_config_info = usm_load_config_info, | |
19202 | + .memory_needed = usm_memory_needed, | |
19203 | + | |
19204 | + .sysfs_data = sysfs_params, | |
19205 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
19206 | + sizeof(struct toi_sysfs_data), | |
19207 | +}; | |
19208 | + | |
19209 | +/* toi_usm_sysfs_init | |
19210 | + * Description: Boot time initialisation for user interface. | |
19211 | + */ | |
19212 | +int toi_usm_init(void) | |
19213 | +{ | |
19214 | + usm_helper_data.nl = NULL; | |
19215 | + usm_helper_data.program[0] = '\0'; | |
19216 | + usm_helper_data.pid = -1; | |
19217 | + usm_helper_data.skb_size = 0; | |
19218 | + usm_helper_data.pool_limit = 6; | |
19219 | + usm_helper_data.netlink_id = NETLINK_TOI_USM; | |
19220 | + usm_helper_data.name = "userspace storage manager"; | |
19221 | + usm_helper_data.rcv_msg = usm_user_rcv_msg; | |
19222 | + usm_helper_data.interface_version = 2; | |
19223 | + usm_helper_data.must_init = 0; | |
19224 | + init_completion(&usm_helper_data.wait_for_process); | |
19225 | + | |
19226 | + return toi_register_module(&usm_ops); | |
19227 | +} | |
19228 | + | |
19229 | +void toi_usm_exit(void) | |
19230 | +{ | |
19231 | + toi_netlink_close_complete(&usm_helper_data); | |
19232 | + toi_unregister_module(&usm_ops); | |
19233 | +} | |
19234 | diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h | |
19235 | new file mode 100644 | |
5dd10c98 | 19236 | index 0000000..8c6b5a7 |
2380c486 JR |
19237 | --- /dev/null |
19238 | +++ b/kernel/power/tuxonice_storage.h | |
19239 | @@ -0,0 +1,45 @@ | |
19240 | +/* | |
19241 | + * kernel/power/tuxonice_storage.h | |
19242 | + * | |
5dd10c98 | 19243 | + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
19244 | + * |
19245 | + * This file is released under the GPLv2. | |
19246 | + */ | |
19247 | + | |
19248 | +#ifdef CONFIG_NET | |
19249 | +int toi_prepare_usm(void); | |
19250 | +void toi_cleanup_usm(void); | |
19251 | + | |
19252 | +int toi_activate_storage(int force); | |
19253 | +int toi_deactivate_storage(int force); | |
19254 | +extern int toi_usm_init(void); | |
19255 | +extern void toi_usm_exit(void); | |
19256 | +#else | |
19257 | +static inline int toi_usm_init(void) { return 0; } | |
19258 | +static inline void toi_usm_exit(void) { } | |
19259 | + | |
19260 | +static inline int toi_activate_storage(int force) | |
19261 | +{ | |
19262 | + return 0; | |
19263 | +} | |
19264 | + | |
19265 | +static inline int toi_deactivate_storage(int force) | |
19266 | +{ | |
19267 | + return 0; | |
19268 | +} | |
19269 | + | |
19270 | +static inline int toi_prepare_usm(void) { return 0; } | |
19271 | +static inline void toi_cleanup_usm(void) { } | |
19272 | +#endif | |
19273 | + | |
19274 | +enum { | |
19275 | + USM_MSG_BASE = 0x10, | |
19276 | + | |
19277 | + /* Kernel -> Userspace */ | |
19278 | + USM_MSG_CONNECT = 0x30, | |
19279 | + USM_MSG_DISCONNECT = 0x31, | |
19280 | + USM_MSG_SUCCESS = 0x40, | |
19281 | + USM_MSG_FAILED = 0x41, | |
19282 | + | |
19283 | + USM_MSG_MAX, | |
19284 | +}; | |
19285 | diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c | |
19286 | new file mode 100644 | |
cacc47f8 | 19287 | index 0000000..a4dbceb |
2380c486 JR |
19288 | --- /dev/null |
19289 | +++ b/kernel/power/tuxonice_swap.c | |
5dd10c98 | 19290 | @@ -0,0 +1,487 @@ |
2380c486 JR |
19291 | +/* |
19292 | + * kernel/power/tuxonice_swap.c | |
19293 | + * | |
5dd10c98 | 19294 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
19295 | + * |
19296 | + * Distributed under GPLv2. | |
19297 | + * | |
19298 | + * This file encapsulates functions for usage of swap space as a | |
19299 | + * backing store. | |
19300 | + */ | |
19301 | + | |
19302 | +#include <linux/suspend.h> | |
2380c486 JR |
19303 | +#include <linux/blkdev.h> |
19304 | +#include <linux/swapops.h> | |
19305 | +#include <linux/swap.h> | |
19306 | +#include <linux/syscalls.h> | |
cacc47f8 | 19307 | +#include <linux/fs_uuid.h> |
2380c486 JR |
19308 | + |
19309 | +#include "tuxonice.h" | |
19310 | +#include "tuxonice_sysfs.h" | |
19311 | +#include "tuxonice_modules.h" | |
19312 | +#include "tuxonice_io.h" | |
19313 | +#include "tuxonice_ui.h" | |
19314 | +#include "tuxonice_extent.h" | |
7e46296a | 19315 | +#include "tuxonice_bio.h" |
2380c486 JR |
19316 | +#include "tuxonice_alloc.h" |
19317 | +#include "tuxonice_builtin.h" | |
19318 | + | |
19319 | +static struct toi_module_ops toi_swapops; | |
19320 | + | |
7e46296a AM |
19321 | +/* For swapfile automatically swapon/off'd. */ |
19322 | +static char swapfilename[255] = ""; | |
19323 | +static int toi_swapon_status; | |
2380c486 | 19324 | + |
7e46296a AM |
19325 | +/* Swap Pages */ |
19326 | +static unsigned long swap_allocated; | |
2380c486 | 19327 | + |
7e46296a | 19328 | +static struct sysinfo swapinfo; |
2380c486 | 19329 | + |
7e46296a AM |
19330 | +/** |
19331 | + * enable_swapfile: Swapon the user specified swapfile prior to hibernating. | |
19332 | + * | |
19333 | + * Activate the given swapfile if it wasn't already enabled. Remember whether | |
19334 | + * we really did swapon it for swapoffing later. | |
2380c486 | 19335 | + */ |
7e46296a | 19336 | +static void enable_swapfile(void) |
2380c486 | 19337 | +{ |
7e46296a | 19338 | + int activateswapresult = -EINVAL; |
2380c486 | 19339 | + |
7e46296a AM |
19340 | + if (swapfilename[0]) { |
19341 | + /* Attempt to swap on with maximum priority */ | |
19342 | + activateswapresult = sys_swapon(swapfilename, 0xFFFF); | |
19343 | + if (activateswapresult && activateswapresult != -EBUSY) | |
19344 | + printk(KERN_ERR "TuxOnIce: The swapfile/partition " | |
19345 | + "specified by /sys/power/tuxonice/swap/swapfile" | |
19346 | + " (%s) could not be turned on (error %d). " | |
19347 | + "Attempting to continue.\n", | |
19348 | + swapfilename, activateswapresult); | |
19349 | + if (!activateswapresult) | |
19350 | + toi_swapon_status = 1; | |
19351 | + } | |
2380c486 JR |
19352 | +} |
19353 | + | |
7e46296a AM |
19354 | +/** |
19355 | + * disable_swapfile: Swapoff any file swaponed at the start of the cycle. | |
19356 | + * | |
19357 | + * If we did successfully swapon a file at the start of the cycle, swapoff | |
19358 | + * it now (finishing up). | |
19359 | + */ | |
19360 | +static void disable_swapfile(void) | |
2380c486 | 19361 | +{ |
7e46296a AM |
19362 | + if (!toi_swapon_status) |
19363 | + return; | |
2380c486 | 19364 | + |
7e46296a AM |
19365 | + sys_swapoff(swapfilename); |
19366 | + toi_swapon_status = 0; | |
2380c486 JR |
19367 | +} |
19368 | + | |
7e46296a AM |
19369 | +static int add_blocks_to_extent_chain(struct toi_bdev_info *chain, |
19370 | + unsigned long start, unsigned long end) | |
2380c486 JR |
19371 | +{ |
19372 | + if (test_action_state(TOI_TEST_BIO)) | |
7e46296a AM |
19373 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %lu-%lu to " |
19374 | + "chain %p.", start << chain->bmap_shift, | |
19375 | + end << chain->bmap_shift, chain); | |
2380c486 | 19376 | + |
7e46296a | 19377 | + return toi_add_to_extent_chain(&chain->blocks, start, end); |
2380c486 JR |
19378 | +} |
19379 | + | |
19380 | + | |
7e46296a | 19381 | +static int get_main_pool_phys_params(struct toi_bdev_info *chain) |
2380c486 JR |
19382 | +{ |
19383 | + struct hibernate_extent *extentpointer = NULL; | |
92bca44c | 19384 | + unsigned long address, extent_min = 0, extent_max = 0; |
7e46296a AM |
19385 | + int empty = 1; |
19386 | + | |
19387 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "get main pool phys params for " | |
19388 | + "chain %d.", chain->allocator_index); | |
19389 | + | |
19390 | + if (!chain->allocations.first) | |
19391 | + return 0; | |
2380c486 | 19392 | + |
7e46296a AM |
19393 | + if (chain->blocks.first) |
19394 | + toi_put_extent_chain(&chain->blocks); | |
2380c486 | 19395 | + |
7e46296a | 19396 | + toi_extent_for_each(&chain->allocations, extentpointer, address) { |
2380c486 | 19397 | + swp_entry_t swap_address = (swp_entry_t) { address }; |
5dd10c98 AM |
19398 | + struct block_device *bdev; |
19399 | + sector_t new_sector = map_swap_entry(swap_address, &bdev); | |
2380c486 | 19400 | + |
7e46296a AM |
19401 | + if (empty) { |
19402 | + empty = 0; | |
19403 | + extent_min = extent_max = new_sector; | |
e999739a | 19404 | + continue; |
7e46296a | 19405 | + } |
e999739a | 19406 | + |
7e46296a | 19407 | + if (new_sector == extent_max + 1) { |
2380c486 JR |
19408 | + extent_max++; |
19409 | + continue; | |
19410 | + } | |
19411 | + | |
7e46296a | 19412 | + if (add_blocks_to_extent_chain(chain, extent_min, extent_max)) { |
9474138d AM |
19413 | + printk(KERN_ERR "Out of memory while making block " |
19414 | + "chains.\n"); | |
2380c486 | 19415 | + return -ENOMEM; |
0ada99ac | 19416 | + } |
2380c486 JR |
19417 | + |
19418 | + extent_min = new_sector; | |
19419 | + extent_max = new_sector; | |
2380c486 JR |
19420 | + } |
19421 | + | |
7e46296a AM |
19422 | + if (!empty && |
19423 | + add_blocks_to_extent_chain(chain, extent_min, extent_max)) { | |
9474138d AM |
19424 | + printk(KERN_ERR "Out of memory while making block chains.\n"); |
19425 | + return -ENOMEM; | |
0ada99ac | 19426 | + } |
2380c486 | 19427 | + |
7e46296a | 19428 | + return 0; |
2380c486 JR |
19429 | +} |
19430 | + | |
19431 | +/* | |
e999739a | 19432 | + * Like si_swapinfo, except that we don't include ram backed swap (compcache!) |
19433 | + * and don't need to use the spinlocks (userspace is stopped when this | |
19434 | + * function is called). | |
19435 | + */ | |
5dd10c98 | 19436 | +void si_swapinfo_no_compcache(void) |
e999739a | 19437 | +{ |
19438 | + unsigned int i; | |
19439 | + | |
19440 | + si_swapinfo(&swapinfo); | |
5dd10c98 AM |
19441 | + swapinfo.freeswap = 0; |
19442 | + swapinfo.totalswap = 0; | |
e999739a | 19443 | + |
19444 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
19445 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
5dd10c98 | 19446 | + if (si && (si->flags & SWP_WRITEOK) && |
e999739a | 19447 | + (strncmp(si->bdev->bd_disk->disk_name, "ram", 3))) { |
5dd10c98 AM |
19448 | + swapinfo.totalswap += si->inuse_pages; |
19449 | + swapinfo.freeswap += si->pages - si->inuse_pages; | |
e999739a | 19450 | + } |
19451 | + } | |
19452 | +} | |
19453 | +/* | |
2380c486 JR |
19454 | + * We can't just remember the value from allocation time, because other |
19455 | + * processes might have allocated swap in the mean time. | |
19456 | + */ | |
92bca44c | 19457 | +static unsigned long toi_swap_storage_available(void) |
2380c486 | 19458 | +{ |
7e46296a | 19459 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "In toi_swap_storage_available."); |
5dd10c98 | 19460 | + si_swapinfo_no_compcache(); |
7e46296a | 19461 | + return swapinfo.freeswap + swap_allocated; |
2380c486 JR |
19462 | +} |
19463 | + | |
19464 | +static int toi_swap_initialise(int starting_cycle) | |
19465 | +{ | |
19466 | + if (!starting_cycle) | |
19467 | + return 0; | |
19468 | + | |
19469 | + enable_swapfile(); | |
7e46296a | 19470 | + return 0; |
2380c486 JR |
19471 | +} |
19472 | + | |
19473 | +static void toi_swap_cleanup(int ending_cycle) | |
19474 | +{ | |
19475 | + if (ending_cycle) | |
19476 | + disable_swapfile(); | |
7e46296a | 19477 | +} |
2380c486 | 19478 | + |
7e46296a AM |
19479 | +static void toi_swap_free_storage(struct toi_bdev_info *chain) |
19480 | +{ | |
19481 | + /* Free swap entries */ | |
19482 | + struct hibernate_extent *extentpointer; | |
19483 | + unsigned long extentvalue; | |
2380c486 | 19484 | + |
7e46296a AM |
19485 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing storage for chain %p.", |
19486 | + chain); | |
2380c486 | 19487 | + |
7e46296a AM |
19488 | + swap_allocated -= chain->allocations.size; |
19489 | + toi_extent_for_each(&chain->allocations, extentpointer, extentvalue) | |
19490 | + swap_free((swp_entry_t) { extentvalue }); | |
2380c486 | 19491 | + |
7e46296a AM |
19492 | + toi_put_extent_chain(&chain->allocations); |
19493 | +} | |
2380c486 | 19494 | + |
7e46296a AM |
19495 | +static void free_swap_range(unsigned long min, unsigned long max) |
19496 | +{ | |
19497 | + int j; | |
2380c486 | 19498 | + |
7e46296a AM |
19499 | + for (j = min; j <= max; j++) |
19500 | + swap_free((swp_entry_t) { j }); | |
19501 | + swap_allocated -= (max - min + 1); | |
19502 | +} | |
2380c486 | 19503 | + |
7e46296a AM |
19504 | +/* |
19505 | + * Allocation of a single swap type. Swap priorities are handled at the higher | |
19506 | + * level. | |
19507 | + */ | |
19508 | +static int toi_swap_allocate_storage(struct toi_bdev_info *chain, | |
19509 | + unsigned long request) | |
19510 | +{ | |
19511 | + int to_add = 0; | |
19512 | + unsigned long gotten = 0; | |
19513 | + unsigned long extent_min = 0, extent_max = 0; | |
2380c486 | 19514 | + |
7e46296a AM |
19515 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " Swap allocate storage: Asked to" |
19516 | + " allocate %lu pages from device %d.", request, | |
19517 | + chain->allocator_index); | |
2380c486 | 19518 | + |
7e46296a AM |
19519 | + while (gotten < request) { |
19520 | + swp_entry_t entry; | |
19521 | + unsigned long new_value; | |
2380c486 | 19522 | + |
7e46296a AM |
19523 | + entry = get_swap_page_of_type(chain->allocator_index); |
19524 | + if (!entry.val) | |
19525 | + break; | |
2380c486 | 19526 | + |
7e46296a AM |
19527 | + swap_allocated++; |
19528 | + new_value = entry.val; | |
19529 | + gotten++; | |
2380c486 | 19530 | + |
7e46296a AM |
19531 | + if (!to_add) { |
19532 | + to_add = 1; | |
19533 | + extent_min = new_value; | |
19534 | + extent_max = new_value; | |
2380c486 JR |
19535 | + continue; |
19536 | + } | |
19537 | + | |
7e46296a AM |
19538 | + if (new_value == extent_max + 1) { |
19539 | + extent_max++; | |
2380c486 JR |
19540 | + continue; |
19541 | + } | |
19542 | + | |
7e46296a AM |
19543 | + if (toi_add_to_extent_chain(&chain->allocations, extent_min, |
19544 | + extent_max)) { | |
19545 | + printk(KERN_INFO "Failed to allocate extent for " | |
19546 | + "%lu-%lu.\n", extent_min, extent_max); | |
19547 | + free_swap_range(extent_min, extent_max); | |
19548 | + swap_free(entry); | |
19549 | + gotten -= (extent_max - extent_min); | |
19550 | + /* Don't try to add again below */ | |
19551 | + to_add = 0; | |
19552 | + break; | |
19553 | + } | |
19554 | + | |
19555 | + extent_min = new_value; | |
19556 | + extent_max = new_value; | |
2380c486 JR |
19557 | + } |
19558 | + | |
7e46296a AM |
19559 | + if (to_add) { |
19560 | + int this_result = toi_add_to_extent_chain(&chain->allocations, | |
19561 | + extent_min, extent_max); | |
2380c486 | 19562 | + |
7e46296a AM |
19563 | + if (this_result) { |
19564 | + free_swap_range(extent_min, extent_max); | |
19565 | + gotten -= (extent_max - extent_min + 1); | |
19566 | + } | |
19567 | + } | |
2380c486 | 19568 | + |
7e46296a AM |
19569 | + toi_message(TOI_IO, TOI_VERBOSE, 0, " Allocated %lu pages.", gotten); |
19570 | + return gotten; | |
2380c486 JR |
19571 | +} |
19572 | + | |
7e46296a | 19573 | +static int toi_swap_register_storage(void) |
2380c486 | 19574 | +{ |
5dd10c98 | 19575 | + int i, result = 0; |
7e46296a AM |
19576 | + |
19577 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_swap_register_storage."); | |
19578 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
19579 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
19580 | + struct toi_bdev_info *devinfo; | |
5dd10c98 AM |
19581 | + unsigned char *p; |
19582 | + unsigned char buf[256]; | |
19583 | + struct fs_info *fs_info; | |
19584 | + | |
19585 | + if (!si || !(si->flags & SWP_WRITEOK) || | |
7e46296a AM |
19586 | + !strncmp(si->bdev->bd_disk->disk_name, "ram", 3)) |
19587 | + continue; | |
19588 | + | |
19589 | + devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), | |
19590 | + GFP_ATOMIC); | |
19591 | + if (!devinfo) { | |
19592 | + printk("Failed to allocate devinfo struct for swap " | |
19593 | + "device %d.\n", i); | |
19594 | + return -ENOMEM; | |
19595 | + } | |
19596 | + | |
19597 | + devinfo->bdev = si->bdev; | |
19598 | + devinfo->allocator = &toi_swapops; | |
19599 | + devinfo->allocator_index = i; | |
19600 | + | |
5dd10c98 AM |
19601 | + fs_info = fs_info_from_block_dev(si->bdev); |
19602 | + if (fs_info && !IS_ERR(fs_info)) { | |
19603 | + memcpy(devinfo->uuid, &fs_info->uuid, 16); | |
19604 | + free_fs_info(fs_info); | |
19605 | + } else | |
19606 | + result = (int) PTR_ERR(fs_info); | |
19607 | + | |
19608 | + if (!fs_info) | |
19609 | + printk("fs_info from block dev returned %d.\n", result); | |
7e46296a AM |
19610 | + devinfo->dev_t = si->bdev->bd_dev; |
19611 | + devinfo->prio = si->prio; | |
19612 | + devinfo->bmap_shift = 3; | |
19613 | + devinfo->blocks_per_page = 1; | |
19614 | + | |
5dd10c98 AM |
19615 | + p = d_path(&si->swap_file->f_path, buf, sizeof(buf)); |
19616 | + sprintf(devinfo->name, "swap on %s", p); | |
19617 | + | |
7e46296a AM |
19618 | + toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering swap storage:" |
19619 | + " Device %d (%lx), prio %d.", i, | |
19620 | + (unsigned long) devinfo->dev_t, devinfo->prio); | |
19621 | + toi_bio_ops.register_storage(devinfo); | |
19622 | + } | |
19623 | + | |
2380c486 JR |
19624 | + return 0; |
19625 | +} | |
19626 | + | |
19627 | +/* | |
19628 | + * workspace_size | |
19629 | + * | |
19630 | + * Description: | |
19631 | + * Returns the number of bytes of RAM needed for this | |
19632 | + * code to do its work. (Used when calculating whether | |
19633 | + * we have enough memory to be able to hibernate & resume). | |
19634 | + * | |
19635 | + */ | |
19636 | +static int toi_swap_memory_needed(void) | |
19637 | +{ | |
19638 | + return 1; | |
19639 | +} | |
19640 | + | |
19641 | +/* | |
19642 | + * Print debug info | |
19643 | + * | |
19644 | + * Description: | |
19645 | + */ | |
19646 | +static int toi_swap_print_debug_stats(char *buffer, int size) | |
19647 | +{ | |
19648 | + int len = 0; | |
2380c486 | 19649 | + |
7e46296a | 19650 | + len = scnprintf(buffer, size, "- Swap Allocator enabled.\n"); |
2380c486 JR |
19651 | + if (swapfilename[0]) |
19652 | + len += scnprintf(buffer+len, size-len, | |
19653 | + " Attempting to automatically swapon: %s.\n", | |
19654 | + swapfilename); | |
19655 | + | |
5dd10c98 | 19656 | + si_swapinfo_no_compcache(); |
2380c486 JR |
19657 | + |
19658 | + len += scnprintf(buffer+len, size-len, | |
92bca44c | 19659 | + " Swap available for image: %lu pages.\n", |
5dd10c98 | 19660 | + swapinfo.freeswap + swap_allocated); |
2380c486 JR |
19661 | + |
19662 | + return len; | |
19663 | +} | |
19664 | + | |
2380c486 JR |
19665 | +static int header_locations_read_sysfs(const char *page, int count) |
19666 | +{ | |
19667 | + int i, printedpartitionsmessage = 0, len = 0, haveswap = 0; | |
19668 | + struct inode *swapf = NULL; | |
19669 | + int zone; | |
19670 | + char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL); | |
19671 | + char *path, *output = (char *) page; | |
19672 | + int path_len; | |
19673 | + | |
19674 | + if (!page) | |
19675 | + return 0; | |
19676 | + | |
19677 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
19678 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
19679 | + | |
5dd10c98 | 19680 | + if (!si || !(si->flags & SWP_WRITEOK)) |
2380c486 JR |
19681 | + continue; |
19682 | + | |
19683 | + if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) { | |
19684 | + haveswap = 1; | |
19685 | + if (!printedpartitionsmessage) { | |
19686 | + len += sprintf(output + len, | |
19687 | + "For swap partitions, simply use the " | |
19688 | + "format: resume=swap:/dev/hda1.\n"); | |
19689 | + printedpartitionsmessage = 1; | |
19690 | + } | |
19691 | + } else { | |
19692 | + path_len = 0; | |
19693 | + | |
19694 | + path = d_path(&si->swap_file->f_path, path_page, | |
19695 | + PAGE_SIZE); | |
5dd10c98 | 19696 | + path_len = snprintf(path_page, PAGE_SIZE, "%s", path); |
2380c486 JR |
19697 | + |
19698 | + haveswap = 1; | |
19699 | + swapf = si->swap_file->f_mapping->host; | |
19700 | + zone = bmap(swapf, 0); | |
19701 | + if (!zone) { | |
19702 | + len += sprintf(output + len, | |
19703 | + "Swapfile %s has been corrupted. Reuse" | |
19704 | + " mkswap on it and try again.\n", | |
19705 | + path_page); | |
19706 | + } else { | |
5dd10c98 | 19707 | + char name_buffer[BDEVNAME_SIZE]; |
2380c486 JR |
19708 | + len += sprintf(output + len, |
19709 | + "For swapfile `%s`," | |
19710 | + " use resume=swap:/dev/%s:0x%x.\n", | |
19711 | + path_page, | |
19712 | + bdevname(si->bdev, name_buffer), | |
19713 | + zone << (swapf->i_blkbits - 9)); | |
19714 | + } | |
19715 | + } | |
19716 | + } | |
19717 | + | |
19718 | + if (!haveswap) | |
19719 | + len = sprintf(output, "You need to turn on swap partitions " | |
19720 | + "before examining this file.\n"); | |
19721 | + | |
19722 | + toi_free_page(10, (unsigned long) path_page); | |
19723 | + return len; | |
19724 | +} | |
19725 | + | |
19726 | +static struct toi_sysfs_data sysfs_params[] = { | |
19727 | + SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL), | |
19728 | + SYSFS_CUSTOM("headerlocations", SYSFS_READONLY, | |
19729 | + header_locations_read_sysfs, NULL, 0, NULL), | |
19730 | + SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0, | |
19731 | + attempt_to_parse_resume_device2), | |
19732 | +}; | |
19733 | + | |
7e46296a AM |
19734 | +static struct toi_bio_allocator_ops toi_bio_swapops = { |
19735 | + .register_storage = toi_swap_register_storage, | |
19736 | + .storage_available = toi_swap_storage_available, | |
19737 | + .allocate_storage = toi_swap_allocate_storage, | |
19738 | + .bmap = get_main_pool_phys_params, | |
19739 | + .free_storage = toi_swap_free_storage, | |
19740 | +}; | |
19741 | + | |
2380c486 | 19742 | +static struct toi_module_ops toi_swapops = { |
7e46296a | 19743 | + .type = BIO_ALLOCATOR_MODULE, |
2380c486 JR |
19744 | + .name = "swap storage", |
19745 | + .directory = "swap", | |
19746 | + .module = THIS_MODULE, | |
19747 | + .memory_needed = toi_swap_memory_needed, | |
19748 | + .print_debug_info = toi_swap_print_debug_stats, | |
2380c486 JR |
19749 | + .initialise = toi_swap_initialise, |
19750 | + .cleanup = toi_swap_cleanup, | |
7e46296a | 19751 | + .bio_allocator_ops = &toi_bio_swapops, |
2380c486 JR |
19752 | + |
19753 | + .sysfs_data = sysfs_params, | |
19754 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
19755 | + sizeof(struct toi_sysfs_data), | |
19756 | +}; | |
19757 | + | |
19758 | +/* ---- Registration ---- */ | |
19759 | +static __init int toi_swap_load(void) | |
19760 | +{ | |
2380c486 JR |
19761 | + return toi_register_module(&toi_swapops); |
19762 | +} | |
19763 | + | |
19764 | +#ifdef MODULE | |
19765 | +static __exit void toi_swap_unload(void) | |
19766 | +{ | |
19767 | + toi_unregister_module(&toi_swapops); | |
19768 | +} | |
19769 | + | |
19770 | +module_init(toi_swap_load); | |
19771 | +module_exit(toi_swap_unload); | |
19772 | +MODULE_LICENSE("GPL"); | |
19773 | +MODULE_AUTHOR("Nigel Cunningham"); | |
19774 | +MODULE_DESCRIPTION("TuxOnIce SwapAllocator"); | |
19775 | +#else | |
19776 | +late_initcall(toi_swap_load); | |
19777 | +#endif | |
19778 | diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c | |
19779 | new file mode 100644 | |
5dd10c98 | 19780 | index 0000000..0088409 |
2380c486 JR |
19781 | --- /dev/null |
19782 | +++ b/kernel/power/tuxonice_sysfs.c | |
9474138d | 19783 | @@ -0,0 +1,335 @@ |
2380c486 JR |
19784 | +/* |
19785 | + * kernel/power/tuxonice_sysfs.c | |
19786 | + * | |
5dd10c98 | 19787 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
19788 | + * |
19789 | + * This file is released under the GPLv2. | |
19790 | + * | |
19791 | + * This file contains support for sysfs entries for tuning TuxOnIce. | |
19792 | + * | |
19793 | + * We have a generic handler that deals with the most common cases, and | |
19794 | + * hooks for special handlers to use. | |
19795 | + */ | |
19796 | + | |
19797 | +#include <linux/suspend.h> | |
2380c486 JR |
19798 | + |
19799 | +#include "tuxonice_sysfs.h" | |
19800 | +#include "tuxonice.h" | |
19801 | +#include "tuxonice_storage.h" | |
19802 | +#include "tuxonice_alloc.h" | |
19803 | + | |
19804 | +static int toi_sysfs_initialised; | |
19805 | + | |
19806 | +static void toi_initialise_sysfs(void); | |
19807 | + | |
19808 | +static struct toi_sysfs_data sysfs_params[]; | |
19809 | + | |
19810 | +#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr) | |
19811 | + | |
19812 | +static void toi_main_wrapper(void) | |
19813 | +{ | |
9474138d | 19814 | + toi_try_hibernate(); |
2380c486 JR |
19815 | +} |
19816 | + | |
19817 | +static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr, | |
19818 | + char *page) | |
19819 | +{ | |
19820 | + struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); | |
19821 | + int len = 0; | |
19822 | + int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ; | |
19823 | + | |
19824 | + if (full_prep && toi_start_anything(0)) | |
19825 | + return -EBUSY; | |
19826 | + | |
19827 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) | |
19828 | + toi_prepare_usm(); | |
19829 | + | |
19830 | + switch (sysfs_data->type) { | |
19831 | + case TOI_SYSFS_DATA_CUSTOM: | |
19832 | + len = (sysfs_data->data.special.read_sysfs) ? | |
19833 | + (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE) | |
19834 | + : 0; | |
19835 | + break; | |
19836 | + case TOI_SYSFS_DATA_BIT: | |
19837 | + len = sprintf(page, "%d\n", | |
19838 | + -test_bit(sysfs_data->data.bit.bit, | |
19839 | + sysfs_data->data.bit.bit_vector)); | |
19840 | + break; | |
19841 | + case TOI_SYSFS_DATA_INTEGER: | |
19842 | + len = sprintf(page, "%d\n", | |
19843 | + *(sysfs_data->data.integer.variable)); | |
19844 | + break; | |
19845 | + case TOI_SYSFS_DATA_LONG: | |
19846 | + len = sprintf(page, "%ld\n", | |
19847 | + *(sysfs_data->data.a_long.variable)); | |
19848 | + break; | |
19849 | + case TOI_SYSFS_DATA_UL: | |
19850 | + len = sprintf(page, "%lu\n", | |
19851 | + *(sysfs_data->data.ul.variable)); | |
19852 | + break; | |
19853 | + case TOI_SYSFS_DATA_STRING: | |
19854 | + len = sprintf(page, "%s\n", | |
19855 | + sysfs_data->data.string.variable); | |
19856 | + break; | |
19857 | + } | |
19858 | + | |
19859 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) | |
19860 | + toi_cleanup_usm(); | |
19861 | + | |
19862 | + if (full_prep) | |
19863 | + toi_finish_anything(0); | |
19864 | + | |
19865 | + return len; | |
19866 | +} | |
19867 | + | |
19868 | +#define BOUND(_variable, _type) do { \ | |
19869 | + if (*_variable < sysfs_data->data._type.minimum) \ | |
19870 | + *_variable = sysfs_data->data._type.minimum; \ | |
19871 | + else if (*_variable > sysfs_data->data._type.maximum) \ | |
19872 | + *_variable = sysfs_data->data._type.maximum; \ | |
19873 | +} while (0) | |
19874 | + | |
19875 | +static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr, | |
19876 | + const char *my_buf, size_t count) | |
19877 | +{ | |
19878 | + int assigned_temp_buffer = 0, result = count; | |
19879 | + struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); | |
19880 | + | |
19881 | + if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME))) | |
19882 | + return -EBUSY; | |
19883 | + | |
19884 | + ((char *) my_buf)[count] = 0; | |
19885 | + | |
19886 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) | |
19887 | + toi_prepare_usm(); | |
19888 | + | |
19889 | + switch (sysfs_data->type) { | |
19890 | + case TOI_SYSFS_DATA_CUSTOM: | |
19891 | + if (sysfs_data->data.special.write_sysfs) | |
19892 | + result = (sysfs_data->data.special.write_sysfs)(my_buf, | |
19893 | + count); | |
19894 | + break; | |
19895 | + case TOI_SYSFS_DATA_BIT: | |
19896 | + { | |
9474138d AM |
19897 | + unsigned long value; |
19898 | + result = strict_strtoul(my_buf, 0, &value); | |
19899 | + if (result) | |
19900 | + break; | |
2380c486 JR |
19901 | + if (value) |
19902 | + set_bit(sysfs_data->data.bit.bit, | |
19903 | + (sysfs_data->data.bit.bit_vector)); | |
19904 | + else | |
19905 | + clear_bit(sysfs_data->data.bit.bit, | |
19906 | + (sysfs_data->data.bit.bit_vector)); | |
19907 | + } | |
19908 | + break; | |
19909 | + case TOI_SYSFS_DATA_INTEGER: | |
19910 | + { | |
9474138d AM |
19911 | + long temp; |
19912 | + result = strict_strtol(my_buf, 0, &temp); | |
19913 | + if (result) | |
19914 | + break; | |
19915 | + *(sysfs_data->data.integer.variable) = (int) temp; | |
19916 | + BOUND(sysfs_data->data.integer.variable, integer); | |
2380c486 JR |
19917 | + break; |
19918 | + } | |
19919 | + case TOI_SYSFS_DATA_LONG: | |
19920 | + { | |
19921 | + long *variable = | |
19922 | + sysfs_data->data.a_long.variable; | |
9474138d AM |
19923 | + result = strict_strtol(my_buf, 0, variable); |
19924 | + if (result) | |
19925 | + break; | |
2380c486 JR |
19926 | + BOUND(variable, a_long); |
19927 | + break; | |
19928 | + } | |
19929 | + case TOI_SYSFS_DATA_UL: | |
19930 | + { | |
19931 | + unsigned long *variable = | |
19932 | + sysfs_data->data.ul.variable; | |
9474138d AM |
19933 | + result = strict_strtoul(my_buf, 0, variable); |
19934 | + if (result) | |
19935 | + break; | |
2380c486 JR |
19936 | + BOUND(variable, ul); |
19937 | + break; | |
19938 | + } | |
19939 | + break; | |
19940 | + case TOI_SYSFS_DATA_STRING: | |
19941 | + { | |
19942 | + int copy_len = count; | |
19943 | + char *variable = | |
19944 | + sysfs_data->data.string.variable; | |
19945 | + | |
19946 | + if (sysfs_data->data.string.max_length && | |
19947 | + (copy_len > sysfs_data->data.string.max_length)) | |
19948 | + copy_len = sysfs_data->data.string.max_length; | |
19949 | + | |
19950 | + if (!variable) { | |
19951 | + variable = (char *) toi_get_zeroed_page(31, | |
19952 | + TOI_ATOMIC_GFP); | |
19953 | + sysfs_data->data.string.variable = variable; | |
19954 | + assigned_temp_buffer = 1; | |
19955 | + } | |
19956 | + strncpy(variable, my_buf, copy_len); | |
19957 | + if (copy_len && my_buf[copy_len - 1] == '\n') | |
19958 | + variable[count - 1] = 0; | |
19959 | + variable[count] = 0; | |
19960 | + } | |
19961 | + break; | |
19962 | + } | |
19963 | + | |
9474138d AM |
19964 | + if (!result) |
19965 | + result = count; | |
19966 | + | |
2380c486 | 19967 | + /* Side effect routine? */ |
9474138d | 19968 | + if (result == count && sysfs_data->write_side_effect) |
2380c486 JR |
19969 | + sysfs_data->write_side_effect(); |
19970 | + | |
19971 | + /* Free temporary buffers */ | |
19972 | + if (assigned_temp_buffer) { | |
19973 | + toi_free_page(31, | |
19974 | + (unsigned long) sysfs_data->data.string.variable); | |
19975 | + sysfs_data->data.string.variable = NULL; | |
19976 | + } | |
19977 | + | |
19978 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) | |
19979 | + toi_cleanup_usm(); | |
19980 | + | |
19981 | + toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME); | |
19982 | + | |
19983 | + return result; | |
19984 | +} | |
19985 | + | |
19986 | +static struct sysfs_ops toi_sysfs_ops = { | |
19987 | + .show = &toi_attr_show, | |
19988 | + .store = &toi_attr_store, | |
19989 | +}; | |
19990 | + | |
19991 | +static struct kobj_type toi_ktype = { | |
19992 | + .sysfs_ops = &toi_sysfs_ops, | |
19993 | +}; | |
19994 | + | |
19995 | +struct kobject *tuxonice_kobj; | |
19996 | + | |
19997 | +/* Non-module sysfs entries. | |
19998 | + * | |
19999 | + * This array contains entries that are automatically registered at | |
20000 | + * boot. Modules and the console code register their own entries separately. | |
20001 | + */ | |
20002 | + | |
20003 | +static struct toi_sysfs_data sysfs_params[] = { | |
20004 | + SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL, | |
20005 | + SYSFS_HIBERNATING, toi_main_wrapper), | |
20006 | + SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL, | |
9474138d | 20007 | + SYSFS_RESUMING, toi_try_resume) |
2380c486 JR |
20008 | +}; |
20009 | + | |
20010 | +void remove_toi_sysdir(struct kobject *kobj) | |
20011 | +{ | |
20012 | + if (!kobj) | |
20013 | + return; | |
20014 | + | |
20015 | + kobject_put(kobj); | |
20016 | +} | |
20017 | + | |
20018 | +struct kobject *make_toi_sysdir(char *name) | |
20019 | +{ | |
20020 | + struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj); | |
20021 | + | |
20022 | + if (!kobj) { | |
20023 | + printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs " | |
20024 | + "dir!\n"); | |
20025 | + return NULL; | |
20026 | + } | |
20027 | + | |
20028 | + kobj->ktype = &toi_ktype; | |
20029 | + | |
20030 | + return kobj; | |
20031 | +} | |
20032 | + | |
20033 | +/* toi_register_sysfs_file | |
20034 | + * | |
20035 | + * Helper for registering a new /sysfs/tuxonice entry. | |
20036 | + */ | |
20037 | + | |
20038 | +int toi_register_sysfs_file( | |
20039 | + struct kobject *kobj, | |
20040 | + struct toi_sysfs_data *toi_sysfs_data) | |
20041 | +{ | |
20042 | + int result; | |
20043 | + | |
20044 | + if (!toi_sysfs_initialised) | |
20045 | + toi_initialise_sysfs(); | |
20046 | + | |
20047 | + result = sysfs_create_file(kobj, &toi_sysfs_data->attr); | |
20048 | + if (result) | |
20049 | + printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s " | |
20050 | + "returned %d.\n", | |
20051 | + toi_sysfs_data->attr.name, result); | |
20052 | + kobj->ktype = &toi_ktype; | |
20053 | + | |
20054 | + return result; | |
20055 | +} | |
20056 | +EXPORT_SYMBOL_GPL(toi_register_sysfs_file); | |
20057 | + | |
20058 | +/* toi_unregister_sysfs_file | |
20059 | + * | |
20060 | + * Helper for removing unwanted /sys/power/tuxonice entries. | |
20061 | + * | |
20062 | + */ | |
20063 | +void toi_unregister_sysfs_file(struct kobject *kobj, | |
20064 | + struct toi_sysfs_data *toi_sysfs_data) | |
20065 | +{ | |
20066 | + sysfs_remove_file(kobj, &toi_sysfs_data->attr); | |
20067 | +} | |
20068 | +EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file); | |
20069 | + | |
20070 | +void toi_cleanup_sysfs(void) | |
20071 | +{ | |
20072 | + int i, | |
20073 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
20074 | + | |
20075 | + if (!toi_sysfs_initialised) | |
20076 | + return; | |
20077 | + | |
20078 | + for (i = 0; i < numfiles; i++) | |
20079 | + toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
20080 | + | |
20081 | + kobject_put(tuxonice_kobj); | |
20082 | + toi_sysfs_initialised = 0; | |
20083 | +} | |
20084 | + | |
20085 | +/* toi_initialise_sysfs | |
20086 | + * | |
20087 | + * Initialise the /sysfs/tuxonice directory. | |
20088 | + */ | |
20089 | + | |
20090 | +static void toi_initialise_sysfs(void) | |
20091 | +{ | |
20092 | + int i; | |
20093 | + int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
20094 | + | |
20095 | + if (toi_sysfs_initialised) | |
20096 | + return; | |
20097 | + | |
20098 | + /* Make our TuxOnIce directory a child of /sys/power */ | |
20099 | + tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj); | |
20100 | + if (!tuxonice_kobj) | |
20101 | + return; | |
20102 | + | |
20103 | + toi_sysfs_initialised = 1; | |
20104 | + | |
20105 | + for (i = 0; i < numfiles; i++) | |
20106 | + toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
20107 | +} | |
20108 | + | |
20109 | +int toi_sysfs_init(void) | |
20110 | +{ | |
20111 | + toi_initialise_sysfs(); | |
20112 | + return 0; | |
20113 | +} | |
20114 | + | |
20115 | +void toi_sysfs_exit(void) | |
20116 | +{ | |
20117 | + toi_cleanup_sysfs(); | |
20118 | +} | |
20119 | diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h | |
20120 | new file mode 100644 | |
5dd10c98 | 20121 | index 0000000..4185c6d |
2380c486 JR |
20122 | --- /dev/null |
20123 | +++ b/kernel/power/tuxonice_sysfs.h | |
9474138d | 20124 | @@ -0,0 +1,137 @@ |
2380c486 JR |
20125 | +/* |
20126 | + * kernel/power/tuxonice_sysfs.h | |
20127 | + * | |
5dd10c98 | 20128 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
20129 | + * |
20130 | + * This file is released under the GPLv2. | |
20131 | + */ | |
20132 | + | |
20133 | +#include <linux/sysfs.h> | |
2380c486 JR |
20134 | + |
20135 | +struct toi_sysfs_data { | |
20136 | + struct attribute attr; | |
20137 | + int type; | |
20138 | + int flags; | |
20139 | + union { | |
20140 | + struct { | |
20141 | + unsigned long *bit_vector; | |
20142 | + int bit; | |
20143 | + } bit; | |
20144 | + struct { | |
20145 | + int *variable; | |
20146 | + int minimum; | |
20147 | + int maximum; | |
20148 | + } integer; | |
20149 | + struct { | |
20150 | + long *variable; | |
20151 | + long minimum; | |
20152 | + long maximum; | |
20153 | + } a_long; | |
20154 | + struct { | |
20155 | + unsigned long *variable; | |
20156 | + unsigned long minimum; | |
20157 | + unsigned long maximum; | |
20158 | + } ul; | |
20159 | + struct { | |
20160 | + char *variable; | |
20161 | + int max_length; | |
20162 | + } string; | |
20163 | + struct { | |
20164 | + int (*read_sysfs) (const char *buffer, int count); | |
20165 | + int (*write_sysfs) (const char *buffer, int count); | |
20166 | + void *data; | |
20167 | + } special; | |
20168 | + } data; | |
20169 | + | |
20170 | + /* Side effects routine. Used, eg, for reparsing the | |
20171 | + * resume= entry when it changes */ | |
20172 | + void (*write_side_effect) (void); | |
20173 | + struct list_head sysfs_data_list; | |
20174 | +}; | |
20175 | + | |
20176 | +enum { | |
20177 | + TOI_SYSFS_DATA_NONE = 1, | |
20178 | + TOI_SYSFS_DATA_CUSTOM, | |
20179 | + TOI_SYSFS_DATA_BIT, | |
20180 | + TOI_SYSFS_DATA_INTEGER, | |
20181 | + TOI_SYSFS_DATA_UL, | |
20182 | + TOI_SYSFS_DATA_LONG, | |
20183 | + TOI_SYSFS_DATA_STRING | |
20184 | +}; | |
20185 | + | |
20186 | +#define SYSFS_WRITEONLY 0200 | |
20187 | +#define SYSFS_READONLY 0444 | |
20188 | +#define SYSFS_RW 0644 | |
20189 | + | |
20190 | +#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \ | |
20191 | + .attr = {.name = _name , .mode = _mode }, \ | |
20192 | + .type = TOI_SYSFS_DATA_BIT, \ | |
20193 | + .flags = _flags, \ | |
20194 | + .data = { .bit = { .bit_vector = _ul, .bit = _bit } } } | |
20195 | + | |
20196 | +#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \ | |
20197 | + .attr = {.name = _name , .mode = _mode }, \ | |
20198 | + .type = TOI_SYSFS_DATA_INTEGER, \ | |
20199 | + .flags = _flags, \ | |
20200 | + .data = { .integer = { .variable = _int, .minimum = _min, \ | |
20201 | + .maximum = _max } }, \ | |
20202 | + .write_side_effect = _wse } | |
20203 | + | |
20204 | +#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \ | |
20205 | + .attr = {.name = _name , .mode = _mode }, \ | |
20206 | + .type = TOI_SYSFS_DATA_UL, \ | |
20207 | + .flags = _flags, \ | |
20208 | + .data = { .ul = { .variable = _ul, .minimum = _min, \ | |
20209 | + .maximum = _max } } } | |
20210 | + | |
20211 | +#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \ | |
20212 | + .attr = {.name = _name , .mode = _mode }, \ | |
20213 | + .type = TOI_SYSFS_DATA_LONG, \ | |
20214 | + .flags = _flags, \ | |
20215 | + .data = { .a_long = { .variable = _long, .minimum = _min, \ | |
20216 | + .maximum = _max } } } | |
20217 | + | |
20218 | +#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \ | |
20219 | + .attr = {.name = _name , .mode = _mode }, \ | |
20220 | + .type = TOI_SYSFS_DATA_STRING, \ | |
20221 | + .flags = _flags, \ | |
20222 | + .data = { .string = { .variable = _string, .max_length = _max_len } }, \ | |
20223 | + .write_side_effect = _wse } | |
20224 | + | |
20225 | +#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \ | |
20226 | + .attr = {.name = _name , .mode = _mode }, \ | |
20227 | + .type = TOI_SYSFS_DATA_CUSTOM, \ | |
20228 | + .flags = _flags, \ | |
20229 | + .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \ | |
20230 | + .write_side_effect = _wse } | |
20231 | + | |
20232 | +#define SYSFS_NONE(_name, _wse) { \ | |
20233 | + .attr = {.name = _name , .mode = SYSFS_WRITEONLY }, \ | |
20234 | + .type = TOI_SYSFS_DATA_NONE, \ | |
20235 | + .write_side_effect = _wse, \ | |
20236 | +} | |
20237 | + | |
20238 | +/* Flags */ | |
20239 | +#define SYSFS_NEEDS_SM_FOR_READ 1 | |
20240 | +#define SYSFS_NEEDS_SM_FOR_WRITE 2 | |
20241 | +#define SYSFS_HIBERNATE 4 | |
20242 | +#define SYSFS_RESUME 8 | |
20243 | +#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME) | |
20244 | +#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE) | |
20245 | +#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE) | |
20246 | +#define SYSFS_NEEDS_SM_FOR_BOTH \ | |
20247 | + (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE) | |
20248 | + | |
20249 | +int toi_register_sysfs_file(struct kobject *kobj, | |
20250 | + struct toi_sysfs_data *toi_sysfs_data); | |
20251 | +void toi_unregister_sysfs_file(struct kobject *kobj, | |
20252 | + struct toi_sysfs_data *toi_sysfs_data); | |
20253 | + | |
20254 | +extern struct kobject *tuxonice_kobj; | |
20255 | + | |
20256 | +struct kobject *make_toi_sysdir(char *name); | |
20257 | +void remove_toi_sysdir(struct kobject *obj); | |
20258 | +extern void toi_cleanup_sysfs(void); | |
20259 | + | |
20260 | +extern int toi_sysfs_init(void); | |
20261 | +extern void toi_sysfs_exit(void); | |
20262 | diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c | |
20263 | new file mode 100644 | |
5dd10c98 | 20264 | index 0000000..b0b3b40 |
2380c486 JR |
20265 | --- /dev/null |
20266 | +++ b/kernel/power/tuxonice_ui.c | |
20267 | @@ -0,0 +1,250 @@ | |
20268 | +/* | |
20269 | + * kernel/power/tuxonice_ui.c | |
20270 | + * | |
20271 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
20272 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
20273 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
5dd10c98 | 20274 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
20275 | + * |
20276 | + * This file is released under the GPLv2. | |
20277 | + * | |
20278 | + * Routines for TuxOnIce's user interface. | |
20279 | + * | |
20280 | + * The user interface code talks to a userspace program via a | |
20281 | + * netlink socket. | |
20282 | + * | |
20283 | + * The kernel side: | |
20284 | + * - starts the userui program; | |
20285 | + * - sends text messages and progress bar status; | |
20286 | + * | |
20287 | + * The user space side: | |
20288 | + * - passes messages regarding user requests (abort, toggle reboot etc) | |
20289 | + * | |
20290 | + */ | |
20291 | + | |
20292 | +#define __KERNEL_SYSCALLS__ | |
20293 | + | |
20294 | +#include <linux/reboot.h> | |
20295 | + | |
20296 | +#include "tuxonice_sysfs.h" | |
20297 | +#include "tuxonice_modules.h" | |
20298 | +#include "tuxonice.h" | |
20299 | +#include "tuxonice_ui.h" | |
20300 | +#include "tuxonice_netlink.h" | |
20301 | +#include "tuxonice_power_off.h" | |
20302 | +#include "tuxonice_builtin.h" | |
20303 | + | |
20304 | +static char local_printf_buf[1024]; /* Same as printk - should be safe */ | |
20305 | +struct ui_ops *toi_current_ui; | |
20306 | +EXPORT_SYMBOL_GPL(toi_current_ui); | |
20307 | + | |
20308 | +/** | |
20309 | + * toi_wait_for_keypress - Wait for keypress via userui or /dev/console. | |
20310 | + * | |
20311 | + * @timeout: Maximum time to wait. | |
20312 | + * | |
20313 | + * Wait for a keypress, either from userui or /dev/console if userui isn't | |
20314 | + * available. The non-userui path is particularly for at boot-time, prior | |
20315 | + * to userui being started, when we have an important warning to give to | |
20316 | + * the user. | |
20317 | + */ | |
20318 | +static char toi_wait_for_keypress(int timeout) | |
20319 | +{ | |
20320 | + if (toi_current_ui && toi_current_ui->wait_for_key(timeout)) | |
20321 | + return ' '; | |
20322 | + | |
20323 | + return toi_wait_for_keypress_dev_console(timeout); | |
20324 | +} | |
20325 | + | |
20326 | +/* toi_early_boot_message() | |
20327 | + * Description: Handle errors early in the process of booting. | |
20328 | + * The user may press C to continue booting, perhaps | |
20329 | + * invalidating the image, or space to reboot. | |
20330 | + * This works from either the serial console or normally | |
20331 | + * attached keyboard. | |
20332 | + * | |
20333 | + * Note that we come in here from init, while the kernel is | |
20334 | + * locked. If we want to get events from the serial console, | |
20335 | + * we need to temporarily unlock the kernel. | |
20336 | + * | |
20337 | + * toi_early_boot_message may also be called post-boot. | |
20338 | + * In this case, it simply printks the message and returns. | |
20339 | + * | |
20340 | + * Arguments: int Whether we are able to erase the image. | |
20341 | + * int default_answer. What to do when we timeout. This | |
20342 | + * will normally be continue, but the user might | |
20343 | + * provide command line options (__setup) to override | |
20344 | + * particular cases. | |
20345 | + * Char *. Pointer to a string explaining why we're moaning. | |
20346 | + */ | |
20347 | + | |
20348 | +#define say(message, a...) printk(KERN_EMERG message, ##a) | |
20349 | + | |
20350 | +void toi_early_boot_message(int message_detail, int default_answer, | |
20351 | + char *warning_reason, ...) | |
20352 | +{ | |
20353 | +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) | |
20354 | + unsigned long orig_state = get_toi_state(), continue_req = 0; | |
20355 | + unsigned long orig_loglevel = console_loglevel; | |
20356 | + int can_ask = 1; | |
20357 | +#else | |
20358 | + int can_ask = 0; | |
20359 | +#endif | |
20360 | + | |
20361 | + va_list args; | |
20362 | + int printed_len; | |
20363 | + | |
20364 | + if (!toi_wait) { | |
20365 | + set_toi_state(TOI_CONTINUE_REQ); | |
20366 | + can_ask = 0; | |
20367 | + } | |
20368 | + | |
20369 | + if (warning_reason) { | |
20370 | + va_start(args, warning_reason); | |
20371 | + printed_len = vsnprintf(local_printf_buf, | |
20372 | + sizeof(local_printf_buf), | |
20373 | + warning_reason, | |
20374 | + args); | |
20375 | + va_end(args); | |
20376 | + } | |
20377 | + | |
20378 | + if (!test_toi_state(TOI_BOOT_TIME)) { | |
20379 | + printk("TuxOnIce: %s\n", local_printf_buf); | |
20380 | + return; | |
20381 | + } | |
20382 | + | |
20383 | + if (!can_ask) { | |
20384 | + continue_req = !!default_answer; | |
20385 | + goto post_ask; | |
20386 | + } | |
20387 | + | |
20388 | +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) | |
20389 | + console_loglevel = 7; | |
20390 | + | |
20391 | + say("=== TuxOnIce ===\n\n"); | |
20392 | + if (warning_reason) { | |
20393 | + say("BIG FAT WARNING!! %s\n\n", local_printf_buf); | |
20394 | + switch (message_detail) { | |
20395 | + case 0: | |
20396 | + say("If you continue booting, note that any image WILL" | |
20397 | + "NOT BE REMOVED.\nTuxOnIce is unable to do so " | |
20398 | + "because the appropriate modules aren't\n" | |
20399 | + "loaded. You should manually remove the image " | |
20400 | + "to avoid any\npossibility of corrupting your " | |
20401 | + "filesystem(s) later.\n"); | |
20402 | + break; | |
20403 | + case 1: | |
20404 | + say("If you want to use the current TuxOnIce image, " | |
20405 | + "reboot and try\nagain with the same kernel " | |
20406 | + "that you hibernated from. If you want\n" | |
20407 | + "to forget that image, continue and the image " | |
20408 | + "will be erased.\n"); | |
20409 | + break; | |
20410 | + } | |
20411 | + say("Press SPACE to reboot or C to continue booting with " | |
20412 | + "this kernel\n\n"); | |
20413 | + if (toi_wait > 0) | |
20414 | + say("Default action if you don't select one in %d " | |
20415 | + "seconds is: %s.\n", | |
20416 | + toi_wait, | |
20417 | + default_answer == TOI_CONTINUE_REQ ? | |
20418 | + "continue booting" : "reboot"); | |
20419 | + } else { | |
20420 | + say("BIG FAT WARNING!!\n\n" | |
20421 | + "You have tried to resume from this image before.\n" | |
20422 | + "If it failed once, it may well fail again.\n" | |
20423 | + "Would you like to remove the image and boot " | |
20424 | + "normally?\nThis will be equivalent to entering " | |
20425 | + "noresume on the\nkernel command line.\n\n" | |
20426 | + "Press SPACE to remove the image or C to continue " | |
20427 | + "resuming.\n\n"); | |
20428 | + if (toi_wait > 0) | |
20429 | + say("Default action if you don't select one in %d " | |
20430 | + "seconds is: %s.\n", toi_wait, | |
20431 | + !!default_answer ? | |
20432 | + "continue resuming" : "remove the image"); | |
20433 | + } | |
20434 | + console_loglevel = orig_loglevel; | |
20435 | + | |
20436 | + set_toi_state(TOI_SANITY_CHECK_PROMPT); | |
20437 | + clear_toi_state(TOI_CONTINUE_REQ); | |
20438 | + | |
20439 | + if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */ | |
20440 | + continue_req = !!default_answer; | |
20441 | + else | |
20442 | + continue_req = test_toi_state(TOI_CONTINUE_REQ); | |
20443 | + | |
20444 | +#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */ | |
20445 | + | |
20446 | +post_ask: | |
20447 | + if ((warning_reason) && (!continue_req)) | |
20448 | + machine_restart(NULL); | |
20449 | + | |
20450 | + restore_toi_state(orig_state); | |
20451 | + if (continue_req) | |
20452 | + set_toi_state(TOI_CONTINUE_REQ); | |
20453 | +} | |
20454 | +EXPORT_SYMBOL_GPL(toi_early_boot_message); | |
20455 | +#undef say | |
20456 | + | |
20457 | +/* | |
20458 | + * User interface specific /sys/power/tuxonice entries. | |
20459 | + */ | |
20460 | + | |
20461 | +static struct toi_sysfs_data sysfs_params[] = { | |
20462 | +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) | |
20463 | + SYSFS_INT("default_console_level", SYSFS_RW, | |
20464 | + &toi_bkd.toi_default_console_level, 0, 7, 0, NULL), | |
20465 | + SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0, | |
20466 | + 1 << 30, 0), | |
20467 | + SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL, | |
20468 | + 0) | |
20469 | +#endif | |
20470 | +}; | |
20471 | + | |
20472 | +static struct toi_module_ops userui_ops = { | |
20473 | + .type = MISC_HIDDEN_MODULE, | |
20474 | + .name = "printk ui", | |
20475 | + .directory = "user_interface", | |
20476 | + .module = THIS_MODULE, | |
20477 | + .sysfs_data = sysfs_params, | |
20478 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
20479 | + sizeof(struct toi_sysfs_data), | |
20480 | +}; | |
20481 | + | |
20482 | +int toi_register_ui_ops(struct ui_ops *this_ui) | |
20483 | +{ | |
20484 | + if (toi_current_ui) { | |
20485 | + printk(KERN_INFO "Only one TuxOnIce user interface module can " | |
20486 | + "be loaded at a time."); | |
20487 | + return -EBUSY; | |
20488 | + } | |
20489 | + | |
20490 | + toi_current_ui = this_ui; | |
20491 | + | |
20492 | + return 0; | |
20493 | +} | |
20494 | +EXPORT_SYMBOL_GPL(toi_register_ui_ops); | |
20495 | + | |
20496 | +void toi_remove_ui_ops(struct ui_ops *this_ui) | |
20497 | +{ | |
20498 | + if (toi_current_ui != this_ui) | |
20499 | + return; | |
20500 | + | |
20501 | + toi_current_ui = NULL; | |
20502 | +} | |
20503 | +EXPORT_SYMBOL_GPL(toi_remove_ui_ops); | |
20504 | + | |
20505 | +/* toi_console_sysfs_init | |
20506 | + * Description: Boot time initialisation for user interface. | |
20507 | + */ | |
20508 | + | |
20509 | +int toi_ui_init(void) | |
20510 | +{ | |
20511 | + return toi_register_module(&userui_ops); | |
20512 | +} | |
20513 | + | |
20514 | +void toi_ui_exit(void) | |
20515 | +{ | |
20516 | + toi_unregister_module(&userui_ops); | |
20517 | +} | |
20518 | diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h | |
20519 | new file mode 100644 | |
85eb3c9d | 20520 | index 0000000..4ced165 |
2380c486 JR |
20521 | --- /dev/null |
20522 | +++ b/kernel/power/tuxonice_ui.h | |
5dd10c98 | 20523 | @@ -0,0 +1,97 @@ |
2380c486 JR |
20524 | +/* |
20525 | + * kernel/power/tuxonice_ui.h | |
20526 | + * | |
5dd10c98 | 20527 | + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
20528 | + */ |
20529 | + | |
20530 | +enum { | |
20531 | + DONT_CLEAR_BAR, | |
20532 | + CLEAR_BAR | |
20533 | +}; | |
20534 | + | |
20535 | +enum { | |
20536 | + /* Userspace -> Kernel */ | |
20537 | + USERUI_MSG_ABORT = 0x11, | |
20538 | + USERUI_MSG_SET_STATE = 0x12, | |
20539 | + USERUI_MSG_GET_STATE = 0x13, | |
20540 | + USERUI_MSG_GET_DEBUG_STATE = 0x14, | |
20541 | + USERUI_MSG_SET_DEBUG_STATE = 0x15, | |
20542 | + USERUI_MSG_SPACE = 0x18, | |
20543 | + USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A, | |
20544 | + USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B, | |
20545 | + USERUI_MSG_GET_LOGLEVEL = 0x1C, | |
20546 | + USERUI_MSG_SET_LOGLEVEL = 0x1D, | |
20547 | + USERUI_MSG_PRINTK = 0x1E, | |
20548 | + | |
20549 | + /* Kernel -> Userspace */ | |
20550 | + USERUI_MSG_MESSAGE = 0x21, | |
20551 | + USERUI_MSG_PROGRESS = 0x22, | |
20552 | + USERUI_MSG_POST_ATOMIC_RESTORE = 0x25, | |
20553 | + | |
20554 | + USERUI_MSG_MAX, | |
20555 | +}; | |
20556 | + | |
20557 | +struct userui_msg_params { | |
20558 | + u32 a, b, c, d; | |
20559 | + char text[255]; | |
20560 | +}; | |
20561 | + | |
20562 | +struct ui_ops { | |
20563 | + char (*wait_for_key) (int timeout); | |
20564 | + u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...); | |
20565 | + void (*prepare_status) (int clearbar, const char *fmt, ...); | |
20566 | + void (*cond_pause) (int pause, char *message); | |
20567 | + void (*abort)(int result_code, const char *fmt, ...); | |
20568 | + void (*prepare)(void); | |
20569 | + void (*cleanup)(void); | |
2380c486 JR |
20570 | + void (*message)(u32 section, u32 level, u32 normally_logged, |
20571 | + const char *fmt, ...); | |
20572 | +}; | |
20573 | + | |
20574 | +extern struct ui_ops *toi_current_ui; | |
20575 | + | |
20576 | +#define toi_update_status(val, max, fmt, args...) \ | |
20577 | + (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \ | |
20578 | + max) | |
20579 | + | |
2380c486 | 20580 | +#define toi_prepare_console(void) \ |
e999739a | 20581 | + do { if (toi_current_ui) \ |
2380c486 JR |
20582 | + (toi_current_ui->prepare)(); \ |
20583 | + } while (0) | |
20584 | + | |
20585 | +#define toi_cleanup_console(void) \ | |
e999739a | 20586 | + do { if (toi_current_ui) \ |
2380c486 JR |
20587 | + (toi_current_ui->cleanup)(); \ |
20588 | + } while (0) | |
20589 | + | |
20590 | +#define abort_hibernate(result, fmt, args...) \ | |
20591 | + do { if (toi_current_ui) \ | |
20592 | + (toi_current_ui->abort)(result, fmt, ##args); \ | |
20593 | + else { \ | |
20594 | + set_abort_result(result); \ | |
20595 | + } \ | |
20596 | + } while (0) | |
20597 | + | |
20598 | +#define toi_cond_pause(pause, message) \ | |
20599 | + do { if (toi_current_ui) \ | |
20600 | + (toi_current_ui->cond_pause)(pause, message); \ | |
20601 | + } while (0) | |
20602 | + | |
20603 | +#define toi_prepare_status(clear, fmt, args...) \ | |
20604 | + do { if (toi_current_ui) \ | |
20605 | + (toi_current_ui->prepare_status)(clear, fmt, ##args); \ | |
20606 | + else \ | |
85eb3c9d | 20607 | + printk(KERN_INFO fmt "%s", ##args, "\n"); \ |
2380c486 JR |
20608 | + } while (0) |
20609 | + | |
20610 | +#define toi_message(sn, lev, log, fmt, a...) \ | |
20611 | +do { \ | |
20612 | + if (toi_current_ui && (!sn || test_debug_state(sn))) \ | |
20613 | + toi_current_ui->message(sn, lev, log, fmt, ##a); \ | |
20614 | +} while (0) | |
20615 | + | |
20616 | +__exit void toi_ui_cleanup(void); | |
20617 | +extern int toi_ui_init(void); | |
20618 | +extern void toi_ui_exit(void); | |
20619 | +extern int toi_register_ui_ops(struct ui_ops *this_ui); | |
20620 | +extern void toi_remove_ui_ops(struct ui_ops *this_ui); | |
20621 | diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c | |
20622 | new file mode 100644 | |
5dd10c98 | 20623 | index 0000000..625d863 |
2380c486 JR |
20624 | --- /dev/null |
20625 | +++ b/kernel/power/tuxonice_userui.c | |
7e46296a | 20626 | @@ -0,0 +1,668 @@ |
2380c486 JR |
20627 | +/* |
20628 | + * kernel/power/user_ui.c | |
20629 | + * | |
20630 | + * Copyright (C) 2005-2007 Bernard Blackham | |
5dd10c98 | 20631 | + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net) |
2380c486 JR |
20632 | + * |
20633 | + * This file is released under the GPLv2. | |
20634 | + * | |
20635 | + * Routines for TuxOnIce's user interface. | |
20636 | + * | |
20637 | + * The user interface code talks to a userspace program via a | |
20638 | + * netlink socket. | |
20639 | + * | |
20640 | + * The kernel side: | |
20641 | + * - starts the userui program; | |
20642 | + * - sends text messages and progress bar status; | |
20643 | + * | |
20644 | + * The user space side: | |
20645 | + * - passes messages regarding user requests (abort, toggle reboot etc) | |
20646 | + * | |
20647 | + */ | |
20648 | + | |
20649 | +#define __KERNEL_SYSCALLS__ | |
20650 | + | |
20651 | +#include <linux/suspend.h> | |
20652 | +#include <linux/freezer.h> | |
20653 | +#include <linux/console.h> | |
20654 | +#include <linux/ctype.h> | |
20655 | +#include <linux/tty.h> | |
20656 | +#include <linux/vt_kern.h> | |
2380c486 JR |
20657 | +#include <linux/reboot.h> |
20658 | +#include <linux/kmod.h> | |
20659 | +#include <linux/security.h> | |
20660 | +#include <linux/syscalls.h> | |
5dd10c98 | 20661 | +#include <linux/vt.h> |
2380c486 JR |
20662 | + |
20663 | +#include "tuxonice_sysfs.h" | |
20664 | +#include "tuxonice_modules.h" | |
20665 | +#include "tuxonice.h" | |
20666 | +#include "tuxonice_ui.h" | |
20667 | +#include "tuxonice_netlink.h" | |
20668 | +#include "tuxonice_power_off.h" | |
20669 | + | |
20670 | +static char local_printf_buf[1024]; /* Same as printk - should be safe */ | |
20671 | + | |
20672 | +static struct user_helper_data ui_helper_data; | |
20673 | +static struct toi_module_ops userui_ops; | |
20674 | +static int orig_kmsg; | |
20675 | + | |
20676 | +static char lastheader[512]; | |
20677 | +static int lastheader_message_len; | |
20678 | +static int ui_helper_changed; /* Used at resume-time so don't overwrite value | |
20679 | + set from initrd/ramfs. */ | |
20680 | + | |
20681 | +/* Number of distinct progress amounts that userspace can display */ | |
20682 | +static int progress_granularity = 30; | |
20683 | + | |
20684 | +static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key); | |
20685 | + | |
20686 | +/** | |
20687 | + * ui_nl_set_state - Update toi_action based on a message from userui. | |
20688 | + * | |
20689 | + * @n: The bit (1 << bit) to set. | |
20690 | + */ | |
20691 | +static void ui_nl_set_state(int n) | |
20692 | +{ | |
20693 | + /* Only let them change certain settings */ | |
20694 | + static const u32 toi_action_mask = | |
20695 | + (1 << TOI_REBOOT) | (1 << TOI_PAUSE) | | |
20696 | + (1 << TOI_LOGALL) | | |
20697 | + (1 << TOI_SINGLESTEP) | | |
20698 | + (1 << TOI_PAUSE_NEAR_PAGESET_END); | |
7e46296a | 20699 | + static unsigned long new_action; |
2380c486 | 20700 | + |
7e46296a | 20701 | + new_action = (toi_bkd.toi_action & (~toi_action_mask)) | |
2380c486 JR |
20702 | + (n & toi_action_mask); |
20703 | + | |
7e46296a AM |
20704 | + printk(KERN_DEBUG "n is %x. Action flags being changed from %lx " |
20705 | + "to %lx.", n, toi_bkd.toi_action, new_action); | |
20706 | + toi_bkd.toi_action = new_action; | |
20707 | + | |
2380c486 JR |
20708 | + if (!test_action_state(TOI_PAUSE) && |
20709 | + !test_action_state(TOI_SINGLESTEP)) | |
20710 | + wake_up_interruptible(&userui_wait_for_key); | |
20711 | +} | |
20712 | + | |
20713 | +/** | |
20714 | + * userui_post_atomic_restore - Tell userui that atomic restore just happened. | |
20715 | + * | |
20716 | + * Tell userui that atomic restore just occured, so that it can do things like | |
20717 | + * redrawing the screen, re-getting settings and so on. | |
20718 | + */ | |
5dd10c98 | 20719 | +static void userui_post_atomic_restore(struct toi_boot_kernel_data *bkd) |
2380c486 JR |
20720 | +{ |
20721 | + toi_send_netlink_message(&ui_helper_data, | |
20722 | + USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0); | |
20723 | +} | |
20724 | + | |
20725 | +/** | |
20726 | + * userui_storage_needed - Report how much memory in image header is needed. | |
20727 | + */ | |
20728 | +static int userui_storage_needed(void) | |
20729 | +{ | |
20730 | + return sizeof(ui_helper_data.program) + 1 + sizeof(int); | |
20731 | +} | |
20732 | + | |
20733 | +/** | |
20734 | + * userui_save_config_info - Fill buffer with config info for image header. | |
20735 | + * | |
20736 | + * @buf: Buffer into which to put the config info we want to save. | |
20737 | + */ | |
20738 | +static int userui_save_config_info(char *buf) | |
20739 | +{ | |
20740 | + *((int *) buf) = progress_granularity; | |
20741 | + memcpy(buf + sizeof(int), ui_helper_data.program, | |
20742 | + sizeof(ui_helper_data.program)); | |
20743 | + return sizeof(ui_helper_data.program) + sizeof(int) + 1; | |
20744 | +} | |
20745 | + | |
20746 | +/** | |
20747 | + * userui_load_config_info - Restore config info from buffer. | |
20748 | + * | |
20749 | + * @buf: Buffer containing header info loaded. | |
20750 | + * @size: Size of data loaded for this module. | |
20751 | + */ | |
20752 | +static void userui_load_config_info(char *buf, int size) | |
20753 | +{ | |
20754 | + progress_granularity = *((int *) buf); | |
20755 | + size -= sizeof(int); | |
20756 | + | |
20757 | + /* Don't load the saved path if one has already been set */ | |
20758 | + if (ui_helper_changed) | |
20759 | + return; | |
20760 | + | |
20761 | + if (size > sizeof(ui_helper_data.program)) | |
20762 | + size = sizeof(ui_helper_data.program); | |
20763 | + | |
20764 | + memcpy(ui_helper_data.program, buf + sizeof(int), size); | |
20765 | + ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0'; | |
20766 | +} | |
20767 | + | |
20768 | +/** | |
20769 | + * set_ui_program_set: Record that userui program was changed. | |
20770 | + * | |
20771 | + * Side effect routine for when the userui program is set. In an initrd or | |
20772 | + * ramfs, the user may set a location for the userui program. If this happens, | |
20773 | + * we don't want to reload the value that was saved in the image header. This | |
20774 | + * routine allows us to flag that we shouldn't restore the program name from | |
20775 | + * the image header. | |
20776 | + */ | |
20777 | +static void set_ui_program_set(void) | |
20778 | +{ | |
20779 | + ui_helper_changed = 1; | |
20780 | +} | |
20781 | + | |
20782 | +/** | |
20783 | + * userui_memory_needed - Tell core how much memory to reserve for us. | |
20784 | + */ | |
20785 | +static int userui_memory_needed(void) | |
20786 | +{ | |
20787 | + /* ball park figure of 128 pages */ | |
20788 | + return 128 * PAGE_SIZE; | |
20789 | +} | |
20790 | + | |
20791 | +/** | |
20792 | + * userui_update_status - Update the progress bar and (if on) in-bar message. | |
20793 | + * | |
20794 | + * @value: Current progress percentage numerator. | |
20795 | + * @maximum: Current progress percentage denominator. | |
20796 | + * @fmt: Message to be displayed in the middle of the progress bar. | |
20797 | + * | |
20798 | + * Note that a NULL message does not mean that any previous message is erased! | |
20799 | + * For that, you need toi_prepare_status with clearbar on. | |
20800 | + * | |
20801 | + * Returns an unsigned long, being the next numerator (as determined by the | |
20802 | + * maximum and progress granularity) where status needs to be updated. | |
20803 | + * This is to reduce unnecessary calls to update_status. | |
20804 | + */ | |
20805 | +static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...) | |
20806 | +{ | |
20807 | + static u32 last_step = 9999; | |
20808 | + struct userui_msg_params msg; | |
20809 | + u32 this_step, next_update; | |
20810 | + int bitshift; | |
20811 | + | |
20812 | + if (ui_helper_data.pid == -1) | |
20813 | + return 0; | |
20814 | + | |
20815 | + if ((!maximum) || (!progress_granularity)) | |
20816 | + return maximum; | |
20817 | + | |
20818 | + if (value < 0) | |
20819 | + value = 0; | |
20820 | + | |
20821 | + if (value > maximum) | |
20822 | + value = maximum; | |
20823 | + | |
20824 | + /* Try to avoid math problems - we can't do 64 bit math here | |
20825 | + * (and shouldn't need it - anyone got screen resolution | |
20826 | + * of 65536 pixels or more?) */ | |
20827 | + bitshift = fls(maximum) - 16; | |
20828 | + if (bitshift > 0) { | |
20829 | + u32 temp_maximum = maximum >> bitshift; | |
20830 | + u32 temp_value = value >> bitshift; | |
20831 | + this_step = (u32) | |
20832 | + (temp_value * progress_granularity / temp_maximum); | |
20833 | + next_update = (((this_step + 1) * temp_maximum / | |
20834 | + progress_granularity) + 1) << bitshift; | |
20835 | + } else { | |
20836 | + this_step = (u32) (value * progress_granularity / maximum); | |
20837 | + next_update = ((this_step + 1) * maximum / | |
20838 | + progress_granularity) + 1; | |
20839 | + } | |
20840 | + | |
20841 | + if (this_step == last_step) | |
20842 | + return next_update; | |
20843 | + | |
20844 | + memset(&msg, 0, sizeof(msg)); | |
20845 | + | |
20846 | + msg.a = this_step; | |
20847 | + msg.b = progress_granularity; | |
20848 | + | |
20849 | + if (fmt) { | |
20850 | + va_list args; | |
20851 | + va_start(args, fmt); | |
20852 | + vsnprintf(msg.text, sizeof(msg.text), fmt, args); | |
20853 | + va_end(args); | |
20854 | + msg.text[sizeof(msg.text)-1] = '\0'; | |
20855 | + } | |
20856 | + | |
20857 | + toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS, | |
20858 | + &msg, sizeof(msg)); | |
20859 | + last_step = this_step; | |
20860 | + | |
20861 | + return next_update; | |
20862 | +} | |
20863 | + | |
20864 | +/** | |
20865 | + * userui_message - Display a message without necessarily logging it. | |
20866 | + * | |
20867 | + * @section: Type of message. Messages can be filtered by type. | |
20868 | + * @level: Degree of importance of the message. Lower values = higher priority. | |
20869 | + * @normally_logged: Whether logged even if log_everything is off. | |
20870 | + * @fmt: Message (and parameters). | |
20871 | + * | |
20872 | + * This function is intended to do the same job as printk, but without normally | |
20873 | + * logging what is printed. The point is to be able to get debugging info on | |
20874 | + * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M" | |
20875 | + * | |
20876 | + * It may be called from an interrupt context - can't sleep! | |
20877 | + */ | |
20878 | +static void userui_message(u32 section, u32 level, u32 normally_logged, | |
20879 | + const char *fmt, ...) | |
20880 | +{ | |
20881 | + struct userui_msg_params msg; | |
20882 | + | |
20883 | + if ((level) && (level > console_loglevel)) | |
20884 | + return; | |
20885 | + | |
20886 | + memset(&msg, 0, sizeof(msg)); | |
20887 | + | |
20888 | + msg.a = section; | |
20889 | + msg.b = level; | |
20890 | + msg.c = normally_logged; | |
20891 | + | |
20892 | + if (fmt) { | |
20893 | + va_list args; | |
20894 | + va_start(args, fmt); | |
20895 | + vsnprintf(msg.text, sizeof(msg.text), fmt, args); | |
20896 | + va_end(args); | |
20897 | + msg.text[sizeof(msg.text)-1] = '\0'; | |
20898 | + } | |
20899 | + | |
20900 | + if (test_action_state(TOI_LOGALL)) | |
20901 | + printk(KERN_INFO "%s\n", msg.text); | |
20902 | + | |
20903 | + toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE, | |
20904 | + &msg, sizeof(msg)); | |
20905 | +} | |
20906 | + | |
20907 | +/** | |
20908 | + * wait_for_key_via_userui - Wait for userui to receive a keypress. | |
20909 | + */ | |
20910 | +static void wait_for_key_via_userui(void) | |
20911 | +{ | |
20912 | + DECLARE_WAITQUEUE(wait, current); | |
20913 | + | |
20914 | + add_wait_queue(&userui_wait_for_key, &wait); | |
20915 | + set_current_state(TASK_INTERRUPTIBLE); | |
20916 | + | |
20917 | + interruptible_sleep_on(&userui_wait_for_key); | |
20918 | + | |
20919 | + set_current_state(TASK_RUNNING); | |
20920 | + remove_wait_queue(&userui_wait_for_key, &wait); | |
20921 | +} | |
20922 | + | |
20923 | +/** | |
20924 | + * userui_prepare_status - Display high level messages. | |
20925 | + * | |
20926 | + * @clearbar: Whether to clear the progress bar. | |
20927 | + * @fmt...: New message for the title. | |
20928 | + * | |
20929 | + * Prepare the 'nice display', drawing the header and version, along with the | |
20930 | + * current action and perhaps also resetting the progress bar. | |
20931 | + */ | |
20932 | +static void userui_prepare_status(int clearbar, const char *fmt, ...) | |
20933 | +{ | |
20934 | + va_list args; | |
20935 | + | |
20936 | + if (fmt) { | |
20937 | + va_start(args, fmt); | |
20938 | + lastheader_message_len = vsnprintf(lastheader, 512, fmt, args); | |
20939 | + va_end(args); | |
20940 | + } | |
20941 | + | |
20942 | + if (clearbar) | |
20943 | + toi_update_status(0, 1, NULL); | |
20944 | + | |
20945 | + if (ui_helper_data.pid == -1) | |
20946 | + printk(KERN_EMERG "%s\n", lastheader); | |
20947 | + else | |
20948 | + toi_message(0, TOI_STATUS, 1, lastheader, NULL); | |
20949 | +} | |
20950 | + | |
20951 | +/** | |
20952 | + * toi_wait_for_keypress - Wait for keypress via userui. | |
20953 | + * | |
20954 | + * @timeout: Maximum time to wait. | |
20955 | + * | |
20956 | + * Wait for a keypress from userui. | |
20957 | + * | |
20958 | + * FIXME: Implement timeout? | |
20959 | + */ | |
20960 | +static char userui_wait_for_keypress(int timeout) | |
20961 | +{ | |
20962 | + char key = '\0'; | |
20963 | + | |
20964 | + if (ui_helper_data.pid != -1) { | |
20965 | + wait_for_key_via_userui(); | |
20966 | + key = ' '; | |
20967 | + } | |
20968 | + | |
20969 | + return key; | |
20970 | +} | |
20971 | + | |
20972 | +/** | |
20973 | + * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it. | |
20974 | + * | |
20975 | + * @result_code: Reason why we're aborting (1 << bit). | |
20976 | + * @fmt: Message to display if telling the user what's going on. | |
20977 | + * | |
20978 | + * Abort a cycle. If this wasn't at the user's request (and we're displaying | |
20979 | + * output), tell the user why and wait for them to acknowledge the message. | |
20980 | + */ | |
20981 | +static void userui_abort_hibernate(int result_code, const char *fmt, ...) | |
20982 | +{ | |
20983 | + va_list args; | |
20984 | + int printed_len = 0; | |
20985 | + | |
20986 | + set_result_state(result_code); | |
20987 | + | |
20988 | + if (test_result_state(TOI_ABORTED)) | |
20989 | + return; | |
20990 | + | |
20991 | + set_result_state(TOI_ABORTED); | |
20992 | + | |
20993 | + if (test_result_state(TOI_ABORT_REQUESTED)) | |
20994 | + return; | |
20995 | + | |
20996 | + va_start(args, fmt); | |
20997 | + printed_len = vsnprintf(local_printf_buf, sizeof(local_printf_buf), | |
20998 | + fmt, args); | |
20999 | + va_end(args); | |
21000 | + if (ui_helper_data.pid != -1) | |
21001 | + printed_len = sprintf(local_printf_buf + printed_len, | |
21002 | + " (Press SPACE to continue)"); | |
21003 | + | |
21004 | + toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf); | |
21005 | + | |
21006 | + if (ui_helper_data.pid != -1) | |
21007 | + userui_wait_for_keypress(0); | |
21008 | +} | |
21009 | + | |
21010 | +/** | |
21011 | + * request_abort_hibernate - Abort hibernating or resuming at user request. | |
21012 | + * | |
21013 | + * Handle the user requesting the cancellation of a hibernation or resume by | |
21014 | + * pressing escape. | |
21015 | + */ | |
21016 | +static void request_abort_hibernate(void) | |
21017 | +{ | |
7e46296a AM |
21018 | + if (test_result_state(TOI_ABORT_REQUESTED) || |
21019 | + !test_action_state(TOI_CAN_CANCEL)) | |
2380c486 JR |
21020 | + return; |
21021 | + | |
21022 | + if (test_toi_state(TOI_NOW_RESUMING)) { | |
21023 | + toi_prepare_status(CLEAR_BAR, "Escape pressed. " | |
21024 | + "Powering down again."); | |
21025 | + set_toi_state(TOI_STOP_RESUME); | |
21026 | + while (!test_toi_state(TOI_IO_STOPPED)) | |
21027 | + schedule(); | |
21028 | + if (toiActiveAllocator->mark_resume_attempted) | |
21029 | + toiActiveAllocator->mark_resume_attempted(0); | |
21030 | + toi_power_down(); | |
21031 | + } | |
21032 | + | |
21033 | + toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :" | |
21034 | + " ABORTING HIBERNATION ---"); | |
21035 | + set_abort_result(TOI_ABORT_REQUESTED); | |
21036 | + wake_up_interruptible(&userui_wait_for_key); | |
21037 | +} | |
21038 | + | |
21039 | +/** | |
21040 | + * userui_user_rcv_msg - Receive a netlink message from userui. | |
21041 | + * | |
21042 | + * @skb: skb received. | |
21043 | + * @nlh: Netlink header received. | |
21044 | + */ | |
21045 | +static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |
21046 | +{ | |
21047 | + int type; | |
21048 | + int *data; | |
21049 | + | |
21050 | + type = nlh->nlmsg_type; | |
21051 | + | |
21052 | + /* A control message: ignore them */ | |
21053 | + if (type < NETLINK_MSG_BASE) | |
21054 | + return 0; | |
21055 | + | |
21056 | + /* Unknown message: reply with EINVAL */ | |
21057 | + if (type >= USERUI_MSG_MAX) | |
21058 | + return -EINVAL; | |
21059 | + | |
21060 | + /* All operations require privileges, even GET */ | |
21061 | + if (security_netlink_recv(skb, CAP_NET_ADMIN)) | |
21062 | + return -EPERM; | |
21063 | + | |
21064 | + /* Only allow one task to receive NOFREEZE privileges */ | |
21065 | + if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) { | |
21066 | + printk(KERN_INFO "Got NOFREEZE_ME request when " | |
21067 | + "ui_helper_data.pid is %d.\n", ui_helper_data.pid); | |
21068 | + return -EBUSY; | |
21069 | + } | |
21070 | + | |
21071 | + data = (int *) NLMSG_DATA(nlh); | |
21072 | + | |
21073 | + switch (type) { | |
21074 | + case USERUI_MSG_ABORT: | |
21075 | + request_abort_hibernate(); | |
21076 | + return 0; | |
21077 | + case USERUI_MSG_GET_STATE: | |
21078 | + toi_send_netlink_message(&ui_helper_data, | |
21079 | + USERUI_MSG_GET_STATE, &toi_bkd.toi_action, | |
21080 | + sizeof(toi_bkd.toi_action)); | |
21081 | + return 0; | |
21082 | + case USERUI_MSG_GET_DEBUG_STATE: | |
21083 | + toi_send_netlink_message(&ui_helper_data, | |
21084 | + USERUI_MSG_GET_DEBUG_STATE, | |
21085 | + &toi_bkd.toi_debug_state, | |
21086 | + sizeof(toi_bkd.toi_debug_state)); | |
21087 | + return 0; | |
21088 | + case USERUI_MSG_SET_STATE: | |
21089 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
21090 | + return -EINVAL; | |
21091 | + ui_nl_set_state(*data); | |
21092 | + return 0; | |
21093 | + case USERUI_MSG_SET_DEBUG_STATE: | |
21094 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
21095 | + return -EINVAL; | |
21096 | + toi_bkd.toi_debug_state = (*data); | |
21097 | + return 0; | |
21098 | + case USERUI_MSG_SPACE: | |
21099 | + wake_up_interruptible(&userui_wait_for_key); | |
21100 | + return 0; | |
21101 | + case USERUI_MSG_GET_POWERDOWN_METHOD: | |
21102 | + toi_send_netlink_message(&ui_helper_data, | |
21103 | + USERUI_MSG_GET_POWERDOWN_METHOD, | |
21104 | + &toi_poweroff_method, | |
21105 | + sizeof(toi_poweroff_method)); | |
21106 | + return 0; | |
21107 | + case USERUI_MSG_SET_POWERDOWN_METHOD: | |
21108 | + if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char))) | |
21109 | + return -EINVAL; | |
21110 | + toi_poweroff_method = (unsigned long)(*data); | |
21111 | + return 0; | |
21112 | + case USERUI_MSG_GET_LOGLEVEL: | |
21113 | + toi_send_netlink_message(&ui_helper_data, | |
21114 | + USERUI_MSG_GET_LOGLEVEL, | |
21115 | + &toi_bkd.toi_default_console_level, | |
21116 | + sizeof(toi_bkd.toi_default_console_level)); | |
21117 | + return 0; | |
21118 | + case USERUI_MSG_SET_LOGLEVEL: | |
21119 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
21120 | + return -EINVAL; | |
21121 | + toi_bkd.toi_default_console_level = (*data); | |
21122 | + return 0; | |
21123 | + case USERUI_MSG_PRINTK: | |
e999739a | 21124 | + printk(KERN_INFO "%s", (char *) data); |
2380c486 JR |
21125 | + return 0; |
21126 | + } | |
21127 | + | |
21128 | + /* Unhandled here */ | |
21129 | + return 1; | |
21130 | +} | |
21131 | + | |
21132 | +/** | |
21133 | + * userui_cond_pause - Possibly pause at user request. | |
21134 | + * | |
21135 | + * @pause: Whether to pause or just display the message. | |
21136 | + * @message: Message to display at the start of pausing. | |
21137 | + * | |
21138 | + * Potentially pause and wait for the user to tell us to continue. We normally | |
21139 | + * only pause when @pause is set. While paused, the user can do things like | |
21140 | + * changing the loglevel, toggling the display of debugging sections and such | |
21141 | + * like. | |
21142 | + */ | |
21143 | +static void userui_cond_pause(int pause, char *message) | |
21144 | +{ | |
21145 | + int displayed_message = 0, last_key = 0; | |
21146 | + | |
21147 | + while (last_key != 32 && | |
21148 | + ui_helper_data.pid != -1 && | |
21149 | + ((test_action_state(TOI_PAUSE) && pause) || | |
21150 | + (test_action_state(TOI_SINGLESTEP)))) { | |
21151 | + if (!displayed_message) { | |
21152 | + toi_prepare_status(DONT_CLEAR_BAR, | |
21153 | + "%s Press SPACE to continue.%s", | |
21154 | + message ? message : "", | |
21155 | + (test_action_state(TOI_SINGLESTEP)) ? | |
21156 | + " Single step on." : ""); | |
21157 | + displayed_message = 1; | |
21158 | + } | |
21159 | + last_key = userui_wait_for_keypress(0); | |
21160 | + } | |
21161 | + schedule(); | |
21162 | +} | |
21163 | + | |
21164 | +/** | |
21165 | + * userui_prepare_console - Prepare the console for use. | |
21166 | + * | |
21167 | + * Prepare a console for use, saving current kmsg settings and attempting to | |
21168 | + * start userui. Console loglevel changes are handled by userui. | |
21169 | + */ | |
21170 | +static void userui_prepare_console(void) | |
21171 | +{ | |
5dd10c98 | 21172 | + orig_kmsg = vt_kmsg_redirect(fg_console + 1); |
2380c486 JR |
21173 | + |
21174 | + ui_helper_data.pid = -1; | |
21175 | + | |
21176 | + if (!userui_ops.enabled) { | |
e999739a | 21177 | + printk(KERN_INFO "TuxOnIce: Userui disabled.\n"); |
2380c486 JR |
21178 | + return; |
21179 | + } | |
21180 | + | |
21181 | + if (*ui_helper_data.program) | |
21182 | + toi_netlink_setup(&ui_helper_data); | |
21183 | + else | |
21184 | + printk(KERN_INFO "TuxOnIce: Userui program not configured.\n"); | |
21185 | +} | |
21186 | + | |
21187 | +/** | |
21188 | + * userui_cleanup_console - Cleanup after a cycle. | |
21189 | + * | |
21190 | + * Tell userui to cleanup, and restore kmsg_redirect to its original value. | |
21191 | + */ | |
21192 | + | |
21193 | +static void userui_cleanup_console(void) | |
21194 | +{ | |
21195 | + if (ui_helper_data.pid > -1) | |
21196 | + toi_netlink_close(&ui_helper_data); | |
21197 | + | |
5dd10c98 | 21198 | + vt_kmsg_redirect(orig_kmsg); |
2380c486 JR |
21199 | +} |
21200 | + | |
21201 | +/* | |
21202 | + * User interface specific /sys/power/tuxonice entries. | |
21203 | + */ | |
21204 | + | |
21205 | +static struct toi_sysfs_data sysfs_params[] = { | |
21206 | +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) | |
21207 | + SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action, | |
21208 | + TOI_CAN_CANCEL, 0), | |
21209 | + SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action, | |
21210 | + TOI_PAUSE, 0), | |
21211 | + SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL), | |
21212 | + SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, | |
21213 | + 2048, 0, NULL), | |
21214 | + SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0, | |
21215 | + set_ui_program_set), | |
21216 | + SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL) | |
21217 | +#endif | |
21218 | +}; | |
21219 | + | |
21220 | +static struct toi_module_ops userui_ops = { | |
21221 | + .type = MISC_MODULE, | |
21222 | + .name = "userui", | |
21223 | + .shared_directory = "user_interface", | |
21224 | + .module = THIS_MODULE, | |
21225 | + .storage_needed = userui_storage_needed, | |
21226 | + .save_config_info = userui_save_config_info, | |
21227 | + .load_config_info = userui_load_config_info, | |
21228 | + .memory_needed = userui_memory_needed, | |
5dd10c98 | 21229 | + .post_atomic_restore = userui_post_atomic_restore, |
2380c486 JR |
21230 | + .sysfs_data = sysfs_params, |
21231 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
21232 | + sizeof(struct toi_sysfs_data), | |
21233 | +}; | |
21234 | + | |
21235 | +static struct ui_ops my_ui_ops = { | |
2380c486 JR |
21236 | + .update_status = userui_update_status, |
21237 | + .message = userui_message, | |
21238 | + .prepare_status = userui_prepare_status, | |
21239 | + .abort = userui_abort_hibernate, | |
21240 | + .cond_pause = userui_cond_pause, | |
21241 | + .prepare = userui_prepare_console, | |
21242 | + .cleanup = userui_cleanup_console, | |
21243 | + .wait_for_key = userui_wait_for_keypress, | |
21244 | +}; | |
21245 | + | |
21246 | +/** | |
21247 | + * toi_user_ui_init - Boot time initialisation for user interface. | |
21248 | + * | |
21249 | + * Invoked from the core init routine. | |
21250 | + */ | |
21251 | +static __init int toi_user_ui_init(void) | |
21252 | +{ | |
21253 | + int result; | |
21254 | + | |
21255 | + ui_helper_data.nl = NULL; | |
21256 | + strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255); | |
21257 | + ui_helper_data.pid = -1; | |
21258 | + ui_helper_data.skb_size = sizeof(struct userui_msg_params); | |
21259 | + ui_helper_data.pool_limit = 6; | |
21260 | + ui_helper_data.netlink_id = NETLINK_TOI_USERUI; | |
21261 | + ui_helper_data.name = "userspace ui"; | |
21262 | + ui_helper_data.rcv_msg = userui_user_rcv_msg; | |
21263 | + ui_helper_data.interface_version = 8; | |
21264 | + ui_helper_data.must_init = 0; | |
21265 | + ui_helper_data.not_ready = userui_cleanup_console; | |
21266 | + init_completion(&ui_helper_data.wait_for_process); | |
21267 | + result = toi_register_module(&userui_ops); | |
21268 | + if (!result) | |
21269 | + result = toi_register_ui_ops(&my_ui_ops); | |
21270 | + if (result) | |
21271 | + toi_unregister_module(&userui_ops); | |
21272 | + | |
21273 | + return result; | |
21274 | +} | |
21275 | + | |
21276 | +#ifdef MODULE | |
21277 | +/** | |
21278 | + * toi_user_ui_ext - Cleanup code for if the core is unloaded. | |
21279 | + */ | |
21280 | +static __exit void toi_user_ui_exit(void) | |
21281 | +{ | |
21282 | + toi_netlink_close_complete(&ui_helper_data); | |
21283 | + toi_remove_ui_ops(&my_ui_ops); | |
21284 | + toi_unregister_module(&userui_ops); | |
21285 | +} | |
21286 | + | |
21287 | +module_init(toi_user_ui_init); | |
21288 | +module_exit(toi_user_ui_exit); | |
21289 | +MODULE_AUTHOR("Nigel Cunningham"); | |
21290 | +MODULE_DESCRIPTION("TuxOnIce Userui Support"); | |
21291 | +MODULE_LICENSE("GPL"); | |
21292 | +#else | |
21293 | +late_initcall(toi_user_ui_init); | |
21294 | +#endif | |
21295 | diff --git a/kernel/power/user.c b/kernel/power/user.c | |
5bd2511a | 21296 | index e819e17..193abc1 100644 |
2380c486 JR |
21297 | --- a/kernel/power/user.c |
21298 | +++ b/kernel/power/user.c | |
92bca44c | 21299 | @@ -64,6 +64,7 @@ static struct snapshot_data { |
2380c486 JR |
21300 | } snapshot_state; |
21301 | ||
21302 | atomic_t snapshot_device_available = ATOMIC_INIT(1); | |
21303 | +EXPORT_SYMBOL_GPL(snapshot_device_available); | |
21304 | ||
21305 | static int snapshot_open(struct inode *inode, struct file *filp) | |
21306 | { | |
21307 | diff --git a/kernel/printk.c b/kernel/printk.c | |
5bd2511a | 21308 | index 444b770..49ddbab 100644 |
2380c486 JR |
21309 | --- a/kernel/printk.c |
21310 | +++ b/kernel/printk.c | |
21311 | @@ -32,6 +32,7 @@ | |
21312 | #include <linux/security.h> | |
21313 | #include <linux/bootmem.h> | |
21314 | #include <linux/syscalls.h> | |
21315 | +#include <linux/suspend.h> | |
9474138d | 21316 | #include <linux/kexec.h> |
5bd2511a | 21317 | #include <linux/kdb.h> |
5dd10c98 | 21318 | #include <linux/ratelimit.h> |
5bd2511a | 21319 | @@ -70,6 +71,7 @@ int console_printk[4] = { |
2380c486 JR |
21320 | MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ |
21321 | DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ | |
21322 | }; | |
21323 | +EXPORT_SYMBOL_GPL(console_printk); | |
21324 | ||
de6743ae AM |
21325 | /* |
21326 | * Low level drivers may need that to know if they can schedule in | |
5bd2511a | 21327 | @@ -974,6 +976,7 @@ void suspend_console(void) |
2380c486 JR |
21328 | console_suspended = 1; |
21329 | up(&console_sem); | |
21330 | } | |
21331 | +EXPORT_SYMBOL_GPL(suspend_console); | |
21332 | ||
21333 | void resume_console(void) | |
21334 | { | |
5bd2511a | 21335 | @@ -983,6 +986,7 @@ void resume_console(void) |
2380c486 JR |
21336 | console_suspended = 0; |
21337 | release_console_sem(); | |
21338 | } | |
21339 | +EXPORT_SYMBOL_GPL(resume_console); | |
21340 | ||
21341 | /** | |
21342 | * acquire_console_sem - lock the console system for exclusive use. | |
2380c486 | 21343 | diff --git a/mm/bootmem.c b/mm/bootmem.c |
85eb3c9d | 21344 | index 142c84a..f91f5aa 100644 |
2380c486 JR |
21345 | --- a/mm/bootmem.c |
21346 | +++ b/mm/bootmem.c | |
de6743ae | 21347 | @@ -25,6 +25,7 @@ |
2380c486 JR |
21348 | unsigned long max_low_pfn; |
21349 | unsigned long min_low_pfn; | |
21350 | unsigned long max_pfn; | |
21351 | +EXPORT_SYMBOL_GPL(max_pfn); | |
21352 | ||
21353 | #ifdef CONFIG_CRASH_DUMP | |
21354 | /* | |
21355 | diff --git a/mm/highmem.c b/mm/highmem.c | |
5bd2511a | 21356 | index 66baa20..2dd71c1 100644 |
2380c486 JR |
21357 | --- a/mm/highmem.c |
21358 | +++ b/mm/highmem.c | |
92bca44c | 21359 | @@ -57,6 +57,7 @@ unsigned int nr_free_highpages (void) |
2380c486 JR |
21360 | |
21361 | return pages; | |
21362 | } | |
21363 | +EXPORT_SYMBOL_GPL(nr_free_highpages); | |
21364 | ||
21365 | static int pkmap_count[LAST_PKMAP]; | |
21366 | static unsigned int last_pkmap_nr; | |
21367 | diff --git a/mm/memory.c b/mm/memory.c | |
85eb3c9d | 21368 | index 53cf85d..e2724b2 100644 |
2380c486 JR |
21369 | --- a/mm/memory.c |
21370 | +++ b/mm/memory.c | |
5bd2511a | 21371 | @@ -1340,6 +1340,7 @@ no_page_table: |
7e46296a | 21372 | return ERR_PTR(-EFAULT); |
2380c486 JR |
21373 | return page; |
21374 | } | |
21375 | +EXPORT_SYMBOL_GPL(follow_page); | |
21376 | ||
7e46296a AM |
21377 | int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
21378 | unsigned long start, int nr_pages, unsigned int gup_flags, | |
2380c486 | 21379 | diff --git a/mm/mmzone.c b/mm/mmzone.c |
9474138d | 21380 | index f5b7d17..72a6770 100644 |
2380c486 JR |
21381 | --- a/mm/mmzone.c |
21382 | +++ b/mm/mmzone.c | |
9474138d | 21383 | @@ -14,6 +14,7 @@ struct pglist_data *first_online_pgdat(void) |
2380c486 JR |
21384 | { |
21385 | return NODE_DATA(first_online_node); | |
21386 | } | |
21387 | +EXPORT_SYMBOL_GPL(first_online_pgdat); | |
21388 | ||
21389 | struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) | |
21390 | { | |
9474138d | 21391 | @@ -23,6 +24,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) |
2380c486 JR |
21392 | return NULL; |
21393 | return NODE_DATA(nid); | |
21394 | } | |
21395 | +EXPORT_SYMBOL_GPL(next_online_pgdat); | |
21396 | ||
21397 | /* | |
21398 | * next_zone - helper magic for for_each_zone() | |
9474138d | 21399 | @@ -42,6 +44,7 @@ struct zone *next_zone(struct zone *zone) |
2380c486 JR |
21400 | } |
21401 | return zone; | |
21402 | } | |
21403 | +EXPORT_SYMBOL_GPL(next_zone); | |
21404 | ||
21405 | static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) | |
21406 | { | |
21407 | diff --git a/mm/page-writeback.c b/mm/page-writeback.c | |
85eb3c9d | 21408 | index 37498ef..2662851 100644 |
2380c486 JR |
21409 | --- a/mm/page-writeback.c |
21410 | +++ b/mm/page-writeback.c | |
7e46296a | 21411 | @@ -99,6 +99,7 @@ unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */ |
2380c486 JR |
21412 | * Flag that makes the machine dump writes/reads and block dirtyings. |
21413 | */ | |
21414 | int block_dump; | |
21415 | +EXPORT_SYMBOL_GPL(block_dump); | |
21416 | ||
21417 | /* | |
21418 | * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: | |
21419 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
85eb3c9d | 21420 | index 9bd339e..fde5a73 100644 |
2380c486 JR |
21421 | --- a/mm/page_alloc.c |
21422 | +++ b/mm/page_alloc.c | |
5bd2511a | 21423 | @@ -2273,6 +2273,26 @@ static unsigned int nr_free_zone_pages(int offset) |
2380c486 JR |
21424 | return sum; |
21425 | } | |
21426 | ||
21427 | +static unsigned int nr_unallocated_zone_pages(int offset) | |
21428 | +{ | |
21429 | + struct zoneref *z; | |
21430 | + struct zone *zone; | |
21431 | + | |
21432 | + /* Just pick one node, since fallback list is circular */ | |
21433 | + unsigned int sum = 0; | |
21434 | + | |
21435 | + struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); | |
21436 | + | |
21437 | + for_each_zone_zonelist(zone, z, zonelist, offset) { | |
92bca44c | 21438 | + unsigned long high = high_wmark_pages(zone); |
2380c486 JR |
21439 | + unsigned long left = zone_page_state(zone, NR_FREE_PAGES); |
21440 | + if (left > high) | |
21441 | + sum += left - high; | |
21442 | + } | |
21443 | + | |
21444 | + return sum; | |
21445 | +} | |
21446 | + | |
21447 | /* | |
21448 | * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL | |
21449 | */ | |
5bd2511a | 21450 | @@ -2283,6 +2303,15 @@ unsigned int nr_free_buffer_pages(void) |
2380c486 JR |
21451 | EXPORT_SYMBOL_GPL(nr_free_buffer_pages); |
21452 | ||
21453 | /* | |
21454 | + * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL | |
21455 | + */ | |
21456 | +unsigned int nr_unallocated_buffer_pages(void) | |
21457 | +{ | |
21458 | + return nr_unallocated_zone_pages(gfp_zone(GFP_USER)); | |
21459 | +} | |
21460 | +EXPORT_SYMBOL_GPL(nr_unallocated_buffer_pages); | |
21461 | + | |
21462 | +/* | |
21463 | * Amount of free RAM allocatable within all zones | |
21464 | */ | |
21465 | unsigned int nr_free_pagecache_pages(void) | |
e999739a | 21466 | diff --git a/mm/shmem.c b/mm/shmem.c |
5bd2511a | 21467 | index f65f840..3024d35 100644 |
e999739a | 21468 | --- a/mm/shmem.c |
21469 | +++ b/mm/shmem.c | |
5bd2511a | 21470 | @@ -1568,6 +1568,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode |
e999739a | 21471 | memset(info, 0, (char *)inode - (char *)info); |
21472 | spin_lock_init(&info->lock); | |
21473 | info->flags = flags & VM_NORESERVE; | |
21474 | + if (flags & VM_ATOMIC_COPY) | |
21475 | + inode->i_flags |= S_ATOMIC_COPY; | |
21476 | INIT_LIST_HEAD(&info->swaplist); | |
92bca44c | 21477 | cache_no_acl(inode); |
e999739a | 21478 | |
e999739a | 21479 | diff --git a/mm/swap_state.c b/mm/swap_state.c |
de6743ae | 21480 | index e10f583..86bc26a 100644 |
e999739a | 21481 | --- a/mm/swap_state.c |
21482 | +++ b/mm/swap_state.c | |
de6743ae | 21483 | @@ -47,6 +47,7 @@ struct address_space swapper_space = { |
e999739a | 21484 | .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), |
21485 | .backing_dev_info = &swap_backing_dev_info, | |
21486 | }; | |
21487 | +EXPORT_SYMBOL_GPL(swapper_space); | |
21488 | ||
21489 | #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) | |
21490 | ||
2380c486 | 21491 | diff --git a/mm/swapfile.c b/mm/swapfile.c |
85eb3c9d | 21492 | index f08d165..1126d05 100644 |
2380c486 JR |
21493 | --- a/mm/swapfile.c |
21494 | +++ b/mm/swapfile.c | |
5dd10c98 AM |
21495 | @@ -39,7 +39,6 @@ |
21496 | static bool swap_count_continued(struct swap_info_struct *, pgoff_t, | |
21497 | unsigned char); | |
21498 | static void free_swap_count_continuations(struct swap_info_struct *); | |
21499 | -static sector_t map_swap_entry(swp_entry_t, struct block_device**); | |
21500 | ||
21501 | static DEFINE_SPINLOCK(swap_lock); | |
21502 | static unsigned int nr_swapfiles; | |
85eb3c9d | 21503 | @@ -482,6 +481,7 @@ noswap: |
2380c486 JR |
21504 | spin_unlock(&swap_lock); |
21505 | return (swp_entry_t) {0}; | |
21506 | } | |
21507 | +EXPORT_SYMBOL_GPL(get_swap_page); | |
21508 | ||
92bca44c | 21509 | /* The only caller of this function is now susupend routine */ |
2380c486 | 21510 | swp_entry_t get_swap_page_of_type(int type) |
85eb3c9d | 21511 | @@ -504,6 +504,7 @@ swp_entry_t get_swap_page_of_type(int type) |
7e46296a AM |
21512 | spin_unlock(&swap_lock); |
21513 | return (swp_entry_t) {0}; | |
21514 | } | |
21515 | +EXPORT_SYMBOL_GPL(get_swap_page_of_type); | |
21516 | ||
5dd10c98 | 21517 | static struct swap_info_struct *swap_info_get(swp_entry_t entry) |
7e46296a | 21518 | { |
85eb3c9d | 21519 | @@ -628,6 +629,7 @@ void swapcache_free(swp_entry_t entry, struct page *page) |
5dd10c98 | 21520 | spin_unlock(&swap_lock); |
2380c486 JR |
21521 | } |
21522 | } | |
21523 | +EXPORT_SYMBOL_GPL(swap_free); | |
21524 | ||
21525 | /* | |
21526 | * How many references to page are currently swapped out? | |
85eb3c9d | 21527 | @@ -1304,7 +1306,7 @@ static void drain_mmlist(void) |
5dd10c98 AM |
21528 | * Note that the type of this function is sector_t, but it returns page offset |
21529 | * into the bdev, not sector offset. | |
21530 | */ | |
21531 | -static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) | |
21532 | +sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) | |
21533 | { | |
21534 | struct swap_info_struct *sis; | |
21535 | struct swap_extent *start_se; | |
85eb3c9d | 21536 | @@ -1331,6 +1333,7 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) |
2380c486 JR |
21537 | BUG_ON(se == start_se); /* It *must* be present */ |
21538 | } | |
21539 | } | |
5dd10c98 | 21540 | +EXPORT_SYMBOL_GPL(map_swap_entry); |
2380c486 | 21541 | |
2380c486 | 21542 | /* |
5dd10c98 | 21543 | * Returns the page offset into bdev for the specified page's swap entry. |
85eb3c9d | 21544 | @@ -1673,6 +1676,7 @@ out_dput: |
2380c486 JR |
21545 | out: |
21546 | return err; | |
21547 | } | |
21548 | +EXPORT_SYMBOL_GPL(sys_swapoff); | |
21549 | ||
21550 | #ifdef CONFIG_PROC_FS | |
21551 | /* iterator */ | |
85eb3c9d | 21552 | @@ -2102,6 +2106,7 @@ out: |
2380c486 JR |
21553 | } |
21554 | return error; | |
21555 | } | |
21556 | +EXPORT_SYMBOL_GPL(sys_swapon); | |
21557 | ||
21558 | void si_swapinfo(struct sysinfo *val) | |
21559 | { | |
85eb3c9d | 21560 | @@ -2119,6 +2124,7 @@ void si_swapinfo(struct sysinfo *val) |
2380c486 JR |
21561 | val->totalswap = total_swap_pages + nr_to_be_unused; |
21562 | spin_unlock(&swap_lock); | |
21563 | } | |
21564 | +EXPORT_SYMBOL_GPL(si_swapinfo); | |
21565 | ||
21566 | /* | |
21567 | * Verify that a swap entry is valid and increment its swap map count. | |
85eb3c9d | 21568 | @@ -2230,6 +2236,13 @@ int swapcache_prepare(swp_entry_t entry) |
5dd10c98 | 21569 | return __swap_duplicate(entry, SWAP_HAS_CACHE); |
2380c486 | 21570 | } |
2380c486 | 21571 | |
5dd10c98 AM |
21572 | + |
21573 | +struct swap_info_struct *get_swap_info_struct(unsigned type) | |
21574 | +{ | |
21575 | + return swap_info[type]; | |
21576 | +} | |
21577 | +EXPORT_SYMBOL_GPL(get_swap_info_struct); | |
21578 | + | |
2380c486 JR |
21579 | /* |
21580 | * swap_lock prevents swap_map being freed. Don't grab an extra | |
5dd10c98 | 21581 | * reference on the swaphandle, it doesn't matter if it becomes unused. |
2380c486 | 21582 | diff --git a/mm/vmscan.c b/mm/vmscan.c |
85eb3c9d | 21583 | index b94fe1b..2f3ff21 100644 |
2380c486 JR |
21584 | --- a/mm/vmscan.c |
21585 | +++ b/mm/vmscan.c | |
85eb3c9d | 21586 | @@ -2337,6 +2337,9 @@ void wakeup_kswapd(struct zone *zone, int order) |
2380c486 JR |
21587 | if (!populated_zone(zone)) |
21588 | return; | |
21589 | ||
21590 | + if (freezer_is_on()) | |
21591 | + return; | |
21592 | + | |
21593 | pgdat = zone->zone_pgdat; | |
92bca44c | 21594 | if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) |
2380c486 | 21595 | return; |
85eb3c9d | 21596 | @@ -2393,11 +2396,11 @@ unsigned long zone_reclaimable_pages(struct zone *zone) |
de6743ae AM |
21597 | * LRU order by reclaiming preferentially |
21598 | * inactive > active > active referenced > active mapped | |
21599 | */ | |
21600 | -unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |
21601 | +unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask) | |
21602 | { | |
21603 | struct reclaim_state reclaim_state; | |
21604 | struct scan_control sc = { | |
21605 | - .gfp_mask = GFP_HIGHUSER_MOVABLE, | |
21606 | + .gfp_mask = mask, | |
21607 | .may_swap = 1, | |
21608 | .may_unmap = 1, | |
21609 | .may_writepage = 1, | |
85eb3c9d | 21610 | @@ -2423,6 +2426,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) |
2380c486 | 21611 | |
5dd10c98 | 21612 | return nr_reclaimed; |
2380c486 | 21613 | } |
de6743ae AM |
21614 | +EXPORT_SYMBOL_GPL(shrink_memory_mask); |
21615 | + | |
21616 | +unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |
21617 | +{ | |
21618 | + return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE); | |
21619 | +} | |
2380c486 | 21620 | +EXPORT_SYMBOL_GPL(shrink_all_memory); |
92bca44c | 21621 | #endif /* CONFIG_HIBERNATION */ |
2380c486 JR |
21622 | |
21623 | /* It's optimal to keep kswapds on the same CPUs as their memory, but |