]>
Commit | Line | Data |
---|---|---|
2380c486 JR |
1 | diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt |
2 | new file mode 100644 | |
e999739a | 3 | index 0000000..7a96186 |
2380c486 JR |
4 | --- /dev/null |
5 | +++ b/Documentation/power/tuxonice-internals.txt | |
e999739a | 6 | @@ -0,0 +1,477 @@ |
2380c486 | 7 | + TuxOnIce 3.0 Internal Documentation. |
e999739a | 8 | + Updated to 26 March 2009 |
2380c486 JR |
9 | + |
10 | +1. Introduction. | |
11 | + | |
12 | + TuxOnIce 3.0 is an addition to the Linux Kernel, designed to | |
13 | + allow the user to quickly shutdown and quickly boot a computer, without | |
14 | + needing to close documents or programs. It is equivalent to the | |
15 | + hibernate facility in some laptops. This implementation, however, | |
16 | + requires no special BIOS or hardware support. | |
17 | + | |
18 | + The code in these files is based upon the original implementation | |
19 | + prepared by Gabor Kuti and additional work by Pavel Machek and a | |
20 | + host of others. This code has been substantially reworked by Nigel | |
21 | + Cunningham, again with the help and testing of many others, not the | |
22 | + least of whom is Michael Frank. At its heart, however, the operation is | |
23 | + essentially the same as Gabor's version. | |
24 | + | |
25 | +2. Overview of operation. | |
26 | + | |
27 | + The basic sequence of operations is as follows: | |
28 | + | |
29 | + a. Quiesce all other activity. | |
30 | + b. Ensure enough memory and storage space are available, and attempt | |
31 | + to free memory/storage if necessary. | |
32 | + c. Allocate the required memory and storage space. | |
33 | + d. Write the image. | |
34 | + e. Power down. | |
35 | + | |
36 | + There are a number of complicating factors which mean that things are | |
37 | + not as simple as the above would imply, however... | |
38 | + | |
39 | + o The activity of each process must be stopped at a point where it will | |
40 | + not be holding locks necessary for saving the image, or unexpectedly | |
41 | + restart operations due to something like a timeout and thereby make | |
42 | + our image inconsistent. | |
43 | + | |
44 | + o It is desirous that we sync outstanding I/O to disk before calculating | |
45 | + image statistics. This reduces corruption if one should suspend but | |
46 | + then not resume, and also makes later parts of the operation safer (see | |
47 | + below). | |
48 | + | |
49 | + o We need to get as close as we can to an atomic copy of the data. | |
50 | + Inconsistencies in the image will result in inconsistent memory contents at | |
51 | + resume time, and thus in instability of the system and/or file system | |
52 | + corruption. This would appear to imply a maximum image size of one half of | |
53 | + the amount of RAM, but we have a solution... (again, below). | |
54 | + | |
55 | + o In 2.6, we choose to play nicely with the other suspend-to-disk | |
56 | + implementations. | |
57 | + | |
58 | +3. Detailed description of internals. | |
59 | + | |
60 | + a. Quiescing activity. | |
61 | + | |
62 | + Safely quiescing the system is achieved using three separate but related | |
63 | + aspects. | |
64 | + | |
65 | + First, we note that the vast majority of processes don't need to run during | |
66 | + suspend. They can be 'frozen'. We therefore implement a refrigerator | |
67 | + routine, which processes enter and in which they remain until the cycle is | |
68 | + complete. Processes enter the refrigerator via try_to_freeze() invocations | |
69 | + at appropriate places. A process cannot be frozen in any old place. It | |
70 | + must not be holding locks that will be needed for writing the image or | |
71 | + freezing other processes. For this reason, userspace processes generally | |
72 | + enter the refrigerator via the signal handling code, and kernel threads at | |
73 | + the place in their event loops where they drop locks and yield to other | |
74 | + processes or sleep. | |
75 | + | |
76 | + The task of freezing processes is complicated by the fact that there can be | |
77 | + interdependencies between processes. Freezing process A before process B may | |
78 | + mean that process B cannot be frozen, because it stops at waiting for | |
79 | + process A rather than in the refrigerator. This issue is seen where | |
80 | + userspace waits on freezeable kernel threads or fuse filesystem threads. To | |
81 | + address this issue, we implement the following algorithm for quiescing | |
82 | + activity: | |
83 | + | |
84 | + - Freeze filesystems (including fuse - userspace programs starting | |
85 | + new requests are immediately frozen; programs already running | |
86 | + requests complete their work before being frozen in the next | |
87 | + step) | |
88 | + - Freeze userspace | |
89 | + - Thaw filesystems (this is safe now that userspace is frozen and no | |
90 | + fuse requests are outstanding). | |
91 | + - Invoke sys_sync (noop on fuse). | |
92 | + - Freeze filesystems | |
93 | + - Freeze kernel threads | |
94 | + | |
95 | + If we need to free memory, we thaw kernel threads and filesystems, but not | |
96 | + userspace. We can then free caches without worrying about deadlocks due to | |
97 | + swap files being on frozen filesystems or such like. | |
98 | + | |
99 | + b. Ensure enough memory & storage are available. | |
100 | + | |
101 | + We have a number of constraints to meet in order to be able to successfully | |
102 | + suspend and resume. | |
103 | + | |
104 | + First, the image will be written in two parts, described below. One of these | |
105 | + parts needs to have an atomic copy made, which of course implies a maximum | |
106 | + size of one half of the amount of system memory. The other part ('pageset') | |
107 | + is not atomically copied, and can therefore be as large or small as desired. | |
108 | + | |
109 | + Second, we have constraints on the amount of storage available. In these | |
110 | + calculations, we may also consider any compression that will be done. The | |
111 | + cryptoapi module allows the user to configure an expected compression ratio. | |
112 | + | |
113 | + Third, the user can specify an arbitrary limit on the image size, in | |
114 | + megabytes. This limit is treated as a soft limit, so that we don't fail the | |
115 | + attempt to suspend if we cannot meet this constraint. | |
116 | + | |
117 | + c. Allocate the required memory and storage space. | |
118 | + | |
119 | + Having done the initial freeze, we determine whether the above constraints | |
120 | + are met, and seek to allocate the metadata for the image. If the constraints | |
121 | + are not met, or we fail to allocate the required space for the metadata, we | |
122 | + seek to free the amount of memory that we calculate is needed and try again. | |
123 | + We allow up to four iterations of this loop before aborting the cycle. If we | |
124 | + do fail, it should only be because of a bug in TuxOnIce's calculations. | |
125 | + | |
126 | + These steps are merged together in the prepare_image function, found in | |
127 | + prepare_image.c. The functions are merged because of the cyclical nature | |
128 | + of the problem of calculating how much memory and storage is needed. Since | |
129 | + the data structures containing the information about the image must | |
130 | + themselves take memory and use storage, the amount of memory and storage | |
131 | + required changes as we prepare the image. Since the changes are not large, | |
132 | + only one or two iterations will be required to achieve a solution. | |
133 | + | |
134 | + The recursive nature of the algorithm is minimised by keeping user space | |
135 | + frozen while preparing the image, and by the fact that our records of which | |
136 | + pages are to be saved and which pageset they are saved in use bitmaps (so | |
137 | + that changes in number or fragmentation of the pages to be saved don't | |
138 | + feedback via changes in the amount of memory needed for metadata). The | |
139 | + recursiveness is thus limited to any extra slab pages allocated to store the | |
140 | + extents that record storage used, and the effects of seeking to free memory. | |
141 | + | |
142 | + d. Write the image. | |
143 | + | |
144 | + We previously mentioned the need to create an atomic copy of the data, and | |
145 | + the half-of-memory limitation that is implied in this. This limitation is | |
146 | + circumvented by dividing the memory to be saved into two parts, called | |
147 | + pagesets. | |
148 | + | |
e999739a | 149 | + Pageset2 contains most of the page cache - the pages on the active and |
150 | + inactive LRU lists that aren't needed or modified while TuxOnIce is | |
151 | + running, so they can be safely written without an atomic copy. They are | |
152 | + therefore saved first and reloaded last. While saving these pages, | |
153 | + TuxOnIce carefully ensures that the work of writing the pages doesn't make | |
154 | + the image inconsistent. With the support for Kernel (Video) Mode Setting | |
155 | + going into the kernel at the time of writing, we need to check for pages | |
156 | + on the LRU that are used by KMS, and exclude them from pageset2. They are | |
157 | + atomically copied as part of pageset 1. | |
2380c486 JR |
158 | + |
159 | + Once pageset2 has been saved, we prepare to do the atomic copy of remaining | |
160 | + memory. As part of the preparation, we power down drivers, thereby providing | |
161 | + them with the opportunity to have their state recorded in the image. The | |
162 | + amount of memory allocated by drivers for this is usually negligible, but if | |
163 | + DRI is in use, video drivers may require significant amounts. Ideally we | |
164 | + would be able to query drivers while preparing the image as to the amount of | |
165 | + memory they will need. Unfortunately no such mechanism exists at the time of | |
166 | + writing. For this reason, TuxOnIce allows the user to set an | |
167 | + 'extra_pages_allowance', which is used to seek to ensure sufficient memory | |
168 | + is available for drivers at this point. TuxOnIce also lets the user set this | |
169 | + value to 0. In this case, a test driver suspend is done while preparing the | |
e999739a | 170 | + image, and the difference (plus a margin) used instead. TuxOnIce will also |
171 | + automatically restart the hibernation process (twice at most) if it finds | |
172 | + that the extra pages allowance is not sufficient. It will then use what was | |
173 | + actually needed (plus a margin, again). Failure to hibernate should thus | |
174 | + be an extremely rare occurrence. | |
2380c486 JR |
175 | + |
176 | + Having suspended the drivers, we save the CPU context before making an | |
177 | + atomic copy of pageset1, resuming the drivers and saving the atomic copy. | |
178 | + After saving the two pagesets, we just need to save our metadata before | |
179 | + powering down. | |
180 | + | |
181 | + As we mentioned earlier, the contents of pageset2 pages aren't needed once | |
182 | + they've been saved. We therefore use them as the destination of our atomic | |
183 | + copy. In the unlikely event that pageset1 is larger, extra pages are | |
184 | + allocated while the image is being prepared. This is normally only a real | |
185 | + possibility when the system has just been booted and the page cache is | |
186 | + small. | |
187 | + | |
188 | + This is where we need to be careful about syncing, however. Pageset2 will | |
189 | + probably contain filesystem meta data. If this is overwritten with pageset1 | |
190 | + and then a sync occurs, the filesystem will be corrupted - at least until | |
191 | + resume time and another sync of the restored data. Since there is a | |
192 | + possibility that the user might not resume or (may it never be!) that | |
e999739a | 193 | + TuxOnIce might oops, we do our utmost to avoid syncing filesystems after |
2380c486 JR |
194 | + copying pageset1. |
195 | + | |
196 | + e. Power down. | |
197 | + | |
198 | + Powering down uses standard kernel routines. TuxOnIce supports powering down | |
199 | + using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off. | |
200 | + Supporting suspend to ram (S3) as a power off option might sound strange, | |
201 | + but it allows the user to quickly get their system up and running again if | |
202 | + the battery doesn't run out (we just need to re-read the overwritten pages) | |
203 | + and if the battery does run out (or the user removes power), they can still | |
204 | + resume. | |
205 | + | |
206 | +4. Data Structures. | |
207 | + | |
208 | + TuxOnIce uses three main structures to store its metadata and configuration | |
209 | + information: | |
210 | + | |
211 | + a) Pageflags bitmaps. | |
212 | + | |
213 | + TuxOnIce records which pages will be in pageset1, pageset2, the destination | |
214 | + of the atomic copy and the source of the atomically restored image using | |
e999739a | 215 | + bitmaps. The code used is that written for swsusp, with small improvements |
216 | + to match TuxOnIce's requirements. | |
2380c486 JR |
217 | + |
218 | + The pageset1 bitmap is thus easily stored in the image header for use at | |
219 | + resume time. | |
220 | + | |
221 | + As mentioned above, using bitmaps also means that the amount of memory and | |
222 | + storage required for recording the above information is constant. This | |
223 | + greatly simplifies the work of preparing the image. In earlier versions of | |
224 | + TuxOnIce, extents were used to record which pages would be stored. In that | |
225 | + case, however, eating memory could result in greater fragmentation of the | |
226 | + lists of pages, which in turn required more memory to store the extents and | |
227 | + more storage in the image header. These could in turn require further | |
228 | + freeing of memory, and another iteration. All of this complexity is removed | |
229 | + by having bitmaps. | |
230 | + | |
231 | + Bitmaps also make a lot of sense because TuxOnIce only ever iterates | |
232 | + through the lists. There is therefore no cost to not being able to find the | |
233 | + nth page in order 0 time. We only need to worry about the cost of finding | |
234 | + the n+1th page, given the location of the nth page. Bitwise optimisations | |
235 | + help here. | |
236 | + | |
2380c486 JR |
237 | + b) Extents for block data. |
238 | + | |
239 | + TuxOnIce supports writing the image to multiple block devices. In the case | |
240 | + of swap, multiple partitions and/or files may be in use, and we happily use | |
e999739a | 241 | + them all (with the exception of compcache pages, which we allocate but do |
242 | + not use). This use of multiple block devices is accomplished as follows: | |
2380c486 JR |
243 | + |
244 | + Whatever the actual source of the allocated storage, the destination of the | |
245 | + image can be viewed in terms of one or more block devices, and on each | |
246 | + device, a list of sectors. To simplify matters, we only use contiguous, | |
247 | + PAGE_SIZE aligned sectors, like the swap code does. | |
248 | + | |
249 | + Since sector numbers on each bdev may well not start at 0, it makes much | |
250 | + more sense to use extents here. Contiguous ranges of pages can thus be | |
251 | + represented in the extents by contiguous values. | |
252 | + | |
253 | + Variations in block size are taken account of in transforming this data | |
254 | + into the parameters for bio submission. | |
255 | + | |
256 | + We can thus implement a layer of abstraction wherein the core of TuxOnIce | |
257 | + doesn't have to worry about which device we're currently writing to or | |
258 | + where in the device we are. It simply requests that the next page in the | |
259 | + pageset or header be written, leaving the details to this lower layer. | |
260 | + The lower layer remembers where in the sequence of devices and blocks each | |
261 | + pageset starts. The header always starts at the beginning of the allocated | |
262 | + storage. | |
263 | + | |
264 | + So extents are: | |
265 | + | |
266 | + struct extent { | |
267 | + unsigned long minimum, maximum; | |
268 | + struct extent *next; | |
269 | + }; | |
270 | + | |
271 | + These are combined into chains of extents for a device: | |
272 | + | |
273 | + struct extent_chain { | |
274 | + int size; /* size of the extent ie sum (max-min+1) */ | |
275 | + int allocs, frees; | |
276 | + char *name; | |
277 | + struct extent *first, *last_touched; | |
278 | + }; | |
279 | + | |
280 | + For each bdev, we need to store a little more info: | |
281 | + | |
282 | + struct suspend_bdev_info { | |
283 | + struct block_device *bdev; | |
284 | + dev_t dev_t; | |
285 | + int bmap_shift; | |
286 | + int blocks_per_page; | |
287 | + }; | |
288 | + | |
289 | + The dev_t is used to identify the device in the stored image. As a result, | |
290 | + we expect devices at resume time to have the same major and minor numbers | |
291 | + as they had while suspending. This is primarily a concern where the user | |
292 | + utilises LVM for storage, as they will need to dmsetup their partitions in | |
293 | + such a way as to maintain this consistency at resume time. | |
294 | + | |
e999739a | 295 | + bmap_shift and blocks_per_page apply the effects of variations in blocks |
296 | + per page settings for the filesystem and underlying bdev. For most | |
2380c486 JR |
297 | + filesystems, these are the same, but for xfs, they can have independent |
298 | + values. | |
299 | + | |
300 | + Combining these two structures together, we have everything we need to | |
301 | + record what devices and what blocks on each device are being used to | |
302 | + store the image, and to submit i/o using bio_submit. | |
303 | + | |
304 | + The last elements in the picture are a means of recording how the storage | |
305 | + is being used. | |
306 | + | |
307 | + We do this first and foremost by implementing a layer of abstraction on | |
308 | + top of the devices and extent chains which allows us to view however many | |
309 | + devices there might be as one long storage tape, with a single 'head' that | |
310 | + tracks a 'current position' on the tape: | |
311 | + | |
312 | + struct extent_iterate_state { | |
313 | + struct extent_chain *chains; | |
314 | + int num_chains; | |
315 | + int current_chain; | |
316 | + struct extent *current_extent; | |
317 | + unsigned long current_offset; | |
318 | + }; | |
319 | + | |
320 | + That is, *chains points to an array of size num_chains of extent chains. | |
321 | + For the filewriter, this is always a single chain. For the swapwriter, the | |
322 | + array is of size MAX_SWAPFILES. | |
323 | + | |
324 | + current_chain, current_extent and current_offset thus point to the current | |
325 | + index in the chains array (and into a matching array of struct | |
326 | + suspend_bdev_info), the current extent in that chain (to optimise access), | |
327 | + and the current value in the offset. | |
328 | + | |
329 | + The image is divided into three parts: | |
330 | + - The header | |
331 | + - Pageset 1 | |
332 | + - Pageset 2 | |
333 | + | |
334 | + The header always starts at the first device and first block. We know its | |
335 | + size before we begin to save the image because we carefully account for | |
336 | + everything that will be stored in it. | |
337 | + | |
338 | + The second pageset (LRU) is stored first. It begins on the next page after | |
339 | + the end of the header. | |
340 | + | |
341 | + The first pageset is stored second. Its start location is only known once | |
342 | + pageset2 has been saved, since pageset2 may be compressed as it is written. | |
343 | + This location is thus recorded at the end of saving pageset2. It is page | |
344 | + aligned also. | |
345 | + | |
346 | + Since this information is needed at resume time, and the location of extents | |
347 | + in memory will differ at resume time, this needs to be stored in a portable | |
348 | + way: | |
349 | + | |
350 | + struct extent_iterate_saved_state { | |
351 | + int chain_num; | |
352 | + int extent_num; | |
353 | + unsigned long offset; | |
354 | + }; | |
355 | + | |
356 | + We can thus implement a layer of abstraction wherein the core of TuxOnIce | |
357 | + doesn't have to worry about which device we're currently writing to or | |
358 | + where in the device we are. It simply requests that the next page in the | |
359 | + pageset or header be written, leaving the details to this layer, and | |
360 | + invokes the routines to remember and restore the position, without having | |
361 | + to worry about the details of how the data is arranged on disk or such like. | |
362 | + | |
363 | + c) Modules | |
364 | + | |
365 | + One aim in designing TuxOnIce was to make it flexible. We wanted to allow | |
366 | + for the implementation of different methods of transforming a page to be | |
367 | + written to disk and different methods of getting the pages stored. | |
368 | + | |
369 | + In early versions (the betas and perhaps Suspend1), compression support was | |
370 | + inlined in the image writing code, and the data structures and code for | |
371 | + managing swap were intertwined with the rest of the code. A number of people | |
372 | + had expressed interest in implementing image encryption, and alternative | |
373 | + methods of storing the image. | |
374 | + | |
375 | + In order to achieve this, TuxOnIce was given a modular design. | |
376 | + | |
377 | + A module is a single file which encapsulates the functionality needed | |
378 | + to transform a pageset of data (encryption or compression, for example), | |
379 | + or to write the pageset to a device. The former type of module is called | |
380 | + a 'page-transformer', the latter a 'writer'. | |
381 | + | |
382 | + Modules are linked together in pipeline fashion. There may be zero or more | |
383 | + page transformers in a pipeline, and there is always exactly one writer. | |
384 | + The pipeline follows this pattern: | |
385 | + | |
386 | + --------------------------------- | |
387 | + | TuxOnIce Core | | |
388 | + --------------------------------- | |
389 | + | | |
390 | + | | |
391 | + --------------------------------- | |
392 | + | Page transformer 1 | | |
393 | + --------------------------------- | |
394 | + | | |
395 | + | | |
396 | + --------------------------------- | |
397 | + | Page transformer 2 | | |
398 | + --------------------------------- | |
399 | + | | |
400 | + | | |
401 | + --------------------------------- | |
402 | + | Writer | | |
403 | + --------------------------------- | |
404 | + | |
405 | + During the writing of an image, the core code feeds pages one at a time | |
406 | + to the first module. This module performs whatever transformations it | |
407 | + implements on the incoming data, completely consuming the incoming data and | |
e999739a | 408 | + feeding output in a similar manner to the next module. |
2380c486 JR |
409 | + |
410 | + All routines are SMP safe, and the final result of the transformations is | |
411 | + written with an index (provided by the core) and size of the output by the | |
412 | + writer. As a result, we can have multithreaded I/O without needing to | |
413 | + worry about the sequence in which pages are written (or read). | |
414 | + | |
415 | + During reading, the pipeline works in the reverse direction. The core code | |
416 | + calls the first module with the address of a buffer which should be filled. | |
417 | + (Note that the buffer size is always PAGE_SIZE at this time). This module | |
418 | + will in turn request data from the next module and so on down until the | |
419 | + writer is made to read from the stored image. | |
420 | + | |
421 | + Part of definition of the structure of a module thus looks like this: | |
422 | + | |
423 | + int (*rw_init) (int rw, int stream_number); | |
424 | + int (*rw_cleanup) (int rw); | |
425 | + int (*write_chunk) (struct page *buffer_page); | |
426 | + int (*read_chunk) (struct page *buffer_page, int sync); | |
427 | + | |
428 | + It should be noted that the _cleanup routine may be called before the | |
429 | + full stream of data has been read or written. While writing the image, | |
430 | + the user may (depending upon settings) choose to abort suspending, and | |
431 | + if we are in the midst of writing the last portion of the image, a portion | |
432 | + of the second pageset may be reread. This may also happen if an error | |
433 | + occurs and we seek to abort the process of writing the image. | |
434 | + | |
435 | + The modular design is also useful in a number of other ways. It provides | |
436 | + a means whereby we can add support for: | |
437 | + | |
438 | + - providing overall initialisation and cleanup routines; | |
439 | + - serialising configuration information in the image header; | |
440 | + - providing debugging information to the user; | |
441 | + - determining memory and image storage requirements; | |
442 | + - dis/enabling components at run-time; | |
443 | + - configuring the module (see below); | |
444 | + | |
445 | + ...and routines for writers specific to their work: | |
446 | + - Parsing a resume= location; | |
447 | + - Determining whether an image exists; | |
448 | + - Marking a resume as having been attempted; | |
449 | + - Invalidating an image; | |
450 | + | |
451 | + Since some parts of the core - the user interface and storage manager | |
452 | + support - have use for some of these functions, they are registered as | |
453 | + 'miscellaneous' modules as well. | |
454 | + | |
455 | + d) Sysfs data structures. | |
456 | + | |
457 | + This brings us naturally to support for configuring TuxOnIce. We desired to | |
458 | + provide a way to make TuxOnIce as flexible and configurable as possible. | |
e999739a | 459 | + The user shouldn't have to reboot just because they want to now hibernate to |
2380c486 JR |
460 | + a file instead of a partition, for example. |
461 | + | |
462 | + To accomplish this, TuxOnIce implements a very generic means whereby the | |
463 | + core and modules can register new sysfs entries. All TuxOnIce entries use | |
e999739a | 464 | + a single _store and _show routine, both of which are found in |
465 | + tuxonice_sysfs.c in the kernel/power directory. These routines handle the | |
466 | + most common operations - getting and setting the values of bits, integers, | |
467 | + longs, unsigned longs and strings in one place, and allow overrides for | |
468 | + customised get and set options as well as side-effect routines for all | |
469 | + reads and writes. | |
2380c486 JR |
470 | + |
471 | + When combined with some simple macros, a new sysfs entry can then be defined | |
472 | + in just a couple of lines: | |
473 | + | |
e999739a | 474 | + SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, |
475 | + 2048, 0, NULL), | |
2380c486 JR |
476 | + |
477 | + This defines a sysfs entry named "progress_granularity" which is rw and | |
478 | + allows the user to access an integer stored at &progress_granularity, giving | |
479 | + it a value between 1 and 2048 inclusive. | |
480 | + | |
481 | + Sysfs entries are registered under /sys/power/tuxonice, and entries for | |
482 | + modules are located in a subdirectory named after the module. | |
483 | + | |
484 | diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt | |
485 | new file mode 100644 | |
9474138d | 486 | index 0000000..8900b45 |
2380c486 JR |
487 | --- /dev/null |
488 | +++ b/Documentation/power/tuxonice.txt | |
9474138d | 489 | @@ -0,0 +1,948 @@ |
2380c486 JR |
490 | + --- TuxOnIce, version 3.0 --- |
491 | + | |
492 | +1. What is it? | |
493 | +2. Why would you want it? | |
494 | +3. What do you need to use it? | |
495 | +4. Why not just use the version already in the kernel? | |
496 | +5. How do you use it? | |
497 | +6. What do all those entries in /sys/power/tuxonice do? | |
498 | +7. How do you get support? | |
499 | +8. I think I've found a bug. What should I do? | |
500 | +9. When will XXX be supported? | |
501 | +10. How does it work? | |
502 | +11. Who wrote TuxOnIce? | |
503 | + | |
504 | +1. What is it? | |
505 | + | |
506 | + Imagine you're sitting at your computer, working away. For some reason, you | |
507 | + need to turn off your computer for a while - perhaps it's time to go home | |
508 | + for the day. When you come back to your computer next, you're going to want | |
509 | + to carry on where you left off. Now imagine that you could push a button and | |
510 | + have your computer store the contents of its memory to disk and power down. | |
511 | + Then, when you next start up your computer, it loads that image back into | |
512 | + memory and you can carry on from where you were, just as if you'd never | |
513 | + turned the computer off. You have far less time to start up, no reopening of | |
514 | + applications or finding what directory you put that file in yesterday. | |
515 | + That's what TuxOnIce does. | |
516 | + | |
517 | + TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who, | |
518 | + with some help from Pavel Machek, got an early version going in 1999. The | |
519 | + project was then taken over by Florent Chabaud while still in alpha version | |
520 | + numbers. Nigel Cunningham came on the scene when Florent was unable to | |
521 | + continue, moving the project into betas, then 1.0, 2.0 and so on up to | |
522 | + the present series. During the 2.0 series, the name was contracted to | |
523 | + Suspend2 and the website suspend2.net created. Beginning around July 2007, | |
524 | + a transition to calling the software TuxOnIce was made, to seek to help | |
525 | + make it clear that TuxOnIce is more concerned with hibernation than suspend | |
526 | + to ram. | |
527 | + | |
528 | + Pavel Machek's swsusp code, which was merged around 2.5.17, retains the | |
529 | + original name, and was essentially a fork of the beta code until Rafael | |
530 | + Wysocki came on the scene in 2005 and began to improve it further. | |
531 | + | |
532 | +2. Why would you want it? | |
533 | + | |
534 | + Why wouldn't you want it? | |
535 | + | |
536 | + Being able to save the state of your system and quickly restore it improves | |
537 | + your productivity - you get a useful system in far less time than through | |
538 | + the normal boot process. You also get to be completely 'green', using zero | |
539 | + power, or as close to that as possible (the computer may still provide | |
540 | + minimal power to some devices, so they can initiate a power on, but that | |
541 | + will be the same amount of power as would be used if you told the computer | |
542 | + to shutdown). | |
543 | + | |
544 | +3. What do you need to use it? | |
545 | + | |
546 | + a. Kernel Support. | |
547 | + | |
548 | + i) The TuxOnIce patch. | |
549 | + | |
550 | + TuxOnIce is part of the Linux Kernel. This version is not part of Linus's | |
551 | + 2.6 tree at the moment, so you will need to download the kernel source and | |
552 | + apply the latest patch. Having done that, enable the appropriate options in | |
553 | + make [menu|x]config (under Power Management Options - look for "Enhanced | |
554 | + Hibernation"), compile and install your kernel. TuxOnIce works with SMP, | |
555 | + Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64. | |
556 | + | |
557 | + TuxOnIce patches are available from http://tuxonice.net. | |
558 | + | |
559 | + ii) Compression support. | |
560 | + | |
561 | + Compression support is implemented via the cryptoapi. You will therefore want | |
562 | + to select any Cryptoapi transforms that you want to use on your image from | |
9474138d AM |
563 | + the Cryptoapi menu while configuring your kernel. We recommend the use of the |
564 | + LZO compression method - it is very fast and still achieves good compression. | |
2380c486 | 565 | + |
9474138d | 566 | + You can also tell TuxOnIce to write its image to an encrypted and/or |
2380c486 JR |
567 | + compressed filesystem/swap partition. In that case, you don't need to do |
568 | + anything special for TuxOnIce when it comes to kernel configuration. | |
569 | + | |
570 | + iii) Configuring other options. | |
571 | + | |
572 | + While you're configuring your kernel, try to configure as much as possible | |
573 | + to build as modules. We recommend this because there are a number of drivers | |
574 | + that are still in the process of implementing proper power management | |
575 | + support. In those cases, the best way to work around their current lack is | |
576 | + to build them as modules and remove the modules while hibernating. You might | |
577 | + also bug the driver authors to get their support up to speed, or even help! | |
578 | + | |
579 | + b. Storage. | |
580 | + | |
581 | + i) Swap. | |
582 | + | |
583 | + TuxOnIce can store the hibernation image in your swap partition, a swap file or | |
584 | + a combination thereof. Whichever combination you choose, you will probably | |
585 | + want to create enough swap space to store the largest image you could have, | |
586 | + plus the space you'd normally use for swap. A good rule of thumb would be | |
587 | + to calculate the amount of swap you'd want without using TuxOnIce, and then | |
588 | + add the amount of memory you have. This swapspace can be arranged in any way | |
589 | + you'd like. It can be in one partition or file, or spread over a number. The | |
590 | + only requirement is that they be active when you start a hibernation cycle. | |
591 | + | |
592 | + There is one exception to this requirement. TuxOnIce has the ability to turn | |
593 | + on one swap file or partition at the start of hibernating and turn it back off | |
594 | + at the end. If you want to ensure you have enough memory to store an image | |
595 | + when your memory is fully used, you might want to make one swap partition or | |
596 | + file for 'normal' use, and another for TuxOnIce to activate & deactivate | |
597 | + automatically. (Further details below). | |
598 | + | |
599 | + ii) Normal files. | |
600 | + | |
601 | + TuxOnIce includes a 'file allocator'. The file allocator can store your | |
602 | + image in a simple file. Since Linux has the concept of everything being a | |
603 | + file, this is more powerful than it initially sounds. If, for example, you | |
604 | + were to set up a network block device file, you could hibernate to a network | |
605 | + server. This has been tested and works to a point, but nbd itself isn't | |
606 | + stateless enough for our purposes. | |
607 | + | |
608 | + Take extra care when setting up the file allocator. If you just type | |
609 | + commands without thinking and then try to hibernate, you could cause | |
610 | + irreversible corruption on your filesystems! Make sure you have backups. | |
611 | + | |
612 | + Most people will only want to hibernate to a local file. To achieve that, do | |
613 | + something along the lines of: | |
614 | + | |
615 | + echo "TuxOnIce" > /hibernation-file | |
616 | + dd if=/dev/zero bs=1M count=512 >> /hibernation-file | |
617 | + | |
618 | + This will create a 512MB file called /hibernation-file. To get TuxOnIce to use | |
619 | + it: | |
620 | + | |
621 | + echo /hibernation-file > /sys/power/tuxonice/file/target | |
622 | + | |
623 | + Then | |
624 | + | |
625 | + cat /sys/power/tuxonice/resume | |
626 | + | |
627 | + Put the results of this into your bootloader's configuration (see also step | |
628 | + C, below): | |
629 | + | |
630 | + ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- | |
631 | + # cat /sys/power/tuxonice/resume | |
632 | + file:/dev/hda2:0x1e001 | |
633 | + | |
634 | + In this example, we would edit the append= line of our lilo.conf|menu.lst | |
635 | + so that it included: | |
636 | + | |
637 | + resume=file:/dev/hda2:0x1e001 | |
638 | + ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- | |
639 | + | |
640 | + For those who are thinking 'Could I make the file sparse?', the answer is | |
641 | + 'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in | |
642 | + a sparse file while hibernating. In the longer term (post merge!), I'd like | |
643 | + to change things so that the file could be dynamically resized and have | |
644 | + holes filled as needed. Right now, however, that's not possible and not a | |
645 | + priority. | |
646 | + | |
647 | + c. Bootloader configuration. | |
648 | + | |
649 | + Using TuxOnIce also requires that you add an extra parameter to | |
650 | + your lilo.conf or equivalent. Here's an example for a swap partition: | |
651 | + | |
652 | + append="resume=swap:/dev/hda1" | |
653 | + | |
654 | + This would tell TuxOnIce that /dev/hda1 is a swap partition you | |
655 | + have. TuxOnIce will use the swap signature of this partition as a | |
656 | + pointer to your data when you hibernate. This means that (in this example) | |
657 | + /dev/hda1 doesn't need to be _the_ swap partition where all of your data | |
658 | + is actually stored. It just needs to be a swap partition that has a | |
659 | + valid signature. | |
660 | + | |
661 | + You don't need to have a swap partition for this purpose. TuxOnIce | |
662 | + can also use a swap file, but usage is a little more complex. Having made | |
663 | + your swap file, turn it on and do | |
664 | + | |
665 | + cat /sys/power/tuxonice/swap/headerlocations | |
666 | + | |
667 | + (this assumes you've already compiled your kernel with TuxOnIce | |
668 | + support and booted it). The results of the cat command will tell you | |
669 | + what you need to put in lilo.conf: | |
670 | + | |
671 | + For swap partitions like /dev/hda1, simply use resume=/dev/hda1. | |
672 | + For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d. | |
673 | + | |
674 | + If the swapfile changes for any reason (it is moved to a different | |
675 | + location, it is deleted and recreated, or the filesystem is | |
676 | + defragmented) then you will have to check | |
677 | + /sys/power/tuxonice/swap/headerlocations for a new resume_block value. | |
678 | + | |
679 | + Once you've compiled and installed the kernel and adjusted your bootloader | |
680 | + configuration, you should only need to reboot for the most basic part | |
681 | + of TuxOnIce to be ready. | |
682 | + | |
683 | + If you only compile in the swap allocator, or only compile in the file | |
684 | + allocator, you don't need to add the "swap:" part of the resume= | |
685 | + parameters above. resume=/dev/hda2:0x242d will work just as well. If you | |
686 | + have compiled both and your storage is on swap, you can also use this | |
687 | + format (the swap allocator is the default allocator). | |
688 | + | |
689 | + When compiling your kernel, one of the options in the 'Power Management | |
690 | + Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is | |
691 | + called 'Default resume partition'. This can be used to set a default value | |
692 | + for the resume= parameter. | |
693 | + | |
694 | + d. The hibernate script. | |
695 | + | |
696 | + Since the driver model in 2.6 kernels is still being developed, you may need | |
697 | + to do more than just configure TuxOnIce. Users of TuxOnIce usually start the | |
698 | + process via a script which prepares for the hibernation cycle, tells the | |
699 | + kernel to do its stuff and then restores things afterwards. This script might | |
700 | + involve: | |
701 | + | |
702 | + - Switching to a text console and back if X doesn't like the video card | |
703 | + status on resume. | |
704 | + - Un/reloading drivers that don't play well with hibernation. | |
705 | + | |
706 | + Note that you might not be able to unload some drivers if there are | |
707 | + processes using them. You might have to kill off processes that hold | |
708 | + devices open. Hint: if your X server accesses a USB mouse, doing a | |
709 | + 'chvt' to a text console releases the device and you can unload the | |
710 | + module. | |
711 | + | |
712 | + Check out the latest script (available on tuxonice.net). | |
713 | + | |
714 | + e. The userspace user interface. | |
715 | + | |
716 | + TuxOnIce has very limited support for displaying status if you only apply | |
717 | + the kernel patch - it can printk messages, but that is all. In addition, | |
718 | + some of the functions mentioned in this document (such as cancelling a cycle | |
719 | + or performing interactive debugging) are unavailable. To utilise these | |
720 | + functions, or simply get a nice display, you need the 'userui' component. | |
721 | + Userui comes in three flavours, usplash, fbsplash and text. Text should | |
722 | + work on any console. Usplash and fbsplash require the appropriate | |
723 | + (distro specific?) support. | |
724 | + | |
725 | + To utilise a userui, TuxOnIce just needs to be told where to find the | |
726 | + userspace binary: | |
727 | + | |
728 | + echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program | |
729 | + | |
730 | + The hibernate script can do this for you, and a default value for this | |
731 | + setting can be configured when compiling the kernel. This path is also | |
732 | + stored in the image header, so if you have an initrd or initramfs, you can | |
733 | + use the userui during the first part of resuming (prior to the atomic | |
734 | + restore) by putting the binary in the same path in your initrd/ramfs. | |
735 | + Alternatively, you can put it in a different location and do an echo | |
736 | + similar to the above prior to the echo > do_resume. The value saved in the | |
737 | + image header will then be ignored. | |
738 | + | |
739 | +4. Why not just use the version already in the kernel? | |
740 | + | |
741 | + The version in the vanilla kernel has a number of drawbacks. The most | |
742 | + serious of these are: | |
743 | + - it has a maximum image size of 1/2 total memory; | |
744 | + - it doesn't allocate storage until after it has snapshotted memory. | |
745 | + This means that you can't be sure hibernating will work until you | |
746 | + see it start to write the image; | |
747 | + - it does not allow you to press escape to cancel a cycle; | |
748 | + - it does not allow you to press escape to cancel resuming; | |
749 | + - it does not allow you to automatically swapon a file when | |
750 | + starting a cycle; | |
751 | + - it does not allow you to use multiple swap partitions or files; | |
752 | + - it does not allow you to use ordinary files; | |
753 | + - it just invalidates an image and continues to boot if you | |
754 | + accidentally boot the wrong kernel after hibernating; | |
755 | + - it doesn't support any sort of nice display while hibernating; | |
756 | + - it is moving toward requiring that you have an initrd/initramfs | |
757 | + to ever have a hope of resuming (uswsusp). While uswsusp will | |
758 | + address some of the concerns above, it won't address all of them, | |
759 | + and will be more complicated to get set up; | |
760 | + - it doesn't have support for suspend-to-both (write a hibernation | |
761 | + image, then suspend to ram; I think this is known as ReadySafe | |
762 | + under M$). | |
763 | + | |
764 | +5. How do you use it? | |
765 | + | |
766 | + A hibernation cycle can be started directly by doing: | |
767 | + | |
768 | + echo > /sys/power/tuxonice/do_hibernate | |
769 | + | |
770 | + In practice, though, you'll probably want to use the hibernate script | |
771 | + to unload modules, configure the kernel the way you like it and so on. | |
772 | + In that case, you'd do (as root): | |
773 | + | |
774 | + hibernate | |
775 | + | |
776 | + See the hibernate script's man page for more details on the options it | |
777 | + takes. | |
778 | + | |
779 | + If you're using the text or splash user interface modules, one feature of | |
780 | + TuxOnIce that you might find useful is that you can press Escape at any time | |
781 | + during hibernating, and the process will be aborted. | |
782 | + | |
783 | + Due to the way hibernation works, this means you'll have your system back and | |
784 | + perfectly usable almost instantly. The only exception is when it's at the | |
785 | + very end of writing the image. Then it will need to reload a small (usually | |
786 | + 4-50MBs, depending upon the image characteristics) portion first. | |
787 | + | |
788 | + Likewise, when resuming, you can press escape and resuming will be aborted. | |
789 | + The computer will then powerdown again according to settings at that time for | |
790 | + the powerdown method or rebooting. | |
791 | + | |
792 | + You can change the settings for powering down while the image is being | |
793 | + written by pressing 'R' to toggle rebooting and 'O' to toggle between | |
794 | + suspending to ram and powering down completely. | |
795 | + | |
796 | + If you run into problems with resuming, adding the "noresume" option to | |
797 | + the kernel command line will let you skip the resume step and recover your | |
798 | + system. This option shouldn't normally be needed, because TuxOnIce modifies | |
799 | + the image header prior to the atomic restore, and will thus prompt you | |
800 | + if it detects that you've tried to resume an image before (this flag is | |
801 | + removed if you press Escape to cancel a resume, so you won't be prompted | |
802 | + then). | |
803 | + | |
804 | + Recent kernels (2.6.24 onwards) add support for resuming from a different | |
805 | + kernel to the one that was hibernated (thanks to Rafael for his work on | |
806 | + this - I've just embraced and enhanced the support for TuxOnIce). This | |
807 | + should further reduce the need for you to use the noresume option. | |
808 | + | |
809 | +6. What do all those entries in /sys/power/tuxonice do? | |
810 | + | |
811 | + /sys/power/tuxonice is the directory which contains files you can use to | |
812 | + tune and configure TuxOnIce to your liking. The exact contents of | |
813 | + the directory will depend upon the version of TuxOnIce you're | |
814 | + running and the options you selected at compile time. In the following | |
815 | + descriptions, names in brackets refer to compile time options. | |
816 | + (Note that they're all dependent upon you having selected CONFIG_TUXONICE | |
817 | + in the first place!). | |
818 | + | |
819 | + Since the values of these settings can open potential security risks, the | |
820 | + writeable ones are accessible only to the root user. You may want to | |
821 | + configure sudo to allow you to invoke your hibernate script as an ordinary | |
822 | + user. | |
823 | + | |
9474138d AM |
824 | + - alloc/failure_test |
825 | + | |
826 | + This debugging option provides a way of testing TuxOnIce's handling of | |
827 | + memory allocation failures. Each allocation type that TuxOnIce makes has | |
828 | + been given a unique number (see the source code). Echo the appropriate | |
829 | + number into this entry, and when TuxOnIce attempts to do that allocation, | |
830 | + it will pretend there was a failure and act accordingly. | |
831 | + | |
832 | + - alloc/find_max_mem_allocated | |
833 | + | |
834 | + This debugging option will cause TuxOnIce to find the maximum amount of | |
835 | + memory it used during a cycle, and report that information in debugging | |
836 | + information at the end of the cycle. | |
837 | + | |
838 | + - alt_resume_param | |
839 | + | |
840 | + Instead of powering down after writing a hibernation image, TuxOnIce | |
841 | + supports resuming from a different image. This entry lets you set the | |
842 | + location of the signature for that image (the resume= value you'd use | |
843 | + for it). Using an alternate image and keep_image mode, you can do things | |
844 | + like using an alternate image to power down an uninterruptible power | |
845 | + supply. | |
846 | + | |
847 | + - block_io/target_outstanding_io | |
848 | + | |
849 | + This value controls the amount of memory that the block I/O code says it | |
850 | + needs when the core code is calculating how much memory is needed for | |
851 | + hibernating and for resuming. It doesn't directly control the amount of | |
852 | + I/O that is submitted at any one time - that depends on the amount of | |
853 | + available memory (we may have more available than we asked for), the | |
854 | + throughput that is being achieved and the ability of the CPU to keep up | |
855 | + with disk throughput (particularly where we're compressing pages). | |
856 | + | |
2380c486 JR |
857 | + - checksum/enabled |
858 | + | |
859 | + Use cryptoapi hashing routines to verify that Pageset2 pages don't change | |
860 | + while we're saving the first part of the image, and to get any pages that | |
861 | + do change resaved in the atomic copy. This should normally not be needed, | |
862 | + but if you're seeing issues, please enable this. If your issues stop you | |
863 | + being able to resume, enable this option, hibernate and cancel the cycle | |
864 | + after the atomic copy is done. If the debugging info shows a non-zero | |
865 | + number of pages resaved, please report this to Nigel. | |
866 | + | |
867 | + - compression/algorithm | |
868 | + | |
869 | + Set the cryptoapi algorithm used for compressing the image. | |
870 | + | |
871 | + - compression/expected_compression | |
872 | + | |
873 | + These values allow you to set an expected compression ratio, which TuxOnIce | |
874 | + will use in calculating whether it meets constraints on the image size. If | |
875 | + this expected compression ratio is not attained, the hibernation cycle will | |
876 | + abort, so it is wise to allow some spare. You can see what compression | |
877 | + ratio is achieved in the logs after hibernating. | |
878 | + | |
879 | + - debug_info: | |
880 | + | |
881 | + This file returns information about your configuration that may be helpful | |
882 | + in diagnosing problems with hibernating. | |
883 | + | |
9474138d AM |
884 | + - did_suspend_to_both: |
885 | + | |
886 | + This file can be used when you hibernate with powerdown method 3 (ie suspend | |
887 | + to ram after writing the image). There can be two outcomes in this case. We | |
888 | + can resume from the suspend-to-ram before the battery runs out, or we can run | |
889 | + out of juice and end up resuming like normal. This entry lets you find out, | |
890 | + post resume, which way we went. If the value is 1, we resumed from suspend | |
891 | + to ram. This can be useful when actions need to be run post suspend-to-ram | |
892 | + that don't need to be run if we did the normal resume from power off. | |
893 | + | |
2380c486 JR |
894 | + - do_hibernate: |
895 | + | |
896 | + When anything is written to this file, the kernel side of TuxOnIce will | |
897 | + begin to attempt to write an image to disk and power down. You'll normally | |
898 | + want to run the hibernate script instead, to get modules unloaded first. | |
899 | + | |
900 | + - do_resume: | |
901 | + | |
902 | + When anything is written to this file TuxOnIce will attempt to read and | |
903 | + restore an image. If there is no image, it will return almost immediately. | |
904 | + If an image exists, the echo > will never return. Instead, the original | |
905 | + kernel context will be restored and the original echo > do_hibernate will | |
906 | + return. | |
907 | + | |
908 | + - */enabled | |
909 | + | |
910 | + These options can be used to temporarily disable various parts of TuxOnIce. | |
911 | + | |
912 | + - extra_pages_allowance | |
913 | + | |
914 | + When TuxOnIce does its atomic copy, it calls the driver model suspend | |
915 | + and resume methods. If you have DRI enabled with a driver such as fglrx, | |
916 | + this can result in the driver allocating a substantial amount of memory | |
917 | + for storing its state. Extra_pages_allowance tells TuxOnIce how much | |
918 | + extra memory it should ensure is available for those allocations. If | |
919 | + your attempts at hibernating end with a message in dmesg indicating that | |
920 | + insufficient extra pages were allowed, you need to increase this value. | |
921 | + | |
922 | + - file/target: | |
923 | + | |
924 | + Read this value to get the current setting. Write to it to point TuxOnIce | |
925 | + at a new storage location for the file allocator. See section 3.b.ii above | |
926 | + for details of how to set up the file allocator. | |
927 | + | |
928 | + - freezer_test | |
929 | + | |
930 | + This entry can be used to get TuxOnIce to just test the freezer and prepare | |
931 | + an image without actually doing a hibernation cycle. It is useful for | |
932 | + diagnosing freezing and image preparation issues. | |
933 | + | |
9474138d AM |
934 | + - full_pageset2 |
935 | + | |
936 | + TuxOnIce divides the pages that are stored in an image into two sets. The | |
937 | + difference between the two sets is that pages in pageset 1 are atomically | |
938 | + copied, and pages in pageset 2 are written to disk without being copied | |
939 | + first. A page CAN be written to disk without being copied first if and only | |
940 | + if its contents will not be modified or used at any time after userspace | |
941 | + processes are frozen. A page MUST be in pageset 1 if its contents are | |
942 | + modified or used at any time after userspace processes have been frozen. | |
943 | + | |
944 | + Normally (ie if this option is enabled), TuxOnIce will put all pages on the | |
945 | + per-zone LRUs in pageset2, then remove those pages used by any userspace | |
946 | + user interface helper and TuxOnIce storage manager that are running, | |
947 | + together with pages used by the GEM memory manager introduced around 2.6.28 | |
948 | + kernels. | |
949 | + | |
950 | + If this option is disabled, a much more conservative approach will be taken. | |
951 | + The only pages in pageset2 will be those belonging to userspace processes, | |
952 | + with the exclusion of those belonging to the TuxOnIce userspace helpers | |
953 | + mentioned above. This will result in a much smaller pageset2, and will | |
954 | + therefore result in smaller images than are possible with this option | |
955 | + enabled. | |
956 | + | |
957 | + - ignore_rootfs | |
958 | + | |
959 | + TuxOnIce records which device is mounted as the root filesystem when | |
960 | + writing the hibernation image. It will normally check at resume time that | |
961 | + this device isn't already mounted - that would be a cause of filesystem | |
962 | + corruption. In some particular cases (RAM based root filesystems), you | |
963 | + might want to disable this check. This option allows you to do that. | |
964 | + | |
2380c486 JR |
965 | + - image_exists: |
966 | + | |
967 | + Can be used in a script to determine whether a valid image exists at the | |
968 | + location currently pointed to by resume=. Returns up to three lines. | |
969 | + The first is whether an image exists (-1 for unsure, otherwise 0 or 1). | |
970 | + If an image exists, additional lines will return the machine and version. | |
971 | + Echoing anything to this entry removes any current image. | |
972 | + | |
973 | + - image_size_limit: | |
974 | + | |
975 | + The maximum size of hibernation image written to disk, measured in megabytes | |
976 | + (1024*1024). | |
977 | + | |
978 | + - last_result: | |
979 | + | |
980 | + The result of the last hibernation cycle, as defined in | |
981 | + include/linux/suspend-debug.h with the values SUSPEND_ABORTED to | |
982 | + SUSPEND_KEPT_IMAGE. This is a bitmask. | |
983 | + | |
9474138d AM |
984 | + - late_cpu_hotplug: |
985 | + | |
986 | + This sysfs entry controls whether cpu hotplugging is done - as normal - just | |
987 | + before (unplug) and after (replug) the atomic copy/restore (so that all | |
988 | + CPUs/cores are available for multithreaded I/O). The alternative is to | |
989 | + unplug all secondary CPUs/cores at the start of hibernating/resuming, and | |
990 | + replug them at the end of resuming. No multithreaded I/O will be possible in | |
991 | + this configuration, but the odd machine has been reported to require it. | |
992 | + | |
993 | + - lid_file: | |
994 | + | |
995 | + This determines which ACPI button file we look in to determine whether the | |
996 | + lid is open or closed after resuming from suspend to disk or power off. | |
997 | + If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state | |
998 | + and check its contents at the appropriate moment. See post_wake_state below | |
999 | + for more details on how this entry is used. | |
1000 | + | |
2380c486 JR |
1001 | + - log_everything (CONFIG_PM_DEBUG): |
1002 | + | |
1003 | + Setting this option results in all messages printed being logged. Normally, | |
1004 | + only a subset are logged, so as to not slow the process and not clutter the | |
1005 | + logs. Useful for debugging. It can be toggled during a cycle by pressing | |
1006 | + 'L'. | |
1007 | + | |
9474138d AM |
1008 | + - no_load_direct: |
1009 | + | |
1010 | + This is a debugging option. If, when loading the atomically copied pages of | |
1011 | + an image, TuxOnIce finds that the destination address for a page is free, | |
1012 | + it will normally allocate the page, load the data directly into that | |
1013 | + address and skip it in the atomic restore. If this option is enabled, the | |
1014 | + page will be loaded somewhere else and atomically restored like other pages. | |
1015 | + | |
1016 | + - no_flusher_thread: | |
1017 | + | |
1018 | + When doing multithreaded I/O (see below), the first online CPU can be used | |
1019 | + to _just_ submit compressed pages when writing the image, rather than | |
1020 | + compressing and submitting data. This option is normally disabled, but has | |
1021 | + been included because Nigel would like to see whether it will be more useful | |
1022 | + as the number of cores/cpus in computers increases. | |
1023 | + | |
1024 | + - no_multithreaded_io: | |
1025 | + | |
1026 | + TuxOnIce will normally create one thread per cpu/core on your computer, | |
1027 | + each of which will then perform I/O. This will generally result in | |
1028 | + throughput that's the maximum the storage medium can handle. There | |
1029 | + shouldn't be any reason to disable multithreaded I/O now, but this option | |
1030 | + has been retained for debugging purposes. | |
1031 | + | |
1032 | + - no_pageset2 | |
1033 | + | |
1034 | + See the entry for full_pageset2 above for an explanation of pagesets. | |
1035 | + Enabling this option causes TuxOnIce to do an atomic copy of all pages, | |
1036 | + thereby limiting the maximum image size to 1/2 of memory, as swsusp does. | |
1037 | + | |
1038 | + - no_pageset2_if_unneeded | |
1039 | + | |
1040 | + See the entry for full_pageset2 above for an explanation of pagesets. | |
1041 | + Enabling this option causes TuxOnIce to act like no_pageset2 was enabled | |
1042 | + if and only if it isn't needed anyway. This option may still make TuxOnIce | |
1043 | + less reliable because pageset2 pages are normally used to store the | |
1044 | + atomic copy - drivers that want to do allocations of larger amounts of | |
1045 | + memory in one shot will be more likely to find that those amounts aren't | |
1046 | + available if this option is enabled. | |
1047 | + | |
2380c486 JR |
1048 | + - pause_between_steps (CONFIG_PM_DEBUG): |
1049 | + | |
1050 | + This option is used during debugging, to make TuxOnIce pause between | |
1051 | + each step of the process. It is ignored when the nice display is on. | |
1052 | + | |
9474138d AM |
1053 | + - post_wake_state: |
1054 | + | |
1055 | + TuxOnIce provides support for automatically waking after a user-selected | |
1056 | + delay, and using a different powerdown method if the lid is still closed. | |
1057 | + (Yes, we're assuming a laptop). This entry lets you choose what state | |
1058 | + should be entered next. The values are those described under | |
1059 | + powerdown_method, below. It can be used to suspend to RAM after hibernating, | |
1060 | + then powerdown properly (say) 20 minutes later. It can also be used to power down | |
1061 | + properly, then wake at (say) 6.30am and suspend to RAM until you're ready | |
1062 | + to use the machine. | |
1063 | + | |
2380c486 JR |
1064 | + - powerdown_method: |
1065 | + | |
1066 | + Used to select a method by which TuxOnIce should powerdown after writing the | |
1067 | + image. Currently: | |
1068 | + | |
1069 | + 0: Don't use ACPI to power off. | |
1070 | + 3: Attempt to enter Suspend-to-ram. | |
1071 | + 4: Attempt to enter ACPI S4 mode. | |
1072 | + 5: Attempt to power down via ACPI S5 mode. | |
1073 | + | |
1074 | + Note that these options are highly dependent upon your hardware & software: | |
1075 | + | |
1076 | + 3: When successful, your machine suspends to ram instead of powering off. | |
1077 | + The advantage of using this mode is that it doesn't matter whether your | |
1078 | + battery has enough charge to make it through to your next resume. If it | |
1079 | + lasts, you will simply resume from suspend to ram (and the image on disk | |
1080 | + will be discarded). If the battery runs out, you will resume from disk | |
1081 | + instead. The disadvantage is that it takes longer than a normal | |
1082 | + suspend-to-ram to enter the state, since the suspend-to-disk image needs | |
1083 | + to be written first. | |
1084 | + 4/5: When successful, your machine will be off and consume (almost) no power. | |
1085 | + But it might still react to some external events like opening the lid or | |
1086 | + traffic on a network or usb device. For the bios, resume is then the same | |
1087 | + as warm boot, similar to a situation where you used the command `reboot' | |
1088 | + to reboot your machine. If your machine has problems on warm boot or if | |
1089 | + you want to protect your machine with the bios password, this is probably | |
1090 | + not the right choice. Mode 4 may be necessary on some machines where ACPI | |
1091 | + wake up methods need to be run to properly reinitialise hardware after a | |
1092 | + hibernation cycle. | |
1093 | + 0: Switch the machine completely off. The only possible wakeup is the power | |
1094 | + button. For the bios, resume is then the same as a cold boot, in | |
1095 | + particular you would have to provide your bios boot password if your | |
1096 | + machine uses that feature for booting. | |
1097 | + | |
1098 | + - progressbar_granularity_limit: | |
1099 | + | |
1100 | + This option can be used to limit the granularity of the progress bar | |
1101 | + displayed with a bootsplash screen. The value is the maximum number of | |
1102 | + steps. That is, 10 will make the progress bar jump in 10% increments. | |
1103 | + | |
1104 | + - reboot: | |
1105 | + | |
1106 | + This option causes TuxOnIce to reboot rather than powering down | |
1107 | + at the end of saving an image. It can be toggled during a cycle by pressing | |
1108 | + 'R'. | |
1109 | + | |
9474138d AM |
1110 | + - resume: |
1111 | + | |
1112 | + This sysfs entry can be used to read and set the location in which TuxOnIce | |
1113 | + will look for the signature of an image - the value set using resume= at | |
1114 | + boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By | |
1115 | + writing to this file as well as modifying your bootloader's configuration | |
1116 | + file (eg menu.lst), you can set or reset the location of your image or the | |
1117 | + method of storing the image without rebooting. | |
1118 | + | |
1119 | + - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP): | |
1120 | + | |
1121 | + This option makes | |
1122 | + | |
1123 | + echo disk > /sys/power/state | |
1124 | + | |
1125 | + activate TuxOnIce instead of swsusp. Regardless of whether this option is | |
1126 | + enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce | |
1127 | + to check for an image too. This is due to the fact that at resume time, we | |
1128 | + can't know whether this option was enabled until we see if an image is there | |
1129 | + for us to resume from. (And when an image exists, we don't care whether we | |
1130 | + did replace swsusp anyway - we just want to resume). | |
1131 | + | |
2380c486 JR |
1132 | + - resume_commandline: |
1133 | + | |
1134 | + This entry can be read after resuming to see the commandline that was used | |
1135 | + when resuming began. You might use this to set up two bootloader entries | |
1136 | + that are the same apart from the fact that one includes an extra append= | |
1137 | + argument "at_work=1". You could then grep resume_commandline in your | |
1138 | + post-resume scripts and configure networking (for example) differently | |
1139 | + depending upon whether you're at home or work. resume_commandline can be | |
1140 | + set to arbitrary text if you wish to remove sensitive contents. | |
1141 | + | |
1142 | + - swap/swapfilename: | |
1143 | + | |
1144 | + This entry is used to specify the swapfile or partition that | |
1145 | + TuxOnIce will attempt to swapon/swapoff automatically. Thus, if | |
1146 | + I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically | |
1147 | + for my hibernation image, I would | |
1148 | + | |
1149 | + echo /dev/hda2 > /sys/power/tuxonice/swap/swapfilename | |
1150 | + | |
1151 | + /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the | |
1152 | + swapon and swapoff occur while other processes are frozen (including kswapd) | |
1153 | + so this swap file will not be used up when attempting to free memory. The | |
1154 | + partition/file is also given the highest priority, so other swapfiles/partitions | |
1155 | + will only be used to save the image when this one is filled. | |
1156 | + | |
1157 | + The value of this file is used by headerlocations along with any currently | |
1158 | + activated swapfiles/partitions. | |
1159 | + | |
1160 | + - swap/headerlocations: | |
1161 | + | |
1162 | + This option tells you the resume= options to use for swap devices you | |
1163 | + currently have activated. It is particularly useful when you only want to | |
1164 | + use a swap file to store your image. See above for further details. | |
1165 | + | |
9474138d | 1166 | + - test_bio |
2380c486 | 1167 | + |
9474138d AM |
1168 | + This is a debugging option. When enabled, TuxOnIce will not hibernate. |
1169 | + Instead, when asked to write an image, it will skip the atomic copy, | |
1170 | + just doing the writing of the image and then returning control to the | |
1171 | + user at the point where it would have powered off. This is useful for | |
1172 | + testing throughput in different configurations. | |
1173 | + | |
1174 | + - test_filter_speed | |
1175 | + | |
1176 | + This is a debugging option. When enabled, TuxOnIce will not hibernate. | |
1177 | + Instead, when asked to write an image, it will not write anything or do | |
1178 | + an atomic copy, but will only run any enabled compression algorithm on the | |
1179 | + data that would have been written (the source pages of the atomic copy in | |
1180 | + the case of pageset 1). This is useful for comparing the performance of | |
1181 | + compression algorithms and for determining the extent to which an upgrade | |
1182 | + to your storage method would improve hibernation speed. | |
2380c486 JR |
1183 | + |
1184 | + - user_interface/debug_sections (CONFIG_PM_DEBUG): | |
1185 | + | |
1186 | + This value, together with the console log level, controls what debugging | |
1187 | + information is displayed. The console log level determines the level of | |
1188 | + detail, and this value determines what detail is displayed. This value is | |
1189 | + a bit vector, and the meaning of the bits can be found in the kernel tree | |
1190 | + in include/linux/tuxonice.h. It can be overridden using the kernel's | |
1191 | + command line option suspend_dbg. | |
1192 | + | |
1193 | + - user_interface/default_console_level (CONFIG_PM_DEBUG): | |
1194 | + | |
1195 | + This determines the value of the console log level at the start of a | |
1196 | + hibernation cycle. If debugging is compiled in, the console log level can be | |
1197 | + changed during a cycle by pressing the digit keys. Meanings are: | |
1198 | + | |
1199 | + 0: Nice display. | |
1200 | + 1: Nice display plus numerical progress. | |
1201 | + 2: Errors only. | |
1202 | + 3: Low level debugging info. | |
1203 | + 4: Medium level debugging info. | |
1204 | + 5: High level debugging info. | |
1205 | + 6: Verbose debugging info. | |
1206 | + | |
1207 | + - user_interface/enable_escape: | |
1208 | + | |
1209 | + Setting this to "1" will enable you to abort a hibernation cycle or resuming by | |
1210 | + pressing escape; "0" (default) disables this feature. Note that enabling | |
1211 | + this option means that you cannot initiate a hibernation cycle and then walk | |
9474138d | 1212 | + away from your computer, expecting it to be secure. With this feature disabled, | |
2380c486 JR |
1213 | + you can validly have this expectation once TuxOnIce begins to write the | |
1214 | + image to disk. (Prior to this point, it is possible that TuxOnIce might | |
1215 | + abort because of failure to freeze all processes or because constraints | |
1216 | + on its ability to save the image are not met). | |
1217 | + | |
9474138d AM |
1218 | + - user_interface/program |
1219 | + | |
1220 | + This entry is used to tell TuxOnIce what userspace program to use for | |
1221 | + providing a user interface while hibernating. The program uses a netlink | |
1222 | + socket to pass messages back and forward to the kernel, allowing all of the | |
1223 | + functions formerly implemented in the kernel user interface components. | |
1224 | + | |
2380c486 JR |
1225 | + - version: |
1226 | + | |
1227 | + The version of TuxOnIce you have compiled into the currently running kernel. | |
1228 | + | |
9474138d AM |
1229 | + - wake_alarm_dir: |
1230 | + | |
1231 | + As mentioned above (post_wake_state), TuxOnIce supports automatically waking | |
1232 | + after some delay. This entry allows you to select which wake alarm to use. | |
1233 | + It should contain the value "rtc0" if you're wanting to use | |
1234 | + /sys/class/rtc/rtc0. | |
1235 | + | |
1236 | + - wake_delay: | |
1237 | + | |
1238 | + This value determines the delay from the end of writing the image until the | |
1239 | + wake alarm is triggered. You can set an absolute time by writing the desired | |
1240 | + time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values | |
1241 | + empty. | |
1242 | + | |
1243 | + Note that for the wakeup to actually occur, you may need to modify entries | |
1244 | + in /proc/acpi/wakeup. This is done by echoing the name of the button in the | |
1245 | + first column (eg PBTN) into the file. | |
1246 | + | |
2380c486 JR |
1247 | +7. How do you get support? |
1248 | + | |
1249 | + Glad you asked. TuxOnIce is being actively maintained and supported | |
1250 | + by Nigel (the guy doing most of the kernel coding at the moment), Bernard | |
1251 | + (who maintains the hibernate script and userspace user interface components) | |
1252 | + and its users. | |
1253 | + | |
1254 | + Resources available include HowTos, FAQs and a Wiki, all available via | |
1255 | + tuxonice.net. You can find the mailing lists there. | |
1256 | + | |
1257 | +8. I think I've found a bug. What should I do? | |
1258 | + | |
1259 | + By far and away, the most common problems people have with TuxOnIce are | |
1260 | + related to drivers not having adequate power management support. In this | |
1261 | + case, it is not a bug with TuxOnIce, but we can still help you. As we | |
1262 | + mentioned above, such issues can usually be worked around by building the | |
1263 | + functionality as modules and unloading them while hibernating. Please visit | |
1264 | + the Wiki for up-to-date lists of known issues and work arounds. | |
1265 | + | |
1266 | + If this information doesn't help, try running: | |
1267 | + | |
1268 | + hibernate --bug-report | |
1269 | + | |
1270 | + ..and sending the output to the users mailing list. | |
1271 | + | |
1272 | + Good information on how to provide us with useful information from an | |
1273 | + oops is found in the file REPORTING-BUGS, in the top level directory | |
1274 | + of the kernel tree. If you get an oops, please especially note the | |
1275 | + information about running what is printed on the screen through ksymoops. | |
1276 | + The raw information is useless. | |
1277 | + | |
1278 | +9. When will XXX be supported? | |
1279 | + | |
1280 | + If there's a feature missing from TuxOnIce that you'd like, feel free to | |
1281 | + ask. We try to be obliging, within reason. | |
1282 | + | |
1283 | + Patches are welcome. Please send to the list. | |
1284 | + | |
1285 | +10. How does it work? | |
1286 | + | |
1287 | + TuxOnIce does its work in a number of steps. | |
1288 | + | |
1289 | + a. Freezing system activity. | |
1290 | + | |
1291 | + The first main stage in hibernating is to stop all other activity. This is | |
1292 | + achieved in stages. Processes are considered in four groups, which we will | |
1293 | + describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE | |
1294 | + flag, kernel threads without this flag, userspace processes with the | |
1295 | + PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are | |
1296 | + untouched by the refrigerator code. They are allowed to run during hibernating | |
1297 | + and resuming, and are used to support user interaction, storage access or the | |
1298 | + like. Other kernel threads (those unneeded while hibernating) are frozen last. | |
1299 | + This leaves us with userspace processes that need to be frozen. When a | |
1300 | + process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on | |
1301 | + that process for the duration of that call. Processes that have this flag are | |
1302 | + frozen after processes without it, so that we can seek to ensure that dirty | |
1303 | + data is synced to disk as quickly as possible in a situation where other | |
1304 | + processes may be submitting writes at the same time. Freezing the processes | |
1305 | + that are submitting data stops new I/O from being submitted. Syncthreads can | |
1306 | + then cleanly finish their work. So the order is: | |
1307 | + | |
1308 | + - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE; | |
1309 | + - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE); | |
1310 | + - Kernel processes without PF_NOFREEZE. | |
1311 | + | |
1312 | + b. Eating memory. | |
1313 | + | |
1314 | + For a successful hibernation cycle, you need to have enough disk space to store the | |
1315 | + image and enough memory for the various limitations of TuxOnIce's | |
1316 | + algorithm. You can also specify a maximum image size. In order to meet | |
1317 | + those constraints, TuxOnIce may 'eat' memory. If, after freezing | |
1318 | + processes, the constraints aren't met, TuxOnIce will thaw all the | |
1319 | + other processes and begin to eat memory until its calculations indicate | |
1320 | + the constraints are met. It will then freeze processes again and recheck | |
1321 | + its calculations. | |
1322 | + | |
1323 | + c. Allocation of storage. | |
1324 | + | |
1325 | + Next, TuxOnIce allocates the storage that will be used to save | |
1326 | + the image. | |
1327 | + | |
1328 | + The core of TuxOnIce knows nothing about how or where pages are stored. We | |
1329 | + therefore request the active allocator (remember you might have compiled in | |
1330 | + more than one!) to allocate enough storage for our expected image size. If | |
1331 | + this request cannot be fulfilled, we eat more memory and try again. If it | |
1332 | + is fulfilled, we seek to allocate additional storage, just in case our | |
1333 | + expected compression ratio (if any) isn't achieved. This time, however, we | |
1334 | + just continue if we can't allocate enough storage. | |
1335 | + | |
1336 | + If these calls to our allocator change the characteristics of the image | |
1337 | + such that we haven't allocated enough memory, we also loop. (The allocator | |
1338 | + may well need to allocate space for its storage information). | |
1339 | + | |
1340 | + d. Write the first part of the image. | |
1341 | + | |
1342 | + TuxOnIce stores the image in two sets of pages called 'pagesets'. | |
1343 | + Pageset 2 contains pages on the active and inactive lists; essentially | |
1344 | + the page cache. Pageset 1 contains all other pages, including the kernel. | |
1345 | + We use two pagesets for one important reason: We need to make an atomic copy | |
1346 | + of the kernel to ensure consistency of the image. Without a second pageset, | |
1347 | + that would limit us to an image that was at most half the amount of memory | |
1348 | + available. Using two pagesets allows us to store a full image. Since pageset | |
1349 | + 2 pages won't be needed in saving pageset 1, we first save pageset 2 pages. | |
1350 | + We can then make our atomic copy of the remaining pages using both pageset 2 | |
1351 | + pages and any other pages that are free. While saving both pagesets, we are | |
1352 | + careful not to corrupt the image. Among other things, we use lowlevel block | |
1353 | + I/O routines that don't change the pagecache contents. | |
1354 | + | |
1355 | + The next step, then, is writing pageset 2. | |
1356 | + | |
1357 | + e. Suspending drivers and storing processor context. | |
1358 | + | |
1359 | + Having written pageset2, TuxOnIce calls the power management functions to | |
1360 | + notify drivers of the hibernation, and saves the processor state in preparation | |
1361 | + for the atomic copy of memory we are about to make. | |
1362 | + | |
1363 | + f. Atomic copy. | |
1364 | + | |
1365 | + At this stage, everything else but the TuxOnIce code is halted. Processes | |
1366 | + are frozen or idling, drivers are quiesced and have stored (ideally and where | |
1367 | + necessary) their configuration in memory we are about to atomically copy. | |
1368 | + In our lowlevel architecture specific code, we have saved the CPU state. | |
1369 | + We can therefore now do our atomic copy before resuming drivers etc. | |
1370 | + | |
1371 | + g. Save the atomic copy (pageset 1). | |
1372 | + | |
1373 | + TuxOnIce can then write the atomic copy of the remaining pages. Since we | |
1374 | + have copied the pages into other locations, we can continue to use the | |
1375 | + normal block I/O routines without fear of corrupting our image. | |
1376 | + | |
1377 | + h. Save the image header. | |
1378 | + | |
1379 | + Nearly there! We save our settings and other parameters needed for | |
1380 | + reloading pageset 1 in an 'image header'. We also tell our allocator to | |
1381 | + serialise its data at this stage, so that it can reread the image at resume | |
1382 | + time. | |
1383 | + | |
1384 | + i. Set the image header. | |
1385 | + | |
1386 | + Finally, we edit the header at our resume= location. The signature is | |
1387 | + changed by the allocator to reflect the fact that an image exists, and to | |
1388 | + point to the start of that data if necessary (swap allocator). | |
1389 | + | |
1390 | + j. Power down. | |
1391 | + | |
1392 | + Or reboot if we're debugging and the appropriate option is selected. | |
1393 | + | |
1394 | + Whew! | |
1395 | + | |
1396 | + Reloading the image. | |
1397 | + -------------------- | |
1398 | + | |
1399 | + Reloading the image is essentially the reverse of all the above. We load | |
1400 | + our copy of pageset 1, being careful to choose locations that aren't going | |
1401 | + to be overwritten as we copy it back (We start very early in the boot | |
1402 | + process, so there are no other processes to quiesce here). We then copy | |
1403 | + pageset 1 back to its original location in memory and restore the process | |
1404 | + context. We are now running with the original kernel. Next, we reload the | |
1405 | + pageset 2 pages, free the memory and swap used by TuxOnIce, restore | |
1406 | + the pageset header and restart processes. Sounds easy in comparison to | |
1407 | + hibernating, doesn't it! | |
1408 | + | |
1409 | + There is of course more to TuxOnIce than this, but this explanation | |
1410 | + should be a good start. If there's interest, I'll write further | |
1411 | + documentation on range pages and the low level I/O. | |
1412 | + | |
1413 | +11. Who wrote TuxOnIce? | |
1414 | + | |
1415 | + (Answer based on the writings of Florent Chabaud, credits in files and | |
1416 | + Nigel's limited knowledge; apologies to anyone missed out!) | |
1417 | + | |
1418 | + The main developers of TuxOnIce have been... | |
1419 | + | |
1420 | + Gabor Kuti | |
1421 | + Pavel Machek | |
1422 | + Florent Chabaud | |
1423 | + Bernard Blackham | |
1424 | + Nigel Cunningham | |
1425 | + | |
1426 | + Significant portions of swsusp, the code in the vanilla kernel which | |
1427 | + TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should | |
1428 | + also be expressed to him. | |
1429 | + | |
1430 | + The above mentioned developers have been aided in their efforts by a host | |
1431 | + of hundreds, if not thousands of testers and people who have submitted bug | |
1432 | + fixes & suggestions. Of special note are the efforts of Michael Frank, who | |
1433 | + had his computers repetitively hibernate and resume for literally tens of | |
1434 | + thousands of cycles and developed scripts to stress the system and test | |
1435 | + TuxOnIce far beyond the point most of us (Nigel included!) would consider | |
1436 | + testing. His efforts have contributed as much to TuxOnIce as any of the | |
1437 | + names above. | |
1438 | diff --git a/MAINTAINERS b/MAINTAINERS | |
9474138d | 1439 | index cf4abdd..979b923 100644 |
2380c486 JR |
1440 | --- a/MAINTAINERS |
1441 | +++ b/MAINTAINERS | |
9474138d AM |
1442 | @@ -5655,6 +5655,13 @@ S: Maintained |
1443 | F: drivers/tc/ | |
1444 | F: include/linux/tc.h | |
2380c486 JR |
1445 | |
1446 | +TUXONICE (ENHANCED HIBERNATION) | |
1447 | +P: Nigel Cunningham | |
1448 | +M: nigel@tuxonice.net | |
e999739a | 1449 | +L: tuxonice-devel@tuxonice.net |
2380c486 JR |
1450 | +W: http://tuxonice.net |
1451 | +S: Maintained | |
1452 | + | |
1453 | U14-34F SCSI DRIVER | |
1454 | P: Dario Ballabio | |
1455 | M: ballabio_dario@emc.com | |
1456 | diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c | |
9474138d | 1457 | index 5422169..33be4fa 100644 |
2380c486 JR |
1458 | --- a/arch/powerpc/mm/pgtable_32.c |
1459 | +++ b/arch/powerpc/mm/pgtable_32.c | |
9474138d | 1460 | @@ -396,6 +396,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) |
2380c486 JR |
1461 | |
1462 | change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); | |
1463 | } | |
1464 | +EXPORT_SYMBOL_GPL(kernel_map_pages); | |
1465 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | |
1466 | ||
1467 | static int fixmaps; | |
1468 | diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c | |
9474138d | 1469 | index 667188e..8113e78 100644 |
2380c486 JR |
1470 | --- a/arch/x86/kernel/reboot.c |
1471 | +++ b/arch/x86/kernel/reboot.c | |
9474138d | 1472 | @@ -620,6 +620,7 @@ void machine_restart(char *cmd) |
2380c486 JR |
1473 | { |
1474 | machine_ops.restart(cmd); | |
1475 | } | |
1476 | +EXPORT_SYMBOL_GPL(machine_restart); | |
1477 | ||
1478 | void machine_halt(void) | |
1479 | { | |
1480 | diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c | |
9474138d | 1481 | index e17efed..5d0b4d2 100644 |
2380c486 JR |
1482 | --- a/arch/x86/mm/pageattr.c |
1483 | +++ b/arch/x86/mm/pageattr.c | |
9474138d | 1484 | @@ -1268,6 +1268,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) |
2380c486 JR |
1485 | */ |
1486 | __flush_tlb_all(); | |
1487 | } | |
1488 | +EXPORT_SYMBOL_GPL(kernel_map_pages); | |
1489 | ||
1490 | #ifdef CONFIG_HIBERNATION | |
1491 | ||
9474138d | 1492 | @@ -1282,7 +1283,7 @@ bool kernel_page_present(struct page *page) |
2380c486 JR |
1493 | pte = lookup_address((unsigned long)page_address(page), &level); |
1494 | return (pte_val(*pte) & _PAGE_PRESENT); | |
1495 | } | |
1496 | - | |
1497 | +EXPORT_SYMBOL_GPL(kernel_page_present); | |
1498 | #endif /* CONFIG_HIBERNATION */ | |
1499 | ||
1500 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | |
1501 | diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c | |
9474138d | 1502 | index 5343540..d2d55bd 100644 |
2380c486 JR |
1503 | --- a/arch/x86/power/cpu_64.c |
1504 | +++ b/arch/x86/power/cpu_64.c | |
1505 | @@ -10,6 +10,7 @@ | |
1506 | ||
1507 | #include <linux/smp.h> | |
1508 | #include <linux/suspend.h> | |
1509 | +#include <linux/module.h> | |
1510 | #include <asm/proto.h> | |
1511 | #include <asm/page.h> | |
1512 | #include <asm/pgtable.h> | |
9474138d | 1513 | @@ -77,6 +78,7 @@ void save_processor_state(void) |
2380c486 JR |
1514 | { |
1515 | __save_processor_state(&saved_context); | |
1516 | } | |
1517 | +EXPORT_SYMBOL_GPL(save_processor_state); | |
1518 | ||
1519 | static void do_fpu_end(void) | |
1520 | { | |
1521 | diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c | |
1522 | index 81197c6..ff7e534 100644 | |
1523 | --- a/arch/x86/power/hibernate_32.c | |
1524 | +++ b/arch/x86/power/hibernate_32.c | |
1525 | @@ -8,6 +8,7 @@ | |
1526 | ||
1527 | #include <linux/suspend.h> | |
1528 | #include <linux/bootmem.h> | |
1529 | +#include <linux/module.h> | |
1530 | ||
1531 | #include <asm/system.h> | |
1532 | #include <asm/page.h> | |
1533 | @@ -163,6 +164,7 @@ int swsusp_arch_resume(void) | |
1534 | restore_image(); | |
1535 | return 0; | |
1536 | } | |
1537 | +EXPORT_SYMBOL_GPL(swsusp_arch_resume); | |
1538 | ||
1539 | /* | |
1540 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | |
1541 | diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c | |
9474138d | 1542 | index 65fdc86..e5c31f6 100644 |
2380c486 JR |
1543 | --- a/arch/x86/power/hibernate_64.c |
1544 | +++ b/arch/x86/power/hibernate_64.c | |
1545 | @@ -10,6 +10,7 @@ | |
1546 | ||
1547 | #include <linux/smp.h> | |
1548 | #include <linux/suspend.h> | |
1549 | +#include <linux/module.h> | |
1550 | #include <asm/proto.h> | |
1551 | #include <asm/page.h> | |
1552 | #include <asm/pgtable.h> | |
9474138d | 1553 | @@ -118,6 +119,7 @@ int swsusp_arch_resume(void) |
2380c486 JR |
1554 | restore_image(); |
1555 | return 0; | |
1556 | } | |
1557 | +EXPORT_SYMBOL_GPL(swsusp_arch_resume); | |
1558 | ||
1559 | /* | |
1560 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | |
9474138d | 1561 | @@ -168,3 +170,4 @@ int arch_hibernation_header_restore(void *addr) |
2380c486 JR |
1562 | restore_cr3 = rdr->cr3; |
1563 | return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; | |
1564 | } | |
1565 | +EXPORT_SYMBOL_GPL(arch_hibernation_header_restore); | |
2380c486 | 1566 | diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c |
9474138d | 1567 | index 3e4bc69..80612e3 100644 |
2380c486 JR |
1568 | --- a/drivers/base/power/main.c |
1569 | +++ b/drivers/base/power/main.c | |
9474138d | 1570 | @@ -55,6 +55,7 @@ void device_pm_lock(void) |
2380c486 JR |
1571 | { |
1572 | mutex_lock(&dpm_list_mtx); | |
1573 | } | |
1574 | +EXPORT_SYMBOL_GPL(device_pm_lock); | |
1575 | ||
1576 | /** | |
1577 | * device_pm_unlock - unlock the list of active devices used by the PM core | |
9474138d | 1578 | @@ -63,6 +64,7 @@ void device_pm_unlock(void) |
2380c486 JR |
1579 | { |
1580 | mutex_unlock(&dpm_list_mtx); | |
1581 | } | |
1582 | +EXPORT_SYMBOL_GPL(device_pm_unlock); | |
1583 | ||
1584 | /** | |
1585 | * device_pm_add - add a device to the list of active devices | |
1586 | diff --git a/drivers/char/vt.c b/drivers/char/vt.c | |
9474138d | 1587 | index 08151d4..7377d98 100644 |
2380c486 JR |
1588 | --- a/drivers/char/vt.c |
1589 | +++ b/drivers/char/vt.c | |
1590 | @@ -187,6 +187,7 @@ int fg_console; | |
1591 | int last_console; | |
1592 | int want_console = -1; | |
1593 | int kmsg_redirect; | |
1594 | +EXPORT_SYMBOL_GPL(kmsg_redirect); | |
1595 | ||
1596 | /* | |
1597 | * For each existing display, we have a pointer to console currently visible | |
e999739a | 1598 | diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c |
9474138d | 1599 | index 4984aa8..c69b548 100644 |
e999739a | 1600 | --- a/drivers/gpu/drm/drm_gem.c |
1601 | +++ b/drivers/gpu/drm/drm_gem.c | |
1602 | @@ -136,7 +136,8 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size) | |
1603 | obj = kcalloc(1, sizeof(*obj), GFP_KERNEL); | |
1604 | ||
1605 | obj->dev = dev; | |
1606 | - obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); | |
1607 | + obj->filp = shmem_file_setup("drm mm object", size, | |
1608 | + VM_NORESERVE | VM_ATOMIC_COPY); | |
1609 | if (IS_ERR(obj->filp)) { | |
1610 | kfree(obj); | |
1611 | return NULL; | |
2380c486 | 1612 | diff --git a/drivers/md/md.c b/drivers/md/md.c |
9474138d | 1613 | index 641b211..73ccc45 100644 |
2380c486 JR |
1614 | --- a/drivers/md/md.c |
1615 | +++ b/drivers/md/md.c | |
9474138d | 1616 | @@ -6251,6 +6251,9 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
1617 | mddev->curr_resync = 2; |
1618 | ||
1619 | try_again: | |
1620 | + while (freezer_is_on()) | |
1621 | + yield(); | |
1622 | + | |
1623 | if (kthread_should_stop()) { | |
1624 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | |
1625 | goto skip; | |
9474138d | 1626 | @@ -6272,6 +6275,10 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
1627 | * time 'round when curr_resync == 2 |
1628 | */ | |
1629 | continue; | |
1630 | + | |
1631 | + while (freezer_is_on()) | |
1632 | + yield(); | |
1633 | + | |
1634 | /* We need to wait 'interruptible' so as not to | |
1635 | * contribute to the load average, and not to | |
1636 | * be caught by 'softlockup' | |
9474138d | 1637 | @@ -6284,6 +6291,7 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
1638 | " share one or more physical units)\n", |
1639 | desc, mdname(mddev), mdname(mddev2)); | |
1640 | mddev_put(mddev2); | |
1641 | + try_to_freeze(); | |
1642 | if (signal_pending(current)) | |
1643 | flush_signals(current); | |
1644 | schedule(); | |
9474138d | 1645 | @@ -6384,6 +6392,9 @@ void md_do_sync(mddev_t *mddev) |
2380c486 JR |
1646 | mddev->resync_max > j |
1647 | || kthread_should_stop()); | |
9474138d | 1648 | |
2380c486 JR |
1649 | + while (freezer_is_on()) |
1650 | + yield(); | |
1651 | + | |
1652 | if (kthread_should_stop()) | |
1653 | goto interrupted; | |
9474138d AM |
1654 | |
1655 | @@ -6428,6 +6439,9 @@ void md_do_sync(mddev_t *mddev) | |
2380c486 JR |
1656 | last_mark = next; |
1657 | } | |
1658 | ||
1659 | + while (freezer_is_on()) | |
1660 | + yield(); | |
1661 | + | |
1662 | ||
1663 | if (kthread_should_stop()) | |
1664 | goto interrupted; | |
9474138d AM |
1665 | diff --git a/fs/block_dev.c b/fs/block_dev.c |
1666 | index f45dbc1..52a7869 100644 | |
1667 | --- a/fs/block_dev.c | |
1668 | +++ b/fs/block_dev.c | |
1669 | @@ -321,6 +321,93 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) | |
2380c486 JR |
1670 | } |
1671 | EXPORT_SYMBOL(thaw_bdev); | |
1672 | ||
1673 | +#ifdef CONFIG_FS_FREEZER_DEBUG | |
1674 | +#define FS_PRINTK(fmt, args...) printk(fmt, ## args) | |
1675 | +#else | |
1676 | +#define FS_PRINTK(fmt, args...) | |
1677 | +#endif | |
1678 | + | |
1679 | +/* #define DEBUG_FS_FREEZING */ | |
1680 | + | |
1681 | +/** | |
1682 | + * freeze_filesystems - lock all filesystems and force them into a consistent | |
1683 | + * state | |
1684 | + * @which: What combination of fuse & non-fuse to freeze. | |
1685 | + */ | |
1686 | +void freeze_filesystems(int which) | |
1687 | +{ | |
1688 | + struct super_block *sb; | |
1689 | + | |
1690 | + lockdep_off(); | |
1691 | + | |
1692 | + /* | |
1693 | + * Freeze in reverse order so filesystems dependant upon others are | |
1694 | + * frozen in the right order (eg. loopback on ext3). | |
1695 | + */ | |
1696 | + list_for_each_entry_reverse(sb, &super_blocks, s_list) { | |
1697 | + FS_PRINTK(KERN_INFO "Considering %s.%s: (root %p, bdev %x)", | |
1698 | + sb->s_type->name ? sb->s_type->name : "?", | |
1699 | + sb->s_subtype ? sb->s_subtype : "", sb->s_root, | |
1700 | + sb->s_bdev ? sb->s_bdev->bd_dev : 0); | |
1701 | + | |
1702 | + if (sb->s_type->fs_flags & FS_IS_FUSE && | |
1703 | + sb->s_frozen == SB_UNFROZEN && | |
1704 | + which & FS_FREEZER_FUSE) { | |
1705 | + sb->s_frozen = SB_FREEZE_TRANS; | |
1706 | + sb->s_flags |= MS_FROZEN; | |
1707 | + FS_PRINTK("Fuse filesystem done.\n"); | |
1708 | + continue; | |
1709 | + } | |
1710 | + | |
1711 | + if (!sb->s_root || !sb->s_bdev || | |
1712 | + (sb->s_frozen == SB_FREEZE_TRANS) || | |
1713 | + (sb->s_flags & MS_RDONLY) || | |
1714 | + (sb->s_flags & MS_FROZEN) || | |
1715 | + !(which & FS_FREEZER_NORMAL)) { | |
1716 | + FS_PRINTK(KERN_INFO "Nope.\n"); | |
1717 | + continue; | |
1718 | + } | |
1719 | + | |
1720 | + FS_PRINTK(KERN_INFO "Freezing %x... ", sb->s_bdev->bd_dev); | |
1721 | + freeze_bdev(sb->s_bdev); | |
1722 | + sb->s_flags |= MS_FROZEN; | |
1723 | + FS_PRINTK(KERN_INFO "Done.\n"); | |
1724 | + } | |
1725 | + | |
1726 | + lockdep_on(); | |
1727 | +} | |
1728 | + | |
1729 | +/** | |
1730 | + * thaw_filesystems - unlock all filesystems | |
1731 | + * @which: What combination of fuse & non-fuse to thaw. | |
1732 | + */ | |
1733 | +void thaw_filesystems(int which) | |
1734 | +{ | |
1735 | + struct super_block *sb; | |
1736 | + | |
1737 | + lockdep_off(); | |
1738 | + | |
1739 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
1740 | + if (!(sb->s_flags & MS_FROZEN)) | |
1741 | + continue; | |
1742 | + | |
1743 | + if (sb->s_type->fs_flags & FS_IS_FUSE) { | |
1744 | + if (!(which & FS_FREEZER_FUSE)) | |
1745 | + continue; | |
1746 | + | |
1747 | + sb->s_frozen = SB_UNFROZEN; | |
1748 | + } else { | |
1749 | + if (!(which & FS_FREEZER_NORMAL)) | |
1750 | + continue; | |
1751 | + | |
1752 | + thaw_bdev(sb->s_bdev, sb); | |
1753 | + } | |
1754 | + sb->s_flags &= ~MS_FROZEN; | |
1755 | + } | |
1756 | + | |
1757 | + lockdep_on(); | |
1758 | +} | |
1759 | + | |
9474138d AM |
1760 | static int blkdev_writepage(struct page *page, struct writeback_control *wbc) |
1761 | { | |
1762 | return block_write_full_page(page, blkdev_get_block, wbc); | |
2380c486 | 1763 | diff --git a/fs/drop_caches.c b/fs/drop_caches.c |
9474138d | 1764 | index b6a719a..30ef3f3 100644 |
2380c486 JR |
1765 | --- a/fs/drop_caches.c |
1766 | +++ b/fs/drop_caches.c | |
1767 | @@ -8,6 +8,7 @@ | |
1768 | #include <linux/writeback.h> | |
1769 | #include <linux/sysctl.h> | |
1770 | #include <linux/gfp.h> | |
1771 | +#include <linux/module.h> | |
1772 | ||
1773 | /* A global variable is a bit ugly, but it keeps the code simple */ | |
1774 | int sysctl_drop_caches; | |
1775 | @@ -33,7 +34,7 @@ static void drop_pagecache_sb(struct super_block *sb) | |
1776 | iput(toput_inode); | |
1777 | } | |
1778 | ||
1779 | -static void drop_pagecache(void) | |
1780 | +void drop_pagecache(void) | |
1781 | { | |
1782 | struct super_block *sb; | |
1783 | ||
1784 | @@ -61,6 +62,7 @@ static void drop_slab(void) | |
1785 | nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); | |
1786 | } while (nr_objects > 10); | |
1787 | } | |
1788 | +EXPORT_SYMBOL_GPL(drop_pagecache); | |
1789 | ||
1790 | int drop_caches_sysctl_handler(ctl_table *table, int write, | |
1791 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | |
1792 | diff --git a/fs/fuse/control.c b/fs/fuse/control.c | |
1793 | index 99c99df..cadffd8 100644 | |
1794 | --- a/fs/fuse/control.c | |
1795 | +++ b/fs/fuse/control.c | |
1796 | @@ -209,6 +209,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) | |
1797 | static struct file_system_type fuse_ctl_fs_type = { | |
1798 | .owner = THIS_MODULE, | |
1799 | .name = "fusectl", | |
1800 | + .fs_flags = FS_IS_FUSE, | |
1801 | .get_sb = fuse_ctl_get_sb, | |
1802 | .kill_sb = fuse_ctl_kill_sb, | |
1803 | }; | |
1804 | diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c | |
1805 | index ba76b68..e9942d4 100644 | |
1806 | --- a/fs/fuse/dev.c | |
1807 | +++ b/fs/fuse/dev.c | |
1808 | @@ -7,6 +7,7 @@ | |
1809 | */ | |
1810 | ||
1811 | #include "fuse_i.h" | |
1812 | +#include "fuse.h" | |
1813 | ||
1814 | #include <linux/init.h> | |
1815 | #include <linux/module.h> | |
1816 | @@ -16,6 +17,7 @@ | |
1817 | #include <linux/pagemap.h> | |
1818 | #include <linux/file.h> | |
1819 | #include <linux/slab.h> | |
1820 | +#include <linux/freezer.h> | |
1821 | ||
1822 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); | |
1823 | ||
1824 | @@ -752,6 +754,8 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, | |
1825 | if (!fc) | |
1826 | return -EPERM; | |
1827 | ||
1828 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_dev_read"); | |
1829 | + | |
1830 | restart: | |
1831 | spin_lock(&fc->lock); | |
1832 | err = -EAGAIN; | |
1833 | @@ -912,6 +916,9 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, | |
1834 | if (!fc) | |
1835 | return -EPERM; | |
1836 | ||
1837 | + FUSE_MIGHT_FREEZE(iocb->ki_filp->f_mapping->host->i_sb, | |
1838 | + "fuse_dev_write"); | |
1839 | + | |
1840 | fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs); | |
1841 | if (nbytes < sizeof(struct fuse_out_header)) | |
1842 | return -EINVAL; | |
1843 | diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c | |
9474138d | 1844 | index 8b8eebc..31cda20 100644 |
2380c486 JR |
1845 | --- a/fs/fuse/dir.c |
1846 | +++ b/fs/fuse/dir.c | |
1847 | @@ -7,12 +7,14 @@ | |
1848 | */ | |
1849 | ||
1850 | #include "fuse_i.h" | |
1851 | +#include "fuse.h" | |
1852 | ||
1853 | #include <linux/pagemap.h> | |
1854 | #include <linux/file.h> | |
1855 | #include <linux/gfp.h> | |
1856 | #include <linux/sched.h> | |
1857 | #include <linux/namei.h> | |
1858 | +#include <linux/freezer.h> | |
1859 | ||
1860 | #if BITS_PER_LONG >= 64 | |
1861 | static inline void fuse_dentry_settime(struct dentry *entry, u64 time) | |
1862 | @@ -174,6 +176,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) | |
1863 | return 0; | |
1864 | ||
1865 | fc = get_fuse_conn(inode); | |
1866 | + | |
1867 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_dentry_revalidate"); | |
1868 | + | |
1869 | req = fuse_get_req(fc); | |
1870 | if (IS_ERR(req)) | |
1871 | return 0; | |
1872 | @@ -268,6 +273,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, | |
1873 | if (name->len > FUSE_NAME_MAX) | |
1874 | goto out; | |
1875 | ||
1876 | + FUSE_MIGHT_FREEZE(sb, "fuse_lookup_name"); | |
1877 | + | |
1878 | req = fuse_get_req(fc); | |
1879 | err = PTR_ERR(req); | |
1880 | if (IS_ERR(req)) | |
1881 | @@ -331,6 +338,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, | |
1882 | if (err) | |
1883 | goto out_err; | |
1884 | ||
1885 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_lookup"); | |
1886 | + | |
1887 | err = -EIO; | |
1888 | if (inode && get_node_id(inode) == FUSE_ROOT_ID) | |
1889 | goto out_iput; | |
1890 | @@ -402,6 +411,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, | |
1891 | if (IS_ERR(forget_req)) | |
1892 | return PTR_ERR(forget_req); | |
1893 | ||
1894 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_create_open"); | |
1895 | + | |
1896 | req = fuse_get_req(fc); | |
1897 | err = PTR_ERR(req); | |
1898 | if (IS_ERR(req)) | |
1899 | @@ -488,6 +499,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, | |
1900 | int err; | |
1901 | struct fuse_req *forget_req; | |
1902 | ||
1903 | + FUSE_MIGHT_FREEZE(dir->i_sb, "create_new_entry"); | |
1904 | + | |
1905 | forget_req = fuse_get_req(fc); | |
1906 | if (IS_ERR(forget_req)) { | |
1907 | fuse_put_request(fc, req); | |
1908 | @@ -585,7 +598,11 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) | |
1909 | { | |
1910 | struct fuse_mkdir_in inarg; | |
1911 | struct fuse_conn *fc = get_fuse_conn(dir); | |
1912 | - struct fuse_req *req = fuse_get_req(fc); | |
1913 | + struct fuse_req *req; | |
1914 | + | |
1915 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_mkdir"); | |
1916 | + | |
1917 | + req = fuse_get_req(fc); | |
1918 | if (IS_ERR(req)) | |
1919 | return PTR_ERR(req); | |
1920 | ||
1921 | @@ -605,7 +622,11 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, | |
1922 | { | |
1923 | struct fuse_conn *fc = get_fuse_conn(dir); | |
1924 | unsigned len = strlen(link) + 1; | |
1925 | - struct fuse_req *req = fuse_get_req(fc); | |
1926 | + struct fuse_req *req; | |
1927 | + | |
1928 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_symlink"); | |
1929 | + | |
1930 | + req = fuse_get_req(fc); | |
1931 | if (IS_ERR(req)) | |
1932 | return PTR_ERR(req); | |
1933 | ||
1934 | @@ -622,7 +643,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) | |
1935 | { | |
1936 | int err; | |
1937 | struct fuse_conn *fc = get_fuse_conn(dir); | |
1938 | - struct fuse_req *req = fuse_get_req(fc); | |
1939 | + struct fuse_req *req; | |
1940 | + | |
1941 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_unlink"); | |
1942 | + | |
1943 | + req = fuse_get_req(fc); | |
1944 | if (IS_ERR(req)) | |
1945 | return PTR_ERR(req); | |
1946 | ||
1947 | @@ -655,7 +680,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) | |
1948 | { | |
1949 | int err; | |
1950 | struct fuse_conn *fc = get_fuse_conn(dir); | |
1951 | - struct fuse_req *req = fuse_get_req(fc); | |
1952 | + struct fuse_req *req; | |
1953 | + | |
1954 | + FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_rmdir"); | |
1955 | + | |
1956 | + req = fuse_get_req(fc); | |
1957 | if (IS_ERR(req)) | |
1958 | return PTR_ERR(req); | |
1959 | ||
1960 | diff --git a/fs/fuse/file.c b/fs/fuse/file.c | |
9474138d | 1961 | index 06f30e9..80ad032 100644 |
2380c486 JR |
1962 | --- a/fs/fuse/file.c |
1963 | +++ b/fs/fuse/file.c | |
1964 | @@ -7,11 +7,13 @@ | |
1965 | */ | |
1966 | ||
1967 | #include "fuse_i.h" | |
1968 | +#include "fuse.h" | |
1969 | ||
1970 | #include <linux/pagemap.h> | |
1971 | #include <linux/slab.h> | |
1972 | #include <linux/kernel.h> | |
1973 | #include <linux/sched.h> | |
1974 | +#include <linux/freezer.h> | |
1975 | ||
1976 | static const struct file_operations fuse_direct_io_file_operations; | |
1977 | ||
1978 | @@ -23,6 +25,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, | |
1979 | struct fuse_req *req; | |
1980 | int err; | |
1981 | ||
1982 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_send_open"); | |
1983 | + | |
1984 | req = fuse_get_req(fc); | |
1985 | if (IS_ERR(req)) | |
1986 | return PTR_ERR(req); | |
1987 | @@ -279,6 +283,8 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |
1988 | if (fc->no_flush) | |
1989 | return 0; | |
1990 | ||
1991 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_flush"); | |
1992 | + | |
1993 | req = fuse_get_req_nofail(fc, file); | |
1994 | memset(&inarg, 0, sizeof(inarg)); | |
1995 | inarg.fh = ff->fh; | |
1996 | @@ -330,6 +336,8 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | |
1997 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) | |
1998 | return 0; | |
1999 | ||
2000 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_fsync_common"); | |
2001 | + | |
2002 | /* | |
2003 | * Start writeback against all dirty pages of the inode, then | |
2004 | * wait for all outstanding writes, before sending the FSYNC | |
9474138d | 2005 | @@ -437,6 +445,8 @@ static int fuse_readpage(struct file *file, struct page *page) |
2380c486 JR |
2006 | if (is_bad_inode(inode)) |
2007 | goto out; | |
2008 | ||
2009 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_readpage"); | |
2010 | + | |
2011 | /* | |
2012 | * Page writeback can extend beyond the liftime of the | |
2013 | * page-cache page, so make sure we read a properly synced | |
9474138d | 2014 | @@ -540,6 +550,9 @@ static int fuse_readpages_fill(void *_data, struct page *page) |
2380c486 JR |
2015 | struct inode *inode = data->inode; |
2016 | struct fuse_conn *fc = get_fuse_conn(inode); | |
2017 | ||
2018 | + FUSE_MIGHT_FREEZE(data->file->f_mapping->host->i_sb, | |
2019 | + "fuse_readpages_fill"); | |
2020 | + | |
2021 | fuse_wait_on_page_writeback(inode, page->index); | |
2022 | ||
2023 | if (req->num_pages && | |
9474138d | 2024 | @@ -570,6 +583,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, |
2380c486 JR |
2025 | if (is_bad_inode(inode)) |
2026 | goto out; | |
2027 | ||
2028 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_readpages"); | |
2029 | + | |
2030 | data.file = file; | |
2031 | data.inode = inode; | |
2032 | data.req = fuse_get_req(fc); | |
9474138d | 2033 | @@ -686,6 +701,8 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, |
2380c486 JR |
2034 | if (is_bad_inode(inode)) |
2035 | return -EIO; | |
2036 | ||
2037 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_buffered_write"); | |
2038 | + | |
2039 | /* | |
2040 | * Make sure writepages on the same page are not mixed up with | |
2041 | * plain writes. | |
9474138d | 2042 | @@ -842,6 +859,8 @@ static ssize_t fuse_perform_write(struct file *file, |
2380c486 JR |
2043 | struct fuse_req *req; |
2044 | ssize_t count; | |
2045 | ||
2046 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_perform_write"); | |
2047 | + | |
2048 | req = fuse_get_req(fc); | |
2049 | if (IS_ERR(req)) { | |
2050 | err = PTR_ERR(req); | |
9474138d | 2051 | @@ -992,6 +1011,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, |
2380c486 JR |
2052 | if (is_bad_inode(inode)) |
2053 | return -EIO; | |
2054 | ||
2055 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_direct_io"); | |
2056 | + | |
2057 | req = fuse_get_req(fc); | |
2058 | if (IS_ERR(req)) | |
2059 | return PTR_ERR(req); | |
9474138d | 2060 | @@ -1360,6 +1381,8 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) |
2380c486 JR |
2061 | struct fuse_lk_out outarg; |
2062 | int err; | |
2063 | ||
2064 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_getlk"); | |
2065 | + | |
2066 | req = fuse_get_req(fc); | |
2067 | if (IS_ERR(req)) | |
2068 | return PTR_ERR(req); | |
9474138d | 2069 | @@ -1395,6 +1418,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) |
2380c486 JR |
2070 | if (fl->fl_flags & FL_CLOSE) |
2071 | return 0; | |
2072 | ||
2073 | + FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_setlk"); | |
2074 | + | |
2075 | req = fuse_get_req(fc); | |
2076 | if (IS_ERR(req)) | |
2077 | return PTR_ERR(req); | |
9474138d | 2078 | @@ -1461,6 +1486,8 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block) |
2380c486 JR |
2079 | if (!inode->i_sb->s_bdev || fc->no_bmap) |
2080 | return 0; | |
2081 | ||
2082 | + FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_bmap"); | |
2083 | + | |
2084 | req = fuse_get_req(fc); | |
2085 | if (IS_ERR(req)) | |
2086 | return 0; | |
2087 | diff --git a/fs/fuse/fuse.h b/fs/fuse/fuse.h | |
2088 | new file mode 100644 | |
2089 | index 0000000..170e49a | |
2090 | --- /dev/null | |
2091 | +++ b/fs/fuse/fuse.h | |
2092 | @@ -0,0 +1,13 @@ | |
2093 | +#define FUSE_MIGHT_FREEZE(superblock, desc) \ | |
2094 | +do { \ | |
2095 | + int printed = 0; \ | |
2096 | + while (superblock->s_frozen != SB_UNFROZEN) { \ | |
2097 | + if (!printed) { \ | |
2098 | + printk(KERN_INFO "%d frozen in " desc ".\n", \ | |
2099 | + current->pid); \ | |
2100 | + printed = 1; \ | |
2101 | + } \ | |
2102 | + try_to_freeze(); \ | |
2103 | + yield(); \ | |
2104 | + } \ | |
2105 | +} while (0) | |
2106 | diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c | |
9474138d | 2107 | index 91f7c85..cb18b16 100644 |
2380c486 JR |
2108 | --- a/fs/fuse/inode.c |
2109 | +++ b/fs/fuse/inode.c | |
9474138d | 2110 | @@ -929,7 +929,7 @@ static int fuse_get_sb(struct file_system_type *fs_type, |
2380c486 JR |
2111 | static struct file_system_type fuse_fs_type = { |
2112 | .owner = THIS_MODULE, | |
2113 | .name = "fuse", | |
2114 | - .fs_flags = FS_HAS_SUBTYPE, | |
2115 | + .fs_flags = FS_HAS_SUBTYPE | FS_IS_FUSE, | |
2116 | .get_sb = fuse_get_sb, | |
2117 | .kill_sb = kill_anon_super, | |
2118 | }; | |
9474138d | 2119 | @@ -948,7 +948,7 @@ static struct file_system_type fuseblk_fs_type = { |
2380c486 JR |
2120 | .name = "fuseblk", |
2121 | .get_sb = fuse_get_sb_blk, | |
2122 | .kill_sb = kill_block_super, | |
2123 | - .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, | |
2124 | + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_IS_FUSE, | |
2125 | }; | |
2126 | ||
2127 | static inline int register_fuseblk(void) | |
2128 | diff --git a/fs/namei.c b/fs/namei.c | |
9474138d | 2129 | index 967c3db..bffeb61 100644 |
2380c486 JR |
2130 | --- a/fs/namei.c |
2131 | +++ b/fs/namei.c | |
9474138d | 2132 | @@ -2220,6 +2220,8 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) |
2380c486 JR |
2133 | if (!dir->i_op->unlink) |
2134 | return -EPERM; | |
2135 | ||
2136 | + vfs_check_frozen(dir->i_sb, SB_FREEZE_WRITE); | |
2137 | + | |
9474138d | 2138 | vfs_dq_init(dir); |
2380c486 JR |
2139 | |
2140 | mutex_lock(&dentry->d_inode->i_mutex); | |
2141 | diff --git a/fs/super.c b/fs/super.c | |
9474138d | 2142 | index 1943fdf..0ad40de 100644 |
2380c486 JR |
2143 | --- a/fs/super.c |
2144 | +++ b/fs/super.c | |
2145 | @@ -44,6 +44,8 @@ | |
2146 | ||
2147 | ||
2148 | LIST_HEAD(super_blocks); | |
2149 | +EXPORT_SYMBOL_GPL(super_blocks); | |
2150 | + | |
2151 | DEFINE_SPINLOCK(sb_lock); | |
2152 | ||
2153 | /** | |
2154 | diff --git a/include/linux/Kbuild b/include/linux/Kbuild | |
9474138d | 2155 | index 3f0eaa3..40e2a83 100644 |
2380c486 JR |
2156 | --- a/include/linux/Kbuild |
2157 | +++ b/include/linux/Kbuild | |
9474138d | 2158 | @@ -209,6 +209,7 @@ unifdef-y += filter.h |
2380c486 JR |
2159 | unifdef-y += flat.h |
2160 | unifdef-y += futex.h | |
2161 | unifdef-y += fs.h | |
2162 | +unifdef-y += freezer.h | |
2163 | unifdef-y += gameport.h | |
2164 | unifdef-y += generic_serial.h | |
2165 | unifdef-y += hayesesp.h | |
2380c486 JR |
2166 | diff --git a/include/linux/freezer.h b/include/linux/freezer.h |
2167 | index 5a361f8..c775cd1 100644 | |
2168 | --- a/include/linux/freezer.h | |
2169 | +++ b/include/linux/freezer.h | |
2170 | @@ -121,6 +121,23 @@ static inline void set_freezable(void) | |
2171 | current->flags &= ~PF_NOFREEZE; | |
2172 | } | |
2173 | ||
2174 | +#ifdef CONFIG_PM_SLEEP | |
2175 | +extern int freezer_state; | |
2176 | +#define FREEZER_OFF 0 | |
2177 | +#define FREEZER_FILESYSTEMS_FROZEN 1 | |
2178 | +#define FREEZER_USERSPACE_FROZEN 2 | |
2179 | +#define FREEZER_FULLY_ON 3 | |
2180 | + | |
2181 | +static inline int freezer_is_on(void) | |
2182 | +{ | |
2183 | + return freezer_state == FREEZER_FULLY_ON; | |
2184 | +} | |
2185 | +#else | |
2186 | +static inline int freezer_is_on(void) { return 0; } | |
2187 | +#endif | |
2188 | + | |
2189 | +extern void thaw_kernel_threads(void); | |
2190 | + | |
2191 | /* | |
2192 | * Tell the freezer that the current task should be frozen by it and that it | |
2193 | * should send a fake signal to the task to freeze it. | |
2194 | @@ -172,6 +189,8 @@ static inline int freeze_processes(void) { BUG(); return 0; } | |
2195 | static inline void thaw_processes(void) {} | |
2196 | ||
2197 | static inline int try_to_freeze(void) { return 0; } | |
2198 | +static inline int freezer_is_on(void) { return 0; } | |
2199 | +static inline void thaw_kernel_threads(void) { } | |
2200 | ||
2201 | static inline void freezer_do_not_count(void) {} | |
2202 | static inline void freezer_count(void) {} | |
2203 | diff --git a/include/linux/fs.h b/include/linux/fs.h | |
9474138d | 2204 | index 3b534e5..46dc165 100644 |
2380c486 JR |
2205 | --- a/include/linux/fs.h |
2206 | +++ b/include/linux/fs.h | |
2207 | @@ -8,6 +8,7 @@ | |
2208 | ||
2209 | #include <linux/limits.h> | |
2210 | #include <linux/ioctl.h> | |
2211 | +#include <linux/freezer.h> | |
2212 | ||
2213 | /* | |
2214 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change | |
9474138d | 2215 | @@ -172,6 +173,7 @@ struct inodes_stat_t { |
2380c486 JR |
2216 | #define FS_REQUIRES_DEV 1 |
2217 | #define FS_BINARY_MOUNTDATA 2 | |
2218 | #define FS_HAS_SUBTYPE 4 | |
2219 | +#define FS_IS_FUSE 8 /* Fuse filesystem - bdev freeze these too */ | |
2220 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | |
2221 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() | |
2222 | * during rename() internally. | |
9474138d | 2223 | @@ -205,6 +207,7 @@ struct inodes_stat_t { |
2380c486 JR |
2224 | #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ |
2225 | #define MS_I_VERSION (1<<23) /* Update inode I_version field */ | |
9474138d AM |
2226 | #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ |
2227 | +#define MS_FROZEN (1<<25) /* Frozen by freeze_filesystems() */ | |
2380c486 JR |
2228 | #define MS_ACTIVE (1<<30) |
2229 | #define MS_NOUSER (1<<31) | |
2230 | ||
9474138d | 2231 | @@ -231,6 +234,8 @@ struct inodes_stat_t { |
e999739a | 2232 | #define S_NOCMTIME 128 /* Do not update file c/mtime */ |
2233 | #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ | |
2234 | #define S_PRIVATE 512 /* Inode is fs-internal */ | |
2235 | +#define S_ATOMIC_COPY 1024 /* Pages mapped with this inode need to be | |
2236 | + atomically copied (gem) */ | |
2237 | ||
2238 | /* | |
2239 | * Note that nosuid etc flags are inode-specific: setting some file-system | |
9474138d | 2240 | @@ -1390,8 +1395,11 @@ enum { |
2380c486 JR |
2241 | SB_FREEZE_TRANS = 2, |
2242 | }; | |
2243 | ||
2244 | -#define vfs_check_frozen(sb, level) \ | |
2245 | - wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) | |
2246 | +#define vfs_check_frozen(sb, level) do { \ | |
2247 | + freezer_do_not_count(); \ | |
2248 | + wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))); \ | |
2249 | + freezer_count(); \ | |
2250 | +} while (0) | |
2251 | ||
2252 | #define get_fs_excl() atomic_inc(¤t->fs_excl) | |
2253 | #define put_fs_excl() atomic_dec(¤t->fs_excl) | |
9474138d AM |
2254 | @@ -1949,6 +1957,11 @@ extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); |
2255 | extern int fsync_bdev(struct block_device *); | |
2256 | extern int fsync_super(struct super_block *); | |
2257 | extern int fsync_no_super(struct block_device *); | |
2258 | +#define FS_FREEZER_FUSE 1 | |
2259 | +#define FS_FREEZER_NORMAL 2 | |
2260 | +#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL) | |
2261 | +void freeze_filesystems(int which); | |
2262 | +void thaw_filesystems(int which); | |
2263 | #else | |
2264 | static inline void bd_forget(struct inode *inode) {} | |
2265 | static inline int sync_blockdev(struct block_device *bdev) { return 0; } | |
2380c486 | 2266 | diff --git a/include/linux/mm.h b/include/linux/mm.h |
9474138d | 2267 | index bff1f0d..c4199cd 100644 |
2380c486 JR |
2268 | --- a/include/linux/mm.h |
2269 | +++ b/include/linux/mm.h | |
9474138d | 2270 | @@ -105,6 +105,7 @@ extern unsigned int kobjsize(const void *objp); |
e999739a | 2271 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ |
2272 | #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ | |
9474138d AM |
2273 | #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ |
2274 | +#define VM_ATOMIC_COPY 0x80000000 /* TuxOnIce should atomically copy */ | |
e999739a | 2275 | |
2276 | #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ | |
2277 | #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS | |
9474138d | 2278 | @@ -1297,6 +1298,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, |
2380c486 JR |
2279 | void __user *, size_t *, loff_t *); |
2280 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |
2281 | unsigned long lru_pages); | |
2282 | +void drop_pagecache(void); | |
2283 | ||
2284 | #ifndef CONFIG_MMU | |
2285 | #define randomize_va_space 0 | |
2286 | diff --git a/include/linux/netlink.h b/include/linux/netlink.h | |
9474138d | 2287 | index 5ba398e..f220828 100644 |
2380c486 JR |
2288 | --- a/include/linux/netlink.h |
2289 | +++ b/include/linux/netlink.h | |
2290 | @@ -24,6 +24,8 @@ | |
2291 | /* leave room for NETLINK_DM (DM Events) */ | |
2292 | #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ | |
2293 | #define NETLINK_ECRYPTFS 19 | |
2294 | +#define NETLINK_TOI_USERUI 20 /* TuxOnIce's userui */ | |
2295 | +#define NETLINK_TOI_USM 21 /* Userspace storage manager */ | |
2296 | ||
2297 | #define MAX_LINKS 32 | |
2298 | ||
2299 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
9474138d | 2300 | index 795032e..1f52617 100644 |
2380c486 JR |
2301 | --- a/include/linux/suspend.h |
2302 | +++ b/include/linux/suspend.h | |
9474138d | 2303 | @@ -308,4 +308,70 @@ static inline void register_nosave_region_late(unsigned long b, unsigned long e) |
2380c486 JR |
2304 | |
2305 | extern struct mutex pm_mutex; | |
2306 | ||
2307 | +enum { | |
2308 | + TOI_CAN_HIBERNATE, | |
2309 | + TOI_CAN_RESUME, | |
2310 | + TOI_RESUME_DEVICE_OK, | |
2311 | + TOI_NORESUME_SPECIFIED, | |
2312 | + TOI_SANITY_CHECK_PROMPT, | |
2313 | + TOI_CONTINUE_REQ, | |
2314 | + TOI_RESUMED_BEFORE, | |
2315 | + TOI_BOOT_TIME, | |
2316 | + TOI_NOW_RESUMING, | |
2317 | + TOI_IGNORE_LOGLEVEL, | |
2318 | + TOI_TRYING_TO_RESUME, | |
2319 | + TOI_LOADING_ALT_IMAGE, | |
2320 | + TOI_STOP_RESUME, | |
2321 | + TOI_IO_STOPPED, | |
2322 | + TOI_NOTIFIERS_PREPARE, | |
2323 | + TOI_CLUSTER_MODE, | |
2324 | + TOI_BOOT_KERNEL, | |
2325 | +}; | |
2326 | + | |
2327 | +#ifdef CONFIG_TOI | |
2328 | + | |
2329 | +/* Used in init dir files */ | |
2330 | +extern unsigned long toi_state; | |
2331 | +#define set_toi_state(bit) (set_bit(bit, &toi_state)) | |
2332 | +#define clear_toi_state(bit) (clear_bit(bit, &toi_state)) | |
2333 | +#define test_toi_state(bit) (test_bit(bit, &toi_state)) | |
2334 | +extern int toi_running; | |
2335 | + | |
2336 | +#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action)) | |
9474138d | 2337 | +extern int try_tuxonice_hibernate(void); |
2380c486 JR |
2338 | + |
2339 | +#else /* !CONFIG_TOI */ | |
2340 | + | |
2341 | +#define toi_state (0) | |
2342 | +#define set_toi_state(bit) do { } while (0) | |
2343 | +#define clear_toi_state(bit) do { } while (0) | |
2344 | +#define test_toi_state(bit) (0) | |
2345 | +#define toi_running (0) | |
2346 | + | |
9474138d | 2347 | +static inline int try_tuxonice_hibernate(void) { return 0; } |
2380c486 JR |
2348 | +#define test_action_state(bit) (0) |
2349 | + | |
2350 | +#endif /* CONFIG_TOI */ | |
2351 | + | |
2352 | +#ifdef CONFIG_HIBERNATION | |
2353 | +#ifdef CONFIG_TOI | |
9474138d | 2354 | +extern void try_tuxonice_resume(void); |
2380c486 | 2355 | +#else |
9474138d | 2356 | +#define try_tuxonice_resume() do { } while (0) |
2380c486 JR |
2357 | +#endif |
2358 | + | |
2359 | +extern int resume_attempted; | |
2360 | +extern int software_resume(void); | |
2361 | + | |
2362 | +static inline void check_resume_attempted(void) | |
2363 | +{ | |
2364 | + if (resume_attempted) | |
2365 | + return; | |
2366 | + | |
2367 | + software_resume(); | |
2368 | +} | |
2369 | +#else | |
2370 | +#define check_resume_attempted() do { } while (0) | |
2371 | +#define resume_attempted (0) | |
2372 | +#endif | |
2373 | #endif /* _LINUX_SUSPEND_H */ | |
2374 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
9474138d | 2375 | index d476aad..b522e83 100644 |
2380c486 JR |
2376 | --- a/include/linux/swap.h |
2377 | +++ b/include/linux/swap.h | |
2378 | @@ -168,6 +168,7 @@ struct swap_list_t { | |
2379 | extern unsigned long totalram_pages; | |
2380 | extern unsigned long totalreserve_pages; | |
2381 | extern unsigned int nr_free_buffer_pages(void); | |
2382 | +extern unsigned int nr_unallocated_buffer_pages(void); | |
2383 | extern unsigned int nr_free_pagecache_pages(void); | |
2384 | ||
2385 | /* Definition of global_page_state not available yet */ | |
2386 | diff --git a/init/do_mounts.c b/init/do_mounts.c | |
9474138d | 2387 | index dd7ee5f..5ecae29 100644 |
2380c486 JR |
2388 | --- a/init/do_mounts.c |
2389 | +++ b/init/do_mounts.c | |
9474138d | 2390 | @@ -143,6 +143,7 @@ fail: |
2380c486 JR |
2391 | done: |
2392 | return res; | |
2393 | } | |
2394 | +EXPORT_SYMBOL_GPL(name_to_dev_t); | |
2395 | ||
2396 | static int __init root_dev_setup(char *line) | |
2397 | { | |
9474138d | 2398 | @@ -412,6 +413,8 @@ void __init prepare_namespace(void) |
2380c486 JR |
2399 | if (is_floppy && rd_doload && rd_load_disk(0)) |
2400 | ROOT_DEV = Root_RAM0; | |
2401 | ||
2402 | + check_resume_attempted(); | |
2403 | + | |
2404 | mount_root(); | |
2405 | out: | |
2406 | sys_mount(".", "/", NULL, MS_MOVE, NULL); | |
2407 | diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c | |
2408 | index 614241b..f3ea292 100644 | |
2409 | --- a/init/do_mounts_initrd.c | |
2410 | +++ b/init/do_mounts_initrd.c | |
2411 | @@ -6,6 +6,7 @@ | |
2412 | #include <linux/romfs_fs.h> | |
2413 | #include <linux/initrd.h> | |
2414 | #include <linux/sched.h> | |
2415 | +#include <linux/suspend.h> | |
2416 | #include <linux/freezer.h> | |
2417 | ||
2418 | #include "do_mounts.h" | |
2419 | @@ -68,6 +69,11 @@ static void __init handle_initrd(void) | |
2420 | ||
2421 | current->flags &= ~PF_FREEZER_SKIP; | |
2422 | ||
2423 | + if (!resume_attempted) | |
2424 | + printk(KERN_ERR "TuxOnIce: No attempt was made to resume from " | |
2425 | + "any image that might exist.\n"); | |
2426 | + clear_toi_state(TOI_BOOT_TIME); | |
2427 | + | |
2428 | /* move initrd to rootfs' /old */ | |
2429 | sys_fchdir(old_fd); | |
2430 | sys_mount("/", ".", NULL, MS_MOVE, NULL); | |
2431 | diff --git a/init/main.c b/init/main.c | |
9474138d | 2432 | index d721dad..1c0b018 100644 |
2380c486 JR |
2433 | --- a/init/main.c |
2434 | +++ b/init/main.c | |
9474138d | 2435 | @@ -117,6 +117,7 @@ extern void softirq_init(void); |
2380c486 JR |
2436 | char __initdata boot_command_line[COMMAND_LINE_SIZE]; |
2437 | /* Untouched saved command line (eg. for /proc) */ | |
2438 | char *saved_command_line; | |
2439 | +EXPORT_SYMBOL_GPL(saved_command_line); | |
2440 | /* Command line for parameter parsing */ | |
2441 | static char *static_command_line; | |
2442 | ||
2443 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
9474138d | 2444 | index 395b697..fe274d1 100644 |
2380c486 JR |
2445 | --- a/kernel/cpu.c |
2446 | +++ b/kernel/cpu.c | |
2447 | @@ -415,6 +415,7 @@ int disable_nonboot_cpus(void) | |
2448 | stop_machine_destroy(); | |
2449 | return error; | |
2450 | } | |
2451 | +EXPORT_SYMBOL_GPL(disable_nonboot_cpus); | |
2452 | ||
2453 | void __ref enable_nonboot_cpus(void) | |
2454 | { | |
2455 | @@ -439,6 +440,7 @@ void __ref enable_nonboot_cpus(void) | |
2456 | out: | |
2457 | cpu_maps_update_done(); | |
2458 | } | |
2459 | +EXPORT_SYMBOL_GPL(enable_nonboot_cpus); | |
2460 | ||
2461 | static int alloc_frozen_cpus(void) | |
2462 | { | |
2463 | diff --git a/kernel/fork.c b/kernel/fork.c | |
9474138d | 2464 | index 875ffbd..c9df3fe 100644 |
2380c486 JR |
2465 | --- a/kernel/fork.c |
2466 | +++ b/kernel/fork.c | |
9474138d | 2467 | @@ -82,6 +82,7 @@ int max_threads; /* tunable limit on nr_threads */ |
2380c486 JR |
2468 | DEFINE_PER_CPU(unsigned long, process_counts) = 0; |
2469 | ||
2470 | __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ | |
2471 | +EXPORT_SYMBOL_GPL(tasklist_lock); | |
2472 | ||
2473 | DEFINE_TRACE(sched_process_fork); | |
2474 | ||
2475 | diff --git a/kernel/kmod.c b/kernel/kmod.c | |
9474138d | 2476 | index 7e95bed..41e5186 100644 |
2380c486 JR |
2477 | --- a/kernel/kmod.c |
2478 | +++ b/kernel/kmod.c | |
9474138d | 2479 | @@ -319,6 +319,7 @@ int usermodehelper_disable(void) |
2380c486 JR |
2480 | usermodehelper_disabled = 0; |
2481 | return -EAGAIN; | |
2482 | } | |
2483 | +EXPORT_SYMBOL_GPL(usermodehelper_disable); | |
2484 | ||
2485 | /** | |
2486 | * usermodehelper_enable - allow new helpers to be started again | |
9474138d | 2487 | @@ -327,6 +328,7 @@ void usermodehelper_enable(void) |
2380c486 JR |
2488 | { |
2489 | usermodehelper_disabled = 0; | |
2490 | } | |
2491 | +EXPORT_SYMBOL_GPL(usermodehelper_enable); | |
2492 | ||
2493 | static void helper_lock(void) | |
2494 | { | |
2495 | diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig | |
9474138d | 2496 | index 23bd4da..7638270 100644 |
2380c486 JR |
2497 | --- a/kernel/power/Kconfig |
2498 | +++ b/kernel/power/Kconfig | |
2499 | @@ -38,6 +38,13 @@ config CAN_PM_TRACE | |
2500 | def_bool y | |
2501 | depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL | |
2502 | ||
2503 | +config FS_FREEZER_DEBUG | |
2504 | + bool "Filesystem freezer debugging" | |
2505 | + depends on PM_DEBUG | |
2506 | + default n | |
2507 | + ---help--- | |
2508 | + This option enables debugging of the filesystem freezing code. | |
2509 | + | |
2510 | config PM_TRACE | |
2511 | bool | |
2512 | help | |
9474138d | 2513 | @@ -179,6 +186,237 @@ config PM_STD_PARTITION |
2380c486 JR |
2514 | suspended image to. It will simply pick the first available swap |
2515 | device. | |
2516 | ||
2517 | +menuconfig TOI_CORE | |
2518 | + tristate "Enhanced Hibernation (TuxOnIce)" | |
2519 | + depends on HIBERNATION | |
2520 | + default y | |
2521 | + ---help--- | |
2522 | + TuxOnIce is the 'new and improved' suspend support. | |
2523 | + | |
2524 | + See the TuxOnIce home page (tuxonice.net) | |
2525 | + for FAQs, HOWTOs and other documentation. | |
2526 | + | |
2527 | + comment "Image Storage (you need at least one allocator)" | |
2528 | + depends on TOI_CORE | |
2529 | + | |
2530 | + config TOI_FILE | |
2531 | + tristate "File Allocator" | |
2532 | + depends on TOI_CORE | |
2533 | + default y | |
2534 | + ---help--- | |
2535 | + This option enables support for storing an image in a | |
2536 | + simple file. This should be possible, but we're still | |
2537 | + testing it. | |
2538 | + | |
2539 | + config TOI_SWAP | |
2540 | + tristate "Swap Allocator" | |
2541 | + depends on TOI_CORE && SWAP | |
2542 | + default y | |
2543 | + ---help--- | |
2544 | + This option enables support for storing an image in your | |
2545 | + swap space. | |
2546 | + | |
2547 | + comment "General Options" | |
2548 | + depends on TOI_CORE | |
2549 | + | |
2380c486 JR |
2550 | + config TOI_CRYPTO |
2551 | + tristate "Compression support" | |
2552 | + depends on TOI_CORE && CRYPTO | |
2553 | + default y | |
2554 | + ---help--- | |
2555 | + This option adds support for using cryptoapi compression | |
9474138d AM |
2556 | + algorithms. Compression is particularly useful as it can |
2557 | + more than double your suspend and resume speed (depending | |
2558 | + upon how well your image compresses). | |
2380c486 JR |
2559 | + |
2560 | + You probably want this, so say Y here. | |
2561 | + | |
2562 | + comment "No compression support available without Cryptoapi support." | |
2563 | + depends on TOI_CORE && !CRYPTO | |
2564 | + | |
2565 | + config TOI_USERUI | |
2566 | + tristate "Userspace User Interface support" | |
2567 | + depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE) | |
2568 | + default y | |
2569 | + ---help--- | |
2570 | + This option enables support for a userspace based user interface | |
2571 | + to TuxOnIce, which allows you to have a nice display while suspending | |
2572 | + and resuming, and also enables features such as pressing escape to | |
2573 | + cancel a cycle or interactive debugging. | |
2574 | + | |
2575 | + config TOI_USERUI_DEFAULT_PATH | |
2576 | + string "Default userui program location" | |
e999739a | 2577 | + default "/usr/local/sbin/tuxoniceui_text" |
2380c486 JR |
2578 | + depends on TOI_USERUI |
2579 | + ---help--- | |
2580 | + This entry allows you to specify a default path to the userui binary. | |
2581 | + | |
2582 | + config TOI_KEEP_IMAGE | |
2583 | + bool "Allow Keep Image Mode" | |
2584 | + depends on TOI_CORE | |
2585 | + ---help--- | |
2586 | + This option allows you to keep an image and reuse it. It is intended | |
2587 | + __ONLY__ for use with systems where all filesystems are mounted read- | |
2588 | + only (kiosks, for example). To use it, compile this option in and boot | |
2589 | + normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend. | |
2590 | + When you resume, the image will not be removed. You will be unable to turn | |
2591 | + off swap partitions (assuming you are using the swap allocator), but future | |
2592 | + suspends simply do a power-down. The image can be updated using the | |
2593 | + kernel command line parameter suspend_act= to turn off the keep image | |
2594 | + bit. Keep image mode is a little less user friendly on purpose - it | |
2595 | + should not be used without thought! | |
2596 | + | |
2597 | + config TOI_REPLACE_SWSUSP | |
2598 | + bool "Replace swsusp by default" | |
2599 | + default y | |
2600 | + depends on TOI_CORE | |
2601 | + ---help--- | |
2602 | + TuxOnIce can replace swsusp. This option makes that the default state, | |
2603 | + requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want | |
2604 | + to use the vanilla kernel functionality. Note that your initrd/ramfs will | |
2605 | + need to do this before trying to resume, too. | |
2606 | + With overriding swsusp enabled, echoing disk to /sys/power/state will | |
2607 | + start a TuxOnIce cycle. If resume= doesn't specify an allocator and both | |
2608 | + the swap and file allocators are compiled in, the swap allocator will be | |
2609 | + used by default. | |
2610 | + | |
2611 | + config TOI_IGNORE_LATE_INITCALL | |
2612 | + bool "Wait for initrd/ramfs to run, by default" | |
2613 | + default n | |
2614 | + depends on TOI_CORE | |
2615 | + ---help--- | |
2616 | + When booting, TuxOnIce can check for an image and start to resume prior | |
2617 | + to any initrd/ramfs running (via a late initcall). | |
2618 | + | |
2619 | + If you don't have an initrd/ramfs, this is what you want to happen - | |
2620 | + otherwise you won't be able to safely resume. You should set this option | |
2621 | + to 'No'. | |
2622 | + | |
2623 | + If, however, you want your initrd/ramfs to run anyway before resuming, | |
2624 | + you need to tell TuxOnIce to ignore that earlier opportunity to resume. | |
2625 | + This can be done either by using this compile time option, or by | |
2626 | + overriding this option with the boot-time parameter toi_initramfs_resume_only=1. | |
2627 | + | |
2628 | + Note that if TuxOnIce can't resume at the earlier opportunity, the | |
2629 | + value of this option won't matter - the initramfs/initrd (if any) will | |
2630 | + run anyway. | |
2631 | + | |
2632 | + menuconfig TOI_CLUSTER | |
2633 | + tristate "Cluster support" | |
2634 | + default n | |
2635 | + depends on TOI_CORE && NET && BROKEN | |
2636 | + ---help--- | |
2637 | + Support for linking multiple machines in a cluster so that they suspend | |
2638 | + and resume together. | |
2639 | + | |
2640 | + config TOI_DEFAULT_CLUSTER_INTERFACE | |
2641 | + string "Default cluster interface" | |
2642 | + depends on TOI_CLUSTER | |
2643 | + ---help--- | |
2644 | + The default interface on which to communicate with other nodes in | |
2645 | + the cluster. | |
2646 | + | |
2647 | + If no value is set here, cluster support will be disabled by default. | |
2648 | + | |
2649 | + config TOI_DEFAULT_CLUSTER_KEY | |
2650 | + string "Default cluster key" | |
2651 | + default "Default" | |
2652 | + depends on TOI_CLUSTER | |
2653 | + ---help--- | |
2654 | + The default key used by this node. All nodes in the same cluster | |
2655 | + have the same key. Multiple clusters may coexist on the same LAN | |
2656 | + by using different values for this key. | |
2657 | + | |
2658 | + config TOI_CLUSTER_IMAGE_TIMEOUT | |
2659 | + int "Timeout when checking for image" | |
2660 | + default 15 | |
2661 | + depends on TOI_CLUSTER | |
2662 | + ---help--- | |
2663 | + Timeout (seconds) before continuing to boot when waiting to see | |
2664 | + whether other nodes might have an image. Set to -1 to wait | |
2665 | + indefinitely. If WAIT_UNTIL_NODES is non-zero, we might continue | |
2666 | + booting sooner than this timeout. | |
2667 | + | |
2668 | + config TOI_CLUSTER_WAIT_UNTIL_NODES | |
2669 | + int "Nodes without image before continuing" | |
2670 | + default 0 | |
2671 | + depends on TOI_CLUSTER | |
2672 | + ---help--- | |
2673 | + When booting and no image is found, we wait to see if other nodes | |
2674 | + have an image before continuing to boot. This value lets us | |
2675 | + continue after seeing a certain number of nodes without an image, | |
2676 | + instead of continuing to wait for the timeout. Set to 0 to only | |
2677 | + use the timeout. | |
2678 | + | |
2679 | + config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE | |
2680 | + string "Default pre-hibernate script" | |
2681 | + depends on TOI_CLUSTER | |
2682 | + ---help--- | |
2683 | + The default script to be called when starting to hibernate. | |
2684 | + | |
2685 | + config TOI_DEFAULT_CLUSTER_POST_HIBERNATE | |
2686 | + string "Default post-hibernate script" | |
2687 | + depends on TOI_CLUSTER | |
2688 | + ---help--- | |
2689 | + The default script to be called after resuming from hibernation. | |
2690 | + | |
2691 | + config TOI_DEFAULT_WAIT | |
2692 | + int "Default waiting time for emergency boot messages" | |
2693 | + default "25" | |
2694 | + range -1 32768 | |
2695 | + depends on TOI_CORE | |
2696 | + help | |
2697 | + TuxOnIce can display warnings very early in the process of resuming, | |
2698 | + if (for example) it appears that you have booted a kernel that doesn't | |
2699 | + match an image on disk. It can then give you the opportunity to either | |
2700 | + continue booting that kernel, or reboot the machine. This option can be | |
2701 | + used to control how long to wait in such circumstances. -1 means wait | |
2702 | + forever. 0 means don't wait at all (do the default action, which will | |
2703 | + generally be to continue booting and remove the image). Values of 1 or | |
2704 | + more indicate a number of seconds (up to 255) to wait before doing the | |
2705 | + default. | |
2706 | + | |
2707 | + config TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE | |
2708 | + int "Default extra pages allowance" | |
2709 | + default "2000" | |
2710 | + range 500 32768 | |
2711 | + depends on TOI_CORE | |
2712 | + help | |
2713 | + This value controls the default for the allowance TuxOnIce makes for | |
2714 | + drivers to allocate extra memory during the atomic copy. The default | |
2715 | + value of 2000 will be okay in most cases. If you are using | |
2716 | + DRI, the easiest way to find what value to use is to try to hibernate | |
2717 | + and look at how many pages were actually needed in the sysfs entry | |
2718 | + /sys/power/tuxonice/debug_info (first number on the last line), adding | |
2719 | + a little extra because the value is not always the same. | |
2720 | + | |
2721 | + config TOI_CHECKSUM | |
2722 | + bool "Checksum pageset2" | |
2723 | + default n | |
2724 | + depends on TOI_CORE | |
2725 | + select CRYPTO | |
2726 | + select CRYPTO_ALGAPI | |
2727 | + select CRYPTO_MD4 | |
2728 | + ---help--- | |
2729 | + Adds support for checksumming pageset2 pages, to ensure you really get an | |
2730 | + atomic copy. Since some filesystems (XFS especially) change metadata even | |
2731 | + when there's no other activity, we need this to check for pages that have | |
2732 | + been changed while we were saving the page cache. If your debugging output | |
2733 | + always says no pages were resaved, you may be able to safely disable this | |
2734 | + option. | |
2735 | + | |
2736 | +config TOI | |
2737 | + bool | |
2738 | + depends on TOI_CORE!=n | |
2739 | + default y | |
2740 | + | |
2741 | +config TOI_EXPORTS | |
2742 | + bool | |
2743 | + depends on TOI_SWAP=m || TOI_FILE=m || \ | |
2744 | + TOI_CRYPTO=m || TOI_CLUSTER=m || \ | |
2745 | + TOI_USERUI=m || TOI_CORE=m | |
2746 | + default y | |
2747 | + | |
2748 | config APM_EMULATION | |
2749 | tristate "Advanced Power Management Emulation" | |
2750 | depends on PM && SYS_SUPPORTS_APM_EMULATION | |
2751 | diff --git a/kernel/power/Makefile b/kernel/power/Makefile | |
9474138d | 2752 | index 720ea4f..e797c0d 100644 |
2380c486 JR |
2753 | --- a/kernel/power/Makefile |
2754 | +++ b/kernel/power/Makefile | |
9474138d | 2755 | @@ -3,6 +3,34 @@ ifeq ($(CONFIG_PM_DEBUG),y) |
2380c486 JR |
2756 | EXTRA_CFLAGS += -DDEBUG |
2757 | endif | |
2758 | ||
2380c486 JR |
2759 | +tuxonice_core-objs := tuxonice_modules.o tuxonice_sysfs.o tuxonice_highlevel.o \ |
2760 | + tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \ | |
2761 | + tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \ | |
2762 | + tuxonice_power_off.o tuxonice_atomic_copy.o | |
2763 | + | |
2764 | +obj-$(CONFIG_TOI) += tuxonice_builtin.o | |
2765 | + | |
2766 | +ifdef CONFIG_PM_DEBUG | |
2767 | +tuxonice_core-objs += tuxonice_alloc.o | |
2768 | +endif | |
2769 | + | |
2770 | +ifdef CONFIG_TOI_CHECKSUM | |
2771 | +tuxonice_core-objs += tuxonice_checksum.o | |
2772 | +endif | |
2773 | + | |
2774 | +ifdef CONFIG_NET | |
2775 | +tuxonice_core-objs += tuxonice_storage.o tuxonice_netlink.o | |
2776 | +endif | |
2777 | + | |
2778 | +obj-$(CONFIG_TOI_CORE) += tuxonice_core.o | |
2779 | +obj-$(CONFIG_TOI_CRYPTO) += tuxonice_compress.o | |
2780 | + | |
2781 | +obj-$(CONFIG_TOI_SWAP) += tuxonice_block_io.o tuxonice_swap.o | |
2782 | +obj-$(CONFIG_TOI_FILE) += tuxonice_block_io.o tuxonice_file.o | |
2783 | +obj-$(CONFIG_TOI_CLUSTER) += tuxonice_cluster.o | |
2784 | + | |
2785 | +obj-$(CONFIG_TOI_USERUI) += tuxonice_userui.o | |
2786 | + | |
2787 | obj-$(CONFIG_PM) += main.o | |
2788 | obj-$(CONFIG_PM_SLEEP) += console.o | |
2789 | obj-$(CONFIG_FREEZER) += process.o | |
2380c486 | 2790 | diff --git a/kernel/power/disk.c b/kernel/power/disk.c |
9474138d | 2791 | index 5cb080e..4f82ed5 100644 |
2380c486 JR |
2792 | --- a/kernel/power/disk.c |
2793 | +++ b/kernel/power/disk.c | |
9474138d AM |
2794 | @@ -25,11 +25,12 @@ |
2795 | #include <scsi/scsi_scan.h> | |
2796 | #include <asm/suspend.h> | |
2380c486 | 2797 | |
9474138d | 2798 | -#include "power.h" |
2380c486 JR |
2799 | - |
2800 | +#include "tuxonice.h" | |
2801 | ||
2802 | static int noresume = 0; | |
2803 | -static char resume_file[256] = CONFIG_PM_STD_PARTITION; | |
2804 | +char resume_file[256] = CONFIG_PM_STD_PARTITION; | |
2805 | +EXPORT_SYMBOL_GPL(resume_file); | |
2806 | + | |
2807 | dev_t swsusp_resume_device; | |
2808 | sector_t swsusp_resume_block; | |
2809 | ||
9474138d | 2810 | @@ -115,55 +116,60 @@ static int hibernation_test(int level) { return 0; } |
2380c486 JR |
2811 | * hibernation |
2812 | */ | |
2813 | ||
2814 | -static int platform_begin(int platform_mode) | |
2815 | +int platform_begin(int platform_mode) | |
2816 | { | |
2817 | return (platform_mode && hibernation_ops) ? | |
2818 | hibernation_ops->begin() : 0; | |
2819 | } | |
2820 | +EXPORT_SYMBOL_GPL(platform_begin); | |
2821 | ||
2822 | /** | |
2823 | * platform_end - tell the platform driver that we've entered the | |
2824 | * working state | |
2825 | */ | |
2826 | ||
2827 | -static void platform_end(int platform_mode) | |
2828 | +void platform_end(int platform_mode) | |
2829 | { | |
2830 | if (platform_mode && hibernation_ops) | |
2831 | hibernation_ops->end(); | |
2832 | } | |
2833 | +EXPORT_SYMBOL_GPL(platform_end); | |
2834 | ||
2835 | /** | |
2836 | * platform_pre_snapshot - prepare the machine for hibernation using the | |
2837 | * platform driver if so configured and return an error code if it fails | |
2838 | */ | |
2839 | ||
2840 | -static int platform_pre_snapshot(int platform_mode) | |
2841 | +int platform_pre_snapshot(int platform_mode) | |
2842 | { | |
2843 | return (platform_mode && hibernation_ops) ? | |
2844 | hibernation_ops->pre_snapshot() : 0; | |
2845 | } | |
2846 | +EXPORT_SYMBOL_GPL(platform_pre_snapshot); | |
2847 | ||
2848 | /** | |
2849 | * platform_leave - prepare the machine for switching to the normal mode | |
2850 | * of operation using the platform driver (called with interrupts disabled) | |
2851 | */ | |
2852 | ||
2853 | -static void platform_leave(int platform_mode) | |
2854 | +void platform_leave(int platform_mode) | |
2855 | { | |
2856 | if (platform_mode && hibernation_ops) | |
2857 | hibernation_ops->leave(); | |
2858 | } | |
2859 | +EXPORT_SYMBOL_GPL(platform_leave); | |
2860 | ||
2861 | /** | |
2862 | * platform_finish - switch the machine to the normal mode of operation | |
2863 | * using the platform driver (must be called after platform_prepare()) | |
2864 | */ | |
2865 | ||
2866 | -static void platform_finish(int platform_mode) | |
2867 | +void platform_finish(int platform_mode) | |
2868 | { | |
2869 | if (platform_mode && hibernation_ops) | |
2870 | hibernation_ops->finish(); | |
2871 | } | |
2872 | +EXPORT_SYMBOL_GPL(platform_finish); | |
2873 | ||
2874 | /** | |
2875 | * platform_pre_restore - prepare the platform for the restoration from a | |
9474138d | 2876 | @@ -171,11 +177,12 @@ static void platform_finish(int platform_mode) |
2380c486 JR |
2877 | * called, platform_restore_cleanup() must be called. |
2878 | */ | |
2879 | ||
2880 | -static int platform_pre_restore(int platform_mode) | |
2881 | +int platform_pre_restore(int platform_mode) | |
2882 | { | |
2883 | return (platform_mode && hibernation_ops) ? | |
2884 | hibernation_ops->pre_restore() : 0; | |
2885 | } | |
2886 | +EXPORT_SYMBOL_GPL(platform_pre_restore); | |
2887 | ||
2888 | /** | |
2889 | * platform_restore_cleanup - switch the platform to the normal mode of | |
9474138d | 2890 | @@ -184,22 +191,24 @@ static int platform_pre_restore(int platform_mode) |
2380c486 JR |
2891 | * regardless of the result of platform_pre_restore(). |
2892 | */ | |
2893 | ||
2894 | -static void platform_restore_cleanup(int platform_mode) | |
2895 | +void platform_restore_cleanup(int platform_mode) | |
2896 | { | |
2897 | if (platform_mode && hibernation_ops) | |
2898 | hibernation_ops->restore_cleanup(); | |
2899 | } | |
2900 | +EXPORT_SYMBOL_GPL(platform_restore_cleanup); | |
2901 | ||
2902 | /** | |
2903 | * platform_recover - recover the platform from a failure to suspend | |
2904 | * devices. | |
2905 | */ | |
2906 | ||
2907 | -static void platform_recover(int platform_mode) | |
2908 | +void platform_recover(int platform_mode) | |
2909 | { | |
2910 | if (platform_mode && hibernation_ops && hibernation_ops->recover) | |
2911 | hibernation_ops->recover(); | |
2912 | } | |
2913 | +EXPORT_SYMBOL_GPL(platform_recover); | |
2914 | ||
2915 | /** | |
2916 | * create_image - freeze devices that need to be frozen with interrupts | |
9474138d | 2917 | @@ -423,6 +432,7 @@ int hibernation_restore(int platform_mode) |
2380c486 JR |
2918 | pm_restore_console(); |
2919 | return error; | |
2920 | } | |
2921 | +EXPORT_SYMBOL_GPL(hibernation_platform_enter); | |
2922 | ||
2923 | /** | |
2924 | * hibernation_platform_enter - enter the hibernation state using the | |
9474138d | 2925 | @@ -542,6 +552,9 @@ int hibernate(void) |
2380c486 JR |
2926 | { |
2927 | int error; | |
2928 | ||
2929 | + if (test_action_state(TOI_REPLACE_SWSUSP)) | |
9474138d | 2930 | + return try_tuxonice_hibernate(); |
2380c486 JR |
2931 | + |
2932 | mutex_lock(&pm_mutex); | |
2933 | /* The snapshot device should not be opened while we're running */ | |
2934 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
9474138d | 2935 | @@ -619,10 +632,17 @@ int hibernate(void) |
2380c486 JR |
2936 | * |
2937 | */ | |
2938 | ||
2939 | -static int software_resume(void) | |
2940 | +int software_resume(void) | |
2941 | { | |
2942 | int error; | |
2943 | unsigned int flags; | |
2944 | + resume_attempted = 1; | |
2945 | + | |
2946 | + /* | |
2947 | + * We can't know (until an image header - if any - is loaded), whether | |
2948 | + * we did override swsusp. We therefore ensure that both are tried. | |
2949 | + */ | |
9474138d | 2950 | + try_tuxonice_resume(); |
2380c486 JR |
2951 | |
2952 | /* | |
2953 | * If the user said "noresume".. bail out early. | |
9474138d | 2954 | @@ -947,6 +967,7 @@ static int __init resume_offset_setup(char *str) |
2380c486 JR |
2955 | static int __init noresume_setup(char *str) |
2956 | { | |
2957 | noresume = 1; | |
2958 | + set_toi_state(TOI_NORESUME_SPECIFIED); | |
2959 | return 1; | |
2960 | } | |
2961 | ||
2962 | diff --git a/kernel/power/main.c b/kernel/power/main.c | |
9474138d | 2963 | index 8680282..ea50274 100644 |
2380c486 JR |
2964 | --- a/kernel/power/main.c |
2965 | +++ b/kernel/power/main.c | |
2966 | @@ -26,6 +26,7 @@ | |
2967 | #include "power.h" | |
2968 | ||
2969 | DEFINE_MUTEX(pm_mutex); | |
2970 | +EXPORT_SYMBOL_GPL(pm_mutex); | |
2971 | ||
2972 | unsigned int pm_flags; | |
2973 | EXPORT_SYMBOL(pm_flags); | |
2974 | @@ -34,7 +35,8 @@ EXPORT_SYMBOL(pm_flags); | |
2975 | ||
2976 | /* Routines for PM-transition notifications */ | |
2977 | ||
2978 | -static BLOCKING_NOTIFIER_HEAD(pm_chain_head); | |
2979 | +BLOCKING_NOTIFIER_HEAD(pm_chain_head); | |
2980 | +EXPORT_SYMBOL_GPL(pm_chain_head); | |
2981 | ||
2982 | int register_pm_notifier(struct notifier_block *nb) | |
2983 | { | |
2984 | @@ -204,6 +206,7 @@ void suspend_set_ops(struct platform_suspend_ops *ops) | |
2985 | suspend_ops = ops; | |
2986 | mutex_unlock(&pm_mutex); | |
2987 | } | |
2988 | +EXPORT_SYMBOL_GPL(pm_notifier_call_chain); | |
2989 | ||
2990 | /** | |
2991 | * suspend_valid_only_mem - generic memory-only valid callback | |
9474138d | 2992 | @@ -465,6 +468,7 @@ static int enter_state(suspend_state_t state) |
2380c486 JR |
2993 | mutex_unlock(&pm_mutex); |
2994 | return error; | |
2995 | } | |
2996 | +EXPORT_SYMBOL_GPL(suspend_devices_and_enter); | |
2997 | ||
2998 | ||
2999 | /** | |
9474138d | 3000 | @@ -487,6 +491,7 @@ EXPORT_SYMBOL(pm_suspend); |
2380c486 JR |
3001 | #endif /* CONFIG_SUSPEND */ |
3002 | ||
3003 | struct kobject *power_kobj; | |
3004 | +EXPORT_SYMBOL_GPL(power_kobj); | |
3005 | ||
3006 | /** | |
3007 | * state - control system power state. | |
3008 | diff --git a/kernel/power/power.h b/kernel/power/power.h | |
9474138d | 3009 | index 46b5ec7..4cc59d5 100644 |
2380c486 JR |
3010 | --- a/kernel/power/power.h |
3011 | +++ b/kernel/power/power.h | |
9474138d | 3012 | @@ -31,8 +31,12 @@ static inline char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
3013 | return arch_hibernation_header_restore(info) ? |
3014 | "architecture specific data" : NULL; | |
3015 | } | |
3016 | +#else | |
e999739a | 3017 | +extern char *check_image_kernel(struct swsusp_info *info); |
2380c486 | 3018 | #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ |
e999739a | 3019 | +extern int init_header(struct swsusp_info *info); |
2380c486 JR |
3020 | |
3021 | +extern char resume_file[256]; | |
3022 | /* | |
3023 | * Keep some memory free so that I/O operations can succeed without paging | |
3024 | * [Might this be more than 4 MB?] | |
9474138d | 3025 | @@ -49,6 +53,7 @@ static inline char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
3026 | extern int hibernation_snapshot(int platform_mode); |
3027 | extern int hibernation_restore(int platform_mode); | |
3028 | extern int hibernation_platform_enter(void); | |
3029 | +extern void platform_recover(int platform_mode); | |
3030 | #endif | |
3031 | ||
3032 | extern int pfn_is_nosave(unsigned long); | |
9474138d | 3033 | @@ -63,6 +68,8 @@ static struct kobj_attribute _name##_attr = { \ |
2380c486 JR |
3034 | .store = _name##_store, \ |
3035 | } | |
3036 | ||
3037 | +extern struct pbe *restore_pblist; | |
3038 | + | |
3039 | /* Preferred image size in bytes (default 500 MB) */ | |
3040 | extern unsigned long image_size; | |
3041 | extern int in_suspend; | |
9474138d | 3042 | @@ -223,3 +230,86 @@ static inline void suspend_thaw_processes(void) |
2380c486 JR |
3043 | { |
3044 | } | |
3045 | #endif | |
3046 | + | |
3047 | +extern struct page *saveable_page(struct zone *z, unsigned long p); | |
3048 | +#ifdef CONFIG_HIGHMEM | |
3049 | +extern struct page *saveable_highmem_page(struct zone *z, unsigned long p); | |
3050 | +#else | |
3051 | +static | |
3052 | +inline struct page *saveable_highmem_page(struct zone *z, unsigned long p) | |
3053 | +{ | |
3054 | + return NULL; | |
3055 | +} | |
3056 | +#endif | |
3057 | + | |
3058 | +#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe)) | |
3059 | +extern struct list_head nosave_regions; | |
3060 | + | |
3061 | +/** | |
3062 | + * This structure represents a range of page frames the contents of which | |
3063 | + * should not be saved during the suspend. | |
3064 | + */ | |
3065 | + | |
3066 | +struct nosave_region { | |
3067 | + struct list_head list; | |
3068 | + unsigned long start_pfn; | |
3069 | + unsigned long end_pfn; | |
3070 | +}; | |
3071 | + | |
3072 | +#ifndef PHYS_PFN_OFFSET | |
3073 | +#define PHYS_PFN_OFFSET 0 | |
3074 | +#endif | |
3075 | + | |
3076 | +#define ZONE_START(thiszone) ((thiszone)->zone_start_pfn - PHYS_PFN_OFFSET) | |
3077 | + | |
3078 | +#define BM_END_OF_MAP (~0UL) | |
3079 | + | |
3080 | +#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) | |
3081 | + | |
3082 | +struct bm_block { | |
3083 | + struct list_head hook; /* hook into a list of bitmap blocks */ | |
3084 | + unsigned long start_pfn; /* pfn represented by the first bit */ | |
3085 | + unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | |
3086 | + unsigned long *data; /* bitmap representing pages */ | |
3087 | +}; | |
3088 | + | |
3089 | +/* struct bm_position is used for browsing memory bitmaps */ | |
3090 | + | |
3091 | +struct bm_position { | |
3092 | + struct bm_block *block; | |
3093 | + int bit; | |
3094 | +}; | |
3095 | + | |
3096 | +struct memory_bitmap { | |
3097 | + struct list_head blocks; /* list of bitmap blocks */ | |
3098 | + struct linked_page *p_list; /* list of pages used to store zone | |
3099 | + * bitmap objects and bitmap block | |
3100 | + * objects | |
3101 | + */ | |
3102 | + struct bm_position cur; /* most recently used bit position */ | |
3103 | + struct bm_position iter; /* most recently used bit position | |
3104 | + * when iterating over a bitmap. | |
3105 | + */ | |
3106 | +}; | |
3107 | + | |
2380c486 JR |
3108 | +extern int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, |
3109 | + int safe_needed); | |
3110 | +extern void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |
3111 | +extern void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn); | |
3112 | +extern void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn); | |
3113 | +extern int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn); | |
3114 | +extern unsigned long memory_bm_next_pfn(struct memory_bitmap *bm); | |
3115 | +extern void memory_bm_position_reset(struct memory_bitmap *bm); | |
3116 | +extern void memory_bm_clear(struct memory_bitmap *bm); | |
3117 | +extern void memory_bm_copy(struct memory_bitmap *source, | |
3118 | + struct memory_bitmap *dest); | |
3119 | +extern void memory_bm_dup(struct memory_bitmap *source, | |
3120 | + struct memory_bitmap *dest); | |
3121 | + | |
3122 | +#ifdef CONFIG_TOI | |
3123 | +struct toi_module_ops; | |
3124 | +extern int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) | |
3125 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); | |
3126 | +extern int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) | |
3127 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); | |
3128 | +#endif | |
2380c486 | 3129 | diff --git a/kernel/power/process.c b/kernel/power/process.c |
9474138d | 3130 | index ca63401..10ba50f 100644 |
2380c486 JR |
3131 | --- a/kernel/power/process.c |
3132 | +++ b/kernel/power/process.c | |
3133 | @@ -13,6 +13,10 @@ | |
3134 | #include <linux/module.h> | |
3135 | #include <linux/syscalls.h> | |
3136 | #include <linux/freezer.h> | |
3137 | +#include <linux/buffer_head.h> | |
3138 | + | |
3139 | +int freezer_state; | |
3140 | +EXPORT_SYMBOL_GPL(freezer_state); | |
3141 | ||
3142 | /* | |
3143 | * Timeout for stopping processes | |
3144 | @@ -86,7 +90,8 @@ static int try_to_freeze_tasks(bool sig_only) | |
3145 | do_each_thread(g, p) { | |
3146 | task_lock(p); | |
3147 | if (freezing(p) && !freezer_should_skip(p)) | |
3148 | - printk(KERN_ERR " %s\n", p->comm); | |
3149 | + printk(KERN_ERR " %s (%d) failed to freeze.\n", | |
3150 | + p->comm, p->pid); | |
3151 | cancel_freezing(p); | |
3152 | task_unlock(p); | |
3153 | } while_each_thread(g, p); | |
3154 | @@ -106,22 +111,31 @@ int freeze_processes(void) | |
3155 | { | |
3156 | int error; | |
3157 | ||
3158 | - printk("Freezing user space processes ... "); | |
3159 | + printk(KERN_INFO "Stopping fuse filesystems.\n"); | |
3160 | + freeze_filesystems(FS_FREEZER_FUSE); | |
3161 | + freezer_state = FREEZER_FILESYSTEMS_FROZEN; | |
3162 | + printk(KERN_INFO "Freezing user space processes ... "); | |
3163 | error = try_to_freeze_tasks(true); | |
3164 | if (error) | |
3165 | goto Exit; | |
9474138d | 3166 | printk("done.\n"); |
2380c486 JR |
3167 | |
3168 | - printk("Freezing remaining freezable tasks ... "); | |
3169 | + sys_sync(); | |
3170 | + printk(KERN_INFO "Stopping normal filesystems.\n"); | |
3171 | + freeze_filesystems(FS_FREEZER_NORMAL); | |
3172 | + freezer_state = FREEZER_USERSPACE_FROZEN; | |
3173 | + printk(KERN_INFO "Freezing remaining freezable tasks ... "); | |
3174 | error = try_to_freeze_tasks(false); | |
3175 | if (error) | |
3176 | goto Exit; | |
3177 | printk("done."); | |
3178 | + freezer_state = FREEZER_FULLY_ON; | |
3179 | Exit: | |
3180 | BUG_ON(in_atomic()); | |
3181 | printk("\n"); | |
3182 | return error; | |
3183 | } | |
3184 | +EXPORT_SYMBOL_GPL(freeze_processes); | |
3185 | ||
3186 | static void thaw_tasks(bool nosig_only) | |
3187 | { | |
9474138d | 3188 | @@ -145,10 +159,35 @@ static void thaw_tasks(bool nosig_only) |
2380c486 JR |
3189 | |
3190 | void thaw_processes(void) | |
3191 | { | |
3192 | - printk("Restarting tasks ... "); | |
3193 | - thaw_tasks(true); | |
3194 | + int old_state = freezer_state; | |
3195 | + | |
3196 | + if (old_state == FREEZER_OFF) | |
3197 | + return; | |
3198 | + | |
2380c486 JR |
3199 | + freezer_state = FREEZER_OFF; |
3200 | + | |
3201 | + printk(KERN_INFO "Restarting all filesystems ...\n"); | |
3202 | + thaw_filesystems(FS_FREEZER_ALL); | |
3203 | + | |
3204 | + printk(KERN_INFO "Restarting tasks ... "); | |
2380c486 JR |
3205 | + if (old_state == FREEZER_FULLY_ON) |
3206 | + thaw_tasks(true); | |
3207 | thaw_tasks(false); | |
3208 | schedule(); | |
3209 | printk("done.\n"); | |
3210 | } | |
3211 | +EXPORT_SYMBOL_GPL(thaw_processes); | |
3212 | ||
3213 | +void thaw_kernel_threads(void) | |
3214 | +{ | |
3215 | + freezer_state = FREEZER_USERSPACE_FROZEN; | |
3216 | + printk(KERN_INFO "Restarting normal filesystems.\n"); | |
3217 | + thaw_filesystems(FS_FREEZER_NORMAL); | |
3218 | + thaw_tasks(true); | |
3219 | +} | |
3220 | + | |
3221 | +/* | |
3222 | + * It's ugly putting this EXPORT down here, but it's necessary so that it | |
3223 | + * doesn't matter whether the fs-freezing patch is applied or not. | |
3224 | + */ | |
3225 | +EXPORT_SYMBOL_GPL(thaw_kernel_threads); | |
3226 | diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c | |
9474138d | 3227 | index 33e2e4a..a78b7ef 100644 |
2380c486 JR |
3228 | --- a/kernel/power/snapshot.c |
3229 | +++ b/kernel/power/snapshot.c | |
3230 | @@ -34,6 +34,8 @@ | |
3231 | #include <asm/io.h> | |
3232 | ||
3233 | #include "power.h" | |
3234 | +#include "tuxonice_builtin.h" | |
3235 | +#include "tuxonice_pagedir.h" | |
3236 | ||
3237 | static int swsusp_page_is_free(struct page *); | |
3238 | static void swsusp_set_page_forbidden(struct page *); | |
3239 | @@ -45,6 +47,10 @@ static void swsusp_unset_page_forbidden(struct page *); | |
3240 | * directly to their "original" page frames. | |
3241 | */ | |
3242 | struct pbe *restore_pblist; | |
3243 | +EXPORT_SYMBOL_GPL(restore_pblist); | |
3244 | + | |
3245 | +int resume_attempted; | |
3246 | +EXPORT_SYMBOL_GPL(resume_attempted); | |
3247 | ||
3248 | /* Pointer to an auxiliary buffer (1 page) */ | |
3249 | static void *buffer; | |
3250 | @@ -87,6 +93,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) | |
3251 | ||
3252 | unsigned long get_safe_page(gfp_t gfp_mask) | |
3253 | { | |
3254 | + if (toi_running) | |
3255 | + return toi_get_nonconflicting_page(); | |
3256 | + | |
3257 | return (unsigned long)get_image_page(gfp_mask, PG_SAFE); | |
3258 | } | |
3259 | ||
9474138d | 3260 | @@ -223,47 +232,22 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) |
2380c486 JR |
3261 | * the represented memory area. |
3262 | */ | |
3263 | ||
3264 | -#define BM_END_OF_MAP (~0UL) | |
3265 | - | |
3266 | -#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) | |
3267 | - | |
3268 | -struct bm_block { | |
3269 | - struct list_head hook; /* hook into a list of bitmap blocks */ | |
3270 | - unsigned long start_pfn; /* pfn represented by the first bit */ | |
3271 | - unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | |
3272 | - unsigned long *data; /* bitmap representing pages */ | |
3273 | -}; | |
3274 | - | |
3275 | static inline unsigned long bm_block_bits(struct bm_block *bb) | |
3276 | { | |
3277 | return bb->end_pfn - bb->start_pfn; | |
3278 | } | |
3279 | ||
3280 | -/* strcut bm_position is used for browsing memory bitmaps */ | |
3281 | - | |
3282 | -struct bm_position { | |
3283 | - struct bm_block *block; | |
3284 | - int bit; | |
3285 | -}; | |
3286 | - | |
3287 | -struct memory_bitmap { | |
3288 | - struct list_head blocks; /* list of bitmap blocks */ | |
3289 | - struct linked_page *p_list; /* list of pages used to store zone | |
3290 | - * bitmap objects and bitmap block | |
3291 | - * objects | |
3292 | - */ | |
3293 | - struct bm_position cur; /* most recently used bit position */ | |
3294 | -}; | |
3295 | - | |
3296 | /* Functions that operate on memory bitmaps */ | |
3297 | ||
3298 | -static void memory_bm_position_reset(struct memory_bitmap *bm) | |
3299 | +void memory_bm_position_reset(struct memory_bitmap *bm) | |
3300 | { | |
3301 | bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); | |
3302 | bm->cur.bit = 0; | |
9474138d AM |
3303 | -} |
3304 | ||
3305 | -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |
2380c486 JR |
3306 | + bm->iter.block = list_entry(bm->blocks.next, struct bm_block, hook); |
3307 | + bm->iter.bit = 0; | |
9474138d | 3308 | +} |
2380c486 JR |
3309 | +EXPORT_SYMBOL_GPL(memory_bm_position_reset); |
3310 | ||
2380c486 JR |
3311 | /** |
3312 | * create_bm_block_list - create a list of block bitmap objects | |
9474138d | 3313 | @@ -371,7 +355,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) |
2380c486 JR |
3314 | /** |
3315 | * memory_bm_create - allocate memory for a memory bitmap | |
3316 | */ | |
3317 | -static int | |
3318 | +int | |
3319 | memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | |
3320 | { | |
3321 | struct chain_allocator ca; | |
9474138d | 3322 | @@ -427,11 +411,12 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) |
2380c486 JR |
3323 | memory_bm_free(bm, PG_UNSAFE_CLEAR); |
3324 | goto Exit; | |
3325 | } | |
3326 | +EXPORT_SYMBOL_GPL(memory_bm_create); | |
3327 | ||
3328 | /** | |
3329 | * memory_bm_free - free memory occupied by the memory bitmap @bm | |
3330 | */ | |
3331 | -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |
3332 | +void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |
3333 | { | |
3334 | struct bm_block *bb; | |
3335 | ||
9474138d | 3336 | @@ -443,6 +428,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) |
2380c486 JR |
3337 | |
3338 | INIT_LIST_HEAD(&bm->blocks); | |
3339 | } | |
3340 | +EXPORT_SYMBOL_GPL(memory_bm_free); | |
3341 | ||
3342 | /** | |
3343 | * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds | |
9474138d | 3344 | @@ -481,7 +467,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, |
2380c486 JR |
3345 | return 0; |
3346 | } | |
3347 | ||
3348 | -static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | |
3349 | +void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) | |
3350 | { | |
3351 | void *addr; | |
3352 | unsigned int bit; | |
9474138d | 3353 | @@ -491,6 +477,7 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
3354 | BUG_ON(error); |
3355 | set_bit(bit, addr); | |
3356 | } | |
3357 | +EXPORT_SYMBOL_GPL(memory_bm_set_bit); | |
3358 | ||
9474138d | 3359 | static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 | 3360 | { |
9474138d | 3361 | @@ -504,7 +491,7 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
3362 | return error; |
3363 | } | |
3364 | ||
3365 | -static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) | |
3366 | +void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) | |
3367 | { | |
3368 | void *addr; | |
3369 | unsigned int bit; | |
9474138d | 3370 | @@ -514,8 +501,9 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
3371 | BUG_ON(error); |
3372 | clear_bit(bit, addr); | |
3373 | } | |
3374 | +EXPORT_SYMBOL_GPL(memory_bm_clear_bit); | |
3375 | ||
3376 | -static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |
3377 | +int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |
3378 | { | |
3379 | void *addr; | |
3380 | unsigned int bit; | |
9474138d | 3381 | @@ -525,6 +513,7 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
3382 | BUG_ON(error); |
3383 | return test_bit(bit, addr); | |
3384 | } | |
3385 | +EXPORT_SYMBOL_GPL(memory_bm_test_bit); | |
3386 | ||
3387 | static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) | |
3388 | { | |
9474138d | 3389 | @@ -543,43 +532,178 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) |
2380c486 JR |
3390 | * this function. |
3391 | */ | |
3392 | ||
3393 | -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | |
3394 | +unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | |
3395 | { | |
3396 | struct bm_block *bb; | |
3397 | int bit; | |
3398 | ||
3399 | - bb = bm->cur.block; | |
3400 | + bb = bm->iter.block; | |
3401 | do { | |
3402 | - bit = bm->cur.bit; | |
3403 | + bit = bm->iter.bit; | |
3404 | bit = find_next_bit(bb->data, bm_block_bits(bb), bit); | |
3405 | if (bit < bm_block_bits(bb)) | |
3406 | goto Return_pfn; | |
3407 | ||
3408 | bb = list_entry(bb->hook.next, struct bm_block, hook); | |
3409 | - bm->cur.block = bb; | |
3410 | - bm->cur.bit = 0; | |
3411 | + bm->iter.block = bb; | |
3412 | + bm->iter.bit = 0; | |
3413 | } while (&bb->hook != &bm->blocks); | |
3414 | ||
3415 | memory_bm_position_reset(bm); | |
3416 | return BM_END_OF_MAP; | |
3417 | ||
3418 | Return_pfn: | |
3419 | - bm->cur.bit = bit + 1; | |
3420 | + bm->iter.bit = bit + 1; | |
3421 | return bb->start_pfn + bit; | |
3422 | } | |
3423 | +EXPORT_SYMBOL_GPL(memory_bm_next_pfn); | |
3424 | ||
3425 | -/** | |
3426 | - * This structure represents a range of page frames the contents of which | |
3427 | - * should not be saved during the suspend. | |
3428 | - */ | |
3429 | +void memory_bm_clear(struct memory_bitmap *bm) | |
3430 | +{ | |
3431 | + unsigned long pfn; | |
3432 | ||
3433 | -struct nosave_region { | |
3434 | - struct list_head list; | |
3435 | - unsigned long start_pfn; | |
3436 | - unsigned long end_pfn; | |
3437 | -}; | |
3438 | + memory_bm_position_reset(bm); | |
3439 | + pfn = memory_bm_next_pfn(bm); | |
3440 | + while (pfn != BM_END_OF_MAP) { | |
3441 | + memory_bm_clear_bit(bm, pfn); | |
3442 | + pfn = memory_bm_next_pfn(bm); | |
3443 | + } | |
3444 | +} | |
3445 | +EXPORT_SYMBOL_GPL(memory_bm_clear); | |
3446 | + | |
3447 | +void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest) | |
3448 | +{ | |
3449 | + unsigned long pfn; | |
3450 | + | |
3451 | + memory_bm_position_reset(source); | |
3452 | + pfn = memory_bm_next_pfn(source); | |
3453 | + while (pfn != BM_END_OF_MAP) { | |
3454 | + memory_bm_set_bit(dest, pfn); | |
3455 | + pfn = memory_bm_next_pfn(source); | |
3456 | + } | |
3457 | +} | |
3458 | +EXPORT_SYMBOL_GPL(memory_bm_copy); | |
3459 | + | |
3460 | +void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest) | |
3461 | +{ | |
3462 | + memory_bm_clear(dest); | |
3463 | + memory_bm_copy(source, dest); | |
3464 | +} | |
3465 | +EXPORT_SYMBOL_GPL(memory_bm_dup); | |
3466 | + | |
3467 | +#ifdef CONFIG_TOI | |
3468 | +#define DEFINE_MEMORY_BITMAP(name) \ | |
3469 | +struct memory_bitmap *name; \ | |
3470 | +EXPORT_SYMBOL_GPL(name) | |
3471 | + | |
3472 | +DEFINE_MEMORY_BITMAP(pageset1_map); | |
3473 | +DEFINE_MEMORY_BITMAP(pageset1_copy_map); | |
3474 | +DEFINE_MEMORY_BITMAP(pageset2_map); | |
3475 | +DEFINE_MEMORY_BITMAP(page_resave_map); | |
3476 | +DEFINE_MEMORY_BITMAP(io_map); | |
3477 | +DEFINE_MEMORY_BITMAP(nosave_map); | |
3478 | +DEFINE_MEMORY_BITMAP(free_map); | |
9474138d | 3479 | + |
2380c486 JR |
3480 | +int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) |
3481 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) | |
3482 | +{ | |
3483 | + int result = 0; | |
3484 | + unsigned int nr = 0; | |
3485 | + struct bm_block *bb; | |
3486 | + | |
3487 | + if (!bm) | |
3488 | + return result; | |
9474138d AM |
3489 | |
3490 | -static LIST_HEAD(nosave_regions); | |
2380c486 JR |
3491 | + list_for_each_entry(bb, &bm->blocks, hook) |
3492 | + nr++; | |
3493 | + | |
3494 | + result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int)); | |
3495 | + if (result) | |
3496 | + return result; | |
3497 | + | |
3498 | + list_for_each_entry(bb, &bm->blocks, hook) { | |
3499 | + result = (*rw_chunk)(WRITE, NULL, (char *) &bb->start_pfn, | |
3500 | + 2 * sizeof(unsigned long)); | |
3501 | + if (result) | |
3502 | + return result; | |
3503 | + | |
3504 | + result = (*rw_chunk)(WRITE, NULL, (char *) bb->data, PAGE_SIZE); | |
3505 | + if (result) | |
3506 | + return result; | |
3507 | + } | |
3508 | + | |
3509 | + return 0; | |
3510 | +} | |
3511 | +EXPORT_SYMBOL_GPL(memory_bm_write); | |
3512 | + | |
3513 | +int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) | |
3514 | + (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) | |
3515 | +{ | |
3516 | + int result = 0; | |
3517 | + unsigned int nr, i; | |
3518 | + struct bm_block *bb; | |
3519 | + | |
3520 | + if (!bm) | |
3521 | + return result; | |
3522 | + | |
3523 | + result = memory_bm_create(bm, GFP_KERNEL, 0); | |
3524 | + | |
3525 | + if (result) | |
3526 | + return result; | |
3527 | + | |
3528 | + result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int)); | |
3529 | + if (result) | |
3530 | + goto Free; | |
3531 | + | |
3532 | + for (i = 0; i < nr; i++) { | |
3533 | + unsigned long pfn; | |
3534 | + | |
3535 | + result = (*rw_chunk)(READ, NULL, (char *) &pfn, | |
3536 | + sizeof(unsigned long)); | |
3537 | + if (result) | |
3538 | + goto Free; | |
3539 | + | |
3540 | + list_for_each_entry(bb, &bm->blocks, hook) | |
3541 | + if (bb->start_pfn == pfn) | |
3542 | + break; | |
3543 | + | |
3544 | + if (&bb->hook == &bm->blocks) { | |
3545 | + printk(KERN_ERR | |
3546 | + "TuxOnIce: Failed to load memory bitmap.\n"); | |
3547 | + result = -EINVAL; | |
3548 | + goto Free; | |
3549 | + } | |
3550 | + | |
3551 | + result = (*rw_chunk)(READ, NULL, (char *) &pfn, | |
3552 | + sizeof(unsigned long)); | |
3553 | + if (result) | |
3554 | + goto Free; | |
3555 | + | |
3556 | + if (pfn != bb->end_pfn) { | |
3557 | + printk(KERN_ERR | |
3558 | + "TuxOnIce: Failed to load memory bitmap. " | |
3559 | + "End PFN doesn't match what was saved.\n"); | |
3560 | + result = -EINVAL; | |
3561 | + goto Free; | |
3562 | + } | |
3563 | + | |
3564 | + result = (*rw_chunk)(READ, NULL, (char *) bb->data, PAGE_SIZE); | |
3565 | + | |
3566 | + if (result) | |
3567 | + goto Free; | |
3568 | + } | |
3569 | + | |
3570 | + return 0; | |
3571 | + | |
3572 | +Free: | |
3573 | + memory_bm_free(bm, PG_ANY); | |
3574 | + return result; | |
3575 | +} | |
3576 | +EXPORT_SYMBOL_GPL(memory_bm_read); | |
3577 | +#endif | |
3578 | + | |
3579 | +LIST_HEAD(nosave_regions); | |
3580 | +EXPORT_SYMBOL_GPL(nosave_regions); | |
3581 | ||
3582 | /** | |
3583 | * register_nosave_region - register a range of page frames the contents | |
9474138d | 3584 | @@ -815,7 +939,7 @@ static unsigned int count_free_highmem_pages(void) |
2380c486 JR |
3585 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, |
3586 | * and it isn't a part of a free chunk of pages. | |
3587 | */ | |
3588 | -static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
3589 | +struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
3590 | { | |
3591 | struct page *page; | |
3592 | ||
9474138d | 3593 | @@ -834,6 +958,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) |
2380c486 JR |
3594 | |
3595 | return page; | |
3596 | } | |
3597 | +EXPORT_SYMBOL_GPL(saveable_highmem_page); | |
3598 | ||
3599 | /** | |
3600 | * count_highmem_pages - compute the total number of saveable highmem | |
9474138d | 3601 | @@ -859,11 +984,6 @@ unsigned int count_highmem_pages(void) |
2380c486 JR |
3602 | } |
3603 | return n; | |
3604 | } | |
3605 | -#else | |
3606 | -static inline void *saveable_highmem_page(struct zone *z, unsigned long p) | |
3607 | -{ | |
3608 | - return NULL; | |
3609 | -} | |
3610 | #endif /* CONFIG_HIGHMEM */ | |
3611 | ||
3612 | /** | |
9474138d | 3613 | @@ -874,7 +994,7 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p) |
2380c486 JR |
3614 | * of pages statically defined as 'unsaveable', and it isn't a part of |
3615 | * a free chunk of pages. | |
3616 | */ | |
3617 | -static struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
3618 | +struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
3619 | { | |
3620 | struct page *page; | |
3621 | ||
9474138d | 3622 | @@ -896,6 +1016,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) |
2380c486 JR |
3623 | |
3624 | return page; | |
3625 | } | |
3626 | +EXPORT_SYMBOL_GPL(saveable_page); | |
3627 | ||
3628 | /** | |
3629 | * count_data_pages - compute the total number of saveable non-highmem | |
9474138d | 3630 | @@ -1210,6 +1331,9 @@ asmlinkage int swsusp_save(void) |
2380c486 JR |
3631 | { |
3632 | unsigned int nr_pages, nr_highmem; | |
3633 | ||
3634 | + if (toi_running) | |
3635 | + return toi_post_context_save(); | |
3636 | + | |
3637 | printk(KERN_INFO "PM: Creating hibernation image: \n"); | |
3638 | ||
3639 | drain_local_pages(NULL); | |
9474138d | 3640 | @@ -1250,14 +1374,14 @@ asmlinkage int swsusp_save(void) |
2380c486 JR |
3641 | } |
3642 | ||
3643 | #ifndef CONFIG_ARCH_HIBERNATION_HEADER | |
3644 | -static int init_header_complete(struct swsusp_info *info) | |
e999739a | 3645 | +int init_header_complete(struct swsusp_info *info) |
2380c486 JR |
3646 | { |
3647 | memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); | |
3648 | info->version_code = LINUX_VERSION_CODE; | |
3649 | return 0; | |
3650 | } | |
3651 | ||
3652 | -static char *check_image_kernel(struct swsusp_info *info) | |
e999739a | 3653 | +char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
3654 | { |
3655 | if (info->version_code != LINUX_VERSION_CODE) | |
3656 | return "kernel version"; | |
9474138d | 3657 | @@ -1271,6 +1395,7 @@ static char *check_image_kernel(struct swsusp_info *info) |
2380c486 JR |
3658 | return "machine"; |
3659 | return NULL; | |
3660 | } | |
e999739a | 3661 | +EXPORT_SYMBOL_GPL(check_image_kernel); |
2380c486 JR |
3662 | #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ |
3663 | ||
3664 | unsigned long snapshot_get_image_size(void) | |
9474138d | 3665 | @@ -1278,7 +1403,7 @@ unsigned long snapshot_get_image_size(void) |
2380c486 JR |
3666 | return nr_copy_pages + nr_meta_pages + 1; |
3667 | } | |
3668 | ||
3669 | -static int init_header(struct swsusp_info *info) | |
e999739a | 3670 | +int init_header(struct swsusp_info *info) |
2380c486 JR |
3671 | { |
3672 | memset(info, 0, sizeof(struct swsusp_info)); | |
3673 | info->num_physpages = num_physpages; | |
9474138d | 3674 | @@ -1288,6 +1413,7 @@ static int init_header(struct swsusp_info *info) |
2380c486 | 3675 | info->size <<= PAGE_SHIFT; |
e999739a | 3676 | return init_header_complete(info); |
2380c486 | 3677 | } |
e999739a | 3678 | +EXPORT_SYMBOL_GPL(init_header); |
2380c486 JR |
3679 | |
3680 | /** | |
3681 | * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm | |
2380c486 JR |
3682 | diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h |
3683 | new file mode 100644 | |
9474138d | 3684 | index 0000000..1d6349d |
2380c486 JR |
3685 | --- /dev/null |
3686 | +++ b/kernel/power/tuxonice.h | |
9474138d | 3687 | @@ -0,0 +1,213 @@ |
2380c486 JR |
3688 | +/* |
3689 | + * kernel/power/tuxonice.h | |
3690 | + * | |
3691 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
3692 | + * | |
3693 | + * This file is released under the GPLv2. | |
3694 | + * | |
3695 | + * It contains declarations used throughout swsusp. | |
3696 | + * | |
3697 | + */ | |
3698 | + | |
3699 | +#ifndef KERNEL_POWER_TOI_H | |
3700 | +#define KERNEL_POWER_TOI_H | |
3701 | + | |
3702 | +#include <linux/delay.h> | |
3703 | +#include <linux/bootmem.h> | |
3704 | +#include <linux/suspend.h> | |
3705 | +#include <linux/fs.h> | |
3706 | +#include <linux/kmod.h> | |
3707 | +#include <asm/setup.h> | |
3708 | +#include "tuxonice_pageflags.h" | |
9474138d | 3709 | +#include "power.h" |
2380c486 | 3710 | + |
0ada99ac | 3711 | +#define TOI_CORE_VERSION "3.0.1" |
2380c486 JR |
3712 | + |
3713 | +#define MY_BOOT_KERNEL_DATA_VERSION 1 | |
3714 | + | |
3715 | +struct toi_boot_kernel_data { | |
3716 | + int version; | |
3717 | + int size; | |
3718 | + unsigned long toi_action; | |
3719 | + unsigned long toi_debug_state; | |
3720 | + u32 toi_default_console_level; | |
3721 | + int toi_io_time[2][2]; | |
3722 | + char toi_nosave_commandline[COMMAND_LINE_SIZE]; | |
3723 | +}; | |
3724 | + | |
3725 | +extern struct toi_boot_kernel_data toi_bkd; | |
3726 | + | |
3727 | +/* Location of boot kernel data struct in kernel being resumed */ | |
3728 | +extern unsigned long boot_kernel_data_buffer; | |
3729 | + | |
3730 | +/* == Action states == */ | |
3731 | + | |
3732 | +enum { | |
3733 | + TOI_REBOOT, | |
3734 | + TOI_PAUSE, | |
3735 | + TOI_LOGALL, | |
3736 | + TOI_CAN_CANCEL, | |
3737 | + TOI_KEEP_IMAGE, | |
3738 | + TOI_FREEZER_TEST, | |
3739 | + TOI_SINGLESTEP, | |
3740 | + TOI_PAUSE_NEAR_PAGESET_END, | |
3741 | + TOI_TEST_FILTER_SPEED, | |
3742 | + TOI_TEST_BIO, | |
3743 | + TOI_NO_PAGESET2, | |
2380c486 JR |
3744 | + TOI_IGNORE_ROOTFS, |
3745 | + TOI_REPLACE_SWSUSP, | |
3746 | + TOI_PAGESET2_FULL, | |
3747 | + TOI_ABORT_ON_RESAVE_NEEDED, | |
3748 | + TOI_NO_MULTITHREADED_IO, | |
3749 | + TOI_NO_DIRECT_LOAD, | |
3750 | + TOI_LATE_CPU_HOTPLUG, | |
3751 | + TOI_GET_MAX_MEM_ALLOCD, | |
3752 | + TOI_NO_FLUSHER_THREAD, | |
3753 | + TOI_NO_PS2_IF_UNNEEDED | |
3754 | +}; | |
3755 | + | |
3756 | +#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action)) | |
3757 | + | |
3758 | +/* == Result states == */ | |
3759 | + | |
3760 | +enum { | |
3761 | + TOI_ABORTED, | |
3762 | + TOI_ABORT_REQUESTED, | |
3763 | + TOI_NOSTORAGE_AVAILABLE, | |
3764 | + TOI_INSUFFICIENT_STORAGE, | |
3765 | + TOI_FREEZING_FAILED, | |
3766 | + TOI_KEPT_IMAGE, | |
3767 | + TOI_WOULD_EAT_MEMORY, | |
3768 | + TOI_UNABLE_TO_FREE_ENOUGH_MEMORY, | |
3769 | + TOI_PM_SEM, | |
3770 | + TOI_DEVICE_REFUSED, | |
3771 | + TOI_SYSDEV_REFUSED, | |
3772 | + TOI_EXTRA_PAGES_ALLOW_TOO_SMALL, | |
3773 | + TOI_UNABLE_TO_PREPARE_IMAGE, | |
3774 | + TOI_FAILED_MODULE_INIT, | |
3775 | + TOI_FAILED_MODULE_CLEANUP, | |
3776 | + TOI_FAILED_IO, | |
3777 | + TOI_OUT_OF_MEMORY, | |
3778 | + TOI_IMAGE_ERROR, | |
3779 | + TOI_PLATFORM_PREP_FAILED, | |
3780 | + TOI_CPU_HOTPLUG_FAILED, | |
3781 | + TOI_ARCH_PREPARE_FAILED, | |
3782 | + TOI_RESAVE_NEEDED, | |
3783 | + TOI_CANT_SUSPEND, | |
3784 | + TOI_NOTIFIERS_PREPARE_FAILED, | |
3785 | + TOI_PRE_SNAPSHOT_FAILED, | |
3786 | + TOI_PRE_RESTORE_FAILED, | |
3787 | + TOI_USERMODE_HELPERS_ERR, | |
3788 | + TOI_CANT_USE_ALT_RESUME, | |
0ada99ac | 3789 | + TOI_HEADER_TOO_BIG, |
2380c486 JR |
3790 | + TOI_NUM_RESULT_STATES /* Used in printing debug info only */ |
3791 | +}; | |
3792 | + | |
3793 | +extern unsigned long toi_result; | |
3794 | + | |
3795 | +#define set_result_state(bit) (test_and_set_bit(bit, &toi_result)) | |
3796 | +#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \ | |
3797 | + test_and_set_bit(bit, &toi_result)) | |
3798 | +#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result)) | |
3799 | +#define test_result_state(bit) (test_bit(bit, &toi_result)) | |
3800 | + | |
3801 | +/* == Debug sections and levels == */ | |
3802 | + | |
3803 | +/* debugging levels. */ | |
3804 | +enum { | |
3805 | + TOI_STATUS = 0, | |
3806 | + TOI_ERROR = 2, | |
3807 | + TOI_LOW, | |
3808 | + TOI_MEDIUM, | |
3809 | + TOI_HIGH, | |
3810 | + TOI_VERBOSE, | |
3811 | +}; | |
3812 | + | |
3813 | +enum { | |
3814 | + TOI_ANY_SECTION, | |
3815 | + TOI_EAT_MEMORY, | |
3816 | + TOI_IO, | |
3817 | + TOI_HEADER, | |
3818 | + TOI_WRITER, | |
3819 | + TOI_MEMORY, | |
3820 | +}; | |
3821 | + | |
3822 | +#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state)) | |
3823 | +#define clear_debug_state(bit) \ | |
3824 | + (test_and_clear_bit(bit, &toi_bkd.toi_debug_state)) | |
3825 | +#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state)) | |
3826 | + | |
3827 | +/* == Steps in hibernating == */ | |
3828 | + | |
3829 | +enum { | |
3830 | + STEP_HIBERNATE_PREPARE_IMAGE, | |
3831 | + STEP_HIBERNATE_SAVE_IMAGE, | |
3832 | + STEP_HIBERNATE_POWERDOWN, | |
3833 | + STEP_RESUME_CAN_RESUME, | |
3834 | + STEP_RESUME_LOAD_PS1, | |
3835 | + STEP_RESUME_DO_RESTORE, | |
3836 | + STEP_RESUME_READ_PS2, | |
3837 | + STEP_RESUME_GO, | |
3838 | + STEP_RESUME_ALT_IMAGE, | |
3839 | + STEP_CLEANUP, | |
3840 | + STEP_QUIET_CLEANUP | |
3841 | +}; | |
3842 | + | |
3843 | +/* == TuxOnIce states == | |
3844 | + (see also include/linux/suspend.h) */ | |
3845 | + | |
3846 | +#define get_toi_state() (toi_state) | |
3847 | +#define restore_toi_state(saved_state) \ | |
3848 | + do { toi_state = saved_state; } while (0) | |
3849 | + | |
3850 | +/* == Module support == */ | |
3851 | + | |
3852 | +struct toi_core_fns { | |
3853 | + int (*post_context_save)(void); | |
3854 | + unsigned long (*get_nonconflicting_page)(void); | |
3855 | + int (*try_hibernate)(void); | |
3856 | + void (*try_resume)(void); | |
3857 | +}; | |
3858 | + | |
3859 | +extern struct toi_core_fns *toi_core_fns; | |
3860 | + | |
3861 | +/* == All else == */ | |
3862 | +#define KB(x) ((x) << (PAGE_SHIFT - 10)) | |
3863 | +#define MB(x) ((x) >> (20 - PAGE_SHIFT)) | |
3864 | + | |
3865 | +extern int toi_start_anything(int toi_or_resume); | |
3866 | +extern void toi_finish_anything(int toi_or_resume); | |
3867 | + | |
3868 | +extern int save_image_part1(void); | |
3869 | +extern int toi_atomic_restore(void); | |
3870 | + | |
9474138d AM |
3871 | +extern int toi_try_hibernate(void); |
3872 | +extern void toi_try_resume(void); | |
2380c486 JR |
3873 | + |
3874 | +extern int __toi_post_context_save(void); | |
3875 | + | |
3876 | +extern unsigned int nr_hibernates; | |
3877 | +extern char alt_resume_param[256]; | |
3878 | + | |
3879 | +extern void copyback_post(void); | |
3880 | +extern int toi_hibernate(void); | |
3881 | +extern long extra_pd1_pages_used; | |
3882 | + | |
3883 | +#define SECTOR_SIZE 512 | |
3884 | + | |
3885 | +extern void toi_early_boot_message(int can_erase_image, int default_answer, | |
3886 | + char *warning_reason, ...); | |
3887 | + | |
3888 | +static inline int load_direct(struct page *page) | |
3889 | +{ | |
3890 | + return test_action_state(TOI_NO_DIRECT_LOAD) ? 0 : | |
3891 | + PagePageset1Copy(page); | |
3892 | +} | |
3893 | + | |
3894 | +extern int do_check_can_resume(void); | |
3895 | +extern int do_toi_step(int step); | |
3896 | +extern int toi_launch_userspace_program(char *command, int channel_no, | |
3897 | + enum umh_wait wait, int debug); | |
3898 | + | |
3899 | +extern char *tuxonice_signature; | |
3900 | +#endif | |
3901 | diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c | |
3902 | new file mode 100644 | |
9474138d | 3903 | index 0000000..0089ab8 |
2380c486 JR |
3904 | --- /dev/null |
3905 | +++ b/kernel/power/tuxonice_alloc.c | |
9474138d | 3906 | @@ -0,0 +1,288 @@ |
2380c486 JR |
3907 | +/* |
3908 | + * kernel/power/tuxonice_alloc.c | |
3909 | + * | |
3910 | + * Copyright (C) 2008 Nigel Cunningham (nigel at tuxonice net) | |
3911 | + * | |
3912 | + * This file is released under the GPLv2. | |
3913 | + * | |
3914 | + */ | |
3915 | + | |
3916 | +#ifdef CONFIG_PM_DEBUG | |
3917 | +#include <linux/module.h> | |
3918 | +#include <linux/slab.h> | |
3919 | +#include "tuxonice_modules.h" | |
3920 | +#include "tuxonice_alloc.h" | |
3921 | +#include "tuxonice_sysfs.h" | |
3922 | +#include "tuxonice.h" | |
3923 | + | |
3924 | +#define TOI_ALLOC_PATHS 39 | |
3925 | + | |
3926 | +static DEFINE_MUTEX(toi_alloc_mutex); | |
3927 | + | |
3928 | +static struct toi_module_ops toi_alloc_ops; | |
3929 | + | |
3930 | +static int toi_fail_num; | |
3931 | +static atomic_t toi_alloc_count[TOI_ALLOC_PATHS], | |
3932 | + toi_free_count[TOI_ALLOC_PATHS], | |
3933 | + toi_test_count[TOI_ALLOC_PATHS], | |
3934 | + toi_fail_count[TOI_ALLOC_PATHS]; | |
3935 | +static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS]; | |
3936 | +static int cur_allocd, max_allocd; | |
3937 | + | |
3938 | +static char *toi_alloc_desc[TOI_ALLOC_PATHS] = { | |
3939 | + "", /* 0 */ | |
3940 | + "get_io_info_struct", | |
3941 | + "extent", | |
3942 | + "extent (loading chain)", | |
3943 | + "userui channel", | |
3944 | + "userui arg", /* 5 */ | |
3945 | + "attention list metadata", | |
3946 | + "extra pagedir memory metadata", | |
3947 | + "bdev metadata", | |
3948 | + "extra pagedir memory", | |
3949 | + "header_locations_read", /* 10 */ | |
3950 | + "bio queue", | |
3951 | + "prepare_readahead", | |
3952 | + "i/o buffer", | |
3953 | + "writer buffer in bio_init", | |
3954 | + "checksum buffer", /* 15 */ | |
3955 | + "compression buffer", | |
3956 | + "filewriter signature op", | |
3957 | + "set resume param alloc1", | |
3958 | + "set resume param alloc2", | |
3959 | + "debugging info buffer", /* 20 */ | |
3960 | + "check can resume buffer", | |
3961 | + "write module config buffer", | |
3962 | + "read module config buffer", | |
3963 | + "write image header buffer", | |
3964 | + "read pageset1 buffer", /* 25 */ | |
3965 | + "get_have_image_data buffer", | |
3966 | + "checksum page", | |
3967 | + "worker rw loop", | |
3968 | + "get nonconflicting page", | |
3969 | + "ps1 load addresses", /* 30 */ | |
3970 | + "remove swap image", | |
3971 | + "swap image exists", | |
3972 | + "swap parse sig location", | |
3973 | + "sysfs kobj", | |
3974 | + "swap mark resume attempted buffer", /* 35 */ | |
3975 | + "cluster member", | |
3976 | + "boot kernel data buffer", | |
3977 | + "setting swap signature" | |
3978 | +}; | |
3979 | + | |
3980 | +#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \ | |
3981 | + do { \ | |
3982 | + BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \ | |
3983 | + \ | |
3984 | + if (FAIL_NUM == toi_fail_num) { \ | |
3985 | + atomic_inc(&toi_test_count[FAIL_NUM]); \ | |
3986 | + toi_fail_num = 0; \ | |
3987 | + return FAIL_VAL; \ | |
3988 | + } \ | |
3989 | + } while (0) | |
3990 | + | |
9474138d | 3991 | +static void alloc_update_stats(int fail_num, void *result, int size) |
2380c486 JR |
3992 | +{ |
3993 | + if (!result) { | |
3994 | + atomic_inc(&toi_fail_count[fail_num]); | |
3995 | + return; | |
3996 | + } | |
3997 | + | |
3998 | + atomic_inc(&toi_alloc_count[fail_num]); | |
3999 | + if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { | |
4000 | + mutex_lock(&toi_alloc_mutex); | |
4001 | + toi_cur_allocd[fail_num]++; | |
9474138d | 4002 | + cur_allocd+= size; |
2380c486 JR |
4003 | + if (unlikely(cur_allocd > max_allocd)) { |
4004 | + int i; | |
4005 | + | |
4006 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) | |
4007 | + toi_max_allocd[i] = toi_cur_allocd[i]; | |
4008 | + max_allocd = cur_allocd; | |
4009 | + } | |
4010 | + mutex_unlock(&toi_alloc_mutex); | |
4011 | + } | |
4012 | +} | |
4013 | + | |
9474138d | 4014 | +static void free_update_stats(int fail_num, int size) |
2380c486 JR |
4015 | +{ |
4016 | + BUG_ON(fail_num >= TOI_ALLOC_PATHS); | |
4017 | + atomic_inc(&toi_free_count[fail_num]); | |
4018 | + if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { | |
4019 | + mutex_lock(&toi_alloc_mutex); | |
9474138d | 4020 | + cur_allocd-= size; |
2380c486 JR |
4021 | + toi_cur_allocd[fail_num]--; |
4022 | + mutex_unlock(&toi_alloc_mutex); | |
4023 | + } | |
4024 | +} | |
4025 | + | |
4026 | +void *toi_kzalloc(int fail_num, size_t size, gfp_t flags) | |
4027 | +{ | |
4028 | + void *result; | |
4029 | + | |
4030 | + if (toi_alloc_ops.enabled) | |
4031 | + MIGHT_FAIL(fail_num, NULL); | |
4032 | + result = kzalloc(size, flags); | |
4033 | + if (toi_alloc_ops.enabled) | |
9474138d | 4034 | + alloc_update_stats(fail_num, result, size); |
2380c486 JR |
4035 | + return result; |
4036 | +} | |
4037 | +EXPORT_SYMBOL_GPL(toi_kzalloc); | |
4038 | + | |
4039 | +unsigned long toi_get_free_pages(int fail_num, gfp_t mask, | |
4040 | + unsigned int order) | |
4041 | +{ | |
4042 | + unsigned long result; | |
4043 | + | |
4044 | + if (toi_alloc_ops.enabled) | |
4045 | + MIGHT_FAIL(fail_num, 0); | |
4046 | + result = __get_free_pages(mask, order); | |
4047 | + if (toi_alloc_ops.enabled) | |
9474138d AM |
4048 | + alloc_update_stats(fail_num, (void *) result, |
4049 | + PAGE_SIZE << order); | |
2380c486 JR |
4050 | + return result; |
4051 | +} | |
4052 | +EXPORT_SYMBOL_GPL(toi_get_free_pages); | |
4053 | + | |
4054 | +struct page *toi_alloc_page(int fail_num, gfp_t mask) | |
4055 | +{ | |
4056 | + struct page *result; | |
4057 | + | |
4058 | + if (toi_alloc_ops.enabled) | |
4059 | + MIGHT_FAIL(fail_num, NULL); | |
4060 | + result = alloc_page(mask); | |
4061 | + if (toi_alloc_ops.enabled) | |
9474138d | 4062 | + alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); |
2380c486 JR |
4063 | + return result; |
4064 | +} | |
4065 | +EXPORT_SYMBOL_GPL(toi_alloc_page); | |
4066 | + | |
4067 | +unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask) | |
4068 | +{ | |
4069 | + unsigned long result; | |
4070 | + | |
4071 | + if (toi_alloc_ops.enabled) | |
4072 | + MIGHT_FAIL(fail_num, 0); | |
4073 | + result = get_zeroed_page(mask); | |
4074 | + if (toi_alloc_ops.enabled) | |
9474138d | 4075 | + alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); |
2380c486 JR |
4076 | + return result; |
4077 | +} | |
4078 | +EXPORT_SYMBOL_GPL(toi_get_zeroed_page); | |
4079 | + | |
9474138d | 4080 | +void toi_kfree(int fail_num, const void *arg, int size) |
2380c486 JR |
4081 | +{ |
4082 | + if (arg && toi_alloc_ops.enabled) | |
9474138d | 4083 | + free_update_stats(fail_num, size); |
2380c486 JR |
4084 | + |
4085 | + kfree(arg); | |
4086 | +} | |
4087 | +EXPORT_SYMBOL_GPL(toi_kfree); | |
4088 | + | |
4089 | +void toi_free_page(int fail_num, unsigned long virt) | |
4090 | +{ | |
4091 | + if (virt && toi_alloc_ops.enabled) | |
9474138d | 4092 | + free_update_stats(fail_num, PAGE_SIZE); |
2380c486 JR |
4093 | + |
4094 | + free_page(virt); | |
4095 | +} | |
4096 | +EXPORT_SYMBOL_GPL(toi_free_page); | |
4097 | + | |
4098 | +void toi__free_page(int fail_num, struct page *page) | |
4099 | +{ | |
4100 | + if (page && toi_alloc_ops.enabled) | |
9474138d | 4101 | + free_update_stats(fail_num, PAGE_SIZE); |
2380c486 JR |
4102 | + |
4103 | + __free_page(page); | |
4104 | +} | |
4105 | +EXPORT_SYMBOL_GPL(toi__free_page); | |
4106 | + | |
4107 | +void toi_free_pages(int fail_num, struct page *page, int order) | |
4108 | +{ | |
4109 | + if (page && toi_alloc_ops.enabled) | |
9474138d | 4110 | + free_update_stats(fail_num, PAGE_SIZE << order); |
2380c486 JR |
4111 | + |
4112 | + __free_pages(page, order); | |
4113 | +} | |
4114 | + | |
4115 | +void toi_alloc_print_debug_stats(void) | |
4116 | +{ | |
4117 | + int i, header_done = 0; | |
4118 | + | |
4119 | + if (!toi_alloc_ops.enabled) | |
4120 | + return; | |
4121 | + | |
4122 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) | |
4123 | + if (atomic_read(&toi_alloc_count[i]) != | |
4124 | + atomic_read(&toi_free_count[i])) { | |
4125 | + if (!header_done) { | |
4126 | + printk(KERN_INFO "Idx Allocs Frees Tests " | |
4127 | + " Fails Max Description\n"); | |
4128 | + header_done = 1; | |
4129 | + } | |
4130 | + | |
4131 | + printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i, | |
4132 | + atomic_read(&toi_alloc_count[i]), | |
4133 | + atomic_read(&toi_free_count[i]), | |
4134 | + atomic_read(&toi_test_count[i]), | |
4135 | + atomic_read(&toi_fail_count[i]), | |
4136 | + toi_max_allocd[i], | |
4137 | + toi_alloc_desc[i]); | |
4138 | + } | |
4139 | +} | |
4140 | +EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats); | |
4141 | + | |
4142 | +static int toi_alloc_initialise(int starting_cycle) | |
4143 | +{ | |
4144 | + int i; | |
4145 | + | |
4146 | + if (starting_cycle && toi_alloc_ops.enabled) { | |
4147 | + for (i = 0; i < TOI_ALLOC_PATHS; i++) { | |
4148 | + atomic_set(&toi_alloc_count[i], 0); | |
4149 | + atomic_set(&toi_free_count[i], 0); | |
4150 | + atomic_set(&toi_test_count[i], 0); | |
4151 | + atomic_set(&toi_fail_count[i], 0); | |
4152 | + toi_cur_allocd[i] = 0; | |
4153 | + toi_max_allocd[i] = 0; | |
4154 | + }; | |
4155 | + max_allocd = 0; | |
4156 | + cur_allocd = 0; | |
4157 | + } | |
4158 | + | |
4159 | + return 0; | |
4160 | +} | |
4161 | + | |
4162 | +static struct toi_sysfs_data sysfs_params[] = { | |
4163 | + SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL), | |
4164 | + SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action, | |
4165 | + TOI_GET_MAX_MEM_ALLOCD, 0), | |
4166 | + SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0, | |
4167 | + NULL) | |
4168 | +}; | |
4169 | + | |
4170 | +static struct toi_module_ops toi_alloc_ops = { | |
4171 | + .type = MISC_HIDDEN_MODULE, | |
4172 | + .name = "allocation debugging", | |
4173 | + .directory = "alloc", | |
4174 | + .module = THIS_MODULE, | |
4175 | + .early = 1, | |
4176 | + .initialise = toi_alloc_initialise, | |
4177 | + | |
4178 | + .sysfs_data = sysfs_params, | |
4179 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
4180 | + sizeof(struct toi_sysfs_data), | |
4181 | +}; | |
4182 | + | |
4183 | +int toi_alloc_init(void) | |
4184 | +{ | |
4185 | + int result = toi_register_module(&toi_alloc_ops); | |
4186 | + toi_alloc_ops.enabled = 0; | |
4187 | + return result; | |
4188 | +} | |
4189 | + | |
4190 | +void toi_alloc_exit(void) | |
4191 | +{ | |
4192 | + toi_unregister_module(&toi_alloc_ops); | |
4193 | +} | |
4194 | +#endif | |
4195 | diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h | |
4196 | new file mode 100644 | |
9474138d | 4197 | index 0000000..6069dfa |
2380c486 JR |
4198 | --- /dev/null |
4199 | +++ b/kernel/power/tuxonice_alloc.h | |
4200 | @@ -0,0 +1,51 @@ | |
4201 | +/* | |
4202 | + * kernel/power/tuxonice_alloc.h | |
4203 | + * | |
4204 | + * Copyright (C) 2008 Nigel Cunningham (nigel at tuxonice net) | |
4205 | + * | |
4206 | + * This file is released under the GPLv2. | |
4207 | + * | |
4208 | + */ | |
4209 | + | |
4210 | +#define TOI_WAIT_GFP (GFP_KERNEL | __GFP_NOWARN) | |
4211 | +#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN) | |
4212 | + | |
4213 | +#ifdef CONFIG_PM_DEBUG | |
4214 | +extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags); | |
9474138d | 4215 | +extern void toi_kfree(int fail_num, const void *arg, int size); |
2380c486 JR |
4216 | + |
4217 | +extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask, | |
4218 | + unsigned int order); | |
4219 | +#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0) | |
4220 | +extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask); | |
4221 | +extern void toi_free_page(int fail_num, unsigned long buf); | |
4222 | +extern void toi__free_page(int fail_num, struct page *page); | |
4223 | +extern void toi_free_pages(int fail_num, struct page *page, int order); | |
4224 | +extern struct page *toi_alloc_page(int fail_num, gfp_t mask); | |
4225 | +extern int toi_alloc_init(void); | |
4226 | +extern void toi_alloc_exit(void); | |
4227 | + | |
4228 | +extern void toi_alloc_print_debug_stats(void); | |
4229 | + | |
4230 | +#else /* CONFIG_PM_DEBUG */ | |
4231 | + | |
4232 | +#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS)) | |
9474138d | 4233 | +#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN)) |
2380c486 JR |
4234 | + |
4235 | +#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER) | |
4236 | +#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS) | |
4237 | +#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS) | |
4238 | +#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0) | |
4239 | +#define toi__free_page(FAIL, PAGE) __free_page(PAGE) | |
4240 | +#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER) | |
4241 | +#define toi_alloc_page(FAIL, MASK) alloc_page(MASK) | |
4242 | +static inline int toi_alloc_init(void) | |
4243 | +{ | |
4244 | + return 0; | |
4245 | +} | |
4246 | + | |
4247 | +static inline void toi_alloc_exit(void) { } | |
4248 | + | |
4249 | +static inline void toi_alloc_print_debug_stats(void) { } | |
4250 | + | |
4251 | +#endif | |
4252 | diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c | |
4253 | new file mode 100644 | |
9474138d | 4254 | index 0000000..d6ef2b0 |
2380c486 JR |
4255 | --- /dev/null |
4256 | +++ b/kernel/power/tuxonice_atomic_copy.c | |
9474138d | 4257 | @@ -0,0 +1,415 @@ |
2380c486 JR |
4258 | +/* |
4259 | + * kernel/power/tuxonice_atomic_copy.c | |
4260 | + * | |
4261 | + * Copyright 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
4262 | + * Copyright (C) 2006 Red Hat, inc. | |
4263 | + * | |
4264 | + * Distributed under GPLv2. | |
4265 | + * | |
4266 | + * Routines for doing the atomic save/restore. | |
4267 | + */ | |
4268 | + | |
4269 | +#include <linux/suspend.h> | |
4270 | +#include <linux/highmem.h> | |
4271 | +#include <linux/cpu.h> | |
4272 | +#include <linux/freezer.h> | |
4273 | +#include <linux/console.h> | |
9474138d | 4274 | +#include <asm/suspend.h> |
2380c486 JR |
4275 | +#include "tuxonice.h" |
4276 | +#include "tuxonice_storage.h" | |
4277 | +#include "tuxonice_power_off.h" | |
4278 | +#include "tuxonice_ui.h" | |
2380c486 JR |
4279 | +#include "tuxonice_io.h" |
4280 | +#include "tuxonice_prepare_image.h" | |
4281 | +#include "tuxonice_pageflags.h" | |
4282 | +#include "tuxonice_checksum.h" | |
4283 | +#include "tuxonice_builtin.h" | |
4284 | +#include "tuxonice_atomic_copy.h" | |
4285 | +#include "tuxonice_alloc.h" | |
4286 | + | |
4287 | +long extra_pd1_pages_used; | |
4288 | + | |
4289 | +/** | |
4290 | + * free_pbe_list - free page backup entries used by the atomic copy code. | |
4291 | + * @list: List to free. | |
4292 | + * @highmem: Whether the list is in highmem. | |
4293 | + * | |
4294 | + * Normally, this function isn't used. If, however, we need to abort before | |
4295 | + * doing the atomic copy, we use this to free the pbes previously allocated. | |
4296 | + **/ | |
4297 | +static void free_pbe_list(struct pbe **list, int highmem) | |
4298 | +{ | |
4299 | + while (*list) { | |
4300 | + int i; | |
4301 | + struct pbe *free_pbe, *next_page = NULL; | |
4302 | + struct page *page; | |
4303 | + | |
4304 | + if (highmem) { | |
4305 | + page = (struct page *) *list; | |
4306 | + free_pbe = (struct pbe *) kmap(page); | |
4307 | + } else { | |
4308 | + page = virt_to_page(*list); | |
4309 | + free_pbe = *list; | |
4310 | + } | |
4311 | + | |
4312 | + for (i = 0; i < PBES_PER_PAGE; i++) { | |
4313 | + if (!free_pbe) | |
4314 | + break; | |
4315 | + if (highmem) | |
4316 | + toi__free_page(29, free_pbe->address); | |
4317 | + else | |
4318 | + toi_free_page(29, | |
4319 | + (unsigned long) free_pbe->address); | |
4320 | + free_pbe = free_pbe->next; | |
4321 | + } | |
4322 | + | |
4323 | + if (highmem) { | |
4324 | + if (free_pbe) | |
4325 | + next_page = free_pbe; | |
4326 | + kunmap(page); | |
4327 | + } else { | |
4328 | + if (free_pbe) | |
4329 | + next_page = free_pbe; | |
4330 | + } | |
4331 | + | |
4332 | + toi__free_page(29, page); | |
4333 | + *list = (struct pbe *) next_page; | |
4334 | + }; | |
4335 | +} | |
4336 | + | |
4337 | +/** | |
4338 | + * copyback_post - post atomic-restore actions | |
4339 | + * | |
4340 | + * After doing the atomic restore, we have a few more things to do: | |
4341 | + * 1) We want to retain some values across the restore, so we now copy | |
4342 | + * these from the nosave variables to the normal ones. | |
4343 | + * 2) Set the status flags. | |
4344 | + * 3) Resume devices. | |
4345 | + * 4) Tell userui so it can redraw & restore settings. | |
4346 | + * 5) Reread the page cache. | |
4347 | + **/ | |
4348 | +void copyback_post(void) | |
4349 | +{ | |
4350 | + struct toi_boot_kernel_data *bkd = | |
4351 | + (struct toi_boot_kernel_data *) boot_kernel_data_buffer; | |
4352 | + | |
4353 | + /* | |
4354 | + * The boot kernel's data may be larger (newer version) or | |
4355 | + * smaller (older version) than ours. Copy the minimum | |
4356 | + * of the two sizes, so that we don't overwrite valid values | |
4357 | + * from pre-atomic copy. | |
4358 | + */ | |
4359 | + | |
4360 | + memcpy(&toi_bkd, (char *) boot_kernel_data_buffer, | |
4361 | + min_t(int, sizeof(struct toi_boot_kernel_data), | |
4362 | + bkd->size)); | |
4363 | + | |
4364 | + if (toi_activate_storage(1)) | |
4365 | + panic("Failed to reactivate our storage."); | |
4366 | + | |
4367 | + toi_ui_post_atomic_restore(); | |
4368 | + | |
4369 | + toi_cond_pause(1, "About to reload secondary pagedir."); | |
4370 | + | |
4371 | + if (read_pageset2(0)) | |
4372 | + panic("Unable to successfully reread the page cache."); | |
4373 | + | |
4374 | + /* | |
4375 | + * If the user wants to sleep again after resuming from full-off, | |
4376 | + * it's most likely to be in order to suspend to ram, so we'll | |
4377 | + * do this check after loading pageset2, to give them the fastest | |
4378 | + * wakeup when they are ready to use the computer again. | |
4379 | + */ | |
4380 | + toi_check_resleep(); | |
4381 | +} | |
4382 | + | |
4383 | +/** | |
4384 | + * toi_copy_pageset1 - do the atomic copy of pageset1 | |
4385 | + * | |
4386 | + * Make the atomic copy of pageset1. We can't use copy_page (as we once did) | |
4387 | + * because we can't be sure what side effects it has. On my old Duron, with | |
4388 | + * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt | |
4389 | + * count at resume time 4 instead of 3. | |
4390 | + * | |
4391 | + * We don't want to call kmap_atomic unconditionally because it has the side | |
4392 | + * effect of incrementing the preempt count, which will leave it one too high | |
4393 | + * post resume (the page containing the preempt count will be copied after | |
4394 | + * it's incremented). This is essentially the same problem. | |
4395 | + **/ | |
4396 | +void toi_copy_pageset1(void) | |
4397 | +{ | |
4398 | + int i; | |
4399 | + unsigned long source_index, dest_index; | |
4400 | + | |
4401 | + memory_bm_position_reset(pageset1_map); | |
4402 | + memory_bm_position_reset(pageset1_copy_map); | |
4403 | + | |
4404 | + source_index = memory_bm_next_pfn(pageset1_map); | |
4405 | + dest_index = memory_bm_next_pfn(pageset1_copy_map); | |
4406 | + | |
4407 | + for (i = 0; i < pagedir1.size; i++) { | |
4408 | + unsigned long *origvirt, *copyvirt; | |
4409 | + struct page *origpage, *copypage; | |
4410 | + int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1, | |
4411 | + was_present1, was_present2; | |
4412 | + | |
4413 | + origpage = pfn_to_page(source_index); | |
4414 | + copypage = pfn_to_page(dest_index); | |
4415 | + | |
4416 | + origvirt = PageHighMem(origpage) ? | |
4417 | + kmap_atomic(origpage, KM_USER0) : | |
4418 | + page_address(origpage); | |
4419 | + | |
4420 | + copyvirt = PageHighMem(copypage) ? | |
4421 | + kmap_atomic(copypage, KM_USER1) : | |
4422 | + page_address(copypage); | |
4423 | + | |
4424 | + was_present1 = kernel_page_present(origpage); | |
4425 | + if (!was_present1) | |
4426 | + kernel_map_pages(origpage, 1, 1); | |
4427 | + | |
4428 | + was_present2 = kernel_page_present(copypage); | |
4429 | + if (!was_present2) | |
4430 | + kernel_map_pages(copypage, 1, 1); | |
4431 | + | |
4432 | + while (loop >= 0) { | |
4433 | + *(copyvirt + loop) = *(origvirt + loop); | |
4434 | + loop--; | |
4435 | + } | |
4436 | + | |
4437 | + if (!was_present1) | |
4438 | + kernel_map_pages(origpage, 1, 0); | |
4439 | + | |
4440 | + if (!was_present2) | |
4441 | + kernel_map_pages(copypage, 1, 0); | |
4442 | + | |
4443 | + if (PageHighMem(origpage)) | |
4444 | + kunmap_atomic(origvirt, KM_USER0); | |
4445 | + | |
4446 | + if (PageHighMem(copypage)) | |
4447 | + kunmap_atomic(copyvirt, KM_USER1); | |
4448 | + | |
4449 | + source_index = memory_bm_next_pfn(pageset1_map); | |
4450 | + dest_index = memory_bm_next_pfn(pageset1_copy_map); | |
4451 | + } | |
4452 | +} | |
4453 | + | |
4454 | +/** | |
4455 | + * __toi_post_context_save - steps after saving the cpu context | |
4456 | + * | |
4457 | + * Steps taken after saving the CPU state to make the actual | |
4458 | + * atomic copy. | |
4459 | + * | |
4460 | + * Called from swsusp_save in snapshot.c via toi_post_context_save. | |
4461 | + **/ | |
4462 | +int __toi_post_context_save(void) | |
4463 | +{ | |
4464 | + long old_ps1_size = pagedir1.size; | |
4465 | + | |
4466 | + check_checksums(); | |
4467 | + | |
4468 | + free_checksum_pages(); | |
4469 | + | |
4470 | + toi_recalculate_image_contents(1); | |
4471 | + | |
4472 | + extra_pd1_pages_used = pagedir1.size - old_ps1_size; | |
4473 | + | |
4474 | + if (extra_pd1_pages_used > extra_pd1_pages_allowance) { | |
4475 | + printk(KERN_INFO "Pageset1 has grown by %ld pages. " | |
4476 | + "extra_pages_allowance is currently only %lu.\n", | |
4477 | + pagedir1.size - old_ps1_size, | |
4478 | + extra_pd1_pages_allowance); | |
4479 | + | |
4480 | + /* | |
4481 | + * Highlevel code will see this, clear the state and | |
4482 | + * retry if we haven't already done so twice. | |
4483 | + */ | |
4484 | + set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); | |
4485 | + return 1; | |
4486 | + } | |
4487 | + | |
4488 | + if (!test_action_state(TOI_TEST_FILTER_SPEED) && | |
4489 | + !test_action_state(TOI_TEST_BIO)) | |
4490 | + toi_copy_pageset1(); | |
4491 | + | |
4492 | + return 0; | |
4493 | +} | |
4494 | + | |
4495 | +/** | |
4496 | + * toi_hibernate - high level code for doing the atomic copy | |
4497 | + * | |
4498 | + * High-level code which prepares to do the atomic copy. Loosely based | |
4499 | + * on the swsusp version, but with the following twists: | |
4500 | + * - We set toi_running so the swsusp code uses our code paths. | |
4501 | + * - We give better feedback regarding what goes wrong if there is a | |
4502 | + * problem. | |
4503 | + * - We use an extra function to call the assembly, just in case this code | |
4504 | + * is in a module (return address). | |
4505 | + **/ | |
4506 | +int toi_hibernate(void) | |
4507 | +{ | |
4508 | + int error; | |
4509 | + | |
4510 | + toi_running = 1; /* For the swsusp code we use :< */ | |
4511 | + | |
4512 | + error = toi_lowlevel_builtin(); | |
4513 | + | |
4514 | + toi_running = 0; | |
4515 | + return error; | |
4516 | +} | |
4517 | + | |
4518 | +/** | |
4519 | + * toi_atomic_restore - prepare to do the atomic restore | |
4520 | + * | |
4521 | + * Get ready to do the atomic restore. This part gets us into the same | |
4522 | + * state we are in prior to calling do_toi_lowlevel while | |
4523 | + * hibernating: hot-unplugging secondary cpus and freezing processes, | |
4524 | + * before starting the thread that will do the restore. | |
4525 | + **/ | |
4526 | +int toi_atomic_restore(void) | |
4527 | +{ | |
4528 | + int error; | |
4529 | + | |
4530 | + toi_running = 1; | |
4531 | + | |
4532 | + toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore."); | |
4533 | + | |
4534 | + memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line, | |
4535 | + strlen(saved_command_line)); | |
4536 | + | |
4537 | + if (add_boot_kernel_data_pbe()) | |
4538 | + goto Failed; | |
4539 | + | |
4540 | + toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); | |
4541 | + | |
4542 | + if (toi_go_atomic(PMSG_QUIESCE, 0)) | |
4543 | + goto Failed; | |
4544 | + | |
4545 | + /* We'll ignore saved state, but this gets preempt count (etc) right */ | |
4546 | + save_processor_state(); | |
4547 | + | |
4548 | + error = swsusp_arch_resume(); | |
4549 | + /* | |
4550 | + * Code below is only ever reached in case of failure. Otherwise | |
4551 | + * execution continues at place where swsusp_arch_suspend was called. | |
4552 | + * | |
4553 | + * We don't know whether it's safe to continue (this shouldn't happen), | |
4554 | + * so let's err on the side of caution. | |
4555 | + */ | |
4556 | + BUG(); | |
4557 | + | |
4558 | +Failed: | |
4559 | + free_pbe_list(&restore_pblist, 0); | |
4560 | +#ifdef CONFIG_HIGHMEM | |
4561 | + free_pbe_list(&restore_highmem_pblist, 1); | |
4562 | +#endif | |
2380c486 JR |
4563 | + toi_running = 0; |
4564 | + return 1; | |
4565 | +} | |
4566 | + | |
4567 | +/** | |
4568 | + * toi_go_atomic - do the actual atomic copy/restore | |
4569 | + * @state: The state to use for device_suspend & power_down calls. | |
4570 | + * @suspend_time: Whether we're suspending or resuming. | |
4571 | + **/ | |
4572 | +int toi_go_atomic(pm_message_t state, int suspend_time) | |
4573 | +{ | |
4574 | + if (suspend_time && platform_begin(1)) { | |
4575 | + set_abort_result(TOI_PLATFORM_PREP_FAILED); | |
2380c486 JR |
4576 | + return 1; |
4577 | + } | |
4578 | + | |
4579 | + suspend_console(); | |
4580 | + | |
4581 | + if (device_suspend(state)) { | |
4582 | + set_abort_result(TOI_DEVICE_REFUSED); | |
4583 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3); | |
4584 | + return 1; | |
4585 | + } | |
4586 | + | |
9474138d AM |
4587 | + if (suspend_time && arch_prepare_suspend()) { |
4588 | + set_abort_result(TOI_ARCH_PREPARE_FAILED); | |
4589 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1); | |
4590 | + return 1; | |
4591 | + } | |
4592 | + | |
4593 | + /* At this point, device_suspend() has been called, but *not* | |
4594 | + * device_power_down(). We *must* device_power_down() now. | |
4595 | + * Otherwise, drivers for some devices (e.g. interrupt controllers) | |
4596 | + * become desynchronized with the actual state of the hardware | |
4597 | + * at resume time, and evil weirdness ensues. | |
4598 | + */ | |
4599 | + | |
4600 | + if (device_power_down(state)) { | |
4601 | + set_abort_result(TOI_DEVICE_REFUSED); | |
4602 | + toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1); | |
4603 | + return 1; | |
4604 | + } | |
4605 | + | |
2380c486 JR |
4606 | + if (suspend_time && platform_pre_snapshot(1)) { |
4607 | + set_abort_result(TOI_PRE_SNAPSHOT_FAILED); | |
9474138d | 4608 | + toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1); |
2380c486 JR |
4609 | + return 1; |
4610 | + } | |
4611 | + | |
4612 | + if (!suspend_time && platform_pre_restore(1)) { | |
4613 | + set_abort_result(TOI_PRE_RESTORE_FAILED); | |
9474138d | 4614 | + toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1); |
2380c486 JR |
4615 | + return 1; |
4616 | + } | |
4617 | + | |
4618 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG)) { | |
4619 | + if (disable_nonboot_cpus()) { | |
4620 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
4621 | + toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG, | |
9474138d | 4622 | + suspend_time, 1); |
2380c486 JR |
4623 | + return 1; |
4624 | + } | |
4625 | + } | |
4626 | + | |
2380c486 JR |
4627 | + local_irq_disable(); |
4628 | + | |
2380c486 JR |
4629 | + if (sysdev_suspend(state)) { |
4630 | + set_abort_result(TOI_SYSDEV_REFUSED); | |
9474138d | 4631 | + toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1); |
2380c486 JR |
4632 | + return 1; |
4633 | + } | |
4634 | + | |
4635 | + return 0; | |
4636 | +} | |
4637 | + | |
4638 | +/** | |
4639 | + * toi_end_atomic - post atomic copy/restore routines | |
4640 | + * @stage: What step to start at. | |
4641 | + * @suspend_time: Whether we're suspending or resuming. | |
4642 | + * @error: Whether we're recovering from an error. | |
4643 | + **/ | |
4644 | +void toi_end_atomic(int stage, int suspend_time, int error) | |
4645 | +{ | |
4646 | + switch (stage) { | |
4647 | + case ATOMIC_ALL_STEPS: | |
4648 | + if (!suspend_time) | |
4649 | + platform_leave(1); | |
4650 | + sysdev_resume(); | |
2380c486 JR |
4651 | + case ATOMIC_STEP_IRQS: |
4652 | + local_irq_enable(); | |
2380c486 JR |
4653 | + case ATOMIC_STEP_CPU_HOTPLUG: |
4654 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG)) | |
4655 | + enable_nonboot_cpus(); | |
9474138d | 4656 | + platform_restore_cleanup(1); |
2380c486 JR |
4657 | + case ATOMIC_STEP_PLATFORM_FINISH: |
4658 | + platform_finish(1); | |
9474138d AM |
4659 | + device_power_up(suspend_time ? |
4660 | + (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); | |
2380c486 JR |
4661 | + case ATOMIC_STEP_DEVICE_RESUME: |
4662 | + if (suspend_time && (error & 2)) | |
4663 | + platform_recover(1); | |
4664 | + device_resume(suspend_time ? | |
4665 | + ((error & 1) ? PMSG_RECOVER : PMSG_THAW) : | |
4666 | + PMSG_RESTORE); | |
2380c486 | 4667 | + resume_console(); |
2380c486 JR |
4668 | + platform_end(1); |
4669 | + | |
4670 | + toi_prepare_status(DONT_CLEAR_BAR, "Post atomic."); | |
4671 | + } | |
4672 | +} | |
4673 | diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h | |
4674 | new file mode 100644 | |
9474138d | 4675 | index 0000000..a428f4c |
2380c486 JR |
4676 | --- /dev/null |
4677 | +++ b/kernel/power/tuxonice_atomic_copy.h | |
9474138d | 4678 | @@ -0,0 +1,20 @@ |
2380c486 JR |
4679 | +/* |
4680 | + * kernel/power/tuxonice_atomic_copy.h | |
4681 | + * | |
4682 | + * Copyright 2008 Nigel Cunningham (nigel at tuxonice net) | |
4683 | + * | |
4684 | + * Distributed under GPLv2. | |
4685 | + * | |
4686 | + * Routines for doing the atomic save/restore. | |
4687 | + */ | |
4688 | + | |
4689 | +enum { | |
4690 | + ATOMIC_ALL_STEPS, | |
2380c486 JR |
4691 | + ATOMIC_STEP_IRQS, |
4692 | + ATOMIC_STEP_CPU_HOTPLUG, | |
4693 | + ATOMIC_STEP_PLATFORM_FINISH, | |
4694 | + ATOMIC_STEP_DEVICE_RESUME, | |
2380c486 JR |
4695 | +}; |
4696 | + | |
4697 | +int toi_go_atomic(pm_message_t state, int toi_time); | |
4698 | +void toi_end_atomic(int stage, int toi_time, int error); | |
4699 | diff --git a/kernel/power/tuxonice_block_io.c b/kernel/power/tuxonice_block_io.c | |
4700 | new file mode 100644 | |
9474138d | 4701 | index 0000000..12fa249 |
2380c486 JR |
4702 | --- /dev/null |
4703 | +++ b/kernel/power/tuxonice_block_io.c | |
9474138d | 4704 | @@ -0,0 +1,1338 @@ |
2380c486 JR |
4705 | +/* |
4706 | + * kernel/power/tuxonice_block_io.c | |
4707 | + * | |
4708 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
4709 | + * | |
4710 | + * Distributed under GPLv2. | |
4711 | + * | |
4712 | + * This file contains block io functions for TuxOnIce. These are | |
4713 | + * used by the swapwriter and it is planned that they will also | |
4714 | + * be used by the NFSwriter. | |
4715 | + * | |
4716 | + */ | |
4717 | + | |
4718 | +#include <linux/blkdev.h> | |
4719 | +#include <linux/syscalls.h> | |
4720 | +#include <linux/suspend.h> | |
4721 | + | |
4722 | +#include "tuxonice.h" | |
4723 | +#include "tuxonice_sysfs.h" | |
4724 | +#include "tuxonice_modules.h" | |
4725 | +#include "tuxonice_prepare_image.h" | |
4726 | +#include "tuxonice_block_io.h" | |
4727 | +#include "tuxonice_ui.h" | |
4728 | +#include "tuxonice_alloc.h" | |
4729 | +#include "tuxonice_io.h" | |
4730 | + | |
4731 | +#define MEMORY_ONLY 1 | |
4732 | +#define THROTTLE_WAIT 2 | |
4733 | + | |
4734 | +/* #define MEASURE_MUTEX_CONTENTION */ | |
4735 | +#ifndef MEASURE_MUTEX_CONTENTION | |
4736 | +#define my_mutex_lock(index, the_lock) mutex_lock(the_lock) | |
4737 | +#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock) | |
4738 | +#else | |
4739 | +unsigned long mutex_times[2][2][NR_CPUS]; | |
4740 | +#define my_mutex_lock(index, the_lock) do { \ | |
4741 | + int have_mutex; \ | |
4742 | + have_mutex = mutex_trylock(the_lock); \ | |
4743 | + if (!have_mutex) { \ | |
4744 | + mutex_lock(the_lock); \ | |
4745 | + mutex_times[index][0][smp_processor_id()]++; \ | |
4746 | + } else { \ | |
4747 | + mutex_times[index][1][smp_processor_id()]++; \ | |
4748 | + } | |
4749 | + | |
4750 | +#define my_mutex_unlock(index, the_lock) \ | |
4751 | + mutex_unlock(the_lock); \ | |
4752 | +} while (0) | |
4753 | +#endif | |
4754 | + | |
4755 | +static int target_outstanding_io = 1024; | |
4756 | +static int max_outstanding_writes, max_outstanding_reads; | |
4757 | + | |
4758 | +static struct page *bio_queue_head, *bio_queue_tail; | |
4759 | +static atomic_t toi_bio_queue_size; | |
4760 | +static DEFINE_SPINLOCK(bio_queue_lock); | |
4761 | + | |
4762 | +static int free_mem_throttle, throughput_throttle; | |
4763 | +static int more_readahead = 1; | |
4764 | +static struct page *readahead_list_head, *readahead_list_tail; | |
4765 | +static DECLARE_WAIT_QUEUE_HEAD(readahead_list_wait); | |
4766 | + | |
4767 | +static struct page *waiting_on; | |
4768 | + | |
4769 | +static atomic_t toi_io_in_progress, toi_io_done; | |
4770 | +static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait); | |
4771 | + | |
4772 | +static int extra_page_forward; | |
4773 | + | |
4774 | +static int current_stream; | |
4775 | +/* 0 = Header, 1 = Pageset1, 2 = Pageset2, 3 = End of PS1 */ | |
4776 | +struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4]; | |
4777 | +EXPORT_SYMBOL_GPL(toi_writer_posn_save); | |
4778 | + | |
4779 | +/* Pointer to current entry being loaded/saved. */ | |
4780 | +struct toi_extent_iterate_state toi_writer_posn; | |
4781 | +EXPORT_SYMBOL_GPL(toi_writer_posn); | |
4782 | + | |
4783 | +/* Not static, so that the allocators can setup and complete | |
4784 | + * writing the header */ | |
4785 | +char *toi_writer_buffer; | |
4786 | +EXPORT_SYMBOL_GPL(toi_writer_buffer); | |
4787 | + | |
4788 | +int toi_writer_buffer_posn; | |
4789 | +EXPORT_SYMBOL_GPL(toi_writer_buffer_posn); | |
4790 | + | |
4791 | +static struct toi_bdev_info *toi_devinfo; | |
4792 | + | |
4793 | +static DEFINE_MUTEX(toi_bio_mutex); | |
4794 | +static DEFINE_MUTEX(toi_bio_readahead_mutex); | |
4795 | + | |
4796 | +static struct task_struct *toi_queue_flusher; | |
4797 | +static int toi_bio_queue_flush_pages(int dedicated_thread); | |
4798 | + | |
4799 | +#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \ | |
4800 | + atomic_read(&toi_bio_queue_size)) | |
4801 | + | |
4802 | +/** | |
4803 | + * set_free_mem_throttle - set the point where we pause to avoid oom. | |
4804 | + * | |
4805 | + * Initially, this value is zero, but when we first fail to allocate memory, | |
4806 | + * we set it (plus a buffer) and thereafter throttle i/o once that limit is | |
4807 | + * reached. | |
4808 | + **/ | |
4809 | +static void set_free_mem_throttle(void) | |
4810 | +{ | |
4811 | + int new_throttle = nr_unallocated_buffer_pages() + 256; | |
4812 | + | |
4813 | + if (new_throttle > free_mem_throttle) | |
4814 | + free_mem_throttle = new_throttle; | |
4815 | +} | |
4816 | + | |
4817 | +#define NUM_REASONS 7 | |
4818 | +static atomic_t reasons[NUM_REASONS]; | |
4819 | +static char *reason_name[NUM_REASONS] = { | |
4820 | + "readahead not ready", | |
4821 | + "bio allocation", | |
4822 | + "synchronous I/O", | |
4823 | + "toi_bio_get_new_page", | |
4824 | + "memory low", | |
4825 | + "readahead buffer allocation", | |
4826 | + "throughput_throttle", | |
4827 | +}; | |
4828 | + | |
4829 | +/** | |
4830 | + * do_bio_wait - wait for some TuxOnIce I/O to complete | |
4831 | + * @reason: The array index of the reason we're waiting. | |
4832 | + * | |
4833 | + * Wait for a particular page of I/O if we're after a particular page. | |
4834 | + * If we're not after a particular page, wait instead for all in flight | |
4835 | + * I/O to be completed or for us to have enough free memory to be able | |
4836 | + * to submit more I/O. | |
4837 | + * | |
4838 | + * If we wait, we also update our statistics regarding why we waited. | |
4839 | + **/ | |
4840 | +static void do_bio_wait(int reason) | |
4841 | +{ | |
4842 | + struct page *was_waiting_on = waiting_on; | |
4843 | + | |
4844 | + /* On SMP, waiting_on can be reset, so we make a copy */ | |
4845 | + if (was_waiting_on) { | |
4846 | + if (PageLocked(was_waiting_on)) { | |
4847 | + wait_on_page_bit(was_waiting_on, PG_locked); | |
4848 | + atomic_inc(&reasons[reason]); | |
4849 | + } | |
4850 | + } else { | |
4851 | + atomic_inc(&reasons[reason]); | |
4852 | + | |
4853 | + wait_event(num_in_progress_wait, | |
4854 | + !atomic_read(&toi_io_in_progress) || | |
4855 | + nr_unallocated_buffer_pages() > free_mem_throttle); | |
4856 | + } | |
4857 | +} | |
4858 | + | |
4859 | +/** | |
4860 | + * throttle_if_needed - wait for I/O completion if throttle points are reached | |
4861 | + * @flags: What to check and how to act. | |
4862 | + * | |
4863 | + * Check whether we need to wait for some I/O to complete. We always check | |
4864 | + * whether we have enough memory available, but may also (depending upon | |
4865 | + * @flags) check if the throughput throttle limit has been reached. | |
4866 | + **/ | |
4867 | +static int throttle_if_needed(int flags) | |
4868 | +{ | |
4869 | + int free_pages = nr_unallocated_buffer_pages(); | |
4870 | + | |
4871 | + /* Getting low on memory and I/O is in progress? */ | |
4872 | + while (unlikely(free_pages < free_mem_throttle) && | |
4873 | + atomic_read(&toi_io_in_progress)) { | |
4874 | + if (!(flags & THROTTLE_WAIT)) | |
4875 | + return -ENOMEM; | |
4876 | + do_bio_wait(4); | |
4877 | + free_pages = nr_unallocated_buffer_pages(); | |
4878 | + } | |
4879 | + | |
4880 | + while (!(flags & MEMORY_ONLY) && throughput_throttle && | |
4881 | + TOTAL_OUTSTANDING_IO >= throughput_throttle) { | |
4882 | + int result = toi_bio_queue_flush_pages(0); | |
4883 | + if (result) | |
4884 | + return result; | |
4885 | + atomic_inc(&reasons[6]); | |
4886 | + wait_event(num_in_progress_wait, | |
4887 | + !atomic_read(&toi_io_in_progress) || | |
4888 | + TOTAL_OUTSTANDING_IO < throughput_throttle); | |
4889 | + } | |
4890 | + | |
4891 | + return 0; | |
4892 | +} | |
4893 | + | |
4894 | +/** | |
4895 | + * update_throughput_throttle - update the raw throughput throttle | |
4896 | + * @jif_index: The number of times this function has been called. | |
4897 | + * | |
4898 | + * This function is called twice per second by the core, and used to limit the | |
4899 | + * amount of I/O we submit at once, spreading out our waiting through the | |
4900 | + * whole job and letting userui get an opportunity to do its work. | |
4901 | + * | |
4902 | + * We don't start limiting I/O until 1/2s has gone so that we get a | |
4903 | + * decent sample for our initial limit, and keep updating it because | |
4904 | + * throughput may vary (on rotating media, eg) with our block number. | |
4905 | + * | |
4906 | + * We throttle to 1/10s worth of I/O. | |
4907 | + **/ | |
4908 | +static void update_throughput_throttle(int jif_index) | |
4909 | +{ | |
4910 | + int done = atomic_read(&toi_io_done); | |
4911 | + throughput_throttle = done / jif_index / 5; | |
4912 | +} | |
4913 | + | |
4914 | +/** | |
4915 | + * toi_finish_all_io - wait for all outstanding i/o to complete | |
4916 | + * | |
4917 | + * Flush any queued but unsubmitted I/O and wait for it all to complete. | |
4918 | + **/ | |
0ada99ac | 4919 | +static int toi_finish_all_io(void) |
2380c486 | 4920 | +{ |
0ada99ac | 4921 | + int result = toi_bio_queue_flush_pages(0); |
2380c486 | 4922 | + wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO); |
0ada99ac | 4923 | + return result; |
2380c486 JR |
4924 | +} |
4925 | + | |
4926 | +/** | |
4927 | + * toi_end_bio - bio completion function. | |
4928 | + * @bio: bio that has completed. | |
4929 | + * @err: Error value. Yes, like end_swap_bio_read, we ignore it. | |
4930 | + * | |
4931 | + * Function called by the block driver from interrupt context when I/O is | |
4932 | + * completed. If we were writing the page, we want to free it and will have | |
4933 | + * set bio->bi_private to the parameter we should use in telling the page | |
4934 | + * allocation accounting code what the page was allocated for. If we're | |
4935 | + * reading the page, it will be in the singly linked list made from | |
4936 | + * page->private pointers. | |
4937 | + **/ | |
4938 | +static void toi_end_bio(struct bio *bio, int err) | |
4939 | +{ | |
4940 | + struct page *page = bio->bi_io_vec[0].bv_page; | |
4941 | + | |
4942 | + BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); | |
4943 | + | |
4944 | + unlock_page(page); | |
4945 | + bio_put(bio); | |
4946 | + | |
4947 | + if (waiting_on == page) | |
4948 | + waiting_on = NULL; | |
4949 | + | |
4950 | + put_page(page); | |
4951 | + | |
4952 | + if (bio->bi_private) | |
4953 | + toi__free_page((int) ((unsigned long) bio->bi_private) , page); | |
4954 | + | |
4955 | + bio_put(bio); | |
4956 | + | |
4957 | + atomic_dec(&toi_io_in_progress); | |
4958 | + atomic_inc(&toi_io_done); | |
4959 | + | |
4960 | + wake_up(&num_in_progress_wait); | |
4961 | +} | |
4962 | + | |
4963 | +/** | |
4964 | + * submit - submit BIO request | |
4965 | + * @writing: READ or WRITE. | |
4966 | + * @dev: The block device we're using. | |
4967 | + * @first_block: The first sector we're using. | |
4968 | + * @page: The page being used for I/O. | |
4969 | + * @free_group: If writing, the group that was used in allocating the page | |
4970 | + * and which will be used in freeing the page from the completion | |
4971 | + * routine. | |
4972 | + * | |
4973 | + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the | |
4974 | + * textbook - allocate and initialize the bio. If we're writing, make sure | |
4975 | + * the page is marked as dirty. Then submit it and carry on." | |
4976 | + * | |
4977 | + * If we're just testing the speed of our own code, we fake having done all | |
4978 | + * the hard work and call toi_end_bio immediately. | |
4979 | + **/ | |
4980 | +static int submit(int writing, struct block_device *dev, sector_t first_block, | |
4981 | + struct page *page, int free_group) | |
4982 | +{ | |
4983 | + struct bio *bio = NULL; | |
4984 | + int cur_outstanding_io, result; | |
4985 | + | |
4986 | + /* | |
4987 | + * Shouldn't throttle if reading - can deadlock in the single | |
4988 | + * threaded case as pages are only freed when we use the | |
4989 | + * readahead. | |
4990 | + */ | |
4991 | + if (writing) { | |
4992 | + result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT); | |
4993 | + if (result) | |
4994 | + return result; | |
4995 | + } | |
4996 | + | |
4997 | + while (!bio) { | |
4998 | + bio = bio_alloc(TOI_ATOMIC_GFP, 1); | |
4999 | + if (!bio) { | |
5000 | + set_free_mem_throttle(); | |
5001 | + do_bio_wait(1); | |
5002 | + } | |
5003 | + } | |
5004 | + | |
5005 | + bio->bi_bdev = dev; | |
5006 | + bio->bi_sector = first_block; | |
5007 | + bio->bi_private = (void *) ((unsigned long) free_group); | |
5008 | + bio->bi_end_io = toi_end_bio; | |
5009 | + | |
5010 | + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { | |
0ada99ac | 5011 | + printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n", |
2380c486 JR |
5012 | + (unsigned long long) first_block); |
5013 | + bio_put(bio); | |
5014 | + return -EFAULT; | |
5015 | + } | |
5016 | + | |
5017 | + bio_get(bio); | |
5018 | + | |
5019 | + cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress); | |
5020 | + if (writing) { | |
5021 | + if (cur_outstanding_io > max_outstanding_writes) | |
5022 | + max_outstanding_writes = cur_outstanding_io; | |
5023 | + } else { | |
5024 | + if (cur_outstanding_io > max_outstanding_reads) | |
5025 | + max_outstanding_reads = cur_outstanding_io; | |
5026 | + } | |
5027 | + | |
5028 | + | |
5029 | + if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED))) { | |
5030 | + /* Fake having done the hard work */ | |
5031 | + set_bit(BIO_UPTODATE, &bio->bi_flags); | |
5032 | + toi_end_bio(bio, 0); | |
5033 | + } else | |
5034 | + submit_bio(writing | (1 << BIO_RW_SYNCIO) | | |
5035 | + (1 << BIO_RW_UNPLUG), bio); | |
5036 | + | |
5037 | + return 0; | |
5038 | +} | |
5039 | + | |
5040 | +/** | |
5041 | + * toi_do_io: Prepare to do some i/o on a page and submit or batch it. | |
5042 | + * | |
5043 | + * @writing: Whether reading or writing. | |
5044 | + * @bdev: The block device which we're using. | |
5045 | + * @block0: The first sector we're reading or writing. | |
5046 | + * @page: The page on which I/O is being done. | |
5047 | + * @readahead_index: If doing readahead, the index (reset this flag when done). | |
5048 | + * @syncio: Whether the i/o is being done synchronously. | |
5049 | + * | |
5050 | + * Prepare and start a read or write operation. | |
5051 | + * | |
5052 | + * Note that we always work with our own page. If writing, we might be given a | |
5053 | + * compression buffer that will immediately be used to start compressing the | |
5054 | + * next page. For reading, we do readahead and therefore don't know the final | |
5055 | + * address where the data needs to go. | |
5056 | + **/ | |
5057 | +static int toi_do_io(int writing, struct block_device *bdev, long block0, | |
5058 | + struct page *page, int is_readahead, int syncio, int free_group) | |
5059 | +{ | |
5060 | + page->private = 0; | |
5061 | + | |
5062 | + /* Do here so we don't race against toi_bio_get_next_page_read */ | |
5063 | + lock_page(page); | |
5064 | + | |
5065 | + if (is_readahead) { | |
5066 | + if (readahead_list_head) | |
5067 | + readahead_list_tail->private = (unsigned long) page; | |
5068 | + else | |
5069 | + readahead_list_head = page; | |
5070 | + | |
5071 | + readahead_list_tail = page; | |
5072 | + wake_up(&readahead_list_wait); | |
5073 | + } | |
5074 | + | |
5075 | + /* Done before submitting to avoid races. */ | |
5076 | + if (syncio) | |
5077 | + waiting_on = page; | |
5078 | + | |
5079 | + /* Submit the page */ | |
5080 | + get_page(page); | |
5081 | + | |
5082 | + if (submit(writing, bdev, block0, page, free_group)) | |
5083 | + return -EFAULT; | |
5084 | + | |
5085 | + if (syncio) | |
5086 | + do_bio_wait(2); | |
5087 | + | |
5088 | + return 0; | |
5089 | +} | |
5090 | + | |
/**
 * toi_bdev_page_io - simpler interface to do i/o directly on a single page
 * @writing:	Whether reading or writing.
 * @bdev:	Block device on which we're operating.
 * @pos:	Sector at which page to read or write starts.
 * @page:	Page to be read/written.
 *
 * A simple interface to submit a page of I/O and wait for its completion.
 * The caller must free the page used.
 *
 * Returns whatever toi_do_io() returns.
 **/
static int toi_bdev_page_io(int writing, struct block_device *bdev,
		long pos, struct page *page)
{
	/* Not readahead, synchronous, free group 0. */
	return toi_do_io(writing, bdev, pos, page,
			 0 /* is_readahead */,
			 1 /* syncio */,
			 0 /* free_group */);
}
5106 | + | |
5107 | +/** | |
5108 | + * toi_bio_memory_needed - report the amount of memory needed for block i/o | |
5109 | + * | |
5110 | + * We want to have at least enough memory so as to have target_outstanding_io | |
5111 | + * or more transactions on the fly at once. If we can do more, fine. | |
5112 | + **/ | |
5113 | +static int toi_bio_memory_needed(void) | |
5114 | +{ | |
5115 | + return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) + | |
5116 | + sizeof(struct bio)); | |
5117 | +} | |
5118 | + | |
5119 | +/** | |
5120 | + * toi_bio_print_debug_stats - put out debugging info in the buffer provided | |
5121 | + * @buffer: A buffer of size @size into which text should be placed. | |
5122 | + * @size: The size of @buffer. | |
5123 | + * | |
5124 | + * Fill a buffer with debugging info. This is used for both our debug_info sysfs | |
5125 | + * entry and for recording the same info in dmesg. | |
5126 | + **/ | |
5127 | +static int toi_bio_print_debug_stats(char *buffer, int size) | |
5128 | +{ | |
5129 | + int len = scnprintf(buffer, size, "- Max outstanding reads %d. Max " | |
5130 | + "writes %d.\n", max_outstanding_reads, | |
5131 | + max_outstanding_writes); | |
5132 | + | |
5133 | + len += scnprintf(buffer + len, size - len, | |
5134 | + " Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n", | |
5135 | + target_outstanding_io, | |
5136 | + PAGE_SIZE, (unsigned int) sizeof(struct request), | |
5137 | + (unsigned int) sizeof(struct bio), toi_bio_memory_needed()); | |
5138 | + | |
5139 | +#ifdef MEASURE_MUTEX_CONTENTION | |
5140 | + { | |
5141 | + int i; | |
5142 | + | |
5143 | + len += scnprintf(buffer + len, size - len, | |
5144 | + " Mutex contention while reading:\n Contended Free\n"); | |
5145 | + | |
5146 | + for_each_online_cpu(i) | |
5147 | + len += scnprintf(buffer + len, size - len, | |
5148 | + " %9lu %9lu\n", | |
5149 | + mutex_times[0][0][i], mutex_times[0][1][i]); | |
5150 | + | |
5151 | + len += scnprintf(buffer + len, size - len, | |
5152 | + " Mutex contention while writing:\n Contended Free\n"); | |
5153 | + | |
5154 | + for_each_online_cpu(i) | |
5155 | + len += scnprintf(buffer + len, size - len, | |
5156 | + " %9lu %9lu\n", | |
5157 | + mutex_times[1][0][i], mutex_times[1][1][i]); | |
5158 | + | |
5159 | + } | |
5160 | +#endif | |
5161 | + | |
5162 | + return len + scnprintf(buffer + len, size - len, | |
5163 | + " Free mem throttle point reached %d.\n", free_mem_throttle); | |
5164 | +} | |
5165 | + | |
5166 | +/** | |
5167 | + * toi_set_devinfo - set the bdev info used for i/o | |
5168 | + * @info: Pointer to an array of struct toi_bdev_info - the list of | |
5169 | + * bdevs and blocks on them in which the image is stored. | |
5170 | + * | |
5171 | + * Set the list of bdevs and blocks in which the image will be stored. | |
5172 | + * Think of them (all together) as one long tape on which the data will be | |
5173 | + * stored. | |
5174 | + **/ | |
5175 | +static void toi_set_devinfo(struct toi_bdev_info *info) | |
5176 | +{ | |
5177 | + toi_devinfo = info; | |
5178 | +} | |
5179 | + | |
5180 | +/** | |
5181 | + * dump_block_chains - print the contents of the bdev info array. | |
5182 | + **/ | |
5183 | +static void dump_block_chains(void) | |
5184 | +{ | |
5185 | + int i; | |
5186 | + | |
5187 | + for (i = 0; i < toi_writer_posn.num_chains; i++) { | |
5188 | + struct hibernate_extent *this; | |
5189 | + | |
5190 | + this = (toi_writer_posn.chains + i)->first; | |
5191 | + | |
5192 | + if (!this) | |
5193 | + continue; | |
5194 | + | |
0ada99ac | 5195 | + printk(KERN_DEBUG "Chain %d:", i); |
2380c486 JR |
5196 | + |
5197 | + while (this) { | |
5198 | + printk(" [%lu-%lu]%s", this->start, | |
5199 | + this->end, this->next ? "," : ""); | |
5200 | + this = this->next; | |
5201 | + } | |
5202 | + | |
5203 | + printk("\n"); | |
5204 | + } | |
5205 | + | |
5206 | + for (i = 0; i < 4; i++) | |
0ada99ac | 5207 | + printk(KERN_DEBUG "Posn %d: Chain %d, extent %d, offset %lu.\n", |
2380c486 JR |
5208 | + i, toi_writer_posn_save[i].chain_num, |
5209 | + toi_writer_posn_save[i].extent_num, | |
5210 | + toi_writer_posn_save[i].offset); | |
5211 | +} | |
5212 | + | |
0ada99ac | 5213 | +static int total_header_bytes; |
5214 | +static int unowned; | |
5215 | + | |
5216 | +static int debug_broken_header(void) | |
5217 | +{ | |
5218 | + printk(KERN_DEBUG "Image header too big for size allocated!\n"); | |
5219 | + print_toi_header_storage_for_modules(); | |
5220 | + printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed()); | |
5221 | + printk(KERN_DEBUG "toi_header : %ld.\n", sizeof(struct toi_header)); | |
5222 | + printk(KERN_DEBUG "Total unowned : %d.\n", unowned); | |
5223 | + printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes, | |
5224 | + DIV_ROUND_UP(total_header_bytes, PAGE_SIZE)); | |
9474138d AM |
5225 | + printk(KERN_DEBUG "Space needed now : %ld.\n", |
5226 | + get_header_storage_needed()); | |
0ada99ac | 5227 | + dump_block_chains(); |
5228 | + abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small."); | |
5229 | + return -EIO; | |
5230 | +} | |
5231 | + | |
2380c486 JR |
5232 | +/** |
5233 | + * go_next_page - skip blocks to the start of the next page | |
5234 | + * @writing: Whether we're reading or writing the image. | |
5235 | + * | |
5236 | + * Go forward one page, or two if extra_page_forward is set. It only gets | |
5237 | + * set at the start of reading the image header, to skip the first page | |
5238 | + * of the header, which is read without using the extent chains. | |
5239 | + **/ | |
0ada99ac | 5240 | +static int go_next_page(int writing, int section_barrier) |
2380c486 | 5241 | +{ |
9474138d AM |
5242 | + int i, chain_num = toi_writer_posn.current_chain, |
5243 | + max = (chain_num == -1) ? 1 : toi_devinfo[chain_num].blocks_per_page, | |
5244 | + compare_to = 0, compare_chain, compare_offset; | |
e999739a | 5245 | + |
5246 | + /* Have we already used the last page of the stream? */ | |
5247 | + switch (current_stream) { | |
5248 | + case 0: | |
5249 | + compare_to = 2; | |
5250 | + break; | |
5251 | + case 1: | |
5252 | + compare_to = 3; | |
5253 | + break; | |
5254 | + case 2: | |
5255 | + compare_to = 1; | |
5256 | + break; | |
5257 | + } | |
2380c486 | 5258 | + |
9474138d AM |
5259 | + compare_chain = toi_writer_posn_save[compare_to].chain_num; |
5260 | + compare_offset = toi_writer_posn_save[compare_to].offset; | |
5261 | + | |
5262 | + if (section_barrier && chain_num == compare_chain && | |
5263 | + toi_writer_posn.current_offset == compare_offset) { | |
0ada99ac | 5264 | + if (writing) { |
9474138d AM |
5265 | + if (!current_stream) |
5266 | + return debug_broken_header(); | |
0ada99ac | 5267 | + } else { |
e999739a | 5268 | + more_readahead = 0; |
5269 | + return -ENODATA; | |
5270 | + } | |
5271 | + } | |
5272 | + | |
5273 | + /* Nope. Go foward a page - or maybe two */ | |
2380c486 JR |
5274 | + for (i = 0; i < max; i++) |
5275 | + toi_extent_state_next(&toi_writer_posn); | |
5276 | + | |
5277 | + if (toi_extent_state_eof(&toi_writer_posn)) { | |
5278 | + /* Don't complain if readahead falls off the end */ | |
0ada99ac | 5279 | + if (writing && section_barrier) { |
5280 | + printk(KERN_DEBUG "Extent state eof. " | |
2380c486 JR |
5281 | + "Expected compression ratio too optimistic?\n"); |
5282 | + dump_block_chains(); | |
5283 | + } | |
5284 | + return -ENODATA; | |
5285 | + } | |
5286 | + | |
5287 | + if (extra_page_forward) { | |
5288 | + extra_page_forward = 0; | |
0ada99ac | 5289 | + return go_next_page(writing, section_barrier); |
2380c486 JR |
5290 | + } |
5291 | + | |
5292 | + return 0; | |
5293 | +} | |
5294 | + | |
5295 | +/** | |
5296 | + * set_extra_page_forward - make us skip an extra page on next go_next_page | |
5297 | + * | |
5298 | + * Used in reading header, to jump to 2nd page after getting 1st page | |
5299 | + * direct from image header. | |
5300 | + **/ | |
5301 | +static void set_extra_page_forward(void) | |
5302 | +{ | |
5303 | + extra_page_forward = 1; | |
5304 | +} | |
5305 | + | |
5306 | +/** | |
5307 | + * toi_bio_rw_page - do i/o on the next disk page in the image | |
5308 | + * @writing: Whether reading or writing. | |
5309 | + * @page: Page to do i/o on. | |
5310 | + * @is_readahead: Whether we're doing readahead | |
5311 | + * @free_group: The group used in allocating the page | |
5312 | + * | |
5313 | + * Submit a page for reading or writing, possibly readahead. | |
5314 | + * Pass the group used in allocating the page as well, as it should | |
5315 | + * be freed on completion of the bio if we're writing the page. | |
5316 | + **/ | |
5317 | +static int toi_bio_rw_page(int writing, struct page *page, | |
5318 | + int is_readahead, int free_group) | |
5319 | +{ | |
5320 | + struct toi_bdev_info *dev_info; | |
0ada99ac | 5321 | + int result = go_next_page(writing, 1); |
2380c486 | 5322 | + |
0ada99ac | 5323 | + if (result) |
5324 | + return result; | |
2380c486 JR |
5325 | + |
5326 | + dev_info = &toi_devinfo[toi_writer_posn.current_chain]; | |
5327 | + | |
e999739a | 5328 | + return toi_do_io(writing, dev_info->bdev, |
2380c486 JR |
5329 | + toi_writer_posn.current_offset << |
5330 | + dev_info->bmap_shift, | |
5331 | + page, is_readahead, 0, free_group); | |
2380c486 JR |
5332 | +} |
5333 | + | |
5334 | +/** | |
5335 | + * toi_rw_init - prepare to read or write a stream in the image | |
5336 | + * @writing: Whether reading or writing. | |
5337 | + * @stream number: Section of the image being processed. | |
5338 | + * | |
5339 | + * Prepare to read or write a section ('stream') in the image. | |
5340 | + **/ | |
5341 | +static int toi_rw_init(int writing, int stream_number) | |
5342 | +{ | |
5343 | + if (stream_number) | |
5344 | + toi_extent_state_restore(&toi_writer_posn, | |
5345 | + &toi_writer_posn_save[stream_number]); | |
5346 | + else | |
5347 | + toi_extent_state_goto_start(&toi_writer_posn); | |
5348 | + | |
5349 | + atomic_set(&toi_io_done, 0); | |
5350 | + toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP); | |
5351 | + toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE; | |
5352 | + | |
5353 | + current_stream = stream_number; | |
5354 | + | |
5355 | + more_readahead = 1; | |
5356 | + | |
5357 | + return toi_writer_buffer ? 0 : -ENOMEM; | |
5358 | +} | |
5359 | + | |
5360 | +/** | |
5361 | + * toi_read_header_init - prepare to read the image header | |
5362 | + * | |
5363 | + * Reset readahead indices prior to starting to read a section of the image. | |
5364 | + **/ | |
5365 | +static void toi_read_header_init(void) | |
5366 | +{ | |
5367 | + toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP); | |
5368 | + more_readahead = 1; | |
5369 | +} | |
5370 | + | |
5371 | +/** | |
5372 | + * toi_bio_queue_write - queue a page for writing | |
5373 | + * @full_buffer: Pointer to a page to be queued | |
5374 | + * | |
5375 | + * Add a page to the queue to be submitted. If we're the queue flusher, | |
5376 | + * we'll do this once we've dropped toi_bio_mutex, so other threads can | |
5377 | + * continue to submit I/O while we're on the slow path doing the actual | |
5378 | + * submission. | |
5379 | + **/ | |
5380 | +static void toi_bio_queue_write(char **full_buffer) | |
5381 | +{ | |
5382 | + struct page *page = virt_to_page(*full_buffer); | |
5383 | + unsigned long flags; | |
5384 | + | |
5385 | + page->private = 0; | |
5386 | + | |
5387 | + spin_lock_irqsave(&bio_queue_lock, flags); | |
5388 | + if (!bio_queue_head) | |
5389 | + bio_queue_head = page; | |
5390 | + else | |
5391 | + bio_queue_tail->private = (unsigned long) page; | |
5392 | + | |
5393 | + bio_queue_tail = page; | |
5394 | + atomic_inc(&toi_bio_queue_size); | |
5395 | + | |
5396 | + spin_unlock_irqrestore(&bio_queue_lock, flags); | |
5397 | + wake_up(&toi_io_queue_flusher); | |
5398 | + | |
5399 | + *full_buffer = NULL; | |
5400 | +} | |
5401 | + | |
5402 | +/** | |
5403 | + * toi_rw_cleanup - Cleanup after i/o. | |
5404 | + * @writing: Whether we were reading or writing. | |
5405 | + * | |
5406 | + * Flush all I/O and clean everything up after reading or writing a | |
5407 | + * section of the image. | |
5408 | + **/ | |
5409 | +static int toi_rw_cleanup(int writing) | |
5410 | +{ | |
0ada99ac | 5411 | + int i, result; |
2380c486 JR |
5412 | + |
5413 | + if (writing) { | |
5414 | + int result; | |
5415 | + | |
5416 | + if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED)) | |
5417 | + toi_bio_queue_write(&toi_writer_buffer); | |
5418 | + | |
5419 | + result = toi_bio_queue_flush_pages(0); | |
5420 | + | |
5421 | + if (result) | |
5422 | + return result; | |
5423 | + | |
5424 | + if (current_stream == 2) | |
5425 | + toi_extent_state_save(&toi_writer_posn, | |
5426 | + &toi_writer_posn_save[1]); | |
5427 | + else if (current_stream == 1) | |
5428 | + toi_extent_state_save(&toi_writer_posn, | |
5429 | + &toi_writer_posn_save[3]); | |
5430 | + } | |
5431 | + | |
0ada99ac | 5432 | + result = toi_finish_all_io(); |
2380c486 JR |
5433 | + |
5434 | + while (readahead_list_head) { | |
5435 | + void *next = (void *) readahead_list_head->private; | |
5436 | + toi__free_page(12, readahead_list_head); | |
5437 | + readahead_list_head = next; | |
5438 | + } | |
5439 | + | |
5440 | + readahead_list_tail = NULL; | |
5441 | + | |
5442 | + if (!current_stream) | |
0ada99ac | 5443 | + return result; |
2380c486 JR |
5444 | + |
5445 | + for (i = 0; i < NUM_REASONS; i++) { | |
5446 | + if (!atomic_read(&reasons[i])) | |
5447 | + continue; | |
0ada99ac | 5448 | + printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n", |
2380c486 JR |
5449 | + reason_name[i], atomic_read(&reasons[i])); |
5450 | + atomic_set(&reasons[i], 0); | |
5451 | + } | |
5452 | + | |
5453 | + current_stream = 0; | |
0ada99ac | 5454 | + return result; |
2380c486 JR |
5455 | +} |
5456 | + | |
5457 | +/** | |
5458 | + * toi_start_one_readahead - start one page of readahead | |
5459 | + * @dedicated_thread: Is this a thread dedicated to doing readahead? | |
5460 | + * | |
5461 | + * Start one new page of readahead. If this is being called by a thread | |
5462 | + * whose only just is to submit readahead, don't quit because we failed | |
5463 | + * to allocate a page. | |
5464 | + **/ | |
5465 | +static int toi_start_one_readahead(int dedicated_thread) | |
5466 | +{ | |
5467 | + char *buffer = NULL; | |
5468 | + int oom = 0, result; | |
5469 | + | |
5470 | + result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0); | |
5471 | + if (result) | |
5472 | + return result; | |
5473 | + | |
5474 | + mutex_lock(&toi_bio_readahead_mutex); | |
5475 | + | |
5476 | + while (!buffer) { | |
5477 | + buffer = (char *) toi_get_zeroed_page(12, | |
5478 | + TOI_ATOMIC_GFP); | |
5479 | + if (!buffer) { | |
5480 | + if (oom && !dedicated_thread) { | |
5481 | + mutex_unlock(&toi_bio_readahead_mutex); | |
5482 | + return -ENOMEM; | |
5483 | + } | |
5484 | + | |
5485 | + oom = 1; | |
5486 | + set_free_mem_throttle(); | |
5487 | + do_bio_wait(5); | |
5488 | + } | |
5489 | + } | |
5490 | + | |
5491 | + result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0); | |
5492 | + mutex_unlock(&toi_bio_readahead_mutex); | |
5493 | + return result; | |
5494 | +} | |
5495 | + | |
5496 | +/** | |
5497 | + * toi_start_new_readahead - start new readahead | |
5498 | + * @dedicated_thread: Are we dedicated to this task? | |
5499 | + * | |
5500 | + * Start readahead of image pages. | |
5501 | + * | |
5502 | + * We can be called as a thread dedicated to this task (may be helpful on | |
5503 | + * systems with lots of CPUs), in which case we don't exit until there's no | |
5504 | + * more readahead. | |
5505 | + * | |
5506 | + * If this is not called by a dedicated thread, we top up our queue until | |
5507 | + * there's no more readahead to submit, we've submitted the number given | |
5508 | + * in target_outstanding_io or the number in progress exceeds the target | |
5509 | + * outstanding I/O value. | |
5510 | + * | |
5511 | + * No mutex needed because this is only ever called by the first cpu. | |
5512 | + **/ | |
5513 | +static int toi_start_new_readahead(int dedicated_thread) | |
5514 | +{ | |
5515 | + int last_result, num_submitted = 0; | |
5516 | + | |
5517 | + /* Start a new readahead? */ | |
5518 | + if (!more_readahead) | |
5519 | + return 0; | |
5520 | + | |
5521 | + do { | |
5522 | + last_result = toi_start_one_readahead(dedicated_thread); | |
5523 | + | |
5524 | + if (last_result) { | |
e999739a | 5525 | + if (last_result == -ENOMEM || last_result == -ENODATA) |
2380c486 JR |
5526 | + return 0; |
5527 | + | |
0ada99ac | 5528 | + printk(KERN_DEBUG |
e999739a | 5529 | + "Begin read chunk returned %d.\n", |
5530 | + last_result); | |
2380c486 JR |
5531 | + } else |
5532 | + num_submitted++; | |
5533 | + | |
5534 | + } while (more_readahead && !last_result && | |
5535 | + (dedicated_thread || | |
5536 | + (num_submitted < target_outstanding_io && | |
5537 | + atomic_read(&toi_io_in_progress) < target_outstanding_io))); | |
5538 | + | |
5539 | + return last_result; | |
5540 | +} | |
5541 | + | |
/**
 * bio_io_flusher - start the dedicated I/O flushing routine
 * @writing:	Whether we're writing the image.
 *
 * When writing, flush queued pages as a dedicated thread; when reading,
 * submit readahead as a dedicated thread. Returns the result of whichever
 * routine was run.
 **/
static int bio_io_flusher(int writing)
{
	return writing ? toi_bio_queue_flush_pages(1) :
			 toi_start_new_readahead(1);
}
5554 | + | |
5555 | +/** | |
5556 | + * toi_bio_get_next_page_read - read a disk page, perhaps with readahead | |
5557 | + * @no_readahead: Whether we can use readahead | |
5558 | + * | |
5559 | + * Read a page from disk, submitting readahead and cleaning up finished i/o | |
5560 | + * while we wait for the page we're after. | |
5561 | + **/ | |
5562 | +static int toi_bio_get_next_page_read(int no_readahead) | |
5563 | +{ | |
5564 | + unsigned long *virt; | |
5565 | + struct page *next; | |
5566 | + | |
5567 | + /* | |
5568 | + * When reading the second page of the header, we have to | |
5569 | + * delay submitting the read until after we've gotten the | |
5570 | + * extents out of the first page. | |
5571 | + */ | |
5572 | + if (unlikely(no_readahead && toi_start_one_readahead(0))) { | |
0ada99ac | 5573 | + printk(KERN_DEBUG "No readahead and toi_start_one_readahead " |
2380c486 JR |
5574 | + "returned non-zero.\n"); |
5575 | + return -EIO; | |
5576 | + } | |
5577 | + | |
5578 | + if (unlikely(!readahead_list_head)) { | |
5579 | + BUG_ON(!more_readahead); | |
5580 | + if (unlikely(toi_start_one_readahead(0))) { | |
0ada99ac | 5581 | + printk(KERN_DEBUG "No readahead and " |
2380c486 JR |
5582 | + "toi_start_one_readahead returned non-zero.\n"); |
5583 | + return -EIO; | |
5584 | + } | |
5585 | + } | |
5586 | + | |
5587 | + if (PageLocked(readahead_list_head)) { | |
5588 | + waiting_on = readahead_list_head; | |
5589 | + do_bio_wait(0); | |
5590 | + } | |
5591 | + | |
5592 | + virt = page_address(readahead_list_head); | |
5593 | + memcpy(toi_writer_buffer, virt, PAGE_SIZE); | |
5594 | + | |
5595 | + next = (struct page *) readahead_list_head->private; | |
5596 | + toi__free_page(12, readahead_list_head); | |
5597 | + readahead_list_head = next; | |
5598 | + return 0; | |
5599 | +} | |
5600 | + | |
5601 | +/** | |
5602 | + * toi_bio_queue_flush_pages - flush the queue of pages queued for writing | |
5603 | + * @dedicated_thread: Whether we're a dedicated thread | |
5604 | + * | |
5605 | + * Flush the queue of pages ready to be written to disk. | |
5606 | + * | |
5607 | + * If we're a dedicated thread, stay in here until told to leave, | |
5608 | + * sleeping in wait_event. | |
5609 | + * | |
5610 | + * The first thread is normally the only one to come in here. Another | |
5611 | + * thread can enter this routine too, though, via throttle_if_needed. | |
5612 | + * Since that's the case, we must be careful to only have one thread | |
5613 | + * doing this work at a time. Otherwise we have a race and could save | |
5614 | + * pages out of order. | |
0ada99ac | 5615 | + * |
5616 | + * If an error occurs, free all remaining pages without submitting them | |
5617 | + * for I/O. | |
2380c486 JR |
5618 | + **/ |
5619 | + | |
5620 | +int toi_bio_queue_flush_pages(int dedicated_thread) | |
5621 | +{ | |
5622 | + unsigned long flags; | |
5623 | + int result = 0; | |
e999739a | 5624 | + static int busy; |
2380c486 JR |
5625 | + |
5626 | + if (busy) | |
5627 | + return 0; | |
5628 | + | |
5629 | + busy = 1; | |
5630 | + | |
5631 | +top: | |
5632 | + spin_lock_irqsave(&bio_queue_lock, flags); | |
5633 | + while (bio_queue_head) { | |
5634 | + struct page *page = bio_queue_head; | |
5635 | + bio_queue_head = (struct page *) page->private; | |
5636 | + if (bio_queue_tail == page) | |
5637 | + bio_queue_tail = NULL; | |
5638 | + atomic_dec(&toi_bio_queue_size); | |
5639 | + spin_unlock_irqrestore(&bio_queue_lock, flags); | |
0ada99ac | 5640 | + if (!result) |
5641 | + result = toi_bio_rw_page(WRITE, page, 0, 11); | |
2380c486 | 5642 | + if (result) |
0ada99ac | 5643 | + toi__free_page(11 , page); |
2380c486 JR |
5644 | + spin_lock_irqsave(&bio_queue_lock, flags); |
5645 | + } | |
5646 | + spin_unlock_irqrestore(&bio_queue_lock, flags); | |
5647 | + | |
5648 | + if (dedicated_thread) { | |
5649 | + wait_event(toi_io_queue_flusher, bio_queue_head || | |
5650 | + toi_bio_queue_flusher_should_finish); | |
5651 | + if (likely(!toi_bio_queue_flusher_should_finish)) | |
5652 | + goto top; | |
5653 | + toi_bio_queue_flusher_should_finish = 0; | |
5654 | + } | |
5655 | + | |
2380c486 JR |
5656 | + busy = 0; |
5657 | + return result; | |
5658 | +} | |
5659 | + | |
5660 | +/** | |
5661 | + * toi_bio_get_new_page - get a new page for I/O | |
5662 | + * @full_buffer: Pointer to a page to allocate. | |
5663 | + **/ | |
5664 | +static int toi_bio_get_new_page(char **full_buffer) | |
5665 | +{ | |
5666 | + int result = throttle_if_needed(THROTTLE_WAIT); | |
5667 | + if (result) | |
5668 | + return result; | |
5669 | + | |
5670 | + while (!*full_buffer) { | |
5671 | + *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP); | |
5672 | + if (!*full_buffer) { | |
5673 | + set_free_mem_throttle(); | |
5674 | + do_bio_wait(3); | |
5675 | + } | |
5676 | + } | |
5677 | + | |
5678 | + return 0; | |
5679 | +} | |
5680 | + | |
5681 | +/** | |
5682 | + * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O | |
5683 | + * @writing: Bool - whether writing (or reading). | |
5684 | + * @buffer: The start of the buffer to write or fill. | |
5685 | + * @buffer_size: The size of the buffer to write or fill. | |
5686 | + * @no_readahead: Don't try to start readhead (when getting extents). | |
5687 | + **/ | |
5688 | +static int toi_rw_buffer(int writing, char *buffer, int buffer_size, | |
5689 | + int no_readahead) | |
5690 | +{ | |
5691 | + int bytes_left = buffer_size, result = 0; | |
5692 | + | |
5693 | + while (bytes_left) { | |
5694 | + char *source_start = buffer + buffer_size - bytes_left; | |
5695 | + char *dest_start = toi_writer_buffer + toi_writer_buffer_posn; | |
5696 | + int capacity = PAGE_SIZE - toi_writer_buffer_posn; | |
5697 | + char *to = writing ? dest_start : source_start; | |
5698 | + char *from = writing ? source_start : dest_start; | |
5699 | + | |
5700 | + if (bytes_left <= capacity) { | |
5701 | + memcpy(to, from, bytes_left); | |
5702 | + toi_writer_buffer_posn += bytes_left; | |
5703 | + return 0; | |
5704 | + } | |
5705 | + | |
5706 | + /* Complete this page and start a new one */ | |
5707 | + memcpy(to, from, capacity); | |
5708 | + bytes_left -= capacity; | |
5709 | + | |
5710 | + if (!writing) { | |
5711 | + /* | |
5712 | + * Perform actual I/O: | |
5713 | + * read readahead_list_head into toi_writer_buffer | |
5714 | + */ | |
5715 | + int result = toi_bio_get_next_page_read(no_readahead); | |
5716 | + if (result) | |
5717 | + return result; | |
5718 | + } else { | |
5719 | + toi_bio_queue_write(&toi_writer_buffer); | |
5720 | + result = toi_bio_get_new_page(&toi_writer_buffer); | |
5721 | + if (result) | |
5722 | + return result; | |
5723 | + } | |
5724 | + | |
5725 | + toi_writer_buffer_posn = 0; | |
5726 | + toi_cond_pause(0, NULL); | |
5727 | + } | |
5728 | + | |
5729 | + return 0; | |
5730 | +} | |
5731 | + | |
5732 | +/** | |
5733 | + * toi_bio_read_page - read a page of the image | |
5734 | + * @pfn: The pfn where the data belongs. | |
5735 | + * @buffer_page: The page containing the (possibly compressed) data. | |
5736 | + * @buf_size: The number of bytes on @buffer_page used (PAGE_SIZE). | |
5737 | + * | |
5738 | + * Read a (possibly compressed) page from the image, into buffer_page, | |
5739 | + * returning its pfn and the buffer size. | |
5740 | + **/ | |
5741 | +static int toi_bio_read_page(unsigned long *pfn, struct page *buffer_page, | |
5742 | + unsigned int *buf_size) | |
5743 | +{ | |
5744 | + int result = 0; | |
5745 | + char *buffer_virt = kmap(buffer_page); | |
5746 | + | |
5747 | + /* | |
5748 | + * Only call start_new_readahead if we don't have a dedicated thread | |
5749 | + * and we're the queue flusher. | |
5750 | + */ | |
5751 | + if (current == toi_queue_flusher) { | |
5752 | + int result2 = toi_start_new_readahead(0); | |
5753 | + if (result2) { | |
0ada99ac | 5754 | + printk(KERN_DEBUG "Queue flusher and " |
2380c486 JR |
5755 | + "toi_start_one_readahead returned non-zero.\n"); |
5756 | + result = -EIO; | |
5757 | + goto out; | |
5758 | + } | |
5759 | + } | |
5760 | + | |
5761 | + my_mutex_lock(0, &toi_bio_mutex); | |
5762 | + | |
5763 | + /* | |
5764 | + * Structure in the image: | |
5765 | + * [destination pfn|page size|page data] | |
5766 | + * buf_size is PAGE_SIZE | |
5767 | + */ | |
5768 | + if (toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) || | |
5769 | + toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) || | |
5770 | + toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) { | |
5771 | + abort_hibernate(TOI_FAILED_IO, "Read of data failed."); | |
5772 | + result = 1; | |
5773 | + } | |
5774 | + | |
5775 | + my_mutex_unlock(0, &toi_bio_mutex); | |
5776 | +out: | |
5777 | + kunmap(buffer_page); | |
5778 | + return result; | |
5779 | +} | |
5780 | + | |
5781 | +/** | |
5782 | + * toi_bio_write_page - write a page of the image | |
5783 | + * @pfn: The pfn where the data belongs. | |
5784 | + * @buffer_page: The page containing the (possibly compressed) data. | |
5785 | + * @buf_size: The number of bytes on @buffer_page used. | |
5786 | + * | |
5787 | + * Write a (possibly compressed) page to the image from the buffer, together | |
5788 | + * with it's index and buffer size. | |
5789 | + **/ | |
5790 | +static int toi_bio_write_page(unsigned long pfn, struct page *buffer_page, | |
5791 | + unsigned int buf_size) | |
5792 | +{ | |
5793 | + char *buffer_virt; | |
5794 | + int result = 0, result2 = 0; | |
5795 | + | |
5796 | + if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED))) | |
5797 | + return 0; | |
5798 | + | |
5799 | + my_mutex_lock(1, &toi_bio_mutex); | |
5800 | + | |
5801 | + if (test_result_state(TOI_ABORTED)) { | |
5802 | + my_mutex_unlock(1, &toi_bio_mutex); | |
5803 | + return -EIO; | |
5804 | + } | |
5805 | + | |
5806 | + buffer_virt = kmap(buffer_page); | |
5807 | + | |
5808 | + /* | |
5809 | + * Structure in the image: | |
5810 | + * [destination pfn|page size|page data] | |
5811 | + * buf_size is PAGE_SIZE | |
5812 | + */ | |
5813 | + if (toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) || | |
5814 | + toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) || | |
5815 | + toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) { | |
0ada99ac | 5816 | + printk(KERN_DEBUG "toi_rw_buffer returned non-zero to " |
2380c486 JR |
5817 | + "toi_bio_write_page.\n"); |
5818 | + result = -EIO; | |
5819 | + } | |
5820 | + | |
5821 | + kunmap(buffer_page); | |
5822 | + my_mutex_unlock(1, &toi_bio_mutex); | |
5823 | + | |
5824 | + if (current == toi_queue_flusher) | |
5825 | + result2 = toi_bio_queue_flush_pages(0); | |
5826 | + | |
5827 | + return result ? result : result2; | |
5828 | +} | |
5829 | + | |
5830 | +/** | |
5831 | + * _toi_rw_header_chunk - read or write a portion of the image header | |
5832 | + * @writing: Whether reading or writing. | |
5833 | + * @owner: The module for which we're writing. | |
5834 | + * Used for confirming that modules | |
5835 | + * don't use more header space than they asked for. | |
5836 | + * @buffer: Address of the data to write. | |
5837 | + * @buffer_size: Size of the data buffer. | |
5838 | + * @no_readahead: Don't try to start readahead (when getting extents). | |
5839 | + * | |
5840 | + * Perform PAGE_SIZE I/O. Start readahead if needed. | |
5841 | + **/ | |
5842 | +static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner, | |
5843 | + char *buffer, int buffer_size, int no_readahead) | |
5844 | +{ | |
5845 | + int result = 0; | |
5846 | + | |
5847 | + if (owner) { | |
5848 | + owner->header_used += buffer_size; | |
5849 | + toi_message(TOI_HEADER, TOI_LOW, 1, | |
0ada99ac | 5850 | + "Header: %s : %d bytes (%d/%d).\n", |
5851 | + owner->name, | |
2380c486 JR |
5852 | + buffer_size, owner->header_used, |
5853 | + owner->header_requested); | |
5854 | + if (owner->header_used > owner->header_requested) { | |
5855 | + printk(KERN_EMERG "TuxOnIce module %s is using more " | |
5856 | + "header space (%u) than it requested (%u).\n", | |
5857 | + owner->name, | |
5858 | + owner->header_used, | |
5859 | + owner->header_requested); | |
5860 | + return buffer_size; | |
5861 | + } | |
0ada99ac | 5862 | + } else { |
5863 | + unowned += buffer_size; | |
2380c486 | 5864 | + toi_message(TOI_HEADER, TOI_LOW, 1, |
0ada99ac | 5865 | + "Header: (No owner): %d bytes (%d total so far)\n", |
5866 | + buffer_size, unowned); | |
5867 | + } | |
2380c486 JR |
5868 | + |
5869 | + if (!writing && !no_readahead) | |
5870 | + result = toi_start_new_readahead(0); | |
5871 | + | |
5872 | + if (!result) | |
5873 | + result = toi_rw_buffer(writing, buffer, buffer_size, | |
5874 | + no_readahead); | |
5875 | + | |
0ada99ac | 5876 | + total_header_bytes += buffer_size; |
2380c486 JR |
5877 | + return result; |
5878 | +} | |
5879 | + | |
5880 | +static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner, | |
5881 | + char *buffer, int size) | |
5882 | +{ | |
5883 | + return _toi_rw_header_chunk(writing, owner, buffer, size, 0); | |
5884 | +} | |
5885 | + | |
5886 | +static int toi_rw_header_chunk_noreadahead(int writing, | |
5887 | + struct toi_module_ops *owner, char *buffer, int size) | |
5888 | +{ | |
5889 | + return _toi_rw_header_chunk(writing, owner, buffer, size, 1); | |
5890 | +} | |
5891 | + | |
5892 | +/** | |
5893 | + * write_header_chunk_finish - flush any buffered header data | |
5894 | + **/ | |
5895 | +static int write_header_chunk_finish(void) | |
5896 | +{ | |
5897 | + int result = 0; | |
5898 | + | |
5899 | + if (toi_writer_buffer_posn) | |
5900 | + toi_bio_queue_write(&toi_writer_buffer); | |
5901 | + | |
0ada99ac | 5902 | + result = toi_finish_all_io(); |
2380c486 | 5903 | + |
0ada99ac | 5904 | + unowned = 0; |
5905 | + total_header_bytes = 0; | |
2380c486 JR |
5906 | + return result; |
5907 | +} | |
5908 | + | |
5909 | +/** | |
5910 | + * toi_bio_storage_needed - get the amount of storage needed for my fns | |
5911 | + **/ | |
5912 | +static int toi_bio_storage_needed(void) | |
5913 | +{ | |
0ada99ac | 5914 | + return sizeof(int); |
2380c486 JR |
5915 | +} |
5916 | + | |
5917 | +/** | |
5918 | + * toi_bio_save_config_info - save block I/O config to image header | |
5919 | + * @buf: PAGE_SIZE'd buffer into which data should be saved. | |
5920 | + **/ | |
5921 | +static int toi_bio_save_config_info(char *buf) | |
5922 | +{ | |
5923 | + int *ints = (int *) buf; | |
5924 | + ints[0] = target_outstanding_io; | |
5925 | + return sizeof(int); | |
5926 | +} | |
5927 | + | |
5928 | +/** | |
5929 | + * toi_bio_load_config_info - restore block I/O config | |
5930 | + * @buf: Data to be reloaded. | |
5931 | + * @size: Size of the buffer saved. | |
5932 | + **/ | |
5933 | +static void toi_bio_load_config_info(char *buf, int size) | |
5934 | +{ | |
5935 | + int *ints = (int *) buf; | |
5936 | + target_outstanding_io = ints[0]; | |
5937 | +} | |
5938 | + | |
5939 | +/** | |
5940 | + * toi_bio_initialise - initialise bio code at start of some action | |
5941 | + * @starting_cycle: Whether starting a hibernation cycle, or just reading or | |
5942 | + * writing a sysfs value. | |
5943 | + **/ | |
5944 | +static int toi_bio_initialise(int starting_cycle) | |
5945 | +{ | |
5946 | + if (starting_cycle) { | |
5947 | + max_outstanding_writes = 0; | |
5948 | + max_outstanding_reads = 0; | |
5949 | + toi_queue_flusher = current; | |
5950 | +#ifdef MEASURE_MUTEX_CONTENTION | |
5951 | + { | |
5952 | + int i, j, k; | |
5953 | + | |
5954 | + for (i = 0; i < 2; i++) | |
5955 | + for (j = 0; j < 2; j++) | |
5956 | + for_each_online_cpu(k) | |
5957 | + mutex_times[i][j][k] = 0; | |
5958 | + } | |
5959 | +#endif | |
5960 | + } | |
5961 | + | |
5962 | + return 0; | |
5963 | +} | |
5964 | + | |
5965 | +/** | |
5966 | + * toi_bio_cleanup - cleanup after some action | |
5967 | + * @finishing_cycle: Whether completing a cycle. | |
5968 | + **/ | |
5969 | +static void toi_bio_cleanup(int finishing_cycle) | |
5970 | +{ | |
5971 | + if (toi_writer_buffer) { | |
5972 | + toi_free_page(11, (unsigned long) toi_writer_buffer); | |
5973 | + toi_writer_buffer = NULL; | |
5974 | + } | |
5975 | +} | |
5976 | + | |
5977 | +struct toi_bio_ops toi_bio_ops = { | |
5978 | + .bdev_page_io = toi_bdev_page_io, | |
5979 | + .finish_all_io = toi_finish_all_io, | |
5980 | + .update_throughput_throttle = update_throughput_throttle, | |
5981 | + .forward_one_page = go_next_page, | |
5982 | + .set_extra_page_forward = set_extra_page_forward, | |
5983 | + .set_devinfo = toi_set_devinfo, | |
5984 | + .read_page = toi_bio_read_page, | |
5985 | + .write_page = toi_bio_write_page, | |
5986 | + .rw_init = toi_rw_init, | |
5987 | + .rw_cleanup = toi_rw_cleanup, | |
5988 | + .read_header_init = toi_read_header_init, | |
5989 | + .rw_header_chunk = toi_rw_header_chunk, | |
5990 | + .rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead, | |
5991 | + .write_header_chunk_finish = write_header_chunk_finish, | |
5992 | + .io_flusher = bio_io_flusher, | |
5993 | +}; | |
5994 | +EXPORT_SYMBOL_GPL(toi_bio_ops); | |
5995 | + | |
5996 | +static struct toi_sysfs_data sysfs_params[] = { | |
5997 | + SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io, | |
5998 | + 0, 16384, 0, NULL), | |
5999 | +}; | |
6000 | + | |
6001 | +static struct toi_module_ops toi_blockwriter_ops = { | |
6002 | + .name = "lowlevel i/o", | |
6003 | + .type = MISC_HIDDEN_MODULE, | |
6004 | + .directory = "block_io", | |
6005 | + .module = THIS_MODULE, | |
6006 | + .print_debug_info = toi_bio_print_debug_stats, | |
6007 | + .memory_needed = toi_bio_memory_needed, | |
6008 | + .storage_needed = toi_bio_storage_needed, | |
6009 | + .save_config_info = toi_bio_save_config_info, | |
6010 | + .load_config_info = toi_bio_load_config_info, | |
6011 | + .initialise = toi_bio_initialise, | |
6012 | + .cleanup = toi_bio_cleanup, | |
6013 | + | |
6014 | + .sysfs_data = sysfs_params, | |
6015 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
6016 | + sizeof(struct toi_sysfs_data), | |
6017 | +}; | |
6018 | + | |
6019 | +/** | |
6020 | + * toi_block_io_load - load time routine for block I/O module | |
6021 | + * | |
6022 | + * Register block i/o ops and sysfs entries. | |
6023 | + **/ | |
6024 | +static __init int toi_block_io_load(void) | |
6025 | +{ | |
6026 | + return toi_register_module(&toi_blockwriter_ops); | |
6027 | +} | |
6028 | + | |
6029 | +#ifdef MODULE | |
6030 | +static __exit void toi_block_io_unload(void) | |
6031 | +{ | |
6032 | + toi_unregister_module(&toi_blockwriter_ops); | |
6033 | +} | |
6034 | + | |
6035 | +module_init(toi_block_io_load); | |
6036 | +module_exit(toi_block_io_unload); | |
6037 | +MODULE_LICENSE("GPL"); | |
6038 | +MODULE_AUTHOR("Nigel Cunningham"); | |
6039 | +MODULE_DESCRIPTION("TuxOnIce block io functions"); | |
6040 | +#else | |
6041 | +late_initcall(toi_block_io_load); | |
6042 | +#endif | |
6043 | diff --git a/kernel/power/tuxonice_block_io.h b/kernel/power/tuxonice_block_io.h | |
6044 | new file mode 100644 | |
0ada99ac | 6045 | index 0000000..b18298c |
2380c486 JR |
6046 | --- /dev/null |
6047 | +++ b/kernel/power/tuxonice_block_io.h | |
e999739a | 6048 | @@ -0,0 +1,59 @@ |
2380c486 JR |
6049 | +/* |
6050 | + * kernel/power/tuxonice_block_io.h | |
6051 | + * | |
6052 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
6053 | + * Copyright (C) 2006 Red Hat, inc. | |
6054 | + * | |
6055 | + * Distributed under GPLv2. | |
6056 | + * | |
6057 | + * This file contains declarations for functions exported from | |
6058 | + * tuxonice_block_io.c, which contains low level io functions. | |
6059 | + */ | |
6060 | + | |
6061 | +#include <linux/buffer_head.h> | |
6062 | +#include "tuxonice_extent.h" | |
6063 | + | |
6064 | +struct toi_bdev_info { | |
6065 | + struct block_device *bdev; | |
6066 | + dev_t dev_t; | |
6067 | + int bmap_shift; | |
6068 | + int blocks_per_page; | |
e999739a | 6069 | + int ignored; |
2380c486 JR |
6070 | +}; |
6071 | + | |
6072 | +/* | |
6073 | + * Our exported interface so the swapwriter and filewriter don't | |
6074 | + * need these functions duplicated. | |
6075 | + */ | |
6076 | +struct toi_bio_ops { | |
6077 | + int (*bdev_page_io) (int rw, struct block_device *bdev, long pos, | |
6078 | + struct page *page); | |
6079 | + void (*check_io_stats) (void); | |
6080 | + void (*reset_io_stats) (void); | |
6081 | + void (*update_throughput_throttle) (int jif_index); | |
0ada99ac | 6082 | + int (*finish_all_io) (void); |
6083 | + int (*forward_one_page) (int writing, int section_barrier); | |
2380c486 JR |
6084 | + void (*set_extra_page_forward) (void); |
6085 | + void (*set_devinfo) (struct toi_bdev_info *info); | |
6086 | + int (*read_page) (unsigned long *index, struct page *buffer_page, | |
6087 | + unsigned int *buf_size); | |
6088 | + int (*write_page) (unsigned long index, struct page *buffer_page, | |
6089 | + unsigned int buf_size); | |
6090 | + void (*read_header_init) (void); | |
6091 | + int (*rw_header_chunk) (int rw, struct toi_module_ops *owner, | |
6092 | + char *buffer, int buffer_size); | |
6093 | + int (*rw_header_chunk_noreadahead) (int rw, | |
6094 | + struct toi_module_ops *owner, | |
6095 | + char *buffer, int buffer_size); | |
6096 | + int (*write_header_chunk_finish) (void); | |
6097 | + int (*rw_init) (int rw, int stream_number); | |
6098 | + int (*rw_cleanup) (int rw); | |
0ada99ac | 6099 | + int (*io_flusher) (int rw); |
2380c486 JR |
6100 | +}; |
6101 | + | |
6102 | +extern struct toi_bio_ops toi_bio_ops; | |
6103 | + | |
6104 | +extern char *toi_writer_buffer; | |
6105 | +extern int toi_writer_buffer_posn; | |
6106 | +extern struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4]; | |
6107 | +extern struct toi_extent_iterate_state toi_writer_posn; | |
6108 | diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c | |
6109 | new file mode 100644 | |
9474138d | 6110 | index 0000000..97472d5 |
2380c486 JR |
6111 | --- /dev/null |
6112 | +++ b/kernel/power/tuxonice_builtin.c | |
9474138d | 6113 | @@ -0,0 +1,313 @@ |
2380c486 JR |
6114 | +/* |
6115 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
6116 | + * | |
6117 | + * This file is released under the GPLv2. | |
6118 | + */ | |
2380c486 JR |
6119 | +#include <linux/resume-trace.h> |
6120 | +#include <linux/kernel.h> | |
6121 | +#include <linux/swap.h> | |
6122 | +#include <linux/syscalls.h> | |
6123 | +#include <linux/bio.h> | |
6124 | +#include <linux/root_dev.h> | |
6125 | +#include <linux/freezer.h> | |
6126 | +#include <linux/reboot.h> | |
6127 | +#include <linux/writeback.h> | |
6128 | +#include <linux/tty.h> | |
6129 | +#include <linux/crypto.h> | |
6130 | +#include <linux/cpu.h> | |
6131 | +#include <linux/ctype.h> | |
6132 | +#include "tuxonice_io.h" | |
6133 | +#include "tuxonice.h" | |
6134 | +#include "tuxonice_extent.h" | |
2380c486 JR |
6135 | +#include "tuxonice_netlink.h" |
6136 | +#include "tuxonice_prepare_image.h" | |
6137 | +#include "tuxonice_ui.h" | |
6138 | +#include "tuxonice_sysfs.h" | |
6139 | +#include "tuxonice_pagedir.h" | |
6140 | +#include "tuxonice_modules.h" | |
6141 | +#include "tuxonice_builtin.h" | |
6142 | +#include "tuxonice_power_off.h" | |
6143 | + | |
6144 | +/* | |
6145 | + * Highmem related functions (x86 only). | |
6146 | + */ | |
6147 | + | |
6148 | +#ifdef CONFIG_HIGHMEM | |
6149 | + | |
6150 | +/** | |
6151 | + * copyback_high: Restore highmem pages. | |
6152 | + * | |
6153 | + * Highmem data and pbe lists are/can be stored in highmem. | |
6154 | + * The format is slightly different to the lowmem pbe lists | |
6155 | + * used for the assembly code: the last pbe in each page is | |
6156 | + * a struct page * instead of struct pbe *, pointing to the | |
6157 | + * next page where pbes are stored (or NULL if it happens to be | |
6158 | + * the end of the list). Since we don't want to generate | |
6159 | + * unnecessary deltas against swsusp code, we use a cast | |
6160 | + * instead of a union. | |
6161 | + **/ | |
6162 | + | |
6163 | +static void copyback_high(void) | |
6164 | +{ | |
6165 | + struct page *pbe_page = (struct page *) restore_highmem_pblist; | |
6166 | + struct pbe *this_pbe, *first_pbe; | |
6167 | + unsigned long *origpage, *copypage; | |
6168 | + int pbe_index = 1; | |
6169 | + | |
6170 | + if (!pbe_page) | |
6171 | + return; | |
6172 | + | |
6173 | + this_pbe = (struct pbe *) kmap_atomic(pbe_page, KM_BOUNCE_READ); | |
6174 | + first_pbe = this_pbe; | |
6175 | + | |
6176 | + while (this_pbe) { | |
6177 | + int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1; | |
6178 | + | |
6179 | + origpage = kmap_atomic((struct page *) this_pbe->orig_address, | |
6180 | + KM_BIO_DST_IRQ); | |
6181 | + copypage = kmap_atomic((struct page *) this_pbe->address, | |
6182 | + KM_BIO_SRC_IRQ); | |
6183 | + | |
6184 | + while (loop >= 0) { | |
6185 | + *(origpage + loop) = *(copypage + loop); | |
6186 | + loop--; | |
6187 | + } | |
6188 | + | |
6189 | + kunmap_atomic(origpage, KM_BIO_DST_IRQ); | |
6190 | + kunmap_atomic(copypage, KM_BIO_SRC_IRQ); | |
6191 | + | |
6192 | + if (!this_pbe->next) | |
6193 | + break; | |
6194 | + | |
6195 | + if (pbe_index < PBES_PER_PAGE) { | |
6196 | + this_pbe++; | |
6197 | + pbe_index++; | |
6198 | + } else { | |
6199 | + pbe_page = (struct page *) this_pbe->next; | |
6200 | + kunmap_atomic(first_pbe, KM_BOUNCE_READ); | |
6201 | + if (!pbe_page) | |
6202 | + return; | |
6203 | + this_pbe = (struct pbe *) kmap_atomic(pbe_page, | |
6204 | + KM_BOUNCE_READ); | |
6205 | + first_pbe = this_pbe; | |
6206 | + pbe_index = 1; | |
6207 | + } | |
6208 | + } | |
6209 | + kunmap_atomic(first_pbe, KM_BOUNCE_READ); | |
6210 | +} | |
6211 | + | |
6212 | +#else /* CONFIG_HIGHMEM */ | |
6213 | +static void copyback_high(void) { } | |
6214 | +#endif | |
6215 | + | |
6216 | +char toi_wait_for_keypress_dev_console(int timeout) | |
6217 | +{ | |
6218 | + int fd, this_timeout = 255; | |
6219 | + char key = '\0'; | |
6220 | + struct termios t, t_backup; | |
6221 | + | |
6222 | + /* We should be guaranteed /dev/console exists after populate_rootfs() | |
6223 | + * in init/main.c. | |
6224 | + */ | |
6225 | + fd = sys_open("/dev/console", O_RDONLY, 0); | |
6226 | + if (fd < 0) { | |
6227 | + printk(KERN_INFO "Couldn't open /dev/console.\n"); | |
6228 | + return key; | |
6229 | + } | |
6230 | + | |
6231 | + if (sys_ioctl(fd, TCGETS, (long)&t) < 0) | |
6232 | + goto out_close; | |
6233 | + | |
6234 | + memcpy(&t_backup, &t, sizeof(t)); | |
6235 | + | |
6236 | + t.c_lflag &= ~(ISIG|ICANON|ECHO); | |
6237 | + t.c_cc[VMIN] = 0; | |
6238 | + | |
6239 | +new_timeout: | |
6240 | + if (timeout > 0) { | |
6241 | + this_timeout = timeout < 26 ? timeout : 25; | |
6242 | + timeout -= this_timeout; | |
6243 | + this_timeout *= 10; | |
6244 | + } | |
6245 | + | |
6246 | + t.c_cc[VTIME] = this_timeout; | |
6247 | + | |
6248 | + if (sys_ioctl(fd, TCSETS, (long)&t) < 0) | |
6249 | + goto out_restore; | |
6250 | + | |
6251 | + while (1) { | |
6252 | + if (sys_read(fd, &key, 1) <= 0) { | |
6253 | + if (timeout) | |
6254 | + goto new_timeout; | |
6255 | + key = '\0'; | |
6256 | + break; | |
6257 | + } | |
6258 | + key = tolower(key); | |
6259 | + if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) { | |
6260 | + if (key == 'c') { | |
6261 | + set_toi_state(TOI_CONTINUE_REQ); | |
6262 | + break; | |
6263 | + } else if (key == ' ') | |
6264 | + break; | |
6265 | + } else | |
6266 | + break; | |
6267 | + } | |
6268 | + | |
6269 | +out_restore: | |
6270 | + sys_ioctl(fd, TCSETS, (long)&t_backup); | |
6271 | +out_close: | |
6272 | + sys_close(fd); | |
6273 | + | |
6274 | + return key; | |
6275 | +} | |
6276 | +EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console); | |
6277 | + | |
6278 | +struct toi_boot_kernel_data toi_bkd __nosavedata | |
6279 | + __attribute__((aligned(PAGE_SIZE))) = { | |
6280 | + MY_BOOT_KERNEL_DATA_VERSION, | |
6281 | + 0, | |
6282 | +#ifdef CONFIG_TOI_REPLACE_SWSUSP | |
6283 | + (1 << TOI_REPLACE_SWSUSP) | | |
6284 | +#endif | |
6285 | + (1 << TOI_NO_FLUSHER_THREAD) | | |
6286 | + (1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG), | |
6287 | +}; | |
6288 | +EXPORT_SYMBOL_GPL(toi_bkd); | |
6289 | + | |
6290 | +struct block_device *toi_open_by_devnum(dev_t dev, fmode_t mode) | |
6291 | +{ | |
6292 | + struct block_device *bdev = bdget(dev); | |
6293 | + int err = -ENOMEM; | |
6294 | + if (bdev) | |
6295 | + err = blkdev_get(bdev, mode); | |
6296 | + return err ? ERR_PTR(err) : bdev; | |
6297 | +} | |
6298 | +EXPORT_SYMBOL_GPL(toi_open_by_devnum); | |
6299 | + | |
6300 | +int toi_wait = CONFIG_TOI_DEFAULT_WAIT; | |
6301 | +EXPORT_SYMBOL_GPL(toi_wait); | |
6302 | + | |
6303 | +struct toi_core_fns *toi_core_fns; | |
6304 | +EXPORT_SYMBOL_GPL(toi_core_fns); | |
6305 | + | |
6306 | +unsigned long toi_result; | |
6307 | +EXPORT_SYMBOL_GPL(toi_result); | |
6308 | + | |
6309 | +struct pagedir pagedir1 = {1}; | |
6310 | +EXPORT_SYMBOL_GPL(pagedir1); | |
6311 | + | |
6312 | +unsigned long toi_get_nonconflicting_page(void) | |
6313 | +{ | |
6314 | + return toi_core_fns->get_nonconflicting_page(); | |
6315 | +} | |
6316 | + | |
6317 | +int toi_post_context_save(void) | |
6318 | +{ | |
6319 | + return toi_core_fns->post_context_save(); | |
6320 | +} | |
6321 | + | |
9474138d | 6322 | +int try_tuxonice_hibernate(void) |
2380c486 JR |
6323 | +{ |
6324 | + if (!toi_core_fns) | |
6325 | + return -ENODEV; | |
6326 | + | |
6327 | + return toi_core_fns->try_hibernate(); | |
6328 | +} | |
6329 | + | |
6330 | +static int num_resume_calls; | |
6331 | +#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL | |
6332 | +static int ignore_late_initcall = 1; | |
6333 | +#else | |
6334 | +static int ignore_late_initcall; | |
6335 | +#endif | |
6336 | + | |
9474138d | 6337 | +void try_tuxonice_resume(void) |
2380c486 JR |
6338 | +{ |
6339 | + /* Don't let it wrap around eventually */ | |
6340 | + if (num_resume_calls < 2) | |
6341 | + num_resume_calls++; | |
6342 | + | |
6343 | + if (num_resume_calls == 1 && ignore_late_initcall) { | |
6344 | + printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n"); | |
6345 | + return; | |
6346 | + } | |
6347 | + | |
6348 | + if (toi_core_fns) | |
6349 | + toi_core_fns->try_resume(); | |
6350 | + else | |
6351 | + printk(KERN_INFO "TuxOnIce core not loaded yet.\n"); | |
6352 | +} | |
6353 | + | |
6354 | +int toi_lowlevel_builtin(void) | |
6355 | +{ | |
6356 | + int error = 0; | |
6357 | + | |
6358 | + save_processor_state(); | |
6359 | + error = swsusp_arch_suspend(); | |
6360 | + if (error) | |
6361 | + printk(KERN_ERR "Error %d hibernating\n", error); | |
6362 | + | |
6363 | + /* Restore control flow appears here */ | |
6364 | + if (!toi_in_hibernate) { | |
6365 | + copyback_high(); | |
6366 | + set_toi_state(TOI_NOW_RESUMING); | |
6367 | + } | |
6368 | + | |
6369 | + restore_processor_state(); | |
6370 | + | |
6371 | + return error; | |
6372 | +} | |
6373 | +EXPORT_SYMBOL_GPL(toi_lowlevel_builtin); | |
6374 | + | |
6375 | +unsigned long toi_compress_bytes_in; | |
6376 | +EXPORT_SYMBOL_GPL(toi_compress_bytes_in); | |
6377 | + | |
6378 | +unsigned long toi_compress_bytes_out; | |
6379 | +EXPORT_SYMBOL_GPL(toi_compress_bytes_out); | |
6380 | + | |
6381 | +unsigned long toi_state = ((1 << TOI_BOOT_TIME) | | |
6382 | + (1 << TOI_IGNORE_LOGLEVEL) | | |
6383 | + (1 << TOI_IO_STOPPED)); | |
6384 | +EXPORT_SYMBOL_GPL(toi_state); | |
6385 | + | |
6386 | +/* The number of hibernates we have started (some may have been cancelled) */ | |
6387 | +unsigned int nr_hibernates; | |
6388 | +EXPORT_SYMBOL_GPL(nr_hibernates); | |
6389 | + | |
6390 | +int toi_running; | |
6391 | +EXPORT_SYMBOL_GPL(toi_running); | |
6392 | + | |
6393 | +__nosavedata int toi_in_hibernate; | |
6394 | +EXPORT_SYMBOL_GPL(toi_in_hibernate); | |
6395 | + | |
6396 | +__nosavedata struct pbe *restore_highmem_pblist; | |
6397 | +EXPORT_SYMBOL_GPL(restore_highmem_pblist); | |
6398 | + | |
6399 | +static int __init toi_wait_setup(char *str) | |
6400 | +{ | |
6401 | + int value; | |
6402 | + | |
6403 | + if (sscanf(str, "=%d", &value)) { | |
6404 | + if (value < -1 || value > 255) | |
6405 | + printk(KERN_INFO "TuxOnIce_wait outside range -1 to " | |
6406 | + "255.\n"); | |
6407 | + else | |
6408 | + toi_wait = value; | |
6409 | + } | |
6410 | + | |
6411 | + return 1; | |
6412 | +} | |
6413 | + | |
6414 | +__setup("toi_wait", toi_wait_setup); | |
6415 | + | |
6416 | +static int __init toi_ignore_late_initcall_setup(char *str) | |
6417 | +{ | |
6418 | + int value; | |
6419 | + | |
6420 | + if (sscanf(str, "=%d", &value)) | |
6421 | + ignore_late_initcall = value; | |
6422 | + | |
6423 | + return 1; | |
6424 | +} | |
6425 | + | |
6426 | +__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup); | |
6427 | diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h | |
6428 | new file mode 100644 | |
6429 | index 0000000..49b25b7 | |
6430 | --- /dev/null | |
6431 | +++ b/kernel/power/tuxonice_builtin.h | |
6432 | @@ -0,0 +1,27 @@ | |
6433 | +/* | |
6434 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
6435 | + * | |
6436 | + * This file is released under the GPLv2. | |
6437 | + */ | |
6438 | +#include <asm/setup.h> | |
6439 | + | |
6440 | +extern struct toi_core_fns *toi_core_fns; | |
6441 | +extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out; | |
6442 | +extern unsigned int nr_hibernates; | |
6443 | +extern int toi_in_hibernate; | |
6444 | + | |
6445 | +extern __nosavedata struct pbe *restore_highmem_pblist; | |
6446 | + | |
6447 | +int toi_lowlevel_builtin(void); | |
6448 | + | |
6449 | +#ifdef CONFIG_HIGHMEM | |
6450 | +extern __nosavedata struct zone_data *toi_nosave_zone_list; | |
6451 | +extern __nosavedata unsigned long toi_nosave_max_pfn; | |
6452 | +#endif | |
6453 | + | |
6454 | +extern unsigned long toi_get_nonconflicting_page(void); | |
6455 | +extern int toi_post_context_save(void); | |
6456 | + | |
6457 | +extern char toi_wait_for_keypress_dev_console(int timeout); | |
6458 | +extern struct block_device *toi_open_by_devnum(dev_t dev, fmode_t mode); | |
6459 | +extern int toi_wait; | |
6460 | diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c | |
6461 | new file mode 100644 | |
e999739a | 6462 | index 0000000..b0adc17 |
2380c486 JR |
6463 | --- /dev/null |
6464 | +++ b/kernel/power/tuxonice_checksum.c | |
6465 | @@ -0,0 +1,375 @@ | |
6466 | +/* | |
6467 | + * kernel/power/tuxonice_checksum.c | |
6468 | + * | |
6469 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
6470 | + * Copyright (C) 2006 Red Hat, inc. | |
6471 | + * | |
6472 | + * This file is released under the GPLv2. | |
6473 | + * | |
6474 | + * This file contains data checksum routines for TuxOnIce, | |
6475 | + * using cryptoapi. They are used to locate any modifications | |
6476 | + * made to pageset 2 while we're saving it. | |
6477 | + */ | |
6478 | + | |
6479 | +#include <linux/suspend.h> | |
6480 | +#include <linux/highmem.h> | |
6481 | +#include <linux/vmalloc.h> | |
6482 | +#include <linux/crypto.h> | |
6483 | +#include <linux/scatterlist.h> | |
6484 | + | |
6485 | +#include "tuxonice.h" | |
6486 | +#include "tuxonice_modules.h" | |
6487 | +#include "tuxonice_sysfs.h" | |
6488 | +#include "tuxonice_io.h" | |
6489 | +#include "tuxonice_pageflags.h" | |
6490 | +#include "tuxonice_checksum.h" | |
6491 | +#include "tuxonice_pagedir.h" | |
6492 | +#include "tuxonice_alloc.h" | |
6493 | + | |
6494 | +static struct toi_module_ops toi_checksum_ops; | |
6495 | + | |
6496 | +/* Constant at the moment, but I might allow tuning later */ | |
6497 | +static char toi_checksum_name[32] = "md4"; | |
6498 | +/* Bytes per checksum */ | |
6499 | +#define CHECKSUM_SIZE (16) | |
6500 | + | |
6501 | +#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE) | |
6502 | + | |
6503 | +struct cpu_context { | |
6504 | + struct crypto_hash *transform; | |
6505 | + struct hash_desc desc; | |
6506 | + struct scatterlist sg[2]; | |
6507 | + char *buf; | |
6508 | +}; | |
6509 | + | |
6510 | +static DEFINE_PER_CPU(struct cpu_context, contexts); | |
6511 | +static int pages_allocated; | |
6512 | +static unsigned long page_list; | |
6513 | + | |
6514 | +static int toi_num_resaved; | |
6515 | + | |
6516 | +static unsigned long this_checksum, next_page; | |
6517 | +static int checksum_index; | |
6518 | + | |
6519 | +static inline int checksum_pages_needed(void) | |
6520 | +{ | |
6521 | + return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE); | |
6522 | +} | |
6523 | + | |
6524 | +/* ---- Local buffer management ---- */ | |
6525 | + | |
6526 | +/* | |
6527 | + * toi_checksum_cleanup | |
6528 | + * | |
6529 | + * Frees memory allocated for our labours. | |
6530 | + */ | |
6531 | +static void toi_checksum_cleanup(int ending_cycle) | |
6532 | +{ | |
6533 | + int cpu; | |
6534 | + | |
6535 | + if (ending_cycle) { | |
6536 | + for_each_online_cpu(cpu) { | |
6537 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
6538 | + if (this->transform) { | |
6539 | + crypto_free_hash(this->transform); | |
6540 | + this->transform = NULL; | |
6541 | + this->desc.tfm = NULL; | |
6542 | + } | |
6543 | + | |
6544 | + if (this->buf) { | |
6545 | + toi_free_page(27, (unsigned long) this->buf); | |
6546 | + this->buf = NULL; | |
6547 | + } | |
6548 | + } | |
6549 | + } | |
6550 | +} | |
6551 | + | |
6552 | +/* | |
6553 | + * toi_checksum_initialise | |
6554 | + * | |
6555 | + * Prepare to do some work by allocating buffers and transforms. | |
6556 | + * Returns: Int: Zero on success, one if checksumming can't be set up | |
6557 | + * (the intent being that we still seek to hibernate in that case). | |
6558 | + */ | |
6559 | +static int toi_checksum_initialise(int starting_cycle) | |
6560 | +{ | |
6561 | + int cpu; | |
6562 | + | |
6563 | + if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled) | |
6564 | + return 0; | |
6565 | + | |
6566 | + if (!*toi_checksum_name) { | |
6567 | + printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n"); | |
6568 | + return 1; | |
6569 | + } | |
6570 | + | |
6571 | + for_each_online_cpu(cpu) { | |
6572 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
6573 | + struct page *page; | |
6574 | + | |
6575 | + this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0); | |
6576 | + if (IS_ERR(this->transform)) { | |
6577 | + printk(KERN_INFO "TuxOnIce: Failed to initialise the " | |
6578 | + "%s checksum algorithm: %ld.\n", | |
6579 | + toi_checksum_name, (long) this->transform); | |
6580 | + this->transform = NULL; | |
6581 | + return 1; | |
6582 | + } | |
6583 | + | |
6584 | + this->desc.tfm = this->transform; | |
6585 | + this->desc.flags = 0; | |
6586 | + | |
6587 | + page = toi_alloc_page(27, GFP_KERNEL); | |
6588 | + if (!page) | |
6589 | + return 1; | |
6590 | + this->buf = page_address(page); | |
6591 | + sg_init_one(&this->sg[0], this->buf, PAGE_SIZE); | |
6592 | + } | |
6593 | + return 0; | |
6594 | +} | |
6595 | + | |
6596 | +/* | |
6597 | + * toi_checksum_print_debug_stats | |
6598 | + * @buffer: Pointer to a buffer into which the debug info will be printed. | |
6599 | + * @size: Size of the buffer. | |
6600 | + * | |
6601 | + * Print information to be recorded for debugging purposes into a buffer. | |
6602 | + * Returns: Number of characters written to the buffer. | |
6603 | + */ | |
6604 | + | |
6605 | +static int toi_checksum_print_debug_stats(char *buffer, int size) | |
6606 | +{ | |
6607 | + int len; | |
6608 | + | |
6609 | + if (!toi_checksum_ops.enabled) | |
6610 | + return scnprintf(buffer, size, | |
6611 | + "- Checksumming disabled.\n"); | |
6612 | + | |
6613 | + len = scnprintf(buffer, size, "- Checksum method is '%s'.\n", | |
6614 | + toi_checksum_name); | |
6615 | + len += scnprintf(buffer + len, size - len, | |
6616 | + " %d pages resaved in atomic copy.\n", toi_num_resaved); | |
6617 | + return len; | |
6618 | +} | |
6619 | + | |
6620 | +static int toi_checksum_memory_needed(void) | |
6621 | +{ | |
6622 | + return toi_checksum_ops.enabled ? | |
6623 | + checksum_pages_needed() << PAGE_SHIFT : 0; | |
6624 | +} | |
6625 | + | |
6626 | +static int toi_checksum_storage_needed(void) | |
6627 | +{ | |
6628 | + if (toi_checksum_ops.enabled) | |
6629 | + return strlen(toi_checksum_name) + sizeof(int) + 1; | |
6630 | + else | |
6631 | + return 0; | |
6632 | +} | |
6633 | + | |
6634 | +/* | |
6635 | + * toi_checksum_save_config_info | |
6636 | + * @buffer: Pointer to a buffer of size PAGE_SIZE. | |
6637 | + * | |
6638 | + * Save information needed when reloading the image at resume time. | |
6639 | + * Returns: Number of bytes used for saving our data. | |
6640 | + */ | |
6641 | +static int toi_checksum_save_config_info(char *buffer) | |
6642 | +{ | |
6643 | + int namelen = strlen(toi_checksum_name) + 1; | |
6644 | + int total_len; | |
6645 | + | |
6646 | + *((unsigned int *) buffer) = namelen; | |
6647 | + strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen); | |
6648 | + total_len = sizeof(unsigned int) + namelen; | |
6649 | + return total_len; | |
6650 | +} | |
6651 | + | |
6652 | +/* toi_checksum_load_config_info | |
6653 | + * @buffer: Pointer to the start of the data (length word, then name). | |
6654 | + * @size: Number of bytes that were saved. | |
6655 | + * | |
6656 | + * Description: Reload the checksum algorithm name saved in the image | |
6657 | + * header, never copying more than toi_checksum_name can hold. | |
6658 | + */ | |
6659 | +static void toi_checksum_load_config_info(char *buffer, int size) | |
6660 | +{ | |
6661 | + unsigned int namelen = *((unsigned int *) buffer); | |
6662 | + | |
6663 | + if (namelen > sizeof(toi_checksum_name) - 1) | |
6664 | + namelen = sizeof(toi_checksum_name) - 1; | |
6665 | + strncpy(toi_checksum_name, buffer + sizeof(unsigned int), namelen); | |
6666 | + toi_checksum_name[namelen] = '\0'; | |
6667 | +} | |
6668 | + | |
6669 | +/* | |
6670 | + * Free Checksum Memory | |
6671 | + */ | |
6672 | + | |
6673 | +void free_checksum_pages(void) | |
6674 | +{ | |
6675 | + while (pages_allocated) { | |
6676 | + unsigned long next = *((unsigned long *) page_list); | |
6677 | + ClearPageNosave(virt_to_page(page_list)); | |
6678 | + toi_free_page(15, (unsigned long) page_list); | |
6679 | + page_list = next; | |
6680 | + pages_allocated--; | |
6681 | + } | |
6682 | +} | |
6683 | + | |
6684 | +/* | |
6685 | + * Allocate Checksum Memory | |
6686 | + */ | |
6687 | + | |
6688 | +int allocate_checksum_pages(void) | |
6689 | +{ | |
6690 | + int pages_needed = checksum_pages_needed(); | |
6691 | + | |
6692 | + if (!toi_checksum_ops.enabled) | |
6693 | + return 0; | |
6694 | + | |
6695 | + while (pages_allocated < pages_needed) { | |
6696 | + unsigned long *new_page = | |
6697 | + (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP); | |
6698 | + if (!new_page) { | |
e999739a | 6699 | + printk(KERN_ERR "Unable to allocate checksum pages.\n"); |
2380c486 JR |
6700 | + return -ENOMEM; |
6701 | + } | |
6702 | + SetPageNosave(virt_to_page(new_page)); | |
6703 | + (*new_page) = page_list; | |
6704 | + page_list = (unsigned long) new_page; | |
6705 | + pages_allocated++; | |
6706 | + } | |
6707 | + | |
6708 | + next_page = (unsigned long) page_list; | |
6709 | + checksum_index = 0; | |
6710 | + | |
6711 | + return 0; | |
6712 | +} | |
6713 | + | |
6714 | +char *tuxonice_get_next_checksum(void) | |
6715 | +{ | |
6716 | + if (!toi_checksum_ops.enabled) | |
6717 | + return NULL; | |
6718 | + | |
6719 | + if (checksum_index % CHECKSUMS_PER_PAGE) | |
6720 | + this_checksum += CHECKSUM_SIZE; | |
6721 | + else { | |
6722 | + this_checksum = next_page + sizeof(void *); | |
6723 | + next_page = *((unsigned long *) next_page); | |
6724 | + } | |
6725 | + | |
6726 | + checksum_index++; | |
6727 | + return (char *) this_checksum; | |
6728 | +} | |
6729 | + | |
6730 | +int tuxonice_calc_checksum(struct page *page, char *checksum_locn) | |
6731 | +{ | |
6732 | + char *pa; | |
6733 | + int result, cpu = smp_processor_id(); | |
6734 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
6735 | + | |
6736 | + if (!toi_checksum_ops.enabled) | |
6737 | + return 0; | |
6738 | + | |
6739 | + pa = kmap(page); | |
6740 | + memcpy(ctx->buf, pa, PAGE_SIZE); | |
6741 | + kunmap(page); | |
6742 | + result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, | |
6743 | + checksum_locn); | |
6744 | + return result; | |
6745 | +} | |
6746 | +/* | |
6747 | + * Calculate checksums | |
6748 | + */ | |
6749 | + | |
6750 | +void check_checksums(void) | |
6751 | +{ | |
6752 | + int pfn, index = 0, cpu = smp_processor_id(); | |
6753 | + char current_checksum[CHECKSUM_SIZE]; | |
6754 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
6755 | + | |
6756 | + if (!toi_checksum_ops.enabled) | |
6757 | + return; | |
6758 | + | |
6759 | + next_page = (unsigned long) page_list; | |
6760 | + | |
6761 | + toi_num_resaved = 0; | |
6762 | + this_checksum = 0; | |
6763 | + | |
6764 | + memory_bm_position_reset(pageset2_map); | |
6765 | + for (pfn = memory_bm_next_pfn(pageset2_map); pfn != BM_END_OF_MAP; | |
6766 | + pfn = memory_bm_next_pfn(pageset2_map)) { | |
6767 | + int ret; | |
6768 | + char *pa; | |
6769 | + struct page *page = pfn_to_page(pfn); | |
6770 | + | |
6771 | + if (index % CHECKSUMS_PER_PAGE) { | |
6772 | + this_checksum += CHECKSUM_SIZE; | |
6773 | + } else { | |
6774 | + this_checksum = next_page + sizeof(void *); | |
6775 | + next_page = *((unsigned long *) next_page); | |
6776 | + } | |
6777 | + | |
6778 | + /* Done when IRQs disabled so must be atomic */ | |
6779 | + pa = kmap_atomic(page, KM_USER1); | |
6780 | + memcpy(ctx->buf, pa, PAGE_SIZE); | |
6781 | + kunmap_atomic(pa, KM_USER1); | |
6782 | + ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, | |
6783 | + current_checksum); | |
6784 | + | |
6785 | + if (ret) { | |
6786 | + printk(KERN_INFO "Digest failed. Returned %d.\n", ret); | |
6787 | + return; | |
6788 | + } | |
6789 | + | |
6790 | + if (memcmp(current_checksum, (char *) this_checksum, | |
6791 | + CHECKSUM_SIZE)) { | |
6792 | + SetPageResave(pfn_to_page(pfn)); | |
6793 | + toi_num_resaved++; | |
6794 | + if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED)) | |
6795 | + set_abort_result(TOI_RESAVE_NEEDED); | |
6796 | + } | |
6797 | + | |
6798 | + index++; | |
6799 | + } | |
6800 | +} | |
6801 | + | |
6802 | +static struct toi_sysfs_data sysfs_params[] = { | |
6803 | + SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0, | |
6804 | + NULL), | |
6805 | + SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action, | |
6806 | + TOI_ABORT_ON_RESAVE_NEEDED, 0) | |
6807 | +}; | |
6808 | + | |
6809 | +/* | |
6810 | + * Ops structure. | |
6811 | + */ | |
6812 | +static struct toi_module_ops toi_checksum_ops = { | |
6813 | + .type = MISC_MODULE, | |
6814 | + .name = "checksumming", | |
6815 | + .directory = "checksum", | |
6816 | + .module = THIS_MODULE, | |
6817 | + .initialise = toi_checksum_initialise, | |
6818 | + .cleanup = toi_checksum_cleanup, | |
6819 | + .print_debug_info = toi_checksum_print_debug_stats, | |
6820 | + .save_config_info = toi_checksum_save_config_info, | |
6821 | + .load_config_info = toi_checksum_load_config_info, | |
6822 | + .memory_needed = toi_checksum_memory_needed, | |
6823 | + .storage_needed = toi_checksum_storage_needed, | |
6824 | + | |
6825 | + .sysfs_data = sysfs_params, | |
6826 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
6827 | + sizeof(struct toi_sysfs_data), | |
6828 | +}; | |
6829 | + | |
6830 | +/* ---- Registration ---- */ | |
6831 | +int toi_checksum_init(void) | |
6832 | +{ | |
6833 | + int result = toi_register_module(&toi_checksum_ops); | |
6834 | + return result; | |
6835 | +} | |
6836 | + | |
6837 | +void toi_checksum_exit(void) | |
6838 | +{ | |
6839 | + toi_unregister_module(&toi_checksum_ops); | |
6840 | +} | |
6841 | diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h | |
6842 | new file mode 100644 | |
6843 | index 0000000..84a9174 | |
6844 | --- /dev/null | |
6845 | +++ b/kernel/power/tuxonice_checksum.h | |
6846 | @@ -0,0 +1,32 @@ | |
6847 | +/* | |
6848 | + * kernel/power/tuxonice_checksum.h | |
6849 | + * | |
6850 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
6851 | + * Copyright (C) 2006 Red Hat, inc. | |
6852 | + * | |
6853 | + * This file is released under the GPLv2. | |
6854 | + * | |
6855 | + * This file contains data checksum routines for TuxOnIce, | |
6856 | + * using cryptoapi. They are used to locate any modifications | |
6857 | + * made to pageset 2 while we're saving it. | |
6858 | + */ | |
6859 | + | |
6860 | +#if defined(CONFIG_TOI_CHECKSUM) | |
6861 | +extern int toi_checksum_init(void); | |
6862 | +extern void toi_checksum_exit(void); | |
6863 | +void check_checksums(void); | |
6864 | +int allocate_checksum_pages(void); | |
6865 | +void free_checksum_pages(void); | |
6866 | +char *tuxonice_get_next_checksum(void); | |
6867 | +int tuxonice_calc_checksum(struct page *page, char *checksum_locn); | |
6868 | +#else | |
6869 | +static inline int toi_checksum_init(void) { return 0; } | |
6870 | +static inline void toi_checksum_exit(void) { } | |
6871 | +static inline void check_checksums(void) { }; | |
6872 | +static inline int allocate_checksum_pages(void) { return 0; }; | |
6873 | +static inline void free_checksum_pages(void) { }; | |
6874 | +static inline char *tuxonice_get_next_checksum(void) { return NULL; }; | |
6875 | +static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn) | |
6876 | + { return 0; } | |
6877 | +#endif | |
6878 | + | |
6879 | diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c | |
6880 | new file mode 100644 | |
9474138d | 6881 | index 0000000..671006d |
2380c486 JR |
6882 | --- /dev/null |
6883 | +++ b/kernel/power/tuxonice_cluster.c | |
6884 | @@ -0,0 +1,1069 @@ | |
6885 | +/* | |
6886 | + * kernel/power/tuxonice_cluster.c | |
6887 | + * | |
6888 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
6889 | + * | |
6890 | + * This file is released under the GPLv2. | |
6891 | + * | |
6892 | + * This file contains routines for cluster hibernation support. | |
6893 | + * | |
6894 | + * Based on ip autoconfiguration code in net/ipv4/ipconfig.c. | |
6895 | + * | |
6896 | + * How does it work? | |
6897 | + * | |
6898 | + * There is no 'master' node that tells everyone else what to do. All nodes | |
6899 | + * send messages to the broadcast address/port, maintain a list of peers | |
6900 | + * and figure out when to progress to the next step in hibernating or resuming. | |
6901 | + * This makes us more fault tolerant when it comes to nodes coming and going | |
6902 | + * (which may be more of an issue if we're hibernating when power supplies | |
6903 | + * are being unreliable). | |
6904 | + * | |
6905 | + * At boot time, we start a ktuxonice thread that handles communication with | |
6906 | + * other nodes. This node maintains a state machine that controls our progress | |
6907 | + * through hibernating and resuming, keeping us in step with other nodes. Nodes | |
6908 | + * are identified by their hw address. | |
6909 | + * | |
6910 | + * On startup, the node sends CLUSTER_PING on the configured interface's | |
6911 | + * broadcast address, port $toi_cluster_port (see below) and begins to listen | |
6912 | + * for other broadcast messages. CLUSTER_PING messages are repeated at | |
6913 | + * intervals of 5 minutes, with a random offset to spread traffic out. | |
6914 | + * | |
6915 | + * A hibernation cycle is initiated from any node via | |
6916 | + * | |
6917 | + * echo > /sys/power/tuxonice/do_hibernate | |
6918 | + * | |
6919 | + * and (possibly) the hibernate script. At each step of the process, the node | |
6920 | + * completes its work, and waits for all other nodes to signal completion of | |
6921 | + * their work (or timeout) before progressing to the next step. | |
6922 | + * | |
6923 | + * Request/state Action before reply Possible reply Next state | |
6924 | + * HIBERNATE capable, pre-script HIBERNATE|ACK NODE_PREP | |
6925 | + * HIBERNATE|NACK INIT_0 | |
6926 | + * | |
6927 | + * PREP prepare_image PREP|ACK IMAGE_WRITE | |
6928 | + * PREP|NACK INIT_0 | |
6929 | + * ABORT RUNNING | |
6930 | + * | |
6931 | + * IO write image IO|ACK power off | |
6932 | + * ABORT POST_RESUME | |
6933 | + * | |
6934 | + * (Boot time) check for image IMAGE|ACK RESUME_PREP | |
6935 | + * (Note 1) | |
6936 | + * IMAGE|NACK (Note 2) | |
6937 | + * | |
6938 | + * PREP prepare read image PREP|ACK IMAGE_READ | |
6939 | + * PREP|NACK (As NACK_IMAGE) | |
6940 | + * | |
6941 | + * IO read image IO|ACK POST_RESUME | |
6942 | + * | |
6943 | + * POST_RESUME thaw, post-script RUNNING | |
6944 | + * | |
6945 | + * INIT_0 init 0 | |
6946 | + * | |
6947 | + * Other messages: | |
6948 | + * | |
6949 | + * - PING: Request for all other live nodes to send a PONG. Used at startup to | |
6950 | + * announce presence, when a node is suspected dead and periodically, in case | |
6951 | + * segments of the network are [un]plugged. | |
6952 | + * | |
6953 | + * - PONG: Response to a PING. | |
6954 | + * | |
6955 | + * - ABORT: Request to cancel writing an image. | |
6956 | + * | |
6957 | + * - BYE: Notification that this node is shutting down. | |
6958 | + * | |
6959 | + * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that | |
6960 | + * nodes which are slower to start up can get state synchronised. If a node | |
6961 | + * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send | |
6962 | + * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it | |
6963 | + * must invalidate its image (if any) and boot normally. | |
6964 | + * | |
6965 | + * Note 2: May occur when one node lost power or powered off while others | |
6966 | + * hibernated. This node waits for others to complete resuming (ACK_READ) | |
6967 | + * before completing its boot, so that it appears as a failed node restarting. | |
6968 | + * | |
6969 | + * If any node has an image, then it also has a list of nodes that hibernated | |
6970 | + * in synchronisation with it. The node will wait for other nodes to appear | |
6971 | + * or timeout before beginning its restoration. | |
6972 | + * | |
6973 | + * If a node has no image, it needs to wait, in case other nodes which do have | |
6974 | + * an image are going to resume, but are taking longer to announce their | |
6975 | + * presence. For this reason, the user can specify a timeout value and a number | |
6976 | + * of nodes detected before we just continue. (We might want to assume in a | |
6977 | + * cluster of, say, 15 nodes, if 8 others have booted without finding an image, | |
6978 | + * the remaining nodes will too. This might help in situations where some nodes | |
6979 | + * are much slower to boot, or more subject to hardware failures or such like). | |
6980 | + */ | |
6981 | + | |
6982 | +#include <linux/suspend.h> | |
6983 | +#include <linux/module.h> | |
6984 | +#include <linux/moduleparam.h> | |
6985 | +#include <linux/if.h> | |
6986 | +#include <linux/rtnetlink.h> | |
6987 | +#include <linux/ip.h> | |
6988 | +#include <linux/udp.h> | |
6989 | +#include <linux/in.h> | |
6990 | +#include <linux/if_arp.h> | |
6991 | +#include <linux/kthread.h> | |
6992 | +#include <linux/wait.h> | |
6993 | +#include <linux/netdevice.h> | |
6994 | +#include <net/ip.h> | |
6995 | + | |
6996 | +#include "tuxonice.h" | |
6997 | +#include "tuxonice_modules.h" | |
6998 | +#include "tuxonice_sysfs.h" | |
6999 | +#include "tuxonice_alloc.h" | |
7000 | +#include "tuxonice_io.h" | |
7001 | + | |
7002 | +#if 1 | |
7003 | +#define PRINTK(a, b...) do { printk(a, ##b); } while (0) | |
7004 | +#else | |
7005 | +#define PRINTK(a, b...) do { } while (0) | |
7006 | +#endif | |
7007 | + | |
7008 | +static int loopback_mode; | |
7009 | +static int num_local_nodes = 1; | |
7010 | +#define MAX_LOCAL_NODES 8 | |
7011 | +#define SADDR (loopback_mode ? b->sid : h->saddr) | |
7012 | + | |
7013 | +#define MYNAME "TuxOnIce Clustering" | |
7014 | + | |
7015 | +enum cluster_message { | |
7016 | + MSG_ACK = 1, | |
7017 | + MSG_NACK = 2, | |
7018 | + MSG_PING = 4, | |
7019 | + MSG_ABORT = 8, | |
7020 | + MSG_BYE = 16, | |
7021 | + MSG_HIBERNATE = 32, | |
7022 | + MSG_IMAGE = 64, | |
7023 | + MSG_IO = 128, | |
7024 | + MSG_RUNNING = 256 | |
7025 | +}; | |
7026 | + | |
7027 | +static char *str_message(int message) | |
7028 | +{ | |
7029 | + switch (message) { | |
7030 | + case 4: | |
7031 | + return "Ping"; | |
7032 | + case 8: | |
7033 | + return "Abort"; | |
7034 | + case 9: | |
7035 | + return "Abort acked"; | |
7036 | + case 10: | |
7037 | + return "Abort nacked"; | |
7038 | + case 16: | |
7039 | + return "Bye"; | |
7040 | + case 17: | |
7041 | + return "Bye acked"; | |
7042 | + case 18: | |
7043 | + return "Bye nacked"; | |
7044 | + case 32: | |
7045 | + return "Hibernate request"; | |
7046 | + case 33: | |
7047 | + return "Hibernate ack"; | |
7048 | + case 34: | |
7049 | + return "Hibernate nack"; | |
7050 | + case 64: | |
7051 | + return "Image exists?"; | |
7052 | + case 65: | |
7053 | + return "Image does exist"; | |
7054 | + case 66: | |
7055 | + return "No image here"; | |
7056 | + case 128: | |
7057 | + return "I/O"; | |
7058 | + case 129: | |
7059 | + return "I/O okay"; | |
7060 | + case 130: | |
7061 | + return "I/O failed"; | |
7062 | + case 256: | |
7063 | + return "Running"; | |
7064 | + default: | |
e999739a | 7065 | + printk(KERN_ERR "Unrecognised message %d.\n", message); |
2380c486 JR |
7066 | + return "Unrecognised message (see dmesg)"; |
7067 | + } | |
7068 | +} | |
7069 | + | |
7070 | +#define MSG_ACK_MASK (MSG_ACK | MSG_NACK) | |
7071 | +#define MSG_STATE_MASK (~MSG_ACK_MASK) | |
7072 | + | |
7073 | +struct node_info { | |
7074 | + struct list_head member_list; | |
7075 | + wait_queue_head_t member_events; | |
7076 | + spinlock_t member_list_lock; | |
7077 | + spinlock_t receive_lock; | |
7078 | + int peer_count, ignored_peer_count; | |
7079 | + struct toi_sysfs_data sysfs_data; | |
7080 | + enum cluster_message current_message; | |
7081 | +}; | |
7082 | + | |
7083 | +struct node_info node_array[MAX_LOCAL_NODES]; | |
7084 | + | |
7085 | +struct cluster_member { | |
7086 | + __be32 addr; | |
7087 | + enum cluster_message message; | |
7088 | + struct list_head list; | |
7089 | + int ignore; | |
7090 | +}; | |
7091 | + | |
7092 | +#define toi_cluster_port_send 3501 | |
7093 | +#define toi_cluster_port_recv 3502 | |
7094 | + | |
7095 | +static struct net_device *net_dev; | |
7096 | +static struct toi_module_ops toi_cluster_ops; | |
7097 | + | |
7098 | +static int toi_recv(struct sk_buff *skb, struct net_device *dev, | |
7099 | + struct packet_type *pt, struct net_device *orig_dev); | |
7100 | + | |
7101 | +static struct packet_type toi_cluster_packet_type = { | |
7102 | + .type = __constant_htons(ETH_P_IP), | |
7103 | + .func = toi_recv, | |
7104 | +}; | |
7105 | + | |
7106 | +struct toi_pkt { /* BOOTP packet format */ | |
7107 | + struct iphdr iph; /* IP header */ | |
7108 | + struct udphdr udph; /* UDP header */ | |
7109 | + u8 htype; /* HW address type */ | |
7110 | + u8 hlen; /* HW address length */ | |
7111 | + __be32 xid; /* Transaction ID */ | |
7112 | + __be16 secs; /* Seconds since we started */ | |
7113 | + __be16 flags; /* Just what it says */ | |
7114 | + u8 hw_addr[16]; /* Sender's HW address */ | |
7115 | + u16 message; /* Message */ | |
7116 | + unsigned long sid; /* Source ID for loopback testing */ | |
7117 | +}; | |
7118 | + | |
7119 | +static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE; | |
7120 | + | |
7121 | +static int added_pack; | |
7122 | + | |
7123 | +static int others_have_image; | |
7124 | + | |
7125 | +/* Key used to allow multiple clusters on the same lan */ | |
7126 | +static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY; | |
7127 | +static char pre_hibernate_script[255] = | |
7128 | + CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE; | |
7129 | +static char post_hibernate_script[255] = | |
7130 | + CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE; | |
7131 | + | |
7132 | +/* List of cluster members */ | |
7133 | +static unsigned long continue_delay = 5 * HZ; | |
7134 | +static unsigned long cluster_message_timeout = 3 * HZ; | |
7135 | + | |
7136 | +/* === Membership list === */ | |
7137 | + | |
7138 | +static void print_member_info(int index) | |
7139 | +{ | |
7140 | + struct cluster_member *this; | |
7141 | + | |
7142 | + printk(KERN_INFO "==> Dumping node %d.\n", index); | |
7143 | + | |
7144 | + list_for_each_entry(this, &node_array[index].member_list, list) | |
7145 | + printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n", | |
7146 | + NIPQUAD(this->addr), | |
7147 | + str_message(this->message), | |
7148 | + this->ignore ? "(Ignored)" : ""); | |
7149 | + printk(KERN_INFO "== Done ==\n"); | |
7150 | +} | |
7151 | + | |
7152 | +static struct cluster_member *__find_member(int index, __be32 addr) | |
7153 | +{ | |
7154 | + struct cluster_member *this; | |
7155 | + | |
7156 | + list_for_each_entry(this, &node_array[index].member_list, list) { | |
7157 | + if (this->addr != addr) | |
7158 | + continue; | |
7159 | + | |
7160 | + return this; | |
7161 | + } | |
7162 | + | |
7163 | + return NULL; | |
7164 | +} | |
7165 | + | |
7166 | +static void set_ignore(int index, __be32 addr, struct cluster_member *this) | |
7167 | +{ | |
7168 | + if (this->ignore) { | |
7169 | + PRINTK("Node %d already ignoring %d.%d.%d.%d.\n", | |
7170 | + index, NIPQUAD(addr)); | |
7171 | + return; | |
7172 | + } | |
7173 | + | |
7174 | + PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n", | |
7175 | + index, NIPQUAD(addr)); | |
7176 | + this->ignore = 1; | |
7177 | + node_array[index].ignored_peer_count++; | |
7178 | +} | |
7179 | + | |
7180 | +static int __add_update_member(int index, __be32 addr, int message) | |
7181 | +{ | |
7182 | + struct cluster_member *this; | |
7183 | + | |
7184 | + this = __find_member(index, addr); | |
7185 | + if (this) { | |
7186 | + if (this->message != message) { | |
7187 | + this->message = message; | |
7188 | + if ((message & MSG_NACK) && | |
7189 | + (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) | |
7190 | + set_ignore(index, addr, this); | |
7191 | + PRINTK("Node %d sees node %d.%d.%d.%d now sending " | |
7192 | + "%s.\n", index, NIPQUAD(addr), | |
7193 | + str_message(message)); | |
7194 | + wake_up(&node_array[index].member_events); | |
7195 | + } | |
7196 | + return 0; | |
7197 | + } | |
7198 | + | |
7199 | + this = (struct cluster_member *) toi_kzalloc(36, | |
7200 | + sizeof(struct cluster_member), GFP_KERNEL); | |
7201 | + | |
7202 | + if (!this) | |
7203 | + return -1; | |
7204 | + | |
7205 | + this->addr = addr; | |
7206 | + this->message = message; | |
7207 | + this->ignore = 0; | |
7208 | + INIT_LIST_HEAD(&this->list); | |
7209 | + | |
7210 | + node_array[index].peer_count++; | |
7211 | + | |
7212 | + PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index, | |
7213 | + NIPQUAD(addr), str_message(message)); | |
7214 | + | |
7215 | + if ((message & MSG_NACK) && | |
7216 | + (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) | |
7217 | + set_ignore(index, addr, this); | |
7218 | + list_add_tail(&this->list, &node_array[index].member_list); | |
7219 | + return 1; | |
7220 | +} | |
7221 | + | |
7222 | +static int add_update_member(int index, __be32 addr, int message) | |
7223 | +{ | |
7224 | + int result; | |
7225 | + unsigned long flags; | |
7226 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
7227 | + result = __add_update_member(index, addr, message); | |
7228 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
7229 | + | |
7230 | + print_member_info(index); | |
7231 | + | |
7232 | + wake_up(&node_array[index].member_events); | |
7233 | + | |
7234 | + return result; | |
7235 | +} | |
7236 | + | |
7237 | +static void del_member(int index, __be32 addr) | |
7238 | +{ | |
7239 | + struct cluster_member *this; | |
7240 | + unsigned long flags; | |
7241 | + | |
7242 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
7243 | + this = __find_member(index, addr); | |
7244 | + | |
7245 | + if (this) { | |
7246 | + list_del_init(&this->list); | |
9474138d | 7247 | + toi_kfree(36, this, sizeof(*this)); |
2380c486 JR |
7248 | + node_array[index].peer_count--; |
7249 | + } | |
7250 | + | |
7251 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
7252 | +} | |
7253 | + | |
7254 | +/* === Message transmission === */ | |
7255 | + | |
7256 | +static void toi_send_if(int message, unsigned long my_id); | |
7257 | + | |
7258 | +/* | |
7259 | + * Process received TOI packet. | |
7260 | + */ | |
7261 | +static int toi_recv(struct sk_buff *skb, struct net_device *dev, | |
7262 | + struct packet_type *pt, struct net_device *orig_dev) | |
7263 | +{ | |
7264 | + struct toi_pkt *b; | |
7265 | + struct iphdr *h; | |
7266 | + int len, result, index; | |
7267 | + unsigned long addr, message, ack; | |
7268 | + | |
7269 | + /* Perform verifications before taking the lock. */ | |
7270 | + if (skb->pkt_type == PACKET_OTHERHOST) | |
7271 | + goto drop; | |
7272 | + | |
7273 | + if (dev != net_dev) | |
7274 | + goto drop; | |
7275 | + | |
7276 | + skb = skb_share_check(skb, GFP_ATOMIC); | |
7277 | + if (!skb) | |
7278 | + return NET_RX_DROP; | |
7279 | + | |
7280 | + if (!pskb_may_pull(skb, | |
7281 | + sizeof(struct iphdr) + | |
7282 | + sizeof(struct udphdr))) | |
7283 | + goto drop; | |
7284 | + | |
7285 | + b = (struct toi_pkt *)skb_network_header(skb); | |
7286 | + h = &b->iph; | |
7287 | + | |
7288 | + if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP) | |
7289 | + goto drop; | |
7290 | + | |
7291 | + /* Fragments are not supported */ | |
7292 | + if (h->frag_off & htons(IP_OFFSET | IP_MF)) { | |
7293 | + if (net_ratelimit()) | |
7294 | + printk(KERN_ERR "TuxOnIce: Ignoring fragmented " | |
7295 | + "cluster message.\n"); | |
7296 | + goto drop; | |
7297 | + } | |
7298 | + | |
7299 | + if (skb->len < ntohs(h->tot_len)) | |
7300 | + goto drop; | |
7301 | + | |
7302 | + if (ip_fast_csum((char *) h, h->ihl)) | |
7303 | + goto drop; | |
7304 | + | |
7305 | + if (b->udph.source != htons(toi_cluster_port_send) || | |
7306 | + b->udph.dest != htons(toi_cluster_port_recv)) | |
7307 | + goto drop; | |
7308 | + | |
7309 | + if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr)) | |
7310 | + goto drop; | |
7311 | + | |
7312 | + len = ntohs(b->udph.len) - sizeof(struct udphdr); | |
7313 | + | |
7314 | + /* Ok the front looks good, make sure we can get at the rest. */ | |
7315 | + if (!pskb_may_pull(skb, skb->len)) | |
7316 | + goto drop; | |
7317 | + | |
7318 | + b = (struct toi_pkt *)skb_network_header(skb); | |
7319 | + h = &b->iph; | |
7320 | + | |
7321 | + addr = SADDR; | |
7322 | + PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n", | |
7323 | + str_message(b->message), NIPQUAD(addr)); | |
7324 | + | |
7325 | + message = b->message & MSG_STATE_MASK; | |
7326 | + ack = b->message & MSG_ACK_MASK; | |
7327 | + | |
7328 | + for (index = 0; index < num_local_nodes; index++) { | |
7329 | + int new_message = node_array[index].current_message, | |
7330 | + old_message = new_message; | |
7331 | + | |
7332 | + if (index == SADDR || !old_message) { | |
7333 | + PRINTK("Ignoring node %d (offline or self).\n", index); | |
7334 | + continue; | |
7335 | + } | |
7336 | + | |
7337 | + /* One message at a time, please. */ | |
7338 | + spin_lock(&node_array[index].receive_lock); | |
7339 | + | |
7340 | + result = add_update_member(index, SADDR, b->message); | |
7341 | + if (result == -1) { | |
7342 | + printk(KERN_INFO "Failed to add new cluster member " | |
7343 | + NIPQUAD_FMT ".\n", | |
7344 | + NIPQUAD(addr)); | |
7345 | + goto drop_unlock; | |
7346 | + } | |
7347 | + | |
7348 | + switch (b->message & MSG_STATE_MASK) { | |
7349 | + case MSG_PING: | |
7350 | + break; | |
7351 | + case MSG_ABORT: | |
7352 | + break; | |
7353 | + case MSG_BYE: | |
7354 | + break; | |
7355 | + case MSG_HIBERNATE: | |
7356 | + /* Can I hibernate? */ | |
7357 | + new_message = MSG_HIBERNATE | | |
7358 | + ((index & 1) ? MSG_NACK : MSG_ACK); | |
7359 | + break; | |
7360 | + case MSG_IMAGE: | |
7361 | + /* Can I resume? */ | |
7362 | + new_message = MSG_IMAGE | | |
7363 | + ((index & 1) ? MSG_NACK : MSG_ACK); | |
7364 | + if (new_message != old_message) | |
e999739a | 7365 | + printk(KERN_ERR "Setting whether I can resume " |
7366 | + "to %d.\n", new_message); | |
2380c486 JR |
7367 | + break; |
7368 | + case MSG_IO: | |
7369 | + new_message = MSG_IO | MSG_ACK; | |
7370 | + break; | |
7371 | + case MSG_RUNNING: | |
7372 | + break; | |
7373 | + default: | |
7374 | + if (net_ratelimit()) | |
7375 | + printk(KERN_ERR "Unrecognised TuxOnIce cluster" | |
7376 | + " message %d from " NIPQUAD_FMT ".\n", | |
7377 | + b->message, NIPQUAD(addr)); | |
7378 | + }; | |
7379 | + | |
7380 | + if (old_message != new_message) { | |
7381 | + node_array[index].current_message = new_message; | |
7382 | + printk(KERN_INFO ">>> Sending new message for node " | |
7383 | + "%d.\n", index); | |
7384 | + toi_send_if(new_message, index); | |
7385 | + } else if (!ack) { | |
7386 | + printk(KERN_INFO ">>> Resending message for node %d.\n", | |
7387 | + index); | |
7388 | + toi_send_if(new_message, index); | |
7389 | + } | |
7390 | +drop_unlock: | |
7391 | + spin_unlock(&node_array[index].receive_lock); | |
7392 | + }; | |
7393 | + | |
7394 | +drop: | |
7395 | + /* Throw the packet out. */ | |
7396 | + kfree_skb(skb); | |
7397 | + | |
7398 | + return 0; | |
7399 | +} | |
7400 | + | |
7401 | +/* | |
7402 | + * Send cluster message to single interface. | |
7403 | + */ | |
7404 | +static void toi_send_if(int message, unsigned long my_id) | |
7405 | +{ | |
7406 | + struct sk_buff *skb; | |
7407 | + struct toi_pkt *b; | |
7408 | + int hh_len = LL_RESERVED_SPACE(net_dev); | |
7409 | + struct iphdr *h; | |
7410 | + | |
7411 | + /* Allocate packet */ | |
7412 | + skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL); | |
7413 | + if (!skb) | |
7414 | + return; | |
7415 | + skb_reserve(skb, hh_len); | |
7416 | + b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt)); | |
7417 | + memset(b, 0, sizeof(struct toi_pkt)); | |
7418 | + | |
7419 | + /* Construct IP header */ | |
7420 | + skb_reset_network_header(skb); | |
7421 | + h = ip_hdr(skb); | |
7422 | + h->version = 4; | |
7423 | + h->ihl = 5; | |
7424 | + h->tot_len = htons(sizeof(struct toi_pkt)); | |
7425 | + h->frag_off = htons(IP_DF); | |
7426 | + h->ttl = 64; | |
7427 | + h->protocol = IPPROTO_UDP; | |
7428 | + h->daddr = htonl(INADDR_BROADCAST); | |
7429 | + h->check = ip_fast_csum((unsigned char *) h, h->ihl); | |
7430 | + | |
7431 | + /* Construct UDP header */ | |
7432 | + b->udph.source = htons(toi_cluster_port_send); | |
7433 | + b->udph.dest = htons(toi_cluster_port_recv); | |
7434 | + b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr)); | |
7435 | + /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */ | |
7436 | + | |
7437 | + /* Construct message */ | |
7438 | + b->message = message; | |
7439 | + b->sid = my_id; | |
7440 | + b->htype = net_dev->type; /* can cause undefined behavior */ | |
7441 | + b->hlen = net_dev->addr_len; | |
7442 | + memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len); | |
7443 | + b->secs = htons(3); /* 3 seconds */ | |
7444 | + | |
7445 | + /* Chain packet down the line... */ | |
7446 | + skb->dev = net_dev; | |
7447 | + skb->protocol = htons(ETH_P_IP); | |
7448 | + if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol), | |
7449 | + net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) || | |
7450 | + dev_queue_xmit(skb) < 0) | |
7451 | + printk(KERN_INFO "E"); | |
7452 | +} | |
7453 | + | |
7454 | +/* ========================================= */ | |
7455 | + | |
7456 | +/* kTOICluster */ | |
7457 | + | |
7458 | +static atomic_t num_cluster_threads; | |
7459 | +static DECLARE_WAIT_QUEUE_HEAD(clusterd_events); | |
7460 | + | |
7461 | +static int kTOICluster(void *data) | |
7462 | +{ | |
7463 | + unsigned long my_id; | |
7464 | + | |
7465 | + my_id = atomic_add_return(1, &num_cluster_threads) - 1; | |
7466 | + node_array[my_id].current_message = (unsigned long) data; | |
7467 | + | |
7468 | + PRINTK("kTOICluster daemon %lu starting.\n", my_id); | |
7469 | + | |
7470 | + current->flags |= PF_NOFREEZE; | |
7471 | + | |
7472 | + while (node_array[my_id].current_message) { | |
7473 | + toi_send_if(node_array[my_id].current_message, my_id); | |
7474 | + sleep_on_timeout(&clusterd_events, | |
7475 | + cluster_message_timeout); | |
7476 | + PRINTK("Link state %lu is %d.\n", my_id, | |
7477 | + node_array[my_id].current_message); | |
7478 | + } | |
7479 | + | |
7480 | + toi_send_if(MSG_BYE, my_id); | |
7481 | + atomic_dec(&num_cluster_threads); | |
7482 | + wake_up(&clusterd_events); | |
7483 | + | |
7484 | + PRINTK("kTOICluster daemon %lu exiting.\n", my_id); | |
7485 | + __set_current_state(TASK_RUNNING); | |
7486 | + return 0; | |
7487 | +} | |
7488 | + | |
7489 | +static void kill_clusterd(void) | |
7490 | +{ | |
7491 | + int i; | |
7492 | + | |
7493 | + for (i = 0; i < num_local_nodes; i++) { | |
7494 | + if (node_array[i].current_message) { | |
7495 | + PRINTK("Seeking to kill clusterd %d.\n", i); | |
7496 | + node_array[i].current_message = 0; | |
7497 | + } | |
7498 | + } | |
7499 | + wait_event(clusterd_events, | |
7500 | + !atomic_read(&num_cluster_threads)); | |
7501 | + PRINTK("All cluster daemons have exited.\n"); | |
7502 | +} | |
7503 | + | |
7504 | +static int peers_not_in_message(int index, int message, int precise) | |
7505 | +{ | |
7506 | + struct cluster_member *this; | |
7507 | + unsigned long flags; | |
7508 | + int result = 0; | |
7509 | + | |
7510 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
7511 | + list_for_each_entry(this, &node_array[index].member_list, list) { | |
7512 | + if (this->ignore) | |
7513 | + continue; | |
7514 | + | |
7515 | + PRINTK("Peer %d.%d.%d.%d sending %s. " | |
7516 | + "Seeking %s.\n", | |
7517 | + NIPQUAD(this->addr), | |
7518 | + str_message(this->message), str_message(message)); | |
7519 | + if ((precise ? this->message : | |
7520 | + this->message & MSG_STATE_MASK) != | |
7521 | + message) | |
7522 | + result++; | |
7523 | + } | |
7524 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
7525 | + PRINTK("%d peers in sought message.\n", result); | |
7526 | + return result; | |
7527 | +} | |
7528 | + | |
7529 | +static void reset_ignored(int index) | |
7530 | +{ | |
7531 | + struct cluster_member *this; | |
7532 | + unsigned long flags; | |
7533 | + | |
7534 | + spin_lock_irqsave(&node_array[index].member_list_lock, flags); | |
7535 | + list_for_each_entry(this, &node_array[index].member_list, list) | |
7536 | + this->ignore = 0; | |
7537 | + node_array[index].ignored_peer_count = 0; | |
7538 | + spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); | |
7539 | +} | |
7540 | + | |
7541 | +static int peers_in_message(int index, int message, int precise) | |
7542 | +{ | |
7543 | + return node_array[index].peer_count - | |
7544 | + node_array[index].ignored_peer_count - | |
7545 | + peers_not_in_message(index, message, precise); | |
7546 | +} | |
7547 | + | |
7548 | +static int time_to_continue(int index, unsigned long start, int message) | |
7549 | +{ | |
7550 | + int first = peers_not_in_message(index, message, 0); | |
7551 | + int second = peers_in_message(index, message, 1); | |
7552 | + | |
7553 | + PRINTK("First part returns %d, second returns %d.\n", first, second); | |
7554 | + | |
7555 | + if (!first && !second) { | |
7556 | + PRINTK("All peers answered message %d.\n", | |
7557 | + message); | |
7558 | + return 1; | |
7559 | + } | |
7560 | + | |
7561 | + if (time_after(jiffies, start + continue_delay)) { | |
7562 | + PRINTK("Timeout reached.\n"); | |
7563 | + return 1; | |
7564 | + } | |
7565 | + | |
7566 | + PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies, | |
7567 | + start + continue_delay); | |
7568 | + return 0; | |
7569 | +} | |
7570 | + | |
7571 | +void toi_initiate_cluster_hibernate(void) | |
7572 | +{ | |
7573 | + int result; | |
7574 | + unsigned long start; | |
7575 | + | |
7576 | + result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); | |
7577 | + if (result) | |
7578 | + return; | |
7579 | + | |
7580 | + toi_send_if(MSG_HIBERNATE, 0); | |
7581 | + | |
7582 | + start = jiffies; | |
7583 | + wait_event(node_array[0].member_events, | |
7584 | + time_to_continue(0, start, MSG_HIBERNATE)); | |
7585 | + | |
7586 | + if (test_action_state(TOI_FREEZER_TEST)) { | |
7587 | + toi_send_if(MSG_ABORT, 0); | |
7588 | + | |
7589 | + start = jiffies; | |
7590 | + wait_event(node_array[0].member_events, | |
7591 | + time_to_continue(0, start, MSG_RUNNING)); | |
7592 | + | |
7593 | + do_toi_step(STEP_QUIET_CLEANUP); | |
7594 | + return; | |
7595 | + } | |
7596 | + | |
7597 | + toi_send_if(MSG_IO, 0); | |
7598 | + | |
7599 | + result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); | |
7600 | + if (result) | |
7601 | + return; | |
7602 | + | |
7603 | + /* This code runs at resume time too! */ | |
7604 | + if (toi_in_hibernate) | |
7605 | + result = do_toi_step(STEP_HIBERNATE_POWERDOWN); | |
7606 | +} | |
7607 | +EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate); | |
7608 | + | |
7609 | +/* toi_cluster_print_debug_stats | |
7610 | + * | |
7611 | + * Description: Print information to be recorded for debugging purposes into a | |
7612 | + * buffer. | |
7613 | + * Arguments: buffer: Pointer to a buffer into which the debug info will be | |
7614 | + * printed. | |
7615 | + * size: Size of the buffer. | |
7616 | + * Returns: Number of characters written to the buffer. | |
7617 | + */ | |
7618 | +static int toi_cluster_print_debug_stats(char *buffer, int size) | |
7619 | +{ | |
7620 | + int len; | |
7621 | + | |
7622 | + if (strlen(toi_cluster_iface)) | |
7623 | + len = scnprintf(buffer, size, | |
7624 | + "- Cluster interface is '%s'.\n", | |
7625 | + toi_cluster_iface); | |
7626 | + else | |
7627 | + len = scnprintf(buffer, size, | |
7628 | + "- Cluster support is disabled.\n"); | |
7629 | + return len; | |
7630 | +} | |
7631 | + | |
/* toi_cluster_memory_needed
 *
 * Description:	Report how much extra memory this module needs while
 *		hibernating or resuming.
 * Returns:	int. Always 0.
 */
static int toi_cluster_memory_needed(void)
{
	return 0;
}
7643 | + | |
7644 | +static int toi_cluster_storage_needed(void) | |
7645 | +{ | |
7646 | + return 1 + strlen(toi_cluster_iface); | |
7647 | +} | |
7648 | + | |
7649 | +/* toi_cluster_save_config_info | |
7650 | + * | |
7651 | + * Description: Save informaton needed when reloading the image at resume time. | |
7652 | + * Arguments: Buffer: Pointer to a buffer of size PAGE_SIZE. | |
7653 | + * Returns: Number of bytes used for saving our data. | |
7654 | + */ | |
7655 | +static int toi_cluster_save_config_info(char *buffer) | |
7656 | +{ | |
7657 | + strcpy(buffer, toi_cluster_iface); | |
7658 | + return strlen(toi_cluster_iface + 1); | |
7659 | +} | |
7660 | + | |
7661 | +/* toi_cluster_load_config_info | |
7662 | + * | |
7663 | + * Description: Reload information needed for declustering the image at | |
7664 | + * resume time. | |
7665 | + * Arguments: Buffer: Pointer to the start of the data. | |
7666 | + * Size: Number of bytes that were saved. | |
7667 | + */ | |
7668 | +static void toi_cluster_load_config_info(char *buffer, int size) | |
7669 | +{ | |
7670 | + strncpy(toi_cluster_iface, buffer, size); | |
7671 | + return; | |
7672 | +} | |
7673 | + | |
7674 | +static void cluster_startup(void) | |
7675 | +{ | |
7676 | + int have_image = do_check_can_resume(), i; | |
7677 | + unsigned long start = jiffies, initial_message; | |
7678 | + struct task_struct *p; | |
7679 | + | |
7680 | + initial_message = MSG_IMAGE; | |
7681 | + | |
7682 | + have_image = 1; | |
7683 | + | |
7684 | + for (i = 0; i < num_local_nodes; i++) { | |
7685 | + PRINTK("Starting ktoiclusterd %d.\n", i); | |
7686 | + p = kthread_create(kTOICluster, (void *) initial_message, | |
7687 | + "ktoiclusterd/%d", i); | |
7688 | + if (IS_ERR(p)) { | |
e999739a | 7689 | + printk(KERN_ERR "Failed to start ktoiclusterd.\n"); |
2380c486 JR |
7690 | + return; |
7691 | + } | |
7692 | + | |
7693 | + wake_up_process(p); | |
7694 | + } | |
7695 | + | |
7696 | + /* Wait for delay or someone else sending first message */ | |
7697 | + wait_event(node_array[0].member_events, time_to_continue(0, start, | |
7698 | + MSG_IMAGE)); | |
7699 | + | |
7700 | + others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1); | |
7701 | + | |
7702 | + printk(KERN_INFO "Continuing. I %shave an image. Peers with image:" | |
7703 | + " %d.\n", have_image ? "" : "don't ", others_have_image); | |
7704 | + | |
7705 | + if (have_image) { | |
7706 | + int result; | |
7707 | + | |
7708 | + /* Start to resume */ | |
7709 | + printk(KERN_INFO " === Starting to resume === \n"); | |
7710 | + node_array[0].current_message = MSG_IO; | |
7711 | + toi_send_if(MSG_IO, 0); | |
7712 | + | |
7713 | + /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */ | |
7714 | + result = 0; | |
7715 | + | |
7716 | + if (!result) { | |
7717 | + /* | |
7718 | + * Atomic restore - we'll come back in the hibernation | |
7719 | + * path. | |
7720 | + */ | |
7721 | + | |
7722 | + /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */ | |
7723 | + result = 0; | |
7724 | + | |
7725 | + /* do_toi_step(STEP_QUIET_CLEANUP); */ | |
7726 | + } | |
7727 | + | |
7728 | + node_array[0].current_message |= MSG_NACK; | |
7729 | + | |
7730 | + /* For debugging - disable for real life? */ | |
7731 | + wait_event(node_array[0].member_events, | |
7732 | + time_to_continue(0, start, MSG_IO)); | |
7733 | + } | |
7734 | + | |
7735 | + if (others_have_image) { | |
7736 | + /* Wait for them to resume */ | |
7737 | + printk(KERN_INFO "Waiting for other nodes to resume.\n"); | |
7738 | + start = jiffies; | |
7739 | + wait_event(node_array[0].member_events, | |
7740 | + time_to_continue(0, start, MSG_RUNNING)); | |
7741 | + if (peers_not_in_message(0, MSG_RUNNING, 0)) | |
7742 | + printk(KERN_INFO "Timed out while waiting for other " | |
7743 | + "nodes to resume.\n"); | |
7744 | + } | |
7745 | + | |
7746 | + /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE | |
7747 | + * as appropriate. | |
7748 | + * | |
7749 | + * If we don't have an image: | |
7750 | + * - Wait until someone else says they have one, or conditions are met | |
7751 | + * for continuing to boot (n machines or t seconds). | |
7752 | + * - If anyone has an image, wait for them to resume before continuing | |
7753 | + * to boot. | |
7754 | + * | |
7755 | + * If we have an image: | |
7756 | + * - Wait until conditions are met before continuing to resume (n | |
7757 | + * machines or t seconds). Send RESUME_PREP and freeze processes. | |
7758 | + * NACK_PREP if freezing fails (shouldn't) and follow logic for | |
7759 | + * us having no image above. On success, wait for [N]ACK_PREP from | |
7760 | + * other machines. Read image (including atomic restore) until done. | |
7761 | + * Wait for ACK_READ from others (should never fail). Thaw processes | |
7762 | + * and do post-resume. (The section after the atomic restore is done | |
7763 | + * via the code for hibernating). | |
7764 | + */ | |
7765 | + | |
7766 | + node_array[0].current_message = MSG_RUNNING; | |
7767 | +} | |
7768 | + | |
7769 | +/* toi_cluster_open_iface | |
7770 | + * | |
7771 | + * Description: Prepare to use an interface. | |
7772 | + */ | |
7773 | + | |
7774 | +static int toi_cluster_open_iface(void) | |
7775 | +{ | |
7776 | + struct net_device *dev; | |
7777 | + | |
7778 | + rtnl_lock(); | |
7779 | + | |
7780 | + for_each_netdev(&init_net, dev) { | |
7781 | + if (/* dev == &init_net.loopback_dev || */ | |
7782 | + strcmp(dev->name, toi_cluster_iface)) | |
7783 | + continue; | |
7784 | + | |
7785 | + net_dev = dev; | |
7786 | + break; | |
7787 | + } | |
7788 | + | |
7789 | + rtnl_unlock(); | |
7790 | + | |
7791 | + if (!net_dev) { | |
7792 | + printk(KERN_ERR MYNAME ": Device %s not found.\n", | |
7793 | + toi_cluster_iface); | |
7794 | + return -ENODEV; | |
7795 | + } | |
7796 | + | |
7797 | + dev_add_pack(&toi_cluster_packet_type); | |
7798 | + added_pack = 1; | |
7799 | + | |
7800 | + loopback_mode = (net_dev == init_net.loopback_dev); | |
7801 | + num_local_nodes = loopback_mode ? 8 : 1; | |
7802 | + | |
7803 | + PRINTK("Loopback mode is %s. Number of local nodes is %d.\n", | |
7804 | + loopback_mode ? "on" : "off", num_local_nodes); | |
7805 | + | |
7806 | + cluster_startup(); | |
7807 | + return 0; | |
7808 | +} | |
7809 | + | |
7810 | +/* toi_cluster_close_iface | |
7811 | + * | |
7812 | + * Description: Stop using an interface. | |
7813 | + */ | |
7814 | + | |
7815 | +static int toi_cluster_close_iface(void) | |
7816 | +{ | |
7817 | + kill_clusterd(); | |
7818 | + if (added_pack) { | |
7819 | + dev_remove_pack(&toi_cluster_packet_type); | |
7820 | + added_pack = 0; | |
7821 | + } | |
7822 | + return 0; | |
7823 | +} | |
7824 | + | |
7825 | +static void write_side_effect(void) | |
7826 | +{ | |
7827 | + if (toi_cluster_ops.enabled) { | |
7828 | + toi_cluster_open_iface(); | |
7829 | + set_toi_state(TOI_CLUSTER_MODE); | |
7830 | + } else { | |
7831 | + toi_cluster_close_iface(); | |
7832 | + clear_toi_state(TOI_CLUSTER_MODE); | |
7833 | + } | |
7834 | +} | |
7835 | + | |
/*
 * node_write_side_effect
 *
 * Description:	Side effect hook for the per-node sysfs entries.
 *		Intentionally does nothing.
 */
static void node_write_side_effect(void)
{
	/* Nothing to do. */
}
7839 | + | |
7840 | +/* | |
7841 | + * data for our sysfs entries. | |
7842 | + */ | |
7843 | +static struct toi_sysfs_data sysfs_params[] = { | |
7844 | + SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0, | |
7845 | + NULL), | |
7846 | + SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0, | |
7847 | + write_side_effect), | |
7848 | + SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL), | |
7849 | + SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script, | |
7850 | + 256, 0, NULL), | |
7851 | + SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script, | |
7852 | + 256, 0, STRING), | |
7853 | + SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ, | |
7854 | + 0) | |
7855 | +}; | |
7856 | + | |
7857 | +/* | |
7858 | + * Ops structure. | |
7859 | + */ | |
7860 | + | |
7861 | +static struct toi_module_ops toi_cluster_ops = { | |
7862 | + .type = FILTER_MODULE, | |
7863 | + .name = "Cluster", | |
7864 | + .directory = "cluster", | |
7865 | + .module = THIS_MODULE, | |
7866 | + .memory_needed = toi_cluster_memory_needed, | |
7867 | + .print_debug_info = toi_cluster_print_debug_stats, | |
7868 | + .save_config_info = toi_cluster_save_config_info, | |
7869 | + .load_config_info = toi_cluster_load_config_info, | |
7870 | + .storage_needed = toi_cluster_storage_needed, | |
7871 | + | |
7872 | + .sysfs_data = sysfs_params, | |
7873 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
7874 | + sizeof(struct toi_sysfs_data), | |
7875 | +}; | |
7876 | + | |
/* ---- Registration ---- */

/*
 * INIT/EXIT decorate toi_cluster_init()/toi_cluster_exit(): static
 * __init/__exit when built with MODULE defined, nothing otherwise.
 */
#ifndef MODULE
#define INIT
#define EXIT
#else
#define INIT static __init
#define EXIT static __exit
#endif
7886 | + | |
7887 | +INIT int toi_cluster_init(void) | |
7888 | +{ | |
7889 | + int temp = toi_register_module(&toi_cluster_ops), i; | |
7890 | + struct kobject *kobj = toi_cluster_ops.dir_kobj; | |
7891 | + | |
7892 | + for (i = 0; i < MAX_LOCAL_NODES; i++) { | |
7893 | + node_array[i].current_message = 0; | |
7894 | + INIT_LIST_HEAD(&node_array[i].member_list); | |
7895 | + init_waitqueue_head(&node_array[i].member_events); | |
7896 | + spin_lock_init(&node_array[i].member_list_lock); | |
7897 | + spin_lock_init(&node_array[i].receive_lock); | |
7898 | + | |
7899 | + /* Set up sysfs entry */ | |
7900 | + node_array[i].sysfs_data.attr.name = toi_kzalloc(8, | |
7901 | + sizeof(node_array[i].sysfs_data.attr.name), | |
7902 | + GFP_KERNEL); | |
7903 | + sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d", | |
7904 | + i); | |
7905 | + node_array[i].sysfs_data.attr.mode = SYSFS_RW; | |
7906 | + node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER; | |
7907 | + node_array[i].sysfs_data.flags = 0; | |
7908 | + node_array[i].sysfs_data.data.integer.variable = | |
7909 | + (int *) &node_array[i].current_message; | |
7910 | + node_array[i].sysfs_data.data.integer.minimum = 0; | |
7911 | + node_array[i].sysfs_data.data.integer.maximum = INT_MAX; | |
7912 | + node_array[i].sysfs_data.write_side_effect = | |
7913 | + node_write_side_effect; | |
7914 | + toi_register_sysfs_file(kobj, &node_array[i].sysfs_data); | |
7915 | + } | |
7916 | + | |
7917 | + toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0); | |
7918 | + | |
7919 | + if (toi_cluster_ops.enabled) | |
7920 | + toi_cluster_open_iface(); | |
7921 | + | |
7922 | + return temp; | |
7923 | +} | |
7924 | + | |
7925 | +EXIT void toi_cluster_exit(void) | |
7926 | +{ | |
7927 | + int i; | |
7928 | + toi_cluster_close_iface(); | |
7929 | + | |
7930 | + for (i = 0; i < MAX_LOCAL_NODES; i++) | |
7931 | + toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj, | |
7932 | + &node_array[i].sysfs_data); | |
7933 | + toi_unregister_module(&toi_cluster_ops); | |
7934 | +} | |
7935 | + | |
7936 | +static int __init toi_cluster_iface_setup(char *iface) | |
7937 | +{ | |
7938 | + toi_cluster_ops.enabled = (*iface && | |
7939 | + strcmp(iface, "off")); | |
7940 | + | |
7941 | + if (toi_cluster_ops.enabled) | |
7942 | + strncpy(toi_cluster_iface, iface, strlen(iface)); | |
7943 | +} | |
7944 | + | |
7945 | +__setup("toi_cluster=", toi_cluster_iface_setup); | |
7946 | + | |
#ifdef MODULE
/* Module entry points and metadata; only used when MODULE is defined. */
module_init(toi_cluster_init);
module_exit(toi_cluster_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Nigel Cunningham");
MODULE_DESCRIPTION("Cluster Support for TuxOnIce");
#endif
7954 | diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h | |
7955 | new file mode 100644 | |
7956 | index 0000000..b0f8918 | |
7957 | --- /dev/null | |
7958 | +++ b/kernel/power/tuxonice_cluster.h | |
7959 | @@ -0,0 +1,19 @@ | |
7960 | +/* | |
7961 | + * kernel/power/tuxonice_cluster.h | |
7962 | + * | |
7963 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
7964 | + * Copyright (C) 2006 Red Hat, inc. | |
7965 | + * | |
7966 | + * This file is released under the GPLv2. | |
7967 | + */ | |
7968 | + | |
#ifndef CONFIG_TOI_CLUSTER
/* Cluster support not configured: do-nothing stand-ins. */
static inline int toi_cluster_init(void)
{
	return 0;
}

static inline void toi_cluster_exit(void)
{
}

static inline void toi_initiate_cluster_hibernate(void)
{
}
#else
/* Cluster support configured: the real implementations are used. */
int toi_cluster_init(void);
void toi_cluster_exit(void);
void toi_initiate_cluster_hibernate(void);
#endif
7978 | + | |
7979 | diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c | |
7980 | new file mode 100644 | |
9474138d | 7981 | index 0000000..8acdf65 |
2380c486 JR |
7982 | --- /dev/null |
7983 | +++ b/kernel/power/tuxonice_compress.c | |
9474138d | 7984 | @@ -0,0 +1,447 @@ |
2380c486 JR |
7985 | +/* |
7986 | + * kernel/power/tuxonice_compress.c | |
7987 | + * | |
7988 | + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net) | |
7989 | + * | |
7990 | + * This file is released under the GPLv2. | |
7991 | + * | |
7992 | + * This file contains data compression routines for TuxOnIce, | |
7993 | + * using cryptoapi. | |
7994 | + */ | |
7995 | + | |
2380c486 JR |
7996 | +#include <linux/suspend.h> |
7997 | +#include <linux/highmem.h> | |
7998 | +#include <linux/vmalloc.h> | |
7999 | +#include <linux/crypto.h> | |
8000 | + | |
8001 | +#include "tuxonice_builtin.h" | |
8002 | +#include "tuxonice.h" | |
8003 | +#include "tuxonice_modules.h" | |
8004 | +#include "tuxonice_sysfs.h" | |
8005 | +#include "tuxonice_io.h" | |
8006 | +#include "tuxonice_ui.h" | |
8007 | +#include "tuxonice_alloc.h" | |
8008 | + | |
8009 | +static int toi_expected_compression; | |
8010 | + | |
8011 | +static struct toi_module_ops toi_compression_ops; | |
8012 | +static struct toi_module_ops *next_driver; | |
8013 | + | |
0ada99ac | 8014 | +static char toi_compressor_name[32] = "lzo"; |
2380c486 JR |
8015 | + |
8016 | +static DEFINE_MUTEX(stats_lock); | |
8017 | + | |
8018 | +struct cpu_context { | |
8019 | + u8 *page_buffer; | |
8020 | + struct crypto_comp *transform; | |
8021 | + unsigned int len; | |
8022 | + char *buffer_start; | |
0ada99ac | 8023 | + char *output_buffer; |
2380c486 JR |
8024 | +}; |
8025 | + | |
8026 | +static DEFINE_PER_CPU(struct cpu_context, contexts); | |
8027 | + | |
8028 | +static int toi_compress_prepare_result; | |
8029 | + | |
8030 | +/* | |
8031 | + * toi_compress_cleanup | |
8032 | + * | |
8033 | + * Frees memory allocated for our labours. | |
8034 | + */ | |
8035 | +static void toi_compress_cleanup(int toi_or_resume) | |
8036 | +{ | |
8037 | + int cpu; | |
8038 | + | |
8039 | + if (!toi_or_resume) | |
8040 | + return; | |
8041 | + | |
8042 | + for_each_online_cpu(cpu) { | |
8043 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
8044 | + if (this->transform) { | |
8045 | + crypto_free_comp(this->transform); | |
8046 | + this->transform = NULL; | |
8047 | + } | |
8048 | + | |
8049 | + if (this->page_buffer) | |
8050 | + toi_free_page(16, (unsigned long) this->page_buffer); | |
8051 | + | |
8052 | + this->page_buffer = NULL; | |
0ada99ac | 8053 | + |
8054 | + if (this->output_buffer) | |
8055 | + vfree(this->output_buffer); | |
8056 | + | |
8057 | + this->output_buffer = NULL; | |
2380c486 JR |
8058 | + } |
8059 | +} | |
8060 | + | |
8061 | +/* | |
8062 | + * toi_crypto_prepare | |
8063 | + * | |
8064 | + * Prepare to do some work by allocating buffers and transforms. | |
8065 | + */ | |
8066 | +static int toi_compress_crypto_prepare(void) | |
8067 | +{ | |
8068 | + int cpu; | |
8069 | + | |
8070 | + if (!*toi_compressor_name) { | |
8071 | + printk(KERN_INFO "TuxOnIce: Compression enabled but no " | |
8072 | + "compressor name set.\n"); | |
8073 | + return 1; | |
8074 | + } | |
8075 | + | |
8076 | + for_each_online_cpu(cpu) { | |
8077 | + struct cpu_context *this = &per_cpu(contexts, cpu); | |
8078 | + this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0); | |
8079 | + if (IS_ERR(this->transform)) { | |
8080 | + printk(KERN_INFO "TuxOnIce: Failed to initialise the " | |
8081 | + "%s compression transform.\n", | |
8082 | + toi_compressor_name); | |
8083 | + this->transform = NULL; | |
8084 | + return 1; | |
8085 | + } | |
8086 | + | |
8087 | + this->page_buffer = | |
8088 | + (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP); | |
8089 | + | |
8090 | + if (!this->page_buffer) { | |
8091 | + printk(KERN_ERR | |
8092 | + "Failed to allocate a page buffer for TuxOnIce " | |
0ada99ac | 8093 | + "compression driver.\n"); |
2380c486 JR |
8094 | + return -ENOMEM; |
8095 | + } | |
0ada99ac | 8096 | + |
8097 | + this->output_buffer = | |
8098 | + (char *) vmalloc_32(2 * PAGE_SIZE); | |
8099 | + | |
8100 | + if (!this->output_buffer) { | |
8101 | + printk(KERN_ERR | |
8102 | + "Failed to allocate a output buffer for TuxOnIce " | |
8103 | + "compression driver.\n"); | |
8104 | + return -ENOMEM; | |
8105 | + } | |
8106 | + | |
2380c486 JR |
8107 | + } |
8108 | + | |
8109 | + return 0; | |
8110 | +} | |
8111 | + | |
8112 | +/* | |
8113 | + * toi_compress_init | |
8114 | + */ | |
8115 | + | |
8116 | +static int toi_compress_init(int toi_or_resume) | |
8117 | +{ | |
8118 | + if (!toi_or_resume) | |
8119 | + return 0; | |
8120 | + | |
8121 | + toi_compress_bytes_in = 0; | |
8122 | + toi_compress_bytes_out = 0; | |
8123 | + | |
8124 | + next_driver = toi_get_next_filter(&toi_compression_ops); | |
8125 | + | |
8126 | + if (!next_driver) | |
8127 | + return -ECHILD; | |
8128 | + | |
8129 | + toi_compress_prepare_result = toi_compress_crypto_prepare(); | |
8130 | + | |
8131 | + return 0; | |
8132 | +} | |
8133 | + | |
8134 | +/* | |
8135 | + * toi_compress_rw_init() | |
8136 | + */ | |
8137 | + | |
8138 | +static int toi_compress_rw_init(int rw, int stream_number) | |
8139 | +{ | |
8140 | + if (toi_compress_prepare_result) { | |
8141 | + printk(KERN_ERR "Failed to initialise compression " | |
8142 | + "algorithm.\n"); | |
8143 | + if (rw == READ) { | |
8144 | + printk(KERN_INFO "Unable to read the image.\n"); | |
8145 | + return -ENODEV; | |
8146 | + } else { | |
8147 | + printk(KERN_INFO "Continuing without " | |
8148 | + "compressing the image.\n"); | |
8149 | + toi_compression_ops.enabled = 0; | |
8150 | + } | |
8151 | + } | |
8152 | + | |
8153 | + return 0; | |
8154 | +} | |
8155 | + | |
8156 | +/* | |
8157 | + * toi_compress_write_page() | |
8158 | + * | |
8159 | + * Compress a page of data, buffering output and passing on filled | |
8160 | + * pages to the next module in the pipeline. | |
8161 | + * | |
8162 | + * Buffer_page: Pointer to a buffer of size PAGE_SIZE, containing | |
8163 | + * data to be compressed. | |
8164 | + * | |
8165 | + * Returns: 0 on success. Otherwise the error is that returned by later | |
8166 | + * modules, -ECHILD if we have a broken pipeline or -EIO if | |
8167 | + * zlib errs. | |
8168 | + */ | |
8169 | +static int toi_compress_write_page(unsigned long index, | |
8170 | + struct page *buffer_page, unsigned int buf_size) | |
8171 | +{ | |
8172 | + int ret, cpu = smp_processor_id(); | |
8173 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
8174 | + | |
8175 | + if (!ctx->transform) | |
8176 | + return next_driver->write_page(index, buffer_page, buf_size); | |
8177 | + | |
8178 | + ctx->buffer_start = kmap(buffer_page); | |
8179 | + | |
8180 | + ctx->len = buf_size; | |
8181 | + | |
8182 | + ret = crypto_comp_compress(ctx->transform, | |
8183 | + ctx->buffer_start, buf_size, | |
0ada99ac | 8184 | + ctx->output_buffer, &ctx->len); |
2380c486 JR |
8185 | + |
8186 | + kunmap(buffer_page); | |
8187 | + | |
2380c486 JR |
8188 | + mutex_lock(&stats_lock); |
8189 | + toi_compress_bytes_in += buf_size; | |
8190 | + toi_compress_bytes_out += ctx->len; | |
8191 | + mutex_unlock(&stats_lock); | |
8192 | + | |
0ada99ac | 8193 | + if (!ret && ctx->len < buf_size) { /* some compression */ |
8194 | + memcpy(ctx->page_buffer, ctx->output_buffer, ctx->len); | |
8195 | + return next_driver->write_page(index, | |
2380c486 JR |
8196 | + virt_to_page(ctx->page_buffer), |
8197 | + ctx->len); | |
0ada99ac | 8198 | + } else |
8199 | + return next_driver->write_page(index, buffer_page, buf_size); | |
2380c486 JR |
8200 | +} |
8201 | + | |
8202 | +/* | |
8203 | + * toi_compress_read_page() | |
8204 | + * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE. | |
8205 | + * | |
8206 | + * Retrieve data from later modules and decompress it until the input buffer | |
8207 | + * is filled. | |
8208 | + * Zero if successful. Error condition from me or from downstream on failure. | |
8209 | + */ | |
8210 | +static int toi_compress_read_page(unsigned long *index, | |
8211 | + struct page *buffer_page, unsigned int *buf_size) | |
8212 | +{ | |
8213 | + int ret, cpu = smp_processor_id(); | |
8214 | + unsigned int len; | |
8215 | + unsigned int outlen = PAGE_SIZE; | |
8216 | + char *buffer_start; | |
8217 | + struct cpu_context *ctx = &per_cpu(contexts, cpu); | |
8218 | + | |
8219 | + if (!ctx->transform) | |
8220 | + return next_driver->read_page(index, buffer_page, buf_size); | |
8221 | + | |
8222 | + /* | |
8223 | + * All our reads must be synchronous - we can't decompress | |
8224 | + * data that hasn't been read yet. | |
8225 | + */ | |
8226 | + | |
8227 | + *buf_size = PAGE_SIZE; | |
8228 | + | |
8229 | + ret = next_driver->read_page(index, buffer_page, &len); | |
8230 | + | |
8231 | + /* Error or uncompressed data */ | |
8232 | + if (ret || len == PAGE_SIZE) | |
8233 | + return ret; | |
8234 | + | |
8235 | + buffer_start = kmap(buffer_page); | |
8236 | + memcpy(ctx->page_buffer, buffer_start, len); | |
8237 | + ret = crypto_comp_decompress( | |
8238 | + ctx->transform, | |
8239 | + ctx->page_buffer, | |
8240 | + len, buffer_start, &outlen); | |
8241 | + if (ret) | |
8242 | + abort_hibernate(TOI_FAILED_IO, | |
8243 | + "Compress_read returned %d.\n", ret); | |
8244 | + else if (outlen != PAGE_SIZE) { | |
8245 | + abort_hibernate(TOI_FAILED_IO, | |
8246 | + "Decompression yielded %d bytes instead of %ld.\n", | |
8247 | + outlen, PAGE_SIZE); | |
8248 | + printk(KERN_ERR "Decompression yielded %d bytes instead of " | |
8249 | + "%ld.\n", outlen, PAGE_SIZE); | |
8250 | + ret = -EIO; | |
8251 | + *buf_size = outlen; | |
8252 | + } | |
8253 | + kunmap(buffer_page); | |
8254 | + return ret; | |
8255 | +} | |
8256 | + | |
8257 | +/* | |
8258 | + * toi_compress_print_debug_stats | |
8259 | + * @buffer: Pointer to a buffer into which the debug info will be printed. | |
8260 | + * @size: Size of the buffer. | |
8261 | + * | |
8262 | + * Print information to be recorded for debugging purposes into a buffer. | |
8263 | + * Returns: Number of characters written to the buffer. | |
8264 | + */ | |
8265 | + | |
8266 | +static int toi_compress_print_debug_stats(char *buffer, int size) | |
8267 | +{ | |
8268 | + unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT, | |
8269 | + pages_out = toi_compress_bytes_out >> PAGE_SHIFT; | |
8270 | + int len; | |
8271 | + | |
8272 | + /* Output the compression ratio achieved. */ | |
8273 | + if (*toi_compressor_name) | |
8274 | + len = scnprintf(buffer, size, "- Compressor is '%s'.\n", | |
8275 | + toi_compressor_name); | |
8276 | + else | |
8277 | + len = scnprintf(buffer, size, "- Compressor is not set.\n"); | |
8278 | + | |
8279 | + if (pages_in) | |
8280 | + len += scnprintf(buffer+len, size - len, " Compressed " | |
8281 | + "%lu bytes into %lu (%ld percent compression).\n", | |
8282 | + toi_compress_bytes_in, | |
8283 | + toi_compress_bytes_out, | |
8284 | + (pages_in - pages_out) * 100 / pages_in); | |
8285 | + return len; | |
8286 | +} | |
8287 | + | |
8288 | +/* | |
8289 | + * toi_compress_compression_memory_needed | |
8290 | + * | |
8291 | + * Tell the caller how much memory we need to operate during hibernate/resume. | |
8292 | + * Returns: Unsigned long. Maximum number of bytes of memory required for | |
8293 | + * operation. | |
8294 | + */ | |
8295 | +static int toi_compress_memory_needed(void) | |
8296 | +{ | |
8297 | + return 2 * PAGE_SIZE; | |
8298 | +} | |
8299 | + | |
8300 | +static int toi_compress_storage_needed(void) | |
8301 | +{ | |
8302 | + return 4 * sizeof(unsigned long) + strlen(toi_compressor_name) + 1; | |
8303 | +} | |
8304 | + | |
8305 | +/* | |
8306 | + * toi_compress_save_config_info | |
8307 | + * @buffer: Pointer to a buffer of size PAGE_SIZE. | |
8308 | + * | |
8309 | + * Save informaton needed when reloading the image at resume time. | |
8310 | + * Returns: Number of bytes used for saving our data. | |
8311 | + */ | |
8312 | +static int toi_compress_save_config_info(char *buffer) | |
8313 | +{ | |
8314 | + int namelen = strlen(toi_compressor_name) + 1; | |
8315 | + int total_len; | |
8316 | + | |
8317 | + *((unsigned long *) buffer) = toi_compress_bytes_in; | |
8318 | + *((unsigned long *) (buffer + 1 * sizeof(unsigned long))) = | |
8319 | + toi_compress_bytes_out; | |
8320 | + *((unsigned long *) (buffer + 2 * sizeof(unsigned long))) = | |
8321 | + toi_expected_compression; | |
8322 | + *((unsigned long *) (buffer + 3 * sizeof(unsigned long))) = namelen; | |
8323 | + strncpy(buffer + 4 * sizeof(unsigned long), toi_compressor_name, | |
8324 | + namelen); | |
8325 | + total_len = 4 * sizeof(unsigned long) + namelen; | |
8326 | + return total_len; | |
8327 | +} | |
8328 | + | |
8329 | +/* toi_compress_load_config_info | |
8330 | + * @buffer: Pointer to the start of the data. | |
8331 | + * @size: Number of bytes that were saved. | |
8332 | + * | |
8333 | + * Description: Reload information needed for decompressing the image at | |
8334 | + * resume time. | |
8335 | + */ | |
8336 | +static void toi_compress_load_config_info(char *buffer, int size) | |
8337 | +{ | |
8338 | + int namelen; | |
8339 | + | |
8340 | + toi_compress_bytes_in = *((unsigned long *) buffer); | |
8341 | + toi_compress_bytes_out = *((unsigned long *) (buffer + 1 * | |
8342 | + sizeof(unsigned long))); | |
8343 | + toi_expected_compression = *((unsigned long *) (buffer + 2 * | |
8344 | + sizeof(unsigned long))); | |
8345 | + namelen = *((unsigned long *) (buffer + 3 * sizeof(unsigned long))); | |
8346 | + if (strncmp(toi_compressor_name, buffer + 4 * sizeof(unsigned long), | |
8347 | + namelen)) { | |
8348 | + toi_compress_cleanup(1); | |
8349 | + strncpy(toi_compressor_name, buffer + 4 * sizeof(unsigned long), | |
8350 | + namelen); | |
8351 | + toi_compress_crypto_prepare(); | |
8352 | + } | |
8353 | + return; | |
8354 | +} | |
8355 | + | |
8356 | +/* | |
8357 | + * toi_expected_compression_ratio | |
8358 | + * | |
8359 | + * Description: Returns the expected ratio between data passed into this module | |
8360 | + * and the amount of data output when writing. | |
8361 | + * Returns: 100 if the module is disabled. Otherwise the value set by the | |
8362 | + * user via our sysfs entry. | |
8363 | + */ | |
8364 | + | |
8365 | +static int toi_compress_expected_ratio(void) | |
8366 | +{ | |
8367 | + if (!toi_compression_ops.enabled) | |
8368 | + return 100; | |
8369 | + else | |
8370 | + return 100 - toi_expected_compression; | |
8371 | +} | |
8372 | + | |
8373 | +/* | |
8374 | + * data for our sysfs entries. | |
8375 | + */ | |
8376 | +static struct toi_sysfs_data sysfs_params[] = { | |
8377 | + SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression, | |
8378 | + 0, 99, 0, NULL), | |
8379 | + SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0, | |
8380 | + NULL), | |
8381 | + SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL), | |
8382 | +}; | |
8383 | + | |
8384 | +/* | |
8385 | + * Ops structure. | |
8386 | + */ | |
8387 | +static struct toi_module_ops toi_compression_ops = { | |
8388 | + .type = FILTER_MODULE, | |
8389 | + .name = "compression", | |
8390 | + .directory = "compression", | |
8391 | + .module = THIS_MODULE, | |
8392 | + .initialise = toi_compress_init, | |
8393 | + .cleanup = toi_compress_cleanup, | |
8394 | + .memory_needed = toi_compress_memory_needed, | |
8395 | + .print_debug_info = toi_compress_print_debug_stats, | |
8396 | + .save_config_info = toi_compress_save_config_info, | |
8397 | + .load_config_info = toi_compress_load_config_info, | |
8398 | + .storage_needed = toi_compress_storage_needed, | |
8399 | + .expected_compression = toi_compress_expected_ratio, | |
8400 | + | |
8401 | + .rw_init = toi_compress_rw_init, | |
8402 | + | |
8403 | + .write_page = toi_compress_write_page, | |
8404 | + .read_page = toi_compress_read_page, | |
8405 | + | |
8406 | + .sysfs_data = sysfs_params, | |
8407 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
8408 | + sizeof(struct toi_sysfs_data), | |
8409 | +}; | |
8410 | + | |
8411 | +/* ---- Registration ---- */ | |
8412 | + | |
8413 | +static __init int toi_compress_load(void) | |
8414 | +{ | |
8415 | + return toi_register_module(&toi_compression_ops); | |
8416 | +} | |
8417 | + | |
8418 | +#ifdef MODULE | |
8419 | +static __exit void toi_compress_unload(void) | |
8420 | +{ | |
8421 | + toi_unregister_module(&toi_compression_ops); | |
8422 | +} | |
8423 | + | |
8424 | +module_init(toi_compress_load); | |
8425 | +module_exit(toi_compress_unload); | |
8426 | +MODULE_LICENSE("GPL"); | |
8427 | +MODULE_AUTHOR("Nigel Cunningham"); | |
8428 | +MODULE_DESCRIPTION("Compression Support for TuxOnIce"); | |
8429 | +#else | |
8430 | +late_initcall(toi_compress_load); | |
8431 | +#endif | |
8432 | diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c | |
8433 | new file mode 100644 | |
9474138d | 8434 | index 0000000..28c421b |
2380c486 JR |
8435 | --- /dev/null |
8436 | +++ b/kernel/power/tuxonice_extent.c | |
9474138d | 8437 | @@ -0,0 +1,313 @@ |
2380c486 JR |
8438 | +/* |
8439 | + * kernel/power/tuxonice_extent.c | |
8440 | + * | |
8441 | + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net) | |
8442 | + * | |
8443 | + * Distributed under GPLv2. | |
8444 | + * | |
8445 | + * These functions encapsulate the manipulation of storage metadata. | |
8446 | + */ | |
8447 | + | |
2380c486 JR |
8448 | +#include <linux/suspend.h> |
8449 | +#include "tuxonice_modules.h" | |
8450 | +#include "tuxonice_extent.h" | |
8451 | +#include "tuxonice_alloc.h" | |
8452 | +#include "tuxonice_ui.h" | |
8453 | +#include "tuxonice.h" | |
8454 | + | |
8455 | +/** | |
8456 | + * toi_get_extent - return a free extent | |
8457 | + * | |
8458 | + * May fail, returning NULL instead. | |
8459 | + **/ | |
8460 | +static struct hibernate_extent *toi_get_extent(void) | |
8461 | +{ | |
8462 | + return (struct hibernate_extent *) toi_kzalloc(2, | |
8463 | + sizeof(struct hibernate_extent), TOI_ATOMIC_GFP); | |
8464 | +} | |
8465 | + | |
8466 | +/** | |
8467 | + * toi_put_extent_chain - free a whole chain of extents | |
8468 | + * @chain: Chain to free. | |
8469 | + **/ | |
8470 | +void toi_put_extent_chain(struct hibernate_extent_chain *chain) | |
8471 | +{ | |
8472 | + struct hibernate_extent *this; | |
8473 | + | |
8474 | + this = chain->first; | |
8475 | + | |
8476 | + while (this) { | |
8477 | + struct hibernate_extent *next = this->next; | |
9474138d | 8478 | + toi_kfree(2, this, sizeof(*this)); |
2380c486 JR |
8479 | + chain->num_extents--; |
8480 | + this = next; | |
8481 | + } | |
8482 | + | |
8483 | + chain->first = NULL; | |
8484 | + chain->last_touched = NULL; | |
8485 | + chain->size = 0; | |
8486 | +} | |
8487 | +EXPORT_SYMBOL_GPL(toi_put_extent_chain); | |
8488 | + | |
8489 | +/** | |
8490 | + * toi_add_to_extent_chain - add an extent to an existing chain | |
8491 | + * @chain: Chain to which the extent should be added | |
8492 | + * @start: Start of the extent (first physical block) | |
8493 | + * @end: End of the extent (last physical block) | |
8494 | + * | |
8495 | + * The chain information is updated if the insertion is successful. | |
8496 | + **/ | |
8497 | +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, | |
8498 | + unsigned long start, unsigned long end) | |
8499 | +{ | |
8500 | + struct hibernate_extent *new_ext = NULL, *cur_ext = NULL; | |
8501 | + | |
8502 | + /* Find the right place in the chain */ | |
8503 | + if (chain->last_touched && chain->last_touched->start < start) | |
8504 | + cur_ext = chain->last_touched; | |
8505 | + else if (chain->first && chain->first->start < start) | |
8506 | + cur_ext = chain->first; | |
8507 | + | |
8508 | + if (cur_ext) { | |
8509 | + while (cur_ext->next && cur_ext->next->start < start) | |
8510 | + cur_ext = cur_ext->next; | |
8511 | + | |
8512 | + if (cur_ext->end == (start - 1)) { | |
8513 | + struct hibernate_extent *next_ext = cur_ext->next; | |
8514 | + cur_ext->end = end; | |
8515 | + | |
8516 | + /* Merge with the following one? */ | |
8517 | + if (next_ext && cur_ext->end + 1 == next_ext->start) { | |
8518 | + cur_ext->end = next_ext->end; | |
8519 | + cur_ext->next = next_ext->next; | |
9474138d | 8520 | + toi_kfree(2, next_ext, sizeof(*next_ext)); |
2380c486 JR |
8521 | + chain->num_extents--; |
8522 | + } | |
8523 | + | |
8524 | + chain->last_touched = cur_ext; | |
8525 | + chain->size += (end - start + 1); | |
8526 | + | |
8527 | + return 0; | |
8528 | + } | |
8529 | + } | |
8530 | + | |
8531 | + new_ext = toi_get_extent(); | |
8532 | + if (!new_ext) { | |
8533 | + printk(KERN_INFO "Error unable to append a new extent to the " | |
8534 | + "chain.\n"); | |
8535 | + return -ENOMEM; | |
8536 | + } | |
8537 | + | |
8538 | + chain->num_extents++; | |
8539 | + chain->size += (end - start + 1); | |
8540 | + new_ext->start = start; | |
8541 | + new_ext->end = end; | |
8542 | + | |
8543 | + chain->last_touched = new_ext; | |
8544 | + | |
8545 | + if (cur_ext) { | |
8546 | + new_ext->next = cur_ext->next; | |
8547 | + cur_ext->next = new_ext; | |
8548 | + } else { | |
8549 | + if (chain->first) | |
8550 | + new_ext->next = chain->first; | |
8551 | + chain->first = new_ext; | |
8552 | + } | |
8553 | + | |
8554 | + return 0; | |
8555 | +} | |
8556 | +EXPORT_SYMBOL_GPL(toi_add_to_extent_chain); | |
8557 | + | |
8558 | +/** | |
8559 | + * toi_serialise_extent_chain - write a chain in the image | |
8560 | + * @owner: Module writing the chain. | |
8561 | + * @chain: Chain to write. | |
8562 | + **/ | |
8563 | +int toi_serialise_extent_chain(struct toi_module_ops *owner, | |
8564 | + struct hibernate_extent_chain *chain) | |
8565 | +{ | |
8566 | + struct hibernate_extent *this; | |
8567 | + int ret, i = 0; | |
8568 | + | |
8569 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, owner, (char *) chain, | |
8570 | + 2 * sizeof(int)); | |
8571 | + if (ret) | |
8572 | + return ret; | |
8573 | + | |
8574 | + this = chain->first; | |
8575 | + while (this) { | |
8576 | + ret = toiActiveAllocator->rw_header_chunk(WRITE, owner, | |
8577 | + (char *) this, 2 * sizeof(unsigned long)); | |
8578 | + if (ret) | |
8579 | + return ret; | |
8580 | + this = this->next; | |
8581 | + i++; | |
8582 | + } | |
8583 | + | |
8584 | + if (i != chain->num_extents) { | |
8585 | + printk(KERN_EMERG "Saved %d extents but chain metadata says " | |
8586 | + "there should be %d.\n", i, chain->num_extents); | |
8587 | + return 1; | |
8588 | + } | |
8589 | + | |
8590 | + return ret; | |
8591 | +} | |
8592 | +EXPORT_SYMBOL_GPL(toi_serialise_extent_chain); | |
8593 | + | |
8594 | +/** | |
8595 | + * toi_load_extent_chain - read back a chain saved in the image | |
8596 | + * @chain: Chain to load | |
8597 | + * | |
8598 | + * The linked list of extents is reconstructed from the disk. chain->first will | |
8599 | + * point to the first entry. Returns 0, 1 on a read error or -ENOMEM. | |
8600 | + **/ | |
8601 | +int toi_load_extent_chain(struct hibernate_extent_chain *chain) | |
8602 | +{ | |
8603 | + struct hibernate_extent *this, *last = NULL; | |
8604 | + int i, ret; | |
8605 | + | |
8606 | + /* Read the leading size and num_extents fields of the chain */ | |
8607 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, | |
8608 | + (char *) chain, 2 * sizeof(int)); | |
8609 | + if (ret) { | |
e999739a | 8610 | + printk(KERN_ERR "Failed to read the size of extent chain.\n"); |
2380c486 JR |
8611 | + return 1; |
8612 | + } | |
8613 | + | |
8614 | + for (i = 0; i < chain->num_extents; i++) { | |
8615 | + this = toi_kzalloc(3, sizeof(*this), TOI_ATOMIC_GFP); | |
8616 | + if (!this) { | |
8617 | + printk(KERN_INFO "Failed to allocate a new extent.\n"); | |
8618 | + return -ENOMEM; | |
8619 | + } | |
8620 | + this->next = NULL; | |
8621 | + /* Read the start and end fields of this extent */ | |
8622 | + ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, | |
8623 | + NULL, (char *) this, 2 * sizeof(unsigned long)); | |
8624 | + if (ret) { | |
8625 | + printk(KERN_INFO "Failed to read an extent.\n"); | |
8626 | + toi_kfree(3, this, sizeof(*this)); /* not linked yet */ | |
8627 | + return 1; | |
8628 | + } | |
8629 | + if (last) | |
8630 | + last->next = this; | |
8631 | + else | |
8632 | + chain->first = this; | |
8633 | + last = this; | |
8634 | + } | |
8635 | + return 0; | |
8636 | +} | |
8637 | +EXPORT_SYMBOL_GPL(toi_load_extent_chain); | |
8638 | + | |
8639 | +/** | |
8640 | + * toi_extent_state_next - move the iterator to the next offset | |
8641 | + * | |
8642 | + * Given a state, progress to the next valid entry. We may begin in an | |
8643 | + * invalid state, as we do when invoked after extent_state_goto_start below. | |
8644 | + * | |
8645 | + * When using compression and expected_compression > 0, we let the image size | |
8646 | + * be larger than storage, so we can validly run out of data to return. | |
8647 | + **/ | |
8648 | +unsigned long toi_extent_state_next(struct toi_extent_iterate_state *state) | |
8649 | +{ | |
8650 | + if (state->current_chain == state->num_chains) | |
8651 | + return 0; | |
8652 | + | |
8653 | + if (state->current_extent) { | |
8654 | + if (state->current_offset == state->current_extent->end) { | |
8655 | + if (state->current_extent->next) { | |
8656 | + state->current_extent = | |
8657 | + state->current_extent->next; | |
8658 | + state->current_offset = | |
8659 | + state->current_extent->start; | |
8660 | + } else { | |
8661 | + state->current_extent = NULL; | |
8662 | + state->current_offset = 0; | |
8663 | + } | |
8664 | + } else | |
8665 | + state->current_offset++; | |
8666 | + } | |
8667 | + | |
8668 | + while (!state->current_extent) { | |
8669 | + int chain_num = ++(state->current_chain); | |
8670 | + | |
8671 | + if (chain_num == state->num_chains) | |
8672 | + return 0; | |
8673 | + | |
8674 | + state->current_extent = (state->chains + chain_num)->first; | |
8675 | + | |
8676 | + if (!state->current_extent) | |
8677 | + continue; | |
8678 | + | |
8679 | + state->current_offset = state->current_extent->start; | |
8680 | + } | |
8681 | + | |
8682 | + return state->current_offset; | |
8683 | +} | |
8684 | +EXPORT_SYMBOL_GPL(toi_extent_state_next); | |
8685 | + | |
8686 | +/** | |
8687 | + * toi_extent_state_goto_start - reinitialize an extent chain iterator | |
8688 | + * @state: Iterator to reinitialize | |
8689 | + **/ | |
8690 | +void toi_extent_state_goto_start(struct toi_extent_iterate_state *state) | |
8691 | +{ | |
8692 | + state->current_chain = -1; | |
8693 | + state->current_extent = NULL; | |
8694 | + state->current_offset = 0; | |
8695 | +} | |
8696 | +EXPORT_SYMBOL_GPL(toi_extent_state_goto_start); | |
8697 | + | |
8698 | +/** | |
8699 | + * toi_extent_state_save - save state of the iterator | |
8700 | + * @state: Current state of the chain | |
8701 | + * @saved_state: Iterator to populate | |
8702 | + * | |
8703 | + * Given a state and a struct hibernate_extent_iterate_saved_state, save the | |
8704 | + * current position (chain number, extent index within it, offset) in a format | |
8705 | + * that can be used with relocated chains (at resume time). | |
8706 | + **/ | |
8707 | +void toi_extent_state_save(struct toi_extent_iterate_state *state, | |
8708 | + struct hibernate_extent_iterate_saved_state *saved_state) | |
8709 | +{ | |
8710 | + struct hibernate_extent *extent; | |
8711 | + | |
8712 | + saved_state->chain_num = state->current_chain; | |
8713 | + saved_state->extent_num = 0; | |
8714 | + saved_state->offset = state->current_offset; | |
8715 | + | |
8716 | + if (saved_state->chain_num == -1) | |
8717 | + return; | |
8718 | + | |
8719 | + extent = (state->chains + state->current_chain)->first; | |
8720 | + /* Count the extents before the current one; stop at the chain end */ | |
8721 | + while (extent && extent != state->current_extent) { | |
8722 | + saved_state->extent_num++; | |
8723 | + extent = extent->next; | |
8724 | + } | |
8725 | +} | |
8726 | +EXPORT_SYMBOL_GPL(toi_extent_state_save); | |
8727 | + | |
8728 | +/** | |
8729 | + * toi_extent_state_restore - restore the position saved by extent_state_save | |
8730 | + * @state: State to populate | |
8731 | + * @saved_state: Iterator saved to restore | |
8732 | + **/ | |
8733 | +void toi_extent_state_restore(struct toi_extent_iterate_state *state, | |
8734 | + struct hibernate_extent_iterate_saved_state *saved_state) | |
8735 | +{ | |
8736 | + int posn = saved_state->extent_num; | |
8737 | + | |
8738 | + if (saved_state->chain_num == -1) { | |
8739 | + toi_extent_state_goto_start(state); | |
8740 | + return; | |
8741 | + } | |
8742 | + | |
8743 | + state->current_chain = saved_state->chain_num; | |
8744 | + state->current_extent = (state->chains + state->current_chain)->first; | |
8745 | + state->current_offset = saved_state->offset; | |
8746 | + /* Skip extent_num extents, but never step past the end of the chain */ | |
8747 | + while (state->current_extent && posn--) | |
8748 | + state->current_extent = state->current_extent->next; | |
8749 | +} | |
8750 | +EXPORT_SYMBOL_GPL(toi_extent_state_restore); | |
8751 | diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h | |
8752 | new file mode 100644 | |
8753 | index 0000000..22ffb9b | |
8754 | --- /dev/null | |
8755 | +++ b/kernel/power/tuxonice_extent.h | |
8756 | @@ -0,0 +1,72 @@ | |
8757 | +/* | |
8758 | + * kernel/power/tuxonice_extent.h | |
8759 | + * | |
8760 | + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net) | |
8761 | + * | |
8762 | + * This file is released under the GPLv2. | |
8763 | + * | |
8764 | + * It contains declarations related to extents. Extents are | |
8765 | + * TuxOnIce's method of storing some of the metadata for the image. | |
8766 | + * See tuxonice_extent.c for more info. | |
8767 | + * | |
8768 | + */ | |
8769 | + | |
8770 | +#include "tuxonice_modules.h" | |
8771 | + | |
8772 | +#ifndef EXTENT_H | |
8773 | +#define EXTENT_H | |
8774 | + | |
8775 | +struct hibernate_extent { | |
8776 | + unsigned long start, end; | |
8777 | + struct hibernate_extent *next; | |
8778 | +}; | |
8779 | + | |
8780 | +struct hibernate_extent_chain { | |
8781 | + int size; /* size of the chain ie sum (max-min+1) */ | |
8782 | + int num_extents; | |
8783 | + struct hibernate_extent *first, *last_touched; | |
8784 | +}; | |
8785 | + | |
8786 | +struct toi_extent_iterate_state { | |
8787 | + struct hibernate_extent_chain *chains; | |
8788 | + int num_chains; | |
8789 | + int current_chain; | |
8790 | + struct hibernate_extent *current_extent; | |
8791 | + unsigned long current_offset; | |
8792 | +}; | |
8793 | + | |
8794 | +struct hibernate_extent_iterate_saved_state { | |
8795 | + int chain_num; | |
8796 | + int extent_num; | |
8797 | + unsigned long offset; | |
8798 | +}; | |
8799 | + | |
8800 | +#define toi_extent_state_eof(state) \ | |
8801 | + ((state)->num_chains == (state)->current_chain) | |
8802 | + | |
8803 | +/* Simplify iterating through all the values in an extent chain */ | |
8804 | +#define toi_extent_for_each(extent_chain, extentpointer, value) \ | |
8805 | +if ((extent_chain)->first) \ | |
8806 | + for ((extentpointer) = (extent_chain)->first, (value) = \ | |
8807 | + (extentpointer)->start; \ | |
8808 | + ((extentpointer) && ((extentpointer)->next || (value) <= \ | |
8809 | + (extentpointer)->end)); \ | |
8810 | + (((value) == (extentpointer)->end) ? \ | |
8811 | + ((extentpointer) = (extentpointer)->next, (value) = \ | |
8812 | + ((extentpointer) ? (extentpointer)->start : 0)) : \ | |
8813 | + (value)++)) | |
8814 | + | |
8815 | +void toi_put_extent_chain(struct hibernate_extent_chain *chain); | |
8816 | +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, | |
8817 | + unsigned long start, unsigned long end); | |
8818 | +int toi_serialise_extent_chain(struct toi_module_ops *owner, | |
8819 | + struct hibernate_extent_chain *chain); | |
8820 | +int toi_load_extent_chain(struct hibernate_extent_chain *chain); | |
8821 | + | |
8822 | +void toi_extent_state_save(struct toi_extent_iterate_state *state, | |
8823 | + struct hibernate_extent_iterate_saved_state *saved_state); | |
8824 | +void toi_extent_state_restore(struct toi_extent_iterate_state *state, | |
8825 | + struct hibernate_extent_iterate_saved_state *saved_state); | |
8826 | +void toi_extent_state_goto_start(struct toi_extent_iterate_state *state); | |
8827 | +unsigned long toi_extent_state_next(struct toi_extent_iterate_state *state); | |
8828 | +#endif | |
8829 | diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c | |
8830 | new file mode 100644 | |
9474138d | 8831 | index 0000000..0458a0c |
2380c486 JR |
8832 | --- /dev/null |
8833 | +++ b/kernel/power/tuxonice_file.c | |
9474138d | 8834 | @@ -0,0 +1,1248 @@ |
2380c486 JR |
8835 | +/* |
8836 | + * kernel/power/tuxonice_file.c | |
8837 | + * | |
8838 | + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net) | |
8839 | + * | |
8840 | + * Distributed under GPLv2. | |
8841 | + * | |
8842 | + * This file encapsulates functions for usage of a simple file as a | |
8843 | + * backing store. It is based upon the swapallocator, and shares the | |
8844 | + * same basic working. Here, though, we have nothing to do with | |
8845 | + * swapspace, and only one device to worry about. | |
8846 | + * | |
8847 | + * The user can just | |
8848 | + * | |
8849 | + * echo TuxOnIce > /path/to/my_file | |
8850 | + * | |
8851 | + * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file | |
8852 | + * | |
8853 | + * and | |
8854 | + * | |
8855 | + * echo /path/to/my_file > /sys/power/tuxonice/file/target | |
8856 | + * | |
8857 | + * then put what they find in /sys/power/tuxonice/resume | |
8858 | + * as their resume= parameter in lilo.conf (and rerun lilo if using it). | |
8859 | + * | |
8860 | + * Having done this, they're ready to hibernate and resume. | |
8861 | + * | |
8862 | + * TODO: | |
8863 | + * - File resizing. | |
8864 | + */ | |
8865 | + | |
8866 | +#include <linux/suspend.h> | |
2380c486 JR |
8867 | +#include <linux/blkdev.h> |
8868 | +#include <linux/file.h> | |
8869 | +#include <linux/stat.h> | |
8870 | +#include <linux/mount.h> | |
8871 | +#include <linux/statfs.h> | |
8872 | +#include <linux/syscalls.h> | |
8873 | +#include <linux/namei.h> | |
8874 | +#include <linux/fs.h> | |
8875 | +#include <linux/root_dev.h> | |
8876 | + | |
8877 | +#include "tuxonice.h" | |
8878 | +#include "tuxonice_sysfs.h" | |
8879 | +#include "tuxonice_modules.h" | |
8880 | +#include "tuxonice_ui.h" | |
8881 | +#include "tuxonice_extent.h" | |
8882 | +#include "tuxonice_io.h" | |
8883 | +#include "tuxonice_storage.h" | |
8884 | +#include "tuxonice_block_io.h" | |
8885 | +#include "tuxonice_alloc.h" | |
8886 | +#include "tuxonice_builtin.h" | |
8887 | + | |
8888 | +static struct toi_module_ops toi_fileops; | |
8889 | + | |
8890 | +/* Details of our target. */ | |
8891 | + | |
8892 | +static char toi_file_target[256]; | |
8893 | +static struct inode *target_inode; | |
8894 | +static struct file *target_file; | |
8895 | +static struct block_device *toi_file_target_bdev; | |
8896 | +static dev_t resume_file_dev_t; | |
8897 | +static int used_devt; | |
8898 | +static int setting_toi_file_target; | |
8899 | +static sector_t target_firstblock, target_header_start; | |
8900 | +static int target_storage_available; | |
8901 | +static int target_claim; | |
8902 | + | |
8903 | +/* Old signatures */ | |
8904 | +static char HaveImage[] = "HaveImage\n"; | |
8905 | +static char NoImage[] = "TuxOnIce\n"; | |
8906 | +#define sig_size (sizeof(HaveImage) + 1) | |
8907 | + | |
8908 | +struct toi_file_header { | |
8909 | + char sig[sig_size]; | |
8910 | + int resumed_before; | |
8911 | + unsigned long first_header_block; | |
8912 | + int have_image; | |
8913 | +}; | |
8914 | + | |
8915 | +/* Header Page Information */ | |
8916 | +static int header_pages_reserved; | |
8917 | + | |
8918 | +/* Main Storage Pages */ | |
8919 | +static int main_pages_allocated, main_pages_requested; | |
8920 | + | |
8921 | +#define target_is_normal_file() (S_ISREG(target_inode->i_mode)) | |
8922 | + | |
8923 | +static struct toi_bdev_info devinfo; | |
8924 | + | |
8925 | +/* Extent chain for blocks */ | |
8926 | +static struct hibernate_extent_chain block_chain; | |
8927 | + | |
8928 | +/* Signature operations */ | |
8929 | +enum { | |
8930 | + GET_IMAGE_EXISTS, | |
8931 | + INVALIDATE, | |
8932 | + MARK_RESUME_ATTEMPTED, | |
8933 | + UNMARK_RESUME_ATTEMPTED, | |
8934 | +}; | |
8935 | + | |
8936 | +/** | |
8937 | + * set_devinfo - populate device information | |
8938 | + * @bdev: Block device on which the file is. | |
8939 | + * @target_blkbits: Number of bits in the page block size of the target | |
8940 | + * file inode. | |
8941 | + * | |
8942 | + * Populate the devinfo structure about the target device. | |
8943 | + * | |
8944 | + * Background: a sector represents a fixed amount of data (generally 512 bytes). | |
8945 | + * The hard drive sector size and the filesystem block size may be different. | |
8946 | + * If fs_blksize is the log2 of the filesystem block size and hd_blksize the | |
8947 | + * log2 of the hard drive sector size: | |
8948 | + * | |
8949 | + * sector >> (fs_blksize - hd_blksize) converts hd sector into fs block | |
8950 | + * fs_block << (fs_blksize - hd_blksize) converts fs block into hd sector number | |
8951 | + * | |
8952 | + * Here target_blkbits == fs_blksize and hd_blksize == 9, hence: | |
8953 | + * | |
8954 | + * (fs_blksize - hd_blksize) == devinfo.bmap_shift | |
8955 | + * | |
8956 | + * The memory page size is defined by PAGE_SHIFT. devinfo.blocks_per_page is the | |
8957 | + * number of filesystem blocks per memory page. | |
8958 | + * | |
8959 | + * Note that blocks are stored after >>. They are used after being <<. | |
8960 | + * We always only use PAGE_SIZE aligned blocks. | |
8961 | + * | |
8962 | + * Side effects: | |
8963 | + * devinfo.bdev, devinfo.bmap_shift and devinfo.blocks_per_page are set. | |
8964 | + */ | |
8965 | +static void set_devinfo(struct block_device *bdev, int target_blkbits) | |
8966 | +{ | |
8967 | + devinfo.bdev = bdev; | |
8968 | + if (!target_blkbits) { | |
8969 | + devinfo.bmap_shift = 0; | |
8970 | + devinfo.blocks_per_page = 0; | |
8971 | + } else { | |
8972 | + /* We are assuming a hard disk with 512 (2^9) bytes/sector */ | |
8973 | + devinfo.bmap_shift = target_blkbits - 9; | |
8974 | + devinfo.blocks_per_page = (1 << (PAGE_SHIFT - target_blkbits)); | |
8975 | + } | |
8976 | +} | |
8977 | + | |
8978 | +static long raw_to_real(long raw) | |
8979 | +{ | |
8980 | + long result; | |
8981 | + | |
8982 | + result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) + | |
8983 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) / | |
8984 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int)); | |
8985 | + | |
8986 | + return result < 0 ? 0 : result; | |
8987 | +} | |
8988 | + | |
8989 | +static int toi_file_storage_available(void) | |
8990 | +{ | |
8991 | + int result = 0; | |
8992 | + struct block_device *bdev = toi_file_target_bdev; | |
8993 | + | |
8994 | + if (!target_inode) | |
8995 | + return 0; | |
8996 | + | |
8997 | + switch (target_inode->i_mode & S_IFMT) { | |
8998 | + case S_IFSOCK: | |
8999 | + case S_IFCHR: | |
9000 | + case S_IFIFO: /* Socket, Char, Fifo */ | |
9001 | + return -1; | |
9002 | + case S_IFREG: /* Regular file: current size - holes + free | |
9003 | + space on part */ | |
9004 | + result = target_storage_available; | |
9005 | + break; | |
9006 | + case S_IFBLK: /* Block device */ | |
9007 | + if (!bdev->bd_disk) { | |
9008 | + printk(KERN_INFO "bdev->bd_disk null.\n"); | |
9009 | + return 0; | |
9010 | + } | |
9011 | + | |
9012 | + result = (bdev->bd_part ? | |
9013 | + bdev->bd_part->nr_sects : | |
9014 | + get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9); | |
9015 | + } | |
9016 | + | |
9017 | + return raw_to_real(result); | |
9018 | +} | |
9019 | + | |
9020 | +static int has_contiguous_blocks(int page_num) | |
9021 | +{ | |
9022 | + int j; | |
9023 | + sector_t last = 0; | |
9024 | + | |
9025 | + for (j = 0; j < devinfo.blocks_per_page; j++) { | |
9026 | + sector_t this = bmap(target_inode, | |
9027 | + page_num * devinfo.blocks_per_page + j); | |
9028 | + | |
9029 | + if (!this || (last && (last + 1) != this)) | |
9030 | + break; | |
9031 | + | |
9032 | + last = this; | |
9033 | + } | |
9034 | + | |
9035 | + return j == devinfo.blocks_per_page; | |
9036 | +} | |
9037 | + | |
9038 | +static int size_ignoring_ignored_pages(void) | |
9039 | +{ | |
9040 | + int mappable = 0, i; | |
9041 | + | |
9042 | + if (!target_is_normal_file()) | |
9043 | + return toi_file_storage_available(); | |
9044 | + | |
9045 | + for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) | |
9046 | + if (has_contiguous_blocks(i)) | |
9047 | + mappable++; | |
9048 | + | |
9049 | + return mappable; | |
9050 | +} | |
9051 | + | |
9052 | +/** | |
9053 | + * __populate_block_list - add an extent to the chain | |
9054 | + * @min: Start of the extent (first physical block = sector) | |
9055 | + * @max: End of the extent (last physical block = sector) | |
9056 | + * | |
9057 | + * If TOI_TEST_BIO is set, print a debug message, outputting the bounds of the | |
9058 | + * extent shifted left by devinfo.bmap_shift. | |
9059 | + **/ | |
9060 | +static int __populate_block_list(int min, int max) | |
9061 | +{ | |
9062 | + if (test_action_state(TOI_TEST_BIO)) | |
9063 | + printk(KERN_INFO "Adding extent %d-%d.\n", | |
9064 | + min << devinfo.bmap_shift, | |
9065 | + ((max + 1) << devinfo.bmap_shift) - 1); | |
9066 | + | |
9067 | + return toi_add_to_extent_chain(&block_chain, min, max); | |
9068 | +} | |
9069 | + | |
9070 | +static int apply_header_reservation(void) | |
9071 | +{ | |
9072 | + int i; | |
9073 | + | |
9074 | + /* Apply header space reservation */ | |
9075 | + toi_extent_state_goto_start(&toi_writer_posn); | |
2380c486 JR |
9076 | + |
9077 | + for (i = 0; i < header_pages_reserved; i++) | |
0ada99ac | 9078 | + if (toi_bio_ops.forward_one_page(1, 0)) |
2380c486 JR |
9079 | + return -ENOSPC; |
9080 | + | |
9081 | + /* The end of header pages will be the start of pageset 2 */ | |
9082 | + toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]); | |
9083 | + | |
9084 | + return 0; | |
9085 | +} | |
9086 | + | |
9087 | +static int populate_block_list(void) | |
9088 | +{ | |
9089 | + int i, extent_min = -1, extent_max = -1, got_header = 0, result = 0; | |
9090 | + | |
9091 | + if (block_chain.first) | |
9092 | + toi_put_extent_chain(&block_chain); | |
9093 | + | |
9094 | + if (!target_is_normal_file()) { | |
e999739a | 9095 | + result = (target_storage_available > 0) ? |
2380c486 JR |
9096 | + __populate_block_list(devinfo.blocks_per_page, |
9097 | + (target_storage_available + 1) * | |
9098 | + devinfo.blocks_per_page - 1) : 0; | |
e999739a | 9099 | + if (result) |
9100 | + return result; | |
9101 | + goto out; | |
2380c486 JR |
9102 | + } |
9103 | + | |
9104 | + for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) { | |
9105 | + sector_t new_sector; | |
9106 | + | |
9107 | + if (!has_contiguous_blocks(i)) | |
9108 | + continue; | |
9109 | + | |
0ada99ac | 9110 | + new_sector = bmap(target_inode, (i * devinfo.blocks_per_page)); |
2380c486 JR |
9111 | + |
9112 | + /* | |
9113 | + * Ignore the first block in the file. | |
9114 | + * It gets the header. | |
9115 | + */ | |
9116 | + if (new_sector == target_firstblock >> devinfo.bmap_shift) { | |
9117 | + got_header = 1; | |
9118 | + continue; | |
9119 | + } | |
9120 | + | |
9121 | + /* | |
9122 | + * I'd love to be able to fill in holes and resize | |
9123 | + * files, but not yet... | |
9124 | + */ | |
9125 | + | |
9126 | + if (new_sector == extent_max + 1) | |
9127 | + extent_max += devinfo.blocks_per_page; | |
9128 | + else { | |
9129 | + if (extent_min > -1) { | |
9130 | + result = __populate_block_list(extent_min, | |
9131 | + extent_max); | |
9132 | + if (result) | |
9133 | + return result; | |
9134 | + } | |
9135 | + | |
9136 | + extent_min = new_sector; | |
9137 | + extent_max = extent_min + | |
9138 | + devinfo.blocks_per_page - 1; | |
9139 | + } | |
9140 | + } | |
9141 | + | |
9142 | + if (extent_min > -1) { | |
9143 | + result = __populate_block_list(extent_min, extent_max); | |
9144 | + if (result) | |
9145 | + return result; | |
9146 | + } | |
9147 | + | |
e999739a | 9148 | +out: |
2380c486 JR |
9149 | + return apply_header_reservation(); |
9150 | +} | |
9151 | + | |
9152 | +static void toi_file_cleanup(int finishing_cycle) | |
9153 | +{ | |
9154 | + if (toi_file_target_bdev) { | |
9155 | + if (target_claim) { | |
9156 | + bd_release(toi_file_target_bdev); | |
9157 | + target_claim = 0; | |
9158 | + } | |
9159 | + | |
9160 | + if (used_devt) { | |
9161 | + blkdev_put(toi_file_target_bdev, | |
9162 | + FMODE_READ | FMODE_NDELAY); | |
9163 | + used_devt = 0; | |
9164 | + } | |
9165 | + toi_file_target_bdev = NULL; | |
9166 | + target_inode = NULL; | |
9167 | + set_devinfo(NULL, 0); | |
9168 | + target_storage_available = 0; | |
9169 | + } | |
9170 | + | |
9171 | + if (target_file && !IS_ERR(target_file)) | |
9172 | + filp_close(target_file, NULL); | |
9173 | + | |
9174 | + target_file = NULL; | |
9175 | +} | |
9176 | + | |
9177 | +/** | |
9178 | + * reopen_resume_devt - reopen the resume device and refresh devinfo | |
9179 | + * | |
9180 | + * Having opened resume= once, we remember the major and | |
9181 | + * minor nodes and use them to reopen the bdev for checking | |
9182 | + * whether an image exists (possibly when starting a resume). | |
9183 | + **/ | |
9184 | +static void reopen_resume_devt(void) | |
9185 | +{ | |
9186 | + toi_file_target_bdev = toi_open_by_devnum(resume_file_dev_t, | |
9187 | + FMODE_READ | FMODE_NDELAY); | |
9188 | + if (IS_ERR(toi_file_target_bdev)) { | |
9189 | + printk(KERN_INFO "Got a dev_num (%lx) but failed to open it.\n", | |
9190 | + (unsigned long) resume_file_dev_t); | |
9191 | + return; | |
9192 | + } | |
9193 | + target_inode = toi_file_target_bdev->bd_inode; | |
9194 | + set_devinfo(toi_file_target_bdev, target_inode->i_blkbits); | |
9195 | +} | |
9196 | + | |
9197 | +static void toi_file_get_target_info(char *target, int get_size, | |
9198 | + int resume_param) | |
9199 | +{ | |
9200 | + if (target_file) | |
9201 | + toi_file_cleanup(0); | |
9202 | + | |
9203 | + if (!target || !strlen(target)) | |
9204 | + return; | |
9205 | + | |
e999739a | 9206 | + target_file = filp_open(target, O_RDONLY|O_LARGEFILE, 0); |
2380c486 JR |
9207 | + |
9208 | + if (IS_ERR(target_file) || !target_file) { | |
9209 | + | |
9210 | + if (!resume_param) { | |
9211 | + printk(KERN_INFO "Open file %s returned %p.\n", | |
9212 | + target, target_file); | |
9213 | + target_file = NULL; | |
9214 | + return; | |
9215 | + } | |
9216 | + | |
9217 | + target_file = NULL; | |
9218 | + wait_for_device_probe(); | |
9219 | + resume_file_dev_t = name_to_dev_t(target); | |
9220 | + if (!resume_file_dev_t) { | |
9221 | + struct kstat stat; | |
9222 | + int error = vfs_stat(target, &stat); | |
9223 | + printk(KERN_INFO "Open file %s returned %p and " | |
9224 | + "name_to_devt failed.\n", target, | |
9225 | + target_file); | |
9226 | + if (error) | |
9227 | + printk(KERN_INFO "Stating the file also failed." | |
9228 | + " Nothing more we can do.\n"); | |
9229 | + else | |
9230 | + resume_file_dev_t = stat.rdev; | |
9231 | + return; | |
9232 | + } | |
9233 | + | |
9234 | + toi_file_target_bdev = toi_open_by_devnum(resume_file_dev_t, | |
9235 | + FMODE_READ | FMODE_NDELAY); | |
9236 | + if (IS_ERR(toi_file_target_bdev)) { | |
9237 | + printk(KERN_INFO "Got a dev_num (%lx) but failed to " | |
9238 | + "open it.\n", | |
9239 | + (unsigned long) resume_file_dev_t); | |
9240 | + return; | |
9241 | + } | |
9242 | + used_devt = 1; | |
9243 | + target_inode = toi_file_target_bdev->bd_inode; | |
9244 | + } else | |
9245 | + target_inode = target_file->f_mapping->host; | |
9246 | + | |
9247 | + if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) || | |
9248 | + S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) { | |
9249 | + printk(KERN_INFO "File support works with regular files," | |
9250 | + " character files and block devices.\n"); | |
9251 | + goto cleanup; | |
9252 | + } | |
9253 | + | |
9254 | + if (!used_devt) { | |
9255 | + if (S_ISBLK(target_inode->i_mode)) { | |
9256 | + toi_file_target_bdev = I_BDEV(target_inode); | |
9257 | + if (!bd_claim(toi_file_target_bdev, &toi_fileops)) | |
9258 | + target_claim = 1; | |
9259 | + } else | |
9260 | + toi_file_target_bdev = target_inode->i_sb->s_bdev; | |
9261 | + resume_file_dev_t = toi_file_target_bdev->bd_dev; | |
9262 | + } | |
9263 | + | |
9264 | + set_devinfo(toi_file_target_bdev, target_inode->i_blkbits); | |
9265 | + | |
9266 | + if (get_size) | |
9267 | + target_storage_available = size_ignoring_ignored_pages(); | |
9268 | + | |
9269 | + if (!resume_param) | |
9270 | + target_firstblock = bmap(target_inode, 0) << devinfo.bmap_shift; | |
9271 | + | |
9272 | + return; | |
9273 | +cleanup: | |
9274 | + target_inode = NULL; | |
9275 | + if (target_file) { | |
9276 | + filp_close(target_file, NULL); | |
9277 | + target_file = NULL; | |
9278 | + } | |
9279 | + set_devinfo(NULL, 0); | |
9280 | + target_storage_available = 0; | |
9281 | +} | |
9282 | + | |
9283 | +static void toi_file_noresume_reset(void) | |
9284 | +{ | |
9285 | + toi_bio_ops.rw_cleanup(READ); | |
9286 | +} | |
9287 | + | |
9288 | +/** | |
9289 | + * parse_signature - check if the file is suitable for resuming | |
9290 | + * @header: Signature of the file | |
9291 | + * | |
9292 | + * Given a file header, check its signature. Returns 1 if it marks a valid | |
9293 | + * hibernate image, 0 if it marks no image, and -1 if no signature matches. | |
9294 | + * On a return of 1, TOI_RESUMED_BEFORE is set or cleared from the header. | |
9295 | + **/ | |
9296 | +static int parse_signature(struct toi_file_header *header) | |
9297 | +{ | |
9298 | + int have_image = !memcmp(HaveImage, header->sig, sizeof(HaveImage) - 1); | |
9299 | + int no_image_header = !memcmp(NoImage, header->sig, | |
9300 | + sizeof(NoImage) - 1); | |
9301 | + int binary_sig = !memcmp(tuxonice_signature, header->sig, | |
9302 | + sizeof(tuxonice_signature)); | |
9303 | + | |
9304 | + if (no_image_header || (binary_sig && !header->have_image)) | |
9305 | + return 0; | |
9306 | + | |
9307 | + if (!have_image && !binary_sig) | |
9308 | + return -1; | |
9309 | + | |
9310 | + if (header->resumed_before) | |
9311 | + set_toi_state(TOI_RESUMED_BEFORE); | |
9312 | + else | |
9313 | + clear_toi_state(TOI_RESUMED_BEFORE); | |
9314 | + | |
9315 | + target_header_start = header->first_header_block; | |
9316 | + return 1; | |
9317 | +} | |
9318 | + | |
9319 | +/** | |
9320 | + * prepare_signature - populate the signature structure | |
9321 | + * @current_header: Signature structure to populate | |
9322 | + * @first_header_block: Sector with the header containing the extents | |
9323 | + **/ | |
9324 | +static int prepare_signature(struct toi_file_header *current_header, | |
9325 | + unsigned long first_header_block) | |
9326 | +{ | |
9327 | + memcpy(current_header->sig, tuxonice_signature, | |
9328 | + sizeof(tuxonice_signature)); | |
9329 | + current_header->resumed_before = 0; | |
9330 | + current_header->first_header_block = first_header_block; | |
9331 | + current_header->have_image = 1; | |
9332 | + return 0; | |
9333 | +} | |
9334 | + | |
9335 | +static int toi_file_storage_allocated(void) | |
9336 | +{ | |
9337 | + if (!target_inode) | |
9338 | + return 0; | |
9339 | + | |
9340 | + if (target_is_normal_file()) | |
9341 | + return (int) raw_to_real(target_storage_available); | |
9342 | + else | |
9343 | + return (int) raw_to_real(main_pages_requested); | |
9344 | +} | |
9345 | + | |
9346 | +/** | |
9347 | + * toi_file_release_storage - deallocate the block chain | |
9348 | + **/ | |
9349 | +static int toi_file_release_storage(void) | |
9350 | +{ | |
2380c486 JR |
9351 | + toi_put_extent_chain(&block_chain); |
9352 | + | |
9353 | + header_pages_reserved = 0; | |
9354 | + main_pages_allocated = 0; | |
9355 | + main_pages_requested = 0; | |
9356 | + return 0; | |
9357 | +} | |
9358 | + | |
9359 | +static void toi_file_reserve_header_space(int request) | |
9360 | +{ | |
9361 | + header_pages_reserved = request; | |
2380c486 JR |
9362 | +} |
9363 | + | |
9364 | +static int toi_file_allocate_storage(int main_space_requested) | |
9365 | +{ | |
9366 | + int result = 0; | |
9367 | + | |
9368 | + int extra_pages = DIV_ROUND_UP(main_space_requested * | |
9369 | + (sizeof(unsigned long) + sizeof(int)), PAGE_SIZE); | |
9370 | + int pages_to_get = main_space_requested + extra_pages + | |
9371 | + header_pages_reserved; | |
9372 | + int blocks_to_get = pages_to_get - block_chain.size; | |
9373 | + | |
9374 | + /* Only release_storage reduces the size */ | |
9375 | + if (blocks_to_get < 1) | |
0ada99ac | 9376 | + return apply_header_reservation(); |
2380c486 JR |
9377 | + |
9378 | + result = populate_block_list(); | |
9379 | + | |
9380 | + if (result) | |
9381 | + return result; | |
9382 | + | |
9383 | + toi_message(TOI_WRITER, TOI_MEDIUM, 0, | |
9384 | + "Finished with block_chain.size == %d.\n", | |
9385 | + block_chain.size); | |
9386 | + | |
9387 | + if (block_chain.size < pages_to_get) { | |
9388 | + printk(KERN_INFO "Block chain size (%d) < header pages (%d) + " | |
9389 | + "extra pages (%d) + main pages (%d) (=%d " | |
9390 | + "pages).\n", | |
9391 | + block_chain.size, header_pages_reserved, | |
9392 | + extra_pages, main_space_requested, | |
9393 | + pages_to_get); | |
9394 | + result = -ENOSPC; | |
9395 | + } | |
9396 | + | |
9397 | + main_pages_requested = main_space_requested; | |
9398 | + main_pages_allocated = main_space_requested + extra_pages; | |
9399 | + return result; | |
9400 | +} | |
9401 | + | |
9402 | +/** | |
9403 | + * toi_file_write_header_init - save the header on the image | |
9404 | + **/ | |
9405 | +static int toi_file_write_header_init(void) | |
9406 | +{ | |
9407 | + int result; | |
9408 | + | |
9409 | + toi_bio_ops.rw_init(WRITE, 0); | |
9410 | + toi_writer_buffer_posn = 0; | |
9411 | + | |
9412 | + /* Info needed to bootstrap goes at the start of the header. | |
9413 | + * First we save the basic info needed for reading, including the number | |
9414 | + * of header pages. Then we save the structs containing data needed | |
9415 | + * for reading the header pages back. | |
9416 | + * Note that even if header pages take more than one page, when we | |
9417 | + * read back the info, we will have restored the location of the | |
9418 | + * next header page by the time we go to use it. | |
9419 | + */ | |
9420 | + | |
9421 | + result = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops, | |
9422 | + (char *) &toi_writer_posn_save, | |
9423 | + sizeof(toi_writer_posn_save)); | |
9424 | + | |
9425 | + if (result) | |
9426 | + return result; | |
9427 | + | |
9428 | + result = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops, | |
9429 | + (char *) &devinfo, sizeof(devinfo)); | |
9430 | + | |
9431 | + if (result) | |
9432 | + return result; | |
9433 | + | |
9434 | + /* Flush the chain */ | |
9435 | + toi_serialise_extent_chain(&toi_fileops, &block_chain); | |
9436 | + | |
9437 | + return 0; | |
9438 | +} | |
9439 | + | |
9440 | +static int toi_file_write_header_cleanup(void) | |
9441 | +{ | |
9442 | + struct toi_file_header *header; | |
0ada99ac | 9443 | + int result, result2; |
2380c486 JR |
9444 | + unsigned long sig_page = toi_get_zeroed_page(38, TOI_ATOMIC_GFP); |
9445 | + | |
9446 | + /* Write any unsaved data */ | |
0ada99ac | 9447 | + result = toi_bio_ops.write_header_chunk_finish(); |
2380c486 | 9448 | + |
0ada99ac | 9449 | + if (result) |
9450 | + goto out; | |
2380c486 JR |
9451 | + |
9452 | + toi_extent_state_goto_start(&toi_writer_posn); | |
0ada99ac | 9453 | + toi_bio_ops.forward_one_page(1, 1); |
2380c486 JR |
9454 | + |
9455 | + /* Adjust image header */ | |
9456 | + result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev, | |
9457 | + target_firstblock, | |
9458 | + virt_to_page(sig_page)); | |
9459 | + if (result) | |
9460 | + goto out; | |
9461 | + | |
9462 | + header = (struct toi_file_header *) sig_page; | |
9463 | + | |
9464 | + prepare_signature(header, | |
9465 | + toi_writer_posn.current_offset << | |
9466 | + devinfo.bmap_shift); | |
9467 | + | |
9468 | + result = toi_bio_ops.bdev_page_io(WRITE, toi_file_target_bdev, | |
9469 | + target_firstblock, | |
9470 | + virt_to_page(sig_page)); | |
9471 | + | |
9472 | +out: | |
0ada99ac | 9473 | + result2 = toi_bio_ops.finish_all_io(); |
2380c486 JR |
9474 | + toi_free_page(38, sig_page); |
9475 | + | |
0ada99ac | 9476 | + return result ? result : result2; |
2380c486 JR |
9477 | +} |
9478 | + | |
9479 | +/* HEADER READING */ | |
9480 | + | |
9481 | +/** | |
9482 | + * toi_file_read_header_init - check content of signature | |
9483 | + * | |
9484 | + * Entry point of the resume path. | |
9485 | + * 1. Attempt to read the device specified with resume=. | |
9486 | + * 2. Check the contents of the header for our signature. | |
9487 | + * 3. Warn, ignore, reset and/or continue as appropriate. | |
9488 | + * 4. If continuing, read the toi_file configuration section | |
9489 | + * of the header and set up block device info so we can read | |
9490 | + * the rest of the header & image. | |
9491 | + * | |
9492 | + * Returns: | |
9493 | + * May not return if the user chooses to reboot at a warning. | |
9494 | + * -EINVAL if cannot resume at this time. Booting should continue | |
9495 | + * normally. | |
9496 | + **/ | |
9497 | +static int toi_file_read_header_init(void) | |
9498 | +{ | |
9499 | + int result; | |
9500 | + struct block_device *tmp; | |
9501 | + | |
9502 | + /* Allocate toi_writer_buffer */ | |
9503 | + toi_bio_ops.read_header_init(); | |
9504 | + | |
9505 | + /* | |
9506 | + * Read toi_file configuration (header containing metadata). | |
9507 | + * target_header_start is the first sector of the header. It has been | |
9508 | + * set when checking if the file was suitable for resuming, see | |
9509 | + * do_toi_step(STEP_RESUME_CAN_RESUME). | |
9510 | + */ | |
9511 | + result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev, | |
9512 | + target_header_start, | |
9513 | + virt_to_page((unsigned long) toi_writer_buffer)); | |
9514 | + | |
9515 | + if (result) { | |
e999739a | 9516 | + printk(KERN_ERR "FileAllocator read header init: Failed to " |
9517 | + "initialise reading the first page of data.\n"); | |
2380c486 JR |
9518 | + toi_bio_ops.rw_cleanup(READ); |
9519 | + return result; | |
9520 | + } | |
9521 | + | |
9522 | + /* toi_writer_posn_save[0] contains the header */ | |
9523 | + memcpy(&toi_writer_posn_save, toi_writer_buffer, | |
9524 | + sizeof(toi_writer_posn_save)); | |
9525 | + | |
9526 | + /* Save the position in the buffer */ | |
9527 | + toi_writer_buffer_posn = sizeof(toi_writer_posn_save); | |
9528 | + | |
9529 | + tmp = devinfo.bdev; | |
9530 | + | |
9531 | + /* See tuxonice_block_io.h */ | |
9532 | + memcpy(&devinfo, | |
9533 | + toi_writer_buffer + toi_writer_buffer_posn, | |
9534 | + sizeof(devinfo)); | |
9535 | + | |
9536 | + devinfo.bdev = tmp; | |
9537 | + toi_writer_buffer_posn += sizeof(devinfo); | |
9538 | + | |
9539 | + /* Reinitialize the extent pointer */ | |
9540 | + toi_extent_state_goto_start(&toi_writer_posn); | |
9541 | + /* Jump to the next page */ | |
9542 | + toi_bio_ops.set_extra_page_forward(); | |
9543 | + | |
9544 | + /* Bring back the chain from disk: this will read | |
9545 | + * all extents. | |
9546 | + */ | |
9547 | + return toi_load_extent_chain(&block_chain); | |
9548 | +} | |
9549 | + | |
9550 | +static int toi_file_read_header_cleanup(void) | |
9551 | +{ | |
9552 | + toi_bio_ops.rw_cleanup(READ); | |
9553 | + return 0; | |
9554 | +} | |
9555 | + | |
9556 | +/** | |
9557 | + * toi_file_signature_op - perform an operation on the file signature | |
9558 | + * @op: operation to perform | |
9559 | + * | |
9560 | + * op is one of GET_IMAGE_EXISTS, INVALIDATE, MARK_RESUME_ATTEMPTED or | |
9561 | + * UNMARK_RESUME_ATTEMPTED. | |
9562 | + * If the signature is changed, an I/O operation is performed. | |
9563 | + * The signature exists iff toi_file_signature_op(GET_IMAGE_EXISTS)>-1. | |
9564 | + **/ | |
9565 | +static int toi_file_signature_op(int op) | |
9566 | +{ | |
9567 | + char *cur; | |
0ada99ac | 9568 | + int result = 0, result2, changed = 0; |
2380c486 JR |
9569 | + struct toi_file_header *header; |
9570 | + | |
9571 | + if (!toi_file_target_bdev || IS_ERR(toi_file_target_bdev)) | |
9572 | + return -1; | |
9573 | + | |
9574 | + cur = (char *) toi_get_zeroed_page(17, TOI_ATOMIC_GFP); | |
9575 | + if (!cur) { | |
9576 | + printk(KERN_INFO "Unable to allocate a page for reading the " | |
9577 | + "image signature.\n"); | |
9578 | + return -ENOMEM; | |
9579 | + } | |
9580 | + | |
9581 | + result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev, | |
9582 | + target_firstblock, | |
9583 | + virt_to_page(cur)); | |
9584 | + | |
9585 | + if (result) | |
9586 | + goto out; | |
9587 | + | |
9588 | + header = (struct toi_file_header *) cur; | |
9589 | + result = parse_signature(header); | |
9590 | + | |
9591 | + switch (op) { | |
9592 | + case INVALIDATE: | |
9593 | + if (result == -1) | |
9594 | + goto out; | |
9595 | + | |
9596 | + memcpy(header->sig, tuxonice_signature, | |
9597 | + sizeof(tuxonice_signature)); | |
9598 | + header->resumed_before = 0; | |
9599 | + header->have_image = 0; | |
9600 | + result = 1; | |
9601 | + changed = 1; | |
9602 | + break; | |
9603 | + case MARK_RESUME_ATTEMPTED: | |
9604 | + if (result == 1) { | |
9605 | + header->resumed_before = 1; | |
9606 | + changed = 1; | |
9607 | + } | |
9608 | + break; | |
9609 | + case UNMARK_RESUME_ATTEMPTED: | |
9610 | + if (result == 1) { | |
9611 | + header->resumed_before = 0; | |
9612 | + changed = 1; | |
9613 | + } | |
9614 | + break; | |
9615 | + } | |
9616 | + | |
9617 | + if (changed) { | |
9618 | + int io_result = toi_bio_ops.bdev_page_io(WRITE, | |
9619 | + toi_file_target_bdev, target_firstblock, | |
9620 | + virt_to_page(cur)); | |
9621 | + if (io_result) | |
9622 | + result = io_result; | |
9623 | + } | |
9624 | + | |
9625 | +out: | |
0ada99ac | 9626 | + result2 = toi_bio_ops.finish_all_io(); |
2380c486 | 9627 | + toi_free_page(17, (unsigned long) cur); |
0ada99ac | 9628 | + return result ? result : result2; |
2380c486 JR |
9629 | +} |
9630 | + | |
9631 | +/** | |
9632 | + * toi_file_print_debug_stats - print debug info | |
9633 | + * @buffer: Buffer to populate with the debug text | |
9634 | + * @size: Size of the buffer | |
9635 | + **/ | |
9636 | +static int toi_file_print_debug_stats(char *buffer, int size) | |
9637 | +{ | |
9638 | + int len = 0; | |
9639 | + | |
9640 | + if (toiActiveAllocator != &toi_fileops) { | |
9641 | + len = scnprintf(buffer, size, | |
9642 | + "- FileAllocator inactive.\n"); | |
9643 | + return len; | |
9644 | + } | |
9645 | + | |
9646 | + len = scnprintf(buffer, size, "- FileAllocator active.\n"); | |
9647 | + | |
9648 | + len += scnprintf(buffer+len, size-len, " Storage available for " | |
9649 | + "image: %d pages.\n", | |
9650 | + toi_file_storage_allocated()); | |
9651 | + | |
9652 | + return len; | |
9653 | +} | |
9654 | + | |
9655 | +/** | |
9656 | + * toi_file_storage_needed - storage needed | |
9657 | + * | |
9658 | + * Returns amount of space in the image header required | |
9659 | + * for the toi_file's data. | |
9660 | + * | |
9661 | + * We ensure the space is allocated, but actually save the | |
9662 | + * data from write_header_init and therefore don't also define a | |
9663 | + * save_config_info routine. | |
9664 | + **/ | |
9665 | +static int toi_file_storage_needed(void) | |
9666 | +{ | |
0ada99ac | 9667 | + return strlen(toi_file_target) + 1 + |
2380c486 JR |
9668 | + sizeof(toi_writer_posn_save) + |
9669 | + sizeof(devinfo) + | |
0ada99ac | 9670 | + 2 * sizeof(int) + |
2380c486 JR |
9671 | + (2 * sizeof(unsigned long) * block_chain.num_extents); |
9672 | +} | |
9673 | + | |
9674 | +/** | |
9675 | + * toi_file_remove_image - invalidate the image | |
9676 | + **/ | |
9677 | +static int toi_file_remove_image(void) | |
9678 | +{ | |
9679 | + toi_file_release_storage(); | |
9680 | + return toi_file_signature_op(INVALIDATE); | |
9681 | +} | |
9682 | + | |
9683 | +/** | |
9684 | + * toi_file_image_exists - test if an image exists | |
9685 | + * | |
9686 | + * Repopulate toi_file_target_bdev if needed. | |
9687 | + **/ | |
9688 | +static int toi_file_image_exists(int quiet) | |
9689 | +{ | |
9690 | + if (!toi_file_target_bdev) | |
9691 | + reopen_resume_devt(); | |
9692 | + return toi_file_signature_op(GET_IMAGE_EXISTS); | |
9693 | +} | |
9694 | + | |
9695 | +/** | |
9696 | + * toi_file_mark_resume_attempted - set or clear the resume attempted flag | |
9697 | + * @mark: non-zero to mark the image as tried, zero to unmark it | |
9698 | + * | |
9699 | + * Record that we tried to resume from this image. Resuming | |
9700 | + * multiple times from the same image may be dangerous | |
9701 | + * (possible filesystem corruption). | |
9702 | + **/ | |
9703 | +static int toi_file_mark_resume_attempted(int mark) | |
9704 | +{ | |
9705 | + return toi_file_signature_op(mark ? MARK_RESUME_ATTEMPTED : | |
9706 | + UNMARK_RESUME_ATTEMPTED); | |
9707 | +} | |
9708 | + | |
9709 | +/** | |
9710 | + * toi_file_set_resume_param - validate the specified resume file | |
9711 | + * | |
9712 | + * Given a target filename, populate the resume parameter. This is | |
9713 | + * meant to be used by the user to populate the kernel command line. | |
9714 | + * By setting /sys/power/tuxonice/file/target, the valid resume | |
9715 | + * parameter to use is set and accessible through | |
9716 | + * /sys/power/tuxonice/resume. | |
9717 | + * | |
9718 | + * If the file could be located, we check if it contains a valid | |
9719 | + * signature. | |
9720 | + **/ | |
9721 | +static void toi_file_set_resume_param(void) | |
9722 | +{ | |
9723 | + char *buffer = (char *) toi_get_zeroed_page(18, TOI_ATOMIC_GFP); | |
9724 | + char *buffer2 = (char *) toi_get_zeroed_page(19, TOI_ATOMIC_GFP); | |
9725 | + unsigned long sector = bmap(target_inode, 0); | |
9726 | + int offset = 0; | |
9727 | + | |
9728 | + if (!buffer || !buffer2) { | |
9729 | + if (buffer) | |
9730 | + toi_free_page(18, (unsigned long) buffer); | |
9731 | + if (buffer2) | |
9732 | + toi_free_page(19, (unsigned long) buffer2); | |
e999739a | 9733 | + printk(KERN_ERR "TuxOnIce: Failed to allocate memory while " |
9734 | + "setting resume= parameter.\n"); | |
2380c486 JR |
9735 | + return; |
9736 | + } | |
9737 | + | |
9738 | + if (toi_file_target_bdev) { | |
9739 | + set_devinfo(toi_file_target_bdev, target_inode->i_blkbits); | |
9740 | + | |
9741 | + bdevname(toi_file_target_bdev, buffer2); | |
9742 | + offset += snprintf(buffer + offset, PAGE_SIZE - offset, | |
9743 | + "/dev/%s", buffer2); | |
9744 | + | |
9745 | + if (sector) | |
9746 | + /* The offset is: sector << (inode->i_blkbits - 9) */ | |
9747 | + offset += snprintf(buffer + offset, PAGE_SIZE - offset, | |
9748 | + ":0x%lx", sector << devinfo.bmap_shift); | |
9749 | + } else | |
9750 | + offset += snprintf(buffer + offset, PAGE_SIZE - offset, | |
9751 | + "%s is not a valid target.", toi_file_target); | |
9752 | + | |
9753 | + sprintf(resume_file, "file:%s", buffer); | |
9754 | + | |
9755 | + toi_free_page(18, (unsigned long) buffer); | |
9756 | + toi_free_page(19, (unsigned long) buffer2); | |
9757 | + | |
9758 | + toi_attempt_to_parse_resume_device(1); | |
9759 | +} | |
9760 | + | |
9761 | +/** | |
9762 | + * __test_toi_file_target - is the file target valid for hibernating? | |
9763 | + * @target: target file | |
9764 | + * @resume_param: whether resume= has been specified | |
9765 | + * @quiet: quiet flag | |
9766 | + * | |
9767 | + * Test whether the file target can be used for hibernating: valid target | |
9768 | + * and signature. | |
9769 | + * The resume parameter is set if needed. | |
9770 | + **/ | |
9771 | +static int __test_toi_file_target(char *target, int resume_param, int quiet) | |
9772 | +{ | |
9773 | + toi_file_get_target_info(target, 0, resume_param); | |
9774 | + if (toi_file_signature_op(GET_IMAGE_EXISTS) > -1) { | |
9775 | + if (!quiet) | |
9776 | + printk(KERN_INFO "TuxOnIce: FileAllocator: File " | |
9777 | + "signature found.\n"); | |
9778 | + if (!resume_param) | |
9779 | + toi_file_set_resume_param(); | |
9780 | + | |
9781 | + toi_bio_ops.set_devinfo(&devinfo); | |
9782 | + toi_writer_posn.chains = &block_chain; | |
9783 | + toi_writer_posn.num_chains = 1; | |
9784 | + | |
9785 | + if (!resume_param) | |
9786 | + set_toi_state(TOI_CAN_HIBERNATE); | |
9787 | + return 0; | |
9788 | + } | |
9789 | + | |
9790 | + /* | |
9791 | + * Target unaccessible or no signature found | |
9792 | + * Most errors have already been reported | |
9793 | + */ | |
9794 | + | |
9795 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
9796 | + | |
9797 | + if (quiet) | |
9798 | + return 1; | |
9799 | + | |
9800 | + if (*target) | |
9801 | + printk(KERN_INFO "TuxOnIce: FileAllocator: Sorry. No signature " | |
9802 | + "found at %s.\n", target); | |
9803 | + else | |
9804 | + if (!resume_param) | |
9805 | + printk(KERN_INFO "TuxOnIce: FileAllocator: Sorry. " | |
9806 | + "Target is not set for hibernating.\n"); | |
9807 | + | |
9808 | + return 1; | |
9809 | +} | |
9810 | + | |
9811 | +/** | |
9812 | + * test_toi_file_target - sysfs callback for /sys/power/tuxonice/file/target | |
9813 | + * | |
9814 | + * Test whether the target file is valid for hibernating. | |
9815 | + **/ | |
9816 | +static void test_toi_file_target(void) | |
9817 | +{ | |
9818 | + setting_toi_file_target = 1; | |
9819 | + | |
9820 | + printk(KERN_INFO "TuxOnIce: Hibernating %sabled.\n", | |
9821 | + __test_toi_file_target(toi_file_target, 0, 1) ? | |
9822 | + "dis" : "en"); | |
9823 | + | |
9824 | + setting_toi_file_target = 0; | |
9825 | +} | |
9826 | + | |
9827 | +/** | |
9828 | + * toi_file_parse_sig_location - parse image Location | |
9829 | + * @commandline: the resume parameter | |
9830 | + * @only_writer: if non-zero, parse even without the "file:" prefix | |
9831 | + * @quiet: quiet flag | |
9832 | + * | |
9833 | + * Attempt to parse a resume= parameter. | |
9834 | + * File Allocator accepts: | |
9835 | + * resume=file:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE] | |
9836 | + * | |
9837 | + * Where: | |
9838 | + * DEVNAME is convertible to a dev_t by name_to_dev_t | |
9839 | + * FIRSTBLOCK is the location of the first block in the file. | |
9840 | + * BLOCKSIZE is the logical blocksize >= SECTOR_SIZE & | |
9841 | + * <= PAGE_SIZE, | |
9842 | + * mod SECTOR_SIZE == 0 of the device. | |
9843 | + * | |
9844 | + * Data is validated by attempting to read a header from the | |
9845 | + * location given. Failure will result in toi_file refusing to | |
9846 | + * save an image, and a reboot with correct parameters will be | |
9847 | + * necessary. | |
9848 | + **/ | |
9849 | +static int toi_file_parse_sig_location(char *commandline, | |
9850 | + int only_writer, int quiet) | |
9851 | +{ | |
9852 | + char *thischar, *devstart = NULL, *colon = NULL, *at_symbol = NULL; | |
9853 | + int result = -EINVAL, target_blocksize = 0; | |
9854 | + | |
9855 | + if (strncmp(commandline, "file:", 5)) { | |
9856 | + if (!only_writer) | |
9857 | + return 1; | |
9858 | + } else | |
9859 | + commandline += 5; | |
9860 | + | |
9861 | + /* | |
9862 | + * Don't check signature again if we're beginning a cycle. If we already | |
9863 | + * did the initialisation successfully, assume we'll be okay when it | |
9864 | + * comes to resuming. | |
9865 | + */ | |
9866 | + if (toi_file_target_bdev) | |
9867 | + return 0; | |
9868 | + | |
9869 | + devstart = commandline; | |
9870 | + thischar = commandline; | |
9871 | + while ((*thischar != ':') && (*thischar != '@') && | |
9872 | + ((thischar - commandline) < 250) && (*thischar)) | |
9873 | + thischar++; | |
9874 | + | |
9875 | + if (*thischar == ':') { | |
9876 | + colon = thischar; | |
9877 | + *colon = 0; | |
9878 | + thischar++; | |
9879 | + } | |
9880 | + | |
9881 | + while ((*thischar != '@') && ((thischar - commandline) < 250) | |
9882 | + && (*thischar)) | |
9883 | + thischar++; | |
9884 | + | |
9885 | + if (*thischar == '@') { | |
9886 | + at_symbol = thischar; | |
9887 | + *at_symbol = 0; | |
9888 | + } | |
9889 | + | |
9890 | + /* | |
9891 | + * For the toi_file, you can be able to resume, but not hibernate, | |
9892 | + * because the resume= is set correctly, but the toi_file_target | |
9893 | + * isn't. | |
9894 | + * | |
9895 | + * We may have come here as a result of setting resume or | |
9896 | + * toi_file_target. We only test the toi_file target in the | |
9897 | + * former case (it's already done in the latter), and we do it before | |
9898 | + * setting the block number ourselves. It will overwrite the values | |
9899 | + * given on the command line if we don't. | |
9900 | + */ | |
9901 | + | |
9902 | + if (!setting_toi_file_target) /* Concurrent write via /sys? */ | |
9903 | + __test_toi_file_target(toi_file_target, 1, 0); | |
9904 | + | |
9474138d AM |
9905 | + if (colon) { |
9906 | + unsigned long block; | |
9907 | + result = strict_strtoul(colon + 1, 0, &block); | |
9908 | + if (result) | |
9909 | + goto out; | |
9910 | + target_firstblock = (int) block; | |
9911 | + } else | |
2380c486 JR |
9912 | + target_firstblock = 0; |
9913 | + | |
9914 | + if (at_symbol) { | |
9474138d AM |
9915 | + unsigned long block_size; |
9916 | + result = strict_strtoul(at_symbol + 1, 0, &block_size); | |
9917 | + if (result) | |
9918 | + goto out; | |
9919 | + target_blocksize = (int) block_size; | |
2380c486 JR |
9920 | + if (target_blocksize & (SECTOR_SIZE - 1)) { |
9921 | + printk(KERN_INFO "FileAllocator: Blocksizes are " | |
9922 | + "multiples of %d.\n", SECTOR_SIZE); | |
9923 | + result = -EINVAL; | |
9924 | + goto out; | |
9925 | + } | |
9926 | + } | |
9927 | + | |
9928 | + if (!quiet) | |
9929 | + printk(KERN_INFO "TuxOnIce FileAllocator: Testing whether you " | |
9930 | + "can resume:\n"); | |
9931 | + | |
9932 | + toi_file_get_target_info(commandline, 0, 1); | |
9933 | + | |
9934 | + if (!toi_file_target_bdev || IS_ERR(toi_file_target_bdev)) { | |
9935 | + toi_file_target_bdev = NULL; | |
9936 | + result = -1; | |
9937 | + goto out; | |
9938 | + } | |
9939 | + | |
9940 | + if (target_blocksize) | |
9941 | + set_devinfo(toi_file_target_bdev, ffs(target_blocksize)); | |
9942 | + | |
9943 | + result = __test_toi_file_target(commandline, 1, quiet); | |
9944 | + | |
9945 | +out: | |
9946 | + if (result) | |
9947 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
9948 | + | |
9949 | + if (!quiet) | |
9950 | + printk(KERN_INFO "Resuming %sabled.\n", result ? "dis" : "en"); | |
9951 | + | |
9952 | + if (colon) | |
9953 | + *colon = ':'; | |
9954 | + if (at_symbol) | |
9955 | + *at_symbol = '@'; | |
9956 | + | |
9957 | + return result; | |
9958 | +} | |
9959 | + | |
9960 | +/** | |
9961 | + * toi_file_save_config_info - populate toi_file_target | |
9962 | + * @buffer: Pointer to a buffer of size PAGE_SIZE. | |
9963 | + * | |
9964 | + * Save the target's name, not for resume time, but for | |
9965 | + * all_settings. | |
9966 | + * Returns: | |
9967 | + * Number of bytes used for saving our data. | |
9968 | + **/ | |
9969 | +static int toi_file_save_config_info(char *buffer) | |
9970 | +{ | |
9971 | + strcpy(buffer, toi_file_target); | |
9972 | + return strlen(toi_file_target) + 1; | |
9973 | +} | |
9974 | + | |
9975 | +/** | |
9976 | + * toi_file_load_config_info - reload target's name | |
9977 | + * @buffer: pointer to the start of the data | |
9978 | + * @size: number of bytes that were saved | |
9979 | + * | |
9980 | + * toi_file_target is set to buffer. | |
9981 | + **/ | |
9982 | +static void toi_file_load_config_info(char *buffer, int size) | |
9983 | +{ | |
0ada99ac | 9984 | + strlcpy(toi_file_target, buffer, size); |
2380c486 JR |
9985 | +} |
9986 | + | |
9987 | +static int toi_file_initialise(int starting_cycle) | |
9988 | +{ | |
9989 | + if (starting_cycle) { | |
9990 | + if (toiActiveAllocator != &toi_fileops) | |
9991 | + return 0; | |
9992 | + | |
9993 | + if (starting_cycle & SYSFS_HIBERNATE && !*toi_file_target) { | |
9994 | + printk(KERN_INFO "FileAllocator is the active writer, " | |
9995 | + "but no filename has been set.\n"); | |
9996 | + return 1; | |
9997 | + } | |
9998 | + } | |
9999 | + | |
10000 | + if (*toi_file_target) | |
10001 | + toi_file_get_target_info(toi_file_target, starting_cycle, 0); | |
10002 | + | |
10003 | + if (starting_cycle && (toi_file_image_exists(1) == -1)) { | |
10004 | + printk("%s is does not have a valid signature for " | |
10005 | + "hibernating.\n", toi_file_target); | |
10006 | + return 1; | |
10007 | + } | |
10008 | + | |
10009 | + return 0; | |
10010 | +} | |
10011 | + | |
10012 | +static struct toi_sysfs_data sysfs_params[] = { | |
10013 | + | |
10014 | + SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256, | |
10015 | + SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target), | |
10016 | + SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, | |
10017 | + attempt_to_parse_resume_device2) | |
10018 | +}; | |
10019 | + | |
10020 | +static struct toi_module_ops toi_fileops = { | |
10021 | + .type = WRITER_MODULE, | |
10022 | + .name = "file storage", | |
10023 | + .directory = "file", | |
10024 | + .module = THIS_MODULE, | |
10025 | + .print_debug_info = toi_file_print_debug_stats, | |
10026 | + .save_config_info = toi_file_save_config_info, | |
10027 | + .load_config_info = toi_file_load_config_info, | |
10028 | + .storage_needed = toi_file_storage_needed, | |
10029 | + .initialise = toi_file_initialise, | |
10030 | + .cleanup = toi_file_cleanup, | |
10031 | + | |
10032 | + .noresume_reset = toi_file_noresume_reset, | |
10033 | + .storage_available = toi_file_storage_available, | |
10034 | + .storage_allocated = toi_file_storage_allocated, | |
2380c486 JR |
10035 | + .reserve_header_space = toi_file_reserve_header_space, |
10036 | + .allocate_storage = toi_file_allocate_storage, | |
10037 | + .image_exists = toi_file_image_exists, | |
10038 | + .mark_resume_attempted = toi_file_mark_resume_attempted, | |
10039 | + .write_header_init = toi_file_write_header_init, | |
10040 | + .write_header_cleanup = toi_file_write_header_cleanup, | |
10041 | + .read_header_init = toi_file_read_header_init, | |
10042 | + .read_header_cleanup = toi_file_read_header_cleanup, | |
10043 | + .remove_image = toi_file_remove_image, | |
10044 | + .parse_sig_location = toi_file_parse_sig_location, | |
10045 | + | |
10046 | + .sysfs_data = sysfs_params, | |
10047 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
10048 | + sizeof(struct toi_sysfs_data), | |
10049 | +}; | |
10050 | + | |
10051 | +/* ---- Registration ---- */ | |
10052 | +static __init int toi_file_load(void) | |
10053 | +{ | |
10054 | + toi_fileops.rw_init = toi_bio_ops.rw_init; | |
10055 | + toi_fileops.rw_cleanup = toi_bio_ops.rw_cleanup; | |
10056 | + toi_fileops.read_page = toi_bio_ops.read_page; | |
10057 | + toi_fileops.write_page = toi_bio_ops.write_page; | |
10058 | + toi_fileops.rw_header_chunk = toi_bio_ops.rw_header_chunk; | |
10059 | + toi_fileops.rw_header_chunk_noreadahead = | |
10060 | + toi_bio_ops.rw_header_chunk_noreadahead; | |
10061 | + toi_fileops.io_flusher = toi_bio_ops.io_flusher; | |
10062 | + toi_fileops.update_throughput_throttle = | |
10063 | + toi_bio_ops.update_throughput_throttle; | |
10064 | + toi_fileops.finish_all_io = toi_bio_ops.finish_all_io; | |
10065 | + | |
10066 | + return toi_register_module(&toi_fileops); | |
10067 | +} | |
10068 | + | |
10069 | +#ifdef MODULE | |
10070 | +static __exit void toi_file_unload(void) | |
10071 | +{ | |
10072 | + toi_unregister_module(&toi_fileops); | |
10073 | +} | |
10074 | + | |
10075 | +module_init(toi_file_load); | |
10076 | +module_exit(toi_file_unload); | |
10077 | +MODULE_LICENSE("GPL"); | |
10078 | +MODULE_AUTHOR("Nigel Cunningham"); | |
10079 | +MODULE_DESCRIPTION("TuxOnIce FileAllocator"); | |
10080 | +#else | |
10081 | +late_initcall(toi_file_load); | |
10082 | +#endif | |
10083 | diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c | |
10084 | new file mode 100644 | |
9474138d | 10085 | index 0000000..c1e99fd |
2380c486 JR |
10086 | --- /dev/null |
10087 | +++ b/kernel/power/tuxonice_highlevel.c | |
9474138d | 10088 | @@ -0,0 +1,1305 @@ |
2380c486 JR |
10089 | +/* |
10090 | + * kernel/power/tuxonice_highlevel.c | |
10091 | + */ | |
10092 | +/** \mainpage TuxOnIce. | |
10093 | + * | |
10094 | + * TuxOnIce provides support for saving and restoring an image of | |
10095 | + * system memory to an arbitrary storage device, either on the local computer, | |
10096 | + * or across some network. The support is entirely OS based, so TuxOnIce | |
10097 | + * works without requiring BIOS, APM or ACPI support. The vast majority of the | |
10098 | + * code is also architecture independent, so it should be very easy to port | |
10099 | + * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem | |
10100 | + * and preemption. Initramfses and initrds are also supported. | |
10101 | + * | |
10102 | + * TuxOnIce uses a modular design, in which the method of storing the image is | |
10103 | + * completely abstracted from the core code, as are transformations on the data | |
10104 | + * such as compression and/or encryption (multiple 'modules' can be used to | |
10105 | + * provide arbitrary combinations of functionality). The user interface is also | |
10106 | + * modular, so that arbitrarily simple or complex interfaces can be used to | |
10107 | + * provide anything from debugging information through to eye candy. | |
10108 | + * | |
10109 | + * \section Copyright | |
10110 | + * | |
10111 | + * TuxOnIce is released under the GPLv2. | |
10112 | + * | |
10113 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR> | |
10114 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR> | |
10115 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR> | |
10116 | + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)<BR> | |
10117 | + * | |
10118 | + * \section Credits | |
10119 | + * | |
10120 | + * Nigel would like to thank the following people for their work: | |
10121 | + * | |
10122 | + * Bernard Blackham <bernard@blackham.com.au><BR> | |
10123 | + * Web page & Wiki administration, some coding. A person without whom | |
10124 | + * TuxOnIce would not be where it is. | |
10125 | + * | |
10126 | + * Michael Frank <mhf@linuxmail.org><BR> | |
10127 | + * Extensive testing and help with improving stability. I was constantly | |
10128 | + * amazed by the quality and quantity of Michael's help. | |
10129 | + * | |
10130 | + * Pavel Machek <pavel@ucw.cz><BR> | |
10131 | + * Modifications, defectiveness pointing, being with Gabor at the very | |
10132 | + * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and | |
10133 | + * 2.5.17. Even though Pavel and I disagree on the direction suspend to | |
10134 | + * disk should take, I appreciate the valuable work he did in helping Gabor | |
10135 | + * get the concept working. | |
10136 | + * | |
10137 | + * ..and of course the myriads of TuxOnIce users who have helped diagnose | |
10138 | + * and fix bugs, made suggestions on how to improve the code, proofread | |
10139 | + * documentation, and donated time and money. | |
10140 | + * | |
10141 | + * Thanks also to corporate sponsors: | |
10142 | + * | |
10143 | + * <B>Redhat.</B> Sometime employer from May 2006 (my fault, not Redhat's!). | |
10144 | + * | |
10145 | + * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who | |
10146 | + * allowed him to work on TuxOnIce and PM related issues on company time. | |
10147 | + * | |
10148 | + * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct | |
10149 | + * 2003 to Jan 2004. | |
10150 | + * | |
10151 | + * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing | |
10152 | + * maintenance of SMP and Highmem support. | |
10153 | + * | |
10154 | + * <B>OSDL.</B> Provided access to various hardware configurations, made | |
10155 | + * occasional small donations to the project. | |
10156 | + */ | |
10157 | + | |
10158 | +#include <linux/suspend.h> | |
2380c486 JR |
10159 | +#include <linux/freezer.h> |
10160 | +#include <linux/utsrelease.h> | |
10161 | +#include <linux/cpu.h> | |
10162 | +#include <linux/console.h> | |
10163 | +#include <linux/writeback.h> | |
10164 | +#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */ | |
10165 | + | |
10166 | +#include "tuxonice.h" | |
10167 | +#include "tuxonice_modules.h" | |
10168 | +#include "tuxonice_sysfs.h" | |
10169 | +#include "tuxonice_prepare_image.h" | |
10170 | +#include "tuxonice_io.h" | |
10171 | +#include "tuxonice_ui.h" | |
10172 | +#include "tuxonice_power_off.h" | |
10173 | +#include "tuxonice_storage.h" | |
10174 | +#include "tuxonice_checksum.h" | |
10175 | +#include "tuxonice_builtin.h" | |
10176 | +#include "tuxonice_atomic_copy.h" | |
10177 | +#include "tuxonice_alloc.h" | |
10178 | +#include "tuxonice_cluster.h" | |
10179 | + | |
10180 | +/*! Pageset metadata. */ | |
10181 | +struct pagedir pagedir2 = {2}; | |
10182 | +EXPORT_SYMBOL_GPL(pagedir2); | |
10183 | + | |
10184 | +static mm_segment_t oldfs; | |
10185 | +static DEFINE_MUTEX(tuxonice_in_use); | |
10186 | +static int block_dump_save; | |
2380c486 JR |
10187 | + |
10188 | +/* Binary signature if an image is present */ | |
10189 | +char *tuxonice_signature = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c"; | |
10190 | +EXPORT_SYMBOL_GPL(tuxonice_signature); | |
10191 | + | |
2380c486 JR |
10192 | +unsigned long boot_kernel_data_buffer; |
10193 | + | |
10194 | +static char *result_strings[] = { /* indexed by result state number */ | |
10195 | + "Hibernation was aborted", | |
10196 | + "The user requested that we cancel the hibernation", | |
10197 | + "No storage was available", | |
10198 | + "Insufficient storage was available", | |
10199 | + "Freezing filesystems and/or tasks failed", | |
10200 | + "A pre-existing image was used", | |
10201 | + "We would free memory, but image size limit doesn't allow this", | |
10202 | + "Unable to free enough memory to hibernate", | |
10203 | + "Unable to obtain the Power Management Semaphore", | |
10204 | + "A device suspend/resume returned an error", | |
10205 | + "A system device suspend/resume returned an error", | |
10206 | + "The extra pages allowance is too small", | |
10207 | + "We were unable to successfully prepare an image", | |
10208 | + "TuxOnIce module initialisation failed", | |
10209 | + "TuxOnIce module cleanup failed", | |
10210 | + "I/O errors were encountered", | |
10211 | + "Ran out of memory", | |
10212 | + "An error was encountered while reading the image", | |
10213 | + "Platform preparation failed", | |
10214 | + "CPU Hotplugging failed", | |
10215 | + "Architecture specific preparation failed", | |
10216 | + "Pages needed resaving, but we were told to abort if this happens", | |
10217 | + "We can't hibernate at the moment (invalid resume= or filewriter " | |
10218 | + "target?)", | |
10219 | + "A hibernation preparation notifier chain member cancelled the " | |
10220 | + "hibernation", | |
10221 | + "Pre-snapshot preparation failed", | |
10222 | + "Pre-restore preparation failed", | |
10223 | + "Failed to disable usermode helpers", | |
10224 | + "Can't resume from alternate image", | |
0ada99ac | 10225 | + "Header reservation too small", |
2380c486 JR |
10226 | +}; |
10227 | + | |
10228 | +/** | |
10229 | + * toi_finish_anything - cleanup after doing anything | |
10230 | + * @hibernate_or_resume: Whether finishing a cycle or attempt at | |
10231 | + * resuming. | |
10232 | + * | |
10233 | + * This is our basic clean-up routine, matching start_anything below. We | |
10234 | + * call cleanup routines, drop module references and restore process fs and | |
10235 | + * cpus allowed masks, together with the global block_dump variable's value. | |
10236 | + **/ | |
10237 | +void toi_finish_anything(int hibernate_or_resume) | |
10238 | +{ | |
10239 | + toi_cleanup_modules(hibernate_or_resume); | |
10240 | + toi_put_modules(); | |
10241 | + if (hibernate_or_resume) { | |
10242 | + block_dump = block_dump_save; | |
10243 | + set_cpus_allowed(current, CPU_MASK_ALL); | |
10244 | + toi_alloc_print_debug_stats(); | |
2380c486 JR |
10245 | + atomic_inc(&snapshot_device_available); |
10246 | + mutex_unlock(&pm_mutex); | |
10247 | + } | |
10248 | + | |
10249 | + set_fs(oldfs); | |
10250 | + mutex_unlock(&tuxonice_in_use); | |
10251 | +} | |
10252 | + | |
10253 | +/** | |
10254 | + * toi_start_anything - basic initialisation for TuxOnIce | |
10255 | + * @hibernate_or_resume: Whether starting a cycle or attempt at resuming. | |
10256 | + * | |
10257 | + * Take references on modules, use the kernel segment, recheck resume= if no | |
10258 | + * active allocator is set, initialise modules, save and zero block_dump and | |
10259 | + * pin us to the first online CPU. Returns 0, or -EBUSY after undoing the above. | |
10260 | + **/ | |
10261 | +int toi_start_anything(int hibernate_or_resume) | |
10262 | +{ | |
2380c486 JR |
10263 | + mutex_lock(&tuxonice_in_use); |
10264 | + | |
10265 | + oldfs = get_fs(); | |
10266 | + set_fs(KERNEL_DS); | |
10267 | + | |
10268 | + if (hibernate_or_resume) { | |
10269 | + mutex_lock(&pm_mutex); | |
10270 | + | |
10271 | + if (!atomic_add_unless(&snapshot_device_available, -1, 0)) | |
10272 | + goto snapshotdevice_unavailable; | |
10273 | + } | |
10274 | + | |
2380c486 JR |
10275 | + if (hibernate_or_resume == SYSFS_HIBERNATE) |
10276 | + toi_print_modules(); | |
10277 | + | |
10278 | + if (toi_get_modules()) { | |
10279 | + printk(KERN_INFO "TuxOnIce: Get modules failed!\n"); | |
10280 | + goto prehibernate_err; | |
10281 | + } | |
10282 | + | |
10283 | + if (hibernate_or_resume) { | |
10284 | + block_dump_save = block_dump; | |
10285 | + block_dump = 0; | |
10286 | + set_cpus_allowed(current, | |
10287 | + cpumask_of_cpu(first_cpu(cpu_online_map))); | |
10288 | + } | |
10289 | + | |
10290 | + if (toi_initialise_modules_early(hibernate_or_resume)) | |
10291 | + goto early_init_err; | |
10292 | + | |
10293 | + if (!toiActiveAllocator) | |
10294 | + toi_attempt_to_parse_resume_device(!hibernate_or_resume); | |
10295 | + | |
10296 | + if (!toi_initialise_modules_late(hibernate_or_resume)) | |
10297 | + return 0; | |
10298 | + toi_cleanup_modules(hibernate_or_resume); | |
10299 | +early_init_err: | |
10300 | + if (hibernate_or_resume) { | |
10301 | + block_dump = block_dump_save; /* restore the value saved above */ | |
10302 | + set_cpus_allowed(current, CPU_MASK_ALL); | |
10303 | + } | |
10304 | + toi_put_modules(); /* pairs with toi_get_modules() above */ | |
10305 | +prehibernate_err: | |
10306 | + if (hibernate_or_resume) | |
10307 | + atomic_inc(&snapshot_device_available); | |
10308 | +snapshotdevice_unavailable: | |
10309 | + if (hibernate_or_resume) | |
10310 | + mutex_unlock(&pm_mutex); | |
10311 | + set_fs(oldfs); | |
10312 | + mutex_unlock(&tuxonice_in_use); | |
10313 | + return -EBUSY; | |
10314 | +} | |
10315 | + | |
10316 | +/* | |
10317 | + * Nosave page tracking. | |
10318 | + * | |
10319 | + * Here rather than in prepare_image because we want to do it once only at the | |
10320 | + * start of a cycle. | |
10321 | + */ | |
10322 | + | |
10323 | +/** | |
10324 | + * mark_nosave_pages - set up our Nosave bitmap | |
10325 | + * | |
10326 | + * Build a bitmap of Nosave pages from the list. The bitmap allows faster | |
10327 | + * use when preparing the image. | |
10328 | + **/ | |
10329 | +static void mark_nosave_pages(void) | |
10330 | +{ | |
10331 | + struct nosave_region *region; | |
10332 | + | |
10333 | + list_for_each_entry(region, &nosave_regions, list) { | |
10334 | + unsigned long pfn; | |
10335 | + | |
10336 | + for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) | |
10337 | + if (pfn_valid(pfn)) | |
10338 | + SetPageNosave(pfn_to_page(pfn)); | |
10339 | + } | |
10340 | +} | |
10341 | + | |
10342 | +static int alloc_a_bitmap(struct memory_bitmap **bm) | |
10343 | +{ | |
10344 | + /* Allocate one bitmap at *bm; 0 on success, else non-zero with *bm NULL. */ | |
10345 | + int result = 0; | |
10346 | + | |
10347 | + *bm = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); | |
10348 | + if (!*bm) { | |
10349 | + printk(KERN_ERR "Failed to kzalloc memory for a bitmap.\n"); | |
10350 | + return -ENOMEM; | |
10351 | + } | |
10352 | + | |
10353 | + result = memory_bm_create(*bm, GFP_KERNEL, 0); | |
10354 | + if (result) { | |
10355 | + printk(KERN_ERR "Failed to create a bitmap.\n"); | |
10356 | + kfree(*bm); | |
10357 | + *bm = NULL; /* or free_a_bitmap() would free it again */ | |
10358 | + } | |
10359 | + return result; | |
10360 | +} | |
10361 | + | |
10362 | +/** | |
10363 | + * allocate_bitmaps - allocate bitmaps used to record page states | |
10364 | + * | |
10365 | + * Allocate the bitmaps we use to record the various TuxOnIce related | |
10366 | + * page states. | |
10367 | + **/ | |
10368 | +static int allocate_bitmaps(void) | |
10369 | +{ | |
10370 | + if (alloc_a_bitmap(&pageset1_map) || | |
10371 | + alloc_a_bitmap(&pageset1_copy_map) || | |
10372 | + alloc_a_bitmap(&pageset2_map) || | |
10373 | + alloc_a_bitmap(&io_map) || | |
10374 | + alloc_a_bitmap(&nosave_map) || | |
10375 | + alloc_a_bitmap(&free_map) || | |
10376 | + alloc_a_bitmap(&page_resave_map)) | |
10377 | + return 1; | |
10378 | + | |
10379 | + return 0; | |
10380 | +} | |
10381 | + | |
10382 | +static void free_a_bitmap(struct memory_bitmap **bm) | |
10383 | +{ | |
10384 | + if (!*bm) | |
10385 | + return; | |
10386 | + | |
10387 | + memory_bm_free(*bm, 0); | |
10388 | + kfree(*bm); | |
10389 | + *bm = NULL; | |
10390 | +} | |
10391 | + | |
10392 | +/** | |
10393 | + * free_bitmaps - free the bitmaps used to record page states | |
10394 | + * | |
10395 | + * Free the bitmaps allocated above. It is not an error to call | |
10396 | + * memory_bm_free on a bitmap that isn't currently allocated. | |
10397 | + **/ | |
10398 | +static void free_bitmaps(void) | |
10399 | +{ | |
10400 | + free_a_bitmap(&pageset1_map); | |
10401 | + free_a_bitmap(&pageset1_copy_map); | |
10402 | + free_a_bitmap(&pageset2_map); | |
10403 | + free_a_bitmap(&io_map); | |
10404 | + free_a_bitmap(&nosave_map); | |
10405 | + free_a_bitmap(&free_map); | |
10406 | + free_a_bitmap(&page_resave_map); | |
10407 | +} | |
10408 | + | |
10409 | +/** | |
10410 | + * io_MB_per_second - return the number of MB/s read or written | |
10411 | + * @write: Whether to return the speed at which we wrote. | |
10412 | + * | |
10413 | + * Calculate the number of megabytes per second that were read or written. | |
10414 | + **/ | |
10415 | +static int io_MB_per_second(int write) | |
10416 | +{ | |
10417 | + return (toi_bkd.toi_io_time[write][1]) ? | |
10418 | + MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ / | |
10419 | + toi_bkd.toi_io_time[write][1] : 0; | |
10420 | +} | |
10421 | + | |
10422 | +#define SNPRINTF(a...) do { len += scnprintf(((char *) buffer) + len, \ | |
10423 | + count - len - 1, ## a); } while (0) | |
10424 | + | |
10425 | +/** | |
10426 | + * get_toi_debug_info - fill a buffer with debugging information | |
10427 | + * @buffer: The buffer to be filled. | |
10428 | + * @count: The size of the buffer, in bytes. | |
10429 | + * | |
10430 | + * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will | |
10431 | + * either printk or return via sysfs. | |
10432 | + **/ | |
10433 | +static int get_toi_debug_info(const char *buffer, int count) | |
10434 | +{ | |
10435 | + int len = 0, i, first_result = 1; | |
10436 | + | |
10437 | + SNPRINTF("TuxOnIce debugging info:\n"); | |
10438 | + SNPRINTF("- TuxOnIce core : " TOI_CORE_VERSION "\n"); | |
10439 | + SNPRINTF("- Kernel Version : " UTS_RELEASE "\n"); | |
10440 | + SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__); | |
10441 | + SNPRINTF("- Attempt number : %d\n", nr_hibernates); | |
10442 | + SNPRINTF("- Parameters : %ld %ld %ld %d %d %ld\n", | |
10443 | + toi_result, | |
10444 | + toi_bkd.toi_action, | |
10445 | + toi_bkd.toi_debug_state, | |
10446 | + toi_bkd.toi_default_console_level, | |
10447 | + image_size_limit, | |
10448 | + toi_poweroff_method); | |
10449 | + SNPRINTF("- Overall expected compression percentage: %d.\n", | |
10450 | + 100 - toi_expected_compression_ratio()); | |
10451 | + len += toi_print_module_debug_info(((char *) buffer) + len, | |
10452 | + count - len - 1); | |
10453 | + if (toi_bkd.toi_io_time[0][1]) { | |
10454 | + if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) { | |
10455 | + SNPRINTF("- I/O speed: Write %ld KB/s", | |
10456 | + (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / | |
10457 | + toi_bkd.toi_io_time[0][1])); | |
10458 | + if (toi_bkd.toi_io_time[1][1]) | |
10459 | + SNPRINTF(", Read %ld KB/s", | |
10460 | + (KB((unsigned long) | |
10461 | + toi_bkd.toi_io_time[1][0]) * HZ / | |
10462 | + toi_bkd.toi_io_time[1][1])); | |
10463 | + } else { | |
10464 | + SNPRINTF("- I/O speed: Write %ld MB/s", | |
10465 | + (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / | |
10466 | + toi_bkd.toi_io_time[0][1])); | |
10467 | + if (toi_bkd.toi_io_time[1][1]) | |
10468 | + SNPRINTF(", Read %ld MB/s", | |
10469 | + (MB((unsigned long) | |
10470 | + toi_bkd.toi_io_time[1][0]) * HZ / | |
10471 | + toi_bkd.toi_io_time[1][1])); | |
10472 | + } | |
10473 | + SNPRINTF(".\n"); | |
10474 | + } else | |
10475 | + SNPRINTF("- No I/O speed stats available.\n"); | |
10476 | + SNPRINTF("- Extra pages : %ld used/%ld.\n", | |
10477 | + extra_pd1_pages_used, extra_pd1_pages_allowance); | |
10478 | + | |
10479 | + for (i = 0; i < TOI_NUM_RESULT_STATES; i++) | |
10480 | + if (test_result_state(i)) { | |
10481 | + SNPRINTF("%s: %s.\n", first_result ? | |
10482 | + "- Result " : | |
10483 | + " ", | |
10484 | + result_strings[i]); | |
10485 | + first_result = 0; | |
10486 | + } | |
10487 | + if (first_result) | |
10488 | + SNPRINTF("- Result : %s.\n", nr_hibernates ? | |
10489 | + "Succeeded" : | |
10490 | + "No hibernation attempts so far"); | |
10491 | + return len; | |
10492 | +} | |
10493 | + | |
10494 | +/** | |
10495 | + * do_cleanup - cleanup after attempting to hibernate or resume | |
10496 | + * @get_debug_info: Whether to gather debugging info and printk it. | |
10497 | + * @restarting: If set, skip console cleanup and storage deactivation. | |
10498 | + * | |
10499 | + * Clean up after a hibernate or resume attempt, optionally logging debug info. | |
10500 | + **/ | |
e999739a | 10501 | +static void do_cleanup(int get_debug_info, int restarting) |
2380c486 JR |
10502 | +{ |
10503 | + int i = 0; | |
10504 | + char *buffer = NULL; | |
10505 | + | |
10506 | + if (get_debug_info) | |
10507 | + toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up..."); | |
10508 | + | |
10509 | + free_checksum_pages(); | |
10510 | + | |
10511 | + if (get_debug_info) | |
10512 | + buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP); | |
10513 | + | |
10514 | + if (buffer) | |
10515 | + i = get_toi_debug_info(buffer, PAGE_SIZE); | |
10516 | + | |
10517 | + toi_free_extra_pagedir_memory(); | |
10518 | + | |
10519 | + pagedir1.size = 0; | |
10520 | + pagedir2.size = 0; | |
10521 | + set_highmem_size(pagedir1, 0); | |
10522 | + set_highmem_size(pagedir2, 0); | |
10523 | + | |
10524 | + if (boot_kernel_data_buffer) { | |
10525 | + if (!test_toi_state(TOI_BOOT_KERNEL)) | |
10526 | + toi_free_page(37, boot_kernel_data_buffer); | |
10527 | + boot_kernel_data_buffer = 0; | |
10528 | + } | |
10529 | + | |
10530 | + clear_toi_state(TOI_BOOT_KERNEL); | |
10531 | + thaw_processes(); | |
10532 | + | |
2380c486 JR |
10533 | + if (test_action_state(TOI_KEEP_IMAGE) && |
10534 | + !test_result_state(TOI_ABORTED)) { | |
10535 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, | |
10536 | + "TuxOnIce: Not invalidating the image due " | |
10537 | + "to Keep Image being enabled.\n"); | |
10538 | + set_result_state(TOI_KEPT_IMAGE); | |
10539 | + } else | |
2380c486 JR |
10540 | + if (toiActiveAllocator) |
10541 | + toiActiveAllocator->remove_image(); | |
10542 | + | |
10543 | + free_bitmaps(); | |
10544 | + usermodehelper_enable(); | |
10545 | + | |
10546 | + if (test_toi_state(TOI_NOTIFIERS_PREPARE)) { | |
10547 | + pm_notifier_call_chain(PM_POST_HIBERNATION); | |
10548 | + clear_toi_state(TOI_NOTIFIERS_PREPARE); | |
10549 | + } | |
10550 | + | |
10551 | + if (buffer && i) { | |
10552 | + /* Printk can only handle 1023 bytes, including | |
10553 | + * its level mangling. */ | |
10554 | + for (i = 0; i < 3; i++) | |
9474138d | 10555 | + printk(KERN_ERR "%s", buffer + (1023 * i)); |
2380c486 JR |
10556 | + toi_free_page(20, (unsigned long) buffer); |
10557 | + } | |
10558 | + | |
10559 | + if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) | |
10560 | + enable_nonboot_cpus(); | |
e999739a | 10561 | + |
10562 | + if (!restarting) | |
10563 | + toi_cleanup_console(); | |
2380c486 JR |
10564 | + |
10565 | + free_attention_list(); | |
10566 | + | |
e999739a | 10567 | + if (!restarting) |
10568 | + toi_deactivate_storage(0); | |
2380c486 JR |
10569 | + |
10570 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); | |
10571 | + clear_toi_state(TOI_TRYING_TO_RESUME); | |
10572 | + clear_toi_state(TOI_NOW_RESUMING); | |
10573 | +} | |
10574 | + | |
10575 | +/** | |
10576 | + * check_still_keeping_image - we kept an image; check whether to reuse it. | |
10577 | + * | |
10578 | + * We enter this routine when we have kept an image. If the user has said they | |
10579 | + * want to still keep it, all we need to do is powerdown. If powering down | |
10580 | + * means hibernating to ram and the power doesn't run out, we'll return 1. | |
10581 | + * If we do power off properly or the battery runs out, we'll resume via the | |
10582 | + * normal paths. | |
10583 | + * | |
10584 | + * If the user has said they want to remove the previously kept image, we | |
10585 | + * remove it, and return 0. We'll then store a new image. | |
10586 | + **/ | |
10587 | +static int check_still_keeping_image(void) | |
10588 | +{ | |
10589 | + if (test_action_state(TOI_KEEP_IMAGE)) { | |
e999739a | 10590 | + printk(KERN_INFO "Image already stored: powering down " |
10591 | + "immediately.\n"); | |
2380c486 JR |
10592 | + do_toi_step(STEP_HIBERNATE_POWERDOWN); |
10593 | + return 1; /* Just in case we're using S3 */ | |
10594 | + } | |
10595 | + | |
e999739a | 10596 | + printk(KERN_INFO "Invalidating previous image.\n"); |
2380c486 JR |
10597 | + toiActiveAllocator->remove_image(); |
10598 | + | |
10599 | + return 0; | |
10600 | +} | |
10601 | + | |
10602 | +/** | |
10603 | + * toi_init - prepare to hibernate to disk | |
10604 | + * @restarting: If set, skip preparing the console. | |
10605 | + * | |
10606 | + * Initialise variables & data structures before hibernating to disk. | |
10607 | + **/ | |
e999739a | 10608 | +static int toi_init(int restarting) |
2380c486 JR |
10609 | +{ |
10610 | + int result, i, j; | |
10611 | + | |
10612 | + toi_result = 0; | |
10613 | + | |
10614 | + printk(KERN_INFO "Initiating a hibernation cycle.\n"); | |
10615 | + | |
10616 | + nr_hibernates++; | |
10617 | + | |
10618 | + for (i = 0; i < 2; i++) | |
10619 | + for (j = 0; j < 2; j++) | |
10620 | + toi_bkd.toi_io_time[i][j] = 0; | |
10621 | + | |
10622 | + if (!test_toi_state(TOI_CAN_HIBERNATE) || | |
10623 | + allocate_bitmaps()) | |
10624 | + return 1; | |
10625 | + | |
10626 | + mark_nosave_pages(); | |
10627 | + | |
e999739a | 10628 | + if (!restarting) |
10629 | + toi_prepare_console(); | |
2380c486 JR |
10630 | + |
10631 | + result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); | |
10632 | + if (result) { | |
10633 | + set_result_state(TOI_NOTIFIERS_PREPARE_FAILED); | |
10634 | + return 1; | |
10635 | + } | |
10636 | + set_toi_state(TOI_NOTIFIERS_PREPARE); | |
10637 | + | |
10638 | + result = usermodehelper_disable(); | |
10639 | + if (result) { | |
10640 | + printk(KERN_ERR "TuxOnIce: Failed to disable usermode " | |
10641 | + "helpers\n"); | |
10642 | + set_result_state(TOI_USERMODE_HELPERS_ERR); | |
10643 | + return 1; | |
10644 | + } | |
10645 | + | |
10646 | + boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP); | |
10647 | + if (!boot_kernel_data_buffer) { | |
10648 | + printk(KERN_ERR "TuxOnIce: Failed to allocate " | |
10649 | + "boot_kernel_data_buffer.\n"); | |
10650 | + set_result_state(TOI_OUT_OF_MEMORY); | |
10651 | + return 1; | |
10652 | + } | |
10653 | + | |
10654 | + if (test_action_state(TOI_LATE_CPU_HOTPLUG) || | |
10655 | + !disable_nonboot_cpus()) | |
10656 | + return 1; | |
10657 | + | |
10658 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
10659 | + return 0; | |
10660 | +} | |
10661 | + | |
10662 | +/** | |
10663 | + * can_hibernate - perform basic 'Can we hibernate?' tests | |
10664 | + * | |
10665 | + * Perform basic tests that must pass if we're going to be able to hibernate: | |
10666 | + * Can we get the pm_mutex? Is resume= valid (we need to know where to write | |
10667 | + * the image header). | |
10668 | + **/ | |
10669 | +static int can_hibernate(void) | |
10670 | +{ | |
10671 | + if (!test_toi_state(TOI_CAN_HIBERNATE)) | |
10672 | + toi_attempt_to_parse_resume_device(0); | |
10673 | + | |
10674 | + if (!test_toi_state(TOI_CAN_HIBERNATE)) { | |
10675 | + printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n" | |
10676 | + "This may be because you haven't put something along " | |
10677 | + "the lines of\n\nresume=swap:/dev/hda1\n\n" | |
10678 | + "in lilo.conf or equivalent. (Where /dev/hda1 is your " | |
10679 | + "swap partition).\n"); | |
10680 | + set_abort_result(TOI_CANT_SUSPEND); | |
10681 | + return 0; | |
10682 | + } | |
10683 | + | |
10684 | + if (strlen(alt_resume_param)) { | |
10685 | + attempt_to_parse_alt_resume_param(); | |
10686 | + | |
10687 | + if (!strlen(alt_resume_param)) { | |
10688 | + printk(KERN_INFO "Alternate resume parameter now " | |
10689 | + "invalid. Aborting.\n"); | |
10690 | + set_abort_result(TOI_CANT_USE_ALT_RESUME); | |
10691 | + return 0; | |
10692 | + } | |
10693 | + } | |
10694 | + | |
10695 | + return 1; | |
10696 | +} | |
10697 | + | |
10698 | +/** | |
10699 | + * do_post_image_write - having written an image, figure out what to do next | |
10700 | + * | |
10701 | + * After writing an image, we might load an alternate image or power down. | |
10702 | + * Powering down might involve hibernating to ram, in which case we also | |
10703 | + * need to handle reloading pageset2. | |
10704 | + **/ | |
10705 | +static int do_post_image_write(void) | |
10706 | +{ | |
10707 | + /* If switching images fails, do normal powerdown */ | |
10708 | + if (alt_resume_param[0]) | |
10709 | + do_toi_step(STEP_RESUME_ALT_IMAGE); | |
10710 | + | |
10711 | + toi_power_down(); | |
10712 | + | |
10713 | + barrier(); | |
10714 | + mb(); | |
10715 | + return 0; | |
10716 | +} | |
10717 | + | |
10718 | +/** | |
10719 | + * __save_image - do the hard work of saving the image | |
10720 | + * | |
10721 | + * High level routine for getting the image saved. The key assumptions made | |
10722 | + * are that processes have been frozen and sufficient memory is available. | |
10723 | + * | |
10724 | + * We also exit through here at resume time, coming back from toi_hibernate | |
10725 | + * after the atomic restore. This is the reason for the toi_in_hibernate | |
10726 | + * test. | |
10727 | + **/ | |
10728 | +static int __save_image(void) | |
10729 | +{ | |
10730 | + int temp_result, did_copy = 0; | |
10731 | + | |
10732 | + toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image.."); | |
10733 | + | |
10734 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, | |
10735 | + " - Final values: %d and %d.\n", | |
10736 | + pagedir1.size, pagedir2.size); | |
10737 | + | |
10738 | + toi_cond_pause(1, "About to write pagedir2."); | |
10739 | + | |
10740 | + temp_result = write_pageset(&pagedir2); | |
10741 | + | |
10742 | + if (temp_result == -1 || test_result_state(TOI_ABORTED)) | |
10743 | + return 1; | |
10744 | + | |
10745 | + toi_cond_pause(1, "About to copy pageset 1."); | |
10746 | + | |
10747 | + if (test_result_state(TOI_ABORTED)) | |
10748 | + return 1; | |
10749 | + | |
10750 | + toi_deactivate_storage(1); | |
10751 | + | |
10752 | + toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); | |
10753 | + | |
10754 | + toi_in_hibernate = 1; | |
10755 | + | |
10756 | + if (toi_go_atomic(PMSG_FREEZE, 1)) | |
10757 | + goto Failed; | |
10758 | + | |
10759 | + temp_result = toi_hibernate(); | |
10760 | + if (!temp_result) | |
10761 | + did_copy = 1; | |
10762 | + | |
10763 | + /* We return here at resume time too! */ | |
10764 | + toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result); | |
10765 | + | |
10766 | +Failed: | |
10767 | + if (toi_activate_storage(1)) | |
10768 | + panic("Failed to reactivate our storage."); | |
10769 | + | |
10770 | + /* Resume time? */ | |
10771 | + if (!toi_in_hibernate) { | |
10772 | + copyback_post(); | |
10773 | + return 0; | |
10774 | + } | |
10775 | + | |
10776 | + /* Nope. Hibernating. So, see if we can save the image... */ | |
10777 | + | |
10778 | + if (temp_result || test_result_state(TOI_ABORTED)) { | |
10779 | + if (did_copy) | |
10780 | + goto abort_reloading_pagedir_two; | |
10781 | + else | |
10782 | + return 1; | |
10783 | + } | |
10784 | + | |
10785 | + toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size, | |
10786 | + NULL); | |
10787 | + | |
10788 | + if (test_result_state(TOI_ABORTED)) | |
10789 | + goto abort_reloading_pagedir_two; | |
10790 | + | |
10791 | + toi_cond_pause(1, "About to write pageset1."); | |
10792 | + | |
10793 | + toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1\n"); | |
10794 | + | |
10795 | + temp_result = write_pageset(&pagedir1); | |
10796 | + | |
10797 | + /* We didn't overwrite any memory, so no reread needs to be done. */ | |
10798 | + if (test_action_state(TOI_TEST_FILTER_SPEED)) | |
10799 | + return 1; | |
10800 | + | |
10801 | + if (temp_result == 1 || test_result_state(TOI_ABORTED)) | |
10802 | + goto abort_reloading_pagedir_two; | |
10803 | + | |
10804 | + toi_cond_pause(1, "About to write header."); | |
10805 | + | |
10806 | + if (test_result_state(TOI_ABORTED)) | |
10807 | + goto abort_reloading_pagedir_two; | |
10808 | + | |
10809 | + temp_result = write_image_header(); | |
10810 | + | |
10811 | + if (test_action_state(TOI_TEST_BIO)) | |
10812 | + return 1; | |
10813 | + | |
10814 | + if (!temp_result && !test_result_state(TOI_ABORTED)) | |
10815 | + return 0; | |
10816 | + | |
10817 | +abort_reloading_pagedir_two: | |
10818 | + temp_result = read_pageset2(1); | |
10819 | + | |
10820 | + /* If that failed, we're sunk. Panic! */ | |
10821 | + if (temp_result) | |
10822 | + panic("Attempt to reload pagedir 2 while aborting " | |
10823 | + "a hibernate failed."); | |
10824 | + | |
10825 | + return 1; | |
10826 | +} | |
10827 | + | |
10828 | +static void map_ps2_pages(int enable) | |
10829 | +{ | |
10830 | + unsigned long pfn = 0; | |
10831 | + | |
10832 | + pfn = memory_bm_next_pfn(pageset2_map); | |
10833 | + | |
10834 | + while (pfn != BM_END_OF_MAP) { | |
10835 | + struct page *page = pfn_to_page(pfn); | |
10836 | + kernel_map_pages(page, 1, enable); | |
10837 | + pfn = memory_bm_next_pfn(pageset2_map); | |
10838 | + } | |
10839 | +} | |
10840 | + | |
10841 | +/** | |
10842 | + * do_save_image - save the image and handle the result | |
10843 | + * | |
10844 | + * Save the prepared image. If we fail or we're in the path returning | |
10845 | + * from the atomic restore, cleanup. | |
10846 | + **/ | |
10847 | +static int do_save_image(void) | |
10848 | +{ | |
10849 | + int result; | |
10850 | + map_ps2_pages(0); | |
10851 | + result = __save_image(); | |
10852 | + map_ps2_pages(1); | |
10853 | + return result; | |
10854 | +} | |
10855 | + | |
10856 | +/** | |
10857 | + * do_prepare_image - try to prepare an image | |
10858 | + * | |
10859 | + * Seek to initialise and prepare an image to be saved. On failure, | |
10860 | + * cleanup. | |
10861 | + **/ | |
10862 | +static int do_prepare_image(void) | |
10863 | +{ | |
e999739a | 10864 | + int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); |
10865 | + | |
10866 | + if (!restarting && toi_activate_storage(0)) | |
2380c486 JR |
10867 | + return 1; |
10868 | + | |
10869 | + /* | |
10870 | + * If kept image and still keeping image and hibernating to RAM, we will | |
10871 | + * return 1 after hibernating and resuming (provided the power doesn't | |
10872 | + * run out). In that case, we skip directly to cleaning up and exiting. | |
10873 | + */ | |
10874 | + | |
10875 | + if (!can_hibernate() || | |
10876 | + (test_result_state(TOI_KEPT_IMAGE) && | |
10877 | + check_still_keeping_image())) | |
10878 | + return 1; | |
10879 | + | |
e999739a | 10880 | + if (toi_init(restarting) && !toi_prepare_image() && |
2380c486 JR |
10881 | + !test_result_state(TOI_ABORTED)) |
10882 | + return 0; | |
10883 | + | |
10884 | + return 1; | |
10885 | +} | |
10886 | + | |
10887 | +/** | |
10888 | + * do_check_can_resume - find out whether an image has been stored | |
10889 | + * | |
10890 | + * Read whether an image exists. We use the same routine as the | |
10891 | + * image_exists sysfs entry, and just look to see whether the | |
10892 | + * first character in the resulting buffer is a '1'. | |
10893 | + **/ | |
10894 | +int do_check_can_resume(void) | |
10895 | +{ | |
10896 | + char *buf = (char *) toi_get_zeroed_page(21, TOI_ATOMIC_GFP); | |
10897 | + int result = 0; | |
10898 | + | |
10899 | + if (!buf) | |
10900 | + return 0; | |
10901 | + | |
10902 | + /* Only interested in first byte, so throw away return code. */ | |
10903 | + image_exists_read(buf, PAGE_SIZE); | |
10904 | + | |
10905 | + if (buf[0] == '1') | |
10906 | + result = 1; | |
10907 | + | |
10908 | + toi_free_page(21, (unsigned long) buf); | |
10909 | + return result; | |
10910 | +} | |
10911 | +EXPORT_SYMBOL_GPL(do_check_can_resume); | |
10912 | + | |
10913 | +/** | |
10914 | + * do_load_atomic_copy - load the first part of an image, if it exists | |
10915 | + * | |
10916 | + * Check whether we have an image. If one exists, do sanity checking | |
10917 | + * (possibly invalidating the image or even rebooting if the user | |
10918 | + * requests that) before loading it into memory in preparation for the | |
10919 | + * atomic restore. | |
10920 | + * | |
10921 | + * If and only if we have an image loaded and ready to restore, we return 0. | |
10922 | + **/ | |
10923 | +static int do_load_atomic_copy(void) | |
10924 | +{ | |
10925 | + int read_image_result = 0; | |
10926 | + | |
10927 | + if (sizeof(swp_entry_t) != sizeof(long)) { | |
10928 | + printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size" | |
10929 | + " of long. Please report this!\n"); | |
10930 | + return 1; | |
10931 | + } | |
10932 | + | |
10933 | + if (!resume_file[0]) | |
10934 | + printk(KERN_WARNING "TuxOnIce: " | |
10935 | + "You need to use a resume= command line parameter to " | |
10936 | + "tell TuxOnIce where to look for an image.\n"); | |
10937 | + | |
10938 | + toi_activate_storage(0); | |
10939 | + | |
10940 | + if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) && | |
10941 | + !toi_attempt_to_parse_resume_device(0)) { | |
10942 | + /* | |
10943 | + * Without a usable storage device we can do nothing - | |
10944 | + * even if noresume is given | |
10945 | + */ | |
10946 | + | |
10947 | + if (!toiNumAllocators) | |
10948 | + printk(KERN_ALERT "TuxOnIce: " | |
10949 | + "No storage allocators have been registered.\n"); | |
10950 | + else | |
10951 | + printk(KERN_ALERT "TuxOnIce: " | |
10952 | + "Missing or invalid storage location " | |
10953 | + "(resume= parameter). Please correct and " | |
10954 | + "rerun lilo (or equivalent) before " | |
10955 | + "hibernating.\n"); | |
10956 | + toi_deactivate_storage(0); | |
10957 | + return 1; | |
10958 | + } | |
10959 | + | |
10960 | + if (allocate_bitmaps()) | |
10961 | + return 1; | |
10962 | + | |
10963 | + read_image_result = read_pageset1(); /* non fatal error ignored */ | |
10964 | + | |
10965 | + if (test_toi_state(TOI_NORESUME_SPECIFIED)) | |
10966 | + clear_toi_state(TOI_NORESUME_SPECIFIED); | |
10967 | + | |
10968 | + toi_deactivate_storage(0); | |
10969 | + | |
10970 | + if (read_image_result) | |
10971 | + return 1; | |
10972 | + | |
10973 | + return 0; | |
10974 | +} | |
10975 | + | |
10976 | +/** | |
10977 | + * prepare_restore_load_alt_image - save & restore alt image variables | |
10978 | + * | |
10979 | + * Save and restore the pageset1 maps, when loading an alternate image. | |
10980 | + **/ | |
10981 | +static void prepare_restore_load_alt_image(int prepare) | |
10982 | +{ | |
10983 | + static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save; | |
10984 | + | |
10985 | + if (prepare) { | |
10986 | + pageset1_map_save = pageset1_map; | |
10987 | + pageset1_map = NULL; | |
10988 | + pageset1_copy_map_save = pageset1_copy_map; | |
10989 | + pageset1_copy_map = NULL; | |
10990 | + set_toi_state(TOI_LOADING_ALT_IMAGE); | |
10991 | + toi_reset_alt_image_pageset2_pfn(); | |
10992 | + } else { | |
10993 | + memory_bm_free(pageset1_map, 0); | |
10994 | + pageset1_map = pageset1_map_save; | |
10995 | + memory_bm_free(pageset1_copy_map, 0); | |
10996 | + pageset1_copy_map = pageset1_copy_map_save; | |
10997 | + clear_toi_state(TOI_NOW_RESUMING); | |
10998 | + clear_toi_state(TOI_LOADING_ALT_IMAGE); | |
10999 | + } | |
11000 | +} | |
11001 | + | |
11002 | +/** | |
11003 | + * do_toi_step - perform a step in hibernating or resuming | |
11004 | + * | |
11005 | + * Perform a step in hibernating or resuming an image. This abstraction | |
11006 | + * is in preparation for implementing cluster support, and perhaps replacing | |
11007 | + * uswsusp too (haven't looked whether that's possible yet). | |
11008 | + **/ | |
11009 | +int do_toi_step(int step) | |
11010 | +{ | |
11011 | + switch (step) { | |
11012 | + case STEP_HIBERNATE_PREPARE_IMAGE: | |
11013 | + return do_prepare_image(); | |
11014 | + case STEP_HIBERNATE_SAVE_IMAGE: | |
11015 | + return do_save_image(); | |
11016 | + case STEP_HIBERNATE_POWERDOWN: | |
11017 | + return do_post_image_write(); | |
11018 | + case STEP_RESUME_CAN_RESUME: | |
11019 | + return do_check_can_resume(); | |
11020 | + case STEP_RESUME_LOAD_PS1: | |
11021 | + return do_load_atomic_copy(); | |
11022 | + case STEP_RESUME_DO_RESTORE: | |
11023 | + /* | |
11024 | + * If we succeed, this doesn't return. | |
11025 | + * Instead, we return from do_save_image() in the | |
11026 | + * hibernated kernel. | |
11027 | + */ | |
11028 | + return toi_atomic_restore(); | |
11029 | + case STEP_RESUME_ALT_IMAGE: | |
11030 | + printk(KERN_INFO "Trying to resume alternate image.\n"); | |
11031 | + toi_in_hibernate = 0; | |
11032 | + save_restore_alt_param(SAVE, NOQUIET); | |
11033 | + prepare_restore_load_alt_image(1); | |
11034 | + if (!do_check_can_resume()) { | |
11035 | + printk(KERN_INFO "Nothing to resume from.\n"); | |
11036 | + goto out; | |
11037 | + } | |
11038 | + if (!do_load_atomic_copy()) | |
11039 | + toi_atomic_restore(); | |
11040 | + | |
11041 | + printk(KERN_INFO "Failed to load image.\n"); | |
11042 | +out: | |
11043 | + prepare_restore_load_alt_image(0); | |
11044 | + save_restore_alt_param(RESTORE, NOQUIET); | |
11045 | + break; | |
11046 | + case STEP_CLEANUP: | |
e999739a | 11047 | + do_cleanup(1, 0); |
2380c486 JR |
11048 | + break; |
11049 | + case STEP_QUIET_CLEANUP: | |
e999739a | 11050 | + do_cleanup(0, 0); |
2380c486 JR |
11051 | + break; |
11052 | + } | |
11053 | + | |
11054 | + return 0; | |
11055 | +} | |
11056 | +EXPORT_SYMBOL_GPL(do_toi_step); | |
11057 | + | |
11058 | +/* -- Functions for kickstarting a hibernate or resume --- */ | |
11059 | + | |
11060 | +/** | |
9474138d | 11061 | + * toi_try_resume - try to do the steps in resuming |
2380c486 JR |
11062 | + * |
11063 | + * Check if we have an image and if so try to resume. Clear the status | |
11064 | + * flags too. | |
11065 | + **/ | |
9474138d | 11066 | +void toi_try_resume(void) |
2380c486 JR |
11067 | +{ |
11068 | + set_toi_state(TOI_TRYING_TO_RESUME); | |
11069 | + resume_attempted = 1; | |
11070 | + | |
11071 | + current->flags |= PF_MEMALLOC; | |
11072 | + | |
11073 | + if (do_toi_step(STEP_RESUME_CAN_RESUME) && | |
11074 | + !do_toi_step(STEP_RESUME_LOAD_PS1)) | |
11075 | + do_toi_step(STEP_RESUME_DO_RESTORE); | |
11076 | + | |
e999739a | 11077 | + do_cleanup(0, 0); |
2380c486 JR |
11078 | + |
11079 | + current->flags &= ~PF_MEMALLOC; | |
11080 | + | |
11081 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); | |
11082 | + clear_toi_state(TOI_TRYING_TO_RESUME); | |
11083 | + clear_toi_state(TOI_NOW_RESUMING); | |
11084 | +} | |
11085 | + | |
11086 | +/** | |
9474138d | 11087 | + * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume |
2380c486 | 11088 | + * |
9474138d | 11089 | + * Wrapper for when toi_try_resume is called from swsusp resume path, |
2380c486 JR |
11090 | + * rather than from echo > /sys/power/tuxonice/do_resume. |
11091 | + **/ | |
9474138d | 11092 | +static void toi_sys_power_disk_try_resume(void) |
2380c486 JR |
11093 | +{ |
11094 | + resume_attempted = 1; | |
11095 | + | |
11096 | + /* | |
11097 | + * There's a comment in kernel/power/disk.c that indicates | |
11098 | + * we should be able to use mutex_lock_nested below. That | |
11099 | + * doesn't seem to cut it, though, so let's just turn lockdep | |
11100 | + * off for now. | |
11101 | + */ | |
11102 | + lockdep_off(); | |
11103 | + | |
11104 | + if (toi_start_anything(SYSFS_RESUMING)) | |
11105 | + goto out; | |
11106 | + | |
9474138d | 11107 | + toi_try_resume(); |
2380c486 JR |
11108 | + |
11109 | + /* | |
11110 | + * For initramfs, we have to clear the boot time | |
11111 | + * flag after trying to resume | |
11112 | + */ | |
11113 | + clear_toi_state(TOI_BOOT_TIME); | |
11114 | + | |
11115 | + toi_finish_anything(SYSFS_RESUMING); | |
11116 | +out: | |
11117 | + lockdep_on(); | |
11118 | +} | |
11119 | + | |
11120 | +/** | |
9474138d | 11121 | + * toi_try_hibernate - try to start a hibernation cycle |
2380c486 JR |
11122 | + * |
11123 | + * Start a hibernation cycle, coming in from either | |
11124 | + * echo > /sys/power/tuxonice/do_suspend | |
11125 | + * | |
11126 | + * or | |
11127 | + * | |
11128 | + * echo disk > /sys/power/state | |
11129 | + * | |
11130 | + * In the latter case, we come in without pm_sem taken; in the | |
11131 | + * former, it has been taken. | |
11132 | + **/ | |
9474138d | 11133 | +int toi_try_hibernate(void) |
2380c486 JR |
11134 | +{ |
11135 | + int result = 0, sys_power_disk = 0, retries = 0; | |
11136 | + | |
11137 | + if (!mutex_is_locked(&tuxonice_in_use)) { | |
11138 | + /* Came in via /sys/power/disk */ | |
11139 | + if (toi_start_anything(SYSFS_HIBERNATING)) | |
11140 | + return -EBUSY; | |
11141 | + sys_power_disk = 1; | |
11142 | + } | |
11143 | + | |
11144 | + current->flags |= PF_MEMALLOC; | |
11145 | + | |
11146 | + if (test_toi_state(TOI_CLUSTER_MODE)) { | |
11147 | + toi_initiate_cluster_hibernate(); | |
11148 | + goto out; | |
11149 | + } | |
11150 | + | |
11151 | +prepare: | |
11152 | + result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); | |
11153 | + | |
11154 | + if (result || test_action_state(TOI_FREEZER_TEST)) | |
11155 | + goto out; | |
11156 | + | |
11157 | + result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); | |
11158 | + | |
11159 | + if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) { | |
11160 | + if (retries < 2) { | |
e999739a | 11161 | + do_cleanup(0, 1); |
2380c486 | 11162 | + retries++; |
e999739a | 11163 | + clear_result_state(TOI_ABORTED); |
2380c486 JR |
11164 | + extra_pd1_pages_allowance = extra_pd1_pages_used + 500; |
11165 | + printk(KERN_INFO "Automatically adjusting the extra" | |
11166 | + " pages allowance to %ld and restarting.\n", | |
11167 | + extra_pd1_pages_allowance); | |
11168 | + goto prepare; | |
11169 | + } | |
11170 | + | |
11171 | + printk(KERN_INFO "Adjusted extra pages allowance twice and " | |
11172 | + "still couldn't hibernate successfully. Giving up."); | |
11173 | + } | |
11174 | + | |
11175 | + /* This code runs at resume time too! */ | |
11176 | + if (!result && toi_in_hibernate) | |
11177 | + result = do_toi_step(STEP_HIBERNATE_POWERDOWN); | |
11178 | +out: | |
e999739a | 11179 | + do_cleanup(1, 0); |
2380c486 JR |
11180 | + current->flags &= ~PF_MEMALLOC; |
11181 | + | |
11182 | + if (sys_power_disk) | |
11183 | + toi_finish_anything(SYSFS_HIBERNATING); | |
11184 | + | |
11185 | + return result; | |
11186 | +} | |
11187 | + | |
11188 | +/* | |
11189 | + * channel_no: If !0, -c <channel_no> is added to args (userui). | |
11190 | + */ | |
11191 | +int toi_launch_userspace_program(char *command, int channel_no, | |
11192 | + enum umh_wait wait, int debug) | |
11193 | +{ | |
11194 | + int retval; | |
11195 | + static char *envp[] = { | |
11196 | + "HOME=/", | |
11197 | + "TERM=linux", | |
11198 | + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | |
11199 | + NULL }; | |
11200 | + static char *argv[] = | |
11201 | + { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; | |
11202 | + char *channel = NULL; | |
11203 | + int arg = 0, size; | |
11204 | + char test_read[255]; | |
11205 | + char *orig_posn = command; | |
11206 | + | |
11207 | + if (!strlen(orig_posn)) | |
11208 | + return 1; | |
11209 | + | |
11210 | + if (channel_no) { | |
11211 | + channel = toi_kzalloc(4, 6, GFP_KERNEL); | |
11212 | + if (!channel) { | |
11213 | + printk(KERN_INFO "Failed to allocate memory in " | |
11214 | + "preparing to launch userspace program.\n"); | |
11215 | + return 1; | |
11216 | + } | |
11217 | + } | |
11218 | + | |
11219 | + /* Up to 6 args supported */ | |
11220 | + while (arg < 6) { | |
11221 | + sscanf(orig_posn, "%s", test_read); | |
11222 | + size = strlen(test_read); | |
11223 | + if (!(size)) | |
11224 | + break; | |
11225 | + argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP); | |
11226 | + strcpy(argv[arg], test_read); | |
11227 | + orig_posn += size + 1; | |
11228 | + *test_read = 0; | |
11229 | + arg++; | |
11230 | + } | |
11231 | + | |
11232 | + if (channel_no) { | |
11233 | + sprintf(channel, "-c%d", channel_no); | |
11234 | + argv[arg] = channel; | |
11235 | + } else | |
11236 | + arg--; | |
11237 | + | |
11238 | + if (debug) { | |
11239 | + argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP); | |
11240 | + strcpy(argv[arg], "--debug"); | |
11241 | + } | |
11242 | + | |
11243 | + retval = call_usermodehelper(argv[0], argv, envp, wait); | |
11244 | + | |
11245 | + /* | |
11246 | + * If the program reports an error, retval = 256. Don't complain | |
11247 | + * about that here. | |
11248 | + */ | |
11249 | + if (retval && retval != 256) | |
e999739a | 11250 | + printk(KERN_ERR "Failed to launch userspace program '%s': " |
11251 | + "Error %d\n", command, retval); | |
2380c486 JR |
11252 | + |
11253 | + { | |
11254 | + int i; | |
11255 | + for (i = 0; i < arg; i++) | |
11256 | + if (argv[i] && argv[i] != channel) | |
9474138d | 11257 | + toi_kfree(5, argv[i], sizeof (*argv[i])); |
2380c486 JR |
11258 | + } |
11259 | + | |
9474138d | 11260 | + toi_kfree(4, channel, sizeof(*channel)); |
2380c486 JR |
11261 | + |
11262 | + return retval; | |
11263 | +} | |
11264 | + | |
11265 | +/* | |
11266 | + * This array contains entries that are automatically registered at | |
11267 | + * boot. Modules and the console code register their own entries separately. | |
11268 | + */ | |
11269 | +static struct toi_sysfs_data sysfs_params[] = { | |
11270 | + SYSFS_LONG("extra_pages_allowance", SYSFS_RW, | |
11271 | + &extra_pd1_pages_allowance, 0, LONG_MAX, 0), | |
11272 | + SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read, | |
11273 | + image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL), | |
11274 | + SYSFS_STRING("resume", SYSFS_RW, resume_file, 255, | |
11275 | + SYSFS_NEEDS_SM_FOR_WRITE, | |
11276 | + attempt_to_parse_resume_device2), | |
11277 | + SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255, | |
11278 | + SYSFS_NEEDS_SM_FOR_WRITE, | |
11279 | + attempt_to_parse_alt_resume_param), | |
11280 | + SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0, | |
11281 | + NULL), | |
11282 | + SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action, | |
11283 | + TOI_IGNORE_ROOTFS, 0), | |
11284 | + SYSFS_INT("image_size_limit", SYSFS_RW, &image_size_limit, -2, | |
11285 | + INT_MAX, 0, NULL), | |
11286 | + SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0), | |
11287 | + SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action, | |
11288 | + TOI_NO_MULTITHREADED_IO, 0), | |
11289 | + SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action, | |
11290 | + TOI_NO_FLUSHER_THREAD, 0), | |
11291 | + SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action, | |
11292 | + TOI_PAGESET2_FULL, 0), | |
11293 | + SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0), | |
11294 | + SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action, | |
11295 | + TOI_REPLACE_SWSUSP, 0), | |
11296 | + SYSFS_STRING("resume_commandline", SYSFS_RW, | |
11297 | + toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0, | |
11298 | + NULL), | |
11299 | + SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL), | |
11300 | + SYSFS_BIT("no_load_direct", SYSFS_RW, &toi_bkd.toi_action, | |
11301 | + TOI_NO_DIRECT_LOAD, 0), | |
11302 | + SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action, | |
11303 | + TOI_FREEZER_TEST, 0), | |
11304 | + SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0), | |
11305 | + SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action, | |
11306 | + TOI_TEST_FILTER_SPEED, 0), | |
11307 | + SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action, | |
11308 | + TOI_NO_PAGESET2, 0), | |
11309 | + SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action, | |
11310 | + TOI_NO_PS2_IF_UNNEEDED, 0), | |
11311 | + SYSFS_BIT("late_cpu_hotplug", SYSFS_RW, &toi_bkd.toi_action, | |
11312 | + TOI_LATE_CPU_HOTPLUG, 0), | |
2380c486 JR |
11313 | +#ifdef CONFIG_TOI_KEEP_IMAGE |
11314 | + SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE, | |
11315 | + 0), | |
11316 | +#endif | |
11317 | +}; | |
11318 | + | |
11319 | +static struct toi_core_fns my_fns = { | |
11320 | + .get_nonconflicting_page = __toi_get_nonconflicting_page, | |
11321 | + .post_context_save = __toi_post_context_save, | |
9474138d AM |
11322 | + .try_hibernate = toi_try_hibernate, |
11323 | + .try_resume = toi_sys_power_disk_try_resume, | |
2380c486 JR |
11324 | +}; |
11325 | + | |
11326 | +/** | |
11327 | + * core_load - initialisation of TuxOnIce core | |
11328 | + * | |
11329 | + * Initialise the core, beginning with sysfs. Checksum and so on are part of | |
11330 | + * the core, but have their own initialisation routines because they either | |
11331 | + * aren't compiled in all the time or have their own subdirectories. | |
11332 | + **/ | |
11333 | +static __init int core_load(void) | |
11334 | +{ | |
11335 | + int i, | |
11336 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
11337 | + | |
11338 | + printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION | |
11339 | + " (http://tuxonice.net)\n"); | |
2380c486 JR |
11340 | + |
11341 | + if (toi_sysfs_init()) | |
11342 | + return 1; | |
11343 | + | |
11344 | + for (i = 0; i < numfiles; i++) | |
11345 | + toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
11346 | + | |
11347 | + toi_core_fns = &my_fns; | |
11348 | + | |
11349 | + if (toi_alloc_init()) | |
11350 | + return 1; | |
11351 | + if (toi_checksum_init()) | |
11352 | + return 1; | |
11353 | + if (toi_usm_init()) | |
11354 | + return 1; | |
11355 | + if (toi_ui_init()) | |
11356 | + return 1; | |
11357 | + if (toi_poweroff_init()) | |
11358 | + return 1; | |
11359 | + if (toi_cluster_init()) | |
11360 | + return 1; | |
11361 | + | |
11362 | + return 0; | |
11363 | +} | |
11364 | + | |
11365 | +#ifdef MODULE | |
11366 | +/** | |
11367 | + * core_unload: Prepare to unload the core code. | |
11368 | + **/ | |
11369 | +static __exit void core_unload(void) | |
11370 | +{ | |
11371 | + int i, | |
11372 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
11373 | + | |
11374 | + toi_alloc_exit(); | |
11375 | + toi_checksum_exit(); | |
11376 | + toi_poweroff_exit(); | |
11377 | + toi_ui_exit(); | |
11378 | + toi_usm_exit(); | |
11379 | + toi_cluster_exit(); | |
11380 | + | |
11381 | + for (i = 0; i < numfiles; i++) | |
11382 | + toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
11383 | + | |
11384 | + toi_core_fns = NULL; | |
11385 | + | |
11386 | + toi_sysfs_exit(); | |
11387 | +} | |
11388 | +MODULE_LICENSE("GPL"); | |
11389 | +module_init(core_load); | |
11390 | +module_exit(core_unload); | |
11391 | +#else | |
11392 | +late_initcall(core_load); | |
11393 | +#endif | |
11394 | diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c | |
11395 | new file mode 100644 | |
9474138d | 11396 | index 0000000..ca21958 |
2380c486 JR |
11397 | --- /dev/null |
11398 | +++ b/kernel/power/tuxonice_io.c | |
9474138d | 11399 | @@ -0,0 +1,1536 @@ |
2380c486 JR |
11400 | +/* |
11401 | + * kernel/power/tuxonice_io.c | |
11402 | + * | |
11403 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
11404 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
11405 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
11406 | + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net) | |
11407 | + * | |
11408 | + * This file is released under the GPLv2. | |
11409 | + * | |
11410 | + * It contains high level IO routines for hibernating. | |
11411 | + * | |
11412 | + */ | |
11413 | + | |
11414 | +#include <linux/suspend.h> | |
11415 | +#include <linux/version.h> | |
11416 | +#include <linux/utsname.h> | |
11417 | +#include <linux/mount.h> | |
11418 | +#include <linux/highmem.h> | |
2380c486 JR |
11419 | +#include <linux/kthread.h> |
11420 | +#include <linux/cpu.h> | |
9474138d | 11421 | +#include <linux/fs_struct.h> |
2380c486 JR |
11422 | +#include <asm/tlbflush.h> |
11423 | + | |
11424 | +#include "tuxonice.h" | |
11425 | +#include "tuxonice_modules.h" | |
11426 | +#include "tuxonice_pageflags.h" | |
11427 | +#include "tuxonice_io.h" | |
11428 | +#include "tuxonice_ui.h" | |
11429 | +#include "tuxonice_storage.h" | |
11430 | +#include "tuxonice_prepare_image.h" | |
11431 | +#include "tuxonice_extent.h" | |
11432 | +#include "tuxonice_sysfs.h" | |
11433 | +#include "tuxonice_builtin.h" | |
11434 | +#include "tuxonice_checksum.h" | |
11435 | +#include "tuxonice_alloc.h" | |
11436 | +char alt_resume_param[256]; | |
11437 | + | |
11438 | +/* Variables shared between threads and updated under the mutex */ | |
11439 | +static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result; | |
11440 | +static int io_index, io_nextupdate, io_pc, io_pc_step; | |
11441 | +static DEFINE_MUTEX(io_mutex); | |
11442 | +static DEFINE_PER_CPU(struct page *, last_sought); | |
11443 | +static DEFINE_PER_CPU(struct page *, last_high_page); | |
11444 | +static DEFINE_PER_CPU(char *, checksum_locn); | |
11445 | +static DEFINE_PER_CPU(struct pbe *, last_low_page); | |
11446 | +static atomic_t io_count; | |
11447 | +atomic_t toi_io_workers; | |
11448 | +EXPORT_SYMBOL_GPL(toi_io_workers); | |
11449 | + | |
11450 | +DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher); | |
11451 | +EXPORT_SYMBOL_GPL(toi_io_queue_flusher); | |
11452 | + | |
11453 | +int toi_bio_queue_flusher_should_finish; | |
11454 | +EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish); | |
11455 | + | |
11456 | +/* Indicates that this thread should be used for checking throughput */ | |
11457 | +#define MONITOR ((void *) 1) | |
11458 | + | |
11459 | +/** | |
11460 | + * toi_attempt_to_parse_resume_device - determine if we can hibernate | |
11461 | + * | |
11462 | + * Can we hibernate, using the current resume= parameter? | |
11463 | + **/ | |
11464 | +int toi_attempt_to_parse_resume_device(int quiet) | |
11465 | +{ | |
11466 | + struct list_head *Allocator; | |
11467 | + struct toi_module_ops *thisAllocator; | |
11468 | + int result, returning = 0; | |
11469 | + | |
11470 | + if (toi_activate_storage(0)) | |
11471 | + return 0; | |
11472 | + | |
11473 | + toiActiveAllocator = NULL; | |
11474 | + clear_toi_state(TOI_RESUME_DEVICE_OK); | |
11475 | + clear_toi_state(TOI_CAN_RESUME); | |
11476 | + clear_result_state(TOI_ABORTED); | |
11477 | + | |
11478 | + if (!toiNumAllocators) { | |
11479 | + if (!quiet) | |
11480 | + printk(KERN_INFO "TuxOnIce: No storage allocators have " | |
11481 | + "been registered. Hibernating will be " | |
11482 | + "disabled.\n"); | |
11483 | + goto cleanup; | |
11484 | + } | |
11485 | + | |
11486 | + if (!resume_file[0]) { | |
11487 | + if (!quiet) | |
e999739a | 11488 | + printk(KERN_INFO "TuxOnIce: Resume= parameter is empty." |
2380c486 JR |
11489 | + " Hibernating will be disabled.\n"); |
11490 | + goto cleanup; | |
11491 | + } | |
11492 | + | |
11493 | + list_for_each(Allocator, &toiAllocators) { | |
11494 | + thisAllocator = list_entry(Allocator, struct toi_module_ops, | |
11495 | + type_list); | |
11496 | + | |
11497 | + /* | |
11498 | + * Not sure why you'd want to disable an allocator, but | |
11499 | + * we should honour the flag if we're providing it | |
11500 | + */ | |
11501 | + if (!thisAllocator->enabled) | |
11502 | + continue; | |
11503 | + | |
11504 | + result = thisAllocator->parse_sig_location( | |
11505 | + resume_file, (toiNumAllocators == 1), | |
11506 | + quiet); | |
11507 | + | |
11508 | + switch (result) { | |
11509 | + case -EINVAL: | |
11510 | + /* For this allocator, but not a valid | |
11511 | + * configuration. Error already printed. */ | |
11512 | + goto cleanup; | |
11513 | + | |
11514 | + case 0: | |
11515 | + /* For this allocator and valid. */ | |
11516 | + toiActiveAllocator = thisAllocator; | |
11517 | + | |
11518 | + set_toi_state(TOI_RESUME_DEVICE_OK); | |
11519 | + set_toi_state(TOI_CAN_RESUME); | |
11520 | + returning = 1; | |
11521 | + goto cleanup; | |
11522 | + } | |
11523 | + } | |
11524 | + if (!quiet) | |
e999739a | 11525 | + printk(KERN_INFO "TuxOnIce: No matching enabled allocator " |
11526 | + "found. Resuming disabled.\n"); | |
2380c486 JR |
11527 | +cleanup: |
11528 | + toi_deactivate_storage(0); | |
11529 | + return returning; | |
11530 | +} | |
11531 | +EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device); | |
11532 | + | |
11533 | +void attempt_to_parse_resume_device2(void) | |
11534 | +{ | |
11535 | + toi_prepare_usm(); | |
11536 | + toi_attempt_to_parse_resume_device(0); | |
11537 | + toi_cleanup_usm(); | |
11538 | +} | |
11539 | +EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2); | |
11540 | + | |
11541 | +void save_restore_alt_param(int replace, int quiet) | |
11542 | +{ | |
11543 | + static char resume_param_save[255]; | |
11544 | + static unsigned long toi_state_save; | |
11545 | + | |
11546 | + if (replace) { | |
11547 | + toi_state_save = toi_state; | |
11548 | + strcpy(resume_param_save, resume_file); | |
11549 | + strcpy(resume_file, alt_resume_param); | |
11550 | + } else { | |
11551 | + strcpy(resume_file, resume_param_save); | |
11552 | + toi_state = toi_state_save; | |
11553 | + } | |
11554 | + toi_attempt_to_parse_resume_device(quiet); | |
11555 | +} | |
11556 | + | |
11557 | +void attempt_to_parse_alt_resume_param(void) | |
11558 | +{ | |
11559 | + int ok = 0; | |
11560 | + | |
11561 | + /* Temporarily set resume_file to alt_resume_param (the poweroff value) */ | |
11562 | + if (!strlen(alt_resume_param)) | |
11563 | + return; | |
11564 | + | |
e999739a | 11565 | + printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n"); |
2380c486 JR |
11566 | + save_restore_alt_param(SAVE, NOQUIET); |
11567 | + if (test_toi_state(TOI_CAN_RESUME)) | |
11568 | + ok = 1; | |
11569 | + | |
11570 | + printk(KERN_INFO "=== Done ===\n"); | |
11571 | + save_restore_alt_param(RESTORE, QUIET); | |
11572 | + | |
11573 | + /* If not ok, clear the string */ | |
11574 | + if (ok) | |
11575 | + return; | |
11576 | + | |
11577 | + printk(KERN_INFO "Can't resume from that location; clearing " | |
11578 | + "alt_resume_param.\n"); | |
11579 | + alt_resume_param[0] = '\0'; | |
11580 | +} | |
11581 | + | |
11582 | +/** | |
11583 | + * noresume_reset_modules - reset data structures in case of non resuming | |
11584 | + * | |
11585 | + * When we read the start of an image, modules (and especially the | |
11586 | + * active allocator) might need to reset data structures if we | |
11587 | + * decide to remove the image rather than resuming from it. | |
11588 | + **/ | |
11589 | +static void noresume_reset_modules(void) | |
11590 | +{ | |
11591 | + struct toi_module_ops *this_filter; | |
11592 | + | |
11593 | + list_for_each_entry(this_filter, &toi_filters, type_list) | |
11594 | + if (this_filter->noresume_reset) | |
11595 | + this_filter->noresume_reset(); | |
11596 | + | |
11597 | + if (toiActiveAllocator && toiActiveAllocator->noresume_reset) | |
11598 | + toiActiveAllocator->noresume_reset(); | |
11599 | +} | |
11600 | + | |
11601 | +/** | |
11602 | + * fill_toi_header - fill the hibernate header structure | |
11603 | + * @sh: Header data structure (struct toi_header) to be filled. | |
11604 | + **/ | |
11605 | +static int fill_toi_header(struct toi_header *sh) | |
11606 | +{ | |
11607 | + int i, error; | |
11608 | + | |
e999739a | 11609 | + error = init_header((struct swsusp_info *) sh); |
2380c486 JR |
11610 | + if (error) |
11611 | + return error; | |
11612 | + | |
11613 | + sh->pagedir = pagedir1; | |
11614 | + sh->pageset_2_size = pagedir2.size; | |
11615 | + sh->param0 = toi_result; | |
11616 | + sh->param1 = toi_bkd.toi_action; | |
11617 | + sh->param2 = toi_bkd.toi_debug_state; | |
11618 | + sh->param3 = toi_bkd.toi_default_console_level; | |
11619 | + sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev; | |
11620 | + for (i = 0; i < 4; i++) | |
11621 | + sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2]; | |
11622 | + sh->bkd = boot_kernel_data_buffer; | |
11623 | + return 0; | |
11624 | +} | |
11625 | + | |
11626 | +/** | |
11627 | + * rw_init_modules - initialize modules | |
11628 | + * @rw: Whether we are reading or writing an image. | |
11629 | + * @which: Section of the image being processed. | |
11630 | + * | |
11631 | + * Iterate over modules, preparing the ones that will be used to read or write | |
11632 | + * data. | |
11633 | + **/ | |
11634 | +static int rw_init_modules(int rw, int which) | |
11635 | +{ | |
11636 | + struct toi_module_ops *this_module; | |
11637 | + /* Initialise page transformers */ | |
11638 | + list_for_each_entry(this_module, &toi_filters, type_list) { | |
11639 | + if (!this_module->enabled) | |
11640 | + continue; | |
11641 | + if (this_module->rw_init && this_module->rw_init(rw, which)) { | |
11642 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
11643 | + "Failed to initialize the %s filter.", | |
11644 | + this_module->name); | |
11645 | + return 1; | |
11646 | + } | |
11647 | + } | |
11648 | + | |
11649 | + /* Initialise allocator */ | |
11650 | + if (toiActiveAllocator->rw_init(rw, which)) { | |
11651 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
11652 | + "Failed to initialise the allocator."); | |
11653 | + return 1; | |
11654 | + } | |
11655 | + | |
11656 | + /* Initialise other modules */ | |
11657 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
11658 | + if (!this_module->enabled || | |
11659 | + this_module->type == FILTER_MODULE || | |
11660 | + this_module->type == WRITER_MODULE) | |
11661 | + continue; | |
11662 | + if (this_module->rw_init && this_module->rw_init(rw, which)) { | |
11663 | + set_abort_result(TOI_FAILED_MODULE_INIT); | |
11664 | + printk(KERN_INFO "Setting aborted flag due to module " | |
11665 | + "init failure.\n"); | |
11666 | + return 1; | |
11667 | + } | |
11668 | + } | |
11669 | + | |
11670 | + return 0; | |
11671 | +} | |
11672 | + | |
11673 | +/** | |
11674 | + * rw_cleanup_modules - cleanup modules | |
11675 | + * @rw: Whether we are reading or writing an image. | |
11676 | + * | |
11677 | + * Cleanup components after reading or writing a set of pages. | |
11678 | + * Only the allocator may fail. | |
11679 | + **/ | |
11680 | +static int rw_cleanup_modules(int rw) | |
11681 | +{ | |
11682 | + struct toi_module_ops *this_module; | |
11683 | + int result = 0; | |
11684 | + | |
11685 | + /* Cleanup other modules */ | |
11686 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
11687 | + if (!this_module->enabled || | |
11688 | + this_module->type == FILTER_MODULE || | |
11689 | + this_module->type == WRITER_MODULE) | |
11690 | + continue; | |
11691 | + if (this_module->rw_cleanup) | |
11692 | + result |= this_module->rw_cleanup(rw); | |
11693 | + } | |
11694 | + | |
11695 | + /* Flush data and cleanup */ | |
11696 | + list_for_each_entry(this_module, &toi_filters, type_list) { | |
11697 | + if (!this_module->enabled) | |
11698 | + continue; | |
11699 | + if (this_module->rw_cleanup) | |
11700 | + result |= this_module->rw_cleanup(rw); | |
11701 | + } | |
11702 | + | |
11703 | + result |= toiActiveAllocator->rw_cleanup(rw); | |
11704 | + | |
11705 | + return result; | |
11706 | +} | |
11707 | + | |
11708 | +static struct page *copy_page_from_orig_page(struct page *orig_page) | |
11709 | +{ | |
11710 | + int is_high = PageHighMem(orig_page), index, min, max; | |
11711 | + struct page *high_page = NULL, | |
11712 | + **my_last_high_page = &__get_cpu_var(last_high_page), | |
11713 | + **my_last_sought = &__get_cpu_var(last_sought); | |
11714 | + struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page); | |
11715 | + void *compare; | |
11716 | + | |
11717 | + if (is_high) { | |
11718 | + if (*my_last_sought && *my_last_high_page && | |
11719 | + *my_last_sought < orig_page) | |
11720 | + high_page = *my_last_high_page; | |
11721 | + else | |
11722 | + high_page = (struct page *) restore_highmem_pblist; | |
11723 | + this = (struct pbe *) kmap(high_page); | |
11724 | + compare = orig_page; | |
11725 | + } else { | |
11726 | + if (*my_last_sought && *my_last_low_page && | |
11727 | + *my_last_sought < orig_page) | |
11728 | + this = *my_last_low_page; | |
11729 | + else | |
11730 | + this = restore_pblist; | |
11731 | + compare = page_address(orig_page); | |
11732 | + } | |
11733 | + | |
11734 | + *my_last_sought = orig_page; | |
11735 | + | |
11736 | + /* Locate page containing pbe */ | |
11737 | + while (this[PBES_PER_PAGE - 1].next && | |
11738 | + this[PBES_PER_PAGE - 1].orig_address < compare) { | |
11739 | + if (is_high) { | |
11740 | + struct page *next_high_page = (struct page *) | |
11741 | + this[PBES_PER_PAGE - 1].next; | |
11742 | + kunmap(high_page); | |
11743 | + this = kmap(next_high_page); | |
11744 | + high_page = next_high_page; | |
11745 | + } else | |
11746 | + this = this[PBES_PER_PAGE - 1].next; | |
11747 | + } | |
11748 | + | |
11749 | + /* Do a binary search within the page */ | |
11750 | + min = 0; | |
11751 | + max = PBES_PER_PAGE; | |
11752 | + index = PBES_PER_PAGE / 2; | |
11753 | + while (max - min) { | |
11754 | + if (!this[index].orig_address || | |
11755 | + this[index].orig_address > compare) | |
11756 | + max = index; | |
11757 | + else if (this[index].orig_address == compare) { | |
11758 | + if (is_high) { | |
11759 | + struct page *page = this[index].address; | |
11760 | + *my_last_high_page = high_page; | |
11761 | + kunmap(high_page); | |
11762 | + return page; | |
11763 | + } | |
11764 | + *my_last_low_page = this; | |
11765 | + return virt_to_page(this[index].address); | |
11766 | + } else | |
11767 | + min = index; | |
11768 | + index = ((max + min) / 2); | |
11769 | + }; | |
11770 | + | |
11771 | + if (is_high) | |
11772 | + kunmap(high_page); | |
11773 | + | |
11774 | + abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for" | |
11775 | + " orig page %p. This[min].orig_address=%p.\n", orig_page, | |
11776 | + this[index].orig_address); | |
11777 | + return NULL; | |
11778 | +} | |
11779 | + | |
11780 | +/** | |
9474138d AM |
11781 | + * write_next_page - write the next page in a pageset |
11782 | + * @data_pfn: The pfn where the next data to write is located. | |
11783 | + * @my_io_index: The index of the page in the pageset. | |
11784 | + * @write_pfn: The pfn number to write in the image (where the data belongs). | |
11785 | + * @first_filter: Where to send the page (optimisation). | |
11786 | + * | |
11787 | + * Get the pfn of the next page to write, map the page if necessary and do the | |
11788 | + * write. | |
11789 | + **/ | |
11790 | +static int write_next_page(unsigned long *data_pfn, int *my_io_index, | |
11791 | + unsigned long *write_pfn, struct toi_module_ops *first_filter) | |
11792 | +{ | |
11793 | + struct page *page; | |
11794 | + char **my_checksum_locn = &__get_cpu_var(checksum_locn); | |
11795 | + int result = 0, was_present; | |
11796 | + | |
11797 | + *data_pfn = memory_bm_next_pfn(io_map); | |
11798 | + | |
11799 | + /* Another thread could have beaten us to it. */ | |
11800 | + if (*data_pfn == BM_END_OF_MAP) { | |
11801 | + if (atomic_read(&io_count)) { | |
11802 | + printk(KERN_INFO "Ran out of pfns but io_count is " | |
11803 | + "still %d.\n", atomic_read(&io_count)); | |
11804 | + BUG(); | |
11805 | + } | |
11806 | + return -ENODATA; | |
11807 | + } | |
11808 | + | |
11809 | + *my_io_index = io_finish_at - atomic_sub_return(1, &io_count); | |
11810 | + | |
11811 | + memory_bm_clear_bit(io_map, *data_pfn); | |
11812 | + page = pfn_to_page(*data_pfn); | |
11813 | + | |
11814 | + was_present = kernel_page_present(page); | |
11815 | + if (!was_present) | |
11816 | + kernel_map_pages(page, 1, 1); | |
11817 | + | |
11818 | + if (io_pageset == 1) | |
11819 | + *write_pfn = memory_bm_next_pfn(pageset1_map); | |
11820 | + else { | |
11821 | + *write_pfn = *data_pfn; | |
11822 | + *my_checksum_locn = tuxonice_get_next_checksum(); | |
11823 | + } | |
11824 | + | |
11825 | + mutex_unlock(&io_mutex); | |
11826 | + | |
11827 | + if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn)) | |
11828 | + return 1; | |
11829 | + | |
11830 | + result = first_filter->write_page(*write_pfn, page, PAGE_SIZE); | |
11831 | + | |
11832 | + if (!was_present) | |
11833 | + kernel_map_pages(page, 1, 0); | |
11834 | + | |
11835 | + return result; | |
11836 | +} | |
11837 | + | |
11838 | +/** | |
11839 | + * read_next_page - read the next page in a pageset | |
11840 | + * @my_io_index: The index of the page in the pageset. | |
11841 | + * @write_pfn: The pfn in which the data belongs. | |
11842 | + * | |
11843 | + * Read a page of the image into our buffer. | |
11844 | + **/ | |
11845 | + | |
11846 | +static int read_next_page(int *my_io_index, unsigned long *write_pfn, | |
11847 | + struct page *buffer, struct toi_module_ops *first_filter) | |
11848 | +{ | |
11849 | + unsigned int buf_size; | |
11850 | + int result; | |
11851 | + | |
11852 | + *my_io_index = io_finish_at - atomic_sub_return(1, &io_count); | |
11853 | + mutex_unlock(&io_mutex); | |
11854 | + | |
11855 | + /* | |
11856 | + * Are we aborting? If so, don't submit any more I/O as | |
11857 | + * resetting the resume_attempted flag (from ui.c) will | |
11858 | + * clear the bdev flags, making this thread oops. | |
11859 | + */ | |
11860 | + if (unlikely(test_toi_state(TOI_STOP_RESUME))) { | |
11861 | + atomic_dec(&toi_io_workers); | |
11862 | + if (!atomic_read(&toi_io_workers)) | |
11863 | + set_toi_state(TOI_IO_STOPPED); | |
11864 | + while (1) | |
11865 | + schedule(); | |
11866 | + } | |
11867 | + | |
11868 | + /* See toi_bio_read_page in tuxonice_block_io.c: | |
11869 | + * read the next page in the image. | |
11870 | + */ | |
11871 | + result = first_filter->read_page(write_pfn, buffer, &buf_size); | |
11872 | + if (buf_size != PAGE_SIZE) { | |
11873 | + abort_hibernate(TOI_FAILED_IO, | |
11874 | + "I/O pipeline returned %d bytes instead" | |
11875 | + " of %ud.\n", buf_size, PAGE_SIZE); | |
11876 | + mutex_lock(&io_mutex); | |
11877 | + return -ENODATA; | |
11878 | + } | |
11879 | + | |
11880 | + return result; | |
11881 | +} | |
11882 | + | |
11883 | +/** | |
11884 | + * use_read_page - copy a page just read into the buffer to its destination page | |
11885 | + **/ | |
11886 | +static void use_read_page(unsigned long write_pfn, struct page *buffer) | |
11887 | +{ | |
11888 | + struct page *final_page = pfn_to_page(write_pfn), | |
11889 | + *copy_page = final_page; | |
11890 | + char *virt, *buffer_virt; | |
11891 | + | |
11892 | + if (io_pageset == 1 && !load_direct(final_page)) { | |
11893 | + copy_page = copy_page_from_orig_page(final_page); | |
11894 | + BUG_ON(!copy_page); | |
11895 | + } | |
11896 | + | |
11897 | + if (memory_bm_test_bit(io_map, write_pfn)) { | |
11898 | + int was_present; | |
11899 | + | |
11900 | + virt = kmap(copy_page); | |
11901 | + buffer_virt = kmap(buffer); | |
11902 | + was_present = kernel_page_present(copy_page); | |
11903 | + if (!was_present) | |
11904 | + kernel_map_pages(copy_page, 1, 1); | |
11905 | + memcpy(virt, buffer_virt, PAGE_SIZE); | |
11906 | + if (!was_present) | |
11907 | + kernel_map_pages(copy_page, 1, 0); | |
11908 | + kunmap(copy_page); | |
11909 | + kunmap(buffer); | |
11910 | + memory_bm_clear_bit(io_map, write_pfn); | |
11911 | + } else { | |
11912 | + mutex_lock(&io_mutex); | |
11913 | + atomic_inc(&io_count); | |
11914 | + mutex_unlock(&io_mutex); | |
11915 | + } | |
11916 | +} | |
11917 | + | |
11918 | +/** | |
2380c486 JR |
11919 | + * worker_rw_loop - main loop to read/write pages |
11920 | + * | |
11921 | + * The main I/O loop for reading or writing pages. The io_map bitmap is used to | |
11922 | + * track the pages to read/write. | |
11923 | + * If we are reading, the pages are loaded to their final (mapped) pfn. | |
11924 | + **/ | |
11925 | +static int worker_rw_loop(void *data) | |
11926 | +{ | |
11927 | + unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 2, | |
11928 | + jif_index = 1; | |
0ada99ac | 11929 | + int result = 0, my_io_index = 0, last_worker; |
2380c486 JR |
11930 | + struct toi_module_ops *first_filter = toi_get_next_filter(NULL); |
11931 | + struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP); | |
11932 | + | |
11933 | + current->flags |= PF_NOFREEZE; | |
11934 | + | |
11935 | + atomic_inc(&toi_io_workers); | |
11936 | + mutex_lock(&io_mutex); | |
11937 | + | |
11938 | + do { | |
2380c486 JR |
11939 | + if (data && jiffies > next_jiffies) { |
11940 | + next_jiffies += HZ / 2; | |
11941 | + if (toiActiveAllocator->update_throughput_throttle) | |
11942 | + toiActiveAllocator->update_throughput_throttle( | |
11943 | + jif_index); | |
11944 | + jif_index++; | |
11945 | + } | |
11946 | + | |
11947 | + /* | |
11948 | + * What page to use? If reading, don't know yet which page's | |
11949 | + * data will be read, so always use the buffer. If writing, | |
11950 | + * use the copy (Pageset1) or original page (Pageset2), but | |
11951 | + * always write the pfn of the original page. | |
11952 | + */ | |
9474138d AM |
11953 | + if (io_write) |
11954 | + result = write_next_page(&data_pfn, &my_io_index, | |
11955 | + &write_pfn, first_filter); | |
11956 | + else /* Reading */ | |
11957 | + result = read_next_page(&my_io_index, &write_pfn, | |
11958 | + buffer, first_filter); | |
11959 | + | |
11960 | + if (result == -ENODATA) | |
11961 | + break; | |
2380c486 JR |
11962 | + |
11963 | + if (result) { | |
11964 | + io_result = result; | |
11965 | + if (io_write) { | |
11966 | + printk(KERN_INFO "Write chunk returned %d.\n", | |
11967 | + result); | |
11968 | + abort_hibernate(TOI_FAILED_IO, | |
11969 | + "Failed to write a chunk of the " | |
11970 | + "image."); | |
11971 | + mutex_lock(&io_mutex); | |
11972 | + break; | |
11973 | + } | |
11974 | + panic("Read chunk returned (%d)", result); | |
11975 | + } | |
11976 | + | |
11977 | + /* | |
11978 | + * Discard reads of resaved pages while reading ps2 | |
11979 | + * and unwanted pages while rereading ps2 when aborting. | |
11980 | + */ | |
9474138d AM |
11981 | + if (!io_write && !PageResave(pfn_to_page(write_pfn))) |
11982 | + use_read_page(write_pfn, buffer); | |
2380c486 | 11983 | + |
2380c486 JR |
11984 | + if (my_io_index + io_base == io_nextupdate) |
11985 | + io_nextupdate = toi_update_status(my_io_index + | |
11986 | + io_base, io_barmax, " %d/%d MB ", | |
11987 | + MB(io_base+my_io_index+1), MB(io_barmax)); | |
11988 | + | |
11989 | + if (my_io_index == io_pc) { | |
9474138d | 11990 | + printk(KERN_ERR "...%d%%.\n", 20 * io_pc_step); |
2380c486 JR |
11991 | + io_pc_step++; |
11992 | + io_pc = io_finish_at * io_pc_step / 5; | |
11993 | + } | |
11994 | + | |
11995 | + toi_cond_pause(0, NULL); | |
11996 | + | |
11997 | + /* | |
11998 | + * Subtle: If there's less I/O still to be done than threads | |
11999 | + * running, quit. This stops us doing I/O beyond the end of | |
12000 | + * the image when reading. | |
12001 | + * | |
12002 | + * Possible race condition. Two threads could do the test at | |
12003 | + * the same time; one should exit and one should continue. | |
12004 | + * Therefore we take the mutex before comparing and exiting. | |
12005 | + */ | |
12006 | + | |
12007 | + mutex_lock(&io_mutex); | |
12008 | + | |
12009 | + } while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) && | |
12010 | + !(io_write && test_result_state(TOI_ABORTED))); | |
12011 | + | |
12012 | + last_worker = atomic_dec_and_test(&toi_io_workers); | |
12013 | + mutex_unlock(&io_mutex); | |
12014 | + | |
12015 | + if (last_worker) { | |
12016 | + toi_bio_queue_flusher_should_finish = 1; | |
12017 | + wake_up(&toi_io_queue_flusher); | |
0ada99ac | 12018 | + result = toiActiveAllocator->finish_all_io(); |
2380c486 JR |
12019 | + } |
12020 | + | |
12021 | + toi__free_page(28, buffer); | |
12022 | + | |
0ada99ac | 12023 | + return result; |
2380c486 JR |
12024 | +} |
12025 | + | |
12026 | +static int start_other_threads(void) | |
12027 | +{ | |
12028 | + int cpu, num_started = 0; | |
12029 | + struct task_struct *p; | |
12030 | + | |
12031 | + for_each_online_cpu(cpu) { | |
12032 | + if (cpu == smp_processor_id()) | |
12033 | + continue; | |
12034 | + | |
12035 | + p = kthread_create(worker_rw_loop, num_started ? NULL : MONITOR, | |
12036 | + "ktoi_io/%d", cpu); | |
12037 | + if (IS_ERR(p)) { | |
e999739a | 12038 | + printk(KERN_ERR "ktoi_io for %i failed\n", cpu); |
2380c486 JR |
12039 | + continue; |
12040 | + } | |
12041 | + kthread_bind(p, cpu); | |
12042 | + p->flags |= PF_MEMALLOC; | |
12043 | + wake_up_process(p); | |
12044 | + num_started++; | |
12045 | + } | |
12046 | + | |
12047 | + return num_started; | |
12048 | +} | |
12049 | + | |
12050 | +/** | |
12051 | + * do_rw_loop - main highlevel function for reading or writing pages | |
12052 | + * | |
12053 | + * Create the io_map bitmap and call worker_rw_loop to perform I/O operations. | |
12054 | + **/ | |
12055 | +static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags, | |
12056 | + int base, int barmax, int pageset) | |
12057 | +{ | |
0ada99ac | 12058 | + int index = 0, cpu, num_other_threads = 0, result = 0; |
2380c486 JR |
12059 | + unsigned long pfn; |
12060 | + | |
12061 | + if (!finish_at) | |
12062 | + return 0; | |
12063 | + | |
12064 | + io_write = write; | |
12065 | + io_finish_at = finish_at; | |
12066 | + io_base = base; | |
12067 | + io_barmax = barmax; | |
12068 | + io_pageset = pageset; | |
12069 | + io_index = 0; | |
12070 | + io_pc = io_finish_at / 5; | |
12071 | + io_pc_step = 1; | |
12072 | + io_result = 0; | |
12073 | + io_nextupdate = base + 1; | |
12074 | + toi_bio_queue_flusher_should_finish = 0; | |
12075 | + | |
12076 | + for_each_online_cpu(cpu) { | |
12077 | + per_cpu(last_sought, cpu) = NULL; | |
12078 | + per_cpu(last_low_page, cpu) = NULL; | |
12079 | + per_cpu(last_high_page, cpu) = NULL; | |
12080 | + } | |
12081 | + | |
12082 | + /* Ensure all bits clear */ | |
12083 | + memory_bm_clear(io_map); | |
12084 | + | |
12085 | + /* Set the bits for the pages to write */ | |
12086 | + memory_bm_position_reset(pageflags); | |
12087 | + | |
12088 | + pfn = memory_bm_next_pfn(pageflags); | |
12089 | + | |
12090 | + while (pfn != BM_END_OF_MAP && index < finish_at) { | |
12091 | + memory_bm_set_bit(io_map, pfn); | |
12092 | + pfn = memory_bm_next_pfn(pageflags); | |
12093 | + index++; | |
12094 | + } | |
12095 | + | |
12096 | + BUG_ON(index < finish_at); | |
12097 | + | |
12098 | + atomic_set(&io_count, finish_at); | |
12099 | + | |
12100 | + memory_bm_position_reset(pageset1_map); | |
12101 | + | |
12102 | + clear_toi_state(TOI_IO_STOPPED); | |
12103 | + memory_bm_position_reset(io_map); | |
12104 | + | |
12105 | + if (!test_action_state(TOI_NO_MULTITHREADED_IO)) | |
12106 | + num_other_threads = start_other_threads(); | |
12107 | + | |
12108 | + if (!num_other_threads || !toiActiveAllocator->io_flusher || | |
12109 | + test_action_state(TOI_NO_FLUSHER_THREAD)) | |
12110 | + worker_rw_loop(num_other_threads ? NULL : MONITOR); | |
12111 | + else | |
0ada99ac | 12112 | + result = toiActiveAllocator->io_flusher(write); |
2380c486 JR |
12113 | + |
12114 | + while (atomic_read(&toi_io_workers)) | |
12115 | + schedule(); | |
12116 | + | |
12117 | + set_toi_state(TOI_IO_STOPPED); | |
12118 | + if (unlikely(test_toi_state(TOI_STOP_RESUME))) { | |
12119 | + while (1) | |
12120 | + schedule(); | |
12121 | + } | |
12122 | + | |
0ada99ac | 12123 | + if (!io_result && !result && !test_result_state(TOI_ABORTED)) { |
2380c486 JR |
12124 | + unsigned long next; |
12125 | + | |
2380c486 JR |
12126 | + toi_update_status(io_base + io_finish_at, io_barmax, |
12127 | + " %d/%d MB ", | |
12128 | + MB(io_base + io_finish_at), MB(io_barmax)); | |
12129 | + | |
12130 | + memory_bm_position_reset(io_map); | |
12131 | + next = memory_bm_next_pfn(io_map); | |
12132 | + if (next != BM_END_OF_MAP) { | |
12133 | + printk(KERN_INFO "Finished I/O loop but still work to " | |
12134 | + "do?\nFinish at = %d. io_count = %d.\n", | |
12135 | + finish_at, atomic_read(&io_count)); | |
12136 | + printk(KERN_INFO "I/O bitmap still records work to do." | |
12137 | + "%ld.\n", next); | |
12138 | + BUG(); | |
12139 | + } | |
12140 | + } | |
12141 | + | |
0ada99ac | 12142 | + return io_result ? io_result : result; |
2380c486 JR |
12143 | +} |
12144 | + | |
12145 | +/** | |
12146 | + * write_pageset - write a pageset to disk. | |
12147 | + * @pagedir: Which pagedir to write. | |
12148 | + * | |
12149 | + * Returns: | |
12150 | + * Zero on success or a non-zero value on failure. | |
12151 | + **/ | |
12152 | +int write_pageset(struct pagedir *pagedir) | |
12153 | +{ | |
12154 | + int finish_at, base = 0, start_time, end_time; | |
12155 | + int barmax = pagedir1.size + pagedir2.size; | |
12156 | + long error = 0; | |
12157 | + struct memory_bitmap *pageflags; | |
12158 | + | |
12159 | + /* | |
12160 | + * Even if there is nothing to read or write, the allocator | |
12161 | + * may need the init/cleanup for its housekeeping. (eg: | |
12162 | + * Pageset1 may start where pageset2 ends when writing). | |
12163 | + */ | |
12164 | + finish_at = pagedir->size; | |
12165 | + | |
12166 | + if (pagedir->id == 1) { | |
12167 | + toi_prepare_status(DONT_CLEAR_BAR, | |
12168 | + "Writing kernel & process data..."); | |
12169 | + base = pagedir2.size; | |
12170 | + if (test_action_state(TOI_TEST_FILTER_SPEED) || | |
12171 | + test_action_state(TOI_TEST_BIO)) | |
12172 | + pageflags = pageset1_map; | |
12173 | + else | |
12174 | + pageflags = pageset1_copy_map; | |
12175 | + } else { | |
12176 | + toi_prepare_status(DONT_CLEAR_BAR, "Writing caches..."); | |
12177 | + pageflags = pageset2_map; | |
12178 | + } | |
12179 | + | |
12180 | + start_time = jiffies; | |
12181 | + | |
12182 | + if (rw_init_modules(1, pagedir->id)) { | |
12183 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
12184 | + "Failed to initialise modules for writing."); | |
12185 | + error = 1; | |
12186 | + } | |
12187 | + | |
12188 | + if (!error) | |
12189 | + error = do_rw_loop(1, finish_at, pageflags, base, barmax, | |
12190 | + pagedir->id); | |
12191 | + | |
12192 | + if (rw_cleanup_modules(WRITE) && !error) { | |
12193 | + abort_hibernate(TOI_FAILED_MODULE_CLEANUP, | |
12194 | + "Failed to cleanup after writing."); | |
12195 | + error = 1; | |
12196 | + } | |
12197 | + | |
12198 | + end_time = jiffies; | |
12199 | + | |
12200 | + if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { | |
12201 | + toi_bkd.toi_io_time[0][0] += finish_at, | |
12202 | + toi_bkd.toi_io_time[0][1] += (end_time - start_time); | |
12203 | + } | |
12204 | + | |
12205 | + return error; | |
12206 | +} | |
12207 | + | |
12208 | +/** | |
12209 | + * read_pageset - highlevel function to read a pageset from disk | |
12210 | + * @pagedir: pageset to read | |
12211 | + * @overwrittenpagesonly: Whether to read the whole pageset or | |
12212 | + * only part of it. | |
12213 | + * | |
12214 | + * Returns: | |
12215 | + * Zero on success or a non-zero value on failure. | |
12216 | + **/ | |
12217 | +static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly) | |
12218 | +{ | |
12219 | + int result = 0, base = 0, start_time, end_time; | |
12220 | + int finish_at = pagedir->size; | |
12221 | + int barmax = pagedir1.size + pagedir2.size; | |
12222 | + struct memory_bitmap *pageflags; | |
12223 | + | |
12224 | + if (pagedir->id == 1) { | |
12225 | + toi_prepare_status(DONT_CLEAR_BAR, | |
12226 | + "Reading kernel & process data..."); | |
12227 | + pageflags = pageset1_map; | |
12228 | + } else { | |
12229 | + toi_prepare_status(DONT_CLEAR_BAR, "Reading caches..."); | |
12230 | + if (overwrittenpagesonly) { | |
12231 | + barmax = min(pagedir1.size, pagedir2.size); | |
12232 | + finish_at = min(pagedir1.size, pagedir2.size); | |
12233 | + } else | |
12234 | + base = pagedir1.size; | |
12235 | + pageflags = pageset2_map; | |
12236 | + } | |
12237 | + | |
12238 | + start_time = jiffies; | |
12239 | + | |
12240 | + if (rw_init_modules(0, pagedir->id)) { | |
12241 | + toiActiveAllocator->remove_image(); | |
12242 | + result = 1; | |
12243 | + } else | |
12244 | + result = do_rw_loop(0, finish_at, pageflags, base, barmax, | |
12245 | + pagedir->id); | |
12246 | + | |
12247 | + if (rw_cleanup_modules(READ) && !result) { | |
12248 | + abort_hibernate(TOI_FAILED_MODULE_CLEANUP, | |
12249 | + "Failed to cleanup after reading."); | |
12250 | + result = 1; | |
12251 | + } | |
12252 | + | |
12253 | + /* Statistics */ | |
12254 | + end_time = jiffies; | |
12255 | + | |
12256 | + if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { | |
12257 | + toi_bkd.toi_io_time[1][0] += finish_at, | |
12258 | + toi_bkd.toi_io_time[1][1] += (end_time - start_time); | |
12259 | + } | |
12260 | + | |
12261 | + return result; | |
12262 | +} | |
12263 | + | |
12264 | +/** | |
12265 | + * write_module_configs - store the modules configuration | |
12266 | + * | |
12267 | + * The configuration for each module is stored in the image header. | |
12268 | + * Returns: Int | |
12269 | + * Zero on success, Error value otherwise. | |
12270 | + **/ | |
12271 | +static int write_module_configs(void) | |
12272 | +{ | |
12273 | + struct toi_module_ops *this_module; | |
12274 | + char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP); | |
12275 | + int len, index = 1; | |
12276 | + struct toi_module_header toi_module_header; | |
12277 | + | |
12278 | + if (!buffer) { | |
12279 | + printk(KERN_INFO "Failed to allocate a buffer for saving " | |
12280 | + "module configuration info.\n"); | |
12281 | + return -ENOMEM; | |
12282 | + } | |
12283 | + | |
12284 | + /* | |
12285 | + * We have to know which data goes with which module, so we at | |
12286 | + * least write a length of zero for a module. Note that we are | |
12287 | + * also assuming every module's config data takes <= PAGE_SIZE. | |
12288 | + */ | |
12289 | + | |
12290 | + /* For each module (in registration order) */ | |
12291 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
12292 | + if (!this_module->enabled || !this_module->storage_needed || | |
12293 | + (this_module->type == WRITER_MODULE && | |
12294 | + toiActiveAllocator != this_module)) | |
12295 | + continue; | |
12296 | + | |
12297 | + /* Get the data from the module */ | |
12298 | + len = 0; | |
12299 | + if (this_module->save_config_info) | |
12300 | + len = this_module->save_config_info(buffer); | |
12301 | + | |
12302 | + /* Save the details of the module */ | |
12303 | + toi_module_header.enabled = this_module->enabled; | |
12304 | + toi_module_header.type = this_module->type; | |
12305 | + toi_module_header.index = index++; | |
12306 | + strncpy(toi_module_header.name, this_module->name, | |
12307 | + sizeof(toi_module_header.name)); | |
12308 | + toiActiveAllocator->rw_header_chunk(WRITE, | |
12309 | + this_module, | |
12310 | + (char *) &toi_module_header, | |
12311 | + sizeof(toi_module_header)); | |
12312 | + | |
12313 | + /* Save the size of the data and any data returned */ | |
12314 | + toiActiveAllocator->rw_header_chunk(WRITE, | |
12315 | + this_module, | |
12316 | + (char *) &len, sizeof(int)); | |
12317 | + if (len) | |
12318 | + toiActiveAllocator->rw_header_chunk( | |
12319 | + WRITE, this_module, buffer, len); | |
12320 | + } | |
12321 | + | |
12322 | + /* Write a blank header to terminate the list */ | |
12323 | + toi_module_header.name[0] = '\0'; | |
12324 | + toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
12325 | + (char *) &toi_module_header, sizeof(toi_module_header)); | |
12326 | + | |
12327 | + toi_free_page(22, (unsigned long) buffer); | |
12328 | + return 0; | |
12329 | +} | |
12330 | + | |
12331 | +/** | |
12332 | + * read_one_module_config - read and configure one module | |
12333 | + * | |
12334 | + * Read the configuration for one module, and configure the module | |
12335 | + * to match if it is loaded. | |
12336 | + * | |
12337 | + * Returns: Int | |
12338 | + * Zero on success, Error value otherwise. | |
12339 | + **/ | |
12340 | +static int read_one_module_config(struct toi_module_header *header) | |
12341 | +{ | |
12342 | + struct toi_module_ops *this_module; | |
12343 | + int result, len; | |
12344 | + char *buffer; | |
12345 | + | |
12346 | + /* Find the module */ | |
12347 | + this_module = toi_find_module_given_name(header->name); | |
12348 | + | |
12349 | + if (!this_module) { | |
12350 | + if (header->enabled) { | |
12351 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
12352 | + "It looks like we need module %s for reading " | |
12353 | + "the image but it hasn't been registered.\n", | |
12354 | + header->name); | |
12355 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) | |
12356 | + return -EINVAL; | |
12357 | + } else | |
12358 | + printk(KERN_INFO "Module %s configuration data found, " | |
12359 | + "but the module hasn't registered. Looks like " | |
12360 | + "it was disabled, so we're ignoring its data.", | |
12361 | + header->name); | |
12362 | + } | |
12363 | + | |
12364 | + /* Get the length of the data (if any) */ | |
12365 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len, | |
12366 | + sizeof(int)); | |
12367 | + if (result) { | |
e999739a | 12368 | + printk(KERN_ERR "Failed to read the length of the module %s's" |
2380c486 JR |
12369 | + " configuration data.\n", |
12370 | + header->name); | |
12371 | + return -EINVAL; | |
12372 | + } | |
12373 | + | |
12374 | + /* Read any data and pass to the module (if we found one) */ | |
12375 | + if (!len) | |
12376 | + return 0; | |
12377 | + | |
12378 | + buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP); | |
12379 | + | |
12380 | + if (!buffer) { | |
e999739a | 12381 | + printk(KERN_ERR "Failed to allocate a buffer for reloading " |
12382 | + "module configuration info.\n"); | |
2380c486 JR |
12383 | + return -ENOMEM; |
12384 | + } | |
12385 | + | |
12386 | + toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len); | |
12387 | + | |
12388 | + if (!this_module) | |
12389 | + goto out; | |
12390 | + | |
12391 | + if (!this_module->save_config_info) | |
e999739a | 12392 | + printk(KERN_ERR "Huh? Module %s appears to have a " |
12393 | + "save_config_info, but not a load_config_info " | |
12394 | + "function!\n", this_module->name); | |
2380c486 JR |
12395 | + else |
12396 | + this_module->load_config_info(buffer, len); | |
12397 | + | |
12398 | + /* | |
12399 | + * Now move this module to the tail of its lists. This will put it in | |
12400 | + * order. Any new modules will end up at the top of the lists. They | |
12401 | + * should have been set to disabled when loaded (people will | |
12402 | + * normally not edit an initrd to load a new module and then hibernate | |
12403 | + * without using it!). | |
12404 | + */ | |
12405 | + | |
12406 | + toi_move_module_tail(this_module); | |
12407 | + | |
12408 | + this_module->enabled = header->enabled; | |
12409 | + | |
12410 | +out: | |
12411 | + toi_free_page(23, (unsigned long) buffer); | |
12412 | + return 0; | |
12413 | +} | |
12414 | + | |
12415 | +/** | |
12416 | + * read_module_configs - reload module configurations from the image header. | |
12417 | + * | |
12418 | + * Returns: Int | |
12419 | + * Zero on success or an error code. | |
12420 | + **/ | |
12421 | +static int read_module_configs(void) | |
12422 | +{ | |
12423 | + int result = 0; | |
12424 | + struct toi_module_header toi_module_header; | |
12425 | + struct toi_module_ops *this_module; | |
12426 | + | |
12427 | + /* All modules are initially disabled. That way, if we have a module | |
12428 | + * loaded now that wasn't loaded when we hibernated, it won't be used | |
12429 | + * in trying to read the data. | |
12430 | + */ | |
12431 | + list_for_each_entry(this_module, &toi_modules, module_list) | |
12432 | + this_module->enabled = 0; | |
12433 | + | |
12434 | + /* Get the first module header */ | |
12435 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
12436 | + (char *) &toi_module_header, | |
12437 | + sizeof(toi_module_header)); | |
12438 | + if (result) { | |
12439 | + printk(KERN_ERR "Failed to read the next module header.\n"); | |
12440 | + return -EINVAL; | |
12441 | + } | |
12442 | + | |
12443 | + /* For each module (in registration order) */ | |
12444 | + while (toi_module_header.name[0]) { | |
12445 | + result = read_one_module_config(&toi_module_header); | |
12446 | + | |
12447 | + if (result) | |
12448 | + return -EINVAL; | |
12449 | + | |
12450 | + /* Get the next module header */ | |
12451 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
12452 | + (char *) &toi_module_header, | |
12453 | + sizeof(toi_module_header)); | |
12454 | + | |
12455 | + if (result) { | |
12456 | + printk(KERN_ERR "Failed to read the next module " | |
12457 | + "header.\n"); | |
12458 | + return -EINVAL; | |
12459 | + } | |
12460 | + } | |
12461 | + | |
12462 | + return 0; | |
12463 | +} | |
12464 | + | |
12465 | +/** | |
12466 | + * write_image_header - write the image header after writing the image proper | |
12467 | + * | |
12468 | + * Returns: Int | |
12469 | + * Zero on success, error value otherwise. | |
12470 | + **/ | |
12471 | +int write_image_header(void) | |
12472 | +{ | |
12473 | + int ret; | |
12474 | + int total = pagedir1.size + pagedir2.size+2; | |
12475 | + char *header_buffer = NULL; | |
12476 | + | |
12477 | + /* Now prepare to write the header */ | |
12478 | + ret = toiActiveAllocator->write_header_init(); | |
12479 | + if (ret) { | |
12480 | + abort_hibernate(TOI_FAILED_MODULE_INIT, | |
12481 | + "Active allocator's write_header_init" | |
12482 | + " function failed."); | |
12483 | + goto write_image_header_abort; | |
12484 | + } | |
12485 | + | |
12486 | + /* Get a buffer */ | |
12487 | + header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP); | |
12488 | + if (!header_buffer) { | |
12489 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
12490 | + "Out of memory when trying to get page for header!"); | |
12491 | + goto write_image_header_abort; | |
12492 | + } | |
12493 | + | |
12494 | + /* Write hibernate header */ | |
12495 | + if (fill_toi_header((struct toi_header *) header_buffer)) { | |
12496 | + abort_hibernate(TOI_OUT_OF_MEMORY, | |
12497 | + "Failure to fill header information!"); | |
12498 | + goto write_image_header_abort; | |
12499 | + } | |
12500 | + toiActiveAllocator->rw_header_chunk(WRITE, NULL, | |
12501 | + header_buffer, sizeof(struct toi_header)); | |
12502 | + | |
12503 | + toi_free_page(24, (unsigned long) header_buffer); | |
12504 | + | |
12505 | + /* Write module configurations */ | |
12506 | + ret = write_module_configs(); | |
12507 | + if (ret) { | |
12508 | + abort_hibernate(TOI_FAILED_IO, | |
12509 | + "Failed to write module configs."); | |
12510 | + goto write_image_header_abort; | |
12511 | + } | |
12512 | + | |
12513 | + memory_bm_write(pageset1_map, toiActiveAllocator->rw_header_chunk); | |
12514 | + | |
12515 | + /* Flush data and let allocator cleanup */ | |
12516 | + if (toiActiveAllocator->write_header_cleanup()) { | |
12517 | + abort_hibernate(TOI_FAILED_IO, | |
12518 | + "Failed to cleanup writing header."); | |
12519 | + goto write_image_header_abort_no_cleanup; | |
12520 | + } | |
12521 | + | |
12522 | + if (test_result_state(TOI_ABORTED)) | |
12523 | + goto write_image_header_abort_no_cleanup; | |
12524 | + | |
12525 | + toi_update_status(total, total, NULL); | |
12526 | + | |
12527 | + return 0; | |
12528 | + | |
12529 | +write_image_header_abort: | |
12530 | + toiActiveAllocator->write_header_cleanup(); | |
12531 | +write_image_header_abort_no_cleanup: | |
12532 | + return -1; | |
12533 | +} | |
12534 | + | |
12535 | +/** | |
12536 | + * sanity_check - check the header | |
12537 | + * @sh: the header which was saved at hibernate time. | |
12538 | + * | |
12539 | + * Perform a few checks, seeking to ensure that the kernel being | |
12540 | + * booted matches the one hibernated. They need to match so we can | |
12541 | + * be _sure_ things will work. It is not absolutely impossible for | |
12542 | + * resuming from a different kernel to work, just not assured. | |
12543 | + **/ | |
12544 | +static char *sanity_check(struct toi_header *sh) | |
12545 | +{ | |
e999739a | 12546 | + char *reason = check_image_kernel((struct swsusp_info *) sh); |
2380c486 JR |
12547 | + |
12548 | + if (reason) | |
12549 | + return reason; | |
12550 | + | |
12551 | + if (!test_action_state(TOI_IGNORE_ROOTFS)) { | |
12552 | + const struct super_block *sb; | |
12553 | + list_for_each_entry(sb, &super_blocks, s_list) { | |
12554 | + if ((!(sb->s_flags & MS_RDONLY)) && | |
12555 | + (sb->s_type->fs_flags & FS_REQUIRES_DEV)) | |
12556 | + return "Device backed fs has been mounted " | |
12557 | + "rw prior to resume or initrd/ramfs " | |
12558 | + "is mounted rw."; | |
12559 | + } | |
12560 | + } | |
12561 | + | |
12562 | + return NULL; | |
12563 | +} | |
12564 | + | |
12565 | +static DECLARE_WAIT_QUEUE_HEAD(freeze_wait); | |
12566 | + | |
12567 | +#define FREEZE_IN_PROGRESS (~0) | |
12568 | + | |
12569 | +static int freeze_result; | |
12570 | + | |
12571 | +static void do_freeze(struct work_struct *dummy) | |
12572 | +{ | |
12573 | + freeze_result = freeze_processes(); | |
12574 | + wake_up(&freeze_wait); | |
12575 | +} | |
12576 | + | |
12577 | +static DECLARE_WORK(freeze_work, do_freeze); | |
12578 | + | |
12579 | +/** | |
12580 | + * __read_pageset1 - test for the existence of an image and attempt to load it | |
12581 | + * | |
12582 | + * Returns: Int | |
12583 | + * Zero if image found and pageset1 successfully loaded. | |
12584 | + * Error if no image found or loaded. | |
12585 | + **/ | |
12586 | +static int __read_pageset1(void) | |
12587 | +{ | |
12588 | + int i, result = 0; | |
12589 | + char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP), | |
12590 | + *sanity_error = NULL; | |
12591 | + struct toi_header *toi_header; | |
12592 | + | |
12593 | + if (!header_buffer) { | |
12594 | + printk(KERN_INFO "Unable to allocate a page for reading the " | |
12595 | + "signature.\n"); | |
12596 | + return -ENOMEM; | |
12597 | + } | |
12598 | + | |
12599 | + /* Check for an image */ | |
12600 | + result = toiActiveAllocator->image_exists(1); | |
12601 | + if (!result) { | |
12602 | + result = -ENODATA; | |
12603 | + noresume_reset_modules(); | |
12604 | + printk(KERN_INFO "TuxOnIce: No image found.\n"); | |
12605 | + goto out; | |
12606 | + } | |
12607 | + | |
12608 | + /* | |
12609 | + * Prepare the active allocator for reading the image header. The | |
12610 | + * active allocator might read its own configuration. | |
12611 | + * | |
12612 | + * NB: This call may never return because there might be a signature | |
12613 | + * for a different image such that we warn the user and they choose | |
12614 | + * to reboot. (If the device ids look erroneous (2.4 vs 2.6) or the | |
12615 | + * location of the image might be unavailable if it was stored on a | |
12616 | + * network connection). | |
12617 | + */ | |
12618 | + | |
12619 | + result = toiActiveAllocator->read_header_init(); | |
12620 | + if (result) { | |
12621 | + printk(KERN_INFO "TuxOnIce: Failed to initialise, reading the " | |
12622 | + "image header.\n"); | |
12623 | + goto out_remove_image; | |
12624 | + } | |
12625 | + | |
12626 | + /* Check for noresume command line option */ | |
12627 | + if (test_toi_state(TOI_NORESUME_SPECIFIED)) { | |
12628 | + printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed " | |
12629 | + "image.\n"); | |
12630 | + goto out_remove_image; | |
12631 | + } | |
12632 | + | |
12633 | + /* Check whether we've resumed before */ | |
12634 | + if (test_toi_state(TOI_RESUMED_BEFORE)) { | |
12635 | + toi_early_boot_message(1, 0, NULL); | |
12636 | + if (!(test_toi_state(TOI_CONTINUE_REQ))) { | |
12637 | + printk(KERN_INFO "TuxOnIce: Tried to resume before: " | |
12638 | + "Invalidated image.\n"); | |
12639 | + goto out_remove_image; | |
12640 | + } | |
12641 | + } | |
12642 | + | |
12643 | + clear_toi_state(TOI_CONTINUE_REQ); | |
12644 | + | |
12645 | + /* Read hibernate header */ | |
12646 | + result = toiActiveAllocator->rw_header_chunk(READ, NULL, | |
12647 | + header_buffer, sizeof(struct toi_header)); | |
12648 | + if (result < 0) { | |
e999739a | 12649 | + printk(KERN_ERR "TuxOnIce: Failed to read the image " |
12650 | + "signature.\n"); | |
2380c486 JR |
12651 | + goto out_remove_image; |
12652 | + } | |
12653 | + | |
12654 | + toi_header = (struct toi_header *) header_buffer; | |
12655 | + | |
12656 | + /* | |
12657 | + * NB: This call may also result in a reboot rather than returning. | |
12658 | + */ | |
12659 | + | |
12660 | + sanity_error = sanity_check(toi_header); | |
12661 | + if (sanity_error) { | |
12662 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
12663 | + sanity_error); | |
12664 | + printk(KERN_INFO "TuxOnIce: Sanity check failed.\n"); | |
12665 | + goto out_remove_image; | |
12666 | + } | |
12667 | + | |
12668 | + /* | |
12669 | + * We have an image and it looks like it will load okay. | |
12670 | + * | |
12671 | + * Get metadata from header. Don't override commandline parameters. | |
12672 | + * | |
12673 | + * We don't need to save the image size limit because it's not used | |
12674 | + * during resume and will be restored with the image anyway. | |
12675 | + */ | |
12676 | + | |
12677 | + memcpy((char *) &pagedir1, | |
12678 | + (char *) &toi_header->pagedir, sizeof(pagedir1)); | |
12679 | + toi_result = toi_header->param0; | |
12680 | + toi_bkd.toi_action = toi_header->param1; | |
12681 | + toi_bkd.toi_debug_state = toi_header->param2; | |
12682 | + toi_bkd.toi_default_console_level = toi_header->param3; | |
12683 | + clear_toi_state(TOI_IGNORE_LOGLEVEL); | |
12684 | + pagedir2.size = toi_header->pageset_2_size; | |
12685 | + for (i = 0; i < 4; i++) | |
12686 | + toi_bkd.toi_io_time[i/2][i%2] = | |
12687 | + toi_header->io_time[i/2][i%2]; | |
12688 | + | |
12689 | + set_toi_state(TOI_BOOT_KERNEL); | |
12690 | + boot_kernel_data_buffer = toi_header->bkd; | |
12691 | + | |
12692 | + /* Read module configurations */ | |
12693 | + result = read_module_configs(); | |
12694 | + if (result) { | |
12695 | + pagedir1.size = 0; | |
12696 | + pagedir2.size = 0; | |
12697 | + printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module " | |
12698 | + "configurations.\n"); | |
12699 | + clear_action_state(TOI_KEEP_IMAGE); | |
12700 | + goto out_remove_image; | |
12701 | + } | |
12702 | + | |
12703 | + toi_prepare_console(); | |
12704 | + | |
12705 | + set_toi_state(TOI_NOW_RESUMING); | |
12706 | + | |
12707 | + if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) { | |
12708 | + toi_prepare_status(DONT_CLEAR_BAR, "Disable nonboot cpus."); | |
12709 | + if (disable_nonboot_cpus()) { | |
12710 | + set_abort_result(TOI_CPU_HOTPLUG_FAILED); | |
12711 | + goto out_reset_console; | |
12712 | + } | |
12713 | + } | |
12714 | + | |
12715 | + if (usermodehelper_disable()) | |
12716 | + goto out_enable_nonboot_cpus; | |
12717 | + | |
12718 | + current->flags |= PF_NOFREEZE; | |
12719 | + freeze_result = FREEZE_IN_PROGRESS; | |
12720 | + | |
12721 | + schedule_work_on(first_cpu(cpu_online_map), &freeze_work); | |
12722 | + | |
12723 | + toi_cond_pause(1, "About to read original pageset1 locations."); | |
12724 | + | |
12725 | + /* | |
12726 | + * See _toi_rw_header_chunk in tuxonice_block_io.c: | |
12727 | + * Initialize pageset1_map by reading the map from the image. | |
12728 | + */ | |
12729 | + if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk)) | |
12730 | + goto out_thaw; | |
12731 | + | |
12732 | + /* | |
12733 | + * See toi_rw_cleanup in tuxonice_block_io.c: | |
12734 | + * Clean up after reading the header. | |
12735 | + */ | |
12736 | + result = toiActiveAllocator->read_header_cleanup(); | |
12737 | + if (result) { | |
12738 | + printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the " | |
12739 | + "image header.\n"); | |
12740 | + goto out_thaw; | |
12741 | + } | |
12742 | + | |
12743 | + toi_cond_pause(1, "About to read pagedir."); | |
12744 | + | |
12745 | + /* | |
12746 | + * Get the addresses of pages into which we will load the kernel to | |
12747 | + * be copied back and check if they conflict with the ones we are using. | |
12748 | + */ | |
12749 | + if (toi_get_pageset1_load_addresses()) { | |
12750 | + printk(KERN_INFO "TuxOnIce: Failed to get load addresses for " | |
12751 | + "pageset1.\n"); | |
12752 | + goto out_thaw; | |
12753 | + } | |
12754 | + | |
12755 | + /* Read the original kernel back */ | |
12756 | + toi_cond_pause(1, "About to read pageset 1."); | |
12757 | + | |
12758 | + /* Given the pagemap, read back the data from disk */ | |
12759 | + if (read_pageset(&pagedir1, 0)) { | |
12760 | + toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1."); | |
12761 | + result = -EIO; | |
12762 | + goto out_thaw; | |
12763 | + } | |
12764 | + | |
12765 | + toi_cond_pause(1, "About to restore original kernel."); | |
12766 | + result = 0; | |
12767 | + | |
12768 | + if (!test_action_state(TOI_KEEP_IMAGE) && | |
12769 | + toiActiveAllocator->mark_resume_attempted) | |
12770 | + toiActiveAllocator->mark_resume_attempted(1); | |
12771 | + | |
12772 | + wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); | |
12773 | +out: | |
12774 | + current->flags &= ~PF_NOFREEZE; | |
12775 | + toi_free_page(25, (unsigned long) header_buffer); | |
12776 | + return result; | |
12777 | + | |
12778 | +out_thaw: | |
12779 | + wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); | |
12780 | + thaw_processes(); | |
12781 | + usermodehelper_enable(); | |
12782 | +out_enable_nonboot_cpus: | |
12783 | + enable_nonboot_cpus(); | |
12784 | +out_reset_console: | |
12785 | + toi_cleanup_console(); | |
12786 | +out_remove_image: | |
12787 | + result = -EINVAL; | |
12788 | + if (!test_action_state(TOI_KEEP_IMAGE)) | |
12789 | + toiActiveAllocator->remove_image(); | |
12790 | + toiActiveAllocator->read_header_cleanup(); | |
12791 | + noresume_reset_modules(); | |
12792 | + goto out; | |
12793 | +} | |
12794 | + | |
12795 | +/** | |
12796 | + * read_pageset1 - highlevel function to read the saved pages | |
12797 | + * | |
12798 | + * Attempt to read the header and pageset1 of a hibernate image. | |
12799 | + * Handle the outcome, complaining where appropriate. | |
12800 | + **/ | |
12801 | +int read_pageset1(void) | |
12802 | +{ | |
12803 | + int error; | |
12804 | + | |
12805 | + error = __read_pageset1(); | |
12806 | + | |
12807 | + if (error && error != -ENODATA && error != -EINVAL && | |
12808 | + !test_result_state(TOI_ABORTED)) | |
12809 | + abort_hibernate(TOI_IMAGE_ERROR, | |
12810 | + "TuxOnIce: Error %d resuming\n", error); | |
12811 | + | |
12812 | + return error; | |
12813 | +} | |
12814 | + | |
12815 | +/** | |
12816 | + * get_have_image_data - check the image header | |
12817 | + **/ | |
12818 | +static char *get_have_image_data(void) | |
12819 | +{ | |
12820 | + char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP); | |
12821 | + struct toi_header *toi_header; | |
12822 | + | |
12823 | + if (!output_buffer) { | |
12824 | + printk(KERN_INFO "Output buffer null.\n"); | |
12825 | + return NULL; | |
12826 | + } | |
12827 | + | |
12828 | + /* Check for an image */ | |
12829 | + if (!toiActiveAllocator->image_exists(1) || | |
12830 | + toiActiveAllocator->read_header_init() || | |
12831 | + toiActiveAllocator->rw_header_chunk(READ, NULL, | |
12832 | + output_buffer, sizeof(struct toi_header))) { | |
12833 | + sprintf(output_buffer, "0\n"); | |
12834 | + /* | |
12835 | + * From an initrd/ramfs, catting have_image and | |
12836 | + * getting a result of 0 is sufficient. | |
12837 | + */ | |
12838 | + clear_toi_state(TOI_BOOT_TIME); | |
12839 | + goto out; | |
12840 | + } | |
12841 | + | |
12842 | + toi_header = (struct toi_header *) output_buffer; | |
12843 | + | |
12844 | + sprintf(output_buffer, "1\n%s\n%s\n", | |
12845 | + toi_header->uts.machine, | |
12846 | + toi_header->uts.version); | |
12847 | + | |
12848 | + /* Check whether we've resumed before */ | |
12849 | + if (test_toi_state(TOI_RESUMED_BEFORE)) | |
12850 | + strcat(output_buffer, "Resumed before.\n"); | |
12851 | + | |
12852 | +out: | |
12853 | + noresume_reset_modules(); | |
12854 | + return output_buffer; | |
12855 | +} | |
12856 | + | |
12857 | +/** | |
12858 | + * read_pageset2 - read second part of the image | |
12859 | + * @overwrittenpagesonly: Read only pages which would have been | |
12860 | + * overwritten by pageset1? | |
12861 | + * | |
12862 | + * Read in part or all of pageset2 of an image, depending upon | |
12863 | + * whether we are hibernating and have only overwritten a portion | |
12864 | + * with pageset1 pages, or are resuming and need to read them | |
12865 | + * all. | |
12866 | + * | |
12867 | + * Returns: Int | |
12868 | + * Zero if no error, otherwise the error value. | |
12869 | + **/ | |
12870 | +int read_pageset2(int overwrittenpagesonly) | |
12871 | +{ | |
12872 | + int result = 0; | |
12873 | + | |
12874 | + if (!pagedir2.size) | |
12875 | + return 0; | |
12876 | + | |
12877 | + result = read_pageset(&pagedir2, overwrittenpagesonly); | |
12878 | + | |
12879 | + toi_cond_pause(1, "Pagedir 2 read."); | |
12880 | + | |
12881 | + return result; | |
12882 | +} | |
12883 | + | |
12884 | +/** | |
12885 | + * image_exists_read - has an image been found? | |
12886 | + * @page: Output buffer | |
12887 | + * | |
12888 | + * Store 0 or 1 in page, depending on whether an image is found. | |
12889 | + * Incoming buffer is PAGE_SIZE and result is guaranteed | |
12890 | + * to be far less than that, so we don't worry about | |
12891 | + * overflow. | |
12892 | + **/ | |
12893 | +int image_exists_read(const char *page, int count) | |
12894 | +{ | |
12895 | + int len = 0; | |
12896 | + char *result; | |
12897 | + | |
12898 | + if (toi_activate_storage(0)) | |
12899 | + return count; | |
12900 | + | |
12901 | + if (!test_toi_state(TOI_RESUME_DEVICE_OK)) | |
12902 | + toi_attempt_to_parse_resume_device(0); | |
12903 | + | |
12904 | + if (!toiActiveAllocator) { | |
12905 | + len = sprintf((char *) page, "-1\n"); | |
12906 | + } else { | |
12907 | + result = get_have_image_data(); | |
12908 | + if (result) { | |
12909 | + len = sprintf((char *) page, "%s", result); | |
12910 | + toi_free_page(26, (unsigned long) result); | |
12911 | + } | |
12912 | + } | |
12913 | + | |
12914 | + toi_deactivate_storage(0); | |
12915 | + | |
12916 | + return len; | |
12917 | +} | |
12918 | + | |
12919 | +/** | |
12920 | + * image_exists_write - invalidate an image if one exists | |
12921 | + **/ | |
12922 | +int image_exists_write(const char *buffer, int count) | |
12923 | +{ | |
12924 | + if (toi_activate_storage(0)) | |
12925 | + return count; | |
12926 | + | |
12927 | + if (toiActiveAllocator && toiActiveAllocator->image_exists(1)) | |
12928 | + toiActiveAllocator->remove_image(); | |
12929 | + | |
12930 | + toi_deactivate_storage(0); | |
12931 | + | |
12932 | + clear_result_state(TOI_KEPT_IMAGE); | |
12933 | + | |
12934 | + return count; | |
12935 | +} | |
12936 | diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h | |
12937 | new file mode 100644 | |
9474138d | 12938 | index 0000000..01b3db6 |
2380c486 JR |
12939 | --- /dev/null |
12940 | +++ b/kernel/power/tuxonice_io.h | |
9474138d | 12941 | @@ -0,0 +1,70 @@ |
2380c486 JR |
12942 | +/* |
12943 | + * kernel/power/tuxonice_io.h | |
12944 | + * | |
12945 | + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net) | |
12946 | + * | |
12947 | + * This file is released under the GPLv2. | |
12948 | + * | |
12949 | + * It contains high level IO routines for hibernating. | |
12950 | + * | |
12951 | + */ | |
12952 | + | |
12953 | +#include <linux/utsname.h> | |
12954 | +#include "tuxonice_pagedir.h" | |
2380c486 JR |
12955 | + |
12956 | +/* Non-module data saved in our image header */ | |
12957 | +struct toi_header { | |
12958 | + /* | |
12959 | + * Mirror struct swsusp_info, but without | |
12960 | + * the page aligned attribute | |
12961 | + */ | |
12962 | + struct new_utsname uts; | |
12963 | + u32 version_code; | |
12964 | + unsigned long num_physpages; | |
12965 | + int cpus; | |
12966 | + unsigned long image_pages; | |
12967 | + unsigned long pages; | |
12968 | + unsigned long size; | |
12969 | + | |
12970 | + /* Our own data */ | |
12971 | + unsigned long orig_mem_free; | |
12972 | + int page_size; | |
12973 | + int pageset_2_size; | |
12974 | + int param0; | |
12975 | + int param1; | |
12976 | + int param2; | |
12977 | + int param3; | |
12978 | + int progress0; | |
12979 | + int progress1; | |
12980 | + int progress2; | |
12981 | + int progress3; | |
12982 | + int io_time[2][2]; | |
12983 | + struct pagedir pagedir; | |
12984 | + dev_t root_fs; | |
12985 | + unsigned long bkd; /* Boot kernel data locn */ | |
12986 | +}; | |
12987 | + | |
12988 | +extern int write_pageset(struct pagedir *pagedir); | |
12989 | +extern int write_image_header(void); | |
12990 | +extern int read_pageset1(void); | |
12991 | +extern int read_pageset2(int overwrittenpagesonly); | |
12992 | + | |
12993 | +extern int toi_attempt_to_parse_resume_device(int quiet); | |
12994 | +extern void attempt_to_parse_resume_device2(void); | |
12995 | +extern void attempt_to_parse_alt_resume_param(void); | |
12996 | +int image_exists_read(const char *page, int count); | |
12997 | +int image_exists_write(const char *buffer, int count); | |
12998 | +extern void save_restore_alt_param(int replace, int quiet); | |
12999 | +extern atomic_t toi_io_workers; | |
13000 | + | |
13001 | +/* Args to save_restore_alt_param */ | |
13002 | +#define RESTORE 0 | |
13003 | +#define SAVE 1 | |
13004 | + | |
13005 | +#define NOQUIET 0 | |
13006 | +#define QUIET 1 | |
13007 | + | |
13008 | +extern dev_t name_to_dev_t(char *line); | |
13009 | + | |
13010 | +extern wait_queue_head_t toi_io_queue_flusher; | |
13011 | +extern int toi_bio_queue_flusher_should_finish; | |
13012 | diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c | |
13013 | new file mode 100644 | |
9474138d | 13014 | index 0000000..c650f5c |
2380c486 JR |
13015 | --- /dev/null |
13016 | +++ b/kernel/power/tuxonice_modules.c | |
0ada99ac | 13017 | @@ -0,0 +1,494 @@ |
2380c486 JR |
13018 | +/* |
13019 | + * kernel/power/tuxonice_modules.c | |
13020 | + * | |
13021 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
13022 | + * | |
13023 | + */ | |
13024 | + | |
13025 | +#include <linux/suspend.h> | |
2380c486 JR |
13026 | +#include "tuxonice.h" |
13027 | +#include "tuxonice_modules.h" | |
13028 | +#include "tuxonice_sysfs.h" | |
13029 | +#include "tuxonice_ui.h" | |
13030 | + | |
13031 | +LIST_HEAD(toi_filters); | |
13032 | +LIST_HEAD(toiAllocators); | |
13033 | +LIST_HEAD(toi_modules); | |
13034 | + | |
13035 | +struct toi_module_ops *toiActiveAllocator; | |
13036 | +EXPORT_SYMBOL_GPL(toiActiveAllocator); | |
13037 | + | |
13038 | +static int toi_num_filters; | |
13039 | +int toiNumAllocators, toi_num_modules; | |
13040 | + | |
13041 | +/* | |
13042 | + * toi_header_storage_for_modules | |
13043 | + * | |
13044 | + * Returns the amount of space needed to store configuration | |
13045 | + * data needed by the modules prior to copying back the original | |
13046 | + * kernel. We can exclude data for pageset2 because it will be | |
13047 | + * available anyway once the kernel is copied back. | |
13048 | + */ | |
13049 | +long toi_header_storage_for_modules(void) | |
13050 | +{ | |
13051 | + struct toi_module_ops *this_module; | |
13052 | + int bytes = 0; | |
13053 | + | |
13054 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13055 | + if (!this_module->enabled || | |
13056 | + (this_module->type == WRITER_MODULE && | |
13057 | + toiActiveAllocator != this_module)) | |
13058 | + continue; | |
13059 | + if (this_module->storage_needed) { | |
13060 | + int this = this_module->storage_needed() + | |
13061 | + sizeof(struct toi_module_header) + | |
13062 | + sizeof(int); | |
13063 | + this_module->header_requested = this; | |
13064 | + bytes += this; | |
13065 | + } | |
13066 | + } | |
13067 | + | |
13068 | + /* One more for the empty terminator */ | |
13069 | + return bytes + sizeof(struct toi_module_header); | |
13070 | +} | |
13071 | + | |
0ada99ac | 13072 | +/* Log each module's header use against its request, then the total. */ |
13073 | +void print_toi_header_storage_for_modules(void) | |
13074 | +{ | |
13075 | + struct toi_module_ops *this_module; | |
13076 | + size_t terminator = sizeof(struct toi_module_header); | |
13077 | + int bytes = 0; | |
13078 | + | |
13079 | + printk(KERN_DEBUG "Header storage:\n"); | |
13080 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13081 | + if (!this_module->enabled || | |
13082 | + (this_module->type == WRITER_MODULE && | |
13083 | + toiActiveAllocator != this_module)) | |
13084 | + continue; | |
13085 | + if (this_module->storage_needed) { | |
13086 | + int this = this_module->storage_needed() + | |
13087 | + sizeof(struct toi_module_header) + | |
13088 | + sizeof(int); | |
13089 | + this_module->header_requested = this; | |
13090 | + bytes += this; | |
13091 | + printk(KERN_DEBUG "+ %16s : %-4d/%d.\n", | |
13092 | + this_module->name, | |
13093 | + this_module->header_used, this); | |
13094 | + } | |
13095 | + } | |
13096 | + | |
13097 | + printk(KERN_DEBUG "+ empty terminator : %zu.\n", terminator); | |
13098 | + printk(KERN_DEBUG " ====\n"); | |
13099 | + printk(KERN_DEBUG " %zu\n", bytes + terminator); | |
13100 | +} | |
9474138d | 13101 | +EXPORT_SYMBOL_GPL(print_toi_header_storage_for_modules); |
0ada99ac | 13102 | + |
2380c486 JR |
13103 | +/* |
13104 | + * toi_memory_for_modules | |
13105 | + * | |
13106 | + * Returns the amount of memory requested by modules for | |
13107 | + * doing their work during the cycle. | |
13108 | + */ | |
13109 | + | |
13110 | +long toi_memory_for_modules(int print_parts) | |
13111 | +{ | |
13112 | + long bytes = 0, result; | |
13113 | + struct toi_module_ops *this_module; | |
13114 | + | |
13115 | + if (print_parts) | |
13116 | + printk(KERN_INFO "Memory for modules:\n===================\n"); | |
13117 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13118 | + int this; | |
13119 | + if (!this_module->enabled) | |
13120 | + continue; | |
13121 | + if (this_module->memory_needed) { | |
13122 | + this = this_module->memory_needed(); | |
13123 | + if (print_parts) | |
13124 | + printk(KERN_INFO "%10d bytes (%5ld pages) for " | |
13125 | + "module '%s'.\n", this, | |
13126 | + DIV_ROUND_UP(this, PAGE_SIZE), | |
13127 | + this_module->name); | |
13128 | + bytes += this; | |
13129 | + } | |
13130 | + } | |
13131 | + | |
13132 | + result = DIV_ROUND_UP(bytes, PAGE_SIZE); | |
13133 | + if (print_parts) | |
13134 | + printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result); | |
13135 | + | |
13136 | + return result; | |
13137 | +} | |
13138 | + | |
13139 | +/* | |
13140 | + * toi_expected_compression_ratio | |
13141 | + * | |
13142 | + * Returns the compression ratio expected when saving the image. | |
13143 | + */ | |
13144 | + | |
13145 | +int toi_expected_compression_ratio(void) | |
13146 | +{ | |
13147 | + int ratio = 100; | |
13148 | + struct toi_module_ops *this_module; | |
13149 | + | |
13150 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13151 | + if (!this_module->enabled) | |
13152 | + continue; | |
13153 | + if (this_module->expected_compression) | |
13154 | + ratio = ratio * this_module->expected_compression() | |
13155 | + / 100; | |
13156 | + } | |
13157 | + | |
13158 | + return ratio; | |
13159 | +} | |
13160 | + | |
13161 | +/* toi_find_module_given_dir | |
13162 | + * Functionality : Return a module (if found), given a pointer | |
13163 | + * to its directory name | |
13164 | + */ | |
13165 | + | |
13166 | +static struct toi_module_ops *toi_find_module_given_dir(char *name) | |
13167 | +{ | |
13168 | + struct toi_module_ops *this_module, *found_module = NULL; | |
13169 | + | |
13170 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13171 | + if (!strcmp(name, this_module->directory)) { | |
13172 | + found_module = this_module; | |
13173 | + break; | |
13174 | + } | |
13175 | + } | |
13176 | + | |
13177 | + return found_module; | |
13178 | +} | |
13179 | + | |
13180 | +/* toi_find_module_given_name | |
13181 | + * Functionality : Return a module (if found), given a pointer | |
13182 | + * to its name | |
13183 | + */ | |
13184 | + | |
13185 | +struct toi_module_ops *toi_find_module_given_name(char *name) | |
13186 | +{ | |
13187 | + struct toi_module_ops *this_module, *found_module = NULL; | |
13188 | + | |
13189 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13190 | + if (!strcmp(name, this_module->name)) { | |
13191 | + found_module = this_module; | |
13192 | + break; | |
13193 | + } | |
13194 | + } | |
13195 | + | |
13196 | + return found_module; | |
13197 | +} | |
13198 | + | |
13199 | +/* | |
13200 | + * toi_print_module_debug_info | |
13201 | + * Functionality : Get debugging info from enabled modules into a buffer. | |
13202 | + * @buffer is treated as buffer_size bytes long. Returns the sum of the | |
13203 | + * lengths reported by the modules' print_debug_info callbacks. | |
13204 | + */ | |
13205 | +int toi_print_module_debug_info(char *buffer, int buffer_size) | |
13206 | +{ | |
13207 | + struct toi_module_ops *this_module; | |
13208 | + int len = 0; | |
13209 | + | |
13210 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13211 | + if (!this_module->enabled) | |
13212 | + continue; | |
13213 | + if (this_module->print_debug_info) | |
13214 | + len += this_module->print_debug_info(buffer + len, | |
13215 | + buffer_size - len); | |
13216 | + } | |
13217 | + | |
13218 | + /* Ensure null terminated; the last valid index is buffer_size - 1 */ | |
13219 | + if (buffer_size > 0) | |
13220 | + buffer[buffer_size - 1] = 0; | |
13221 | + | |
13222 | + return len; | |
13223 | +} | |
13224 | + | |
13225 | +/* | |
13226 | + * toi_register_module | |
13227 | + * | |
13228 | + * Register a module. | |
13229 | + */ | |
13230 | +int toi_register_module(struct toi_module_ops *module) | |
13231 | +{ | |
13232 | + int i; | |
13233 | + struct kobject *kobj; | |
13234 | + | |
13235 | + module->enabled = 1; | |
13236 | + | |
13237 | + if (toi_find_module_given_name(module->name)) { | |
13238 | + printk(KERN_INFO "TuxOnIce: Trying to load module %s," | |
13239 | + " which is already registered.\n", | |
13240 | + module->name); | |
13241 | + return -EBUSY; | |
13242 | + } | |
13243 | + | |
13244 | + switch (module->type) { | |
13245 | + case FILTER_MODULE: | |
13246 | + list_add_tail(&module->type_list, &toi_filters); | |
13247 | + toi_num_filters++; | |
13248 | + break; | |
13249 | + case WRITER_MODULE: | |
13250 | + list_add_tail(&module->type_list, &toiAllocators); | |
13251 | + toiNumAllocators++; | |
13252 | + break; | |
13253 | + case MISC_MODULE: | |
13254 | + case MISC_HIDDEN_MODULE: | |
13255 | + break; | |
13256 | + default: | |
e999739a | 13257 | + printk(KERN_ERR "Hmmm. Module '%s' has an invalid type." |
2380c486 JR |
13258 | + " It has been ignored.\n", module->name); |
13259 | + return -EINVAL; | |
13260 | + } | |
13261 | + list_add_tail(&module->module_list, &toi_modules); | |
13262 | + toi_num_modules++; | |
13263 | + | |
13264 | + if ((!module->directory && !module->shared_directory) || | |
13265 | + !module->sysfs_data || !module->num_sysfs_entries) | |
13266 | + return 0; | |
13267 | + | |
13268 | + /* | |
13269 | + * Modules may share a directory, but those with shared_dir | |
13270 | + * set must be loaded (via symbol dependencies) after parents | |
13271 | + * and unloaded beforehand. | |
13272 | + */ | |
13273 | + if (module->shared_directory) { | |
13274 | + struct toi_module_ops *shared = | |
13275 | + toi_find_module_given_dir(module->shared_directory); | |
13276 | + if (!shared) { | |
e999739a | 13277 | + printk(KERN_ERR "TuxOnIce: Module %s wants to share " |
13278 | + "%s's directory but %s isn't loaded.\n", | |
2380c486 JR |
13279 | + module->name, module->shared_directory, |
13280 | + module->shared_directory); | |
13281 | + toi_unregister_module(module); | |
13282 | + return -ENODEV; | |
13283 | + } | |
13284 | + kobj = shared->dir_kobj; | |
13285 | + } else { | |
13286 | + if (!strncmp(module->directory, "[ROOT]", 6)) | |
13287 | + kobj = tuxonice_kobj; | |
13288 | + else | |
13289 | + kobj = make_toi_sysdir(module->directory); | |
13290 | + } | |
13291 | + module->dir_kobj = kobj; | |
13292 | + for (i = 0; i < module->num_sysfs_entries; i++) { | |
13293 | + int result = toi_register_sysfs_file(kobj, | |
13294 | + &module->sysfs_data[i]); | |
13295 | + if (result) | |
13296 | + return result; | |
13297 | + } | |
13298 | + return 0; | |
13299 | +} | |
13300 | +EXPORT_SYMBOL_GPL(toi_register_module); | |
13301 | + | |
13302 | +/* | |
13303 | + * toi_unregister_module | |
13304 | + * | |
13305 | + * Remove a module. | |
13306 | + */ | |
13307 | +void toi_unregister_module(struct toi_module_ops *module) | |
13308 | +{ | |
13309 | + int i; | |
13310 | + | |
13311 | + if (module->dir_kobj) | |
13312 | + for (i = 0; i < module->num_sysfs_entries; i++) | |
13313 | + toi_unregister_sysfs_file(module->dir_kobj, | |
13314 | + &module->sysfs_data[i]); | |
13315 | + | |
13316 | + if (!module->shared_directory && module->directory && | |
13317 | + strncmp(module->directory, "[ROOT]", 6)) | |
13318 | + remove_toi_sysdir(module->dir_kobj); | |
13319 | + | |
13320 | + switch (module->type) { | |
13321 | + case FILTER_MODULE: | |
13322 | + list_del(&module->type_list); | |
13323 | + toi_num_filters--; | |
13324 | + break; | |
13325 | + case WRITER_MODULE: | |
13326 | + list_del(&module->type_list); | |
13327 | + toiNumAllocators--; | |
13328 | + if (toiActiveAllocator == module) { | |
13329 | + toiActiveAllocator = NULL; | |
13330 | + clear_toi_state(TOI_CAN_RESUME); | |
13331 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
13332 | + } | |
13333 | + break; | |
13334 | + case MISC_MODULE: | |
13335 | + case MISC_HIDDEN_MODULE: | |
13336 | + break; | |
13337 | + default: | |
e999739a | 13338 | + printk(KERN_ERR "Module '%s' has an invalid type." |
2380c486 JR |
13339 | + " It has been ignored.\n", module->name); |
13340 | + return; | |
13341 | + } | |
13342 | + list_del(&module->module_list); | |
13343 | + toi_num_modules--; | |
13344 | +} | |
13345 | +EXPORT_SYMBOL_GPL(toi_unregister_module); | |
13346 | + | |
13347 | +/* | |
13348 | + * toi_move_module_tail | |
13349 | + * | |
13350 | + * Rearrange modules when reloading the config. | |
13351 | + */ | |
13352 | +void toi_move_module_tail(struct toi_module_ops *module) | |
13353 | +{ | |
13354 | + switch (module->type) { | |
13355 | + case FILTER_MODULE: | |
13356 | + if (toi_num_filters > 1) | |
13357 | + list_move_tail(&module->type_list, &toi_filters); | |
13358 | + break; | |
13359 | + case WRITER_MODULE: | |
13360 | + if (toiNumAllocators > 1) | |
13361 | + list_move_tail(&module->type_list, &toiAllocators); | |
13362 | + break; | |
13363 | + case MISC_MODULE: | |
13364 | + case MISC_HIDDEN_MODULE: | |
13365 | + break; | |
13366 | + default: | |
e999739a | 13367 | + printk(KERN_ERR "Module '%s' has an invalid type." |
2380c486 JR |
13368 | + " It has been ignored.\n", module->name); |
13369 | + return; | |
13370 | + } | |
13371 | + if ((toi_num_filters + toiNumAllocators) > 1) | |
13372 | + list_move_tail(&module->module_list, &toi_modules); | |
13373 | +} | |
13374 | + | |
13375 | +/* | |
13376 | + * toi_initialise_modules | |
13377 | + * | |
13378 | + * Get ready to do some work! | |
13379 | + */ | |
13380 | +int toi_initialise_modules(int starting_cycle, int early) | |
13381 | +{ | |
13382 | + struct toi_module_ops *this_module; | |
13383 | + int result; | |
13384 | + | |
13385 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13386 | + this_module->header_requested = 0; | |
13387 | + this_module->header_used = 0; | |
13388 | + if (!this_module->enabled) | |
13389 | + continue; | |
13390 | + if (this_module->early != early) | |
13391 | + continue; | |
13392 | + if (this_module->initialise) { | |
13393 | + toi_message(TOI_MEMORY, TOI_MEDIUM, 1, | |
13394 | + "Initialising module %s.\n", | |
13395 | + this_module->name); | |
13396 | + result = this_module->initialise(starting_cycle); | |
13397 | + if (result) { | |
13398 | + toi_cleanup_modules(starting_cycle); | |
13399 | + return result; | |
13400 | + } | |
13401 | + this_module->initialised = 1; | |
13402 | + } | |
13403 | + } | |
13404 | + | |
13405 | + return 0; | |
13406 | +} | |
13407 | + | |
13408 | +/* | |
13409 | + * toi_cleanup_modules | |
13410 | + * | |
13411 | + * Tell modules the work is done. | |
13412 | + */ | |
13413 | +void toi_cleanup_modules(int finishing_cycle) | |
13414 | +{ | |
13415 | + struct toi_module_ops *this_module; | |
13416 | + | |
13417 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13418 | + if (!this_module->enabled || !this_module->initialised) | |
13419 | + continue; | |
13420 | + if (this_module->cleanup) { | |
13421 | + toi_message(TOI_MEMORY, TOI_MEDIUM, 1, | |
13422 | + "Cleaning up module %s.\n", | |
13423 | + this_module->name); | |
13424 | + this_module->cleanup(finishing_cycle); | |
13425 | + } | |
13426 | + this_module->initialised = 0; | |
13427 | + } | |
13428 | +} | |
13429 | + | |
13430 | +/* | |
13431 | + * toi_get_next_filter | |
13432 | + * | |
13433 | + * Get the next filter in the pipeline. | |
13434 | + */ | |
13435 | +struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought) | |
13436 | +{ | |
13437 | + struct toi_module_ops *last_filter = NULL, *this_filter = NULL; | |
13438 | + | |
13439 | + list_for_each_entry(this_filter, &toi_filters, type_list) { | |
13440 | + if (!this_filter->enabled) | |
13441 | + continue; | |
13442 | + if ((last_filter == filter_sought) || (!filter_sought)) | |
13443 | + return this_filter; | |
13444 | + last_filter = this_filter; | |
13445 | + } | |
13446 | + | |
13447 | + return toiActiveAllocator; | |
13448 | +} | |
13449 | +EXPORT_SYMBOL_GPL(toi_get_next_filter); | |
13450 | + | |
13451 | +/** | |
13452 | + * toi_print_modules: Printk what support is loaded. | |
13453 | + */ | |
13454 | +void toi_print_modules(void) | |
13455 | +{ | |
13456 | + struct toi_module_ops *this_module; | |
13457 | + int prev = 0; | |
13458 | + | |
e999739a | 13459 | + printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for"); |
2380c486 JR |
13460 | + |
13461 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13462 | + if (this_module->type == MISC_HIDDEN_MODULE) | |
13463 | + continue; | |
13464 | + printk("%s %s%s%s", prev ? "," : "", | |
13465 | + this_module->enabled ? "" : "[", | |
13466 | + this_module->name, | |
13467 | + this_module->enabled ? "" : "]"); | |
13468 | + prev = 1; | |
13469 | + } | |
13470 | + | |
13471 | + printk(".\n"); | |
13472 | +} | |
13473 | + | |
13474 | +/* toi_get_modules | |
13475 | + * | |
13476 | + * Take a reference to modules so they can't go away under us. | |
13477 | + */ | |
13478 | + | |
13479 | +int toi_get_modules(void) | |
13480 | +{ | |
13481 | + struct toi_module_ops *this_module; | |
13482 | + | |
13483 | + list_for_each_entry(this_module, &toi_modules, module_list) { | |
13484 | + struct toi_module_ops *this_module2; | |
13485 | + | |
13486 | + if (try_module_get(this_module->module)) | |
13487 | + continue; | |
13488 | + | |
13489 | + /* Failed! Reverse gets and return error */ | |
13490 | + list_for_each_entry(this_module2, &toi_modules, | |
13491 | + module_list) { | |
13492 | + if (this_module == this_module2) | |
13493 | + return -EINVAL; | |
13494 | + module_put(this_module2->module); | |
13495 | + } | |
13496 | + } | |
13497 | + return 0; | |
13498 | +} | |
13499 | + | |
13500 | +/* toi_put_modules | |
13501 | + * | |
13502 | + * Release our references to modules we used. | |
13503 | + */ | |
13504 | + | |
13505 | +void toi_put_modules(void) | |
13506 | +{ | |
13507 | + struct toi_module_ops *this_module; | |
13508 | + | |
13509 | + list_for_each_entry(this_module, &toi_modules, module_list) | |
13510 | + module_put(this_module->module); | |
13511 | +} | |
13512 | diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h | |
13513 | new file mode 100644 | |
0ada99ac | 13514 | index 0000000..79494e2 |
2380c486 JR |
13515 | --- /dev/null |
13516 | +++ b/kernel/power/tuxonice_modules.h | |
e999739a | 13517 | @@ -0,0 +1,181 @@ |
2380c486 JR |
13518 | +/* |
13519 | + * kernel/power/tuxonice_modules.h | |
13520 | + * | |
13521 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
13522 | + * | |
13523 | + * This file is released under the GPLv2. | |
13524 | + * | |
13525 | + * It contains declarations for modules. Modules are additions to | |
13526 | + * TuxOnIce that provide facilities such as image compression or | |
13527 | + * encryption, backends for storage of the image and user interfaces. | |
13528 | + * | |
13529 | + */ | |
13530 | + | |
13531 | +#ifndef TOI_MODULES_H | |
13532 | +#define TOI_MODULES_H | |
13533 | + | |
13534 | +/* This is the maximum size we store in the image header for a module name */ | |
13535 | +#define TOI_MAX_MODULE_NAME_LENGTH 30 | |
13536 | + | |
13537 | +/* Per-module metadata */ | |
13538 | +struct toi_module_header { | |
13539 | + char name[TOI_MAX_MODULE_NAME_LENGTH]; | |
13540 | + int enabled; | |
13541 | + int type; | |
13542 | + int index; | |
13543 | + int data_length; | |
13544 | + unsigned long signature; | |
13545 | +}; | |
13546 | + | |
13547 | +enum { | |
13548 | + FILTER_MODULE, | |
13549 | + WRITER_MODULE, | |
13550 | + MISC_MODULE, /* Block writer, eg. */ | |
13551 | + MISC_HIDDEN_MODULE, | |
13552 | +}; | |
13553 | + | |
13554 | +enum { | |
13555 | + TOI_ASYNC, | |
13556 | + TOI_SYNC | |
13557 | +}; | |
13558 | + | |
13559 | +struct toi_module_ops { | |
13560 | + /* Functions common to all modules */ | |
13561 | + int type; | |
13562 | + char *name; | |
13563 | + char *directory; | |
13564 | + char *shared_directory; | |
13565 | + struct kobject *dir_kobj; | |
13566 | + struct module *module; | |
13567 | + int enabled, early, initialised; | |
13568 | + struct list_head module_list; | |
13569 | + | |
13570 | + /* List of filters or allocators */ | |
13571 | + struct list_head list, type_list; | |
13572 | + | |
13573 | + /* | |
13574 | + * Requirements for memory and storage in | |
13575 | + * the image header.. | |
13576 | + */ | |
13577 | + int (*memory_needed) (void); | |
13578 | + int (*storage_needed) (void); | |
13579 | + | |
13580 | + int header_requested, header_used; | |
13581 | + | |
13582 | + int (*expected_compression) (void); | |
13583 | + | |
13584 | + /* | |
13585 | + * Debug info | |
13586 | + */ | |
13587 | + int (*print_debug_info) (char *buffer, int size); | |
13588 | + int (*save_config_info) (char *buffer); | |
13589 | + void (*load_config_info) (char *buffer, int len); | |
13590 | + | |
13591 | + /* | |
13592 | + * Initialise & cleanup - general routines called | |
13593 | + * at the start and end of a cycle. | |
13594 | + */ | |
13595 | + int (*initialise) (int starting_cycle); | |
13596 | + void (*cleanup) (int finishing_cycle); | |
13597 | + | |
13598 | + /* | |
13599 | + * Calls for allocating storage (allocators only). | |
13600 | + * | |
0ada99ac | 13601 | + * Header space is requested separately and cannot fail, but the |
13602 | + * reservation is only applied when main storage is allocated. | |
13603 | + * The header space reservation is thus always set prior to | |
13604 | + * requesting the allocation of storage - and prior to querying | |
13605 | + * how much storage is available. | |
2380c486 JR |
13606 | + */ |
13607 | + | |
13608 | + int (*storage_available) (void); | |
13609 | + void (*reserve_header_space) (int space_requested); | |
13610 | + int (*allocate_storage) (int space_requested); | |
13611 | + int (*storage_allocated) (void); | |
2380c486 JR |
13612 | + |
13613 | + /* | |
13614 | + * Routines used in image I/O. | |
13615 | + */ | |
13616 | + int (*rw_init) (int rw, int stream_number); | |
13617 | + int (*rw_cleanup) (int rw); | |
13618 | + int (*write_page) (unsigned long index, struct page *buffer_page, | |
13619 | + unsigned int buf_size); | |
13620 | + int (*read_page) (unsigned long *index, struct page *buffer_page, | |
13621 | + unsigned int *buf_size); | |
0ada99ac | 13622 | + int (*io_flusher) (int rw); |
2380c486 JR |
13623 | + |
13624 | + /* Reset module if image exists but reading aborted */ | |
13625 | + void (*noresume_reset) (void); | |
13626 | + | |
13627 | + /* Read and write the metadata */ | |
13628 | + int (*write_header_init) (void); | |
13629 | + int (*write_header_cleanup) (void); | |
13630 | + | |
13631 | + int (*read_header_init) (void); | |
13632 | + int (*read_header_cleanup) (void); | |
13633 | + | |
13634 | + int (*rw_header_chunk) (int rw, struct toi_module_ops *owner, | |
13635 | + char *buffer_start, int buffer_size); | |
13636 | + | |
13637 | + int (*rw_header_chunk_noreadahead) (int rw, | |
13638 | + struct toi_module_ops *owner, char *buffer_start, | |
13639 | + int buffer_size); | |
13640 | + | |
13641 | + /* Attempt to parse an image location */ | |
13642 | + int (*parse_sig_location) (char *buffer, int only_writer, int quiet); | |
13643 | + | |
13644 | + /* Throttle I/O according to throughput */ | |
13645 | + void (*update_throughput_throttle) (int jif_index); | |
13646 | + | |
13647 | + /* Flush outstanding I/O */ | |
0ada99ac | 13648 | + int (*finish_all_io) (void); |
2380c486 JR |
13649 | + |
13650 | + /* Determine whether image exists that we can restore */ | |
13651 | + int (*image_exists) (int quiet); | |
13652 | + | |
13653 | + /* Mark the image as having tried to resume */ | |
13654 | + int (*mark_resume_attempted) (int); | |
13655 | + | |
13656 | + /* Destroy image if one exists */ | |
13657 | + int (*remove_image) (void); | |
13658 | + | |
13659 | + /* Sysfs Data */ | |
13660 | + struct toi_sysfs_data *sysfs_data; | |
13661 | + int num_sysfs_entries; | |
13662 | +}; | |
13663 | + | |
13664 | +extern int toi_num_modules, toiNumAllocators; | |
13665 | + | |
13666 | +extern struct toi_module_ops *toiActiveAllocator; | |
13667 | +extern struct list_head toi_filters, toiAllocators, toi_modules; | |
13668 | + | |
13669 | +extern void toi_prepare_console_modules(void); | |
13670 | +extern void toi_cleanup_console_modules(void); | |
13671 | + | |
13672 | +extern struct toi_module_ops *toi_find_module_given_name(char *name); | |
13673 | +extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *); | |
13674 | + | |
13675 | +extern int toi_register_module(struct toi_module_ops *module); | |
13676 | +extern void toi_move_module_tail(struct toi_module_ops *module); | |
13677 | + | |
13678 | +extern long toi_header_storage_for_modules(void); | |
13679 | +extern long toi_memory_for_modules(int print_parts); | |
0ada99ac | 13680 | +extern void print_toi_header_storage_for_modules(void); |
2380c486 JR |
13681 | +extern int toi_expected_compression_ratio(void); |
13682 | + | |
13683 | +extern int toi_print_module_debug_info(char *buffer, int buffer_size); | |
13684 | +extern int toi_register_module(struct toi_module_ops *module); | |
13685 | +extern void toi_unregister_module(struct toi_module_ops *module); | |
13686 | + | |
13687 | +extern int toi_initialise_modules(int starting_cycle, int early); | |
13688 | +#define toi_initialise_modules_early(starting) \ | |
13689 | + toi_initialise_modules(starting, 1) | |
13690 | +#define toi_initialise_modules_late(starting) \ | |
13691 | + toi_initialise_modules(starting, 0) | |
13692 | +extern void toi_cleanup_modules(int finishing_cycle); | |
13693 | + | |
13694 | +extern void toi_print_modules(void); | |
13695 | + | |
13696 | +int toi_get_modules(void); | |
13697 | +void toi_put_modules(void); | |
13698 | +#endif | |
13699 | diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c | |
13700 | new file mode 100644 | |
13701 | index 0000000..bb027a7 | |
13702 | --- /dev/null | |
13703 | +++ b/kernel/power/tuxonice_netlink.c | |
13704 | @@ -0,0 +1,343 @@ | |
13705 | +/* | |
13706 | + * kernel/power/tuxonice_netlink.c | |
13707 | + * | |
13708 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
13709 | + * | |
13710 | + * This file is released under the GPLv2. | |
13711 | + * | |
13712 | + * Functions for communicating with a userspace helper via netlink. | |
13713 | + */ | |
13714 | + | |
13715 | + | |
13716 | +#include <linux/suspend.h> | |
13717 | +#include "tuxonice_netlink.h" | |
13718 | +#include "tuxonice.h" | |
13719 | +#include "tuxonice_modules.h" | |
13720 | +#include "tuxonice_alloc.h" | |
13721 | + | |
13722 | +static struct user_helper_data *uhd_list; | |
13723 | + | |
13724 | +/* | |
13725 | + * Refill our pool of SKBs for use in emergencies (eg, when eating memory and | |
13726 | + * none can be allocated). | |
13727 | + */ | |
13728 | +static void toi_fill_skb_pool(struct user_helper_data *uhd) | |
13729 | +{ | |
13730 | + while (uhd->pool_level < uhd->pool_limit) { | |
13731 | + struct sk_buff *new_skb = | |
13732 | + alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); | |
13733 | + | |
13734 | + if (!new_skb) | |
13735 | + break; | |
13736 | + | |
13737 | + new_skb->next = uhd->emerg_skbs; | |
13738 | + uhd->emerg_skbs = new_skb; | |
13739 | + uhd->pool_level++; | |
13740 | + } | |
13741 | +} | |
13742 | + | |
13743 | +/* | |
13744 | + * Try to allocate a single skb. If we can't get one, try to use one from | |
13745 | + * our pool. | |
13746 | + */ | |
13747 | +static struct sk_buff *toi_get_skb(struct user_helper_data *uhd) | |
13748 | +{ | |
13749 | + struct sk_buff *skb = | |
13750 | + alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); | |
13751 | + | |
13752 | + if (skb) | |
13753 | + return skb; | |
13754 | + | |
13755 | + skb = uhd->emerg_skbs; | |
13756 | + if (skb) { | |
13757 | + uhd->pool_level--; | |
13758 | + uhd->emerg_skbs = skb->next; | |
13759 | + skb->next = NULL; | |
13760 | + } | |
13761 | + | |
13762 | + return skb; | |
13763 | +} | |
13764 | + | |
13765 | +static void put_skb(struct user_helper_data *uhd, struct sk_buff *skb) | |
13766 | +{ | |
13767 | + if (uhd->pool_level < uhd->pool_limit) { | |
13768 | + skb->next = uhd->emerg_skbs; | |
13769 | + uhd->emerg_skbs = skb; | |
13770 | + } else | |
13771 | + kfree_skb(skb); | |
13772 | +} | |
13773 | + | |
13774 | +void toi_send_netlink_message(struct user_helper_data *uhd, | |
13775 | + int type, void *params, size_t len) | |
13776 | +{ | |
13777 | + struct sk_buff *skb; | |
13778 | + struct nlmsghdr *nlh; | |
13779 | + void *dest; | |
13780 | + struct task_struct *t; | |
13781 | + | |
13782 | + if (uhd->pid == -1) | |
13783 | + return; | |
13784 | + | |
13785 | + if (uhd->debug) | |
13786 | + printk(KERN_ERR "toi_send_netlink_message: Send " | |
13787 | + "message type %d.\n", type); | |
13788 | + | |
13789 | + skb = toi_get_skb(uhd); | |
13790 | + if (!skb) { | |
13791 | + printk(KERN_INFO "toi_netlink: Can't allocate skb!\n"); | |
13792 | + return; | |
13793 | + } | |
13794 | + | |
13795 | + /* NLMSG_PUT contains a hidden goto nlmsg_failure */ | |
13796 | + nlh = NLMSG_PUT(skb, 0, uhd->sock_seq, type, len); | |
13797 | + uhd->sock_seq++; | |
13798 | + | |
13799 | + dest = NLMSG_DATA(nlh); | |
13800 | + if (params && len > 0) | |
13801 | + memcpy(dest, params, len); | |
13802 | + | |
13803 | + netlink_unicast(uhd->nl, skb, uhd->pid, 0); | |
13804 | + | |
13805 | + read_lock(&tasklist_lock); | |
13806 | + t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns); | |
13807 | + if (!t) { | |
13808 | + read_unlock(&tasklist_lock); | |
13809 | + if (uhd->pid > -1) | |
13810 | + printk(KERN_INFO "Hmm. Can't find the userspace task" | |
13811 | + " %d.\n", uhd->pid); | |
13812 | + return; | |
13813 | + } | |
13814 | + wake_up_process(t); | |
13815 | + read_unlock(&tasklist_lock); | |
13816 | + | |
13817 | + yield(); | |
13818 | + | |
13819 | + return; | |
13820 | + | |
13821 | +nlmsg_failure: | |
13822 | + if (skb) | |
13823 | + put_skb(uhd, skb); | |
13824 | + | |
13825 | + if (uhd->debug) | |
13826 | + printk(KERN_ERR "toi_send_netlink_message: Failed to send " | |
13827 | + "message type %d.\n", type); | |
13828 | +} | |
13829 | +EXPORT_SYMBOL_GPL(toi_send_netlink_message); | |
13830 | + | |
13831 | +static void send_whether_debugging(struct user_helper_data *uhd) | |
13832 | +{ | |
13833 | + static u8 is_debugging = 1; | |
13834 | + | |
13835 | + toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING, | |
13836 | + &is_debugging, sizeof(u8)); | |
13837 | +} | |
13838 | + | |
13839 | +/* | |
13840 | + * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we | |
13841 | + * are hibernating. | |
13842 | + */ | |
13843 | +static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid) | |
13844 | +{ | |
13845 | + struct task_struct *t; | |
13846 | + | |
13847 | + if (uhd->debug) | |
13848 | + printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid); | |
13849 | + | |
13850 | + read_lock(&tasklist_lock); | |
13851 | + t = find_task_by_pid_type_ns(PIDTYPE_PID, pid, &init_pid_ns); | |
13852 | + if (!t) { | |
13853 | + read_unlock(&tasklist_lock); | |
13854 | + printk(KERN_INFO "Strange. Can't find the userspace task %d.\n", | |
13855 | + pid); | |
13856 | + return -EINVAL; | |
13857 | + } | |
13858 | + | |
13859 | + t->flags |= PF_NOFREEZE; | |
13860 | + | |
13861 | + read_unlock(&tasklist_lock); | |
13862 | + uhd->pid = pid; | |
13863 | + | |
13864 | + toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0); | |
13865 | + | |
13866 | + return 0; | |
13867 | +} | |
13868 | + | |
13869 | +/* | |
13870 | + * Called when the userspace process has informed us that it's ready to roll. | |
13871 | + */ | |
13872 | +static int nl_ready(struct user_helper_data *uhd, u32 version) | |
13873 | +{ | |
13874 | + if (version != uhd->interface_version) { | |
13875 | + printk(KERN_INFO "%s userspace process using invalid interface" | |
13876 | + " version (%d - kernel wants %d). Trying to " | |
13877 | + "continue without it.\n", | |
13878 | + uhd->name, version, uhd->interface_version); | |
13879 | + if (uhd->not_ready) | |
13880 | + uhd->not_ready(); | |
13881 | + return -EINVAL; | |
13882 | + } | |
13883 | + | |
13884 | + complete(&uhd->wait_for_process); | |
13885 | + | |
13886 | + return 0; | |
13887 | +} | |
13888 | + | |
13889 | +void toi_netlink_close_complete(struct user_helper_data *uhd) | |
13890 | +{ | |
13891 | + if (uhd->nl) { | |
13892 | + netlink_kernel_release(uhd->nl); | |
13893 | + uhd->nl = NULL; | |
13894 | + } | |
13895 | + | |
13896 | + while (uhd->emerg_skbs) { | |
13897 | + struct sk_buff *next = uhd->emerg_skbs->next; | |
13898 | + kfree_skb(uhd->emerg_skbs); | |
13899 | + uhd->emerg_skbs = next; | |
13900 | + } | |
13901 | + | |
13902 | + uhd->pid = -1; | |
13903 | +} | |
13904 | +EXPORT_SYMBOL_GPL(toi_netlink_close_complete); | |
13905 | + | |
13906 | +static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd, | |
13907 | + struct sk_buff *skb, struct nlmsghdr *nlh) | |
13908 | +{ | |
13909 | + int type = nlh->nlmsg_type; | |
13910 | + int *data; | |
13911 | + int err; | |
13912 | + | |
13913 | + if (uhd->debug) | |
13914 | + printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n", | |
13915 | + type); | |
13916 | + | |
13917 | + /* Let the more specific handler go first. It returns | |
13918 | + * 1 for valid messages that it doesn't know. */ | |
13919 | + err = uhd->rcv_msg(skb, nlh); | |
13920 | + if (err != 1) | |
13921 | + return err; | |
13922 | + | |
13923 | + /* Only allow one task to receive NOFREEZE privileges */ | |
13924 | + if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) { | |
13925 | + printk(KERN_INFO "Received extra nofreeze me requests.\n"); | |
13926 | + return -EBUSY; | |
13927 | + } | |
13928 | + | |
13929 | + data = NLMSG_DATA(nlh); | |
13930 | + | |
13931 | + switch (type) { | |
13932 | + case NETLINK_MSG_NOFREEZE_ME: | |
13933 | + return nl_set_nofreeze(uhd, nlh->nlmsg_pid); | |
13934 | + case NETLINK_MSG_GET_DEBUGGING: | |
13935 | + send_whether_debugging(uhd); | |
13936 | + return 0; | |
13937 | + case NETLINK_MSG_READY: | |
13938 | + if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) { | |
13939 | + printk(KERN_INFO "Invalid ready mesage.\n"); | |
13940 | + if (uhd->not_ready) | |
13941 | + uhd->not_ready(); | |
13942 | + return -EINVAL; | |
13943 | + } | |
13944 | + return nl_ready(uhd, (u32) *data); | |
13945 | + case NETLINK_MSG_CLEANUP: | |
13946 | + toi_netlink_close_complete(uhd); | |
13947 | + return 0; | |
13948 | + } | |
13949 | + | |
13950 | + return -EINVAL; | |
13951 | +} | |
13952 | + | |
13953 | +static void toi_user_rcv_skb(struct sk_buff *skb) | |
13954 | +{ | |
13955 | + int err; | |
13956 | + struct nlmsghdr *nlh; | |
13957 | + struct user_helper_data *uhd = uhd_list; | |
13958 | + | |
13959 | + while (uhd && uhd->netlink_id != skb->sk->sk_protocol) | |
13960 | + uhd = uhd->next; | |
13961 | + | |
13962 | + if (!uhd) | |
13963 | + return; | |
13964 | + | |
13965 | + while (skb->len >= NLMSG_SPACE(0)) { | |
13966 | + u32 rlen; | |
13967 | + | |
13968 | + nlh = (struct nlmsghdr *) skb->data; | |
13969 | + if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) | |
13970 | + return; | |
13971 | + | |
13972 | + rlen = NLMSG_ALIGN(nlh->nlmsg_len); | |
13973 | + if (rlen > skb->len) | |
13974 | + rlen = skb->len; | |
13975 | + | |
13976 | + err = toi_nl_gen_rcv_msg(uhd, skb, nlh); | |
13977 | + if (err) | |
13978 | + netlink_ack(skb, nlh, err); | |
13979 | + else if (nlh->nlmsg_flags & NLM_F_ACK) | |
13980 | + netlink_ack(skb, nlh, 0); | |
13981 | + skb_pull(skb, rlen); | |
13982 | + } | |
13983 | +} | |
13984 | + | |
13985 | +static int netlink_prepare(struct user_helper_data *uhd) | |
13986 | +{ | |
13987 | + uhd->next = uhd_list; | |
13988 | + uhd_list = uhd; | |
13989 | + | |
13990 | + uhd->sock_seq = 0x42c0ffee; | |
13991 | + uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, 0, | |
13992 | + toi_user_rcv_skb, NULL, THIS_MODULE); | |
13993 | + if (!uhd->nl) { | |
13994 | + printk(KERN_INFO "Failed to allocate netlink socket for %s.\n", | |
13995 | + uhd->name); | |
13996 | + return -ENOMEM; | |
13997 | + } | |
13998 | + | |
13999 | + toi_fill_skb_pool(uhd); | |
14000 | + | |
14001 | + return 0; | |
14002 | +} | |
14003 | + | |
14004 | +void toi_netlink_close(struct user_helper_data *uhd) | |
14005 | +{ | |
14006 | + struct task_struct *t; | |
14007 | + | |
14008 | + read_lock(&tasklist_lock); | |
14009 | + t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns); | |
14010 | + if (t) | |
14011 | + t->flags &= ~PF_NOFREEZE; | |
14012 | + read_unlock(&tasklist_lock); | |
14013 | + | |
14014 | + toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0); | |
14015 | +} | |
14016 | +EXPORT_SYMBOL_GPL(toi_netlink_close); | |
14017 | + | |
14018 | +int toi_netlink_setup(struct user_helper_data *uhd) | |
14019 | +{ | |
14020 | + /* In case userui didn't cleanup properly on us */ | |
14021 | + toi_netlink_close_complete(uhd); | |
14022 | + | |
14023 | + if (netlink_prepare(uhd) < 0) { | |
14024 | + printk(KERN_INFO "Netlink prepare failed.\n"); | |
14025 | + return 1; | |
14026 | + } | |
14027 | + | |
14028 | + if (toi_launch_userspace_program(uhd->program, uhd->netlink_id, | |
14029 | + UMH_WAIT_EXEC, uhd->debug) < 0) { | |
14030 | + printk(KERN_INFO "Launch userspace program failed.\n"); | |
14031 | + toi_netlink_close_complete(uhd); | |
14032 | + return 1; | |
14033 | + } | |
14034 | + | |
14035 | + /* Wait 2 seconds for the userspace process to make contact */ | |
14036 | + wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ); | |
14037 | + | |
14038 | + if (uhd->pid == -1) { | |
14039 | + printk(KERN_INFO "%s: Failed to contact userspace process.\n", | |
14040 | + uhd->name); | |
14041 | + toi_netlink_close_complete(uhd); | |
14042 | + return 1; | |
14043 | + } | |
14044 | + | |
14045 | + return 0; | |
14046 | +} | |
14047 | +EXPORT_SYMBOL_GPL(toi_netlink_setup); | |
14048 | diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h | |
14049 | new file mode 100644 | |
14050 | index 0000000..37e174b | |
14051 | --- /dev/null | |
14052 | +++ b/kernel/power/tuxonice_netlink.h | |
14053 | @@ -0,0 +1,62 @@ | |
14054 | +/* | |
14055 | + * kernel/power/tuxonice_netlink.h | |
14056 | + * | |
14057 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
14058 | + * | |
14059 | + * This file is released under the GPLv2. | |
14060 | + * | |
14061 | + * Declarations for functions for communicating with a userspace helper | |
14062 | + * via netlink. | |
14063 | + */ | |
14064 | + | |
14065 | +#include <linux/netlink.h> | |
14066 | +#include <net/sock.h> | |
14067 | + | |
14068 | +#define NETLINK_MSG_BASE 0x10 | |
14069 | + | |
14070 | +#define NETLINK_MSG_READY 0x10 | |
14071 | +#define NETLINK_MSG_NOFREEZE_ME 0x16 | |
14072 | +#define NETLINK_MSG_GET_DEBUGGING 0x19 | |
14073 | +#define NETLINK_MSG_CLEANUP 0x24 | |
14074 | +#define NETLINK_MSG_NOFREEZE_ACK 0x27 | |
14075 | +#define NETLINK_MSG_IS_DEBUGGING 0x28 | |
14076 | + | |
14077 | +struct user_helper_data { | |
14078 | + int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh); | |
14079 | + void (*not_ready) (void); | |
14080 | + struct sock *nl; | |
14081 | + u32 sock_seq; | |
14082 | + pid_t pid; | |
14083 | + char *comm; | |
14084 | + char program[256]; | |
14085 | + int pool_level; | |
14086 | + int pool_limit; | |
14087 | + struct sk_buff *emerg_skbs; | |
14088 | + int skb_size; | |
14089 | + int netlink_id; | |
14090 | + char *name; | |
14091 | + struct user_helper_data *next; | |
14092 | + struct completion wait_for_process; | |
14093 | + u32 interface_version; | |
14094 | + int must_init; | |
14095 | + int debug; | |
14096 | +}; | |
14097 | + | |
14098 | +#ifdef CONFIG_NET | |
14099 | +int toi_netlink_setup(struct user_helper_data *uhd); | |
14100 | +void toi_netlink_close(struct user_helper_data *uhd); | |
14101 | +void toi_send_netlink_message(struct user_helper_data *uhd, | |
14102 | + int type, void *params, size_t len); | |
14103 | +void toi_netlink_close_complete(struct user_helper_data *uhd); | |
14104 | +#else | |
14105 | +static inline int toi_netlink_setup(struct user_helper_data *uhd) | |
14106 | +{ | |
14107 | + return 0; | |
14108 | +} | |
14109 | + | |
14110 | +static inline void toi_netlink_close(struct user_helper_data *uhd) { }; | |
14111 | +static inline void toi_send_netlink_message(struct user_helper_data *uhd, | |
14112 | + int type, void *params, size_t len) { }; | |
14113 | +static inline void toi_netlink_close_complete(struct user_helper_data *uhd) | |
14114 | + { }; | |
14115 | +#endif | |
14116 | diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c | |
14117 | new file mode 100644 | |
9474138d | 14118 | index 0000000..92c1e5e |
2380c486 JR |
14119 | --- /dev/null |
14120 | +++ b/kernel/power/tuxonice_pagedir.c | |
9474138d | 14121 | @@ -0,0 +1,380 @@ |
2380c486 JR |
14122 | +/* |
14123 | + * kernel/power/tuxonice_pagedir.c | |
14124 | + * | |
14125 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
14126 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
14127 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
14128 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
14129 | + * | |
14130 | + * This file is released under the GPLv2. | |
14131 | + * | |
14132 | + * Routines for handling pagesets. | |
14133 | + * Note that pbes aren't actually stored as such. They're stored as | |
14134 | + * bitmaps and extents. | |
14135 | + */ | |
14136 | + | |
14137 | +#include <linux/suspend.h> | |
14138 | +#include <linux/highmem.h> | |
14139 | +#include <linux/bootmem.h> | |
14140 | +#include <linux/hardirq.h> | |
14141 | +#include <linux/sched.h> | |
e999739a | 14142 | +#include <linux/cpu.h> |
2380c486 JR |
14143 | +#include <asm/tlbflush.h> |
14144 | + | |
14145 | +#include "tuxonice_pageflags.h" | |
14146 | +#include "tuxonice_ui.h" | |
14147 | +#include "tuxonice_pagedir.h" | |
14148 | +#include "tuxonice_prepare_image.h" | |
14149 | +#include "tuxonice.h" | |
2380c486 JR |
14150 | +#include "tuxonice_builtin.h" |
14151 | +#include "tuxonice_alloc.h" | |
14152 | + | |
14153 | +static int ptoi_pfn; | |
14154 | +static struct pbe *this_low_pbe; | |
14155 | +static struct pbe **last_low_pbe_ptr; | |
14156 | +static struct memory_bitmap dup_map1, dup_map2; | |
14157 | + | |
14158 | +void toi_reset_alt_image_pageset2_pfn(void) | |
14159 | +{ | |
14160 | + memory_bm_position_reset(pageset2_map); | |
14161 | +} | |
14162 | + | |
14163 | +static struct page *first_conflicting_page; | |
14164 | + | |
14165 | +/* | |
14166 | + * free_conflicting_pages - free every page on the first_conflicting_page list | |
14167 | + */ | |
14168 | + | |
14169 | +static void free_conflicting_pages(void) | |
14170 | +{ | |
14171 | + while (first_conflicting_page) { | |
14172 | + struct page *next = | |
14173 | + *((struct page **) kmap(first_conflicting_page)); | |
14174 | + kunmap(first_conflicting_page); | |
14175 | + toi__free_page(29, first_conflicting_page); | |
14176 | + first_conflicting_page = next; | |
14177 | + } | |
14178 | +} | |
14179 | + | |
14180 | +/* ___toi_get_nonconflicting_page | |
14181 | + * | |
14182 | + * Description: Gets order zero pages that won't be overwritten | |
14183 | + * while copying the original pages. | |
14184 | + */ | |
14185 | + | |
14186 | +struct page *___toi_get_nonconflicting_page(int can_be_highmem) | |
14187 | +{ | |
14188 | + struct page *page; | |
14189 | + gfp_t flags = TOI_ATOMIC_GFP; | |
14190 | + if (can_be_highmem) | |
14191 | + flags |= __GFP_HIGHMEM; | |
14192 | + | |
14193 | + | |
14194 | + if (test_toi_state(TOI_LOADING_ALT_IMAGE) && | |
14195 | + pageset2_map && | |
14196 | + (ptoi_pfn != BM_END_OF_MAP)) { | |
14197 | + do { | |
14198 | + ptoi_pfn = memory_bm_next_pfn(pageset2_map); | |
14199 | + if (ptoi_pfn != BM_END_OF_MAP) { | |
14200 | + page = pfn_to_page(ptoi_pfn); | |
14201 | + if (!PagePageset1(page) && | |
14202 | + (can_be_highmem || !PageHighMem(page))) | |
14203 | + return page; | |
14204 | + } | |
14205 | + } while (ptoi_pfn != BM_END_OF_MAP); | |
14206 | + } | |
14207 | + | |
14208 | + do { | |
14209 | + page = toi_alloc_page(29, flags); | |
14210 | + if (!page) { | |
14211 | + printk(KERN_INFO "Failed to get nonconflicting " | |
14212 | + "page.\n"); | |
14213 | + return NULL; | |
14214 | + } | |
14215 | + if (PagePageset1(page)) { | |
14216 | + struct page **next = (struct page **) kmap(page); | |
14217 | + *next = first_conflicting_page; | |
14218 | + first_conflicting_page = page; | |
14219 | + kunmap(page); | |
14220 | + } | |
14221 | + } while (PagePageset1(page)); | |
14222 | + | |
14223 | + return page; | |
14224 | +} | |
14225 | + | |
/*
 * __toi_get_nonconflicting_page
 *
 * Lowmem-only variant of ___toi_get_nonconflicting_page() that returns
 * the page's kernel address, or 0 if no page could be obtained.
 */
unsigned long __toi_get_nonconflicting_page(void)
{
	struct page *lowmem_page = ___toi_get_nonconflicting_page(0);

	if (!lowmem_page)
		return 0;

	return (unsigned long) page_address(lowmem_page);
}
14231 | + | |
14232 | +static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe, | |
14233 | + int highmem) | |
14234 | +{ | |
14235 | + if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1)) | |
14236 | + + 2 * sizeof(struct pbe)) > PAGE_SIZE) { | |
14237 | + struct page *new_page = | |
14238 | + ___toi_get_nonconflicting_page(highmem); | |
14239 | + if (!new_page) | |
14240 | + return ERR_PTR(-ENOMEM); | |
14241 | + this_pbe = (struct pbe *) kmap(new_page); | |
14242 | + memset(this_pbe, 0, PAGE_SIZE); | |
14243 | + *page_ptr = new_page; | |
14244 | + } else | |
14245 | + this_pbe++; | |
14246 | + | |
14247 | + return this_pbe; | |
14248 | +} | |
14249 | + | |
14250 | +/** | |
14251 | + * get_pageset1_load_addresses - generate pbes for conflicting pages | |
14252 | + * | |
14253 | + * We check here that pagedir & pages it points to won't collide | |
14254 | + * with pages where we're going to restore from the loaded pages | |
14255 | + * later. | |
14256 | + * | |
14257 | + * Returns: | |
14258 | + * Zero on success, one if couldn't find enough pages (shouldn't | |
14259 | + * happen). | |
14260 | + **/ | |
14261 | +int toi_get_pageset1_load_addresses(void) | |
14262 | +{ | |
14263 | + int pfn, highallocd = 0, lowallocd = 0; | |
14264 | + int low_needed = pagedir1.size - get_highmem_size(pagedir1); | |
14265 | + int high_needed = get_highmem_size(pagedir1); | |
14266 | + int low_pages_for_highmem = 0; | |
14267 | + gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM; | |
14268 | + struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL, | |
14269 | + *low_pbe_page; | |
14270 | + struct pbe **last_high_pbe_ptr = &restore_highmem_pblist, | |
14271 | + *this_high_pbe = NULL; | |
14272 | + int orig_low_pfn, orig_high_pfn; | |
14273 | + int high_pbes_done = 0, low_pbes_done = 0; | |
14274 | + int low_direct = 0, high_direct = 0; | |
14275 | + int high_to_free, low_to_free, result = 0; | |
14276 | + | |
9474138d | 14277 | + /* |
e999739a | 14278 | + * We are about to allocate all available memory, and processes |
14279 | + * might not have finished freezing yet. To avoid potential OOMs, | |
14280 | + * disable non boot cpus and do this with IRQs disabled | |
14281 | + */ | |
14282 | + | |
14283 | + disable_nonboot_cpus(); | |
14284 | + local_irq_disable(); | |
14285 | + | |
2380c486 JR |
14286 | + /* |
14287 | + * We need to duplicate pageset1's map because memory_bm_next_pfn's | |
14288 | + * state gets stomped on by the PagePageset1() test in setup_pbes. | |
14289 | + */ | |
e999739a | 14290 | + memory_bm_create(&dup_map1, GFP_ATOMIC, 0); |
2380c486 JR |
14291 | + memory_bm_dup(pageset1_map, &dup_map1); |
14292 | + | |
e999739a | 14293 | + memory_bm_create(&dup_map2, GFP_ATOMIC, 0); |
2380c486 JR |
14294 | + memory_bm_dup(pageset1_map, &dup_map2); |
14295 | + | |
14296 | + memory_bm_position_reset(pageset1_map); | |
14297 | + memory_bm_position_reset(&dup_map1); | |
14298 | + memory_bm_position_reset(&dup_map2); | |
14299 | + | |
14300 | + last_low_pbe_ptr = &restore_pblist; | |
14301 | + | |
14302 | + /* First, allocate pages for the start of our pbe lists. */ | |
14303 | + if (high_needed) { | |
14304 | + high_pbe_page = ___toi_get_nonconflicting_page(1); | |
14305 | + if (!high_pbe_page) { | |
14306 | + result = -ENOMEM; | |
14307 | + goto out; | |
14308 | + } | |
14309 | + this_high_pbe = (struct pbe *) kmap(high_pbe_page); | |
14310 | + memset(this_high_pbe, 0, PAGE_SIZE); | |
14311 | + } | |
14312 | + | |
14313 | + low_pbe_page = ___toi_get_nonconflicting_page(0); | |
14314 | + if (!low_pbe_page) { | |
14315 | + result = -ENOMEM; | |
14316 | + goto out; | |
14317 | + } | |
14318 | + this_low_pbe = (struct pbe *) page_address(low_pbe_page); | |
14319 | + | |
14320 | + /* | |
14321 | + * Next, allocate all possible memory to find where we can | |
14322 | + * load data directly into destination pages. I'd like to do | |
14323 | + * this in bigger chunks, but then we can't free pages | |
14324 | + * individually later. | |
14325 | + */ | |
14326 | + | |
14327 | + do { | |
14328 | + page = toi_alloc_page(30, flags); | |
14329 | + if (page) | |
14330 | + SetPagePageset1Copy(page); | |
14331 | + } while (page); | |
14332 | + | |
14333 | + /* | |
14334 | + * Find out how many high- and lowmem pages we allocated above, | |
14335 | + * and how many pages we can reload directly to their original | |
14336 | + * location. | |
14337 | + */ | |
14338 | + memory_bm_position_reset(pageset1_copy_map); | |
14339 | + for (pfn = memory_bm_next_pfn(pageset1_copy_map); pfn != BM_END_OF_MAP; | |
14340 | + pfn = memory_bm_next_pfn(pageset1_copy_map)) { | |
14341 | + int is_high; | |
14342 | + page = pfn_to_page(pfn); | |
14343 | + is_high = PageHighMem(page); | |
14344 | + | |
14345 | + if (PagePageset1(page)) { | |
14346 | + if (test_action_state(TOI_NO_DIRECT_LOAD)) { | |
14347 | + ClearPagePageset1Copy(page); | |
14348 | + toi__free_page(30, page); | |
14349 | + continue; | |
14350 | + } else { | |
14351 | + if (is_high) | |
14352 | + high_direct++; | |
14353 | + else | |
14354 | + low_direct++; | |
14355 | + } | |
14356 | + } else { | |
14357 | + if (is_high) | |
14358 | + highallocd++; | |
14359 | + else | |
14360 | + lowallocd++; | |
14361 | + } | |
14362 | + } | |
14363 | + | |
14364 | + high_needed -= high_direct; | |
14365 | + low_needed -= low_direct; | |
14366 | + | |
14367 | + /* | |
14368 | + * Do we need to use some lowmem pages for the copies of highmem | |
14369 | + * pages? | |
14370 | + */ | |
14371 | + if (high_needed > highallocd) { | |
14372 | + low_pages_for_highmem = high_needed - highallocd; | |
14373 | + high_needed -= low_pages_for_highmem; | |
14374 | + low_needed += low_pages_for_highmem; | |
14375 | + } | |
14376 | + | |
14377 | + high_to_free = highallocd - high_needed; | |
14378 | + low_to_free = lowallocd - low_needed; | |
14379 | + | |
14380 | + /* | |
14381 | + * Now generate our pbes (which will be used for the atomic restore), | |
14382 | + * and free unneeded pages. | |
14383 | + */ | |
14384 | + memory_bm_position_reset(pageset1_copy_map); | |
14385 | + for (pfn = memory_bm_next_pfn(pageset1_copy_map); pfn != BM_END_OF_MAP; | |
14386 | + pfn = memory_bm_next_pfn(pageset1_copy_map)) { | |
14387 | + int is_high; | |
14388 | + page = pfn_to_page(pfn); | |
14389 | + is_high = PageHighMem(page); | |
14390 | + | |
14391 | + if (PagePageset1(page)) | |
14392 | + continue; | |
14393 | + | |
14394 | + /* Free the page? */ | |
14395 | + if ((is_high && high_to_free) || | |
14396 | + (!is_high && low_to_free)) { | |
14397 | + ClearPagePageset1Copy(page); | |
14398 | + toi__free_page(30, page); | |
14399 | + if (is_high) | |
14400 | + high_to_free--; | |
14401 | + else | |
14402 | + low_to_free--; | |
14403 | + continue; | |
14404 | + } | |
14405 | + | |
14406 | + /* Nope. We're going to use this page. Add a pbe. */ | |
14407 | + if (is_high || low_pages_for_highmem) { | |
14408 | + struct page *orig_page; | |
14409 | + high_pbes_done++; | |
14410 | + if (!is_high) | |
14411 | + low_pages_for_highmem--; | |
14412 | + do { | |
14413 | + orig_high_pfn = memory_bm_next_pfn(&dup_map1); | |
14414 | + BUG_ON(orig_high_pfn == BM_END_OF_MAP); | |
14415 | + orig_page = pfn_to_page(orig_high_pfn); | |
14416 | + } while (!PageHighMem(orig_page) || | |
14417 | + load_direct(orig_page)); | |
14418 | + | |
14419 | + this_high_pbe->orig_address = orig_page; | |
14420 | + this_high_pbe->address = page; | |
14421 | + this_high_pbe->next = NULL; | |
14422 | + if (last_high_pbe_page != high_pbe_page) { | |
14423 | + *last_high_pbe_ptr = | |
14424 | + (struct pbe *) high_pbe_page; | |
14425 | + if (!last_high_pbe_page) | |
14426 | + last_high_pbe_page = high_pbe_page; | |
14427 | + } else | |
14428 | + *last_high_pbe_ptr = this_high_pbe; | |
14429 | + last_high_pbe_ptr = &this_high_pbe->next; | |
14430 | + if (last_high_pbe_page != high_pbe_page) { | |
14431 | + kunmap(last_high_pbe_page); | |
14432 | + last_high_pbe_page = high_pbe_page; | |
14433 | + } | |
14434 | + this_high_pbe = get_next_pbe(&high_pbe_page, | |
14435 | + this_high_pbe, 1); | |
14436 | + if (IS_ERR(this_high_pbe)) { | |
14437 | + printk(KERN_INFO | |
14438 | + "This high pbe is an error.\n"); | |
14439 | + return -ENOMEM; | |
14440 | + } | |
14441 | + } else { | |
14442 | + struct page *orig_page; | |
14443 | + low_pbes_done++; | |
14444 | + do { | |
14445 | + orig_low_pfn = memory_bm_next_pfn(&dup_map2); | |
14446 | + BUG_ON(orig_low_pfn == BM_END_OF_MAP); | |
14447 | + orig_page = pfn_to_page(orig_low_pfn); | |
14448 | + } while (PageHighMem(orig_page) || | |
14449 | + load_direct(orig_page)); | |
14450 | + | |
14451 | + this_low_pbe->orig_address = page_address(orig_page); | |
14452 | + this_low_pbe->address = page_address(page); | |
14453 | + this_low_pbe->next = NULL; | |
14454 | + *last_low_pbe_ptr = this_low_pbe; | |
14455 | + last_low_pbe_ptr = &this_low_pbe->next; | |
14456 | + this_low_pbe = get_next_pbe(&low_pbe_page, | |
14457 | + this_low_pbe, 0); | |
14458 | + if (IS_ERR(this_low_pbe)) { | |
14459 | + printk(KERN_INFO "this_low_pbe is an error.\n"); | |
14460 | + return -ENOMEM; | |
14461 | + } | |
14462 | + } | |
14463 | + } | |
14464 | + | |
14465 | + if (high_pbe_page) | |
14466 | + kunmap(high_pbe_page); | |
14467 | + | |
14468 | + if (last_high_pbe_page != high_pbe_page) { | |
14469 | + if (last_high_pbe_page) | |
14470 | + kunmap(last_high_pbe_page); | |
14471 | + toi__free_page(29, high_pbe_page); | |
14472 | + } | |
14473 | + | |
14474 | + free_conflicting_pages(); | |
14475 | + | |
14476 | +out: | |
14477 | + memory_bm_free(&dup_map1, 0); | |
14478 | + memory_bm_free(&dup_map2, 0); | |
e999739a | 14479 | + |
14480 | + local_irq_enable(); | |
14481 | + enable_nonboot_cpus(); | |
9474138d | 14482 | + |
2380c486 JR |
14483 | + return result; |
14484 | +} | |
14485 | + | |
14486 | +int add_boot_kernel_data_pbe(void) | |
14487 | +{ | |
14488 | + this_low_pbe->address = (char *) __toi_get_nonconflicting_page(); | |
14489 | + if (!this_low_pbe->address) { | |
14490 | + printk(KERN_INFO "Failed to get bkd atomic restore buffer."); | |
14491 | + return -ENOMEM; | |
14492 | + } | |
14493 | + | |
14494 | + toi_bkd.size = sizeof(toi_bkd); | |
14495 | + memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd)); | |
14496 | + | |
14497 | + *last_low_pbe_ptr = this_low_pbe; | |
14498 | + this_low_pbe->orig_address = (char *) boot_kernel_data_buffer; | |
14499 | + this_low_pbe->next = NULL; | |
14500 | + return 0; | |
14501 | +} | |
14502 | diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h | |
14503 | new file mode 100644 | |
14504 | index 0000000..9d0d929 | |
14505 | --- /dev/null | |
14506 | +++ b/kernel/power/tuxonice_pagedir.h | |
14507 | @@ -0,0 +1,50 @@ | |
14508 | +/* | |
14509 | + * kernel/power/tuxonice_pagedir.h | |
14510 | + * | |
14511 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
14512 | + * | |
14513 | + * This file is released under the GPLv2. | |
14514 | + * | |
14515 | + * Declarations for routines for handling pagesets. | |
14516 | + */ | |
14517 | + | |
14518 | +#ifndef KERNEL_POWER_PAGEDIR_H | |
14519 | +#define KERNEL_POWER_PAGEDIR_H | |
14520 | + | |
/* Pagedir
 *
 * Contains the metadata for a set of pages saved in the image.
 */

struct pagedir {
	int id;
	long size;		/* Total number of pages in the set. */
#ifdef CONFIG_HIGHMEM
	long size_high;		/* How many of those are highmem pages. */
#endif
};

/*
 * Accessors for the highmem/lowmem split of a struct pagedir (passed by
 * value name, not by pointer). Without CONFIG_HIGHMEM there is no
 * size_high field: the highmem size reads as zero and the setters are
 * no-ops that do not evaluate their arguments.
 *
 * Arguments are parenthesised so that expressions such as *ptr work.
 */
#ifdef CONFIG_HIGHMEM
#define get_highmem_size(pagedir) ((pagedir).size_high)
#define set_highmem_size(pagedir, sz) \
	do { (pagedir).size_high = (sz); } while (0)
#define inc_highmem_size(pagedir) do { (pagedir).size_high++; } while (0)
#define get_lowmem_size(pagedir) ((pagedir).size - (pagedir).size_high)
#else
#define get_highmem_size(pagedir) (0)
#define set_highmem_size(pagedir, sz) do { } while (0)
#define inc_highmem_size(pagedir) do { } while (0)
#define get_lowmem_size(pagedir) ((pagedir).size)
#endif
14545 | + | |
14546 | +extern struct pagedir pagedir1, pagedir2; | |
14547 | + | |
14548 | +extern void toi_copy_pageset1(void); | |
14549 | + | |
14550 | +extern int toi_get_pageset1_load_addresses(void); | |
14551 | + | |
14552 | +extern unsigned long __toi_get_nonconflicting_page(void); | |
14553 | +struct page *___toi_get_nonconflicting_page(int can_be_highmem); | |
14554 | + | |
14555 | +extern void toi_reset_alt_image_pageset2_pfn(void); | |
14556 | +extern int add_boot_kernel_data_pbe(void); | |
14557 | +#endif | |
14558 | diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c | |
14559 | new file mode 100644 | |
9474138d | 14560 | index 0000000..626e5df |
2380c486 JR |
14561 | --- /dev/null |
14562 | +++ b/kernel/power/tuxonice_pageflags.c | |
9474138d | 14563 | @@ -0,0 +1,28 @@ |
2380c486 JR |
14564 | +/* |
14565 | + * kernel/power/tuxonice_pageflags.c | |
14566 | + * | |
14567 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
14568 | + * | |
14569 | + * This file is released under the GPLv2. | |
14570 | + * | |
14571 | + * Routines for serialising and relocating pageflags in which we | |
14572 | + * store our image metadata. | |
14573 | + */ | |
14574 | + | |
14575 | +#include <linux/list.h> | |
14576 | +#include "tuxonice_pageflags.h" | |
14577 | +#include "power.h" | |
14578 | + | |
14579 | +int toi_pageflags_space_needed(void) | |
14580 | +{ | |
14581 | + int total = 0; | |
14582 | + struct bm_block *bb; | |
14583 | + | |
14584 | + total = sizeof(unsigned int); | |
14585 | + | |
14586 | + list_for_each_entry(bb, &pageset1_map->blocks, hook) | |
14587 | + total += 2 * sizeof(unsigned long) + PAGE_SIZE; | |
14588 | + | |
14589 | + return total; | |
14590 | +} | |
9474138d | 14591 | +EXPORT_SYMBOL_GPL(toi_pageflags_space_needed); |
2380c486 JR |
14592 | diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h |
14593 | new file mode 100644 | |
9474138d | 14594 | index 0000000..610625d |
2380c486 JR |
14595 | --- /dev/null |
14596 | +++ b/kernel/power/tuxonice_pageflags.h | |
9474138d | 14597 | @@ -0,0 +1,72 @@ |
2380c486 JR |
14598 | +/* |
14599 | + * kernel/power/tuxonice_pageflags.h | |
14600 | + * | |
14601 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
14602 | + * | |
14603 | + * This file is released under the GPLv2. | |
14604 | + */ | |
14605 | + | |
14606 | +#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H | |
14607 | +#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H | |
14608 | + | |
2380c486 JR |
/* The memory bitmaps in which per-page image metadata is kept. */
extern struct memory_bitmap *pageset1_map;
extern struct memory_bitmap *pageset1_copy_map;
extern struct memory_bitmap *pageset2_map;
extern struct memory_bitmap *page_resave_map;
extern struct memory_bitmap *io_map;
extern struct memory_bitmap *nosave_map;
extern struct memory_bitmap *free_map;

/*
 * Test/set/clear helpers. Each one maps the struct page to its pfn and
 * operates on the corresponding bit of the named bitmap.
 */

/* Pageset1 membership (pageset1_map). */
#define PagePageset1(page) \
	(memory_bm_test_bit(pageset1_map, page_to_pfn(page)))
#define SetPagePageset1(page) \
	(memory_bm_set_bit(pageset1_map, page_to_pfn(page)))
#define ClearPagePageset1(page) \
	(memory_bm_clear_bit(pageset1_map, page_to_pfn(page)))

/* Pageset1 copy pages (pageset1_copy_map). */
#define PagePageset1Copy(page) \
	(memory_bm_test_bit(pageset1_copy_map, page_to_pfn(page)))
#define SetPagePageset1Copy(page) \
	(memory_bm_set_bit(pageset1_copy_map, page_to_pfn(page)))
#define ClearPagePageset1Copy(page) \
	(memory_bm_clear_bit(pageset1_copy_map, page_to_pfn(page)))

/* Pageset2 membership (pageset2_map). */
#define PagePageset2(page) \
	(memory_bm_test_bit(pageset2_map, page_to_pfn(page)))
#define SetPagePageset2(page) \
	(memory_bm_set_bit(pageset2_map, page_to_pfn(page)))
#define ClearPagePageset2(page) \
	(memory_bm_clear_bit(pageset2_map, page_to_pfn(page)))

/*
 * "Was read/write" flag. These use the very same bits as the Pageset2
 * helpers above (pageset2_map).
 * NOTE(review): presumably the two uses never overlap in time - confirm.
 */
#define PageWasRW(page) \
	(memory_bm_test_bit(pageset2_map, page_to_pfn(page)))
#define SetPageWasRW(page) \
	(memory_bm_set_bit(pageset2_map, page_to_pfn(page)))
#define ClearPageWasRW(page) \
	(memory_bm_clear_bit(pageset2_map, page_to_pfn(page)))

/*
 * For the three maps below the test helper evaluates to 0 while the map
 * pointer is NULL; the set/clear helpers do not check the pointer.
 */

/* Resave flag (page_resave_map). */
#define PageResave(page) (page_resave_map ? \
	memory_bm_test_bit(page_resave_map, page_to_pfn(page)) : 0)
#define SetPageResave(page) \
	(memory_bm_set_bit(page_resave_map, page_to_pfn(page)))
#define ClearPageResave(page) \
	(memory_bm_clear_bit(page_resave_map, page_to_pfn(page)))

/* Nosave flag (nosave_map). */
#define PageNosave(page) (nosave_map ? \
		memory_bm_test_bit(nosave_map, page_to_pfn(page)) : 0)
#define SetPageNosave(page) \
	(memory_bm_set_bit(nosave_map, page_to_pfn(page)))
#define ClearPageNosave(page) \
	(memory_bm_clear_bit(nosave_map, page_to_pfn(page)))

/* Nosave-free flag (free_map). */
#define PageNosaveFree(page) (free_map ? \
		memory_bm_test_bit(free_map, page_to_pfn(page)) : 0)
#define SetPageNosaveFree(page) \
	(memory_bm_set_bit(free_map, page_to_pfn(page)))
#define ClearPageNosaveFree(page) \
	(memory_bm_clear_bit(free_map, page_to_pfn(page)))

/* Bitmap (de)serialisation and sizing entry points. */
extern void save_pageflags(struct memory_bitmap *pagemap);
extern int load_pageflags(struct memory_bitmap *pagemap);
extern int toi_pageflags_space_needed(void);
14669 | +#endif | |
14670 | diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c | |
14671 | new file mode 100644 | |
9474138d | 14672 | index 0000000..9cdb489 |
2380c486 JR |
14673 | --- /dev/null |
14674 | +++ b/kernel/power/tuxonice_power_off.c | |
e999739a | 14675 | @@ -0,0 +1,282 @@ |
2380c486 JR |
14676 | +/* |
14677 | + * kernel/power/tuxonice_power_off.c | |
14678 | + * | |
14679 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
14680 | + * | |
14681 | + * This file is released under the GPLv2. | |
14682 | + * | |
14683 | + * Support for powering down. | |
14684 | + */ | |
14685 | + | |
14686 | +#include <linux/device.h> | |
14687 | +#include <linux/suspend.h> | |
14688 | +#include <linux/mm.h> | |
14689 | +#include <linux/pm.h> | |
14690 | +#include <linux/reboot.h> | |
14691 | +#include <linux/cpu.h> | |
14692 | +#include <linux/console.h> | |
14693 | +#include <linux/fs.h> | |
14694 | +#include "tuxonice.h" | |
14695 | +#include "tuxonice_ui.h" | |
14696 | +#include "tuxonice_power_off.h" | |
14697 | +#include "tuxonice_sysfs.h" | |
14698 | +#include "tuxonice_modules.h" | |
14699 | +#include "tuxonice_io.h" | |
14700 | + | |
/*
 * Power off method handed to __toi_power_down(): 3 tries suspend to ram
 * first, 4 tries hibernation_platform_enter(), 5 is a historic value;
 * anything else goes straight to kernel_power_off().
 */
unsigned long toi_poweroff_method; /* 0 - Kernel power off */
EXPORT_SYMBOL_GPL(toi_poweroff_method);

/* Added to the rtc's since_epoch value to form the wake alarm time. */
static int wake_delay;
/* Names substituted into the lid state and rtc paths by powerdown_init(). */
static char lid_state_file[256], wake_alarm_dir[256];
/* Files opened by powerdown_init(); NULL when not open. */
static struct file *lid_file, *alarm_file, *epoch_file;
/* Method used by toi_check_resleep(); negative values disable it. */
static int post_wake_state = -1;

/* Set to 1 once suspend_devices_and_enter() has succeeded. */
static int did_suspend_to_both;
14710 | + | |
14711 | +/* | |
14712 | + * __toi_power_down | |
14713 | + * Functionality : Powers down or reboots the computer once the image | |
14714 | + * has been written to disk. | |
14715 | + * Key Assumptions : Able to reboot/power down via code called or that | |
14716 | + * the warning emitted if the calls fail will be visible | |
14717 | + * to the user (ie printk resumes devices). | |
14718 | + */ | |
14719 | + | |
14720 | +static void __toi_power_down(int method) | |
14721 | +{ | |
14722 | + int error; | |
14723 | + | |
14724 | + toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." : | |
14725 | + "Powering down."); | |
14726 | + | |
14727 | + if (test_result_state(TOI_ABORTED)) | |
14728 | + goto out; | |
14729 | + | |
14730 | + if (test_action_state(TOI_REBOOT)) | |
14731 | + kernel_restart(NULL); | |
14732 | + | |
14733 | + switch (method) { | |
14734 | + case 0: | |
14735 | + break; | |
14736 | + case 3: | |
14737 | + /* | |
14738 | + * Re-read the overwritten part of pageset2 to make post-resume | |
14739 | + * faster. | |
14740 | + */ | |
14741 | + if (read_pageset2(1)) | |
e999739a | 14742 | + panic("Attempt to reload pagedir 2 failed. " |
14743 | + "Try rebooting."); | |
2380c486 JR |
14744 | + |
14745 | + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); | |
14746 | + if (!error) { | |
14747 | + error = suspend_devices_and_enter(PM_SUSPEND_MEM); | |
14748 | + if (!error) | |
14749 | + did_suspend_to_both = 1; | |
14750 | + } | |
14751 | + pm_notifier_call_chain(PM_POST_SUSPEND); | |
14752 | + | |
14753 | + /* Success - we're now post-resume-from-ram */ | |
14754 | + if (did_suspend_to_both) | |
14755 | + return; | |
14756 | + | |
14757 | + /* Failed to suspend to ram - do normal power off */ | |
14758 | + break; | |
14759 | + case 4: | |
14760 | + /* | |
14761 | + * If succeeds, doesn't return. If fails, do a simple | |
14762 | + * powerdown. | |
14763 | + */ | |
14764 | + hibernation_platform_enter(); | |
14765 | + break; | |
14766 | + case 5: | |
14767 | + /* Historic entry only now */ | |
14768 | + break; | |
14769 | + } | |
14770 | + | |
14771 | + if (method && method != 5) | |
14772 | + toi_cond_pause(1, | |
14773 | + "Falling back to alternate power off method."); | |
14774 | + | |
14775 | + if (test_result_state(TOI_ABORTED)) | |
14776 | + goto out; | |
14777 | + | |
14778 | + kernel_power_off(); | |
14779 | + kernel_halt(); | |
14780 | + toi_cond_pause(1, "Powerdown failed."); | |
14781 | + while (1) | |
14782 | + cpu_relax(); | |
14783 | + | |
14784 | +out: | |
14785 | + if (read_pageset2(1)) | |
14786 | + panic("Attempt to reload pagedir 2 failed. Try rebooting."); | |
14787 | + return; | |
14788 | +} | |
14789 | + | |
/*
 * Close @file if it is open and reset the variable to NULL. Wrapped in
 * do { } while (0) so it behaves as a single statement, including when
 * used as the body of an if that has an else branch.
 */
#define CLOSE_FILE(file) \
	do { \
		if (file) { \
			filp_close(file, NULL); \
			file = NULL; \
		} \
	} while (0)
14794 | + | |
14795 | +static void powerdown_cleanup(int toi_or_resume) | |
14796 | +{ | |
14797 | + if (!toi_or_resume) | |
14798 | + return; | |
14799 | + | |
14800 | + CLOSE_FILE(lid_file); | |
14801 | + CLOSE_FILE(alarm_file); | |
14802 | + CLOSE_FILE(epoch_file); | |
14803 | +} | |
14804 | + | |
14805 | +static void open_file(char *format, char *arg, struct file **var, int mode, | |
14806 | + char *desc) | |
14807 | +{ | |
14808 | + char buf[256]; | |
14809 | + | |
14810 | + if (strlen(arg)) { | |
14811 | + sprintf(buf, format, arg); | |
14812 | + *var = filp_open(buf, mode, 0); | |
14813 | + if (IS_ERR(*var) || !*var) { | |
14814 | + printk(KERN_INFO "Failed to open %s file '%s' (%p).\n", | |
14815 | + desc, buf, *var); | |
14816 | + *var = NULL; | |
14817 | + } | |
14818 | + } | |
14819 | +} | |
14820 | + | |
14821 | +static int powerdown_init(int toi_or_resume) | |
14822 | +{ | |
14823 | + if (!toi_or_resume) | |
14824 | + return 0; | |
14825 | + | |
14826 | + did_suspend_to_both = 0; | |
14827 | + | |
14828 | + open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file, | |
14829 | + O_RDONLY, "lid"); | |
14830 | + | |
14831 | + if (strlen(wake_alarm_dir)) { | |
14832 | + open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir, | |
14833 | + &alarm_file, O_WRONLY, "alarm"); | |
14834 | + | |
14835 | + open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir, | |
14836 | + &epoch_file, O_RDONLY, "epoch"); | |
14837 | + } | |
14838 | + | |
14839 | + return 0; | |
14840 | +} | |
14841 | + | |
14842 | +static int lid_closed(void) | |
14843 | +{ | |
14844 | + char array[25]; | |
14845 | + ssize_t size; | |
14846 | + loff_t pos = 0; | |
14847 | + | |
14848 | + if (!lid_file) | |
14849 | + return 0; | |
14850 | + | |
14851 | + size = vfs_read(lid_file, (char __user *) array, 25, &pos); | |
14852 | + if ((int) size < 1) { | |
14853 | + printk(KERN_INFO "Failed to read lid state file (%d).\n", | |
14854 | + (int) size); | |
14855 | + return 0; | |
14856 | + } | |
14857 | + | |
14858 | + if (!strcmp(array, "state: closed\n")) | |
14859 | + return 1; | |
14860 | + | |
14861 | + return 0; | |
14862 | +} | |
14863 | + | |
14864 | +static void write_alarm_file(int value) | |
14865 | +{ | |
14866 | + ssize_t size; | |
14867 | + char buf[40]; | |
14868 | + loff_t pos = 0; | |
14869 | + | |
14870 | + if (!alarm_file) | |
14871 | + return; | |
14872 | + | |
14873 | + sprintf(buf, "%d\n", value); | |
14874 | + | |
14875 | + size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos); | |
14876 | + | |
14877 | + if (size < 0) | |
14878 | + printk(KERN_INFO "Error %d writing alarm value %s.\n", | |
14879 | + (int) size, buf); | |
14880 | +} | |
14881 | + | |
14882 | +/** | |
14883 | + * toi_check_resleep: See whether to powerdown again after waking. | |
14884 | + * | |
14885 | + * After waking, check whether we should powerdown again in a (usually | |
14886 | + * different) way. We only do this if the lid switch is still closed. | |
14887 | + */ | |
14888 | +void toi_check_resleep(void) | |
14889 | +{ | |
14890 | + /* We only return if we suspended to ram and woke. */ | |
14891 | + if (lid_closed() && post_wake_state >= 0) | |
14892 | + __toi_power_down(post_wake_state); | |
14893 | +} | |
14894 | + | |
14895 | +void toi_power_down(void) | |
14896 | +{ | |
14897 | + if (alarm_file && wake_delay) { | |
14898 | + char array[25]; | |
14899 | + loff_t pos = 0; | |
14900 | + size_t size = vfs_read(epoch_file, (char __user *) array, 25, | |
14901 | + &pos); | |
14902 | + | |
14903 | + if (((int) size) < 1) | |
14904 | + printk(KERN_INFO "Failed to read epoch file (%d).\n", | |
14905 | + (int) size); | |
14906 | + else { | |
9474138d AM |
14907 | + unsigned long since_epoch; |
14908 | + if (!strict_strtoul(array, 0, &since_epoch)) { | |
14909 | + /* Clear any wakeup time. */ | |
14910 | + write_alarm_file(0); | |
2380c486 | 14911 | + |
9474138d AM |
14912 | + /* Set new wakeup time. */ |
14913 | + write_alarm_file(since_epoch + wake_delay); | |
14914 | + } | |
2380c486 JR |
14915 | + } |
14916 | + } | |
14917 | + | |
14918 | + __toi_power_down(toi_poweroff_method); | |
14919 | + | |
14920 | + toi_check_resleep(); | |
14921 | +} | |
14922 | +EXPORT_SYMBOL_GPL(toi_power_down); | |
14923 | + | |
/*
 * Sysfs entries exposing this file's tunables. The table is empty unless
 * CONFIG_ACPI is defined.
 * NOTE(review): the numeric arguments look like minimum/maximum bounds -
 * confirm against the SYSFS_* macro definitions.
 */
static struct toi_sysfs_data sysfs_params[] = {
#if defined(CONFIG_ACPI)
	SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL),
	SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL),
	SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL),
	SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0,
			NULL),
	SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0),
	SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both,
		0, 0, 0, NULL)
#endif
};

/* Module descriptor registered by toi_poweroff_init(). */
static struct toi_module_ops powerdown_ops = {
	.type				= MISC_HIDDEN_MODULE,
	.name				= "poweroff",
	.initialise			= powerdown_init,
	.cleanup			= powerdown_cleanup,
	.directory			= "[ROOT]",
	.module				= THIS_MODULE,
	.sysfs_data			= sysfs_params,
	/* Element count of sysfs_params. */
	.num_sysfs_entries		= sizeof(sysfs_params) /
		sizeof(struct toi_sysfs_data),
};
14948 | + | |
/* Register the poweroff module; returns toi_register_module()'s result. */
int toi_poweroff_init(void)
{
	return toi_register_module(&powerdown_ops);
}
14953 | + | |
/* Unregister the poweroff module again. */
void toi_poweroff_exit(void)
{
	toi_unregister_module(&powerdown_ops);
}
14958 | diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h | |
14959 | new file mode 100644 | |
14960 | index 0000000..a85633a | |
14961 | --- /dev/null | |
14962 | +++ b/kernel/power/tuxonice_power_off.h | |
14963 | @@ -0,0 +1,24 @@ | |
14964 | +/* | |
14965 | + * kernel/power/tuxonice_power_off.h | |
14966 | + * | |
14967 | + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net) | |
14968 | + * | |
14969 | + * This file is released under the GPLv2. | |
14970 | + * | |
14971 | + * Support for powering down. | |
14972 | + */ | |
14973 | + | |
14974 | +int toi_pm_state_finish(void); | |
14975 | +void toi_power_down(void); | |
14976 | +extern unsigned long toi_poweroff_method; | |
14977 | +int toi_poweroff_init(void); | |
14978 | +void toi_poweroff_exit(void); | |
14979 | +void toi_check_resleep(void); | |
14980 | + | |
14981 | +extern int platform_begin(int platform_mode); | |
14982 | +extern int platform_pre_snapshot(int platform_mode); | |
14983 | +extern void platform_leave(int platform_mode); | |
14984 | +extern void platform_end(int platform_mode); | |
14985 | +extern void platform_finish(int platform_mode); | |
14986 | +extern int platform_pre_restore(int platform_mode); | |
14987 | +extern void platform_restore_cleanup(int platform_mode); | |
14988 | diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c | |
14989 | new file mode 100644 | |
9474138d | 14990 | index 0000000..a675de0 |
2380c486 JR |
14991 | --- /dev/null |
14992 | +++ b/kernel/power/tuxonice_prepare_image.c | |
9474138d | 14993 | @@ -0,0 +1,1045 @@ |
2380c486 JR |
14994 | +/* |
14995 | + * kernel/power/tuxonice_prepare_image.c | |
14996 | + * | |
14997 | + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net) | |
14998 | + * | |
14999 | + * This file is released under the GPLv2. | |
15000 | + * | |
15001 | + * We need to eat memory until we can: | |
15002 | + * 1. Perform the save without changing anything (RAM_NEEDED < #pages) | |
15003 | + * 2. Fit it all in available space (toiActiveAllocator->available_space() >= | |
15004 | + * main_storage_needed()) | |
15005 | + * 3. Reload the pagedir and pageset1 to places that don't collide with their | |
15006 | + * final destinations, not knowing to what extent the resumed kernel will | |
15007 | + * overlap with the one loaded at boot time. I think the resumed kernel | |
15008 | + * should overlap completely, but I don't want to rely on this as it is | |
15009 | + * an unproven assumption. We therefore assume there will be no overlap at | |
15010 | + * all (worst case). | |
15011 | + * 4. Meet the user's requested limit (if any) on the size of the image. | |
15012 | + * The limit is in MB, so pages/256 (assuming 4K pages). | |
15013 | + * | |
15014 | + */ | |
15015 | + | |
2380c486 JR |
15016 | +#include <linux/highmem.h> |
15017 | +#include <linux/freezer.h> | |
15018 | +#include <linux/hardirq.h> | |
15019 | +#include <linux/mmzone.h> | |
15020 | +#include <linux/console.h> | |
15021 | + | |
15022 | +#include "tuxonice_pageflags.h" | |
15023 | +#include "tuxonice_modules.h" | |
15024 | +#include "tuxonice_io.h" | |
15025 | +#include "tuxonice_ui.h" | |
15026 | +#include "tuxonice_extent.h" | |
15027 | +#include "tuxonice_prepare_image.h" | |
2380c486 JR |
15028 | +#include "tuxonice.h" |
15029 | +#include "tuxonice_checksum.h" | |
15030 | +#include "tuxonice_sysfs.h" | |
15031 | +#include "tuxonice_alloc.h" | |
15032 | +#include "tuxonice_atomic_copy.h" | |
15033 | + | |
0ada99ac | 15034 | +static long num_nosave, main_storage_allocated, storage_available, |
15035 | + header_storage_needed; | |
2380c486 JR |
15036 | +long extra_pd1_pages_allowance = CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE; |
15037 | +int image_size_limit; | |
15038 | +static int no_ps2_needed; | |
15039 | + | |
15040 | +struct attention_list { | |
15041 | + struct task_struct *task; | |
15042 | + struct attention_list *next; | |
15043 | +}; | |
15044 | + | |
15045 | +static struct attention_list *attention_list; | |
15046 | + | |
15047 | +#define PAGESET1 0 | |
15048 | +#define PAGESET2 1 | |
15049 | + | |
15050 | +void free_attention_list(void) | |
15051 | +{ | |
15052 | + struct attention_list *last = NULL; | |
15053 | + | |
15054 | + while (attention_list) { | |
15055 | + last = attention_list; | |
15056 | + attention_list = attention_list->next; | |
9474138d | 15057 | + toi_kfree(6, last, sizeof(*last)); |
2380c486 JR |
15058 | + } |
15059 | +} | |
15060 | + | |
15061 | +static int build_attention_list(void) | |
15062 | +{ | |
15063 | + int i, task_count = 0; | |
15064 | + struct task_struct *p; | |
15065 | + struct attention_list *next; | |
15066 | + | |
15067 | + /* | |
15068 | + * Count all processes marked PF_NOFREEZE, plus the current task. | |
15069 | + */ | |
15070 | + read_lock(&tasklist_lock); | |
15071 | + for_each_process(p) | |
15072 | + if ((p->flags & PF_NOFREEZE) || p == current) | |
15073 | + task_count++; | |
15074 | + read_unlock(&tasklist_lock); | |
15075 | + | |
15076 | + /* | |
15077 | + * Allocate attention list structs. | |
15078 | + */ | |
15079 | + for (i = 0; i < task_count; i++) { | |
15080 | + struct attention_list *this = | |
15081 | + toi_kzalloc(6, sizeof(struct attention_list), | |
15082 | + TOI_WAIT_GFP); | |
15083 | + if (!this) { | |
15084 | + printk(KERN_INFO "Failed to allocate slab for " | |
15085 | + "attention list.\n"); | |
15086 | + free_attention_list(); | |
15087 | + return 1; | |
15088 | + } | |
15089 | + this->next = NULL; | |
15090 | + if (attention_list) | |
15091 | + this->next = attention_list; | |
15092 | + attention_list = this; | |
15093 | + } | |
15094 | + | |
15095 | + next = attention_list; | |
15096 | + read_lock(&tasklist_lock); | |
15097 | + for_each_process(p) | |
15098 | + if ((p->flags & PF_NOFREEZE) || p == current) { | |
15099 | + next->task = p; | |
15100 | + next = next->next; | |
15101 | + } | |
15102 | + read_unlock(&tasklist_lock); | |
15103 | + return 0; | |
15104 | +} | |
15105 | + | |
15106 | +static void pageset2_full(void) | |
15107 | +{ | |
15108 | + struct zone *zone; | |
15109 | + struct page *page; | |
15110 | + unsigned long flags; | |
15111 | + int i; | |
15112 | + | |
15113 | + for_each_zone(zone) { | |
15114 | + spin_lock_irqsave(&zone->lru_lock, flags); | |
15115 | + for_each_lru(i) { | |
15116 | + if (!zone_page_state(zone, NR_LRU_BASE + i)) | |
15117 | + continue; | |
15118 | + | |
e999739a | 15119 | + list_for_each_entry(page, &zone->lru[i].list, lru) { |
15120 | + struct address_space *mapping; | |
15121 | + | |
15122 | + mapping = page_mapping(page); | |
15123 | + if (!mapping || !mapping->host || | |
15124 | + !(mapping->host->i_flags & S_ATOMIC_COPY)) | |
15125 | + SetPagePageset2(page); | |
15126 | + } | |
2380c486 JR |
15127 | + } |
15128 | + spin_unlock_irqrestore(&zone->lru_lock, flags); | |
15129 | + } | |
15130 | +} | |
15131 | + | |
15132 | +/* | |
15133 | + * toi_mark_task_as_pageset | |
15134 | + * Functionality : Marks all the saveable pages belonging to a given process | |
15135 | + * as belonging to a particular pageset. | |
15136 | + */ | |
15137 | + | |
15138 | +static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2) | |
15139 | +{ | |
15140 | + struct vm_area_struct *vma; | |
15141 | + struct mm_struct *mm; | |
15142 | + | |
15143 | + mm = t->active_mm; | |
15144 | + | |
15145 | + if (!mm || !mm->mmap) | |
15146 | + return; | |
15147 | + | |
15148 | + if (!irqs_disabled()) | |
15149 | + down_read(&mm->mmap_sem); | |
15150 | + | |
15151 | + for (vma = mm->mmap; vma; vma = vma->vm_next) { | |
15152 | + unsigned long posn; | |
15153 | + | |
e999739a | 15154 | + if (!vma->vm_start || vma->vm_flags & VM_SPECIAL) |
2380c486 JR |
15155 | + continue; |
15156 | + | |
15157 | + for (posn = vma->vm_start; posn < vma->vm_end; | |
15158 | + posn += PAGE_SIZE) { | |
15159 | + struct page *page = follow_page(vma, posn, 0); | |
e999739a | 15160 | + struct address_space *mapping; |
15161 | + | |
15162 | + if (!page || !pfn_valid(page_to_pfn(page))) | |
15163 | + continue; | |
15164 | + | |
15165 | + mapping = page_mapping(page); | |
15166 | + if (mapping && mapping->host && | |
15167 | + mapping->host->i_flags & S_ATOMIC_COPY) | |
2380c486 JR |
15168 | + continue; |
15169 | + | |
15170 | + if (pageset2) | |
15171 | + SetPagePageset2(page); | |
15172 | + else { | |
15173 | + ClearPagePageset2(page); | |
15174 | + SetPagePageset1(page); | |
15175 | + } | |
15176 | + } | |
15177 | + } | |
15178 | + | |
15179 | + if (!irqs_disabled()) | |
15180 | + up_read(&mm->mmap_sem); | |
15181 | +} | |
15182 | + | |
e999739a | 15183 | +static void mark_tasks(int pageset) |
15184 | +{ | |
15185 | + struct task_struct *p; | |
15186 | + | |
15187 | + read_lock(&tasklist_lock); | |
15188 | + for_each_process(p) { | |
15189 | + if (!p->mm) | |
15190 | + continue; | |
15191 | + | |
15192 | + if (p->flags & PF_KTHREAD) | |
15193 | + continue; | |
15194 | + | |
15195 | + toi_mark_task_as_pageset(p, pageset); | |
15196 | + } | |
15197 | + read_unlock(&tasklist_lock); | |
15198 | + | |
15199 | +} | |
15200 | + | |
2380c486 JR |
15201 | +/* toi_mark_pages_for_pageset2 |
15202 | + * | |
15203 | + * Description: Mark unshared pages in processes not needed for hibernate as | |
15204 | + * being able to be written out in a separate pagedir. | |
15205 | + * HighMem pages are simply marked as pageset2. They won't be | |
15206 | + * needed during hibernate. | |
15207 | + */ | |
15208 | + | |
15209 | +static void toi_mark_pages_for_pageset2(void) | |
15210 | +{ | |
2380c486 JR |
15211 | + struct attention_list *this = attention_list; |
15212 | + | |
15213 | + memory_bm_clear(pageset2_map); | |
15214 | + | |
15215 | + if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed) | |
15216 | + return; | |
15217 | + | |
15218 | + if (test_action_state(TOI_PAGESET2_FULL)) | |
15219 | + pageset2_full(); | |
e999739a | 15220 | + else |
15221 | + mark_tasks(PAGESET2); | |
2380c486 JR |
15222 | + |
15223 | + /* | |
15224 | + * Because the tasks in attention_list are ones related to hibernating, | |
15225 | + * we know that they won't go away under us. | |
15226 | + */ | |
15227 | + | |
15228 | + while (this) { | |
15229 | + if (!test_result_state(TOI_ABORTED)) | |
15230 | + toi_mark_task_as_pageset(this->task, PAGESET1); | |
15231 | + this = this->next; | |
15232 | + } | |
15233 | +} | |
15234 | + | |
15235 | +/* | |
15236 | + * The atomic copy of pageset1 is stored in pageset2 pages. | |
15237 | + * But if pageset1 is larger (normally only just after boot), | |
15238 | + * we need to allocate extra pages to store the atomic copy. | |
15239 | + * The following data struct and functions are used to handle | |
15240 | + * the allocation and freeing of that memory. | |
15241 | + */ | |
15242 | + | |
15243 | +static long extra_pages_allocated; | |
15244 | + | |
15245 | +struct extras { | |
15246 | + struct page *page; | |
15247 | + int order; | |
15248 | + struct extras *next; | |
15249 | +}; | |
15250 | + | |
15251 | +static struct extras *extras_list; | |
15252 | + | |
15253 | +/* toi_free_extra_pagedir_memory | |
15254 | + * | |
15255 | + * Description: Free previously allocated extra pagedir memory. | |
15256 | + */ | |
15257 | +void toi_free_extra_pagedir_memory(void) | |
15258 | +{ | |
15259 | + /* Free allocated pages */ | |
15260 | + while (extras_list) { | |
15261 | + struct extras *this = extras_list; | |
15262 | + int i; | |
15263 | + | |
15264 | + extras_list = this->next; | |
15265 | + | |
15266 | + for (i = 0; i < (1 << this->order); i++) | |
15267 | + ClearPageNosave(this->page + i); | |
15268 | + | |
15269 | + toi_free_pages(9, this->page, this->order); | |
9474138d | 15270 | + toi_kfree(7, this, sizeof(*this)); |
2380c486 JR |
15271 | + } |
15272 | + | |
15273 | + extra_pages_allocated = 0; | |
15274 | +} | |
15275 | + | |
15276 | +/* toi_allocate_extra_pagedir_memory | |
15277 | + * | |
15278 | + * Description: Allocate memory for making the atomic copy of pagedir1 in the | |
15279 | + * case where it is bigger than pagedir2. | |
15280 | + * Arguments: int extra_pages_needed: Total number of extra pages needed. | |
15281 | + * Result: int. Extra pages now allocated, or 0 if no more were needed. | |
15282 | + */ | |
15283 | +static int toi_allocate_extra_pagedir_memory(int extra_pages_needed) | |
15284 | +{ | |
15285 | + int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated; | |
15286 | + gfp_t flags = TOI_ATOMIC_GFP; | |
15287 | + | |
15288 | + if (num_to_alloc < 1) | |
15289 | + return 0; | |
15290 | + | |
15291 | + order = fls(num_to_alloc); | |
15292 | + if (order >= MAX_ORDER) | |
15293 | + order = MAX_ORDER - 1; | |
15294 | + | |
15295 | + while (num_to_alloc) { | |
15296 | + struct page *newpage; | |
15297 | + unsigned long virt; | |
15298 | + struct extras *extras_entry; | |
15299 | + | |
15300 | + while ((1 << order) > num_to_alloc) | |
15301 | + order--; | |
15302 | + | |
15303 | + extras_entry = (struct extras *) toi_kzalloc(7, | |
15304 | + sizeof(struct extras), TOI_ATOMIC_GFP); | |
15305 | + | |
15306 | + if (!extras_entry) | |
15307 | + return extra_pages_allocated; | |
15308 | + | |
15309 | + virt = toi_get_free_pages(9, flags, order); | |
15310 | + while (!virt && order) { | |
15311 | + order--; | |
15312 | + virt = toi_get_free_pages(9, flags, order); | |
15313 | + } | |
15314 | + | |
15315 | + if (!virt) { | |
9474138d | 15316 | + toi_kfree(7, extras_entry, sizeof(*extras_entry)); |
2380c486 JR |
15317 | + return extra_pages_allocated; |
15318 | + } | |
15319 | + | |
15320 | + newpage = virt_to_page(virt); | |
15321 | + | |
15322 | + extras_entry->page = newpage; | |
15323 | + extras_entry->order = order; | |
15324 | + extras_entry->next = NULL; | |
15325 | + | |
15326 | + if (extras_list) | |
15327 | + extras_entry->next = extras_list; | |
15328 | + | |
15329 | + extras_list = extras_entry; | |
15330 | + | |
15331 | + for (j = 0; j < (1 << order); j++) { | |
15332 | + SetPageNosave(newpage + j); | |
15333 | + SetPagePageset1Copy(newpage + j); | |
15334 | + } | |
15335 | + | |
15336 | + extra_pages_allocated += (1 << order); | |
15337 | + num_to_alloc -= (1 << order); | |
15338 | + } | |
15339 | + | |
15340 | + return extra_pages_allocated; | |
15341 | +} | |
15342 | + | |
15343 | +/* | |
15344 | + * real_nr_free_pages: Count free pages (including pcp pages) in the zones | |
15345 | + * whose bit, (1 << zone_idx(zone)), is set in zone_idx_mask (-1 for all). | |
15346 | + */ | |
15347 | +long real_nr_free_pages(unsigned long zone_idx_mask) | |
15348 | +{ | |
15349 | + struct zone *zone; | |
15350 | + int result = 0, cpu; | |
15351 | + | |
15352 | + /* PCP lists */ | |
9474138d | 15353 | + for_each_populated_zone(zone) { |
2380c486 JR |
15354 | + if (!(zone_idx_mask & (1 << zone_idx(zone)))) |
15355 | + continue; | |
15356 | + | |
15357 | + for_each_online_cpu(cpu) { | |
15358 | + struct per_cpu_pageset *pset = zone_pcp(zone, cpu); | |
15359 | + struct per_cpu_pages *pcp = &pset->pcp; | |
15360 | + result += pcp->count; | |
15361 | + } | |
15362 | + | |
15363 | + result += zone_page_state(zone, NR_FREE_PAGES); | |
15364 | + } | |
15365 | + return result; | |
15366 | +} | |
15367 | +EXPORT_SYMBOL_GPL(real_nr_free_pages); | |
15368 | + | |
15369 | +/* | |
15370 | + * Discover how much extra memory will be required by the drivers | |
15371 | + * when they're asked to hibernate. We can then ensure that amount | |
15372 | + * of memory is available when we really want it. | |
15373 | + */ | |
15374 | +static void get_extra_pd1_allowance(void) | |
15375 | +{ | |
15376 | + long orig_num_free = real_nr_free_pages(all_zones_mask), final; | |
15377 | + | |
15378 | + toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers."); | |
15379 | + | |
15380 | + if (!toi_go_atomic(PMSG_FREEZE, 1)) { | |
15381 | + final = real_nr_free_pages(all_zones_mask); | |
15382 | + toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0); | |
15383 | + | |
15384 | + extra_pd1_pages_allowance = max( | |
15385 | + orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE, | |
15386 | + (long) MIN_EXTRA_PAGES_ALLOWANCE); | |
15387 | + } | |
15388 | +} | |
15389 | + | |
15390 | +/* | |
15391 | + * Amount of storage needed, possibly taking into account the | |
15392 | + * expected compression ratio and possibly also ignoring our | |
15393 | + * allowance for extra pages. | |
15394 | + */ | |
15395 | +static long main_storage_needed(int use_ecr, | |
15396 | + int ignore_extra_pd1_allow) | |
15397 | +{ | |
15398 | + return (pagedir1.size + pagedir2.size + | |
15399 | + (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) * | |
15400 | + (use_ecr ? toi_expected_compression_ratio() : 100) / 100; | |
15401 | +} | |
15402 | + | |
15403 | +/* | |
15404 | + * Storage needed for the image header: summed in bytes, returned in pages. | |
15405 | + */ | |
0ada99ac | 15406 | +long get_header_storage_needed(void) |
2380c486 JR |
15407 | +{ |
15408 | + long bytes = (int) sizeof(struct toi_header) + | |
15409 | + toi_header_storage_for_modules() + | |
15410 | + toi_pageflags_space_needed(); | |
15411 | + | |
15412 | + return DIV_ROUND_UP(bytes, PAGE_SIZE); | |
15413 | +} | |
9474138d | 15414 | +EXPORT_SYMBOL_GPL(get_header_storage_needed); |
2380c486 JR |
15415 | + |
15416 | +/* | |
15417 | + * When freeing memory, pages from either pageset might be freed. | |
15418 | + * | |
15419 | + * When seeking to free memory to be able to hibernate, for every ps1 page | |
15420 | + * freed, we need 2 fewer pages for the atomic copy because there is one fewer | |
15421 | + * page to copy and one more page into which data can be copied. | |
15422 | + * | |
15423 | + * Freeing ps2 pages saves us nothing directly. No more memory is available | |
15424 | + * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but | |
15425 | + * that's too much work to figure out. | |
15426 | + * | |
15427 | + * => ps1_to_free functions | |
15428 | + * | |
15429 | + * Of course if we just want to reduce the image size, because of storage | |
15430 | + * limitations or an image size limit either ps will do. | |
15431 | + * | |
15432 | + * => any_to_free function | |
15433 | + */ | |
15434 | + | |
15435 | +static long highpages_ps1_to_free(void) | |
15436 | +{ | |
15437 | + return max_t(long, 0, DIV_ROUND_UP(get_highmem_size(pagedir1) - | |
15438 | + get_highmem_size(pagedir2), 2) - real_nr_free_high_pages()); | |
15439 | +} | |
15440 | + | |
15441 | +static long lowpages_ps1_to_free(void) | |
15442 | +{ | |
15443 | + return max_t(long, 0, DIV_ROUND_UP(get_lowmem_size(pagedir1) + | |
15444 | + extra_pd1_pages_allowance + MIN_FREE_RAM + | |
15445 | + toi_memory_for_modules(0) - get_lowmem_size(pagedir2) - | |
15446 | + real_nr_free_low_pages() - extra_pages_allocated, 2)); | |
15447 | +} | |
15448 | + | |
15449 | +static long current_image_size(void) | |
15450 | +{ | |
0ada99ac | 15451 | + return pagedir1.size + pagedir2.size + header_storage_needed; |
2380c486 JR |
15452 | +} |
15453 | + | |
15454 | +static long storage_still_required(void) | |
15455 | +{ | |
15456 | + return max_t(long, 0, main_storage_needed(1, 1) - storage_available); | |
15457 | +} | |
15458 | + | |
15459 | +static long ram_still_required(void) | |
15460 | +{ | |
15461 | + return max_t(long, 0, MIN_FREE_RAM + toi_memory_for_modules(0) - | |
15462 | + real_nr_free_low_pages() + 2 * extra_pd1_pages_allowance); | |
15463 | +} | |
15464 | + | |
15465 | +static long any_to_free(int use_image_size_limit) | |
15466 | +{ | |
15467 | + long user_limit = (use_image_size_limit && image_size_limit > 0) ? | |
15468 | + max_t(long, 0, current_image_size() - | |
15469 | + (image_size_limit << 8)) : 0, | |
15470 | + storage_limit = storage_still_required(), | |
15471 | + ram_limit = ram_still_required(), | |
15472 | + first_max = max(user_limit, storage_limit); | |
15473 | + | |
15474 | + return max(first_max, ram_limit); | |
15475 | +} | |
15476 | + | |
15477 | +static int need_pageset2(void) | |
15478 | +{ | |
15479 | + return (real_nr_free_low_pages() + extra_pages_allocated - | |
15480 | + 2 * extra_pd1_pages_allowance - MIN_FREE_RAM - | |
15481 | + toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size; | |
15482 | +} | |
15483 | + | |
15484 | +/* amount_needed | |
15485 | + * | |
15486 | + * Calculates the amount by which the image size needs to be reduced to meet | |
15487 | + * our constraints. | |
15488 | + */ | |
15489 | +static long amount_needed(int use_image_size_limit) | |
15490 | +{ | |
15491 | + return max(highpages_ps1_to_free() + lowpages_ps1_to_free(), | |
15492 | + any_to_free(use_image_size_limit)); | |
15493 | +} | |
15494 | + | |
15495 | +static long image_not_ready(int use_image_size_limit) | |
15496 | +{ | |
15497 | + toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, | |
0ada99ac | 15498 | + "Amount still needed (%ld) > 0:%d," |
2380c486 JR |
15499 | + " Storage allocd: %ld < %ld: %d.\n", |
15500 | + amount_needed(use_image_size_limit), | |
15501 | + (amount_needed(use_image_size_limit) > 0), | |
2380c486 JR |
15502 | + main_storage_allocated, |
15503 | + main_storage_needed(1, 1), | |
15504 | + main_storage_allocated < main_storage_needed(1, 1)); | |
15505 | + | |
15506 | + toi_cond_pause(0, NULL); | |
15507 | + | |
15508 | + return (amount_needed(use_image_size_limit) > 0) || | |
2380c486 JR |
15509 | + main_storage_allocated < main_storage_needed(1, 1); |
15510 | +} | |
15511 | + | |
15512 | +static void display_failure_reason(int tries_exceeded) | |
15513 | +{ | |
15514 | + long storage_required = storage_still_required(), | |
15515 | + ram_required = ram_still_required(), | |
15516 | + high_ps1 = highpages_ps1_to_free(), | |
15517 | + low_ps1 = lowpages_ps1_to_free(); | |
15518 | + | |
15519 | + printk(KERN_INFO "Failed to prepare the image because...\n"); | |
15520 | + | |
15521 | + if (!storage_available) { | |
15522 | + printk(KERN_INFO "- You need some storage available to be " | |
15523 | + "able to hibernate.\n"); | |
15524 | + return; | |
15525 | + } | |
15526 | + | |
15527 | + if (tries_exceeded) | |
15528 | + printk(KERN_INFO "- The maximum number of iterations was " | |
15529 | + "reached without successfully preparing the " | |
15530 | + "image.\n"); | |
15531 | + | |
2380c486 JR |
15532 | + if (storage_required) { |
15533 | + printk(KERN_INFO " - We need at least %ld pages of storage " | |
15534 | + "(ignoring the header), but only have %ld.\n", | |
15535 | + main_storage_needed(1, 1), | |
15536 | + main_storage_allocated); | |
15537 | + set_abort_result(TOI_INSUFFICIENT_STORAGE); | |
15538 | + } | |
15539 | + | |
15540 | + if (ram_required) { | |
15541 | + printk(KERN_INFO " - We need %ld more free pages of low " | |
15542 | + "memory.\n", ram_required); | |
15543 | + printk(KERN_INFO " Minimum free : %8d\n", MIN_FREE_RAM); | |
15544 | + printk(KERN_INFO " + Reqd. by modules : %8ld\n", | |
15545 | + toi_memory_for_modules(0)); | |
15546 | + printk(KERN_INFO " + 2 * extra allow : %8ld\n", | |
15547 | + 2 * extra_pd1_pages_allowance); | |
15548 | + printk(KERN_INFO " - Currently free : %8ld\n", | |
15549 | + real_nr_free_low_pages()); | |
15550 | + printk(KERN_INFO " : ========\n"); | |
15551 | + printk(KERN_INFO " Still needed : %8ld\n", | |
15552 | + ram_required); | |
15553 | + | |
15554 | + /* Print breakdown of memory needed for modules */ | |
15555 | + toi_memory_for_modules(1); | |
15556 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
15557 | + } | |
15558 | + | |
15559 | + if (high_ps1) { | |
15560 | + printk(KERN_INFO "- We need to free %ld highmem pageset 1 " | |
15561 | + "pages.\n", high_ps1); | |
15562 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
15563 | + } | |
15564 | + | |
15565 | + if (low_ps1) { | |
15566 | + printk(KERN_INFO " - We need to free %ld lowmem pageset 1 " | |
15567 | + "pages.\n", low_ps1); | |
15568 | + set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); | |
15569 | + } | |
15570 | +} | |
15571 | + | |
15572 | +static void display_stats(int always, int sub_extra_pd1_allow) | |
15573 | +{ | |
15574 | + char buffer[255]; | |
15575 | + snprintf(buffer, 254, | |
0ada99ac | 15576 | + "Free:%ld(%ld). Sets:%ld(%ld),%ld(%ld). " |
2380c486 JR |
15577 | + "Nosave:%ld-%ld=%ld. Storage:%lu/%lu(%lu=>%lu). " |
15578 | + "Needed:%ld,%ld,%ld(%d,%ld,%ld,%ld) (PS2:%s)\n", | |
15579 | + | |
15580 | + /* Free */ | |
15581 | + real_nr_free_pages(all_zones_mask), | |
15582 | + real_nr_free_low_pages(), | |
15583 | + | |
15584 | + /* Sets */ | |
15585 | + pagedir1.size, pagedir1.size - get_highmem_size(pagedir1), | |
15586 | + pagedir2.size, pagedir2.size - get_highmem_size(pagedir2), | |
15587 | + | |
2380c486 JR |
15588 | + /* Nosave */ |
15589 | + num_nosave, extra_pages_allocated, | |
15590 | + num_nosave - extra_pages_allocated, | |
15591 | + | |
15592 | + /* Storage */ | |
15593 | + main_storage_allocated, | |
15594 | + storage_available, | |
15595 | + main_storage_needed(1, sub_extra_pd1_allow), | |
15596 | + main_storage_needed(1, 1), | |
15597 | + | |
15598 | + /* Needed */ | |
15599 | + lowpages_ps1_to_free(), highpages_ps1_to_free(), | |
15600 | + any_to_free(1), | |
15601 | + MIN_FREE_RAM, toi_memory_for_modules(0), | |
15602 | + extra_pd1_pages_allowance, ((long) image_size_limit) << 8, | |
15603 | + | |
15604 | + need_pageset2() ? "yes" : "no"); | |
15605 | + | |
15606 | + if (always) | |
15607 | + printk("%s", buffer); | |
15608 | + else | |
15609 | + toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer); | |
15610 | +} | |
15611 | + | |
15612 | +/* generate_free_page_map | |
15613 | + * | |
15614 | + * Description: This routine generates a bitmap of free pages from the | |
15615 | + * lists used by the memory manager. We then use the bitmap | |
15616 | + * to quickly calculate which pages to save and in which | |
15617 | + * pagesets. | |
15618 | + */ | |
15619 | +static void generate_free_page_map(void) | |
15620 | +{ | |
15621 | + int order, pfn, cpu, t; | |
15622 | + unsigned long flags, i; | |
15623 | + struct zone *zone; | |
15624 | + struct list_head *curr; | |
15625 | + | |
9474138d | 15626 | + for_each_populated_zone(zone) { |
2380c486 JR |
15627 | + spin_lock_irqsave(&zone->lock, flags); |
15628 | + | |
15629 | + for (i = 0; i < zone->spanned_pages; i++) | |
15630 | + ClearPageNosaveFree(pfn_to_page( | |
15631 | + ZONE_START(zone) + i)); | |
15632 | + | |
15633 | + for_each_migratetype_order(order, t) { | |
15634 | + list_for_each(curr, | |
15635 | + &zone->free_area[order].free_list[t]) { | |
15636 | + unsigned long j; | |
15637 | + | |
15638 | + pfn = page_to_pfn(list_entry(curr, struct page, | |
15639 | + lru)); | |
15640 | + for (j = 0; j < (1UL << order); j++) | |
15641 | + SetPageNosaveFree(pfn_to_page(pfn + j)); | |
15642 | + } | |
15643 | + } | |
15644 | + | |
15645 | + for_each_online_cpu(cpu) { | |
15646 | + struct per_cpu_pageset *pset = zone_pcp(zone, cpu); | |
15647 | + struct per_cpu_pages *pcp = &pset->pcp; | |
15648 | + struct page *page; | |
15649 | + | |
15650 | + list_for_each_entry(page, &pcp->list, lru) | |
15651 | + SetPageNosaveFree(page); | |
15652 | + } | |
15653 | + | |
15654 | + spin_unlock_irqrestore(&zone->lock, flags); | |
15655 | + } | |
15656 | +} | |
15657 | + | |
15658 | +/* size_of_free_region | |
15659 | + * | |
15660 | + * Description: Return the number of pages that are free, beginning with and | |
15661 | + * including this one. | |
15662 | + */ | |
15663 | +static int size_of_free_region(struct zone *zone, unsigned long start_pfn) | |
15664 | +{ | |
15665 | + unsigned long this_pfn = start_pfn, | |
15666 | + end_pfn = ZONE_START(zone) + zone->spanned_pages - 1; | |
15667 | + | |
15668 | + while (this_pfn <= end_pfn && PageNosaveFree(pfn_to_page(this_pfn))) | |
15669 | + this_pfn++; | |
15670 | + | |
15671 | + return this_pfn - start_pfn; | |
15672 | +} | |
15673 | + | |
15674 | +/* flag_image_pages | |
15675 | + * | |
15676 | + * This routine generates our lists of pages to be stored in each | |
15677 | + * pageset. Since we store the data using extents, and adding new | |
15678 | + * extents might allocate a new extent page, this routine may well | |
15679 | + * be called more than once. | |
15680 | + */ | |
15681 | +static void flag_image_pages(int atomic_copy) | |
15682 | +{ | |
15683 | + int num_free = 0; | |
15684 | + unsigned long loop; | |
15685 | + struct zone *zone; | |
15686 | + | |
15687 | + pagedir1.size = 0; | |
15688 | + pagedir2.size = 0; | |
15689 | + | |
15690 | + set_highmem_size(pagedir1, 0); | |
15691 | + set_highmem_size(pagedir2, 0); | |
15692 | + | |
15693 | + num_nosave = 0; | |
15694 | + | |
15695 | + memory_bm_clear(pageset1_map); | |
15696 | + | |
15697 | + generate_free_page_map(); | |
15698 | + | |
15699 | + /* | |
15700 | + * Pages not to be saved are marked Nosave irrespective of being | |
15701 | + * reserved. | |
15702 | + */ | |
9474138d | 15703 | + for_each_populated_zone(zone) { |
2380c486 JR |
15704 | + int highmem = is_highmem(zone); |
15705 | + | |
2380c486 JR |
15706 | + for (loop = 0; loop < zone->spanned_pages; loop++) { |
15707 | + unsigned long pfn = ZONE_START(zone) + loop; | |
15708 | + struct page *page; | |
15709 | + int chunk_size; | |
15710 | + | |
15711 | + if (!pfn_valid(pfn)) | |
15712 | + continue; | |
15713 | + | |
15714 | + chunk_size = size_of_free_region(zone, pfn); | |
15715 | + if (chunk_size) { | |
15716 | + num_free += chunk_size; | |
15717 | + loop += chunk_size - 1; | |
15718 | + continue; | |
15719 | + } | |
15720 | + | |
15721 | + page = pfn_to_page(pfn); | |
15722 | + | |
15723 | + if (PageNosave(page)) { | |
15724 | + num_nosave++; | |
15725 | + continue; | |
15726 | + } | |
15727 | + | |
15728 | + page = highmem ? saveable_highmem_page(zone, pfn) : | |
15729 | + saveable_page(zone, pfn); | |
15730 | + | |
15731 | + if (!page) { | |
15732 | + num_nosave++; | |
15733 | + continue; | |
15734 | + } | |
15735 | + | |
15736 | + if (PagePageset2(page)) { | |
15737 | + pagedir2.size++; | |
15738 | + if (PageHighMem(page)) | |
15739 | + inc_highmem_size(pagedir2); | |
15740 | + else | |
15741 | + SetPagePageset1Copy(page); | |
15742 | + if (PageResave(page)) { | |
15743 | + SetPagePageset1(page); | |
15744 | + ClearPagePageset1Copy(page); | |
15745 | + pagedir1.size++; | |
15746 | + if (PageHighMem(page)) | |
15747 | + inc_highmem_size(pagedir1); | |
15748 | + } | |
15749 | + } else { | |
15750 | + pagedir1.size++; | |
15751 | + SetPagePageset1(page); | |
15752 | + if (PageHighMem(page)) | |
15753 | + inc_highmem_size(pagedir1); | |
15754 | + } | |
15755 | + } | |
15756 | + } | |
15757 | + | |
15758 | + if (!atomic_copy) | |
15759 | + toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0, | |
15760 | + "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)" | |
15761 | + " + NumFree (%d) = %d.\n", | |
15762 | + pagedir1.size, pagedir2.size, num_nosave, num_free, | |
15763 | + pagedir1.size + pagedir2.size + num_nosave + num_free); | |
15764 | +} | |
15765 | + | |
15766 | +void toi_recalculate_image_contents(int atomic_copy) | |
15767 | +{ | |
15768 | + memory_bm_clear(pageset1_map); | |
15769 | + if (!atomic_copy) { | |
15770 | + unsigned long pfn; | |
15771 | + memory_bm_position_reset(pageset2_map); | |
15772 | + for (pfn = memory_bm_next_pfn(pageset2_map); | |
15773 | + pfn != BM_END_OF_MAP; | |
15774 | + pfn = memory_bm_next_pfn(pageset2_map)) | |
15775 | + ClearPagePageset1Copy(pfn_to_page(pfn)); | |
15776 | + /* Need to call this before getting pageset1_size! */ | |
15777 | + toi_mark_pages_for_pageset2(); | |
15778 | + } | |
15779 | + flag_image_pages(atomic_copy); | |
15780 | + | |
15781 | + if (!atomic_copy) { | |
15782 | + storage_available = toiActiveAllocator->storage_available(); | |
15783 | + display_stats(0, 0); | |
15784 | + } | |
15785 | +} | |
15786 | + | |
15787 | +/* update_image | |
15788 | + * | |
15789 | + * Allocate [more] memory and storage for the image. | |
15790 | + */ | |
15791 | +static void update_image(int ps2_recalc) | |
15792 | +{ | |
0ada99ac | 15793 | + int wanted, got, old_header_req; |
2380c486 JR |
15794 | + long seek; |
15795 | + | |
2380c486 JR |
15796 | + /* Include allowance for growth in pagedir1 while writing pagedir 2 */ |
15797 | + wanted = pagedir1.size + extra_pd1_pages_allowance - | |
15798 | + get_lowmem_size(pagedir2); | |
15799 | + if (wanted > extra_pages_allocated) { | |
15800 | + got = toi_allocate_extra_pagedir_memory(wanted); | |
15801 | + if (wanted < got) { | |
15802 | + toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, | |
15803 | + "Want %d extra pages for pageset1, got %d.\n", | |
15804 | + wanted, got); | |
15805 | + return; | |
15806 | + } | |
15807 | + } | |
15808 | + | |
15809 | + if (ps2_recalc) | |
15810 | + goto recalc; | |
15811 | + | |
15812 | + thaw_kernel_threads(); | |
15813 | + | |
15814 | + /* | |
15815 | + * Allocate remaining storage space, if possible, up to the | |
15816 | + * maximum we know we'll need. It's okay to allocate the | |
15817 | + * maximum if the writer is the swapwriter, but | |
15818 | + * we don't want to grab all available space on an NFS share. | |
15819 | + * We therefore ignore the expected compression ratio here, | |
15820 | + * thereby trying to allocate the maximum image size we could | |
15821 | + * need (assuming compression doesn't expand the image), but | |
15822 | + * don't complain if we can't get the full amount we're after. | |
15823 | + */ | |
15824 | + | |
0ada99ac | 15825 | + do { |
15826 | + old_header_req = header_storage_needed; | |
15827 | + toiActiveAllocator->reserve_header_space(header_storage_needed); | |
2380c486 | 15828 | + |
0ada99ac | 15829 | + /* How much storage is free with the reservation applied? */ |
15830 | + storage_available = toiActiveAllocator->storage_available(); | |
15831 | + seek = min(storage_available, main_storage_needed(0, 0)); | |
2380c486 | 15832 | + |
0ada99ac | 15833 | + toiActiveAllocator->allocate_storage(seek); |
2380c486 | 15834 | + |
0ada99ac | 15835 | + main_storage_allocated = |
15836 | + toiActiveAllocator->storage_allocated(); | |
2380c486 | 15837 | + |
0ada99ac | 15838 | + /* Need more header because more storage allocated? */ |
15839 | + header_storage_needed = get_header_storage_needed(); | |
2380c486 | 15840 | + |
0ada99ac | 15841 | + } while (header_storage_needed > old_header_req); |
2380c486 JR |
15842 | + |
15843 | + if (freeze_processes()) | |
15844 | + set_abort_result(TOI_FREEZING_FAILED); | |
15845 | + | |
15846 | +recalc: | |
15847 | + toi_recalculate_image_contents(0); | |
15848 | +} | |
15849 | + | |
15850 | +/* attempt_to_freeze | |
15851 | + * | |
15852 | + * Try to freeze processes. | |
15853 | + */ | |
15854 | + | |
15855 | +static int attempt_to_freeze(void) | |
15856 | +{ | |
15857 | + int result; | |
15858 | + | |
15859 | + /* Stop processes before checking again */ | |
15860 | + thaw_processes(); | |
15861 | + toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing " | |
15862 | + "filesystems."); | |
15863 | + result = freeze_processes(); | |
15864 | + | |
15865 | + if (result) | |
15866 | + set_abort_result(TOI_FREEZING_FAILED); | |
15867 | + | |
15868 | + return result; | |
15869 | +} | |
15870 | + | |
15871 | +/* eat_memory | |
15872 | + * | |
15873 | + * Try to free some memory, either to meet hard or soft constraints on the image | |
15874 | + * characteristics. | |
15875 | + * | |
15876 | + * Hard constraints: | |
15877 | + * - Pageset1 must be < half of memory; | |
15878 | + * - We must have enough memory free at resume time to have pageset1 | |
15879 | + * be able to be loaded in pages that don't conflict with where it has to | |
15880 | + * be restored. | |
15881 | + * Soft constraints | |
15882 | + * - User specified image size limit. | |
15883 | + */ | |
15884 | +static void eat_memory(void) | |
15885 | +{ | |
15886 | + long amount_wanted = 0; | |
15887 | + int did_eat_memory = 0; | |
15888 | + | |
15889 | + /* | |
15890 | + * Note that if we have enough storage space and enough free memory, we | |
15891 | + * may exit without eating anything. We give up when the last 10 | |
15892 | + * iterations ate no extra pages because we're not going to get much | |
15893 | + * more anyway, but the few pages we get will take a lot of time. | |
15894 | + * | |
15895 | + * We freeze processes before beginning, and then unfreeze them if we | |
15896 | + * need to eat memory until we think we have enough. If our attempts | |
15897 | + * to freeze fail, we give up and abort. | |
15898 | + */ | |
15899 | + | |
2380c486 JR |
15900 | + amount_wanted = amount_needed(1); |
15901 | + | |
15902 | + switch (image_size_limit) { | |
15903 | + case -1: /* Don't eat any memory */ | |
15904 | + if (amount_wanted > 0) { | |
15905 | + set_abort_result(TOI_WOULD_EAT_MEMORY); | |
15906 | + return; | |
15907 | + } | |
15908 | + break; | |
15909 | + case -2: /* Free caches only */ | |
15910 | + drop_pagecache(); | |
15911 | + toi_recalculate_image_contents(0); | |
15912 | + amount_wanted = amount_needed(1); | |
2380c486 JR |
15913 | + break; |
15914 | + default: | |
15915 | + break; | |
15916 | + } | |
15917 | + | |
15918 | + if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) && | |
15919 | + image_size_limit != -1) { | |
9474138d | 15920 | + long request = amount_wanted + 50; |
2380c486 JR |
15921 | + |
15922 | + toi_prepare_status(CLEAR_BAR, | |
15923 | + "Seeking to free %ldMB of memory.", | |
15924 | + MB(amount_wanted)); | |
15925 | + | |
15926 | + thaw_kernel_threads(); | |
15927 | + | |
15928 | + /* | |
15929 | + * Ask for too many because shrink_all_memory doesn't | |
15930 | + * currently return enough most of the time. | |
15931 | + */ | |
9474138d | 15932 | + shrink_all_memory(request); |
2380c486 JR |
15933 | + |
15934 | + did_eat_memory = 1; | |
15935 | + | |
15936 | + toi_recalculate_image_contents(0); | |
15937 | + | |
15938 | + amount_wanted = amount_needed(1); | |
15939 | + | |
15940 | + printk("Asked shrink_all_memory for %ld pages, got %ld.\n", | |
15941 | + request, request - amount_wanted); | |
15942 | + | |
15943 | + toi_cond_pause(0, NULL); | |
15944 | + | |
15945 | + if (freeze_processes()) | |
15946 | + set_abort_result(TOI_FREEZING_FAILED); | |
15947 | + } | |
15948 | + | |
15949 | + if (did_eat_memory) | |
15950 | + toi_recalculate_image_contents(0); | |
15951 | +} | |
15952 | + | |
15953 | +/* toi_prepare_image | |
15954 | + * | |
15955 | + * Entry point to the whole image preparation section. | |
15956 | + * | |
15957 | + * We do four things: | |
15958 | + * - Freeze processes; | |
15959 | + * - Ensure image size constraints are met; | |
15960 | + * - Complete all the preparation for saving the image, | |
15961 | + * including allocation of storage. The only memory | |
15962 | + * that should be needed when we're finished is that | |
15963 | + * for actually storing the image (and we know how | |
15964 | + * much is needed for that because the modules tell | |
15965 | + * us). | |
15966 | + * - Make sure that all dirty buffers are written out. | |
15967 | + */ | |
15968 | +#define MAX_TRIES 2 | |
15969 | +int toi_prepare_image(void) | |
15970 | +{ | |
15971 | + int result = 1, tries = 1; | |
15972 | + | |
2380c486 JR |
15973 | + main_storage_allocated = 0; |
15974 | + no_ps2_needed = 0; | |
15975 | + | |
15976 | + if (attempt_to_freeze()) | |
15977 | + return 1; | |
15978 | + | |
15979 | + if (!extra_pd1_pages_allowance) | |
15980 | + get_extra_pd1_allowance(); | |
15981 | + | |
15982 | + storage_available = toiActiveAllocator->storage_available(); | |
15983 | + | |
15984 | + if (!storage_available) { | |
15985 | + printk(KERN_INFO "No storage available. Didn't try to prepare " | |
15986 | + "an image.\n"); | |
15987 | + display_failure_reason(0); | |
15988 | + set_abort_result(TOI_NOSTORAGE_AVAILABLE); | |
15989 | + return 1; | |
15990 | + } | |
15991 | + | |
15992 | + if (build_attention_list()) { | |
15993 | + abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, | |
15994 | + "Unable to successfully prepare the image.\n"); | |
15995 | + return 1; | |
15996 | + } | |
15997 | + | |
9474138d AM |
15998 | + toi_recalculate_image_contents(0); |
15999 | + | |
2380c486 JR |
16000 | + do { |
16001 | + toi_prepare_status(CLEAR_BAR, | |
16002 | + "Preparing Image. Try %d.", tries); | |
16003 | + | |
16004 | + eat_memory(); | |
16005 | + | |
16006 | + if (test_result_state(TOI_ABORTED)) | |
16007 | + break; | |
16008 | + | |
16009 | + update_image(0); | |
16010 | + | |
16011 | + tries++; | |
16012 | + | |
16013 | + } while (image_not_ready(1) && tries <= MAX_TRIES && | |
16014 | + !test_result_state(TOI_ABORTED)); | |
16015 | + | |
16016 | + result = image_not_ready(0); | |
16017 | + | |
16018 | + if (!test_result_state(TOI_ABORTED)) { | |
16019 | + if (result) { | |
16020 | + display_stats(1, 0); | |
16021 | + display_failure_reason(tries > MAX_TRIES); | |
16022 | + abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, | |
16023 | + "Unable to successfully prepare the image.\n"); | |
16024 | + } else { | |
16025 | + /* Pageset 2 needed? */ | |
16026 | + if (!need_pageset2() && | |
16027 | + test_action_state(TOI_NO_PS2_IF_UNNEEDED)) { | |
16028 | + no_ps2_needed = 1; | |
9474138d | 16029 | + toi_recalculate_image_contents(0); |
2380c486 JR |
16030 | + update_image(1); |
16031 | + } | |
16032 | + | |
16033 | + toi_cond_pause(1, "Image preparation complete."); | |
16034 | + } | |
16035 | + } | |
16036 | + | |
16037 | + return result ? result : allocate_checksum_pages(); | |
16038 | +} | |
16039 | diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h | |
16040 | new file mode 100644 | |
0ada99ac | 16041 | index 0000000..9a1de79 |
2380c486 JR |
16042 | --- /dev/null |
16043 | +++ b/kernel/power/tuxonice_prepare_image.h | |
0ada99ac | 16044 | @@ -0,0 +1,36 @@ |
2380c486 JR |
16045 | +/* |
16046 | + * kernel/power/tuxonice_prepare_image.h | |
16047 | + * | |
16048 | + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net) | |
16049 | + * | |
16050 | + * This file is released under the GPLv2. | |
16051 | + * | |
16052 | + */ | |
16053 | + | |
16054 | +#include <asm/sections.h> | |
16055 | + | |
16056 | +extern int toi_prepare_image(void); | |
16057 | +extern void toi_recalculate_image_contents(int storage_available); | |
16058 | +extern long real_nr_free_pages(unsigned long zone_idx_mask); | |
16059 | +extern int image_size_limit; | |
16060 | +extern void toi_free_extra_pagedir_memory(void); | |
16061 | +extern long extra_pd1_pages_allowance; | |
16062 | +extern void free_attention_list(void); | |
16063 | + | |
16064 | +#define MIN_FREE_RAM 100 | |
16065 | +#define MIN_EXTRA_PAGES_ALLOWANCE 500 | |
16066 | + | |
16067 | +#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1)) | |
16068 | +#ifdef CONFIG_HIGHMEM | |
16069 | +#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM)) | |
16070 | +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \ | |
16071 | + (1 << ZONE_HIGHMEM))) | |
16072 | +#else | |
16073 | +#define real_nr_free_high_pages() (0) | |
16074 | +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask)) | |
16075 | + | |
16076 | +/* For eat_memory function */ | |
16077 | +#define ZONE_HIGHMEM (MAX_NR_ZONES + 1) | |
16078 | +#endif | |
16079 | + | |
0ada99ac | 16080 | +long get_header_storage_needed(void); |
2380c486 JR |
16081 | diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c |
16082 | new file mode 100644 | |
16083 | index 0000000..5dafc95 | |
16084 | --- /dev/null | |
16085 | +++ b/kernel/power/tuxonice_storage.c | |
16086 | @@ -0,0 +1,282 @@ | |
16087 | +/* | |
16088 | + * kernel/power/tuxonice_storage.c | |
16089 | + * | |
16090 | + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net) | |
16091 | + * | |
16092 | + * This file is released under the GPLv2. | |
16093 | + * | |
16094 | + * Routines for talking to a userspace program that manages storage. | |
16095 | + * | |
16096 | + * The kernel side: | |
16097 | + * - starts the userspace program; | |
16098 | + * - sends messages telling it when to open and close the connection; | |
16099 | + * - tells it when to quit; | |
16100 | + * | |
16101 | + * The user space side: | |
16102 | + * - passes messages regarding status; | |
16103 | + * | |
16104 | + */ | |
16105 | + | |
16106 | +#include <linux/suspend.h> | |
16107 | +#include <linux/freezer.h> | |
16108 | + | |
16109 | +#include "tuxonice_sysfs.h" | |
16110 | +#include "tuxonice_modules.h" | |
16111 | +#include "tuxonice_netlink.h" | |
16112 | +#include "tuxonice_storage.h" | |
16113 | +#include "tuxonice_ui.h" | |
16114 | + | |
16115 | +static struct user_helper_data usm_helper_data; | |
16116 | +static struct toi_module_ops usm_ops; | |
16117 | +static int message_received, usm_prepare_count; | |
16118 | +static int storage_manager_last_action, storage_manager_action; | |
16119 | + | |
16120 | +static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |
16121 | +{ | |
16122 | + int type; | |
16123 | + int *data; | |
16124 | + | |
16125 | + type = nlh->nlmsg_type; | |
16126 | + | |
16127 | + /* A control message: ignore them */ | |
16128 | + if (type < NETLINK_MSG_BASE) | |
16129 | + return 0; | |
16130 | + | |
16131 | + /* Unknown message: reply with EINVAL */ | |
16132 | + if (type >= USM_MSG_MAX) | |
16133 | + return -EINVAL; | |
16134 | + | |
16135 | + /* All operations require privileges, even GET */ | |
16136 | + if (security_netlink_recv(skb, CAP_NET_ADMIN)) | |
16137 | + return -EPERM; | |
16138 | + | |
16139 | + /* Only allow one task to receive NOFREEZE privileges */ | |
16140 | + if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1) | |
16141 | + return -EBUSY; | |
16142 | + | |
16143 | + data = (int *) NLMSG_DATA(nlh); | |
16144 | + | |
16145 | + switch (type) { | |
16146 | + case USM_MSG_SUCCESS: | |
16147 | + case USM_MSG_FAILED: | |
16148 | + message_received = type; | |
16149 | + complete(&usm_helper_data.wait_for_process); | |
16150 | + break; | |
16151 | + default: | |
16152 | + printk(KERN_INFO "Storage manager doesn't recognise " | |
16153 | + "message %d.\n", type); | |
16154 | + } | |
16155 | + | |
16156 | + return 1; | |
16157 | +} | |
16158 | + | |
16159 | +#ifdef CONFIG_NET | |
16160 | +static int activations; | |
16161 | + | |
16162 | +int toi_activate_storage(int force) | |
16163 | +{ | |
16164 | + int tries = 1; | |
16165 | + | |
16166 | + if (usm_helper_data.pid == -1 || !usm_ops.enabled) | |
16167 | + return 0; | |
16168 | + | |
16169 | + message_received = 0; | |
16170 | + activations++; | |
16171 | + | |
16172 | + if (activations > 1 && !force) | |
16173 | + return 0; | |
16174 | + | |
16175 | + while ((!message_received || message_received == USM_MSG_FAILED) && | |
16176 | + tries < 2) { | |
16177 | + toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt " | |
16178 | + "%d.\n", tries); | |
16179 | + | |
16180 | + init_completion(&usm_helper_data.wait_for_process); | |
16181 | + | |
16182 | + toi_send_netlink_message(&usm_helper_data, | |
16183 | + USM_MSG_CONNECT, | |
16184 | + NULL, 0); | |
16185 | + | |
16186 | + /* Wait 2 seconds for the userspace process to make contact */ | |
16187 | + wait_for_completion_timeout(&usm_helper_data.wait_for_process, | |
16188 | + 2*HZ); | |
16189 | + | |
16190 | + tries++; | |
16191 | + } | |
16192 | + | |
16193 | + return 0; | |
16194 | +} | |
16195 | + | |
16196 | +int toi_deactivate_storage(int force) | |
16197 | +{ | |
16198 | + if (usm_helper_data.pid == -1 || !usm_ops.enabled) | |
16199 | + return 0; | |
16200 | + | |
16201 | + message_received = 0; | |
16202 | + activations--; | |
16203 | + | |
16204 | + if (activations && !force) | |
16205 | + return 0; | |
16206 | + | |
16207 | + init_completion(&usm_helper_data.wait_for_process); | |
16208 | + | |
16209 | + toi_send_netlink_message(&usm_helper_data, | |
16210 | + USM_MSG_DISCONNECT, | |
16211 | + NULL, 0); | |
16212 | + | |
16213 | + wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ); | |
16214 | + | |
16215 | + if (!message_received || message_received == USM_MSG_FAILED) { | |
16216 | + printk(KERN_INFO "Returning failure disconnecting storage.\n"); | |
16217 | + return 1; | |
16218 | + } | |
16219 | + | |
16220 | + return 0; | |
16221 | +} | |
16222 | +#endif | |
16223 | + | |
16224 | +static void storage_manager_simulate(void) | |
16225 | +{ | |
16226 | + printk(KERN_INFO "--- Storage manager simulate ---\n"); | |
16227 | + toi_prepare_usm(); | |
16228 | + schedule(); | |
16229 | + printk(KERN_INFO "--- Activate storage 1 ---\n"); | |
16230 | + toi_activate_storage(1); | |
16231 | + schedule(); | |
16232 | + printk(KERN_INFO "--- Deactivate storage 1 ---\n"); | |
16233 | + toi_deactivate_storage(1); | |
16234 | + schedule(); | |
16235 | + printk(KERN_INFO "--- Cleanup usm ---\n"); | |
16236 | + toi_cleanup_usm(); | |
16237 | + schedule(); | |
16238 | + printk(KERN_INFO "--- Storage manager simulate ends ---\n"); | |
16239 | +} | |
16240 | + | |
16241 | +static int usm_storage_needed(void) | |
16242 | +{ | |
16243 | + return strlen(usm_helper_data.program); | |
16244 | +} | |
16245 | + | |
16246 | +static int usm_save_config_info(char *buf) | |
16247 | +{ | |
16248 | + int len = strlen(usm_helper_data.program); | |
16249 | + memcpy(buf, usm_helper_data.program, len); | |
16250 | + return len; | |
16251 | +} | |
16252 | + | |
16253 | +static void usm_load_config_info(char *buf, int size) | |
16254 | +{ | |
16255 | + /* Don't load the saved path if one has already been set */ | |
16256 | + if (usm_helper_data.program[0]) | |
16257 | + return; | |
16258 | + | |
16259 | + memcpy(usm_helper_data.program, buf, size); | |
16260 | +} | |
16261 | + | |
16262 | +static int usm_memory_needed(void) | |
16263 | +{ | |
16264 | + /* ball park figure of 32 pages */ | |
16265 | + return 32 * PAGE_SIZE; | |
16266 | +} | |
16267 | + | |
16268 | +/* toi_prepare_usm | |
16269 | + */ | |
16270 | +int toi_prepare_usm(void) | |
16271 | +{ | |
16272 | + usm_prepare_count++; | |
16273 | + | |
16274 | + if (usm_prepare_count > 1 || !usm_ops.enabled) | |
16275 | + return 0; | |
16276 | + | |
16277 | + usm_helper_data.pid = -1; | |
16278 | + | |
16279 | + if (!*usm_helper_data.program) | |
16280 | + return 0; | |
16281 | + | |
16282 | + toi_netlink_setup(&usm_helper_data); | |
16283 | + | |
16284 | + if (usm_helper_data.pid == -1) | |
16285 | + printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't" | |
16286 | + " start it.\n"); | |
16287 | + | |
16288 | + toi_activate_storage(0); | |
16289 | + | |
16290 | + return usm_helper_data.pid != -1; | |
16291 | +} | |
16292 | + | |
16293 | +void toi_cleanup_usm(void) | |
16294 | +{ | |
16295 | + usm_prepare_count--; | |
16296 | + | |
16297 | + if (usm_helper_data.pid > -1 && !usm_prepare_count) { | |
16298 | + toi_deactivate_storage(0); | |
16299 | + toi_netlink_close(&usm_helper_data); | |
16300 | + } | |
16301 | +} | |
16302 | + | |
16303 | +static void storage_manager_activate(void) | |
16304 | +{ | |
16305 | + if (storage_manager_action == storage_manager_last_action) | |
16306 | + return; | |
16307 | + | |
16308 | + if (storage_manager_action) | |
16309 | + toi_prepare_usm(); | |
16310 | + else | |
16311 | + toi_cleanup_usm(); | |
16312 | + | |
16313 | + storage_manager_last_action = storage_manager_action; | |
16314 | +} | |
16315 | + | |
16316 | +/* | |
16317 | + * Storage manager specific /sys/power/tuxonice entries. | |
16318 | + */ | |
16319 | + | |
16320 | +static struct toi_sysfs_data sysfs_params[] = { | |
16321 | + SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate), | |
16322 | + SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL), | |
16323 | + SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0, | |
16324 | + NULL), | |
16325 | + SYSFS_INT("activate_storage", SYSFS_RW , &storage_manager_action, 0, 1, | |
16326 | + 0, storage_manager_activate) | |
16327 | +}; | |
16328 | + | |
16329 | +static struct toi_module_ops usm_ops = { | |
16330 | + .type = MISC_MODULE, | |
16331 | + .name = "usm", | |
16332 | + .directory = "storage_manager", | |
16333 | + .module = THIS_MODULE, | |
16334 | + .storage_needed = usm_storage_needed, | |
16335 | + .save_config_info = usm_save_config_info, | |
16336 | + .load_config_info = usm_load_config_info, | |
16337 | + .memory_needed = usm_memory_needed, | |
16338 | + | |
16339 | + .sysfs_data = sysfs_params, | |
16340 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
16341 | + sizeof(struct toi_sysfs_data), | |
16342 | +}; | |
16343 | + | |
16344 | +/* toi_usm_init | |
16345 | + * Description: Boot time initialisation for the userspace storage manager. | |
16346 | + */ | |
16347 | +int toi_usm_init(void) | |
16348 | +{ | |
16349 | + usm_helper_data.nl = NULL; | |
16350 | + usm_helper_data.program[0] = '\0'; | |
16351 | + usm_helper_data.pid = -1; | |
16352 | + usm_helper_data.skb_size = 0; | |
16353 | + usm_helper_data.pool_limit = 6; | |
16354 | + usm_helper_data.netlink_id = NETLINK_TOI_USM; | |
16355 | + usm_helper_data.name = "userspace storage manager"; | |
16356 | + usm_helper_data.rcv_msg = usm_user_rcv_msg; | |
16357 | + usm_helper_data.interface_version = 2; | |
16358 | + usm_helper_data.must_init = 0; | |
16359 | + init_completion(&usm_helper_data.wait_for_process); | |
16360 | + | |
16361 | + return toi_register_module(&usm_ops); | |
16362 | +} | |
16363 | + | |
16364 | +void toi_usm_exit(void) | |
16365 | +{ | |
16366 | + toi_netlink_close_complete(&usm_helper_data); | |
16367 | + toi_unregister_module(&usm_ops); | |
16368 | +} | |
16369 | diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h | |
16370 | new file mode 100644 | |
16371 | index 0000000..24f8e8a | |
16372 | --- /dev/null | |
16373 | +++ b/kernel/power/tuxonice_storage.h | |
16374 | @@ -0,0 +1,45 @@ | |
16375 | +/* | |
16376 | + * kernel/power/tuxonice_storage.h | |
16377 | + * | |
16378 | + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net) | |
16379 | + * | |
16380 | + * This file is released under the GPLv2. | |
16381 | + */ | |
16382 | + | |
16383 | +#ifdef CONFIG_NET | |
16384 | +int toi_prepare_usm(void); | |
16385 | +void toi_cleanup_usm(void); | |
16386 | + | |
16387 | +int toi_activate_storage(int force); | |
16388 | +int toi_deactivate_storage(int force); | |
16389 | +extern int toi_usm_init(void); | |
16390 | +extern void toi_usm_exit(void); | |
16391 | +#else | |
16392 | +static inline int toi_usm_init(void) { return 0; } | |
16393 | +static inline void toi_usm_exit(void) { } | |
16394 | + | |
16395 | +static inline int toi_activate_storage(int force) | |
16396 | +{ | |
16397 | + return 0; | |
16398 | +} | |
16399 | + | |
16400 | +static inline int toi_deactivate_storage(int force) | |
16401 | +{ | |
16402 | + return 0; | |
16403 | +} | |
16404 | + | |
16405 | +static inline int toi_prepare_usm(void) { return 0; } | |
16406 | +static inline void toi_cleanup_usm(void) { } | |
16407 | +#endif | |
16408 | + | |
16409 | +enum { | |
16410 | + USM_MSG_BASE = 0x10, | |
16411 | + | |
16412 | + /* Kernel -> Userspace */ | |
16413 | + USM_MSG_CONNECT = 0x30, | |
16414 | + USM_MSG_DISCONNECT = 0x31, | |
16415 | + USM_MSG_SUCCESS = 0x40, | |
16416 | + USM_MSG_FAILED = 0x41, | |
16417 | + | |
16418 | + USM_MSG_MAX, | |
16419 | +}; | |
16420 | diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c | |
16421 | new file mode 100644 | |
9474138d | 16422 | index 0000000..3753f5b |
2380c486 JR |
16423 | --- /dev/null |
16424 | +++ b/kernel/power/tuxonice_swap.c | |
9474138d | 16425 | @@ -0,0 +1,1334 @@ |
2380c486 JR |
16426 | +/* |
16427 | + * kernel/power/tuxonice_swap.c | |
16428 | + * | |
16429 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
16430 | + * | |
16431 | + * Distributed under GPLv2. | |
16432 | + * | |
16433 | + * This file encapsulates functions for usage of swap space as a | |
16434 | + * backing store. | |
16435 | + */ | |
16436 | + | |
16437 | +#include <linux/suspend.h> | |
2380c486 JR |
16438 | +#include <linux/blkdev.h> |
16439 | +#include <linux/swapops.h> | |
16440 | +#include <linux/swap.h> | |
16441 | +#include <linux/syscalls.h> | |
16442 | + | |
16443 | +#include "tuxonice.h" | |
16444 | +#include "tuxonice_sysfs.h" | |
16445 | +#include "tuxonice_modules.h" | |
16446 | +#include "tuxonice_io.h" | |
16447 | +#include "tuxonice_ui.h" | |
16448 | +#include "tuxonice_extent.h" | |
16449 | +#include "tuxonice_block_io.h" | |
16450 | +#include "tuxonice_alloc.h" | |
16451 | +#include "tuxonice_builtin.h" | |
16452 | + | |
16453 | +static struct toi_module_ops toi_swapops; | |
16454 | + | |
16455 | +/* --- Struct of pages stored on disk */ | |
16456 | + | |
16457 | +struct sig_data { | |
16458 | + dev_t device; | |
16459 | + unsigned long sector; | |
16460 | + int resume_attempted; | |
16461 | + int orig_sig_type; | |
16462 | +}; | |
16463 | + | |
16464 | +union diskpage { | |
16465 | + union swap_header swh; /* swh.magic is the only member used */ | |
16466 | + struct sig_data sig_data; | |
16467 | +}; | |
16468 | + | |
16469 | +union p_diskpage { | |
16470 | + union diskpage *pointer; | |
16471 | + char *ptr; | |
16472 | + unsigned long address; | |
16473 | +}; | |
16474 | + | |
16475 | +enum { | |
16476 | + IMAGE_SIGNATURE, | |
16477 | + NO_IMAGE_SIGNATURE, | |
16478 | + TRIED_RESUME, | |
16479 | + NO_TRIED_RESUME, | |
16480 | +}; | |
16481 | + | |
16482 | +/* | |
16483 | + * Both of these point to versions of the swap header page. original_sig points | |
16484 | + * to the data we read from disk at the start of hibernating or checking whether | |
16485 | + * to resume. no_image is the page stored in the image header, showing what the | |
16486 | + * swap header page looked like at the start of hibernating. | |
16487 | + */ | |
16488 | +static char *current_signature_page; | |
16489 | +static char no_image_signature_contents[sizeof(struct sig_data)]; | |
16490 | + | |
16491 | +/* Devices used for swap */ | |
16492 | +static struct toi_bdev_info devinfo[MAX_SWAPFILES]; | |
16493 | + | |
16494 | +/* Extent chains for swap & blocks */ | |
16495 | +static struct hibernate_extent_chain swapextents; | |
16496 | +static struct hibernate_extent_chain block_chain[MAX_SWAPFILES]; | |
16497 | + | |
16498 | +static dev_t header_dev_t; | |
16499 | +static struct block_device *header_block_device; | |
16500 | +static unsigned long headerblock; | |
16501 | + | |
16502 | +/* For swapfile automatically swapon/off'd. */ | |
16503 | +static char swapfilename[32] = ""; | |
16504 | +static int toi_swapon_status; | |
16505 | + | |
16506 | +/* Header Page Information */ | |
16507 | +static long header_pages_reserved; | |
16508 | + | |
16509 | +/* Swap Pages */ | |
16510 | +static long swap_pages_allocated; | |
16511 | + | |
16512 | +/* User Specified Parameters. */ | |
16513 | + | |
16514 | +static unsigned long resume_firstblock; | |
16515 | +static dev_t resume_swap_dev_t; | |
16516 | +static struct block_device *resume_block_device; | |
16517 | + | |
16518 | +static struct sysinfo swapinfo; | |
16519 | + | |
16520 | +/* Block devices open. */ | |
16521 | +struct bdev_opened { | |
16522 | + dev_t device; | |
16523 | + struct block_device *bdev; | |
16524 | +}; | |
16525 | + | |
16526 | +/* | |
16527 | + * Entry MAX_SWAPFILES is the resume block device, which may | |
16528 | + * be a swap device not enabled when we hibernate. | |
16529 | + * Entry MAX_SWAPFILES + 1 is the header block device, which | |
16530 | + * is needed before we find out which slot it occupies. | |
16531 | + * | |
16532 | + * We use a separate struct to devinfo so that we can track | |
16533 | + * the bdevs we open, because if we need to abort resuming | |
16534 | + * prior to the atomic restore, they need to be closed, but | |
16535 | + * closing them after successfully resuming would be wrong. | |
16536 | + */ | |
16537 | +static struct bdev_opened *bdevs_opened[MAX_SWAPFILES + 2]; | |
16538 | + | |
16539 | +/** | |
16540 | + * close_bdev: Close a swap bdev. | |
16541 | + * | |
16542 | + * int: The swap entry number to close. | |
16543 | + */ | |
16544 | +static void close_bdev(int i) | |
16545 | +{ | |
16546 | + struct bdev_opened *this = bdevs_opened[i]; | |
16547 | + | |
16548 | + if (!this) | |
16549 | + return; | |
16550 | + | |
16551 | + blkdev_put(this->bdev, FMODE_READ | FMODE_NDELAY); | |
9474138d | 16552 | + toi_kfree(8, this, sizeof(*this)); |
2380c486 JR |
16553 | + bdevs_opened[i] = NULL; |
16554 | +} | |
16555 | + | |
16556 | +/** | |
16557 | + * close_bdevs: Close all bdevs we opened. | |
16558 | + * | |
16559 | + * Close all bdevs that we opened and reset the related vars. | |
16560 | + */ | |
16561 | +static void close_bdevs(void) | |
16562 | +{ | |
16563 | + int i; | |
16564 | + | |
16565 | + for (i = 0; i < MAX_SWAPFILES + 2; i++) | |
16566 | + close_bdev(i); | |
16567 | + | |
16568 | + resume_block_device = NULL; | |
16569 | + header_block_device = NULL; | |
16570 | +} | |
16571 | + | |
16572 | +/** | |
16573 | + * open_bdev: Open a bdev at resume time. | |
16574 | + * | |
16575 | + * index: The swap index. May be MAX_SWAPFILES for the resume_swap_dev_t | |
16576 | + * (the user can have resume= pointing at a swap partition/file that isn't | |
16577 | + * swapon'd when they hibernate). MAX_SWAPFILES+1 for the first page of the | |
16578 | + * header. It will be from a swap partition that was enabled when we hibernated, | |
16579 | + * but we don't know its real index until we read that first page. | |
16580 | + * dev_t: The device major/minor. | |
16581 | + * display_errs: Whether to try to do this quietly. | |
16582 | + * | |
16583 | + * We stored a dev_t in the image header. Open the matching device without | |
16584 | + * requiring /dev/<whatever> in most cases and record the details needed | |
16585 | + * to close it later and avoid duplicating work. | |
16586 | + */ | |
16587 | +static struct block_device *open_bdev(int index, dev_t device, int display_errs) | |
16588 | +{ | |
16589 | + struct bdev_opened *this; | |
16590 | + struct block_device *bdev; | |
16591 | + | |
16592 | + if (bdevs_opened[index]) { | |
16593 | + if (bdevs_opened[index]->device == device) | |
16594 | + return bdevs_opened[index]->bdev; | |
16595 | + | |
16596 | + close_bdev(index); | |
16597 | + } | |
16598 | + | |
16599 | + bdev = toi_open_by_devnum(device, FMODE_READ | FMODE_NDELAY); | |
16600 | + | |
16601 | + if (IS_ERR(bdev) || !bdev) { | |
16602 | + if (display_errs) | |
16603 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
16604 | + "Failed to get access to block device " | |
16605 | + "\"%x\" (error %d).\n Maybe you need " | |
16606 | + "to run mknod and/or lvmsetup in an " | |
16607 | + "initrd/ramfs?", device, bdev); | |
16608 | + return ERR_PTR(-EINVAL); | |
16609 | + } | |
16610 | + | |
16611 | + this = toi_kzalloc(8, sizeof(struct bdev_opened), GFP_KERNEL); | |
16612 | + if (!this) { | |
16613 | + printk(KERN_WARNING "TuxOnIce: Failed to allocate memory for " | |
16614 | + "opening a bdev."); | |
16615 | + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); | |
16616 | + return ERR_PTR(-ENOMEM); | |
16617 | + } | |
16618 | + | |
16619 | + bdevs_opened[index] = this; | |
16620 | + this->device = device; | |
16621 | + this->bdev = bdev; | |
16622 | + | |
16623 | + return bdev; | |
16624 | +} | |
16625 | + | |
16626 | +/** | |
16627 | + * enable_swapfile: Swapon the user specified swapfile prior to hibernating. | |
16628 | + * | |
16629 | + * Activate the given swapfile if it wasn't already enabled. Remember whether | |
16630 | + * we really did swapon it for swapoffing later. | |
16631 | + */ | |
16632 | +static void enable_swapfile(void) | |
16633 | +{ | |
16634 | + int activateswapresult = -EINVAL; | |
16635 | + | |
16636 | + if (swapfilename[0]) { | |
16637 | + /* Attempt to swap on with maximum priority */ | |
16638 | + activateswapresult = sys_swapon(swapfilename, 0xFFFF); | |
16639 | + if (activateswapresult && activateswapresult != -EBUSY) | |
16640 | + printk("TuxOnIce: The swapfile/partition specified by " | |
16641 | + "/sys/power/tuxonice/swap/swapfile " | |
16642 | + "(%s) could not be turned on (error %d). " | |
16643 | + "Attempting to continue.\n", | |
16644 | + swapfilename, activateswapresult); | |
16645 | + if (!activateswapresult) | |
16646 | + toi_swapon_status = 1; | |
16647 | + } | |
16648 | +} | |
16649 | + | |
16650 | +/** | |
16651 | + * disable_swapfile: Swapoff any file swaponed at the start of the cycle. | |
16652 | + * | |
16653 | + * If we did successfully swapon a file at the start of the cycle, swapoff | |
16654 | + * it now (finishing up). | |
16655 | + */ | |
16656 | +static void disable_swapfile(void) | |
16657 | +{ | |
16658 | + if (!toi_swapon_status) | |
16659 | + return; | |
16660 | + | |
16661 | + sys_swapoff(swapfilename); | |
16662 | + toi_swapon_status = 0; | |
16663 | +} | |
16664 | + | |
16665 | +/** | |
16666 | + * try_to_parse_resume_device: Try to parse resume= | |
16667 | + * | |
16668 | + * Any "swap:" has been stripped away and we just have the path to deal with. | |
16669 | + * We attempt to do name_to_dev_t, open and stat the file. Having opened the | |
16670 | + * file, get the struct block_device * to match. | |
16671 | + */ | |
16672 | +static int try_to_parse_resume_device(char *commandline, int quiet) | |
16673 | +{ | |
16674 | + struct kstat stat; | |
16675 | + int error = 0; | |
16676 | + | |
16677 | + wait_for_device_probe(); | |
16678 | + resume_swap_dev_t = name_to_dev_t(commandline); | |
16679 | + | |
16680 | + if (!resume_swap_dev_t) { | |
16681 | + struct file *file = filp_open(commandline, | |
16682 | + O_RDONLY|O_LARGEFILE, 0); | |
16683 | + | |
16684 | + if (!IS_ERR(file) && file) { | |
16685 | + vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); | |
16686 | + filp_close(file, NULL); | |
16687 | + } else | |
16688 | + error = vfs_stat(commandline, &stat); | |
16689 | + if (!error) | |
16690 | + resume_swap_dev_t = stat.rdev; | |
16691 | + } | |
16692 | + | |
16693 | + if (!resume_swap_dev_t) { | |
16694 | + if (quiet) | |
16695 | + return 1; | |
16696 | + | |
16697 | + if (test_toi_state(TOI_TRYING_TO_RESUME)) | |
16698 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
16699 | + "Failed to translate \"%s\" into a device id.\n", | |
16700 | + commandline); | |
16701 | + else | |
16702 | + printk("TuxOnIce: Can't translate \"%s\" into a device " | |
16703 | + "id yet.\n", commandline); | |
16704 | + return 1; | |
16705 | + } | |
16706 | + | |
16707 | + resume_block_device = open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 0); | |
16708 | + if (IS_ERR(resume_block_device)) { | |
16709 | + if (!quiet) | |
16710 | + toi_early_boot_message(1, TOI_CONTINUE_REQ, | |
16711 | + "Failed to get access to \"%s\", where" | |
16712 | + " the swap header should be found.", | |
16713 | + commandline); | |
16714 | + return 1; | |
16715 | + } | |
16716 | + | |
16717 | + return 0; | |
16718 | +} | |
16719 | + | |
16720 | +/* | |
16721 | + * If we have read part of the image, we might have filled memory with | |
16722 | + * data that should be zeroed out. | |
16723 | + */ | |
16724 | +static void toi_swap_noresume_reset(void) | |
16725 | +{ | |
16726 | + toi_bio_ops.rw_cleanup(READ); | |
16727 | + memset((char *) &devinfo, 0, sizeof(devinfo)); | |
16728 | +} | |
16729 | + | |
16730 | +static int get_current_signature(void) | |
16731 | +{ | |
16732 | + if (!current_signature_page) { | |
16733 | + current_signature_page = (char *) toi_get_zeroed_page(38, | |
16734 | + TOI_ATOMIC_GFP); | |
16735 | + if (!current_signature_page) | |
16736 | + return -ENOMEM; | |
16737 | + } | |
16738 | + | |
16739 | + return toi_bio_ops.bdev_page_io(READ, resume_block_device, | |
16740 | + resume_firstblock, virt_to_page(current_signature_page)); | |
16741 | +} | |
16742 | + | |
16743 | +static int parse_signature(void) | |
16744 | +{ | |
16745 | + union p_diskpage swap_header_page; | |
16746 | + struct sig_data *sig; | |
16747 | + int type; | |
16748 | + char *swap_header; | |
16749 | + const char *sigs[] = { | |
16750 | + "SWAP-SPACE", "SWAPSPACE2", "S1SUSP", "S2SUSP", "S1SUSPEND" | |
16751 | + }; | |
16752 | + | |
16753 | + int result = get_current_signature(); | |
16754 | + if (result) | |
16755 | + return result; | |
16756 | + | |
16757 | + swap_header_page = (union p_diskpage) current_signature_page; | |
16758 | + sig = (struct sig_data *) current_signature_page; | |
16759 | + swap_header = swap_header_page.pointer->swh.magic.magic; | |
16760 | + | |
16761 | + for (type = 0; type < 5; type++) | |
16762 | + if (!memcmp(sigs[type], swap_header, strlen(sigs[type]))) | |
16763 | + return type; | |
16764 | + | |
16765 | + if (memcmp(tuxonice_signature, swap_header, sizeof(tuxonice_signature))) | |
16766 | + return -1; | |
16767 | + | |
16768 | + header_dev_t = sig->device; | |
16769 | + clear_toi_state(TOI_RESUMED_BEFORE); | |
16770 | + if (sig->resume_attempted) | |
16771 | + set_toi_state(TOI_RESUMED_BEFORE); | |
16772 | + headerblock = sig->sector; | |
16773 | + | |
16774 | + return 10; | |
16775 | +} | |
16776 | + | |
16777 | +static void forget_signatures(void) | |
16778 | +{ | |
16779 | + if (current_signature_page) { | |
16780 | + toi_free_page(38, (unsigned long) current_signature_page); | |
16781 | + current_signature_page = NULL; | |
16782 | + } | |
16783 | +} | |
16784 | + | |
16785 | +/* | |
16786 | + * write_modified_signature | |
16787 | + * | |
16788 | + * Write a (potentially) modified signature page without forgetting the | |
16789 | + * original contents. | |
16790 | + */ | |
16791 | +static int write_modified_signature(int modification) | |
16792 | +{ | |
16793 | + union p_diskpage swap_header_page; | |
16794 | + struct swap_info_struct *si; | |
16795 | + int result; | |
16796 | + char *orig_sig; | |
16797 | + | |
16798 | + /* In case we haven't already */ | |
16799 | + result = get_current_signature(); | |
16800 | + | |
16801 | + if (result) | |
16802 | + return result; | |
16803 | + | |
16804 | + swap_header_page.address = toi_get_zeroed_page(38, TOI_ATOMIC_GFP); | |
16805 | + | |
16806 | + if (!swap_header_page.address) | |
16807 | + return -ENOMEM; | |
16808 | + | |
16809 | + memcpy(swap_header_page.ptr, current_signature_page, PAGE_SIZE); | |
16810 | + | |
16811 | + switch (modification) { | |
16812 | + case IMAGE_SIGNATURE: | |
16813 | + | |
16814 | + memcpy(no_image_signature_contents, swap_header_page.ptr, | |
16815 | + sizeof(no_image_signature_contents)); | |
16816 | + | |
16817 | + /* Get the details of the header first page. */ | |
16818 | + toi_extent_state_goto_start(&toi_writer_posn); | |
0ada99ac | 16819 | + toi_bio_ops.forward_one_page(1, 1); |
2380c486 JR |
16820 | + |
16821 | + si = get_swap_info_struct(toi_writer_posn.current_chain); | |
16822 | + | |
16823 | + /* Prepare the signature */ | |
16824 | + swap_header_page.pointer->sig_data.device = si->bdev->bd_dev; | |
16825 | + swap_header_page.pointer->sig_data.sector = | |
16826 | + toi_writer_posn.current_offset; | |
16827 | + swap_header_page.pointer->sig_data.resume_attempted = 0; | |
16828 | + swap_header_page.pointer->sig_data.orig_sig_type = | |
16829 | + parse_signature(); | |
16830 | + | |
16831 | + memcpy(swap_header_page.pointer->swh.magic.magic, | |
16832 | + tuxonice_signature, sizeof(tuxonice_signature)); | |
16833 | + | |
16834 | + break; | |
16835 | + case NO_IMAGE_SIGNATURE: | |
16836 | + if (!swap_header_page.pointer->sig_data.orig_sig_type) | |
16837 | + orig_sig = "SWAP-SPACE"; | |
16838 | + else | |
16839 | + orig_sig = "SWAPSPACE2"; | |
16840 | + | |
16841 | + memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10); | |
16842 | + memcpy(swap_header_page.ptr, no_image_signature_contents, | |
16843 | + sizeof(no_image_signature_contents)); | |
16844 | + break; | |
16845 | + case TRIED_RESUME: | |
16846 | + swap_header_page.pointer->sig_data.resume_attempted = 1; | |
16847 | + break; | |
16848 | + case NO_TRIED_RESUME: | |
16849 | + swap_header_page.pointer->sig_data.resume_attempted = 0; | |
16850 | + break; | |
16851 | + } | |
16852 | + | |
16853 | + result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device, | |
16854 | + resume_firstblock, virt_to_page(swap_header_page.address)); | |
16855 | + | |
16856 | + memcpy(current_signature_page, swap_header_page.ptr, PAGE_SIZE); | |
16857 | + | |
16858 | + toi_free_page(38, swap_header_page.address); | |
16859 | + | |
16860 | + return result; | |
16861 | +} | |
16862 | + | |
16863 | +/* | |
16864 | + * apply_header_reservation | |
2380c486 JR |
16865 | + */ |
16866 | +static int apply_header_reservation(void) | |
16867 | +{ | |
16868 | + int i; | |
16869 | + | |
16870 | + toi_extent_state_goto_start(&toi_writer_posn); | |
2380c486 JR |
16871 | + |
16872 | + for (i = 0; i < header_pages_reserved; i++) | |
0ada99ac | 16873 | + if (toi_bio_ops.forward_one_page(1, 0)) |
2380c486 JR |
16874 | + return -ENOSPC; |
16875 | + | |
16876 | + /* The end of header pages will be the start of pageset 2; | |
16877 | + * we are now sitting on the first pageset2 page. */ | |
16878 | + toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]); | |
16879 | + return 0; | |
16880 | +} | |
16881 | + | |
16882 | +static void toi_swap_reserve_header_space(int request) | |
16883 | +{ | |
16884 | + header_pages_reserved = (long) request; | |
2380c486 JR |
16885 | +} |
16886 | + | |
16887 | +static void free_block_chains(void) | |
16888 | +{ | |
16889 | + int i; | |
16890 | + | |
16891 | + for (i = 0; i < MAX_SWAPFILES; i++) | |
16892 | + if (block_chain[i].first) | |
16893 | + toi_put_extent_chain(&block_chain[i]); | |
16894 | +} | |
16895 | + | |
16896 | +static int add_blocks_to_extent_chain(int chain, int start, int end) | |
16897 | +{ | |
16898 | + if (test_action_state(TOI_TEST_BIO)) | |
16899 | + printk(KERN_INFO "Adding extent chain %d %d-%d.\n", chain, | |
16900 | + start << devinfo[chain].bmap_shift, | |
16901 | + end << devinfo[chain].bmap_shift); | |
16902 | + | |
16903 | + if (toi_add_to_extent_chain(&block_chain[chain], start, end)) { | |
16904 | + free_block_chains(); | |
16905 | + return -ENOMEM; | |
16906 | + } | |
16907 | + | |
16908 | + return 0; | |
16909 | +} | |
16910 | + | |
16911 | + | |
16912 | +static int get_main_pool_phys_params(void) | |
16913 | +{ | |
16914 | + struct hibernate_extent *extentpointer = NULL; | |
16915 | + unsigned long address; | |
16916 | + int extent_min = -1, extent_max = -1, last_chain = -1; | |
16917 | + | |
16918 | + free_block_chains(); | |
16919 | + | |
16920 | + toi_extent_for_each(&swapextents, extentpointer, address) { | |
16921 | + swp_entry_t swap_address = (swp_entry_t) { address }; | |
16922 | + pgoff_t offset = swp_offset(swap_address); | |
16923 | + unsigned swapfilenum = swp_type(swap_address); | |
16924 | + struct swap_info_struct *sis = | |
16925 | + get_swap_info_struct(swapfilenum); | |
16926 | + sector_t new_sector = map_swap_page(sis, offset); | |
16927 | + | |
e999739a | 16928 | + if (devinfo[swapfilenum].ignored) |
16929 | + continue; | |
16930 | + | |
2380c486 JR |
16931 | + if ((new_sector == extent_max + 1) && |
16932 | + (last_chain == swapfilenum)) { | |
16933 | + extent_max++; | |
16934 | + continue; | |
16935 | + } | |
16936 | + | |
16937 | + if (extent_min > -1 && add_blocks_to_extent_chain(last_chain, | |
0ada99ac | 16938 | + extent_min, extent_max)) { |
9474138d AM |
16939 | + printk(KERN_ERR "Out of memory while making block " |
16940 | + "chains.\n"); | |
2380c486 | 16941 | + return -ENOMEM; |
0ada99ac | 16942 | + } |
2380c486 JR |
16943 | + |
16944 | + extent_min = new_sector; | |
16945 | + extent_max = new_sector; | |
16946 | + last_chain = swapfilenum; | |
16947 | + } | |
16948 | + | |
16949 | + if (extent_min > -1 && add_blocks_to_extent_chain(last_chain, | |
0ada99ac | 16950 | + extent_min, extent_max)) { |
9474138d AM |
16951 | + printk(KERN_ERR "Out of memory while making block chains.\n"); |
16952 | + return -ENOMEM; | |
0ada99ac | 16953 | + } |
2380c486 JR |
16954 | + |
16955 | + return apply_header_reservation(); | |
16956 | +} | |
16957 | + | |
16958 | +static long raw_to_real(long raw) | |
16959 | +{ | |
16960 | + long result; | |
16961 | + | |
16962 | + result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) + | |
16963 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) / | |
16964 | + (PAGE_SIZE + sizeof(unsigned long) + sizeof(int)); | |
16965 | + | |
16966 | + return result < 0 ? 0 : result; | |
16967 | +} | |
16968 | + | |
16969 | +static int toi_swap_storage_allocated(void) | |
16970 | +{ | |
16971 | + return (int) raw_to_real(swap_pages_allocated - header_pages_reserved); | |
16972 | +} | |
16973 | + | |
16974 | +/* | |
e999739a | 16975 | + * Like si_swapinfo, except that we don't include ram backed swap (compcache!) |
16976 | + * and don't need to use the spinlocks (userspace is stopped when this | |
16977 | + * function is called). | |
16978 | + */ | |
16979 | +void si_swapinfo_no_compcache(struct sysinfo *val) | |
16980 | +{ | |
16981 | + unsigned int i; | |
16982 | + | |
16983 | + si_swapinfo(&swapinfo); | |
16984 | + val->freeswap = 0; | |
16985 | + val->totalswap = 0; | |
16986 | + | |
16987 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
16988 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
9474138d | 16989 | + if ((si->flags & SWP_USED) && si->swap_map && |
e999739a | 16990 | + (si->flags & SWP_WRITEOK) && |
16991 | + (strncmp(si->bdev->bd_disk->disk_name, "ram", 3))) { | |
16992 | + val->totalswap += si->inuse_pages; | |
16993 | + val->freeswap += si->pages - si->inuse_pages; | |
16994 | + } | |
16995 | + } | |
16996 | +} | |
16997 | +/* | |
2380c486 JR |
16998 | + * We can't just remember the value from allocation time, because other |
16999 | + * processes might have allocated swap in the mean time. | |
17000 | + */ | |
17001 | +static int toi_swap_storage_available(void) | |
17002 | +{ | |
e999739a | 17003 | + si_swapinfo_no_compcache(&swapinfo); |
2380c486 JR |
17004 | + return (int) raw_to_real((long) swapinfo.freeswap + |
17005 | + swap_pages_allocated - header_pages_reserved); | |
17006 | +} | |
17007 | + | |
17008 | +static int toi_swap_initialise(int starting_cycle) | |
17009 | +{ | |
e999739a | 17010 | + int result = 0; |
17011 | + | |
2380c486 JR |
17012 | + if (!starting_cycle) |
17013 | + return 0; | |
17014 | + | |
17015 | + enable_swapfile(); | |
17016 | + | |
e999739a | 17017 | + if (resume_swap_dev_t && !resume_block_device) { |
17018 | + resume_block_device = open_bdev(MAX_SWAPFILES, | |
17019 | + resume_swap_dev_t, 1); | |
17020 | + if (IS_ERR(resume_block_device)) | |
17021 | + result = 1; | |
17022 | + } | |
2380c486 | 17023 | + |
e999739a | 17024 | + return result; |
2380c486 JR |
17025 | +} |
17026 | + | |
/*
 * toi_swap_cleanup
 *
 * Disable the swap file when a cycle is ending, then close the block
 * devices and drop the cached signature page.
 */
static void toi_swap_cleanup(int ending_cycle)
{
	if (ending_cycle) {
		disable_swapfile();
	}

	close_bdevs();
	forget_signatures();
}
17036 | + | |
17037 | +static int toi_swap_release_storage(void) | |
17038 | +{ | |
2380c486 JR |
17039 | + header_pages_reserved = 0; |
17040 | + swap_pages_allocated = 0; | |
17041 | + | |
17042 | + if (swapextents.first) { | |
17043 | + /* Free swap entries */ | |
17044 | + struct hibernate_extent *extentpointer; | |
17045 | + unsigned long extentvalue; | |
17046 | + toi_extent_for_each(&swapextents, extentpointer, | |
17047 | + extentvalue) | |
17048 | + swap_free((swp_entry_t) { extentvalue }); | |
17049 | + | |
17050 | + toi_put_extent_chain(&swapextents); | |
17051 | + | |
17052 | + free_block_chains(); | |
17053 | + } | |
17054 | + | |
17055 | + return 0; | |
17056 | +} | |
17057 | + | |
17058 | +static void free_swap_range(unsigned long min, unsigned long max) | |
17059 | +{ | |
17060 | + int j; | |
17061 | + | |
17062 | + for (j = min; j <= max; j++) | |
17063 | + swap_free((swp_entry_t) { j }); | |
17064 | +} | |
17065 | + | |
/*
 * toi_swap_allocate_storage
 *
 * Make sure enough swap is allocated to store @request pages, plus the
 * per-page overhead and the header reservation.
 *
 * Round robin allocation (where swap devices have the same priority) could
 * make this very inefficient, so we track the extent currently being built
 * on a per-swapfile basis.
 *
 * Returns 0 on success, -ENOSPC if fewer pages than required could be
 * obtained, the error from toi_add_to_extent_chain() if an extent couldn't
 * be recorded, or failing those the result of get_main_pool_phys_params().
 */
static int toi_swap_allocate_storage(int request)
{
	int i, result = 0, to_add[MAX_SWAPFILES], pages_to_get, extra_pages,
	    gotten = 0, result2;
	unsigned long extent_min[MAX_SWAPFILES], extent_max[MAX_SWAPFILES];

	/*
	 * sizeof(unsigned long) + sizeof(int) bytes of overhead per page,
	 * rounded up to whole pages (the same sizes raw_to_real() deducts).
	 */
	extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
			       + sizeof(int)), PAGE_SIZE);
	/* Only ask for what we don't already hold in swapextents. */
	pages_to_get = request + extra_pages - swapextents.size +
		header_pages_reserved;

	/* Already have enough: just redo the header reservation. */
	if (pages_to_get < 1)
		return apply_header_reservation();

	/*
	 * Set up devinfo for each usable swap device. Devices whose disk name
	 * starts with "ram" are only marked as ignored.
	 */
	for (i = 0; i < MAX_SWAPFILES; i++) {
		struct swap_info_struct *si = get_swap_info_struct(i);
		to_add[i] = 0;
		if (!(si->flags & SWP_USED) || !si->swap_map ||
		    !(si->flags & SWP_WRITEOK))
			continue;
		if (!strncmp(si->bdev->bd_disk->disk_name, "ram", 3)) {
			devinfo[i].ignored = 1;
			continue;
		}
		devinfo[i].ignored = 0;
		devinfo[i].bdev = si->bdev;
		devinfo[i].dev_t = si->bdev->bd_dev;
		devinfo[i].bmap_shift = 3;
		devinfo[i].blocks_per_page = 1;
	}

	/*
	 * Grab swap entries one at a time. Entries from ignored devices are
	 * still tracked in the extents, but don't count towards gotten.
	 */
	while (gotten < pages_to_get) {
		swp_entry_t entry;
		unsigned long new_value;
		unsigned swapfilenum;

		entry = get_swap_page();
		if (!entry.val)
			break;

		swapfilenum = swp_type(entry);
		new_value = entry.val;

		/* First entry on this swapfile: start its extent. */
		if (!to_add[swapfilenum]) {
			to_add[swapfilenum] = 1;
			extent_min[swapfilenum] = new_value;
			extent_max[swapfilenum] = new_value;
			if (!devinfo[swapfilenum].ignored)
				gotten++;
			continue;
		}

		/* Directly follows the pending extent: just grow it. */
		if (new_value == extent_max[swapfilenum] + 1) {
			extent_max[swapfilenum]++;
			if (!devinfo[swapfilenum].ignored)
				gotten++;
			continue;
		}

		/*
		 * Not contiguous: record the pending extent and start a new
		 * one at this entry. If recording fails, give back both the
		 * pending extent and this entry and stop allocating.
		 */
		if (toi_add_to_extent_chain(&swapextents,
					extent_min[swapfilenum],
					extent_max[swapfilenum])) {
			printk(KERN_INFO "Failed to allocate extent for "
					"%lu-%lu.\n", extent_min[swapfilenum],
					extent_max[swapfilenum]);
			free_swap_range(extent_min[swapfilenum],
					extent_max[swapfilenum]);
			swap_free(entry);
			if (!devinfo[swapfilenum].ignored)
				gotten -= (extent_max[swapfilenum] -
					extent_min[swapfilenum] + 1);
			/* Don't try to add again below */
			to_add[swapfilenum] = 0;
			break;
		} else {
			extent_min[swapfilenum] = new_value;
			extent_max[swapfilenum] = new_value;
			if (!devinfo[swapfilenum].ignored)
				gotten++;
		}
	}

	/* Record the extent still pending for each swapfile. */
	for (i = 0; i < MAX_SWAPFILES; i++) {
		int this_result;

		/* Anything to do for this swap entry? */
		if (!to_add[i])
			continue;

		this_result = toi_add_to_extent_chain(&swapextents,
				extent_min[i], extent_max[i]);

		/* Added okay? */
		if (!this_result)
			continue;

		/*
		 * Nope. Remember an error occurred, free the swap and subtract
		 * from the amount of swap allocated.
		 */
		result = this_result;

		free_swap_range(extent_min[i], extent_max[i]);
		if (!devinfo[i].ignored)
			gotten -= (extent_max[i] - extent_min[i] + 1);
	}

	/* A shortfall takes precedence over any earlier error code. */
	if (gotten < pages_to_get) {
		printk("Got fewer pages than required "
				"(%d wanted, %d gotten).\n",
				pages_to_get, gotten);
		result = -ENOSPC;
	}

	swap_pages_allocated += (long) gotten;

	/* The block chains are rebuilt even if the allocation fell short. */
	result2 = get_main_pool_phys_params();

	return result ? result : result2;
}
17191 | + | |
17192 | +static int toi_swap_write_header_init(void) | |
17193 | +{ | |
17194 | + int i, result; | |
17195 | + struct swap_info_struct *si; | |
17196 | + | |
17197 | + toi_bio_ops.rw_init(WRITE, 0); | |
17198 | + toi_writer_buffer_posn = 0; | |
17199 | + | |
17200 | + /* Info needed to bootstrap goes at the start of the header. | |
17201 | + * First we save the positions and devinfo, including the number | |
17202 | + * of header pages. Then we save the structs containing data needed | |
17203 | + * for reading the header pages back. | |
17204 | + * Note that even if header pages take more than one page, when we | |
17205 | + * read back the info, we will have restored the location of the | |
17206 | + * next header page by the time we go to use it. | |
17207 | + */ | |
17208 | + | |
17209 | + result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops, | |
17210 | + (char *) &no_image_signature_contents, | |
17211 | + sizeof(struct sig_data)); | |
17212 | + | |
17213 | + if (result) | |
17214 | + return result; | |
17215 | + | |
17216 | + /* Forward one page will be done prior to the read */ | |
17217 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
17218 | + si = get_swap_info_struct(i); | |
9474138d AM |
17219 | + if (si->flags & SWP_USED && si->swap_map && |
17220 | + si->flags & SWP_WRITEOK) | |
2380c486 JR |
17221 | + devinfo[i].dev_t = si->bdev->bd_dev; |
17222 | + else | |
17223 | + devinfo[i].dev_t = (dev_t) 0; | |
17224 | + } | |
17225 | + | |
17226 | + result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops, | |
17227 | + (char *) &toi_writer_posn_save, | |
17228 | + sizeof(toi_writer_posn_save)); | |
17229 | + | |
17230 | + if (result) | |
17231 | + return result; | |
17232 | + | |
17233 | + result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops, | |
17234 | + (char *) &devinfo, sizeof(devinfo)); | |
17235 | + | |
17236 | + if (result) | |
17237 | + return result; | |
17238 | + | |
17239 | + for (i = 0; i < MAX_SWAPFILES; i++) | |
17240 | + toi_serialise_extent_chain(&toi_swapops, &block_chain[i]); | |
17241 | + | |
17242 | + return 0; | |
17243 | +} | |
17244 | + | |
17245 | +static int toi_swap_write_header_cleanup(void) | |
17246 | +{ | |
0ada99ac | 17247 | + int result = toi_bio_ops.write_header_chunk_finish(); |
2380c486 JR |
17248 | + |
17249 | + /* Set signature to save we have an image */ | |
0ada99ac | 17250 | + if (!result) |
17251 | + result = write_modified_signature(IMAGE_SIGNATURE); | |
17252 | + | |
17253 | + return result; | |
2380c486 JR |
17254 | +} |
17255 | + | |
17256 | +/* ------------------------- HEADER READING ------------------------- */ | |
17257 | + | |
17258 | +/* | |
17259 | + * read_header_init() | |
17260 | + * | |
17261 | + * Description: | |
17262 | + * 1. Attempt to read the device specified with resume=. | |
17263 | + * 2. Check the contents of the swap header for our signature. | |
17264 | + * 3. Warn, ignore, reset and/or continue as appropriate. | |
17265 | + * 4. If continuing, read the toi_swap configuration section | |
17266 | + * of the header and set up block device info so we can read | |
17267 | + * the rest of the header & image. | |
17268 | + * | |
17269 | + * Returns: | |
17270 | + * May not return if user choose to reboot at a warning. | |
17271 | + * -EINVAL if cannot resume at this time. Booting should continue | |
17272 | + * normally. | |
17273 | + */ | |
17274 | + | |
17275 | +static int toi_swap_read_header_init(void) | |
17276 | +{ | |
17277 | + int i, result = 0; | |
17278 | + toi_writer_buffer_posn = 0; | |
17279 | + | |
17280 | + if (!header_dev_t) { | |
17281 | + printk(KERN_INFO "read_header_init called when we haven't " | |
17282 | + "verified there is an image!\n"); | |
17283 | + return -EINVAL; | |
17284 | + } | |
17285 | + | |
17286 | + /* | |
17287 | + * If the header is not on the resume_swap_dev_t, get the resume device | |
17288 | + * first. | |
17289 | + */ | |
17290 | + if (header_dev_t != resume_swap_dev_t) { | |
17291 | + header_block_device = open_bdev(MAX_SWAPFILES + 1, | |
17292 | + header_dev_t, 1); | |
17293 | + | |
17294 | + if (IS_ERR(header_block_device)) | |
17295 | + return PTR_ERR(header_block_device); | |
17296 | + } else | |
17297 | + header_block_device = resume_block_device; | |
17298 | + | |
17299 | + toi_bio_ops.read_header_init(); | |
17300 | + | |
17301 | + /* | |
17302 | + * Read toi_swap configuration. | |
17303 | + * Headerblock size taken into account already. | |
17304 | + */ | |
17305 | + result = toi_bio_ops.bdev_page_io(READ, header_block_device, | |
17306 | + headerblock << 3, | |
17307 | + virt_to_page((unsigned long) toi_writer_buffer)); | |
17308 | + if (result) | |
17309 | + return result; | |
17310 | + | |
17311 | + memcpy(&no_image_signature_contents, toi_writer_buffer, | |
17312 | + sizeof(no_image_signature_contents)); | |
17313 | + | |
17314 | + toi_writer_buffer_posn = sizeof(no_image_signature_contents); | |
17315 | + | |
17316 | + memcpy(&toi_writer_posn_save, toi_writer_buffer + | |
17317 | + toi_writer_buffer_posn, sizeof(toi_writer_posn_save)); | |
17318 | + | |
17319 | + toi_writer_buffer_posn += sizeof(toi_writer_posn_save); | |
17320 | + | |
17321 | + memcpy(&devinfo, toi_writer_buffer + toi_writer_buffer_posn, | |
17322 | + sizeof(devinfo)); | |
17323 | + | |
17324 | + toi_writer_buffer_posn += sizeof(devinfo); | |
17325 | + | |
17326 | + /* Restore device info */ | |
17327 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
17328 | + dev_t thisdevice = devinfo[i].dev_t; | |
17329 | + struct block_device *bdev_result; | |
17330 | + | |
17331 | + devinfo[i].bdev = NULL; | |
17332 | + | |
e999739a | 17333 | + if (!thisdevice || devinfo[i].ignored) |
2380c486 JR |
17334 | + continue; |
17335 | + | |
17336 | + if (thisdevice == resume_swap_dev_t) { | |
17337 | + devinfo[i].bdev = resume_block_device; | |
17338 | + continue; | |
17339 | + } | |
17340 | + | |
17341 | + if (thisdevice == header_dev_t) { | |
17342 | + devinfo[i].bdev = header_block_device; | |
17343 | + continue; | |
17344 | + } | |
17345 | + | |
17346 | + bdev_result = open_bdev(i, thisdevice, 1); | |
17347 | + if (IS_ERR(bdev_result)) | |
17348 | + return PTR_ERR(bdev_result); | |
17349 | + devinfo[i].bdev = bdevs_opened[i]->bdev; | |
17350 | + } | |
17351 | + | |
17352 | + toi_extent_state_goto_start(&toi_writer_posn); | |
17353 | + toi_bio_ops.set_extra_page_forward(); | |
17354 | + | |
17355 | + for (i = 0; i < MAX_SWAPFILES && !result; i++) | |
17356 | + result = toi_load_extent_chain(&block_chain[i]); | |
17357 | + | |
17358 | + return result; | |
17359 | +} | |
17360 | + | |
17361 | +static int toi_swap_read_header_cleanup(void) | |
17362 | +{ | |
17363 | + toi_bio_ops.rw_cleanup(READ); | |
17364 | + return 0; | |
17365 | +} | |
17366 | + | |
/*
 * toi_swap_memory_needed
 *
 * Description:
 *	Reports the amount of RAM this code needs to do its work. (Used
 *	when calculating whether we have enough memory to be able to
 *	hibernate & resume). The value is a constant 1.
 */
static int toi_swap_memory_needed(void)
{
	const int needed = 1;

	return needed;
}
17380 | + | |
17381 | +/* | |
17382 | + * Print debug info | |
17383 | + * | |
17384 | + * Description: | |
17385 | + */ | |
17386 | +static int toi_swap_print_debug_stats(char *buffer, int size) | |
17387 | +{ | |
17388 | + int len = 0; | |
17389 | + struct sysinfo sysinfo; | |
17390 | + | |
17391 | + if (toiActiveAllocator != &toi_swapops) { | |
17392 | + len = scnprintf(buffer, size, | |
17393 | + "- SwapAllocator inactive.\n"); | |
17394 | + return len; | |
17395 | + } | |
17396 | + | |
17397 | + len = scnprintf(buffer, size, "- SwapAllocator active.\n"); | |
17398 | + if (swapfilename[0]) | |
17399 | + len += scnprintf(buffer+len, size-len, | |
17400 | + " Attempting to automatically swapon: %s.\n", | |
17401 | + swapfilename); | |
17402 | + | |
e999739a | 17403 | + si_swapinfo_no_compcache(&sysinfo); |
2380c486 JR |
17404 | + |
17405 | + len += scnprintf(buffer+len, size-len, | |
17406 | + " Swap available for image: %d pages.\n", | |
17407 | + (int) sysinfo.freeswap + toi_swap_storage_allocated()); | |
17408 | + | |
17409 | + return len; | |
17410 | +} | |
17411 | + | |
17412 | +/* | |
17413 | + * Storage needed | |
17414 | + * | |
17415 | + * Returns amount of space in the swap header required | |
17416 | + * for the toi_swap's data. This ignores the links between | |
17417 | + * pages, which we factor in when allocating the space. | |
17418 | + * | |
17419 | + * We ensure the space is allocated, but actually save the | |
17420 | + * data from write_header_init and therefore don't also define a | |
17421 | + * save_config_info routine. | |
17422 | + */ | |
17423 | +static int toi_swap_storage_needed(void) | |
17424 | +{ | |
17425 | + int i, result; | |
0ada99ac | 17426 | + result = sizeof(struct sig_data) + sizeof(toi_writer_posn_save) + |
17427 | + sizeof(devinfo); | |
2380c486 JR |
17428 | + |
17429 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
0ada99ac | 17430 | + result += 2 * sizeof(int); |
2380c486 JR |
17431 | + result += (2 * sizeof(unsigned long) * |
17432 | + block_chain[i].num_extents); | |
17433 | + } | |
17434 | + | |
17435 | + return result; | |
17436 | +} | |
17437 | + | |
17438 | +/* | |
17439 | + * Image_exists | |
17440 | + * | |
17441 | + * Returns -1 if don't know, otherwise 0 (no) or 1 (yes). | |
17442 | + */ | |
17443 | +static int toi_swap_image_exists(int quiet) | |
17444 | +{ | |
17445 | + int signature_found; | |
17446 | + | |
17447 | + if (!resume_swap_dev_t) { | |
17448 | + if (!quiet) | |
17449 | + printk(KERN_INFO "Not even trying to read header " | |
17450 | + "because resume_swap_dev_t is not set.\n"); | |
17451 | + return -1; | |
17452 | + } | |
17453 | + | |
e999739a | 17454 | + if (!resume_block_device) { |
9474138d AM |
17455 | + resume_block_device = open_bdev(MAX_SWAPFILES, |
17456 | + resume_swap_dev_t, 1); | |
17457 | + if (IS_ERR(resume_block_device)) { | |
17458 | + if (!quiet) | |
17459 | + printk(KERN_INFO "Failed to open resume dev_t" | |
17460 | + " (%x).\n", resume_swap_dev_t); | |
17461 | + return -1; | |
17462 | + } | |
2380c486 JR |
17463 | + } |
17464 | + | |
17465 | + signature_found = parse_signature(); | |
17466 | + | |
17467 | + switch (signature_found) { | |
17468 | + case -ENOMEM: | |
17469 | + return -1; | |
17470 | + case -1: | |
17471 | + if (!quiet) | |
17472 | + printk(KERN_ERR "TuxOnIce: Unable to find a signature." | |
17473 | + " Could you have moved a swap file?\n"); | |
17474 | + return -1; | |
17475 | + case 0: | |
17476 | + case 1: | |
17477 | + if (!quiet) | |
17478 | + printk(KERN_INFO "TuxOnIce: Normal swapspace found.\n"); | |
17479 | + return 0; | |
17480 | + case 2: | |
17481 | + case 3: | |
17482 | + case 4: | |
17483 | + if (!quiet) | |
17484 | + printk(KERN_INFO "TuxOnIce: Detected another " | |
17485 | + "implementation's signature.\n"); | |
17486 | + return 0; | |
17487 | + case 10: | |
17488 | + if (!quiet) | |
17489 | + printk(KERN_INFO "TuxOnIce: Detected TuxOnIce binary " | |
17490 | + "signature.\n"); | |
17491 | + return 1; | |
17492 | + } | |
17493 | + | |
17494 | + printk("Unrecognised parse_signature result (%d).\n", signature_found); | |
17495 | + return 0; | |
17496 | +} | |
17497 | + | |
17498 | +/* toi_swap_remove_image | |
17499 | + * | |
17500 | + */ | |
17501 | +static int toi_swap_remove_image(void) | |
17502 | +{ | |
17503 | + /* | |
17504 | + * If nr_hibernates == 0, we must be booting, so no swap pages | |
17505 | + * will be recorded as used yet. | |
17506 | + */ | |
17507 | + | |
17508 | + if (nr_hibernates) | |
17509 | + toi_swap_release_storage(); | |
17510 | + | |
17511 | + /* | |
17512 | + * We don't do a sanity check here: we want to restore the swap | |
17513 | + * whatever version of kernel made the hibernate image. | |
17514 | + * | |
17515 | + * We need to write swap, but swap may not be enabled so | |
17516 | + * we write the device directly | |
17517 | + * | |
17518 | + * If we don't have an current_signature_page, we didn't | |
17519 | + * read an image header, so don't change anything. | |
17520 | + */ | |
17521 | + | |
17522 | + return toi_swap_image_exists(1) ? | |
17523 | + write_modified_signature(NO_IMAGE_SIGNATURE) : 0; | |
17524 | +} | |
17525 | + | |
17526 | +/* | |
17527 | + * Mark resume attempted. | |
17528 | + * | |
17529 | + * Record that we tried to resume from this image. We have already read the | |
17530 | + * signature in. We just need to write the modified version. | |
17531 | + */ | |
17532 | +static int toi_swap_mark_resume_attempted(int mark) | |
17533 | +{ | |
17534 | + if (!resume_swap_dev_t) { | |
17535 | + printk(KERN_INFO "Not even trying to record attempt at resuming" | |
17536 | + " because resume_swap_dev_t is not set.\n"); | |
17537 | + return -ENODEV; | |
17538 | + } | |
17539 | + | |
17540 | + return write_modified_signature(mark ? TRIED_RESUME : NO_TRIED_RESUME); | |
17541 | +} | |
17542 | + | |
17543 | +/* | |
17544 | + * Parse Image Location | |
17545 | + * | |
17546 | + * Attempt to parse a resume= parameter. | |
17547 | + * Swap Writer accepts: | |
17548 | + * resume=swap:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE] | |
17549 | + * | |
17550 | + * Where: | |
17551 | + * DEVNAME is convertable to a dev_t by name_to_dev_t | |
17552 | + * FIRSTBLOCK is the location of the first block in the swap file | |
17553 | + * (specifying for a swap partition is nonsensical but not prohibited). | |
17554 | + * Data is validated by attempting to read a swap header from the | |
17555 | + * location given. Failure will result in toi_swap refusing to | |
17556 | + * save an image, and a reboot with correct parameters will be | |
17557 | + * necessary. | |
17558 | + */ | |
17559 | +static int toi_swap_parse_sig_location(char *commandline, | |
17560 | + int only_allocator, int quiet) | |
17561 | +{ | |
17562 | + char *thischar, *devstart, *colon = NULL; | |
9474138d | 17563 | + int signature_found, result = -EINVAL, temp_result = 0; |
2380c486 JR |
17564 | + |
17565 | + if (strncmp(commandline, "swap:", 5)) { | |
17566 | + /* | |
17567 | + * Failing swap:, we'll take a simple | |
17568 | + * resume=/dev/hda2, but fall through to | |
17569 | + * other allocators if /dev/ isn't matched. | |
17570 | + */ | |
17571 | + if (strncmp(commandline, "/dev/", 5)) | |
17572 | + return 1; | |
17573 | + } else | |
17574 | + commandline += 5; | |
17575 | + | |
17576 | + devstart = commandline; | |
17577 | + thischar = commandline; | |
17578 | + while ((*thischar != ':') && (*thischar != '@') && | |
17579 | + ((thischar - commandline) < 250) && (*thischar)) | |
17580 | + thischar++; | |
17581 | + | |
17582 | + if (*thischar == ':') { | |
17583 | + colon = thischar; | |
17584 | + *colon = 0; | |
17585 | + thischar++; | |
17586 | + } | |
17587 | + | |
17588 | + while ((thischar - commandline) < 250 && *thischar) | |
17589 | + thischar++; | |
17590 | + | |
9474138d AM |
17591 | + if (colon) { |
17592 | + unsigned long block; | |
17593 | + temp_result = strict_strtoul(colon + 1, 0, &block); | |
17594 | + if (!temp_result) | |
17595 | + resume_firstblock = (int) block; | |
17596 | + } else | |
2380c486 JR |
17597 | + resume_firstblock = 0; |
17598 | + | |
17599 | + clear_toi_state(TOI_CAN_HIBERNATE); | |
17600 | + clear_toi_state(TOI_CAN_RESUME); | |
17601 | + | |
9474138d AM |
17602 | + if (!temp_result) |
17603 | + temp_result = try_to_parse_resume_device(devstart, quiet); | |
2380c486 JR |
17604 | + |
17605 | + if (colon) | |
17606 | + *colon = ':'; | |
17607 | + | |
17608 | + if (temp_result) | |
17609 | + return -EINVAL; | |
17610 | + | |
17611 | + signature_found = toi_swap_image_exists(quiet); | |
17612 | + | |
17613 | + if (signature_found != -1) { | |
17614 | + result = 0; | |
17615 | + | |
17616 | + toi_bio_ops.set_devinfo(devinfo); | |
17617 | + toi_writer_posn.chains = &block_chain[0]; | |
17618 | + toi_writer_posn.num_chains = MAX_SWAPFILES; | |
17619 | + set_toi_state(TOI_CAN_HIBERNATE); | |
17620 | + set_toi_state(TOI_CAN_RESUME); | |
17621 | + } else | |
17622 | + if (!quiet) | |
17623 | + printk(KERN_ERR "TuxOnIce: SwapAllocator: No swap " | |
17624 | + "signature found at %s.\n", devstart); | |
17625 | + return result; | |
17626 | +} | |
17627 | + | |
17628 | +static int header_locations_read_sysfs(const char *page, int count) | |
17629 | +{ | |
17630 | + int i, printedpartitionsmessage = 0, len = 0, haveswap = 0; | |
17631 | + struct inode *swapf = NULL; | |
17632 | + int zone; | |
17633 | + char *path_page; | |
17634 | + char *path, *output = (char *) page; | |
17635 | + int path_len; | |
17636 | + path_page = page ? (char *) toi_get_free_page(10, GFP_KERNEL) : NULL; | |
17637 | + if (!path_page) /* no buffer to fill, or allocation failed */ | |
17638 | + return 0; | |
17639 | + | |
17640 | + for (i = 0; i < MAX_SWAPFILES; i++) { | |
17641 | + struct swap_info_struct *si = get_swap_info_struct(i); | |
17642 | + | |
9474138d AM |
17643 | + if (!(si->flags & SWP_USED) || si->swap_map || |
17644 | + !(si->flags & SWP_WRITEOK)) | |
2380c486 JR |
17645 | + continue; |
17646 | + | |
17647 | + if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) { | |
17648 | + haveswap = 1; | |
17649 | + if (!printedpartitionsmessage) { | |
17650 | + len += sprintf(output + len, | |
17651 | + "For swap partitions, simply use the " | |
17652 | + "format: resume=swap:/dev/hda1.\n"); | |
17653 | + printedpartitionsmessage = 1; | |
17654 | + } | |
17655 | + } else { | |
17656 | + path_len = 0; | |
17657 | + | |
17658 | + path = d_path(&si->swap_file->f_path, path_page, | |
17659 | + PAGE_SIZE); | |
17660 | + path_len = snprintf(path_page, 31, "%s", path); | |
17661 | + | |
17662 | + haveswap = 1; | |
17663 | + swapf = si->swap_file->f_mapping->host; | |
17664 | + zone = bmap(swapf, 0); | |
17665 | + if (!zone) { | |
17666 | + len += sprintf(output + len, | |
17667 | + "Swapfile %s has been corrupted. Reuse" | |
17668 | + " mkswap on it and try again.\n", | |
17669 | + path_page); | |
17670 | + } else { | |
17671 | + char name_buffer[255]; | |
17672 | + len += sprintf(output + len, | |
17673 | + "For swapfile `%s`," | |
17674 | + " use resume=swap:/dev/%s:0x%x.\n", | |
17675 | + path_page, | |
17676 | + bdevname(si->bdev, name_buffer), | |
17677 | + zone << (swapf->i_blkbits - 9)); | |
17678 | + } | |
17679 | + } | |
17680 | + } | |
17681 | + | |
17682 | + if (!haveswap) | |
17683 | + len = sprintf(output, "You need to turn on swap partitions " | |
17684 | + "before examining this file.\n"); | |
17685 | + | |
17686 | + toi_free_page(10, (unsigned long) path_page); | |
17687 | + return len; | |
17688 | +} | |
17689 | + | |
17690 | +static struct toi_sysfs_data sysfs_params[] = { | |
17691 | + SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL), | |
17692 | + SYSFS_CUSTOM("headerlocations", SYSFS_READONLY, | |
17693 | + header_locations_read_sysfs, NULL, 0, NULL), | |
17694 | + SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0, | |
17695 | + attempt_to_parse_resume_device2), | |
17696 | +}; | |
17697 | + | |
17698 | +static struct toi_module_ops toi_swapops = { | |
17699 | + .type = WRITER_MODULE, | |
17700 | + .name = "swap storage", | |
17701 | + .directory = "swap", | |
17702 | + .module = THIS_MODULE, | |
17703 | + .memory_needed = toi_swap_memory_needed, | |
17704 | + .print_debug_info = toi_swap_print_debug_stats, | |
17705 | + .storage_needed = toi_swap_storage_needed, | |
17706 | + .initialise = toi_swap_initialise, | |
17707 | + .cleanup = toi_swap_cleanup, | |
17708 | + | |
17709 | + .noresume_reset = toi_swap_noresume_reset, | |
17710 | + .storage_available = toi_swap_storage_available, | |
17711 | + .storage_allocated = toi_swap_storage_allocated, | |
2380c486 JR |
17712 | + .reserve_header_space = toi_swap_reserve_header_space, |
17713 | + .allocate_storage = toi_swap_allocate_storage, | |
17714 | + .image_exists = toi_swap_image_exists, | |
17715 | + .mark_resume_attempted = toi_swap_mark_resume_attempted, | |
17716 | + .write_header_init = toi_swap_write_header_init, | |
17717 | + .write_header_cleanup = toi_swap_write_header_cleanup, | |
17718 | + .read_header_init = toi_swap_read_header_init, | |
17719 | + .read_header_cleanup = toi_swap_read_header_cleanup, | |
17720 | + .remove_image = toi_swap_remove_image, | |
17721 | + .parse_sig_location = toi_swap_parse_sig_location, | |
17722 | + | |
17723 | + .sysfs_data = sysfs_params, | |
17724 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
17725 | + sizeof(struct toi_sysfs_data), | |
17726 | +}; | |
17727 | + | |
17728 | +/* ---- Registration ---- */ | |
17729 | +static __init int toi_swap_load(void) | |
17730 | +{ | |
17731 | + toi_swapops.rw_init = toi_bio_ops.rw_init; | |
17732 | + toi_swapops.rw_cleanup = toi_bio_ops.rw_cleanup; | |
17733 | + toi_swapops.read_page = toi_bio_ops.read_page; | |
17734 | + toi_swapops.write_page = toi_bio_ops.write_page; | |
17735 | + toi_swapops.rw_header_chunk = toi_bio_ops.rw_header_chunk; | |
17736 | + toi_swapops.rw_header_chunk_noreadahead = | |
17737 | + toi_bio_ops.rw_header_chunk_noreadahead; | |
17738 | + toi_swapops.io_flusher = toi_bio_ops.io_flusher; | |
17739 | + toi_swapops.update_throughput_throttle = | |
17740 | + toi_bio_ops.update_throughput_throttle; | |
17741 | + toi_swapops.finish_all_io = toi_bio_ops.finish_all_io; | |
17742 | + | |
17743 | + return toi_register_module(&toi_swapops); | |
17744 | +} | |
17745 | + | |
17746 | +#ifdef MODULE | |
17747 | +static __exit void toi_swap_unload(void) | |
17748 | +{ | |
17749 | + toi_unregister_module(&toi_swapops); | |
17750 | +} | |
17751 | + | |
17752 | +module_init(toi_swap_load); | |
17753 | +module_exit(toi_swap_unload); | |
17754 | +MODULE_LICENSE("GPL"); | |
17755 | +MODULE_AUTHOR("Nigel Cunningham"); | |
17756 | +MODULE_DESCRIPTION("TuxOnIce SwapAllocator"); | |
17757 | +#else | |
17758 | +late_initcall(toi_swap_load); | |
17759 | +#endif | |
17760 | diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c | |
17761 | new file mode 100644 | |
9474138d | 17762 | index 0000000..3b815ab |
2380c486 JR |
17763 | --- /dev/null |
17764 | +++ b/kernel/power/tuxonice_sysfs.c | |
9474138d | 17765 | @@ -0,0 +1,335 @@ |
2380c486 JR |
17766 | +/* |
17767 | + * kernel/power/tuxonice_sysfs.c | |
17768 | + * | |
17769 | + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net) | |
17770 | + * | |
17771 | + * This file is released under the GPLv2. | |
17772 | + * | |
17773 | + * This file contains support for sysfs entries for tuning TuxOnIce. | |
17774 | + * | |
17775 | + * We have a generic handler that deals with the most common cases, and | |
17776 | + * hooks for special handlers to use. | |
17777 | + */ | |
17778 | + | |
17779 | +#include <linux/suspend.h> | |
2380c486 JR |
17780 | + |
17781 | +#include "tuxonice_sysfs.h" | |
17782 | +#include "tuxonice.h" | |
17783 | +#include "tuxonice_storage.h" | |
17784 | +#include "tuxonice_alloc.h" | |
17785 | + | |
17786 | +static int toi_sysfs_initialised; | |
17787 | + | |
17788 | +static void toi_initialise_sysfs(void); | |
17789 | + | |
17790 | +static struct toi_sysfs_data sysfs_params[]; | |
17791 | + | |
17792 | +#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr) | |
17793 | + | |
17794 | +static void toi_main_wrapper(void) | |
17795 | +{ | |
9474138d | 17796 | + toi_try_hibernate(); |
2380c486 JR |
17797 | +} |
17798 | + | |
17799 | +static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr, | |
17800 | + char *page) | |
17801 | +{ | |
17802 | + struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); | |
17803 | + int len = 0; | |
17804 | + int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ; | |
17805 | + | |
17806 | + if (full_prep && toi_start_anything(0)) | |
17807 | + return -EBUSY; | |
17808 | + | |
17809 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) | |
17810 | + toi_prepare_usm(); | |
17811 | + | |
17812 | + switch (sysfs_data->type) { | |
17813 | + case TOI_SYSFS_DATA_CUSTOM: | |
17814 | + len = (sysfs_data->data.special.read_sysfs) ? | |
17815 | + (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE) | |
17816 | + : 0; | |
17817 | + break; | |
17818 | + case TOI_SYSFS_DATA_BIT: /* print 0 or 1 */ | |
17819 | + len = sprintf(page, "%d\n", | |
17820 | + !!test_bit(sysfs_data->data.bit.bit, | |
17821 | + sysfs_data->data.bit.bit_vector)); | |
17822 | + break; | |
17823 | + case TOI_SYSFS_DATA_INTEGER: | |
17824 | + len = sprintf(page, "%d\n", | |
17825 | + *(sysfs_data->data.integer.variable)); | |
17826 | + break; | |
17827 | + case TOI_SYSFS_DATA_LONG: | |
17828 | + len = sprintf(page, "%ld\n", | |
17829 | + *(sysfs_data->data.a_long.variable)); | |
17830 | + break; | |
17831 | + case TOI_SYSFS_DATA_UL: | |
17832 | + len = sprintf(page, "%lu\n", | |
17833 | + *(sysfs_data->data.ul.variable)); | |
17834 | + break; | |
17835 | + case TOI_SYSFS_DATA_STRING: | |
17836 | + len = sprintf(page, "%s\n", | |
17837 | + sysfs_data->data.string.variable); | |
17838 | + break; | |
17839 | + } | |
17840 | + | |
17841 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) | |
17842 | + toi_cleanup_usm(); | |
17843 | + | |
17844 | + if (full_prep) | |
17845 | + toi_finish_anything(0); | |
17846 | + | |
17847 | + return len; | |
17848 | +} | |
17849 | + | |
17850 | +#define BOUND(_variable, _type) do { \ | |
17851 | + if (*_variable < sysfs_data->data._type.minimum) \ | |
17852 | + *_variable = sysfs_data->data._type.minimum; \ | |
17853 | + else if (*_variable > sysfs_data->data._type.maximum) \ | |
17854 | + *_variable = sysfs_data->data._type.maximum; \ | |
17855 | +} while (0) | |
17856 | + | |
17857 | +static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr, | |
17858 | + const char *my_buf, size_t count) | |
17859 | +{ | |
17860 | + int assigned_temp_buffer = 0, result = count; | |
17861 | + struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); | |
17862 | + | |
17863 | + if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME))) | |
17864 | + return -EBUSY; | |
17865 | + | |
17866 | + ((char *) my_buf)[count] = 0; | |
17867 | + | |
17868 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) | |
17869 | + toi_prepare_usm(); | |
17870 | + | |
17871 | + switch (sysfs_data->type) { | |
17872 | + case TOI_SYSFS_DATA_CUSTOM: | |
17873 | + if (sysfs_data->data.special.write_sysfs) | |
17874 | + result = (sysfs_data->data.special.write_sysfs)(my_buf, | |
17875 | + count); | |
17876 | + break; | |
17877 | + case TOI_SYSFS_DATA_BIT: | |
17878 | + { | |
9474138d AM |
17879 | + unsigned long value; |
17880 | + result = strict_strtoul(my_buf, 0, &value); | |
17881 | + if (result) | |
17882 | + break; | |
2380c486 JR |
17883 | + if (value) |
17884 | + set_bit(sysfs_data->data.bit.bit, | |
17885 | + (sysfs_data->data.bit.bit_vector)); | |
17886 | + else | |
17887 | + clear_bit(sysfs_data->data.bit.bit, | |
17888 | + (sysfs_data->data.bit.bit_vector)); | |
17889 | + } | |
17890 | + break; | |
17891 | + case TOI_SYSFS_DATA_INTEGER: | |
17892 | + { | |
9474138d AM |
17893 | + long temp; |
17894 | + result = strict_strtol(my_buf, 0, &temp); | |
17895 | + if (result) | |
17896 | + break; | |
17897 | + *(sysfs_data->data.integer.variable) = (int) temp; | |
17898 | + BOUND(sysfs_data->data.integer.variable, integer); | |
2380c486 JR |
17899 | + break; |
17900 | + } | |
17901 | + case TOI_SYSFS_DATA_LONG: | |
17902 | + { | |
17903 | + long *variable = | |
17904 | + sysfs_data->data.a_long.variable; | |
9474138d AM |
17905 | + result = strict_strtol(my_buf, 0, variable); |
17906 | + if (result) | |
17907 | + break; | |
2380c486 JR |
17908 | + BOUND(variable, a_long); |
17909 | + break; | |
17910 | + } | |
17911 | + case TOI_SYSFS_DATA_UL: | |
17912 | + { | |
17913 | + unsigned long *variable = | |
17914 | + sysfs_data->data.ul.variable; | |
9474138d AM |
17915 | + result = strict_strtoul(my_buf, 0, variable); |
17916 | + if (result) | |
17917 | + break; | |
2380c486 JR |
17918 | + BOUND(variable, ul); |
17919 | + break; | |
17920 | + } | |
17921 | + break; | |
17922 | + case TOI_SYSFS_DATA_STRING: | |
17923 | + { | |
17924 | + int copy_len = count; | |
17925 | + char *variable = | |
17926 | + sysfs_data->data.string.variable; | |
17927 | + | |
17928 | + if (sysfs_data->data.string.max_length && | |
17929 | + (copy_len > sysfs_data->data.string.max_length)) | |
17930 | + copy_len = sysfs_data->data.string.max_length; | |
17931 | + | |
17932 | + if (!variable) { | |
17933 | + variable = (char *) toi_get_zeroed_page(31, | |
17934 | + TOI_ATOMIC_GFP); | |
17935 | + sysfs_data->data.string.variable = variable; | |
17936 | + assigned_temp_buffer = 1; | |
17937 | + } | |
17938 | + strncpy(variable, my_buf, copy_len); | |
17939 | + if (copy_len && my_buf[copy_len - 1] == '\n') | |
17940 | + variable[copy_len - 1] = 0; | |
17941 | + variable[copy_len] = 0; /* terminate at the bounded length */ | |
17942 | + } | |
17943 | + break; | |
17944 | + } | |
17945 | + | |
9474138d AM |
17946 | + if (!result) |
17947 | + result = count; | |
17948 | + | |
2380c486 | 17949 | + /* Side effect routine? */ |
9474138d | 17950 | + if (result == count && sysfs_data->write_side_effect) |
2380c486 JR |
17951 | + sysfs_data->write_side_effect(); |
17952 | + | |
17953 | + /* Free temporary buffers */ | |
17954 | + if (assigned_temp_buffer) { | |
17955 | + toi_free_page(31, | |
17956 | + (unsigned long) sysfs_data->data.string.variable); | |
17957 | + sysfs_data->data.string.variable = NULL; | |
17958 | + } | |
17959 | + | |
17960 | + if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) | |
17961 | + toi_cleanup_usm(); | |
17962 | + | |
17963 | + toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME); | |
17964 | + | |
17965 | + return result; | |
17966 | +} | |
17967 | + | |
17968 | +static struct sysfs_ops toi_sysfs_ops = { | |
17969 | + .show = &toi_attr_show, | |
17970 | + .store = &toi_attr_store, | |
17971 | +}; | |
17972 | + | |
17973 | +static struct kobj_type toi_ktype = { | |
17974 | + .sysfs_ops = &toi_sysfs_ops, | |
17975 | +}; | |
17976 | + | |
17977 | +struct kobject *tuxonice_kobj; | |
17978 | + | |
17979 | +/* Non-module sysfs entries. | |
17980 | + * | |
17981 | + * This array contains entries that are automatically registered at | |
17982 | + * boot. Modules and the console code register their own entries separately. | |
17983 | + */ | |
17984 | + | |
17985 | +static struct toi_sysfs_data sysfs_params[] = { | |
17986 | + SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL, | |
17987 | + SYSFS_HIBERNATING, toi_main_wrapper), | |
17988 | + SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL, | |
9474138d | 17989 | + SYSFS_RESUMING, toi_try_resume) |
2380c486 JR |
17990 | +}; |
17991 | + | |
17992 | +void remove_toi_sysdir(struct kobject *kobj) | |
17993 | +{ | |
17994 | + if (!kobj) | |
17995 | + return; | |
17996 | + | |
17997 | + kobject_put(kobj); | |
17998 | +} | |
17999 | + | |
18000 | +struct kobject *make_toi_sysdir(char *name) | |
18001 | +{ | |
18002 | + struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj); | |
18003 | + | |
18004 | + if (!kobj) { | |
18005 | + printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs " | |
18006 | + "dir!\n"); | |
18007 | + return NULL; | |
18008 | + } | |
18009 | + | |
18010 | + kobj->ktype = &toi_ktype; | |
18011 | + | |
18012 | + return kobj; | |
18013 | +} | |
18014 | + | |
18015 | +/* toi_register_sysfs_file | |
18016 | + * | |
18017 | + * Helper for registering a new /sys/power/tuxonice entry. | |
18018 | + */ | |
18019 | + | |
18020 | +int toi_register_sysfs_file( | |
18021 | + struct kobject *kobj, | |
18022 | + struct toi_sysfs_data *toi_sysfs_data) | |
18023 | +{ | |
18024 | + int result; | |
18025 | + | |
18026 | + if (!toi_sysfs_initialised) | |
18027 | + toi_initialise_sysfs(); | |
18028 | + | |
18029 | + result = sysfs_create_file(kobj, &toi_sysfs_data->attr); | |
18030 | + if (result) | |
18031 | + printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s " | |
18032 | + "returned %d.\n", | |
18033 | + toi_sysfs_data->attr.name, result); | |
18034 | + kobj->ktype = &toi_ktype; | |
18035 | + | |
18036 | + return result; | |
18037 | +} | |
18038 | +EXPORT_SYMBOL_GPL(toi_register_sysfs_file); | |
18039 | + | |
18040 | +/* toi_unregister_sysfs_file | |
18041 | + * | |
18042 | + * Helper for removing unwanted /sys/power/tuxonice entries. | |
18043 | + * | |
18044 | + */ | |
18045 | +void toi_unregister_sysfs_file(struct kobject *kobj, | |
18046 | + struct toi_sysfs_data *toi_sysfs_data) | |
18047 | +{ | |
18048 | + sysfs_remove_file(kobj, &toi_sysfs_data->attr); | |
18049 | +} | |
18050 | +EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file); | |
18051 | + | |
18052 | +void toi_cleanup_sysfs(void) | |
18053 | +{ | |
18054 | + int i, | |
18055 | + numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
18056 | + | |
18057 | + if (!toi_sysfs_initialised) | |
18058 | + return; | |
18059 | + | |
18060 | + for (i = 0; i < numfiles; i++) | |
18061 | + toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
18062 | + | |
18063 | + kobject_put(tuxonice_kobj); | |
18064 | + toi_sysfs_initialised = 0; | |
18065 | +} | |
18066 | + | |
18067 | +/* toi_initialise_sysfs | |
18068 | + * | |
18069 | + * Initialise the /sys/power/tuxonice directory. | |
18070 | + */ | |
18071 | + | |
18072 | +static void toi_initialise_sysfs(void) | |
18073 | +{ | |
18074 | + int i; | |
18075 | + int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); | |
18076 | + | |
18077 | + if (toi_sysfs_initialised) | |
18078 | + return; | |
18079 | + | |
18080 | + /* Make our TuxOnIce directory a child of /sys/power */ | |
18081 | + tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj); | |
18082 | + if (!tuxonice_kobj) | |
18083 | + return; | |
18084 | + | |
18085 | + toi_sysfs_initialised = 1; | |
18086 | + | |
18087 | + for (i = 0; i < numfiles; i++) | |
18088 | + toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); | |
18089 | +} | |
18090 | + | |
18091 | +int toi_sysfs_init(void) | |
18092 | +{ | |
18093 | + toi_initialise_sysfs(); | |
18094 | + return 0; | |
18095 | +} | |
18096 | + | |
18097 | +void toi_sysfs_exit(void) | |
18098 | +{ | |
18099 | + toi_cleanup_sysfs(); | |
18100 | +} | |
18101 | diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h | |
18102 | new file mode 100644 | |
9474138d | 18103 | index 0000000..2020ac7 |
2380c486 JR |
18104 | --- /dev/null |
18105 | +++ b/kernel/power/tuxonice_sysfs.h | |
9474138d | 18106 | @@ -0,0 +1,137 @@ |
2380c486 JR |
18107 | +/* |
18108 | + * kernel/power/tuxonice_sysfs.h | |
18109 | + * | |
18110 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
18111 | + * | |
18112 | + * This file is released under the GPLv2. | |
18113 | + */ | |
18114 | + | |
18115 | +#include <linux/sysfs.h> | |
2380c486 JR |
18116 | + |
18117 | +struct toi_sysfs_data { | |
18118 | + struct attribute attr; | |
18119 | + int type; | |
18120 | + int flags; | |
18121 | + union { | |
18122 | + struct { | |
18123 | + unsigned long *bit_vector; | |
18124 | + int bit; | |
18125 | + } bit; | |
18126 | + struct { | |
18127 | + int *variable; | |
18128 | + int minimum; | |
18129 | + int maximum; | |
18130 | + } integer; | |
18131 | + struct { | |
18132 | + long *variable; | |
18133 | + long minimum; | |
18134 | + long maximum; | |
18135 | + } a_long; | |
18136 | + struct { | |
18137 | + unsigned long *variable; | |
18138 | + unsigned long minimum; | |
18139 | + unsigned long maximum; | |
18140 | + } ul; | |
18141 | + struct { | |
18142 | + char *variable; | |
18143 | + int max_length; | |
18144 | + } string; | |
18145 | + struct { | |
18146 | + int (*read_sysfs) (const char *buffer, int count); | |
18147 | + int (*write_sysfs) (const char *buffer, int count); | |
18148 | + void *data; | |
18149 | + } special; | |
18150 | + } data; | |
18151 | + | |
18152 | + /* Side effects routine. Used, eg, for reparsing the | |
18153 | + * resume= entry when it changes */ | |
18154 | + void (*write_side_effect) (void); | |
18155 | + struct list_head sysfs_data_list; | |
18156 | +}; | |
18157 | + | |
18158 | +enum { | |
18159 | + TOI_SYSFS_DATA_NONE = 1, | |
18160 | + TOI_SYSFS_DATA_CUSTOM, | |
18161 | + TOI_SYSFS_DATA_BIT, | |
18162 | + TOI_SYSFS_DATA_INTEGER, | |
18163 | + TOI_SYSFS_DATA_UL, | |
18164 | + TOI_SYSFS_DATA_LONG, | |
18165 | + TOI_SYSFS_DATA_STRING | |
18166 | +}; | |
18167 | + | |
18168 | +#define SYSFS_WRITEONLY 0200 | |
18169 | +#define SYSFS_READONLY 0444 | |
18170 | +#define SYSFS_RW 0644 | |
18171 | + | |
18172 | +#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \ | |
18173 | + .attr = {.name = _name , .mode = _mode }, \ | |
18174 | + .type = TOI_SYSFS_DATA_BIT, \ | |
18175 | + .flags = _flags, \ | |
18176 | + .data = { .bit = { .bit_vector = _ul, .bit = _bit } } } | |
18177 | + | |
18178 | +#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \ | |
18179 | + .attr = {.name = _name , .mode = _mode }, \ | |
18180 | + .type = TOI_SYSFS_DATA_INTEGER, \ | |
18181 | + .flags = _flags, \ | |
18182 | + .data = { .integer = { .variable = _int, .minimum = _min, \ | |
18183 | + .maximum = _max } }, \ | |
18184 | + .write_side_effect = _wse } | |
18185 | + | |
18186 | +#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \ | |
18187 | + .attr = {.name = _name , .mode = _mode }, \ | |
18188 | + .type = TOI_SYSFS_DATA_UL, \ | |
18189 | + .flags = _flags, \ | |
18190 | + .data = { .ul = { .variable = _ul, .minimum = _min, \ | |
18191 | + .maximum = _max } } } | |
18192 | + | |
18193 | +#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \ | |
18194 | + .attr = {.name = _name , .mode = _mode }, \ | |
18195 | + .type = TOI_SYSFS_DATA_LONG, \ | |
18196 | + .flags = _flags, \ | |
18197 | + .data = { .a_long = { .variable = _long, .minimum = _min, \ | |
18198 | + .maximum = _max } } } | |
18199 | + | |
18200 | +#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \ | |
18201 | + .attr = {.name = _name , .mode = _mode }, \ | |
18202 | + .type = TOI_SYSFS_DATA_STRING, \ | |
18203 | + .flags = _flags, \ | |
18204 | + .data = { .string = { .variable = _string, .max_length = _max_len } }, \ | |
18205 | + .write_side_effect = _wse } | |
18206 | + | |
18207 | +#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \ | |
18208 | + .attr = {.name = _name , .mode = _mode }, \ | |
18209 | + .type = TOI_SYSFS_DATA_CUSTOM, \ | |
18210 | + .flags = _flags, \ | |
18211 | + .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \ | |
18212 | + .write_side_effect = _wse } | |
18213 | + | |
18214 | +#define SYSFS_NONE(_name, _wse) { \ | |
18215 | + .attr = {.name = _name , .mode = SYSFS_WRITEONLY }, \ | |
18216 | + .type = TOI_SYSFS_DATA_NONE, \ | |
18217 | + .write_side_effect = _wse, \ | |
18218 | +} | |
18219 | + | |
18220 | +/* Flags */ | |
18221 | +#define SYSFS_NEEDS_SM_FOR_READ 1 | |
18222 | +#define SYSFS_NEEDS_SM_FOR_WRITE 2 | |
18223 | +#define SYSFS_HIBERNATE 4 | |
18224 | +#define SYSFS_RESUME 8 | |
18225 | +#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME) | |
18226 | +#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE) | |
18227 | +#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE) | |
18228 | +#define SYSFS_NEEDS_SM_FOR_BOTH \ | |
18229 | + (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE) | |
18230 | + | |
18231 | +int toi_register_sysfs_file(struct kobject *kobj, | |
18232 | + struct toi_sysfs_data *toi_sysfs_data); | |
18233 | +void toi_unregister_sysfs_file(struct kobject *kobj, | |
18234 | + struct toi_sysfs_data *toi_sysfs_data); | |
18235 | + | |
18236 | +extern struct kobject *tuxonice_kobj; | |
18237 | + | |
18238 | +struct kobject *make_toi_sysdir(char *name); | |
18239 | +void remove_toi_sysdir(struct kobject *obj); | |
18240 | +extern void toi_cleanup_sysfs(void); | |
18241 | + | |
18242 | +extern int toi_sysfs_init(void); | |
18243 | +extern void toi_sysfs_exit(void); | |
18244 | diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c | |
18245 | new file mode 100644 | |
18246 | index 0000000..4da4afd | |
18247 | --- /dev/null | |
18248 | +++ b/kernel/power/tuxonice_ui.c | |
18249 | @@ -0,0 +1,250 @@ | |
18250 | +/* | |
18251 | + * kernel/power/tuxonice_ui.c | |
18252 | + * | |
18253 | + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> | |
18254 | + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz> | |
18255 | + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr> | |
18256 | + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net) | |
18257 | + * | |
18258 | + * This file is released under the GPLv2. | |
18259 | + * | |
18260 | + * Routines for TuxOnIce's user interface. | |
18261 | + * | |
18262 | + * The user interface code talks to a userspace program via a | |
18263 | + * netlink socket. | |
18264 | + * | |
18265 | + * The kernel side: | |
18266 | + * - starts the userui program; | |
18267 | + * - sends text messages and progress bar status; | |
18268 | + * | |
18269 | + * The user space side: | |
18270 | + * - passes messages regarding user requests (abort, toggle reboot etc) | |
18271 | + * | |
18272 | + */ | |
18273 | + | |
18274 | +#define __KERNEL_SYSCALLS__ | |
18275 | + | |
18276 | +#include <linux/reboot.h> | |
18277 | + | |
18278 | +#include "tuxonice_sysfs.h" | |
18279 | +#include "tuxonice_modules.h" | |
18280 | +#include "tuxonice.h" | |
18281 | +#include "tuxonice_ui.h" | |
18282 | +#include "tuxonice_netlink.h" | |
18283 | +#include "tuxonice_power_off.h" | |
18284 | +#include "tuxonice_builtin.h" | |
18285 | + | |
18286 | +static char local_printf_buf[1024]; /* Same as printk - should be safe */ | |
18287 | +struct ui_ops *toi_current_ui; | |
18288 | +EXPORT_SYMBOL_GPL(toi_current_ui); | |
18289 | + | |
18290 | +/** | |
18291 | + * toi_wait_for_keypress - Wait for keypress via userui or /dev/console. | |
18292 | + * | |
18293 | + * @timeout: Maximum time to wait. | |
18294 | + * | |
18295 | + * Wait for a keypress, either from userui or /dev/console if userui isn't | |
18296 | + * available. The non-userui path is particularly for at boot-time, prior | |
18297 | + * to userui being started, when we have an important warning to give to | |
18298 | + * the user. | |
18299 | + */ | |
18300 | +static char toi_wait_for_keypress(int timeout) | |
18301 | +{ | |
18302 | + if (toi_current_ui && toi_current_ui->wait_for_key(timeout)) | |
18303 | + return ' '; | |
18304 | + | |
18305 | + return toi_wait_for_keypress_dev_console(timeout); | |
18306 | +} | |
18307 | + | |
18308 | +/* toi_early_boot_message() | |
18309 | + * Description: Handle errors early in the process of booting. | |
18310 | + * The user may press C to continue booting, perhaps | |
18311 | + * invalidating the image, or space to reboot. | |
18312 | + * This works from either the serial console or normally | |
18313 | + * attached keyboard. | |
18314 | + * | |
18315 | + * Note that we come in here from init, while the kernel is | |
18316 | + * locked. If we want to get events from the serial console, | |
18317 | + * we need to temporarily unlock the kernel. | |
18318 | + * | |
18319 | + * toi_early_boot_message may also be called post-boot. | |
18320 | + * In this case, it simply printks the message and returns. | |
18321 | + * | |
18322 | + * Arguments: int Whether we are able to erase the image. | |
18323 | + * int default_answer. What to do when we timeout. This | |
18324 | + * will normally be continue, but the user might | |
18325 | + * provide command line options (__setup) to override | |
18326 | + * particular cases. | |
18327 | + * Char *. Pointer to a string explaining why we're moaning. | |
18328 | + */ | |
18329 | + | |
18330 | +#define say(message, a...) printk(KERN_EMERG message, ##a) | |
18331 | + | |
18332 | +void toi_early_boot_message(int message_detail, int default_answer, | |
18333 | + char *warning_reason, ...) | |
18334 | +{ | |
18335 | + unsigned long orig_state = get_toi_state(), continue_req = 0; | |
18336 | +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) | |
18337 | + unsigned long orig_loglevel = console_loglevel; | |
18338 | + int can_ask = 1; | |
18339 | +#else | |
18340 | + int can_ask = 0; | |
18341 | +#endif | |
18342 | + | |
18343 | + va_list args; | |
18344 | + int printed_len; | |
18345 | + | |
18346 | + if (!toi_wait) { | |
18347 | + set_toi_state(TOI_CONTINUE_REQ); | |
18348 | + can_ask = 0; | |
18349 | + } | |
18350 | + | |
18351 | + if (warning_reason) { | |
18352 | + va_start(args, warning_reason); | |
18353 | + printed_len = vsnprintf(local_printf_buf, | |
18354 | + sizeof(local_printf_buf), | |
18355 | + warning_reason, | |
18356 | + args); | |
18357 | + va_end(args); | |
18358 | + } | |
18359 | + | |
18360 | + if (!test_toi_state(TOI_BOOT_TIME)) { | |
18361 | + printk("TuxOnIce: %s\n", local_printf_buf); | |
18362 | + return; | |
18363 | + } | |
18364 | + | |
18365 | + if (!can_ask) { | |
18366 | + continue_req = !!default_answer; | |
18367 | + goto post_ask; | |
18368 | + } | |
18369 | + | |
18370 | +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) | |
18371 | + console_loglevel = 7; | |
18372 | + | |
18373 | + say("=== TuxOnIce ===\n\n"); | |
18374 | + if (warning_reason) { | |
18375 | + say("BIG FAT WARNING!! %s\n\n", local_printf_buf); | |
18376 | + switch (message_detail) { | |
18377 | + case 0: | |
18378 | + say("If you continue booting, note that any image WILL " | |
18379 | + "NOT BE REMOVED.\nTuxOnIce is unable to do so " | |
18380 | + "because the appropriate modules aren't\n" | |
18381 | + "loaded. You should manually remove the image " | |
18382 | + "to avoid any\npossibility of corrupting your " | |
18383 | + "filesystem(s) later.\n"); | |
18384 | + break; | |
18385 | + case 1: | |
18386 | + say("If you want to use the current TuxOnIce image, " | |
18387 | + "reboot and try\nagain with the same kernel " | |
18388 | + "that you hibernated from. If you want\n" | |
18389 | + "to forget that image, continue and the image " | |
18390 | + "will be erased.\n"); | |
18391 | + break; | |
18392 | + } | |
18393 | + say("Press SPACE to reboot or C to continue booting with " | |
18394 | + "this kernel\n\n"); | |
18395 | + if (toi_wait > 0) | |
18396 | + say("Default action if you don't select one in %d " | |
18397 | + "seconds is: %s.\n", | |
18398 | + toi_wait, | |
18399 | + default_answer == TOI_CONTINUE_REQ ? | |
18400 | + "continue booting" : "reboot"); | |
18401 | + } else { | |
18402 | + say("BIG FAT WARNING!!\n\n" | |
18403 | + "You have tried to resume from this image before.\n" | |
18404 | + "If it failed once, it may well fail again.\n" | |
18405 | + "Would you like to remove the image and boot " | |
18406 | + "normally?\nThis will be equivalent to entering " | |
18407 | + "noresume on the\nkernel command line.\n\n" | |
18408 | + "Press SPACE to remove the image or C to continue " | |
18409 | + "resuming.\n\n"); | |
18410 | + if (toi_wait > 0) | |
18411 | + say("Default action if you don't select one in %d " | |
18412 | + "seconds is: %s.\n", toi_wait, | |
18413 | + !!default_answer ? | |
18414 | + "continue resuming" : "remove the image"); | |
18415 | + } | |
18416 | + console_loglevel = orig_loglevel; | |
18417 | + | |
18418 | + set_toi_state(TOI_SANITY_CHECK_PROMPT); | |
18419 | + clear_toi_state(TOI_CONTINUE_REQ); | |
18420 | + | |
18421 | + if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */ | |
18422 | + continue_req = !!default_answer; | |
18423 | + else | |
18424 | + continue_req = test_toi_state(TOI_CONTINUE_REQ); | |
18425 | + | |
18426 | +#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */ | |
18427 | + | |
18428 | +post_ask: | |
18429 | + if ((warning_reason) && (!continue_req)) | |
18430 | + machine_restart(NULL); | |
18431 | + | |
18432 | + restore_toi_state(orig_state); | |
18433 | + if (continue_req) | |
18434 | + set_toi_state(TOI_CONTINUE_REQ); | |
18435 | +} | |
18436 | +EXPORT_SYMBOL_GPL(toi_early_boot_message); | |
18437 | +#undef say | |
18438 | + | |
18439 | +/* | |
18440 | + * User interface specific /sys/power/tuxonice entries. | |
18441 | + */ | |
18442 | + | |
18443 | +static struct toi_sysfs_data sysfs_params[] = { | |
18444 | +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) | |
18445 | + SYSFS_INT("default_console_level", SYSFS_RW, | |
18446 | + &toi_bkd.toi_default_console_level, 0, 7, 0, NULL), | |
18447 | + SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0, | |
18448 | + 1 << 30, 0), | |
18449 | + SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL, | |
18450 | + 0) | |
18451 | +#endif | |
18452 | +}; | |
18453 | + | |
18454 | +static struct toi_module_ops userui_ops = { | |
18455 | + .type = MISC_HIDDEN_MODULE, | |
18456 | + .name = "printk ui", | |
18457 | + .directory = "user_interface", | |
18458 | + .module = THIS_MODULE, | |
18459 | + .sysfs_data = sysfs_params, | |
18460 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
18461 | + sizeof(struct toi_sysfs_data), | |
18462 | +}; | |
18463 | + | |
18464 | +int toi_register_ui_ops(struct ui_ops *this_ui) | |
18465 | +{ | |
18466 | + if (toi_current_ui) { | |
18467 | + printk(KERN_INFO "Only one TuxOnIce user interface module can " | |
18468 | + "be loaded at a time.\n"); | |
18469 | + return -EBUSY; | |
18470 | + } | |
18471 | + | |
18472 | + toi_current_ui = this_ui; | |
18473 | + | |
18474 | + return 0; | |
18475 | +} | |
18476 | +EXPORT_SYMBOL_GPL(toi_register_ui_ops); | |
18477 | + | |
18478 | +void toi_remove_ui_ops(struct ui_ops *this_ui) | |
18479 | +{ | |
18480 | + if (toi_current_ui != this_ui) | |
18481 | + return; | |
18482 | + | |
18483 | + toi_current_ui = NULL; | |
18484 | +} | |
18485 | +EXPORT_SYMBOL_GPL(toi_remove_ui_ops); | |
18486 | + | |
18487 | +/* toi_ui_init | |
18488 | + * Description: Boot time initialisation for user interface. | |
18489 | + */ | |
18490 | + | |
18491 | +int toi_ui_init(void) | |
18492 | +{ | |
18493 | + return toi_register_module(&userui_ops); | |
18494 | +} | |
18495 | + | |
18496 | +void toi_ui_exit(void) | |
18497 | +{ | |
18498 | + toi_unregister_module(&userui_ops); | |
18499 | +} | |
18500 | diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h | |
18501 | new file mode 100644 | |
e999739a | 18502 | index 0000000..dc45741 |
2380c486 JR |
18503 | --- /dev/null |
18504 | +++ b/kernel/power/tuxonice_ui.h | |
e999739a | 18505 | @@ -0,0 +1,103 @@ |
2380c486 JR |
18506 | +/* |
18507 | + * kernel/power/tuxonice_ui.h | |
18508 | + * | |
18509 | + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net) | |
18510 | + */ | |
18511 | + | |
18512 | +enum { | |
18513 | + DONT_CLEAR_BAR, | |
18514 | + CLEAR_BAR | |
18515 | +}; | |
18516 | + | |
18517 | +enum { | |
18518 | + /* Userspace -> Kernel */ | |
18519 | + USERUI_MSG_ABORT = 0x11, | |
18520 | + USERUI_MSG_SET_STATE = 0x12, | |
18521 | + USERUI_MSG_GET_STATE = 0x13, | |
18522 | + USERUI_MSG_GET_DEBUG_STATE = 0x14, | |
18523 | + USERUI_MSG_SET_DEBUG_STATE = 0x15, | |
18524 | + USERUI_MSG_SPACE = 0x18, | |
18525 | + USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A, | |
18526 | + USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B, | |
18527 | + USERUI_MSG_GET_LOGLEVEL = 0x1C, | |
18528 | + USERUI_MSG_SET_LOGLEVEL = 0x1D, | |
18529 | + USERUI_MSG_PRINTK = 0x1E, | |
18530 | + | |
18531 | + /* Kernel -> Userspace */ | |
18532 | + USERUI_MSG_MESSAGE = 0x21, | |
18533 | + USERUI_MSG_PROGRESS = 0x22, | |
18534 | + USERUI_MSG_POST_ATOMIC_RESTORE = 0x25, | |
18535 | + | |
18536 | + USERUI_MSG_MAX, | |
18537 | +}; | |
18538 | + | |
18539 | +struct userui_msg_params { | |
18540 | + u32 a, b, c, d; | |
18541 | + char text[255]; | |
18542 | +}; | |
18543 | + | |
18544 | +struct ui_ops { | |
18545 | + char (*wait_for_key) (int timeout); | |
18546 | + u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...); | |
18547 | + void (*prepare_status) (int clearbar, const char *fmt, ...); | |
18548 | + void (*cond_pause) (int pause, char *message); | |
18549 | + void (*abort)(int result_code, const char *fmt, ...); | |
18550 | + void (*prepare)(void); | |
18551 | + void (*cleanup)(void); | |
18552 | + void (*post_atomic_restore)(void); | |
18553 | + void (*message)(u32 section, u32 level, u32 normally_logged, | |
18554 | + const char *fmt, ...); | |
18555 | +}; | |
18556 | + | |
18557 | +extern struct ui_ops *toi_current_ui; | |
18558 | + | |
18559 | +#define toi_update_status(val, max, fmt, args...) \ | |
18560 | + (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \ | |
18561 | + max) | |
18562 | + | |
18563 | +#define toi_ui_post_atomic_restore(void) \ | |
18564 | + do { if (toi_current_ui) \ | |
18565 | + (toi_current_ui->post_atomic_restore)(); \ | |
18566 | + } while (0) | |
18567 | + | |
18568 | +#define toi_prepare_console(void) \ | |
e999739a | 18569 | + do { if (toi_current_ui) \ |
2380c486 JR |
18570 | + (toi_current_ui->prepare)(); \ |
18571 | + } while (0) | |
18572 | + | |
18573 | +#define toi_cleanup_console(void) \ | |
e999739a | 18574 | + do { if (toi_current_ui) \ |
2380c486 JR |
18575 | + (toi_current_ui->cleanup)(); \ |
18576 | + } while (0) | |
18577 | + | |
18578 | +#define abort_hibernate(result, fmt, args...) \ | |
18579 | + do { if (toi_current_ui) \ | |
18580 | + (toi_current_ui->abort)(result, fmt, ##args); \ | |
18581 | + else { \ | |
18582 | + set_abort_result(result); \ | |
18583 | + } \ | |
18584 | + } while (0) | |
18585 | + | |
18586 | +#define toi_cond_pause(pause, message) \ | |
18587 | + do { if (toi_current_ui) \ | |
18588 | + (toi_current_ui->cond_pause)(pause, message); \ | |
18589 | + } while (0) | |
18590 | + | |
18591 | +#define toi_prepare_status(clear, fmt, args...) \ | |
18592 | + do { if (toi_current_ui) \ | |
18593 | + (toi_current_ui->prepare_status)(clear, fmt, ##args); \ | |
18594 | + else \ | |
18595 | + printk(KERN_ERR fmt "%s", ##args, "\n"); \ | |
18596 | + } while (0) | |
18597 | + | |
18598 | +#define toi_message(sn, lev, log, fmt, a...) \ | |
18599 | +do { \ | |
18600 | + if (toi_current_ui && (!sn || test_debug_state(sn))) \ | |
18601 | + toi_current_ui->message(sn, lev, log, fmt, ##a); \ | |
18602 | +} while (0) | |
18603 | + | |
18604 | +__exit void toi_ui_cleanup(void); | |
18605 | +extern int toi_ui_init(void); | |
18606 | +extern void toi_ui_exit(void); | |
18607 | +extern int toi_register_ui_ops(struct ui_ops *this_ui); | |
18608 | +extern void toi_remove_ui_ops(struct ui_ops *this_ui); | |
18609 | diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c | |
18610 | new file mode 100644 | |
9474138d | 18611 | index 0000000..c7b1053 |
2380c486 JR |
18612 | --- /dev/null |
18613 | +++ b/kernel/power/tuxonice_userui.c | |
9474138d | 18614 | @@ -0,0 +1,662 @@ |
2380c486 JR |
18615 | +/* |
18616 | + * kernel/power/tuxonice_userui.c | |
18617 | + * | |
18618 | + * Copyright (C) 2005-2007 Bernard Blackham | |
18619 | + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net) | |
18620 | + * | |
18621 | + * This file is released under the GPLv2. | |
18622 | + * | |
18623 | + * Routines for TuxOnIce's user interface. | |
18624 | + * | |
18625 | + * The user interface code talks to a userspace program via a | |
18626 | + * netlink socket. | |
18627 | + * | |
18628 | + * The kernel side: | |
18629 | + * - starts the userui program; | |
18630 | + * - sends text messages and progress bar status; | |
18631 | + * | |
18632 | + * The user space side: | |
18633 | + * - passes messages regarding user requests (abort, toggle reboot etc) | |
18634 | + * | |
18635 | + */ | |
18636 | + | |
18637 | +#define __KERNEL_SYSCALLS__ | |
18638 | + | |
18639 | +#include <linux/suspend.h> | |
18640 | +#include <linux/freezer.h> | |
18641 | +#include <linux/console.h> | |
18642 | +#include <linux/ctype.h> | |
18643 | +#include <linux/tty.h> | |
18644 | +#include <linux/vt_kern.h> | |
2380c486 JR |
18645 | +#include <linux/reboot.h> |
18646 | +#include <linux/kmod.h> | |
18647 | +#include <linux/security.h> | |
18648 | +#include <linux/syscalls.h> | |
18649 | + | |
18650 | +#include "tuxonice_sysfs.h" | |
18651 | +#include "tuxonice_modules.h" | |
18652 | +#include "tuxonice.h" | |
18653 | +#include "tuxonice_ui.h" | |
18654 | +#include "tuxonice_netlink.h" | |
18655 | +#include "tuxonice_power_off.h" | |
18656 | + | |
18657 | +static char local_printf_buf[1024]; /* Same as printk - should be safe */ | |
18658 | + | |
18659 | +static struct user_helper_data ui_helper_data; | |
18660 | +static struct toi_module_ops userui_ops; | |
18661 | +static int orig_kmsg; | |
18662 | + | |
18663 | +static char lastheader[512]; | |
18664 | +static int lastheader_message_len; | |
18665 | +static int ui_helper_changed; /* Used at resume-time so don't overwrite value | |
18666 | + set from initrd/ramfs. */ | |
18667 | + | |
18668 | +/* Number of distinct progress amounts that userspace can display */ | |
18669 | +static int progress_granularity = 30; | |
18670 | + | |
18671 | +static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key); | |
18672 | + | |
18673 | +/** | |
18674 | + * ui_nl_set_state - Update toi_action based on a message from userui. | |
18675 | + * | |
18676 | + * @n: The bit (1 << bit) to set. | |
18677 | + */ | |
18678 | +static void ui_nl_set_state(int n) | |
18679 | +{ | |
18680 | + /* Only let them change certain settings */ | |
18681 | + static const u32 toi_action_mask = | |
18682 | + (1 << TOI_REBOOT) | (1 << TOI_PAUSE) | | |
18683 | + (1 << TOI_LOGALL) | | |
18684 | + (1 << TOI_SINGLESTEP) | | |
18685 | + (1 << TOI_PAUSE_NEAR_PAGESET_END); | |
18686 | + | |
18687 | + toi_bkd.toi_action = (toi_bkd.toi_action & (~toi_action_mask)) | | |
18688 | + (n & toi_action_mask); | |
18689 | + | |
18690 | + if (!test_action_state(TOI_PAUSE) && | |
18691 | + !test_action_state(TOI_SINGLESTEP)) | |
18692 | + wake_up_interruptible(&userui_wait_for_key); | |
18693 | +} | |
18694 | + | |
18695 | +/** | |
18696 | + * userui_post_atomic_restore - Tell userui that atomic restore just happened. | |
18697 | + * | |
18698 | + * Tell userui that atomic restore just occurred, so that it can do things like | |
18699 | + * redrawing the screen, re-getting settings and so on. | |
18700 | + */ | |
18701 | +static void userui_post_atomic_restore(void) | |
18702 | +{ | |
18703 | + toi_send_netlink_message(&ui_helper_data, | |
18704 | + USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0); | |
18705 | +} | |
18706 | + | |
18707 | +/** | |
18708 | + * userui_storage_needed - Report how much memory in image header is needed. | |
18709 | + */ | |
18710 | +static int userui_storage_needed(void) | |
18711 | +{ | |
18712 | + return sizeof(ui_helper_data.program) + 1 + sizeof(int); | |
18713 | +} | |
18714 | + | |
18715 | +/** | |
18716 | + * userui_save_config_info - Fill buffer with config info for image header. | |
18717 | + * | |
18718 | + * @buf: Buffer into which to put the config info we want to save. | |
18719 | + */ | |
18720 | +static int userui_save_config_info(char *buf) | |
18721 | +{ | |
18722 | + *((int *) buf) = progress_granularity; | |
18723 | + memcpy(buf + sizeof(int), ui_helper_data.program, | |
18724 | + sizeof(ui_helper_data.program)); | |
18725 | + return sizeof(ui_helper_data.program) + sizeof(int) + 1; | |
18726 | +} | |
18727 | + | |
18728 | +/** | |
18729 | + * userui_load_config_info - Restore config info from buffer. | |
18730 | + * | |
18731 | + * @buf: Buffer containing header info loaded. | |
18732 | + * @size: Size of data loaded for this module. | |
18733 | + */ | |
18734 | +static void userui_load_config_info(char *buf, int size) | |
18735 | +{ | |
18736 | + progress_granularity = *((int *) buf); | |
18737 | + size -= sizeof(int); | |
18738 | + | |
18739 | + /* Don't load the saved path if one has already been set */ | |
18740 | + if (ui_helper_changed) | |
18741 | + return; | |
18742 | + | |
18743 | + if (size > sizeof(ui_helper_data.program)) | |
18744 | + size = sizeof(ui_helper_data.program); | |
18745 | + | |
18746 | + memcpy(ui_helper_data.program, buf + sizeof(int), size); | |
18747 | + ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0'; | |
18748 | +} | |
18749 | + | |
18750 | +/** | |
18751 | + * set_ui_program_set - Record that userui program was changed. | |
18752 | + * | |
18753 | + * Side effect routine for when the userui program is set. In an initrd or | |
18754 | + * ramfs, the user may set a location for the userui program. If this happens, | |
18755 | + * we don't want to reload the value that was saved in the image header. This | |
18756 | + * routine allows us to flag that we shouldn't restore the program name from | |
18757 | + * the image header. | |
18758 | + */ | |
18759 | +static void set_ui_program_set(void) | |
18760 | +{ | |
18761 | + ui_helper_changed = 1; | |
18762 | +} | |
18763 | + | |
18764 | +/** | |
18765 | + * userui_memory_needed - Tell core how much memory to reserve for us. | |
18766 | + */ | |
18767 | +static int userui_memory_needed(void) | |
18768 | +{ | |
18769 | + /* ball park figure of 128 pages */ | |
18770 | + return 128 * PAGE_SIZE; | |
18771 | +} | |
18772 | + | |
18773 | +/** | |
18774 | + * userui_update_status - Update the progress bar and (if on) in-bar message. | |
18775 | + * | |
18776 | + * @value: Current progress percentage numerator. | |
18777 | + * @maximum: Current progress percentage denominator. | |
18778 | + * @fmt: Message to be displayed in the middle of the progress bar. | |
18779 | + * | |
18780 | + * Note that a NULL message does not mean that any previous message is erased! | |
18781 | + * For that, you need toi_prepare_status with clearbar on. | |
18782 | + * | |
18783 | + * Returns a u32, being the next numerator (as determined by the | |
18784 | + * maximum and progress granularity) where status needs to be updated. | |
18785 | + * This is to reduce unnecessary calls to update_status. | |
18786 | + */ | |
18787 | +static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...) | |
18788 | +{ | |
18789 | + static u32 last_step = 9999; | |
18790 | + struct userui_msg_params msg; | |
18791 | + u32 this_step, next_update; | |
18792 | + int bitshift; | |
18793 | + | |
18794 | + if (ui_helper_data.pid == -1) | |
18795 | + return 0; | |
18796 | + | |
18797 | + if ((!maximum) || (!progress_granularity)) | |
18798 | + return maximum; | |
18799 | + | |
18800 | + if (value < 0) | |
18801 | + value = 0; | |
18802 | + | |
18803 | + if (value > maximum) | |
18804 | + value = maximum; | |
18805 | + | |
18806 | + /* Try to avoid math problems - we can't do 64 bit math here | |
18807 | + * (and shouldn't need it - anyone got screen resolution | |
18808 | + * of 65536 pixels or more?) */ | |
18809 | + bitshift = fls(maximum) - 16; | |
18810 | + if (bitshift > 0) { | |
18811 | + u32 temp_maximum = maximum >> bitshift; | |
18812 | + u32 temp_value = value >> bitshift; | |
18813 | + this_step = (u32) | |
18814 | + (temp_value * progress_granularity / temp_maximum); | |
18815 | + next_update = (((this_step + 1) * temp_maximum / | |
18816 | + progress_granularity) + 1) << bitshift; | |
18817 | + } else { | |
18818 | + this_step = (u32) (value * progress_granularity / maximum); | |
18819 | + next_update = ((this_step + 1) * maximum / | |
18820 | + progress_granularity) + 1; | |
18821 | + } | |
18822 | + | |
18823 | + if (this_step == last_step) | |
18824 | + return next_update; | |
18825 | + | |
18826 | + memset(&msg, 0, sizeof(msg)); | |
18827 | + | |
18828 | + msg.a = this_step; | |
18829 | + msg.b = progress_granularity; | |
18830 | + | |
18831 | + if (fmt) { | |
18832 | + va_list args; | |
18833 | + va_start(args, fmt); | |
18834 | + vsnprintf(msg.text, sizeof(msg.text), fmt, args); | |
18835 | + va_end(args); | |
18836 | + msg.text[sizeof(msg.text)-1] = '\0'; | |
18837 | + } | |
18838 | + | |
18839 | + toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS, | |
18840 | + &msg, sizeof(msg)); | |
18841 | + last_step = this_step; | |
18842 | + | |
18843 | + return next_update; | |
18844 | +} | |
18845 | + | |
18846 | +/** | |
18847 | + * userui_message - Display a message without necessarily logging it. | |
18848 | + * | |
18849 | + * @section: Type of message. Messages can be filtered by type. | |
18850 | + * @level: Degree of importance of the message. Lower values = higher priority. | |
18851 | + * @normally_logged: Whether logged even if log_everything is off. | |
18852 | + * @fmt: Message (and parameters). | |
18853 | + * | |
18854 | + * This function is intended to do the same job as printk, but without normally | |
18855 | + * logging what is printed. The point is to be able to get debugging info on | |
18856 | + * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M" | |
18857 | + * | |
18858 | + * It may be called from an interrupt context - can't sleep! | |
18859 | + */ | |
18860 | +static void userui_message(u32 section, u32 level, u32 normally_logged, | |
18861 | + const char *fmt, ...) | |
18862 | +{ | |
18863 | + struct userui_msg_params msg; | |
18864 | + | |
18865 | + if ((level) && (level > console_loglevel)) | |
18866 | + return; | |
18867 | + | |
18868 | + memset(&msg, 0, sizeof(msg)); | |
18869 | + | |
18870 | + msg.a = section; | |
18871 | + msg.b = level; | |
18872 | + msg.c = normally_logged; | |
18873 | + | |
18874 | + if (fmt) { | |
18875 | + va_list args; | |
18876 | + va_start(args, fmt); | |
18877 | + vsnprintf(msg.text, sizeof(msg.text), fmt, args); | |
18878 | + va_end(args); | |
18879 | + msg.text[sizeof(msg.text)-1] = '\0'; | |
18880 | + } | |
18881 | + | |
18882 | + if (test_action_state(TOI_LOGALL)) | |
18883 | + printk(KERN_INFO "%s\n", msg.text); | |
18884 | + | |
18885 | + toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE, | |
18886 | + &msg, sizeof(msg)); | |
18887 | +} | |
18888 | + | |
18889 | +/** | |
18890 | + * wait_for_key_via_userui - Wait for userui to receive a keypress. | |
18891 | + */ | |
18892 | +static void wait_for_key_via_userui(void) | |
18893 | +{ | |
18894 | + DECLARE_WAITQUEUE(wait, current); | |
18895 | + | |
18896 | + add_wait_queue(&userui_wait_for_key, &wait); | |
18897 | + set_current_state(TASK_INTERRUPTIBLE); | |
18898 | + | |
18899 | + interruptible_sleep_on(&userui_wait_for_key); | |
18900 | + | |
18901 | + set_current_state(TASK_RUNNING); | |
18902 | + remove_wait_queue(&userui_wait_for_key, &wait); | |
18903 | +} | |
18904 | + | |
18905 | +/** | |
18906 | + * userui_prepare_status - Display high level messages. | |
18907 | + * | |
18908 | + * @clearbar: Whether to clear the progress bar. | |
18909 | + * @fmt...: New message for the title. | |
18910 | + * | |
18911 | + * Prepare the 'nice display', drawing the header and version, along with the | |
18912 | + * current action and perhaps also resetting the progress bar. | |
18913 | + */ | |
18914 | +static void userui_prepare_status(int clearbar, const char *fmt, ...) | |
18915 | +{ | |
18916 | + va_list args; | |
18917 | + | |
18918 | + if (fmt) { | |
18919 | + va_start(args, fmt); | |
18920 | + lastheader_message_len = vsnprintf(lastheader, sizeof(lastheader), fmt, args); | |
18921 | + va_end(args); | |
18922 | + } | |
18923 | + | |
18924 | + if (clearbar) | |
18925 | + toi_update_status(0, 1, NULL); | |
18926 | + | |
18927 | + if (ui_helper_data.pid == -1) | |
18928 | + printk(KERN_EMERG "%s\n", lastheader); | |
18929 | + else | |
18930 | + toi_message(0, TOI_STATUS, 1, "%s", lastheader); | |
18931 | +} | |
18932 | + | |
18933 | +/** | |
18934 | + * userui_wait_for_keypress - Wait for keypress via userui. | |
18935 | + * | |
18936 | + * @timeout: Maximum time to wait (currently ignored). | |
18937 | + * | |
18938 | + * Wait for a keypress from userui. | |
18939 | + * | |
18940 | + * FIXME: Implement timeout? | |
18941 | + */ | |
18942 | +static char userui_wait_for_keypress(int timeout) | |
18943 | +{ | |
18944 | + char key = '\0'; | |
18945 | + | |
18946 | + if (ui_helper_data.pid != -1) { | |
18947 | + wait_for_key_via_userui(); | |
18948 | + key = ' '; | |
18949 | + } | |
18950 | + | |
18951 | + return key; | |
18952 | +} | |
18953 | + | |
18954 | +/** | |
18955 | + * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it. | |
18956 | + * | |
18957 | + * @result_code: Reason why we're aborting (1 << bit). | |
18958 | + * @fmt: Message to display if telling the user what's going on. | |
18959 | + * | |
18960 | + * Abort a cycle. If this wasn't at the user's request (and we're displaying | |
18961 | + * output), tell the user why and wait for them to acknowledge the message. | |
18962 | + */ | |
18963 | +static void userui_abort_hibernate(int result_code, const char *fmt, ...) | |
18964 | +{ | |
18965 | + va_list args; | |
18966 | + int printed_len = 0; | |
18967 | + | |
18968 | + set_result_state(result_code); | |
18969 | + | |
18970 | + if (test_result_state(TOI_ABORTED)) | |
18971 | + return; | |
18972 | + | |
18973 | + set_result_state(TOI_ABORTED); | |
18974 | + | |
18975 | + if (test_result_state(TOI_ABORT_REQUESTED)) | |
18976 | + return; | |
18977 | + | |
18978 | + va_start(args, fmt); | |
18979 | + printed_len = vsnprintf(local_printf_buf, sizeof(local_printf_buf), | |
18980 | + fmt, args); | |
18981 | + va_end(args); | |
18982 | + if (ui_helper_data.pid != -1) | |
18983 | + strlcat(local_printf_buf, " (Press SPACE to continue)", | |
18984 | + sizeof(local_printf_buf)); | |
18985 | + | |
18986 | + toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf); | |
18987 | + | |
18988 | + if (ui_helper_data.pid != -1) | |
18989 | + userui_wait_for_keypress(0); | |
18990 | +} | |
18991 | + | |
18992 | +/** | |
18993 | + * request_abort_hibernate - Abort hibernating or resuming at user request. | |
18994 | + * | |
18995 | + * Handle the user requesting the cancellation of a hibernation or resume by | |
18996 | + * pressing escape. | |
18997 | + */ | |
18998 | +static void request_abort_hibernate(void) | |
18999 | +{ | |
19000 | + if (test_result_state(TOI_ABORT_REQUESTED)) | |
19001 | + return; | |
19002 | + | |
19003 | + if (test_toi_state(TOI_NOW_RESUMING)) { | |
19004 | + toi_prepare_status(CLEAR_BAR, "Escape pressed. " | |
19005 | + "Powering down again."); | |
19006 | + set_toi_state(TOI_STOP_RESUME); | |
19007 | + while (!test_toi_state(TOI_IO_STOPPED)) | |
19008 | + schedule(); | |
19009 | + if (toiActiveAllocator->mark_resume_attempted) | |
19010 | + toiActiveAllocator->mark_resume_attempted(0); | |
19011 | + toi_power_down(); | |
19012 | + } | |
19013 | + | |
19014 | + toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :" | |
19015 | + " ABORTING HIBERNATION ---"); | |
19016 | + set_abort_result(TOI_ABORT_REQUESTED); | |
19017 | + wake_up_interruptible(&userui_wait_for_key); | |
19018 | +} | |
19019 | + | |
19020 | +/** | |
19021 | + * userui_user_rcv_msg - Receive a netlink message from userui. | |
19022 | + * | |
19023 | + * @skb: skb received. | |
19024 | + * @nlh: Netlink header received. | |
19025 | + */ | |
19026 | +static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |
19027 | +{ | |
19028 | + int type; | |
19029 | + int *data; | |
19030 | + | |
19031 | + type = nlh->nlmsg_type; | |
19032 | + | |
19033 | + /* A control message: ignore them */ | |
19034 | + if (type < NETLINK_MSG_BASE) | |
19035 | + return 0; | |
19036 | + | |
19037 | + /* Unknown message: reply with EINVAL */ | |
19038 | + if (type >= USERUI_MSG_MAX) | |
19039 | + return -EINVAL; | |
19040 | + | |
19041 | + /* All operations require privileges, even GET */ | |
19042 | + if (security_netlink_recv(skb, CAP_NET_ADMIN)) | |
19043 | + return -EPERM; | |
19044 | + | |
19045 | + /* Only allow one task to receive NOFREEZE privileges */ | |
19046 | + if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) { | |
19047 | + printk(KERN_INFO "Got NOFREEZE_ME request when " | |
19048 | + "ui_helper_data.pid is %d.\n", ui_helper_data.pid); | |
19049 | + return -EBUSY; | |
19050 | + } | |
19051 | + | |
19052 | + data = (int *) NLMSG_DATA(nlh); | |
19053 | + | |
19054 | + switch (type) { | |
19055 | + case USERUI_MSG_ABORT: | |
19056 | + request_abort_hibernate(); | |
19057 | + return 0; | |
19058 | + case USERUI_MSG_GET_STATE: | |
19059 | + toi_send_netlink_message(&ui_helper_data, | |
19060 | + USERUI_MSG_GET_STATE, &toi_bkd.toi_action, | |
19061 | + sizeof(toi_bkd.toi_action)); | |
19062 | + return 0; | |
19063 | + case USERUI_MSG_GET_DEBUG_STATE: | |
19064 | + toi_send_netlink_message(&ui_helper_data, | |
19065 | + USERUI_MSG_GET_DEBUG_STATE, | |
19066 | + &toi_bkd.toi_debug_state, | |
19067 | + sizeof(toi_bkd.toi_debug_state)); | |
19068 | + return 0; | |
19069 | + case USERUI_MSG_SET_STATE: | |
19070 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
19071 | + return -EINVAL; | |
19072 | + ui_nl_set_state(*data); | |
19073 | + return 0; | |
19074 | + case USERUI_MSG_SET_DEBUG_STATE: | |
19075 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
19076 | + return -EINVAL; | |
19077 | + toi_bkd.toi_debug_state = (*data); | |
19078 | + return 0; | |
19079 | + case USERUI_MSG_SPACE: | |
19080 | + wake_up_interruptible(&userui_wait_for_key); | |
19081 | + return 0; | |
19082 | + case USERUI_MSG_GET_POWERDOWN_METHOD: | |
19083 | + toi_send_netlink_message(&ui_helper_data, | |
19084 | + USERUI_MSG_GET_POWERDOWN_METHOD, | |
19085 | + &toi_poweroff_method, | |
19086 | + sizeof(toi_poweroff_method)); | |
19087 | + return 0; | |
19088 | + case USERUI_MSG_SET_POWERDOWN_METHOD: | |
19089 | + if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char))) | |
19090 | + return -EINVAL; | |
19091 | + toi_poweroff_method = (unsigned long)(*(unsigned char *) data); | |
19092 | + return 0; | |
19093 | + case USERUI_MSG_GET_LOGLEVEL: | |
19094 | + toi_send_netlink_message(&ui_helper_data, | |
19095 | + USERUI_MSG_GET_LOGLEVEL, | |
19096 | + &toi_bkd.toi_default_console_level, | |
19097 | + sizeof(toi_bkd.toi_default_console_level)); | |
19098 | + return 0; | |
19099 | + case USERUI_MSG_SET_LOGLEVEL: | |
19100 | + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) | |
19101 | + return -EINVAL; | |
19102 | + toi_bkd.toi_default_console_level = (*data); | |
19103 | + return 0; | |
19104 | + case USERUI_MSG_PRINTK: | |
e999739a | 19105 | + printk(KERN_INFO "%s", (char *) data); |
2380c486 JR |
19106 | + return 0; |
19107 | + } | |
19108 | + | |
19109 | + /* Unhandled here */ | |
19110 | + return 1; | |
19111 | +} | |
19112 | + | |
19113 | +/** | |
19114 | + * userui_cond_pause - Possibly pause at user request. | |
19115 | + * | |
19116 | + * @pause: Whether to pause or just display the message. | |
19117 | + * @message: Message to display at the start of pausing. | |
19118 | + * | |
19119 | + * Potentially pause and wait for the user to tell us to continue. We normally | |
19120 | + * only pause when @pause is set. While paused, the user can do things like | |
19121 | + * changing the loglevel, toggling the display of debugging sections and such | |
19122 | + * like. | |
19123 | + */ | |
19124 | +static void userui_cond_pause(int pause, char *message) | |
19125 | +{ | |
19126 | + int displayed_message = 0, last_key = 0; | |
19127 | + | |
19128 | + while (last_key != 32 && | |
19129 | + ui_helper_data.pid != -1 && | |
19130 | + ((test_action_state(TOI_PAUSE) && pause) || | |
19131 | + (test_action_state(TOI_SINGLESTEP)))) { | |
19132 | + if (!displayed_message) { | |
19133 | + toi_prepare_status(DONT_CLEAR_BAR, | |
19134 | + "%s Press SPACE to continue.%s", | |
19135 | + message ? message : "", | |
19136 | + (test_action_state(TOI_SINGLESTEP)) ? | |
19137 | + " Single step on." : ""); | |
19138 | + displayed_message = 1; | |
19139 | + } | |
19140 | + last_key = userui_wait_for_keypress(0); | |
19141 | + } | |
19142 | + schedule(); | |
19143 | +} | |
19144 | + | |
19145 | +/** | |
19146 | + * userui_prepare_console - Prepare the console for use. | |
19147 | + * | |
19148 | + * Prepare a console for use, saving current kmsg settings and attempting to | |
19149 | + * start userui. Console loglevel changes are handled by userui. | |
19150 | + */ | |
19151 | +static void userui_prepare_console(void) | |
19152 | +{ | |
19153 | + orig_kmsg = kmsg_redirect; | |
19154 | + kmsg_redirect = fg_console + 1; | |
19155 | + | |
19156 | + ui_helper_data.pid = -1; | |
19157 | + | |
19158 | + if (!userui_ops.enabled) { | |
e999739a | 19159 | + printk(KERN_INFO "TuxOnIce: Userui disabled.\n"); |
2380c486 JR |
19160 | + return; |
19161 | + } | |
19162 | + | |
19163 | + if (*ui_helper_data.program) | |
19164 | + toi_netlink_setup(&ui_helper_data); | |
19165 | + else | |
19166 | + printk(KERN_INFO "TuxOnIce: Userui program not configured.\n"); | |
19167 | +} | |
19168 | + | |
19169 | +/** | |
19170 | + * userui_cleanup_console - Cleanup after a cycle. | |
19171 | + * | |
19172 | + * Tell userui to cleanup, and restore kmsg_redirect to its original value. | |
19173 | + */ | |
19174 | + | |
19175 | +static void userui_cleanup_console(void) | |
19176 | +{ | |
19177 | + if (ui_helper_data.pid > -1) | |
19178 | + toi_netlink_close(&ui_helper_data); | |
19179 | + | |
19180 | + kmsg_redirect = orig_kmsg; | |
19181 | +} | |
19182 | + | |
19183 | +/* | |
19184 | + * User interface specific /sys/power/tuxonice entries. | |
19185 | + */ | |
19186 | + | |
19187 | +static struct toi_sysfs_data sysfs_params[] = { | |
19188 | +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) | |
19189 | + SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action, | |
19190 | + TOI_CAN_CANCEL, 0), | |
19191 | + SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action, | |
19192 | + TOI_PAUSE, 0), | |
19193 | + SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL), | |
19194 | + SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, | |
19195 | + 2048, 0, NULL), | |
19196 | + SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0, | |
19197 | + set_ui_program_set), | |
19198 | + SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL) | |
19199 | +#endif | |
19200 | +}; | |
19201 | + | |
19202 | +static struct toi_module_ops userui_ops = { | |
19203 | + .type = MISC_MODULE, | |
19204 | + .name = "userui", | |
19205 | + .shared_directory = "user_interface", | |
19206 | + .module = THIS_MODULE, | |
19207 | + .storage_needed = userui_storage_needed, | |
19208 | + .save_config_info = userui_save_config_info, | |
19209 | + .load_config_info = userui_load_config_info, | |
19210 | + .memory_needed = userui_memory_needed, | |
19211 | + .sysfs_data = sysfs_params, | |
19212 | + .num_sysfs_entries = sizeof(sysfs_params) / | |
19213 | + sizeof(struct toi_sysfs_data), | |
19214 | +}; | |
19215 | + | |
19216 | +static struct ui_ops my_ui_ops = { | |
19217 | + .post_atomic_restore = userui_post_atomic_restore, | |
19218 | + .update_status = userui_update_status, | |
19219 | + .message = userui_message, | |
19220 | + .prepare_status = userui_prepare_status, | |
19221 | + .abort = userui_abort_hibernate, | |
19222 | + .cond_pause = userui_cond_pause, | |
19223 | + .prepare = userui_prepare_console, | |
19224 | + .cleanup = userui_cleanup_console, | |
19225 | + .wait_for_key = userui_wait_for_keypress, | |
19226 | +}; | |
19227 | + | |
19228 | +/** | |
19229 | + * toi_user_ui_init - Boot time initialisation for user interface. | |
19230 | + * | |
19231 | + * Invoked from the core init routine. | |
19232 | + */ | |
19233 | +static __init int toi_user_ui_init(void) | |
19234 | +{ | |
19235 | + int result; | |
19236 | + | |
19237 | + ui_helper_data.nl = NULL; | |
19238 | + strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255); | |
19239 | + ui_helper_data.pid = -1; | |
19240 | + ui_helper_data.skb_size = sizeof(struct userui_msg_params); | |
19241 | + ui_helper_data.pool_limit = 6; | |
19242 | + ui_helper_data.netlink_id = NETLINK_TOI_USERUI; | |
19243 | + ui_helper_data.name = "userspace ui"; | |
19244 | + ui_helper_data.rcv_msg = userui_user_rcv_msg; | |
19245 | + ui_helper_data.interface_version = 8; | |
19246 | + ui_helper_data.must_init = 0; | |
19247 | + ui_helper_data.not_ready = userui_cleanup_console; | |
19248 | + init_completion(&ui_helper_data.wait_for_process); | |
19249 | + result = toi_register_module(&userui_ops); | |
19250 | + if (!result) | |
19251 | + result = toi_register_ui_ops(&my_ui_ops); | |
19252 | + if (result) | |
19253 | + toi_unregister_module(&userui_ops); | |
19254 | + | |
19255 | + return result; | |
19256 | +} | |
19257 | + | |
19258 | +#ifdef MODULE | |
19259 | +/** | |
19260 | + * toi_user_ui_exit - Cleanup code for when the core is unloaded. | |
19261 | + */ | |
19262 | +static __exit void toi_user_ui_exit(void) | |
19263 | +{ | |
19264 | + toi_netlink_close_complete(&ui_helper_data); | |
19265 | + toi_remove_ui_ops(&my_ui_ops); | |
19266 | + toi_unregister_module(&userui_ops); | |
19267 | +} | |
19268 | + | |
19269 | +module_init(toi_user_ui_init); | |
19270 | +module_exit(toi_user_ui_exit); | |
19271 | +MODULE_AUTHOR("Nigel Cunningham"); | |
19272 | +MODULE_DESCRIPTION("TuxOnIce Userui Support"); | |
19273 | +MODULE_LICENSE("GPL"); | |
19274 | +#else | |
19275 | +late_initcall(toi_user_ui_init); | |
19276 | +#endif | |
19277 | diff --git a/kernel/power/user.c b/kernel/power/user.c | |
9474138d | 19278 | index ed97375..3519246 100644 |
2380c486 JR |
19279 | --- a/kernel/power/user.c |
19280 | +++ b/kernel/power/user.c | |
9474138d | 19281 | @@ -65,6 +65,7 @@ static struct snapshot_data { |
2380c486 JR |
19282 | } snapshot_state; |
19283 | ||
19284 | atomic_t snapshot_device_available = ATOMIC_INIT(1); | |
19285 | +EXPORT_SYMBOL_GPL(snapshot_device_available); | |
19286 | ||
19287 | static int snapshot_open(struct inode *inode, struct file *filp) | |
19288 | { | |
19289 | diff --git a/kernel/printk.c b/kernel/printk.c | |
9474138d | 19290 | index 5052b54..0215fc3 100644 |
2380c486 JR |
19291 | --- a/kernel/printk.c |
19292 | +++ b/kernel/printk.c | |
19293 | @@ -32,6 +32,7 @@ | |
19294 | #include <linux/security.h> | |
19295 | #include <linux/bootmem.h> | |
19296 | #include <linux/syscalls.h> | |
19297 | +#include <linux/suspend.h> | |
9474138d | 19298 | #include <linux/kexec.h> |
2380c486 JR |
19299 | |
19300 | #include <asm/uaccess.h> | |
9474138d | 19301 | @@ -60,6 +61,7 @@ int console_printk[4] = { |
2380c486 JR |
19302 | MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ |
19303 | DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ | |
19304 | }; | |
19305 | +EXPORT_SYMBOL_GPL(console_printk); | |
19306 | ||
19307 | /* | |
19308 | * Low level drivers may need that to know if they can schedule in | |
9474138d | 19309 | @@ -911,6 +913,7 @@ void suspend_console(void) |
2380c486 JR |
19310 | console_suspended = 1; |
19311 | up(&console_sem); | |
19312 | } | |
19313 | +EXPORT_SYMBOL_GPL(suspend_console); | |
19314 | ||
19315 | void resume_console(void) | |
19316 | { | |
9474138d | 19317 | @@ -920,6 +923,7 @@ void resume_console(void) |
2380c486 JR |
19318 | console_suspended = 0; |
19319 | release_console_sem(); | |
19320 | } | |
19321 | +EXPORT_SYMBOL_GPL(resume_console); | |
19322 | ||
19323 | /** | |
19324 | * acquire_console_sem - lock the console system for exclusive use. | |
2380c486 | 19325 | diff --git a/mm/bootmem.c b/mm/bootmem.c |
9474138d | 19326 | index daf9271..fb468a7 100644 |
2380c486 JR |
19327 | --- a/mm/bootmem.c |
19328 | +++ b/mm/bootmem.c | |
19329 | @@ -22,6 +22,7 @@ | |
19330 | unsigned long max_low_pfn; | |
19331 | unsigned long min_low_pfn; | |
19332 | unsigned long max_pfn; | |
19333 | +EXPORT_SYMBOL_GPL(max_pfn); | |
19334 | ||
19335 | #ifdef CONFIG_CRASH_DUMP | |
19336 | /* | |
19337 | diff --git a/mm/highmem.c b/mm/highmem.c | |
9474138d | 19338 | index 68eb1d9..800c7a9 100644 |
2380c486 JR |
19339 | --- a/mm/highmem.c |
19340 | +++ b/mm/highmem.c | |
19341 | @@ -58,6 +58,7 @@ unsigned int nr_free_highpages (void) | |
19342 | ||
19343 | return pages; | |
19344 | } | |
19345 | +EXPORT_SYMBOL_GPL(nr_free_highpages); | |
19346 | ||
19347 | static int pkmap_count[LAST_PKMAP]; | |
19348 | static unsigned int last_pkmap_nr; | |
19349 | diff --git a/mm/memory.c b/mm/memory.c | |
9474138d | 19350 | index 4126dd1..878eff1 100644 |
2380c486 JR |
19351 | --- a/mm/memory.c |
19352 | +++ b/mm/memory.c | |
9474138d | 19353 | @@ -1185,6 +1185,7 @@ no_page_table: |
2380c486 JR |
19354 | } |
19355 | return page; | |
19356 | } | |
19357 | +EXPORT_SYMBOL_GPL(follow_page); | |
19358 | ||
19359 | /* Can we do the FOLL_ANON optimization? */ | |
19360 | static inline int use_zero_page(struct vm_area_struct *vma) | |
19361 | diff --git a/mm/mmzone.c b/mm/mmzone.c | |
9474138d | 19362 | index f5b7d17..72a6770 100644 |
2380c486 JR |
19363 | --- a/mm/mmzone.c |
19364 | +++ b/mm/mmzone.c | |
9474138d | 19365 | @@ -14,6 +14,7 @@ struct pglist_data *first_online_pgdat(void) |
2380c486 JR |
19366 | { |
19367 | return NODE_DATA(first_online_node); | |
19368 | } | |
19369 | +EXPORT_SYMBOL_GPL(first_online_pgdat); | |
19370 | ||
19371 | struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) | |
19372 | { | |
9474138d | 19373 | @@ -23,6 +24,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) |
2380c486 JR |
19374 | return NULL; |
19375 | return NODE_DATA(nid); | |
19376 | } | |
19377 | +EXPORT_SYMBOL_GPL(next_online_pgdat); | |
19378 | ||
19379 | /* | |
19380 | * next_zone - helper magic for for_each_zone() | |
9474138d | 19381 | @@ -42,6 +44,7 @@ struct zone *next_zone(struct zone *zone) |
2380c486 JR |
19382 | } |
19383 | return zone; | |
19384 | } | |
19385 | +EXPORT_SYMBOL_GPL(next_zone); | |
19386 | ||
19387 | static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) | |
19388 | { | |
19389 | diff --git a/mm/page-writeback.c b/mm/page-writeback.c | |
9474138d | 19390 | index bb553c3..fb606e5 100644 |
2380c486 JR |
19391 | --- a/mm/page-writeback.c |
19392 | +++ b/mm/page-writeback.c | |
9474138d | 19393 | @@ -105,6 +105,7 @@ unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */ |
2380c486 JR |
19394 | * Flag that makes the machine dump writes/reads and block dirtyings. |
19395 | */ | |
19396 | int block_dump; | |
19397 | +EXPORT_SYMBOL_GPL(block_dump); | |
19398 | ||
19399 | /* | |
19400 | * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: | |
19401 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
9474138d | 19402 | index fe753ec..c2f026b 100644 |
2380c486 JR |
19403 | --- a/mm/page_alloc.c |
19404 | +++ b/mm/page_alloc.c | |
9474138d | 19405 | @@ -1810,6 +1810,26 @@ static unsigned int nr_free_zone_pages(int offset) |
2380c486 JR |
19406 | return sum; |
19407 | } | |
19408 | ||
19409 | +static unsigned int nr_unallocated_zone_pages(int offset) | |
19410 | +{ | |
19411 | + struct zoneref *z; | |
19412 | + struct zone *zone; | |
19413 | + | |
19414 | + /* Just pick one node, since fallback list is circular */ | |
19415 | + unsigned int sum = 0; | |
19416 | + | |
19417 | + struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); | |
19418 | + | |
19419 | + for_each_zone_zonelist(zone, z, zonelist, offset) { | |
19420 | + unsigned long high = zone->pages_high; | |
19421 | + unsigned long left = zone_page_state(zone, NR_FREE_PAGES); | |
19422 | + if (left > high) | |
19423 | + sum += left - high; | |
19424 | + } | |
19425 | + | |
19426 | + return sum; | |
19427 | +} | |
19428 | + | |
19429 | /* | |
19430 | * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL | |
19431 | */ | |
9474138d | 19432 | @@ -1820,6 +1840,15 @@ unsigned int nr_free_buffer_pages(void) |
2380c486 JR |
19433 | EXPORT_SYMBOL_GPL(nr_free_buffer_pages); |
19434 | ||
19435 | /* | |
19436 | + * Amount of free RAM above each zone's high watermark within ZONE_DMA and ZONE_NORMAL | |
19437 | + */ | |
19438 | +unsigned int nr_unallocated_buffer_pages(void) | |
19439 | +{ | |
19440 | + return nr_unallocated_zone_pages(gfp_zone(GFP_USER)); | |
19441 | +} | |
19442 | +EXPORT_SYMBOL_GPL(nr_unallocated_buffer_pages); | |
19443 | + | |
19444 | +/* | |
19445 | * Amount of free RAM allocatable within all zones | |
19446 | */ | |
19447 | unsigned int nr_free_pagecache_pages(void) | |
e999739a | 19448 | diff --git a/mm/shmem.c b/mm/shmem.c |
9474138d | 19449 | index b25f95c..4908d20 100644 |
e999739a | 19450 | --- a/mm/shmem.c |
19451 | +++ b/mm/shmem.c | |
9474138d | 19452 | @@ -1557,6 +1557,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode, |
e999739a | 19453 | memset(info, 0, (char *)inode - (char *)info); |
19454 | spin_lock_init(&info->lock); | |
19455 | info->flags = flags & VM_NORESERVE; | |
19456 | + if (flags & VM_ATOMIC_COPY) | |
19457 | + inode->i_flags |= S_ATOMIC_COPY; | |
19458 | INIT_LIST_HEAD(&info->swaplist); | |
19459 | ||
19460 | switch (mode & S_IFMT) { | |
19461 | diff --git a/mm/swap_state.c b/mm/swap_state.c | |
9474138d | 19462 | index 1416e7e..4f75ac3 100644 |
e999739a | 19463 | --- a/mm/swap_state.c |
19464 | +++ b/mm/swap_state.c | |
19465 | @@ -45,6 +45,7 @@ struct address_space swapper_space = { | |
19466 | .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), | |
19467 | .backing_dev_info = &swap_backing_dev_info, | |
19468 | }; | |
19469 | +EXPORT_SYMBOL_GPL(swapper_space); | |
19470 | ||
19471 | #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) | |
19472 | ||
2380c486 JR |
19473 | diff --git a/mm/swapfile.c b/mm/swapfile.c |
19474 | index 312fafe..894fcb5 100644 | |
19475 | --- a/mm/swapfile.c | |
19476 | +++ b/mm/swapfile.c | |
19477 | @@ -414,6 +414,7 @@ noswap: | |
19478 | spin_unlock(&swap_lock); | |
19479 | return (swp_entry_t) {0}; | |
19480 | } | |
19481 | +EXPORT_SYMBOL_GPL(get_swap_page); | |
19482 | ||
19483 | swp_entry_t get_swap_page_of_type(int type) | |
19484 | { | |
19485 | @@ -508,6 +509,7 @@ void swap_free(swp_entry_t entry) | |
19486 | spin_unlock(&swap_lock); | |
19487 | } | |
19488 | } | |
19489 | +EXPORT_SYMBOL_GPL(swap_free); | |
19490 | ||
19491 | /* | |
19492 | * How many references to page are currently swapped out? | |
19493 | @@ -1178,6 +1180,7 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset) | |
19494 | BUG_ON(se == start_se); /* It *must* be present */ | |
19495 | } | |
19496 | } | |
19497 | +EXPORT_SYMBOL_GPL(map_swap_page); | |
19498 | ||
19499 | #ifdef CONFIG_HIBERNATION | |
19500 | /* | |
19501 | @@ -1521,6 +1524,7 @@ out_dput: | |
19502 | out: | |
19503 | return err; | |
19504 | } | |
19505 | +EXPORT_SYMBOL_GPL(sys_swapoff); | |
19506 | ||
19507 | #ifdef CONFIG_PROC_FS | |
19508 | /* iterator */ | |
19509 | @@ -1919,6 +1923,7 @@ out: | |
19510 | } | |
19511 | return error; | |
19512 | } | |
19513 | +EXPORT_SYMBOL_GPL(sys_swapon); | |
19514 | ||
19515 | void si_swapinfo(struct sysinfo *val) | |
19516 | { | |
19517 | @@ -1936,6 +1941,7 @@ void si_swapinfo(struct sysinfo *val) | |
19518 | val->totalswap = total_swap_pages + nr_to_be_unused; | |
19519 | spin_unlock(&swap_lock); | |
19520 | } | |
19521 | +EXPORT_SYMBOL_GPL(si_swapinfo); | |
19522 | ||
19523 | /* | |
19524 | * Verify that a swap entry is valid and increment its swap map count. | |
19525 | @@ -1984,6 +1990,7 @@ get_swap_info_struct(unsigned type) | |
19526 | { | |
19527 | return &swap_info[type]; | |
19528 | } | |
19529 | +EXPORT_SYMBOL_GPL(get_swap_info_struct); | |
19530 | ||
19531 | /* | |
19532 | * swap_lock prevents swap_map being freed. Don't grab an extra | |
19533 | diff --git a/mm/vmscan.c b/mm/vmscan.c | |
9474138d | 19534 | index d254306..e20daf5 100644 |
2380c486 JR |
19535 | --- a/mm/vmscan.c |
19536 | +++ b/mm/vmscan.c | |
9474138d | 19537 | @@ -2036,6 +2036,9 @@ void wakeup_kswapd(struct zone *zone, int order) |
2380c486 JR |
19538 | if (!populated_zone(zone)) |
19539 | return; | |
19540 | ||
19541 | + if (freezer_is_on()) | |
19542 | + return; | |
19543 | + | |
19544 | pgdat = zone->zone_pgdat; | |
19545 | if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) | |
19546 | return; | |
9474138d | 19547 | @@ -2196,6 +2199,7 @@ out: |
2380c486 | 19548 | |
9474138d | 19549 | return sc.nr_reclaimed; |
2380c486 JR |
19550 | } |
19551 | +EXPORT_SYMBOL_GPL(shrink_all_memory); | |
19552 | #endif | |
19553 | ||
19554 | /* It's optimal to keep kswapds on the same CPUs as their memory, but |