1 diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt
2 new file mode 100644
3 index 0000000..7a96186
4 --- /dev/null
5 +++ b/Documentation/power/tuxonice-internals.txt
6 @@ -0,0 +1,477 @@
7 +                  TuxOnIce 3.0 Internal Documentation.
8 +                       Updated to 26 March 2009
9 +
10 +1.  Introduction.
11 +
12 +    TuxOnIce 3.0 is an addition to the Linux Kernel, designed to
13 +    allow the user to quickly shut down and quickly boot a computer, without
14 +    needing to close documents or programs. It is equivalent to the
15 +    hibernate facility in some laptops. This implementation, however,
16 +    requires no special BIOS or hardware support.
17 +
18 +    The code in these files is based upon the original implementation
19 +    prepared by Gabor Kuti and additional work by Pavel Machek and a
20 +    host of others. This code has been substantially reworked by Nigel
21 +    Cunningham, again with the help and testing of many others, not the
22 +    least of whom is Michael Frank. At its heart, however, the operation is
23 +    essentially the same as Gabor's version.
24 +
25 +2.  Overview of operation.
26 +
27 +    The basic sequence of operations is as follows:
28 +
29 +       a. Quiesce all other activity.
30 +       b. Ensure enough memory and storage space are available, and attempt
31 +          to free memory/storage if necessary.
32 +       c. Allocate the required memory and storage space.
33 +       d. Write the image.
34 +       e. Power down.
35 +
36 +    There are a number of complicating factors which mean that things are
37 +    not as simple as the above would imply, however...
38 +
39 +    o The activity of each process must be stopped at a point where it will
40 +    not be holding locks necessary for saving the image, and will not
41 +    unexpectedly restart an operation (due to something like a timeout) and
42 +    thereby make our image inconsistent.
43 +
44 +    o It is desirable that we sync outstanding I/O to disk before calculating
45 +    image statistics. This reduces corruption if one should suspend but
46 +    then not resume, and also makes later parts of the operation safer (see
47 +    below).
48 +
49 +    o We need to get as close as we can to an atomic copy of the data.
50 +    Inconsistencies in the image will result in inconsistent memory contents at
51 +    resume time, and thus in instability of the system and/or file system
52 +    corruption. This would appear to imply a maximum image size of one half of
53 +    the amount of RAM, but we have a solution... (again, below).
54 +
55 +    o In 2.6, we choose to play nicely with the other suspend-to-disk
56 +    implementations.
57 +
58 +3.  Detailed description of internals.
59 +
60 +    a. Quiescing activity.
61 +
62 +    Safely quiescing the system is achieved using three separate but related
63 +    aspects.
64 +
65 +    First, we note that the vast majority of processes don't need to run during
66 +    suspend. They can be 'frozen'. We therefore implement a refrigerator
67 +    routine, which processes enter and in which they remain until the cycle is
68 +    complete. Processes enter the refrigerator via try_to_freeze() invocations
69 +    at appropriate places.  A process cannot be frozen in any old place. It
70 +    must not be holding locks that will be needed for writing the image or
71 +    freezing other processes. For this reason, userspace processes generally
72 +    enter the refrigerator via the signal handling code, and kernel threads at
73 +    the place in their event loops where they drop locks and yield to other
74 +    processes or sleep.
75 +
76 +    The task of freezing processes is complicated by the fact that there can be
77 +    interdependencies between processes. Freezing process A before process B may
78 +    mean that process B cannot be frozen, because it blocks waiting for
79 +    process A rather than stopping in the refrigerator. This issue is seen where
80 +    userspace waits on freezeable kernel threads or fuse filesystem threads. To
81 +    address this issue, we implement the following algorithm for quiescing
82 +    activity:
83 +
84 +       - Freeze filesystems (including fuse - userspace programs starting
85 +               new requests are immediately frozen; programs already running
86 +               requests complete their work before being frozen in the next
87 +               step)
88 +       - Freeze userspace
89 +       - Thaw filesystems (this is safe now that userspace is frozen and no
90 +               fuse requests are outstanding).
91 +       - Invoke sys_sync (noop on fuse).
92 +       - Freeze filesystems
93 +       - Freeze kernel threads
94 +
95 +    If we need to free memory, we thaw kernel threads and filesystems, but not
96 +    userspace. We can then free caches without worrying about deadlocks due to
97 +    swap files being on frozen filesystems or such like.
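+
+    The freeze ordering above can be summarised in a short sketch (the helper
+    names here are illustrative only, not TuxOnIce's actual API):
+
+    /* Sketch of the quiescing sequence described above. */
+    static int quiesce_system(void)
+    {
+        freeze_filesystems();     /* new fuse requests block immediately */
+        freeze_userspace();
+        thaw_filesystems();       /* safe: no fuse requests outstanding */
+        sys_sync();               /* no-op on fuse */
+        freeze_filesystems();
+        freeze_kernel_threads();
+        return 0;
+    }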
98 +
99 +    b. Ensure enough memory & storage are available.
100 +
101 +    We have a number of constraints to meet in order to be able to successfully
102 +    suspend and resume.
103 +
104 +    First, the image will be written in two parts, described below. One of these
105 +    parts needs to have an atomic copy made, which of course implies a maximum
106 +    size of one half of the amount of system memory. The other part ('pageset')
107 +    is not atomically copied, and can therefore be as large or small as desired.
108 +
109 +    Second, we have constraints on the amount of storage available. In these
110 +    calculations, we may also consider any compression that will be done. The
111 +    cryptoapi module allows the user to configure an expected compression ratio.
112 +
113 +    Third, the user can specify an arbitrary limit on the image size, in
114 +    megabytes. This limit is treated as a soft limit, so that we don't fail the
115 +    attempt to suspend if we cannot meet this constraint.
116 +
117 +    c. Allocate the required memory and storage space.
118 +
119 +    Having done the initial freeze, we determine whether the above constraints
120 +    are met, and seek to allocate the metadata for the image. If the constraints
121 +    are not met, or we fail to allocate the required space for the metadata, we
122 +    seek to free the amount of memory that we calculate is needed and try again.
123 +    We allow up to four iterations of this loop before aborting the cycle. If we
124 +    do fail, it should only be because of a bug in TuxOnIce's calculations.
125 +
126 +    These steps are merged together in the prepare_image function, found in
127 +    prepare_image.c. The functions are merged because of the cyclical nature
128 +    of the problem of calculating how much memory and storage is needed. Since
129 +    the data structures containing the information about the image must
130 +    themselves take memory and use storage, the amount of memory and storage
131 +    required changes as we prepare the image. Since the changes are not large,
132 +    only one or two iterations will be required to achieve a solution.
133 +
134 +    The recursive nature of the algorithm is minimised by keeping user space
135 +    frozen while preparing the image, and by the fact that our records of which
136 +    pages are to be saved and which pageset they are saved in use bitmaps (so
137 +    that changes in number or fragmentation of the pages to be saved don't
138 +    feed back via changes in the amount of memory needed for metadata). The
139 +    recursiveness is thus limited to any extra slab pages allocated to store the
140 +    extents that record storage used, and the effects of seeking to free memory.
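+
+    A minimal sketch of the allocate-free-retry loop described above
+    (illustrative names; the real code is in prepare_image.c and differs
+    in detail):
+
+    static int prepare_image(void)
+    {
+        int tries;
+
+        for (tries = 0; tries < 4; tries++) {
+            if (constraints_met() && !allocate_metadata())
+                return 0;              /* ready to write the image */
+            thaw_kernel_threads_and_filesystems();
+            free_memory_needed();      /* userspace stays frozen */
+            refreeze_kernel_threads_and_filesystems();
+        }
+        return -ENOMEM;                /* only on a calculation bug */
+    }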
141 +
142 +    d. Write the image.
143 +
144 +    We previously mentioned the need to create an atomic copy of the data, and
145 +    the half-of-memory limitation that is implied in this. This limitation is
146 +    circumvented by dividing the memory to be saved into two parts, called
147 +    pagesets.
148 +
149 +    Pageset2 contains most of the page cache - the pages on the active and
150 +    inactive LRU lists that aren't needed or modified while TuxOnIce is
151 +    running, so they can be safely written without an atomic copy. They are
152 +    therefore saved first and reloaded last. While saving these pages,
153 +    TuxOnIce carefully ensures that the work of writing the pages doesn't make
154 +    the image inconsistent. With the support for Kernel (Video) Mode Setting
155 +    going into the kernel at the time of writing, we need to check for pages
156 +    on the LRU that are used by KMS, and exclude them from pageset2. They are
157 +    atomically copied as part of pageset 1.
158 +
159 +    Once pageset2 has been saved, we prepare to do the atomic copy of remaining
160 +    memory. As part of the preparation, we power down drivers, thereby providing
161 +    them with the opportunity to have their state recorded in the image. The
162 +    amount of memory allocated by drivers for this is usually negligible, but if
163 +    DRI is in use, video drivers may require significant amounts. Ideally we
164 +    would be able to query drivers while preparing the image as to the amount of
165 +    memory they will need. Unfortunately no such mechanism exists at the time of
166 +    writing. For this reason, TuxOnIce allows the user to set an
167 +    'extra_pages_allowance', which is used to seek to ensure sufficient memory
168 +    is available for drivers at this point. TuxOnIce also lets the user set this
169 +    value to 0. In this case, a test driver suspend is done while preparing the
170 +    image, and the difference (plus a margin) used instead. TuxOnIce will also
171 +    automatically restart the hibernation process (twice at most) if it finds
172 +    that the extra pages allowance is not sufficient. It will then use what was
173 +    actually needed (plus a margin, again). Failure to hibernate should thus
174 +    be an extremely rare occurrence.
175 +
176 +    Having suspended the drivers, we save the CPU context before making an
177 +    atomic copy of pageset1, resuming the drivers and saving the atomic copy.
178 +    After saving the two pagesets, we just need to save our metadata before
179 +    powering down.
180 +
181 +    As we mentioned earlier, the contents of pageset2 pages aren't needed once
182 +    they've been saved. We therefore use them as the destination of our atomic
183 +    copy. In the unlikely event that pageset1 is larger, extra pages are
184 +    allocated while the image is being prepared. This is normally only a real
185 +    possibility when the system has just been booted and the page cache is
186 +    small.
187 +
188 +    This is where we need to be careful about syncing, however. Pageset2 will
189 +    probably contain filesystem metadata. If this is overwritten with pageset1
190 +    and then a sync occurs, the filesystem will be corrupted - at least until
191 +    resume time and another sync of the restored data. Since there is a
192 +    possibility that the user might not resume or (may it never be!) that
193 +    TuxOnIce might oops, we do our utmost to avoid syncing filesystems after
194 +    copying pageset1.
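+
+    Putting the steps of this section together, the write sequence can be
+    sketched as follows (illustrative function names, not the actual calls):
+
+    save_pageset2();               /* LRU pages, written without copying */
+    suspend_drivers();             /* may allocate from the extra allowance */
+    save_cpu_context();
+    copy_pageset1();               /* atomic copy into freed pageset2 pages */
+    resume_drivers();
+    save_atomic_copy_and_metadata();   /* no syncing from here on */
+    power_down();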
195 +
196 +    e. Power down.
197 +
198 +    Powering down uses standard kernel routines. TuxOnIce supports powering down
199 +    using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off.
200 +    Supporting suspend to ram (S3) as a power off option might sound strange,
201 +    but it allows the user to quickly get their system up and running again if
202 +    the battery doesn't run out (we just need to re-read the overwritten pages);
203 +    if the battery does run out (or the user removes power), they can still
204 +    resume.
205 +
206 +4.  Data Structures.
207 +
208 +    TuxOnIce uses three main structures to store its metadata and configuration
209 +    information:
210 +
211 +    a) Pageflags bitmaps.
212 +
213 +    TuxOnIce records which pages will be in pageset1, pageset2, the destination
214 +    of the atomic copy and the source of the atomically restored image using
215 +    bitmaps. The code used is that written for swsusp, with small improvements
216 +    to match TuxOnIce's requirements.
217 +
218 +    The pageset1 bitmap is thus easily stored in the image header for use at
219 +    resume time.
220 +
221 +    As mentioned above, using bitmaps also means that the amount of memory and
222 +    storage required for recording the above information is constant. This
223 +    greatly simplifies the work of preparing the image. In earlier versions of
224 +    TuxOnIce, extents were used to record which pages would be stored. In that
225 +    case, however, eating memory could result in greater fragmentation of the
226 +    lists of pages, which in turn required more memory to store the extents and
227 +    more storage in the image header. These could in turn require further
228 +    freeing of memory, and another iteration. All of this complexity is removed
229 +    by having bitmaps.
230 +
231 +    Bitmaps also make a lot of sense because TuxOnIce only ever iterates
232 +    through the lists. There is therefore no cost to not being able to find the
233 +    nth page in O(1) time. We only need to worry about the cost of finding
234 +    the n+1th page, given the location of the nth page. Bitwise optimisations
235 +    help here.
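+
+    For example, advancing from one saved page to the next is simply a
+    next-bit search; find_next_bit() is the standard kernel helper, while
+    pageset1_map, max_pfn and last_pfn are illustrative names:
+
+    unsigned long pfn = find_next_bit(pageset1_map, max_pfn, last_pfn + 1);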
236 +
237 +    b) Extents for block data.
238 +
239 +    TuxOnIce supports writing the image to multiple block devices. In the case
240 +    of swap, multiple partitions and/or files may be in use, and we happily use
241 +    them all (with the exception of compcache pages, which we allocate but do
242 +    not use). This use of multiple block devices is accomplished as follows:
243 +
244 +    Whatever the actual source of the allocated storage, the destination of the
245 +    image can be viewed in terms of one or more block devices, and on each
246 +    device, a list of sectors. To simplify matters, we only use contiguous,
247 +    PAGE_SIZE aligned sectors, like the swap code does.
248 +
249 +    Since sector numbers on each bdev may well not start at 0, it makes much
250 +    more sense to use extents here. Contiguous ranges of pages can thus be
251 +    represented in the extents by contiguous values.
252 +
253 +    Variations in block size are taken account of in transforming this data
254 +    into the parameters for bio submission.
255 +
256 +    We can thus implement a layer of abstraction wherein the core of TuxOnIce
257 +    doesn't have to worry about which device we're currently writing to or
258 +    where in the device we are. It simply requests that the next page in the
259 +    pageset or header be written, leaving the details to this lower layer.
260 +    The lower layer remembers where in the sequence of devices and blocks each
261 +    pageset starts. The header always starts at the beginning of the allocated
262 +    storage.
263 +
264 +    So extents are:
265 +
266 +    struct extent {
267 +      unsigned long minimum, maximum; /* inclusive range covered */
268 +      struct extent *next;
269 +    };
270 +
271 +    These are combined into chains of extents for a device:
272 +
273 +    struct extent_chain {
274 +      int size; /* sum over all extents of (max - min + 1) */
275 +      int allocs, frees;
276 +      char *name;
277 +      struct extent *first, *last_touched;
278 +    };
279 +
280 +    For each bdev, we need to store a little more info:
281 +
282 +    struct suspend_bdev_info {
283 +       struct block_device *bdev;  /* open device holding image storage */
284 +       dev_t dev_t;                /* identifies the device in the image */
285 +       int bmap_shift;             /* block number to sector conversion */
286 +       int blocks_per_page;        /* fs blocks in one PAGE_SIZE page */
287 +    };
288 +
289 +    The dev_t is used to identify the device in the stored image. As a result,
290 +    we expect devices at resume time to have the same major and minor numbers
291 +    as they had while suspending.  This is primarily a concern where the user
292 +    utilises LVM for storage, as they will need to dmsetup their partitions in
293 +    such a way as to maintain this consistency at resume time.
294 +
295 +    bmap_shift and blocks_per_page apply the effects of variations in blocks
296 +    per page settings for the filesystem and underlying bdev. For most
297 +    filesystems, these are the same, but for xfs, they can have independent
298 +    values.
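+
+    As a hedged sketch of how these values might be applied (the actual
+    transformation lives in TuxOnIce's block I/O code and may differ):
+
+    /* blocks_per_page blocks, starting at 'block', make up one page;
+       the shift converts a block number into a sector number. */
+    sector_t first_sector = (sector_t)block << bdev_info->bmap_shift;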
299 +
300 +    Combining these two structures together, we have everything we need to
301 +    record what devices and what blocks on each device are being used to
302 +    store the image, and to submit I/O using submit_bio.
303 +
304 +    The last elements in the picture are a means of recording how the storage
305 +    is being used.
306 +
307 +    We do this first and foremost by implementing a layer of abstraction on
308 +    top of the devices and extent chains which allows us to view however many
309 +    devices there might be as one long storage tape, with a single 'head' that
310 +    tracks a 'current position' on the tape:
311 +
312 +    struct extent_iterate_state {
313 +      struct extent_chain *chains;
314 +      int num_chains;
315 +      int current_chain;
316 +      struct extent *current_extent;
317 +      unsigned long current_offset;
318 +    };
319 +
320 +    That is, *chains points to an array of size num_chains of extent chains.
321 +    For the filewriter, this is always a single chain. For the swapwriter, the
322 +    array is of size MAX_SWAPFILES.
323 +
324 +    current_chain, current_extent and current_offset thus point to the current
325 +    index in the chains array (and into a matching array of struct
326 +    suspend_bdev_info), the current extent in that chain (to optimise access),
327 +    and the current value in the offset.
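+
+    Advancing the 'head' by one page can then be sketched like this (names
+    and error handling are illustrative, not the actual TuxOnIce routines):
+
+    static int extent_state_next(struct extent_iterate_state *state)
+    {
+        if (++state->current_offset <= state->current_extent->maximum)
+            return 0;                 /* still inside the current extent */
+
+        state->current_extent = state->current_extent->next;
+        if (!state->current_extent) { /* move on to the next chain */
+            if (++state->current_chain >= state->num_chains)
+                return -ENOSPC;       /* ran off the end of the storage */
+            state->current_extent = state->chains[state->current_chain].first;
+        }
+        state->current_offset = state->current_extent->minimum;
+        return 0;
+    }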
328 +
329 +    The image is divided into three parts:
330 +    - The header
331 +    - Pageset 1
332 +    - Pageset 2
333 +
334 +    The header always starts at the first device and first block. We know its
335 +    size before we begin to save the image because we carefully account for
336 +    everything that will be stored in it.
337 +
338 +    The second pageset (LRU) is stored first. It begins on the next page after
339 +    the end of the header.
340 +
341 +    The first pageset is stored second. Its start location is only known once
342 +    pageset2 has been saved, since pageset2 may be compressed as it is written.
343 +    This location, which is also page aligned, is thus recorded at the end of
344 +    saving pageset2.
345 +
346 +    Since this information is needed at resume time, and the location of extents
347 +    in memory will differ at resume time, this needs to be stored in a portable
348 +    way:
349 +
350 +    struct extent_iterate_saved_state {
351 +        int chain_num;
352 +        int extent_num;
353 +        unsigned long offset;
354 +    };
355 +
356 +    This completes the abstraction layer described earlier: the core simply
357 +    requests that the next page in the pageset or header be written, and
358 +    invokes these routines to remember and restore the position, without
359 +    having to worry about how the data is arranged on disk or such like.
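+
+    Recording the position in the portable form might look like this
+    (an illustrative sketch; the matching restore routine does the reverse):
+
+    static void save_position(struct extent_iterate_state *state,
+                              struct extent_iterate_saved_state *saved)
+    {
+        struct extent *e = state->chains[state->current_chain].first;
+        int num = 0;
+
+        while (e != state->current_extent) {  /* pointer -> portable index */
+            e = e->next;
+            num++;
+        }
+        saved->chain_num = state->current_chain;
+        saved->extent_num = num;
+        saved->offset = state->current_offset;
+    }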
362 +
363 +    c) Modules
364 +
365 +    One aim in designing TuxOnIce was to make it flexible. We wanted to allow
366 +    for the implementation of different methods of transforming a page to be
367 +    written to disk and different methods of getting the pages stored.
368 +
369 +    In early versions (the betas and perhaps Suspend1), compression support was
370 +    inlined in the image writing code, and the data structures and code for
371 +    managing swap were intertwined with the rest of the code. A number of people
372 +    had expressed interest in implementing image encryption, and alternative
373 +    methods of storing the image.
374 +
375 +    In order to achieve this, TuxOnIce was given a modular design.
376 +
377 +    A module is a single file which encapsulates the functionality needed
378 +    to transform a pageset of data (encryption or compression, for example),
379 +    or to write the pageset to a device. The former type of module is called
380 +    a 'page-transformer', the latter a 'writer'.
381 +
382 +    Modules are linked together in pipeline fashion. There may be zero or more
383 +    page transformers in a pipeline, and there is always exactly one writer.
384 +    The pipeline follows this pattern:
385 +
386 +               ---------------------------------
387 +               |          TuxOnIce Core        |
388 +               ---------------------------------
389 +                               |
390 +                               |
391 +               ---------------------------------
392 +               |       Page transformer 1      |
393 +               ---------------------------------
394 +                               |
395 +                               |
396 +               ---------------------------------
397 +               |       Page transformer 2      |
398 +               ---------------------------------
399 +                               |
400 +                               |
401 +               ---------------------------------
402 +               |            Writer             |
403 +               ---------------------------------
404 +
405 +    During the writing of an image, the core code feeds pages one at a time
406 +    to the first module. This module performs whatever transformations it
407 +    implements on the incoming data, completely consuming the incoming data and
408 +    feeding output in a similar manner to the next module.
409 +
410 +    All routines are SMP safe, and the final result of the transformations is
411 +    written with an index (provided by the core) and size of the output by the
412 +    writer. As a result, we can have multithreaded I/O without needing to
413 +    worry about the sequence in which pages are written (or read).
414 +
415 +    During reading, the pipeline works in the reverse direction. The core code
416 +    calls the first module with the address of a buffer which should be filled.
417 +    (Note that the buffer size is always PAGE_SIZE at this time). This module
418 +    will in turn request data from the next module and so on down until the
419 +    writer is made to read from the stored image.
420 +
421 +    Part of the definition of the structure of a module thus looks like this:
422 +
423 +        int (*rw_init) (int rw, int stream_number);
424 +        int (*rw_cleanup) (int rw);
425 +        int (*write_chunk) (struct page *buffer_page);
426 +        int (*read_chunk) (struct page *buffer_page, int sync);
427 +
428 +    It should be noted that the _cleanup routine may be called before the
429 +    full stream of data has been read or written. While writing the image,
430 +    the user may (depending upon settings) choose to abort suspending, and
431 +    if we are in the midst of writing the last portion of the image, a portion
432 +    of the second pageset may be reread. This may also happen if an error
433 +    occurs and we seek to abort the process of writing the image.
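+
+    As a purely hypothetical illustration of the write-side flow, a
+    pass-through transformer might forward each page untouched ('next_module'
+    is invented here; the real module structure has additional fields):
+
+    static int null_write_chunk(struct page *buffer_page)
+    {
+        /* A real transformer would consume the incoming data, transform
+         * it and feed its output onward in the same manner. */
+        return next_module->write_chunk(buffer_page);
+    }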
434 +
435 +    The modular design is also useful in a number of other ways. It provides
436 +    a means whereby we can add support for:
437 +
438 +    - providing overall initialisation and cleanup routines;
439 +    - serialising configuration information in the image header;
440 +    - providing debugging information to the user;
441 +    - determining memory and image storage requirements;
442 +    - dis/enabling components at run-time;
443 +    - configuring the module (see below);
444 +
445 +    ...and routines for writers specific to their work:
446 +    - Parsing a resume= location;
447 +    - Determining whether an image exists;
448 +    - Marking a resume as having been attempted;
449 +    - Invalidating an image;
450 +
451 +    Since some parts of the core - the user interface and storage manager
452 +    support - have use for some of these functions, they are registered as
453 +    'miscellaneous' modules as well.
454 +
455 +    d) Sysfs data structures.
456 +
457 +    This brings us naturally to support for configuring TuxOnIce. We desired to
458 +    provide a way to make TuxOnIce as flexible and configurable as possible.
459 +    The user shouldn't have to reboot just because they now want to hibernate to
460 +    a file instead of a partition, for example.
461 +
462 +    To accomplish this, TuxOnIce implements a very generic means whereby the
463 +    core and modules can register new sysfs entries. All TuxOnIce entries use
464 +    a single _store and _show routine, both of which are found in
465 +    tuxonice_sysfs.c in the kernel/power directory. These routines handle the
466 +    most common operations - getting and setting the values of bits, integers,
467 +    longs, unsigned longs and strings in one place, and allow overrides for
468 +    customised get and set options as well as side-effect routines for all
469 +    reads and writes.
470 +
471 +    When combined with some simple macros, a new sysfs entry can then be defined
472 +    in just a couple of lines:
473 +
474 +        SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
475 +                        2048, 0, NULL),
476 +
477 +    This defines a sysfs entry named "progress_granularity" which is rw and
478 +    allows the user to access an integer stored at &progress_granularity, giving
479 +    it a value between 1 and 2048 inclusive.
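+
+    Another entry following the same pattern might look like this (the
+    variable and side-effect routine named here are hypothetical):
+
+        SYSFS_INT("example_flag", SYSFS_RW, &example_flag, 0, 1, 0,
+                        example_flag_changed),
+
+    This would clamp writes to 0 or 1 and run example_flag_changed() as a
+    side-effect routine.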
480 +
481 +    Sysfs entries are registered under /sys/power/tuxonice, and entries for
482 +    modules are located in a subdirectory named after the module.
483 +
484 diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt
485 new file mode 100644
486 index 0000000..8900b45
487 --- /dev/null
488 +++ b/Documentation/power/tuxonice.txt
489 @@ -0,0 +1,948 @@
490 +       --- TuxOnIce, version 3.0 ---
491 +
492 +1.  What is it?
493 +2.  Why would you want it?
494 +3.  What do you need to use it?
495 +4.  Why not just use the version already in the kernel?
496 +5.  How do you use it?
497 +6.  What do all those entries in /sys/power/tuxonice do?
498 +7.  How do you get support?
499 +8.  I think I've found a bug. What should I do?
500 +9.  When will XXX be supported?
501 +10. How does it work?
502 +11. Who wrote TuxOnIce?
503 +
504 +1. What is it?
505 +
506 +   Imagine you're sitting at your computer, working away. For some reason, you
507 +   need to turn off your computer for a while - perhaps it's time to go home
508 +   for the day. When you come back to your computer next, you're going to want
509 +   to carry on where you left off. Now imagine that you could push a button and
510 +   have your computer store the contents of its memory to disk and power down.
511 +   Then, when you next start up your computer, it loads that image back into
512 +   memory and you can carry on from where you were, just as if you'd never
513 +   turned the computer off. You have far less time to start up, no reopening of
514 +   applications or finding what directory you put that file in yesterday.
515 +   That's what TuxOnIce does.
516 +
517 +   TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who,
518 +   with some help from Pavel Machek, got an early version going in 1999. The
519 +   project was then taken over by Florent Chabaud while still in alpha version
520 +   numbers. Nigel Cunningham came on the scene when Florent was unable to
521 +   continue, moving the project into betas, then 1.0, 2.0 and so on up to
522 +   the present series. During the 2.0 series, the name was contracted to
523 +   Suspend2 and the website suspend2.net created. Beginning around July 2007,
524 +   a transition to calling the software TuxOnIce was made, to seek to help
525 +   make it clear that TuxOnIce is more concerned with hibernation than suspend
526 +   to ram.
527 +
528 +   Pavel Machek's swsusp code, which was merged around 2.5.17, retains the
529 +   original name, and was essentially a fork of the beta code until Rafael
530 +   Wysocki came on the scene in 2005 and began to improve it further.
531 +
532 +2. Why would you want it?
533 +
534 +   Why wouldn't you want it?
535 +
536 +   Being able to save the state of your system and quickly restore it improves
537 +   your productivity - you get a useful system in far less time than through
538 +   the normal boot process. You also get to be completely 'green', using zero
539 +   power, or as close to that as possible (the computer may still provide
540 +   minimal power to some devices, so they can initiate a power on, but that
541 +   will be the same amount of power as would be used if you told the computer
542 +   to shut down).
543 +
544 +3. What do you need to use it?
545 +
546 +   a. Kernel Support.
547 +
548 +   i) The TuxOnIce patch.
549 +
550 +   TuxOnIce is part of the Linux Kernel. This version is not part of Linus's
551 +   2.6 tree at the moment, so you will need to download the kernel source and
552 +   apply the latest patch. Having done that, enable the appropriate options in
553 +   make [menu|x]config (under Power Management Options - look for "Enhanced
554 +   Hibernation"), compile and install your kernel. TuxOnIce works with SMP,
555 +   Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64.
556 +
557 +   TuxOnIce patches are available from http://tuxonice.net.
558 +
559 +   ii) Compression support.
560 +
561 +   Compression support is implemented via the cryptoapi. You will therefore want
562 +   to select any Cryptoapi transforms that you want to use on your image from
563 +   the Cryptoapi menu while configuring your kernel. We recommend the use of the
564 +   LZO compression method - it is very fast and still achieves good compression.
565 +
566 +   You can also tell TuxOnIce to write its image to an encrypted and/or
567 +   compressed filesystem/swap partition. In that case, you don't need to do
568 +   anything special for TuxOnIce when it comes to kernel configuration.
569 +
570 +   iii) Configuring other options.
571 +
572 +   While you're configuring your kernel, try to configure as much as possible
573 +   to build as modules. We recommend this because there are a number of drivers
574 +   that are still in the process of implementing proper power management
575 +   support. In those cases, the best way to work around their current lack is
576 +   to build them as modules and remove the modules while hibernating. You might
577 +   also bug the driver authors to get their support up to speed, or even help!
578 +
579 +   b. Storage.
580 +
581 +   i) Swap.
582 +
583 +   TuxOnIce can store the hibernation image in your swap partition, a swap file or
584 +   a combination thereof. Whichever combination you choose, you will probably
585 +   want to create enough swap space to store the largest image you could have,
586 +   plus the space you'd normally use for swap. A good rule of thumb would be
587 +   to calculate the amount of swap you'd want without using TuxOnIce, and then
588 +   add the amount of memory you have. This swapspace can be arranged in any way
589 +   you'd like. It can be in one partition or file, or spread over a number. The
590 +   only requirement is that they be active when you start a hibernation cycle.
591 +
592 +   There is one exception to this requirement. TuxOnIce has the ability to turn
593 +   on one swap file or partition at the start of hibernating and turn it back off
594 +   at the end. If you want to ensure you have enough swap to store an image
595 +   when your memory is fully used, you might want to make one swap partition or
596 +   file for 'normal' use, and another for TuxOnIce to activate & deactivate
597 +   automatically. (Further details below).
598 +
599 +   ii) Normal files.
600 +
601 +   TuxOnIce includes a 'file allocator'. The file allocator can store your
602 +   image in a simple file. Since Linux has the concept of everything being a
603 +   file, this is more powerful than it initially sounds. If, for example, you
604 +   were to set up a network block device file, you could hibernate to a network
605 +   server. This has been tested and works to a point, but nbd itself isn't
606 +   stateless enough for our purposes.
607 +
608 +   Take extra care when setting up the file allocator. If you just type
609 +   commands without thinking and then try to hibernate, you could cause
610 +   irreversible corruption on your filesystems! Make sure you have backups.
611 +
612 +   Most people will only want to hibernate to a local file. To achieve that, do
613 +   something along the lines of:
614 +
615 +   echo "TuxOnIce" > /hibernation-file
616 +   dd if=/dev/zero bs=1M count=512 >> /hibernation-file
617 +
618 +   This will create a 512MB file called /hibernation-file. To get TuxOnIce to use
619 +   it:
620 +
621 +   echo /hibernation-file > /sys/power/tuxonice/file/target
622 +
623 +   Then
624 +
625 +   cat /sys/power/tuxonice/resume
626 +
627 +   Put the results of this into your bootloader's configuration (see also step
628 +   C, below):
629 +
630 +   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
631 +   # cat /sys/power/tuxonice/resume
632 +   file:/dev/hda2:0x1e001
633 +
634 +   In this example, we would edit the append= line of our lilo.conf|menu.lst
635 +   so that it included:
636 +
637 +   resume=file:/dev/hda2:0x1e001
638 +   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
639 +
640 +   For those who are thinking 'Could I make the file sparse?', the answer is
641 +   'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in
642 +   a sparse file while hibernating. In the longer term (post merge!), I'd like
643 +   to change things so that the file could be dynamically resized and have
644 +   holes filled as needed. Right now, however, that's not possible and not a
645 +   priority.
646 +
647 +   c. Bootloader configuration.
648 +
649 +   Using TuxOnIce also requires that you add an extra parameter to
650 +   your lilo.conf or equivalent. Here's an example for a swap partition:
651 +
652 +   append="resume=swap:/dev/hda1"
653 +
654 +   This would tell TuxOnIce that /dev/hda1 is a swap partition you
655 +   have. TuxOnIce will use the swap signature of this partition as a
656 +   pointer to your data when you hibernate. This means that (in this example)
657 +   /dev/hda1 doesn't need to be _the_ swap partition where all of your data
658 +   is actually stored. It just needs to be a swap partition that has a
659 +   valid signature.
660 +
661 +   You don't need to have a swap partition for this purpose. TuxOnIce
662 +   can also use a swap file, but usage is a little more complex. Having made
663 +   your swap file, turn it on and do
664 +
665 +   cat /sys/power/tuxonice/swap/headerlocations
666 +
667 +   (this assumes you've already compiled your kernel with TuxOnIce
668 +   support and booted it). The results of the cat command will tell you
669 +   what you need to put in lilo.conf:
670 +
671 +   For swap partitions like /dev/hda1, simply use resume=/dev/hda1.
672 +   For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d.
673 +
674 +   If the swapfile changes for any reason (it is moved to a different
675 +   location, it is deleted and recreated, or the filesystem is
676 +   defragmented) then you will have to check
677 +   /sys/power/tuxonice/swap/headerlocations for a new resume_block value.
678 +
679 +   Once you've compiled and installed the kernel and adjusted your bootloader
680 +   configuration, you should only need to reboot for the most basic part
681 +   of TuxOnIce to be ready.
682 +
683 +   If you only compile in the swap allocator, or only compile in the file
684 +   allocator, you don't need to add the "swap:" part of the resume=
685 +   parameters above. resume=/dev/hda2:0x242d will work just as well. If you
686 +   have compiled both and your storage is on swap, you can also use this
687 +   format (the swap allocator is the default allocator).
688 +
689 +   When compiling your kernel, one of the options in the 'Power Management
690 +   Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is
691 +   called 'Default resume partition'. This can be used to set a default value
692 +   for the resume= parameter.
693 +
694 +   d. The hibernate script.
695 +
696 +   Since the driver model in 2.6 kernels is still being developed, you may need
697 +   to do more than just configure TuxOnIce. Users of TuxOnIce usually start the
698 +   process via a script which prepares for the hibernation cycle, tells the
699 +   kernel to do its stuff and then restores things afterwards. This script might
700 +   involve:
701 +
702 +   - Switching to a text console and back if X doesn't like the video card
703 +     status on resume.
704 +   - Un/reloading drivers that don't play well with hibernation.
705 +
706 +   Note that you might not be able to unload some drivers if there are
707 +   processes using them. You might have to kill off processes that hold
708 +   devices open. Hint: if your X server accesses a USB mouse, doing a
709 +   'chvt' to a text console releases the device and you can unload the
710 +   module.
711 +
712 +   Check out the latest script (available on tuxonice.net).
713 +
714 +   e. The userspace user interface.
715 +
716 +   TuxOnIce has very limited support for displaying status if you only apply
717 +   the kernel patch - it can printk messages, but that is all. In addition,
718 +   some of the functions mentioned in this document (such as cancelling a cycle
719 +   or performing interactive debugging) are unavailable. To utilise these
720 +   functions, or simply get a nice display, you need the 'userui' component.
721 +   Userui comes in three flavours: usplash, fbsplash and text. Text should
722 +   work on any console. Usplash and fbsplash require the appropriate
723 +   (distro specific?) support.
724 +
725 +   To utilise a userui, TuxOnIce just needs to be told where to find the
726 +   userspace binary:
727 +
728 +   echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program
729 +
730 +   The hibernate script can do this for you, and a default value for this
731 +   setting can be configured when compiling the kernel. This path is also
732 +   stored in the image header, so if you have an initrd or initramfs, you can
733 +   use the userui during the first part of resuming (prior to the atomic
734 +   restore) by putting the binary in the same path in your initrd/ramfs.
735 +   Alternatively, you can put it in a different location and do an echo
736 +   similar to the above prior to the echo > do_resume. The value saved in the
737 +   image header will then be ignored.
738 +
739 +4. Why not just use the version already in the kernel?
740 +
741 +   The version in the vanilla kernel has a number of drawbacks. The most
742 +   serious of these are:
743 +       - it has a maximum image size of 1/2 total memory;
744 +       - it doesn't allocate storage until after it has snapshotted memory.
745 +         This means that you can't be sure hibernating will work until you
746 +         see it start to write the image;
747 +       - it does not allow you to press escape to cancel a cycle;
748 +       - it does not allow you to press escape to cancel resuming;
749 +       - it does not allow you to automatically swapon a file when
750 +         starting a cycle;
751 +       - it does not allow you to use multiple swap partitions or files;
752 +       - it does not allow you to use ordinary files;
753 +       - it just invalidates an image and continues to boot if you
754 +         accidentally boot the wrong kernel after hibernating;
755 +       - it doesn't support any sort of nice display while hibernating;
756 +       - it is moving toward requiring that you have an initrd/initramfs
757 +         to ever have a hope of resuming (uswsusp). While uswsusp will
758 +         address some of the concerns above, it won't address all of them,
759 +         and will be more complicated to get set up;
760 +       - it doesn't have support for suspend-to-both (write a hibernation
761 +         image, then suspend to ram; I think this is known as ReadySafe
762 +         under M$).
763 +
764 +5. How do you use it?
765 +
766 +   A hibernation cycle can be started directly by doing:
767 +
768 +       echo > /sys/power/tuxonice/do_hibernate
769 +
770 +   In practice, though, you'll probably want to use the hibernate script
771 +   to unload modules, configure the kernel the way you like it and so on.
772 +   In that case, you'd do (as root):
773 +
774 +       hibernate
775 +
776 +   See the hibernate script's man page for more details on the options it
777 +   takes.
778 +
779 +   If you're using the text or splash user interface modules, one feature of
780 +   TuxOnIce that you might find useful is that you can press Escape at any time
781 +   during hibernating, and the process will be aborted.
782 +
783 +   Due to the way hibernation works, this means you'll have your system back and
784 +   perfectly usable almost instantly. The only exception is when it's at the
785 +   very end of writing the image. Then it will need to reload a small (usually
786 +   4-50MB, depending upon the image characteristics) portion first.
787 +
788 +   Likewise, when resuming, you can press escape and resuming will be aborted.
789 +   The computer will then power down again, according to the powerdown or
790 +   reboot settings in effect at the time.
791 +
792 +   You can change the settings for powering down while the image is being
793 +   written by pressing 'R' to toggle rebooting and 'O' to toggle between
794 +   suspending to ram and powering down completely.
795 +
796 +   If you run into problems with resuming, adding the "noresume" option to
797 +   the kernel command line will let you skip the resume step and recover your
798 +   system. This option shouldn't normally be needed, because TuxOnIce modifies
799 +   the image header prior to the atomic restore, and will thus prompt you
800 +   if it detects that you've tried to resume an image before (this flag is
801 +   removed if you press Escape to cancel a resume, so you won't be prompted
802 +   then).
803 +
804 +   Recent kernels (2.6.24 onwards) add support for resuming from a different
805 +   kernel to the one that was hibernated (thanks to Rafael for his work on
806 +   this - I've just embraced and enhanced the support for TuxOnIce). This
807 +   should further reduce the need for you to use the noresume option.
808 +
809 +6. What do all those entries in /sys/power/tuxonice do?
810 +
811 +   /sys/power/tuxonice is the directory which contains files you can use to
812 +   tune and configure TuxOnIce to your liking. The exact contents of
813 +   the directory will depend upon the version of TuxOnIce you're
814 +   running and the options you selected at compile time. In the following
815 +   descriptions, names in brackets refer to compile time options.
816 +   (Note that they're all dependent upon you having selected CONFIG_TUXONICE
817 +   in the first place!).
818 +
819 +   Since the values of these settings can pose security risks, the
820 +   writeable ones are accessible only to the root user. You may want to
821 +   configure sudo to allow you to invoke your hibernate script as an ordinary
822 +   user.
823 +
824 +   - alloc/failure_test
825 +
826 +   This debugging option provides a way of testing TuxOnIce's handling of
827 +   memory allocation failures. Each allocation type that TuxOnIce makes has
828 +   been given a unique number (see the source code). Echo the appropriate
829 +   number into this entry, and when TuxOnIce attempts to do that allocation,
830 +   it will pretend there was a failure and act accordingly.
831 +
832 +   - alloc/find_max_mem_allocated
833 +
834 +   This debugging option will cause TuxOnIce to find the maximum amount of
835 +   memory it used during a cycle, and report that information in debugging
836 +   information at the end of the cycle.
837 +
838 +   - alt_resume_param
839 +
840 +   Instead of powering down after writing a hibernation image, TuxOnIce
841 +   supports resuming from a different image. This entry lets you set the
842 +   location of the signature for that image (the resume= value you'd use
843 +   for it). Using an alternate image and keep_image mode, you can do things
844 +   like powering down an uninterruptible power supply.
846 +
847 +   - block_io/target_outstanding_io
848 +
849 +   This value controls the amount of memory that the block I/O code says it
850 +   needs when the core code is calculating how much memory is needed for
851 +   hibernating and for resuming. It doesn't directly control the amount of
852 +   I/O that is submitted at any one time - that depends on the amount of
853 +   available memory (we may have more available than we asked for), the
854 +   throughput that is being achieved and the ability of the CPU to keep up
855 +   with disk throughput (particularly where we're compressing pages).
856 +
857 +   - checksum/enabled
858 +
859 +   Use cryptoapi hashing routines to verify that Pageset2 pages don't change
860 +   while we're saving the first part of the image, and to get any pages that
861 +   do change resaved in the atomic copy. This should normally not be needed,
862 +   but if you're seeing issues, please enable this. If your issues stop you
863 +   being able to resume, enable this option, hibernate and cancel the cycle
864 +   after the atomic copy is done. If the debugging info shows a non-zero
865 +   number of pages resaved, please report this to Nigel.
866 +
867 +   - compression/algorithm
868 +
869 +   Set the cryptoapi algorithm used for compressing the image.
870 +
871 +   - compression/expected_compression
872 +
873 +   These values allow you to set an expected compression ratio, which TuxOnIce
874 +   will use in calculating whether it meets constraints on the image size. If
875 +   this expected compression ratio is not attained, the hibernation cycle will
876 +   abort, so it is wise to allow some spare. You can see what compression
877 +   ratio is achieved in the logs after hibernating.
878 +
879 +   - debug_info:
880 +
881 +   This file returns information about your configuration that may be helpful
882 +   in diagnosing problems with hibernating.
883 +
884 +   - did_suspend_to_both:
885 +
886 +   This file can be used when you hibernate with powerdown method 3 (ie suspend
887 +   to ram after writing the image). There can be two outcomes in this case. We
888 +   can resume from the suspend-to-ram before the battery runs out, or we can run
889 +   out of juice and end up resuming as normal. This entry lets you find out,
890 +   post resume, which way we went. If the value is 1, we resumed from suspend
891 +   to ram. This can be useful when actions need to be run post suspend-to-ram
892 +   that don't need to be run if we did the normal resume from power off.
893 +
894 +   - do_hibernate:
895 +
896 +   When anything is written to this file, the kernel side of TuxOnIce will
897 +   begin to attempt to write an image to disk and power down. You'll normally
898 +   want to run the hibernate script instead, to get modules unloaded first.
899 +
900 +   - do_resume:
901 +
902 +   When anything is written to this file TuxOnIce will attempt to read and
903 +   restore an image. If there is no image, it will return almost immediately.
904 +   If an image exists, the echo > will never return. Instead, the original
905 +   kernel context will be restored and the original echo > do_hibernate will
906 +   return.
907 +
908 +   - */enabled
909 +
910 +   These options can be used to temporarily disable various parts of TuxOnIce.
911 +
912 +   - extra_pages_allowance
913 +
914 +   When TuxOnIce does its atomic copy, it calls the driver model suspend
915 +   and resume methods. If you have DRI enabled with a driver such as fglrx,
916 +   this can result in the driver allocating a substantial amount of memory
917 +   for storing its state. Extra_pages_allowance tells TuxOnIce how much
918 +   extra memory it should ensure is available for those allocations. If
919 +   your attempts at hibernating end with a message in dmesg indicating that
920 +   insufficient extra pages were allowed, you need to increase this value.
921 +
922 +   - file/target:
923 +
924 +   Read this value to get the current setting. Write to it to point TuxOnIce
925 +   at a new storage location for the file allocator. See section 3.b.ii above
926 +   for details of how to set up the file allocator.
927 +
928 +   - freezer_test
929 +
930 +   This entry can be used to get TuxOnIce to just test the freezer and prepare
931 +   an image without actually doing a hibernation cycle. It is useful for
932 +   diagnosing freezing and image preparation issues.
933 +
934 +   - full_pageset2
935 +
936 +   TuxOnIce divides the pages that are stored in an image into two sets. The
937 +   difference between the two sets is that pages in pageset 1 are atomically
938 +   copied, and pages in pageset 2 are written to disk without being copied
939 +   first. A page CAN be written to disk without being copied first if and only
940 +   if its contents will not be modified or used at any time after userspace
941 +   processes are frozen. A page MUST be in pageset 1 if its contents are
942 +   modified or used at any time after userspace processes have been frozen.
943 +
944 +   Normally (ie if this option is enabled), TuxOnIce will put all pages on the
945 +   per-zone LRUs in pageset2, then remove those pages used by any running
946 +   userspace user interface helper or TuxOnIce storage manager,
947 +   together with pages used by the GEM memory manager introduced around 2.6.28
948 +   kernels.
949 +
950 +   If this option is disabled, a much more conservative approach will be taken.
951 +   The only pages in pageset2 will be those belonging to userspace processes,
952 +   with the exclusion of those belonging to the TuxOnIce userspace helpers
953 +   mentioned above. This will result in a much smaller pageset2, and will
954 +   therefore result in smaller images than are possible with this option
955 +   enabled.
956 +
957 +   - ignore_rootfs
958 +
959 +   TuxOnIce records which device is mounted as the root filesystem when
960 +   writing the hibernation image. It will normally check at resume time that
961 +   this device isn't already mounted - that would be a cause of filesystem
962 +   corruption. In some particular cases (RAM based root filesystems), you
963 +   might want to disable this check. This option allows you to do that.
964 +
965 +   - image_exists:
966 +
967 +   Can be used in a script to determine whether a valid image exists at the
968 +   location currently pointed to by resume=. Returns up to three lines.
969 +   The first is whether an image exists (-1 for unsure, otherwise 0 or 1).
970 +   If an image exists, additional lines will return the machine and version.
971 +   Echoing anything to this entry removes any current image.
972 +
973 +   - image_size_limit:
974 +
975 +   The maximum size of hibernation image written to disk, measured in megabytes
976 +   (1024*1024).
977 +
978 +   - last_result:
979 +
980 +   The result of the last hibernation cycle, as defined in
981 +   include/linux/suspend-debug.h with the values SUSPEND_ABORTED to
982 +   SUSPEND_KEPT_IMAGE. This is a bitmask.
983 +
984 +   - late_cpu_hotplug:
985 +
986 +   This sysfs entry controls whether cpu hotplugging is done - as normal - just
987 +   before (unplug) and after (replug) the atomic copy/restore (so that all
988 +   CPUs/cores are available for multithreaded I/O). The alternative is to
989 +   unplug all secondary CPUs/cores at the start of hibernating/resuming, and
990 +   replug them at the end of resuming. No multithreaded I/O will be possible in
991 +   this configuration, but the odd machine has been reported to require it.
992 +
993 +   - lid_file:
994 +
995 +   This determines which ACPI button file we look in to determine whether the
996 +   lid is open or closed after resuming from suspend to disk or power off.
997 +   If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state
998 +   and check its contents at the appropriate moment. See post_wake_state below
999 +   for more details on how this entry is used.
1000 +
1001 +   - log_everything (CONFIG_PM_DEBUG):
1002 +
1003 +   Setting this option results in all messages printed being logged. Normally,
1004 +   only a subset are logged, so as to not slow the process and not clutter the
1005 +   logs. Useful for debugging. It can be toggled during a cycle by pressing
1006 +   'L'.
1007 +
1008 +   - no_load_direct:
1009 +
1010 +   This is a debugging option. If, when loading the atomically copied pages of
1011 +   an image, TuxOnIce finds that the destination address for a page is free,
1012 +   it will normally allocate that page, load the data directly into that
1013 +   address and skip it in the atomic restore. If this option is disabled, the
1014 +   page will be loaded somewhere else and atomically restored like other pages.
1015 +
1016 +   - no_flusher_thread:
1017 +
1018 +   When doing multithreaded I/O (see below), the first online CPU can be used
1019 +   to _just_ submit compressed pages when writing the image, rather than
1020 +   compressing and submitting data. This option is normally disabled, but has
1021 +   been included because Nigel would like to see whether it will be more useful
1022 +   as the number of cores/cpus in computers increases.
1023 +
1024 +   - no_multithreaded_io:
1025 +
1026 +   TuxOnIce will normally create one thread per cpu/core on your computer,
1027 +   each of which will then perform I/O. This will generally result in
1028 +   throughput that's the maximum the storage medium can handle. There
1029 +   shouldn't be any reason to disable multithreaded I/O now, but this option
1030 +   has been retained for debugging purposes.
1031 +
1032 +   - no_pageset2
1033 +
1034 +   See the entry for full_pageset2 above for an explanation of pagesets.
1035 +   Enabling this option causes TuxOnIce to do an atomic copy of all pages,
1036 +   thereby limiting the maximum image size to 1/2 of memory, as swsusp does.
1037 +
1038 +   - no_pageset2_if_unneeded
1039 +
1040 +   See the entry for full_pageset2 above for an explanation of pagesets.
1041 +   Enabling this option causes TuxOnIce to act like no_pageset2 was enabled
1042 +   if and only if it isn't needed anyway. This option may still make TuxOnIce
1043 +   less reliable because pageset2 pages are normally used to store the
1044 +   atomic copy - drivers that want to do allocations of larger amounts of
1045 +   memory in one shot will be more likely to find that those amounts aren't
1046 +   available if this option is enabled.
1047 +
1048 +   - pause_between_steps (CONFIG_PM_DEBUG):
1049 +
1050 +   This option is used during debugging, to make TuxOnIce pause between
1051 +   each step of the process. It is ignored when the nice display is on.
1052 +
1053 +   - post_wake_state:
1054 +
1055 +   TuxOnIce provides support for automatically waking after a user-selected
1056 +   delay, and using a different powerdown method if the lid is still closed.
1057 +   (Yes, we're assuming a laptop).  This entry lets you choose what state
1058 +   should be entered next. The values are those described under
1059 +   powerdown_method, below. It can be used to suspend to RAM after hibernating,
1060 +   then power down properly after (say) 20 minutes. It can also be used to power
1061 +   properly, then wake at (say) 6.30am and suspend to RAM until you're ready
1062 +   to use the machine.
1063 +
1064 +   - powerdown_method:
1065 +
1066 +   Used to select a method by which TuxOnIce should power down after writing the
1067 +   image. Currently:
1068 +
1069 +   0: Don't use ACPI to power off.
1070 +   3: Attempt to enter Suspend-to-ram.
1071 +   4: Attempt to enter ACPI S4 mode.
1072 +   5: Attempt to power down via ACPI S5 mode.
1073 +
1074 +   Note that these options are highly dependent upon your hardware & software:
1075 +
1076 +   3: When successful, your machine suspends to ram instead of powering off.
1077 +      The advantage of using this mode is that it doesn't matter whether your
1078 +      battery has enough charge to make it through to your next resume. If it
1079 +      lasts, you will simply resume from suspend to ram (and the image on disk
1080 +      will be discarded). If the battery runs out, you will resume from disk
1081 +      instead. The disadvantage is that it takes longer than a normal
1082 +      suspend-to-ram to enter the state, since the suspend-to-disk image needs
1083 +      to be written first.
1084 +   4/5: When successful, your machine will be off and consume (almost) no power.
1085 +      But it might still react to some external events like opening the lid or
1086 +      traffic on a network or usb device. For the BIOS, resume is then the same
1087 +      as warm boot, similar to a situation where you used the command `reboot'
1088 +      to reboot your machine. If your machine has problems on warm boot or if
1089 +      you want to protect your machine with the BIOS password, this is probably
1090 +      not the right choice. Mode 4 may be necessary on some machines where ACPI
1091 +      wake up methods need to be run to properly reinitialise hardware after a
1092 +      hibernation cycle.
1093 +   0: Switch the machine completely off. The only possible wakeup is the power
1094 +      button. For the BIOS, resume is then the same as a cold boot; in
1095 +      particular, you would have to provide your BIOS boot password if your
1096 +      machine uses that feature for booting.
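+
+   So, to ask TuxOnIce to try suspend-to-RAM after writing the image (path
+   assumed from the layout used elsewhere in this file):
+
+   echo 3 > /sys/power/tuxonice/powerdown_method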
1097 +
1098 +   - progressbar_granularity_limit:
1099 +
1100 +   This option can be used to limit the granularity of the progress bar
1101 +   displayed with a bootsplash screen. The value is the maximum number of
1102 +   steps. That is, 10 will make the progress bar jump in 10% increments.
1103 +
1104 +   - reboot:
1105 +
1106 +   This option causes TuxOnIce to reboot rather than powering down
1107 +   at the end of saving an image. It can be toggled during a cycle by pressing
1108 +   'R'.
1109 +
1110 +   - resume:
1111 +
1112 +   This sysfs entry can be used to read and set the location in which TuxOnIce
1113 +   will look for the signature of an image - the value set using resume= at
1114 +   boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By
1115 +   writing to this file as well as modifying your bootloader's configuration
1116 +   file (eg menu.lst), you can set or reset the location of your image or the
1117 +   method of storing the image without rebooting.
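+
+   For instance, you might redirect TuxOnIce to another partition without
+   rebooting (the exact value to use is reported by swap/headerlocations,
+   described below; this one is purely illustrative):
+
+   echo swap:/dev/hda2 > /sys/power/tuxonice/resume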
1118 +
1119 +   - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP):
1120 +
1121 +   This option makes
1122 +
1123 +     echo disk > /sys/power/state
1124 +
1125 +   activate TuxOnIce instead of swsusp. Regardless of whether this option is
1126 +   enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce
1127 +   to check for an image too. This is due to the fact that at resume time, we
1128 +   can't know whether this option was enabled until we see if an image is there
1129 +   for us to resume from. (And when an image exists, we don't care whether we
1130 +   did replace swsusp anyway - we just want to resume).
1131 +
1132 +   - resume_commandline:
1133 +
1134 +   This entry can be read after resuming to see the commandline that was used
1135 +   when resuming began. You might use this to set up two bootloader entries
1136 +   that are the same apart from the fact that one includes an extra append=
1137 +   argument "at_work=1". You could then grep resume_commandline in your
1138 +   post-resume scripts and configure networking (for example) differently
1139 +   depending upon whether you're at home or work. resume_commandline can be
1140 +   set to arbitrary text if you wish to remove sensitive contents.
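+
+   A post-resume script might then do something like this (a sketch; the
+   interface configuration names are hypothetical):
+
+   if grep -q at_work=1 /sys/power/tuxonice/resume_commandline; then
+           ifup eth0=work
+   else
+           ifup eth0=home
+   fi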
1141 +
1142 +   - swap/swapfilename:
1143 +
1144 +   This entry is used to specify the swapfile or partition that
1145 +   TuxOnIce will attempt to swapon/swapoff automatically. Thus, if
1146 +   I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically
1147 +   for my hibernation image, I would
1148 +
1149 +   echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile
1150 +
1151 +   /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the
1152 +   swapon and swapoff occur while other processes are frozen (including kswapd)
1153 +   so this swap file will not be used up when attempting to free memory. The
1154 +   partition/file is also given the highest priority, so other swapfiles/partitions
1155 +   will only be used to save the image when this one is filled.
1156 +
1157 +   The value of this file is used by headerlocations along with any currently
1158 +   activated swapfiles/partitions.
1159 +
1160 +   - swap/headerlocations:
1161 +
1162 +   This entry tells you the resume= options to use for swap devices you
1163 +   currently have activated. It is particularly useful when you only want to
1164 +   use a swap file to store your image. See above for further details.
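+
+   Reading it is enough to see what to put on the kernel command line:
+
+   cat /sys/power/tuxonice/swap/headerlocations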
1165 +
1166 +   - test_bio:
1167 +
1168 +   This is a debugging option. When enabled, TuxOnIce will not hibernate.
1169 +   Instead, when asked to write an image, it will skip the atomic copy,
1170 +   just doing the writing of the image and then returning control to the
1171 +   user at the point where it would have powered off. This is useful for
1172 +   testing throughput in different configurations.
1173 +
1174 +   - test_filter_speed:
1175 +
1176 +   This is a debugging option. When enabled, TuxOnIce will not hibernate.
1177 +   Instead, when asked to write an image, it will not write anything or do
1178 +   an atomic copy, but will only run any enabled compression algorithm on the
1179 +   data that would have been written (the source pages of the atomic copy in
1180 +   the case of pageset 1). This is useful for comparing the performance of
1181 +   compression algorithms and for determining the extent to which an upgrade
1182 +   to your storage method would improve hibernation speed.
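+
+   Both test entries are ordinary boolean sysfs files, so a compression
+   benchmark might look like this (a sketch; it assumes replace_swsusp is
+   enabled, as described above):
+
+   echo 1 > /sys/power/tuxonice/test_filter_speed
+   echo disk > /sys/power/state
+   echo 0 > /sys/power/tuxonice/test_filter_speed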
1183 +
1184 +   - user_interface/debug_sections (CONFIG_PM_DEBUG):
1185 +
1186 +   This value, together with the console log level, controls what debugging
1187 +   information is displayed. The console log level determines the level of
1188 +   detail, and this value determines what detail is displayed. This value is
1189 +   a bit vector, and the meaning of the bits can be found in the kernel tree
1190 +   in include/linux/tuxonice.h. It can be overridden using the kernel's
1191 +   command line option suspend_dbg.
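+
+   Being a bit vector, sections are combined by OR-ing their bits; for
+   example, to enable the sections behind bits 0 and 2 (check
+   include/linux/tuxonice.h for what those bits actually mean):
+
+   echo $((1 | 4)) > /sys/power/tuxonice/user_interface/debug_sections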
1192 +
1193 +   - user_interface/default_console_level (CONFIG_PM_DEBUG):
1194 +
1195 +   This determines the value of the console log level at the start of a
1196 +   hibernation cycle. If debugging is compiled in, the console log level can be
1197 +   changed during a cycle by pressing the digit keys. Meanings are:
1198 +
1199 +   0: Nice display.
1200 +   1: Nice display plus numerical progress.
1201 +   2: Errors only.
1202 +   3: Low level debugging info.
1203 +   4: Medium level debugging info.
1204 +   5: High level debugging info.
1205 +   6: Verbose debugging info.
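+
+   For example, to start each cycle with numerical progress displayed:
+
+   echo 1 > /sys/power/tuxonice/user_interface/default_console_level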
1206 +
1207 +   - user_interface/enable_escape:
1208 +
1209 +   Setting this to "1" will enable you to abort a hibernation cycle or resume
1210 +   by pressing escape; "0" (default) disables this feature. Note that enabling
1211 +   this option means that you cannot initiate a hibernation cycle and then walk
1212 +   away from your computer, expecting it to be secure. With the feature disabled,
1213 +   you can validly have this expectation once TuxOnIce begins to write the
1214 +   image to disk. (Prior to this point, it is possible that TuxOnIce might
1215 +   abort because of a failure to freeze all processes or because constraints
1216 +   on its ability to save the image are not met).
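+
+   To allow aborting with the escape key, then:
+
+   echo 1 > /sys/power/tuxonice/user_interface/enable_escape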
1217 +
1218 +   - user_interface/program:
1219 +
1220 +   This entry is used to tell TuxOnIce what userspace program to use for
1221 +   providing a user interface while hibernating. The program uses a netlink
1222 +   socket to pass messages back and forth to the kernel, allowing it to provide
1223 +   all of the functions formerly implemented in the kernel user interface components.
1224 +
1225 +   - version:
1226 +
1227 +   The version of TuxOnIce you have compiled into the currently running kernel.
1228 +
1229 +   - wake_alarm_dir:
1230 +
1231 +   As mentioned above (post_wake_state), TuxOnIce supports automatically waking
1232 +   after some delay. This entry allows you to select which wake alarm to use.
1233 +   It should contain the value "rtc0" if you want to use
1234 +   /sys/class/rtc/rtc0.
1235 +
1236 +   - wake_delay:
1237 +
1238 +   This value determines the delay from the end of writing the image until the
1239 +   wake alarm is triggered. You can set an absolute time by writing the desired
1240 +   time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values
1241 +   empty.
1242 +
1243 +   Note that for the wakeup to actually occur, you may need to modify entries
1244 +   in /proc/acpi/wakeup. This is done by echoing the name of the button in the
1245 +   first column (eg PBTN) into the file.
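+
+   Putting the pieces together, a twenty minute wake-up might be configured
+   as follows (a sketch; it assumes the delay is in seconds and that your
+   system has rtc0 and a PBTN entry):
+
+   echo rtc0 > /sys/power/tuxonice/wake_alarm_dir
+   echo 1200 > /sys/power/tuxonice/wake_delay
+   echo PBTN > /proc/acpi/wakeup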
1246 +
1247 +7. How do you get support?
1248 +
1249 +   Glad you asked. TuxOnIce is being actively maintained and supported
1250 +   by Nigel (the guy doing most of the kernel coding at the moment), Bernard
1251 +   (who maintains the hibernate script and userspace user interface components)
1252 +   and its users.
1253 +
1254 +   Resources available include HowTos, FAQs and a Wiki, all accessible via
1255 +   tuxonice.net.  You can find the mailing lists there.
1256 +
1257 +8. I think I've found a bug. What should I do?
1258 +
1259 +   By far and away, the most common problems people have with TuxOnIce
1260 +   relate to drivers not having adequate power management support. In this
1261 +   case, it is not a bug with TuxOnIce, but we can still help you. As we
1262 +   mentioned above, such issues can usually be worked around by building the
1263 +   functionality as modules and unloading them while hibernating. Please visit
1264 +   the Wiki for up-to-date lists of known issues and workarounds.
1265 +
1266 +   If this information doesn't help, try running:
1267 +
1268 +   hibernate --bug-report
1269 +
1270 +   ...and sending the output to the users mailing list.
1271 +
1272 +   Good guidance on how to provide us with useful information from an
1273 +   oops is found in the file REPORTING-BUGS, in the top level directory
1274 +   of the kernel tree. If you get an oops, please especially note the
1275 +   information about running what is printed on the screen through ksymoops.
1276 +   The raw information is useless.
1277 +
1278 +9. When will XXX be supported?
1279 +
1280 +   If there's a feature missing from TuxOnIce that you'd like, feel free to
1281 +   ask. We try to be obliging, within reason.
1282 +
1283 +   Patches are welcome. Please send to the list.
1284 +
1285 +10. How does it work?
1286 +
1287 +   TuxOnIce does its work in a number of steps.
1288 +
1289 +   a. Freezing system activity.
1290 +
1291 +   The first main stage in hibernating is to stop all other activity. This is
1292 +   achieved in stages. Processes are considered in four groups, which we will
1293 +   describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE
1294 +   flag, kernel threads without this flag, userspace processes with the
1295 +   PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are
1296 +   untouched by the refrigerator code. They are allowed to run while hibernating
1297 +   and resuming, and are used to support user interaction, storage access or the
1298 +   like. Other kernel threads (those unneeded while hibernating) are frozen last.
1299 +   This leaves us with userspace processes that need to be frozen. When a
1300 +   process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on
1301 +   that process for the duration of that call. Processes that have this flag are
1302 +   frozen after processes without it, so that we can seek to ensure that dirty
1303 +   data is synced to disk as quickly as possible in a situation where other
1304 +   processes may be submitting writes at the same time. Freezing the processes
1305 +   that are submitting data stops new I/O from being submitted. Syncthreads can
1306 +   then cleanly finish their work. So the order is:
1307 +
1308 +   - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE;
1309 +   - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE);
1310 +   - Kernel processes without PF_NOFREEZE.
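+
+   For reference, a freezable kernel thread in this era enters the
+   refrigerator by polling try_to_freeze() in its main loop, along these
+   lines (an illustrative sketch, not code from this patch):
+
+   #include <linux/kthread.h>
+   #include <linux/freezer.h>
+   #include <linux/sched.h>
+
+   static int example_thread(void *data)
+   {
+           set_freezable();                /* clear PF_NOFREEZE */
+           while (!kthread_should_stop()) {
+                   try_to_freeze();        /* parks here while frozen */
+                   /* ... do one unit of work ... */
+                   schedule_timeout_interruptible(HZ);
+           }
+           return 0;
+   }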
1311 +
1312 +   b. Eating memory.
1313 +
1314 +   For a successful hibernation cycle, you need enough disk space to store
1315 +   the image and enough memory to satisfy the various constraints of
1316 +   TuxOnIce's algorithm. You can also specify a maximum image size. In order
1317 +   to meet those constraints, TuxOnIce may 'eat' memory. If, after freezing
1318 +   processes, the constraints aren't met, TuxOnIce will thaw all the
1319 +   other processes and begin to eat memory until its calculations indicate
1320 +   the constraints are met. It will then freeze processes again and recheck
1321 +   its calculations.
1322 +
1323 +   c. Allocation of storage.
1324 +
1325 +   Next, TuxOnIce allocates the storage that will be used to save
1326 +   the image.
1327 +
1328 +   The core of TuxOnIce knows nothing about how or where pages are stored. We
1329 +   therefore request the active allocator (remember you might have compiled in
1330 +   more than one!) to allocate enough storage for our expected image size. If
1331 +   this request cannot be fulfilled, we eat more memory and try again. If it
1332 +   is fulfilled, we seek to allocate additional storage, just in case our
1333 +   expected compression ratio (if any) isn't achieved. This time, however, we
1334 +   just continue if we can't allocate enough storage.
1335 +
1336 +   If these calls to our allocator change the characteristics of the image
1337 +   such that we haven't allocated enough memory, we also loop. (The allocator
1338 +   may well need to allocate space for its storage information).
1339 +
1340 +   d. Write the first part of the image.
1341 +
1342 +   TuxOnIce stores the image in two sets of pages called 'pagesets'.
1343 +   Pageset 2 contains pages on the active and inactive lists; essentially
1344 +   the page cache. Pageset 1 contains all other pages, including the kernel.
1345 +   We use two pagesets for one important reason: We need to make an atomic copy
1346 +   of the kernel to ensure consistency of the image. Without a second pageset,
1347 +   that would limit us to an image that was at most half the amount of memory
1348 +   available. Using two pagesets allows us to store a full image. Since pageset
1349 +   2 pages won't be needed in saving pageset 1, we first save pageset 2 pages.
1350 +   We can then make our atomic copy of the remaining pages using both pageset 2
1351 +   pages and any other pages that are free. While saving both pagesets, we are
1352 +   careful not to corrupt the image. Among other things, we use low-level block
1353 +   I/O routines that don't change the pagecache contents.
1354 +
1355 +   The next step, then, is writing pageset 2.
1356 +
1357 +   e. Suspending drivers and storing processor context.
1358 +
1359 +   Having written pageset2, TuxOnIce calls the power management functions to
1360 +   notify drivers of the hibernation, and saves the processor state in preparation
1361 +   for the atomic copy of memory we are about to make.
1362 +
1363 +   f. Atomic copy.
1364 +
1365 +   At this stage, everything else but the TuxOnIce code is halted. Processes
1366 +   are frozen or idling, drivers are quiesced and have stored (ideally and where
1367 +   necessary) their configuration in memory we are about to atomically copy.
1368 +   In our low-level architecture-specific code, we have saved the CPU state.
1369 +   We can therefore now do our atomic copy before resuming drivers etc.
1370 +
1371 +   g. Save the atomic copy (pageset 1).
1372 +
1373 +   TuxOnIce can then write the atomic copy of the remaining pages. Since we
1374 +   have copied the pages into other locations, we can continue to use the
1375 +   normal block I/O routines without fear of corrupting our image.
1376 +
1377 +   h. Save the image header.
1378 +
1379 +   Nearly there! We save our settings and other parameters needed for
1380 +   reloading pageset 1 in an 'image header'. We also tell our allocator to
1381 +   serialise its data at this stage, so that it can reread the image at resume
1382 +   time.
1383 +
1384 +   i. Set the image header.
1385 +
1386 +   Finally, we edit the header at our resume= location. The signature is
1387 +   changed by the allocator to reflect the fact that an image exists, and to
1388 +   point to the start of that data if necessary (swap allocator).
1389 +
1390 +   j. Power down.
1391 +
1392 +   Or reboot if we're debugging and the appropriate option is selected.
1393 +
1394 +   Whew!
1395 +
1396 +   Reloading the image.
1397 +   --------------------
1398 +
1399 +   Reloading the image is essentially the reverse of all the above. We load
1400 +   our copy of pageset 1, being careful to choose locations that aren't going
1401 +   to be overwritten as we copy it back (we start very early in the boot
1402 +   process, so there are no other processes to quiesce here). We then copy
1403 +   pageset 1 back to its original location in memory and restore the process
1404 +   context. We are now running with the original kernel. Next, we reload the
1405 +   pageset 2 pages, free the memory and swap used by TuxOnIce, restore
1406 +   the pageset header and restart processes. Sounds easy in comparison to
1407 +   hibernating, doesn't it!
1408 +
1409 +   There is of course more to TuxOnIce than this, but this explanation
1410 +   should be a good start. If there's interest, I'll write further
1411 +   documentation on range pages and the low level I/O.
1412 +
1413 +11. Who wrote TuxOnIce?
1414 +
1415 +   (Answer based on the writings of Florent Chabaud, credits in files and
1416 +   Nigel's limited knowledge; apologies to anyone missed out!)
1417 +
1418 +   The main developers of TuxOnIce have been...
1419 +
1420 +   Gabor Kuti
1421 +   Pavel Machek
1422 +   Florent Chabaud
1423 +   Bernard Blackham
1424 +   Nigel Cunningham
1425 +
1426 +   Significant portions of swsusp, the code in the vanilla kernel which
1427 +   TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should
1428 +   also be expressed to him.
1429 +
1430 +   The above mentioned developers have been aided in their efforts by a host
1431 +   of hundreds, if not thousands, of testers and people who have submitted bug
1432 +   fixes & suggestions. Of special note are the efforts of Michael Frank, who
1433 +   had his computers repeatedly hibernate and resume for literally tens of
1434 +   thousands of cycles and developed scripts to stress the system and test
1435 +   TuxOnIce far beyond the point most of us (Nigel included!) would consider
1436 +   testing. His efforts have contributed as much to TuxOnIce as any of the
1437 +   names above.
1438 diff --git a/MAINTAINERS b/MAINTAINERS
1439 index cf4abdd..979b923 100644
1440 --- a/MAINTAINERS
1441 +++ b/MAINTAINERS
1442 @@ -5655,6 +5655,13 @@ S:       Maintained
1443  F:     drivers/tc/
1444  F:     include/linux/tc.h
1445  
1446 +TUXONICE (ENHANCED HIBERNATION)
1447 +P:     Nigel Cunningham
1448 +M:     nigel@tuxonice.net
1449 +L:     tuxonice-devel@tuxonice.net
1450 +W:     http://tuxonice.net
1451 +S:     Maintained
1452 +
1453  U14-34F SCSI DRIVER
1454  P:     Dario Ballabio
1455  M:     ballabio_dario@emc.com
1456 diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
1457 index 5422169..33be4fa 100644
1458 --- a/arch/powerpc/mm/pgtable_32.c
1459 +++ b/arch/powerpc/mm/pgtable_32.c
1460 @@ -396,6 +396,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1461  
1462         change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
1463  }
1464 +EXPORT_SYMBOL_GPL(kernel_map_pages);
1465  #endif /* CONFIG_DEBUG_PAGEALLOC */
1466  
1467  static int fixmaps;
1468 diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
1469 index 667188e..8113e78 100644
1470 --- a/arch/x86/kernel/reboot.c
1471 +++ b/arch/x86/kernel/reboot.c
1472 @@ -620,6 +620,7 @@ void machine_restart(char *cmd)
1473  {
1474         machine_ops.restart(cmd);
1475  }
1476 +EXPORT_SYMBOL_GPL(machine_restart);
1477  
1478  void machine_halt(void)
1479  {
1480 diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
1481 index e17efed..5d0b4d2 100644
1482 --- a/arch/x86/mm/pageattr.c
1483 +++ b/arch/x86/mm/pageattr.c
1484 @@ -1268,6 +1268,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1485          */
1486         __flush_tlb_all();
1487  }
1488 +EXPORT_SYMBOL_GPL(kernel_map_pages);
1489  
1490  #ifdef CONFIG_HIBERNATION
1491  
1492 @@ -1282,7 +1283,7 @@ bool kernel_page_present(struct page *page)
1493         pte = lookup_address((unsigned long)page_address(page), &level);
1494         return (pte_val(*pte) & _PAGE_PRESENT);
1495  }
1496 -
1497 +EXPORT_SYMBOL_GPL(kernel_page_present);
1498  #endif /* CONFIG_HIBERNATION */
1499  
1500  #endif /* CONFIG_DEBUG_PAGEALLOC */
1501 diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
1502 index 5343540..d2d55bd 100644
1503 --- a/arch/x86/power/cpu_64.c
1504 +++ b/arch/x86/power/cpu_64.c
1505 @@ -10,6 +10,7 @@
1506  
1507  #include <linux/smp.h>
1508  #include <linux/suspend.h>
1509 +#include <linux/module.h>
1510  #include <asm/proto.h>
1511  #include <asm/page.h>
1512  #include <asm/pgtable.h>
1513 @@ -77,6 +78,7 @@ void save_processor_state(void)
1514  {
1515         __save_processor_state(&saved_context);
1516  }
1517 +EXPORT_SYMBOL_GPL(save_processor_state);
1518  
1519  static void do_fpu_end(void)
1520  {
1521 diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
1522 index 81197c6..ff7e534 100644
1523 --- a/arch/x86/power/hibernate_32.c
1524 +++ b/arch/x86/power/hibernate_32.c
1525 @@ -8,6 +8,7 @@
1526  
1527  #include <linux/suspend.h>
1528  #include <linux/bootmem.h>
1529 +#include <linux/module.h>
1530  
1531  #include <asm/system.h>
1532  #include <asm/page.h>
1533 @@ -163,6 +164,7 @@ int swsusp_arch_resume(void)
1534         restore_image();
1535         return 0;
1536  }
1537 +EXPORT_SYMBOL_GPL(swsusp_arch_resume);
1538  
1539  /*
1540   *     pfn_is_nosave - check if given pfn is in the 'nosave' section
1541 diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
1542 index 65fdc86..e5c31f6 100644
1543 --- a/arch/x86/power/hibernate_64.c
1544 +++ b/arch/x86/power/hibernate_64.c
1545 @@ -10,6 +10,7 @@
1546  
1547  #include <linux/smp.h>
1548  #include <linux/suspend.h>
1549 +#include <linux/module.h>
1550  #include <asm/proto.h>
1551  #include <asm/page.h>
1552  #include <asm/pgtable.h>
1553 @@ -118,6 +119,7 @@ int swsusp_arch_resume(void)
1554         restore_image();
1555         return 0;
1556  }
1557 +EXPORT_SYMBOL_GPL(swsusp_arch_resume);
1558  
1559  /*
1560   *     pfn_is_nosave - check if given pfn is in the 'nosave' section
1561 @@ -168,3 +170,4 @@ int arch_hibernation_header_restore(void *addr)
1562         restore_cr3 = rdr->cr3;
1563         return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
1564  }
1565 +EXPORT_SYMBOL_GPL(arch_hibernation_header_restore);
1566 diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
1567 index 3e4bc69..80612e3 100644
1568 --- a/drivers/base/power/main.c
1569 +++ b/drivers/base/power/main.c
1570 @@ -55,6 +55,7 @@ void device_pm_lock(void)
1571  {
1572         mutex_lock(&dpm_list_mtx);
1573  }
1574 +EXPORT_SYMBOL_GPL(device_pm_lock);
1575  
1576  /**
1577   *     device_pm_unlock - unlock the list of active devices used by the PM core
1578 @@ -63,6 +64,7 @@ void device_pm_unlock(void)
1579  {
1580         mutex_unlock(&dpm_list_mtx);
1581  }
1582 +EXPORT_SYMBOL_GPL(device_pm_unlock);
1583  
1584  /**
1585   *     device_pm_add - add a device to the list of active devices
1586 diff --git a/drivers/char/vt.c b/drivers/char/vt.c
1587 index 08151d4..7377d98 100644
1588 --- a/drivers/char/vt.c
1589 +++ b/drivers/char/vt.c
1590 @@ -187,6 +187,7 @@ int fg_console;
1591  int last_console;
1592  int want_console = -1;
1593  int kmsg_redirect;
1594 +EXPORT_SYMBOL_GPL(kmsg_redirect);
1595  
1596  /*
1597   * For each existing display, we have a pointer to console currently visible
1598 diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
1599 index 4984aa8..c69b548 100644
1600 --- a/drivers/gpu/drm/drm_gem.c
1601 +++ b/drivers/gpu/drm/drm_gem.c
1602 @@ -136,7 +136,8 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size)
1603         obj = kcalloc(1, sizeof(*obj), GFP_KERNEL);
1604  
1605         obj->dev = dev;
1606 -       obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
1607 +       obj->filp = shmem_file_setup("drm mm object", size,
1608 +                       VM_NORESERVE | VM_ATOMIC_COPY);
1609         if (IS_ERR(obj->filp)) {
1610                 kfree(obj);
1611                 return NULL;
1612 diff --git a/drivers/md/md.c b/drivers/md/md.c
1613 index 641b211..73ccc45 100644
1614 --- a/drivers/md/md.c
1615 +++ b/drivers/md/md.c
1616 @@ -6251,6 +6251,9 @@ void md_do_sync(mddev_t *mddev)
1617                 mddev->curr_resync = 2;
1618  
1619         try_again:
1620 +               while (freezer_is_on())
1621 +                       yield();
1622 +
1623                 if (kthread_should_stop()) {
1624                         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1625                         goto skip;
1626 @@ -6272,6 +6275,10 @@ void md_do_sync(mddev_t *mddev)
1627                                          * time 'round when curr_resync == 2
1628                                          */
1629                                         continue;
1630 +
1631 +                               while (freezer_is_on())
1632 +                                       yield();
1633 +
1634                                 /* We need to wait 'interruptible' so as not to
1635                                  * contribute to the load average, and not to
1636                                  * be caught by 'softlockup'
1637 @@ -6284,6 +6291,7 @@ void md_do_sync(mddev_t *mddev)
1638                                                " share one or more physical units)\n",
1639                                                desc, mdname(mddev), mdname(mddev2));
1640                                         mddev_put(mddev2);
1641 +                                       try_to_freeze();
1642                                         if (signal_pending(current))
1643                                                 flush_signals(current);
1644                                         schedule();
1645 @@ -6384,6 +6392,9 @@ void md_do_sync(mddev_t *mddev)
1646                                    mddev->resync_max > j
1647                                    || kthread_should_stop());
1648  
1649 +               while (freezer_is_on())
1650 +                       yield();
1651 +
1652                 if (kthread_should_stop())
1653                         goto interrupted;
1654  
1655 @@ -6428,6 +6439,9 @@ void md_do_sync(mddev_t *mddev)
1656                         last_mark = next;
1657                 }
1658  
1659 +               while (freezer_is_on())
1660 +                       yield();
1661 +
1662  
1663                 if (kthread_should_stop())
1664                         goto interrupted;
1665 diff --git a/fs/block_dev.c b/fs/block_dev.c
1666 index f45dbc1..52a7869 100644
1667 --- a/fs/block_dev.c
1668 +++ b/fs/block_dev.c
1669 @@ -321,6 +321,93 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
1670  }
1671  EXPORT_SYMBOL(thaw_bdev);
1672  
1673 +#ifdef CONFIG_FS_FREEZER_DEBUG
1674 +#define FS_PRINTK(fmt, args...) printk(fmt, ## args)
1675 +#else
1676 +#define FS_PRINTK(fmt, args...)
1677 +#endif
1678 +
1679 +/* #define DEBUG_FS_FREEZING */
1680 +
1681 +/**
1682 + * freeze_filesystems - lock all filesystems and force them into a consistent
1683 + * state
1684 + * @which:     What combination of fuse & non-fuse to freeze.
1685 + */
1686 +void freeze_filesystems(int which)
1687 +{
1688 +       struct super_block *sb;
1689 +
1690 +       lockdep_off();
1691 +
1692 +       /*
1693 +        * Freeze in reverse order so filesystems dependent upon others are
1694 +        * frozen in the right order (eg. loopback on ext3).
1695 +        */
1696 +       list_for_each_entry_reverse(sb, &super_blocks, s_list) {
1697 +               FS_PRINTK(KERN_INFO "Considering %s.%s: (root %p, bdev %x)",
1698 +                       sb->s_type->name ? sb->s_type->name : "?",
1699 +                       sb->s_subtype ? sb->s_subtype : "", sb->s_root,
1700 +                       sb->s_bdev ? sb->s_bdev->bd_dev : 0);
1701 +
1702 +               if (sb->s_type->fs_flags & FS_IS_FUSE &&
1703 +                   sb->s_frozen == SB_UNFROZEN &&
1704 +                   which & FS_FREEZER_FUSE) {
1705 +                       sb->s_frozen = SB_FREEZE_TRANS;
1706 +                       sb->s_flags |= MS_FROZEN;
1707 +                       FS_PRINTK("Fuse filesystem done.\n");
1708 +                       continue;
1709 +               }
1710 +
1711 +               if (!sb->s_root || !sb->s_bdev ||
1712 +                   (sb->s_frozen == SB_FREEZE_TRANS) ||
1713 +                   (sb->s_flags & MS_RDONLY) ||
1714 +                   (sb->s_flags & MS_FROZEN) ||
1715 +                   !(which & FS_FREEZER_NORMAL)) {
1716 +                       FS_PRINTK(KERN_INFO "Nope.\n");
1717 +                       continue;
1718 +               }
1719 +
1720 +               FS_PRINTK(KERN_INFO "Freezing %x... ", sb->s_bdev->bd_dev);
1721 +               freeze_bdev(sb->s_bdev);
1722 +               sb->s_flags |= MS_FROZEN;
1723 +               FS_PRINTK(KERN_INFO "Done.\n");
1724 +       }
1725 +
1726 +       lockdep_on();
1727 +}
1728 +
1729 +/**
1730 + * thaw_filesystems - unlock all filesystems
1731 + * @which:     What combination of fuse & non-fuse to thaw.
1732 + */
1733 +void thaw_filesystems(int which)
1734 +{
1735 +       struct super_block *sb;
1736 +
1737 +       lockdep_off();
1738 +
1739 +       list_for_each_entry(sb, &super_blocks, s_list) {
1740 +               if (!(sb->s_flags & MS_FROZEN))
1741 +                       continue;
1742 +
1743 +               if (sb->s_type->fs_flags & FS_IS_FUSE) {
1744 +                       if (!(which & FS_FREEZER_FUSE))
1745 +                               continue;
1746 +
1747 +                       sb->s_frozen = SB_UNFROZEN;
1748 +               } else {
1749 +                       if (!(which & FS_FREEZER_NORMAL))
1750 +                               continue;
1751 +
1752 +                       thaw_bdev(sb->s_bdev, sb);
1753 +               }
1754 +               sb->s_flags &= ~MS_FROZEN;
1755 +       }
1756 +
1757 +       lockdep_on();
1758 +}
1759 +
1760  static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
1761  {
1762         return block_write_full_page(page, blkdev_get_block, wbc);
1763 diff --git a/fs/drop_caches.c b/fs/drop_caches.c
1764 index b6a719a..30ef3f3 100644
1765 --- a/fs/drop_caches.c
1766 +++ b/fs/drop_caches.c
1767 @@ -8,6 +8,7 @@
1768  #include <linux/writeback.h>
1769  #include <linux/sysctl.h>
1770  #include <linux/gfp.h>
1771 +#include <linux/module.h>
1772  
1773  /* A global variable is a bit ugly, but it keeps the code simple */
1774  int sysctl_drop_caches;
1775 @@ -33,7 +34,7 @@ static void drop_pagecache_sb(struct super_block *sb)
1776         iput(toput_inode);
1777  }
1778  
1779 -static void drop_pagecache(void)
1780 +void drop_pagecache(void)
1781  {
1782         struct super_block *sb;
1783  
1784 @@ -61,6 +62,7 @@ static void drop_slab(void)
1785                 nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
1786         } while (nr_objects > 10);
1787  }
1788 +EXPORT_SYMBOL_GPL(drop_pagecache);
1789  
1790  int drop_caches_sysctl_handler(ctl_table *table, int write,
1791         struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
1792 diff --git a/fs/fuse/control.c b/fs/fuse/control.c
1793 index 99c99df..cadffd8 100644
1794 --- a/fs/fuse/control.c
1795 +++ b/fs/fuse/control.c
1796 @@ -209,6 +209,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
1797  static struct file_system_type fuse_ctl_fs_type = {
1798         .owner          = THIS_MODULE,
1799         .name           = "fusectl",
1800 +       .fs_flags       = FS_IS_FUSE,
1801         .get_sb         = fuse_ctl_get_sb,
1802         .kill_sb        = fuse_ctl_kill_sb,
1803  };
1804 diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
1805 index ba76b68..e9942d4 100644
1806 --- a/fs/fuse/dev.c
1807 +++ b/fs/fuse/dev.c
1808 @@ -7,6 +7,7 @@
1809  */
1810  
1811  #include "fuse_i.h"
1812 +#include "fuse.h"
1813  
1814  #include <linux/init.h>
1815  #include <linux/module.h>
1816 @@ -16,6 +17,7 @@
1817  #include <linux/pagemap.h>
1818  #include <linux/file.h>
1819  #include <linux/slab.h>
1820 +#include <linux/freezer.h>
1821  
1822  MODULE_ALIAS_MISCDEV(FUSE_MINOR);
1823  
1824 @@ -752,6 +754,8 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1825         if (!fc)
1826                 return -EPERM;
1827  
1828 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_dev_read");
1829 +
1830   restart:
1831         spin_lock(&fc->lock);
1832         err = -EAGAIN;
1833 @@ -912,6 +916,9 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1834         if (!fc)
1835                 return -EPERM;
1836  
1837 +       FUSE_MIGHT_FREEZE(iocb->ki_filp->f_mapping->host->i_sb,
1838 +                       "fuse_dev_write");
1839 +
1840         fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
1841         if (nbytes < sizeof(struct fuse_out_header))
1842                 return -EINVAL;
1843 diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
1844 index 8b8eebc..31cda20 100644
1845 --- a/fs/fuse/dir.c
1846 +++ b/fs/fuse/dir.c
1847 @@ -7,12 +7,14 @@
1848  */
1849  
1850  #include "fuse_i.h"
1851 +#include "fuse.h"
1852  
1853  #include <linux/pagemap.h>
1854  #include <linux/file.h>
1855  #include <linux/gfp.h>
1856  #include <linux/sched.h>
1857  #include <linux/namei.h>
1858 +#include <linux/freezer.h>
1859  
1860  #if BITS_PER_LONG >= 64
1861  static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
1862 @@ -174,6 +176,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
1863                         return 0;
1864  
1865                 fc = get_fuse_conn(inode);
1866 +
1867 +               FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_dentry_revalidate");
1868 +
1869                 req = fuse_get_req(fc);
1870                 if (IS_ERR(req))
1871                         return 0;
1872 @@ -268,6 +273,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
1873         if (name->len > FUSE_NAME_MAX)
1874                 goto out;
1875  
1876 +       FUSE_MIGHT_FREEZE(sb, "fuse_lookup_name");
1877 +
1878         req = fuse_get_req(fc);
1879         err = PTR_ERR(req);
1880         if (IS_ERR(req))
1881 @@ -331,6 +338,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
1882         if (err)
1883                 goto out_err;
1884  
1885 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_lookup");
1886 +
1887         err = -EIO;
1888         if (inode && get_node_id(inode) == FUSE_ROOT_ID)
1889                 goto out_iput;
1890 @@ -402,6 +411,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
1891         if (IS_ERR(forget_req))
1892                 return PTR_ERR(forget_req);
1893  
1894 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_create_open");
1895 +
1896         req = fuse_get_req(fc);
1897         err = PTR_ERR(req);
1898         if (IS_ERR(req))
1899 @@ -488,6 +499,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
1900         int err;
1901         struct fuse_req *forget_req;
1902  
1903 +       FUSE_MIGHT_FREEZE(dir->i_sb, "create_new_entry");
1904 +
1905         forget_req = fuse_get_req(fc);
1906         if (IS_ERR(forget_req)) {
1907                 fuse_put_request(fc, req);
1908 @@ -585,7 +598,11 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
1909  {
1910         struct fuse_mkdir_in inarg;
1911         struct fuse_conn *fc = get_fuse_conn(dir);
1912 -       struct fuse_req *req = fuse_get_req(fc);
1913 +       struct fuse_req *req;
1914 +
1915 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_mkdir");
1916 +
1917 +       req = fuse_get_req(fc);
1918         if (IS_ERR(req))
1919                 return PTR_ERR(req);
1920  
1921 @@ -605,7 +622,11 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
1922  {
1923         struct fuse_conn *fc = get_fuse_conn(dir);
1924         unsigned len = strlen(link) + 1;
1925 -       struct fuse_req *req = fuse_get_req(fc);
1926 +       struct fuse_req *req;
1927 +
1928 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_symlink");
1929 +
1930 +       req = fuse_get_req(fc);
1931         if (IS_ERR(req))
1932                 return PTR_ERR(req);
1933  
1934 @@ -622,7 +643,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
1935  {
1936         int err;
1937         struct fuse_conn *fc = get_fuse_conn(dir);
1938 -       struct fuse_req *req = fuse_get_req(fc);
1939 +       struct fuse_req *req;
1940 +
1941 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_unlink");
1942 +
1943 +       req = fuse_get_req(fc);
1944         if (IS_ERR(req))
1945                 return PTR_ERR(req);
1946  
1947 @@ -655,7 +680,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
1948  {
1949         int err;
1950         struct fuse_conn *fc = get_fuse_conn(dir);
1951 -       struct fuse_req *req = fuse_get_req(fc);
1952 +       struct fuse_req *req;
1953 +
1954 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_rmdir");
1955 +
1956 +       req = fuse_get_req(fc);
1957         if (IS_ERR(req))
1958                 return PTR_ERR(req);
1959  
1960 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
1961 index 06f30e9..80ad032 100644
1962 --- a/fs/fuse/file.c
1963 +++ b/fs/fuse/file.c
1964 @@ -7,11 +7,13 @@
1965  */
1966  
1967  #include "fuse_i.h"
1968 +#include "fuse.h"
1969  
1970  #include <linux/pagemap.h>
1971  #include <linux/slab.h>
1972  #include <linux/kernel.h>
1973  #include <linux/sched.h>
1974 +#include <linux/freezer.h>
1975  
1976  static const struct file_operations fuse_direct_io_file_operations;
1977  
1978 @@ -23,6 +25,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
1979         struct fuse_req *req;
1980         int err;
1981  
1982 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_send_open");
1983 +
1984         req = fuse_get_req(fc);
1985         if (IS_ERR(req))
1986                 return PTR_ERR(req);
1987 @@ -279,6 +283,8 @@ static int fuse_flush(struct file *file, fl_owner_t id)
1988         if (fc->no_flush)
1989                 return 0;
1990  
1991 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_flush");
1992 +
1993         req = fuse_get_req_nofail(fc, file);
1994         memset(&inarg, 0, sizeof(inarg));
1995         inarg.fh = ff->fh;
1996 @@ -330,6 +336,8 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
1997         if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
1998                 return 0;
1999  
2000 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_fsync_common");
2001 +
2002         /*
2003          * Start writeback against all dirty pages of the inode, then
2004          * wait for all outstanding writes, before sending the FSYNC
2005 @@ -437,6 +445,8 @@ static int fuse_readpage(struct file *file, struct page *page)
2006         if (is_bad_inode(inode))
2007                 goto out;
2008  
2009 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_readpage");
2010 +
2011         /*
2012          * Page writeback can extend beyond the liftime of the
2013          * page-cache page, so make sure we read a properly synced
2014 @@ -540,6 +550,9 @@ static int fuse_readpages_fill(void *_data, struct page *page)
2015         struct inode *inode = data->inode;
2016         struct fuse_conn *fc = get_fuse_conn(inode);
2017  
2018 +       FUSE_MIGHT_FREEZE(data->file->f_mapping->host->i_sb,
2019 +                       "fuse_readpages_fill");
2020 +
2021         fuse_wait_on_page_writeback(inode, page->index);
2022  
2023         if (req->num_pages &&
2024 @@ -570,6 +583,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
2025         if (is_bad_inode(inode))
2026                 goto out;
2027  
2028 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_readpages");
2029 +
2030         data.file = file;
2031         data.inode = inode;
2032         data.req = fuse_get_req(fc);
2033 @@ -686,6 +701,8 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
2034         if (is_bad_inode(inode))
2035                 return -EIO;
2036  
2037 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_buffered_write");
2038 +
2039         /*
2040          * Make sure writepages on the same page are not mixed up with
2041          * plain writes.
2042 @@ -842,6 +859,8 @@ static ssize_t fuse_perform_write(struct file *file,
2043                 struct fuse_req *req;
2044                 ssize_t count;
2045  
2046 +               FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_perform_write");
2047 +
2048                 req = fuse_get_req(fc);
2049                 if (IS_ERR(req)) {
2050                         err = PTR_ERR(req);
2051 @@ -992,6 +1011,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
2052         if (is_bad_inode(inode))
2053                 return -EIO;
2054  
2055 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_direct_io");
2056 +
2057         req = fuse_get_req(fc);
2058         if (IS_ERR(req))
2059                 return PTR_ERR(req);
2060 @@ -1360,6 +1381,8 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
2061         struct fuse_lk_out outarg;
2062         int err;
2063  
2064 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_getlk");
2065 +
2066         req = fuse_get_req(fc);
2067         if (IS_ERR(req))
2068                 return PTR_ERR(req);
2069 @@ -1395,6 +1418,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
2070         if (fl->fl_flags & FL_CLOSE)
2071                 return 0;
2072  
2073 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_setlk");
2074 +
2075         req = fuse_get_req(fc);
2076         if (IS_ERR(req))
2077                 return PTR_ERR(req);
2078 @@ -1461,6 +1486,8 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
2079         if (!inode->i_sb->s_bdev || fc->no_bmap)
2080                 return 0;
2081  
2082 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_bmap");
2083 +
2084         req = fuse_get_req(fc);
2085         if (IS_ERR(req))
2086                 return 0;
2087 diff --git a/fs/fuse/fuse.h b/fs/fuse/fuse.h
2088 new file mode 100644
2089 index 0000000..170e49a
2090 --- /dev/null
2091 +++ b/fs/fuse/fuse.h
2092 @@ -0,0 +1,13 @@
2093 +#define FUSE_MIGHT_FREEZE(superblock, desc) \
2094 +do { \
2095 +       int printed = 0; \
2096 +       while (superblock->s_frozen != SB_UNFROZEN) { \
2097 +               if (!printed) { \
2098 +                       printk(KERN_INFO "%d frozen in " desc ".\n", \
2099 +                                               current->pid); \
2100 +                       printed = 1; \
2101 +               } \
2102 +               try_to_freeze(); \
2103 +               yield(); \
2104 +       } \
2105 +} while (0)
2106 diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
2107 index 91f7c85..cb18b16 100644
2108 --- a/fs/fuse/inode.c
2109 +++ b/fs/fuse/inode.c
2110 @@ -929,7 +929,7 @@ static int fuse_get_sb(struct file_system_type *fs_type,
2111  static struct file_system_type fuse_fs_type = {
2112         .owner          = THIS_MODULE,
2113         .name           = "fuse",
2114 -       .fs_flags       = FS_HAS_SUBTYPE,
2115 +       .fs_flags       = FS_HAS_SUBTYPE | FS_IS_FUSE,
2116         .get_sb         = fuse_get_sb,
2117         .kill_sb        = kill_anon_super,
2118  };
2119 @@ -948,7 +948,7 @@ static struct file_system_type fuseblk_fs_type = {
2120         .name           = "fuseblk",
2121         .get_sb         = fuse_get_sb_blk,
2122         .kill_sb        = kill_block_super,
2123 -       .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
2124 +       .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_IS_FUSE,
2125  };
2126  
2127  static inline int register_fuseblk(void)
2128 diff --git a/fs/namei.c b/fs/namei.c
2129 index 967c3db..bffeb61 100644
2130 --- a/fs/namei.c
2131 +++ b/fs/namei.c
2132 @@ -2220,6 +2220,8 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2133         if (!dir->i_op->unlink)
2134                 return -EPERM;
2135  
2136 +       vfs_check_frozen(dir->i_sb, SB_FREEZE_WRITE);
2137 +
2138         vfs_dq_init(dir);
2139  
2140         mutex_lock(&dentry->d_inode->i_mutex);
2141 diff --git a/fs/super.c b/fs/super.c
2142 index 1943fdf..0ad40de 100644
2143 --- a/fs/super.c
2144 +++ b/fs/super.c
2145 @@ -44,6 +44,8 @@
2146  
2147  
2148  LIST_HEAD(super_blocks);
2149 +EXPORT_SYMBOL_GPL(super_blocks);
2150 +
2151  DEFINE_SPINLOCK(sb_lock);
2152  
2153  /**
2154 diff --git a/include/linux/Kbuild b/include/linux/Kbuild
2155 index 3f0eaa3..40e2a83 100644
2156 --- a/include/linux/Kbuild
2157 +++ b/include/linux/Kbuild
2158 @@ -209,6 +209,7 @@ unifdef-y += filter.h
2159  unifdef-y += flat.h
2160  unifdef-y += futex.h
2161  unifdef-y += fs.h
2162 +unifdef-y += freezer.h
2163  unifdef-y += gameport.h
2164  unifdef-y += generic_serial.h
2165  unifdef-y += hayesesp.h
2166 diff --git a/include/linux/freezer.h b/include/linux/freezer.h
2167 index 5a361f8..c775cd1 100644
2168 --- a/include/linux/freezer.h
2169 +++ b/include/linux/freezer.h
2170 @@ -121,6 +121,23 @@ static inline void set_freezable(void)
2171         current->flags &= ~PF_NOFREEZE;
2172  }
2173  
2174 +#ifdef CONFIG_PM_SLEEP
2175 +extern int freezer_state;
2176 +#define FREEZER_OFF 0
2177 +#define FREEZER_FILESYSTEMS_FROZEN 1
2178 +#define FREEZER_USERSPACE_FROZEN 2
2179 +#define FREEZER_FULLY_ON 3
2180 +
2181 +static inline int freezer_is_on(void)
2182 +{
2183 +       return freezer_state == FREEZER_FULLY_ON;
2184 +}
2185 +#else
2186 +static inline int freezer_is_on(void) { return 0; }
2187 +#endif
2188 +
2189 +extern void thaw_kernel_threads(void);
2190 +
2191  /*
2192   * Tell the freezer that the current task should be frozen by it and that it
2193   * should send a fake signal to the task to freeze it.
2194 @@ -172,6 +189,8 @@ static inline int freeze_processes(void) { BUG(); return 0; }
2195  static inline void thaw_processes(void) {}
2196  
2197  static inline int try_to_freeze(void) { return 0; }
2198 +static inline int freezer_is_on(void) { return 0; }
2199 +static inline void thaw_kernel_threads(void) { }
2200  
2201  static inline void freezer_do_not_count(void) {}
2202  static inline void freezer_count(void) {}
2203 diff --git a/include/linux/fs.h b/include/linux/fs.h
2204 index 3b534e5..46dc165 100644
2205 --- a/include/linux/fs.h
2206 +++ b/include/linux/fs.h
2207 @@ -8,6 +8,7 @@
2208  
2209  #include <linux/limits.h>
2210  #include <linux/ioctl.h>
2211 +#include <linux/freezer.h>
2212  
2213  /*
2214   * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
2215 @@ -172,6 +173,7 @@ struct inodes_stat_t {
2216  #define FS_REQUIRES_DEV 1 
2217  #define FS_BINARY_MOUNTDATA 2
2218  #define FS_HAS_SUBTYPE 4
2219 +#define FS_IS_FUSE     8       /* Fuse filesystem - bdev freeze these too */
2220  #define FS_REVAL_DOT   16384   /* Check the paths ".", ".." for staleness */
2221  #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move()
2222                                          * during rename() internally.
2223 @@ -205,6 +207,7 @@ struct inodes_stat_t {
2224  #define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
2225  #define MS_I_VERSION   (1<<23) /* Update inode I_version field */
2226  #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
2227 +#define MS_FROZEN      (1<<25) /* Frozen by freeze_filesystems() */
2228  #define MS_ACTIVE      (1<<30)
2229  #define MS_NOUSER      (1<<31)
2230  
2231 @@ -231,6 +234,8 @@ struct inodes_stat_t {
2232  #define S_NOCMTIME     128     /* Do not update file c/mtime */
2233  #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
2234  #define S_PRIVATE      512     /* Inode is fs-internal */
2235 +#define S_ATOMIC_COPY  1024    /* Pages mapped with this inode need to be
2236 +                                  atomically copied (gem) */
2237  
2238  /*
2239   * Note that nosuid etc flags are inode-specific: setting some file-system
2240 @@ -1390,8 +1395,11 @@ enum {
2241         SB_FREEZE_TRANS = 2,
2242  };
2243  
2244 -#define vfs_check_frozen(sb, level) \
2245 -       wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
2246 +#define vfs_check_frozen(sb, level) do { \
2247 +       freezer_do_not_count(); \
2248 +       wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))); \
2249 +       freezer_count(); \
2250 +} while (0)
2251  
2252  #define get_fs_excl() atomic_inc(&current->fs_excl)
2253  #define put_fs_excl() atomic_dec(&current->fs_excl)
2254 @@ -1949,6 +1957,11 @@ extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
2255  extern int fsync_bdev(struct block_device *);
2256  extern int fsync_super(struct super_block *);
2257  extern int fsync_no_super(struct block_device *);
2258 +#define FS_FREEZER_FUSE 1
2259 +#define FS_FREEZER_NORMAL 2
2260 +#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL)
2261 +void freeze_filesystems(int which);
2262 +void thaw_filesystems(int which);
2263  #else
2264  static inline void bd_forget(struct inode *inode) {}
2265  static inline int sync_blockdev(struct block_device *bdev) { return 0; }
2266 diff --git a/include/linux/mm.h b/include/linux/mm.h
2267 index bff1f0d..c4199cd 100644
2268 --- a/include/linux/mm.h
2269 +++ b/include/linux/mm.h
2270 @@ -105,6 +105,7 @@ extern unsigned int kobjsize(const void *objp);
2271  #define VM_MIXEDMAP    0x10000000      /* Can contain "struct page" and pure PFN pages */
2272  #define VM_SAO         0x20000000      /* Strong Access Ordering (powerpc) */
2273  #define VM_PFN_AT_MMAP 0x40000000      /* PFNMAP vma that is fully mapped at mmap time */
2274 +#define VM_ATOMIC_COPY 0x80000000      /* TuxOnIce should atomically copy */
2275  
2276  #ifndef VM_STACK_DEFAULT_FLAGS         /* arch can override this */
2277  #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
2278 @@ -1297,6 +1298,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
2279                                         void __user *, size_t *, loff_t *);
2280  unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
2281                         unsigned long lru_pages);
2282 +void drop_pagecache(void);
2283  
2284  #ifndef CONFIG_MMU
2285  #define randomize_va_space 0
2286 diff --git a/include/linux/netlink.h b/include/linux/netlink.h
2287 index 5ba398e..f220828 100644
2288 --- a/include/linux/netlink.h
2289 +++ b/include/linux/netlink.h
2290 @@ -24,6 +24,8 @@
2291  /* leave room for NETLINK_DM (DM Events) */
2292  #define NETLINK_SCSITRANSPORT  18      /* SCSI Transports */
2293  #define NETLINK_ECRYPTFS       19
2294 +#define NETLINK_TOI_USERUI     20      /* TuxOnIce's userui */
2295 +#define NETLINK_TOI_USM                21      /* Userspace storage manager */
2296  
2297  #define MAX_LINKS 32           
2298  
2299 diff --git a/include/linux/suspend.h b/include/linux/suspend.h
2300 index 795032e..1f52617 100644
2301 --- a/include/linux/suspend.h
2302 +++ b/include/linux/suspend.h
2303 @@ -308,4 +308,70 @@ static inline void register_nosave_region_late(unsigned long b, unsigned long e)
2304  
2305  extern struct mutex pm_mutex;
2306  
2307 +enum {
2308 +       TOI_CAN_HIBERNATE,
2309 +       TOI_CAN_RESUME,
2310 +       TOI_RESUME_DEVICE_OK,
2311 +       TOI_NORESUME_SPECIFIED,
2312 +       TOI_SANITY_CHECK_PROMPT,
2313 +       TOI_CONTINUE_REQ,
2314 +       TOI_RESUMED_BEFORE,
2315 +       TOI_BOOT_TIME,
2316 +       TOI_NOW_RESUMING,
2317 +       TOI_IGNORE_LOGLEVEL,
2318 +       TOI_TRYING_TO_RESUME,
2319 +       TOI_LOADING_ALT_IMAGE,
2320 +       TOI_STOP_RESUME,
2321 +       TOI_IO_STOPPED,
2322 +       TOI_NOTIFIERS_PREPARE,
2323 +       TOI_CLUSTER_MODE,
2324 +       TOI_BOOT_KERNEL,
2325 +};
2326 +
2327 +#ifdef CONFIG_TOI
2328 +
2329 +/* Used in init dir files */
2330 +extern unsigned long toi_state;
2331 +#define set_toi_state(bit) (set_bit(bit, &toi_state))
2332 +#define clear_toi_state(bit) (clear_bit(bit, &toi_state))
2333 +#define test_toi_state(bit) (test_bit(bit, &toi_state))
2334 +extern int toi_running;
2335 +
2336 +#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action))
2337 +extern int try_tuxonice_hibernate(void);
2338 +
2339 +#else /* !CONFIG_TOI */
2340 +
2341 +#define toi_state              (0)
2342 +#define set_toi_state(bit) do { } while (0)
2343 +#define clear_toi_state(bit) do { } while (0)
2344 +#define test_toi_state(bit) (0)
2345 +#define toi_running (0)
2346 +
2347 +static inline int try_tuxonice_hibernate(void) { return 0; }
2348 +#define test_action_state(bit) (0)
2349 +
2350 +#endif /* CONFIG_TOI */
2351 +
2352 +#ifdef CONFIG_HIBERNATION
2353 +#ifdef CONFIG_TOI
2354 +extern void try_tuxonice_resume(void);
2355 +#else
2356 +#define try_tuxonice_resume() do { } while (0)
2357 +#endif
2358 +
2359 +extern int resume_attempted;
2360 +extern int software_resume(void);
2361 +
2362 +static inline void check_resume_attempted(void)
2363 +{
2364 +       if (resume_attempted)
2365 +               return;
2366 +
2367 +       software_resume();
2368 +}
2369 +#else
2370 +#define check_resume_attempted() do { } while (0)
2371 +#define resume_attempted (0)
2372 +#endif
2373  #endif /* _LINUX_SUSPEND_H */
2374 diff --git a/include/linux/swap.h b/include/linux/swap.h
2375 index d476aad..b522e83 100644
2376 --- a/include/linux/swap.h
2377 +++ b/include/linux/swap.h
2378 @@ -168,6 +168,7 @@ struct swap_list_t {
2379  extern unsigned long totalram_pages;
2380  extern unsigned long totalreserve_pages;
2381  extern unsigned int nr_free_buffer_pages(void);
2382 +extern unsigned int nr_unallocated_buffer_pages(void);
2383  extern unsigned int nr_free_pagecache_pages(void);
2384  
2385  /* Definition of global_page_state not available yet */
2386 diff --git a/init/do_mounts.c b/init/do_mounts.c
2387 index dd7ee5f..5ecae29 100644
2388 --- a/init/do_mounts.c
2389 +++ b/init/do_mounts.c
2390 @@ -143,6 +143,7 @@ fail:
2391  done:
2392         return res;
2393  }
2394 +EXPORT_SYMBOL_GPL(name_to_dev_t);
2395  
2396  static int __init root_dev_setup(char *line)
2397  {
2398 @@ -412,6 +413,8 @@ void __init prepare_namespace(void)
2399         if (is_floppy && rd_doload && rd_load_disk(0))
2400                 ROOT_DEV = Root_RAM0;
2401  
2402 +       check_resume_attempted();
2403 +
2404         mount_root();
2405  out:
2406         sys_mount(".", "/", NULL, MS_MOVE, NULL);
2407 diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
2408 index 614241b..f3ea292 100644
2409 --- a/init/do_mounts_initrd.c
2410 +++ b/init/do_mounts_initrd.c
2411 @@ -6,6 +6,7 @@
2412  #include <linux/romfs_fs.h>
2413  #include <linux/initrd.h>
2414  #include <linux/sched.h>
2415 +#include <linux/suspend.h>
2416  #include <linux/freezer.h>
2417  
2418  #include "do_mounts.h"
2419 @@ -68,6 +69,11 @@ static void __init handle_initrd(void)
2420  
2421         current->flags &= ~PF_FREEZER_SKIP;
2422  
2423 +       if (!resume_attempted)
2424 +               printk(KERN_ERR "TuxOnIce: No attempt was made to resume from "
2425 +                               "any image that might exist.\n");
2426 +       clear_toi_state(TOI_BOOT_TIME);
2427 +
2428         /* move initrd to rootfs' /old */
2429         sys_fchdir(old_fd);
2430         sys_mount("/", ".", NULL, MS_MOVE, NULL);
2431 diff --git a/init/main.c b/init/main.c
2432 index d721dad..1c0b018 100644
2433 --- a/init/main.c
2434 +++ b/init/main.c
2435 @@ -117,6 +117,7 @@ extern void softirq_init(void);
2436  char __initdata boot_command_line[COMMAND_LINE_SIZE];
2437  /* Untouched saved command line (eg. for /proc) */
2438  char *saved_command_line;
2439 +EXPORT_SYMBOL_GPL(saved_command_line);
2440  /* Command line for parameter parsing */
2441  static char *static_command_line;
2442  
2443 diff --git a/kernel/cpu.c b/kernel/cpu.c
2444 index 395b697..fe274d1 100644
2445 --- a/kernel/cpu.c
2446 +++ b/kernel/cpu.c
2447 @@ -415,6 +415,7 @@ int disable_nonboot_cpus(void)
2448         stop_machine_destroy();
2449         return error;
2450  }
2451 +EXPORT_SYMBOL_GPL(disable_nonboot_cpus);
2452  
2453  void __ref enable_nonboot_cpus(void)
2454  {
2455 @@ -439,6 +440,7 @@ void __ref enable_nonboot_cpus(void)
2456  out:
2457         cpu_maps_update_done();
2458  }
2459 +EXPORT_SYMBOL_GPL(enable_nonboot_cpus);
2460  
2461  static int alloc_frozen_cpus(void)
2462  {
2463 diff --git a/kernel/fork.c b/kernel/fork.c
2464 index 875ffbd..c9df3fe 100644
2465 --- a/kernel/fork.c
2466 +++ b/kernel/fork.c
2467 @@ -82,6 +82,7 @@ int max_threads;              /* tunable limit on nr_threads */
2468  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
2469  
2470  __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
2471 +EXPORT_SYMBOL_GPL(tasklist_lock);
2472  
2473  DEFINE_TRACE(sched_process_fork);
2474  
2475 diff --git a/kernel/kmod.c b/kernel/kmod.c
2476 index 7e95bed..41e5186 100644
2477 --- a/kernel/kmod.c
2478 +++ b/kernel/kmod.c
2479 @@ -319,6 +319,7 @@ int usermodehelper_disable(void)
2480         usermodehelper_disabled = 0;
2481         return -EAGAIN;
2482  }
2483 +EXPORT_SYMBOL_GPL(usermodehelper_disable);
2484  
2485  /**
2486   * usermodehelper_enable - allow new helpers to be started again
2487 @@ -327,6 +328,7 @@ void usermodehelper_enable(void)
2488  {
2489         usermodehelper_disabled = 0;
2490  }
2491 +EXPORT_SYMBOL_GPL(usermodehelper_enable);
2492  
2493  static void helper_lock(void)
2494  {
2495 diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
2496 index 23bd4da..7638270 100644
2497 --- a/kernel/power/Kconfig
2498 +++ b/kernel/power/Kconfig
2499 @@ -38,6 +38,13 @@ config CAN_PM_TRACE
2500         def_bool y
2501         depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
2502  
2503 +config FS_FREEZER_DEBUG
2504 +       bool "Filesystem freezer debugging"
2505 +       depends on PM_DEBUG
2506 +       default n
2507 +       ---help---
2508 +       This option enables debugging of the filesystem freezing code.
2509 +
2510  config PM_TRACE
2511         bool
2512         help
2513 @@ -179,6 +186,237 @@ config PM_STD_PARTITION
2514           suspended image to. It will simply pick the first available swap 
2515           device.
2516  
2517 +menuconfig TOI_CORE
2518 +       tristate "Enhanced Hibernation (TuxOnIce)"
2519 +       depends on HIBERNATION
2520 +       default y
2521 +       ---help---
2522 +         TuxOnIce is the 'new and improved' suspend support.
2523 +
2524 +         See the TuxOnIce home page (tuxonice.net)
2525 +         for FAQs, HOWTOs and other documentation.
2526 +
2527 +       comment "Image Storage (you need at least one allocator)"
2528 +               depends on TOI_CORE
2529 +
2530 +       config TOI_FILE
2531 +               tristate "File Allocator"
2532 +               depends on TOI_CORE
2533 +               default y
2534 +               ---help---
2535 +                 This option enables support for storing an image in a
2536 +                 simple file. This should work, but we're still
2537 +                 testing it.
2538 +
2539 +       config TOI_SWAP
2540 +               tristate "Swap Allocator"
2541 +               depends on TOI_CORE && SWAP
2542 +               default y
2543 +               ---help---
2544 +                 This option enables support for storing an image in your
2545 +                 swap space.
2546 +
2547 +       comment "General Options"
2548 +               depends on TOI_CORE
2549 +
2550 +       config TOI_CRYPTO
2551 +               tristate "Compression support"
2552 +               depends on TOI_CORE && CRYPTO
2553 +               default y
2554 +               ---help---
2555 +                 This option adds support for using cryptoapi compression
2556 +                 algorithms. Compression is particularly useful as it can
2557 +                 more than double your suspend and resume speed (depending
2558 +                 upon how well your image compresses).
2559 +
2560 +                 You probably want this, so say Y here.
2561 +
2562 +       comment "No compression support available without Cryptoapi support."
2563 +               depends on TOI_CORE && !CRYPTO
2564 +
2565 +       config TOI_USERUI
2566 +               tristate "Userspace User Interface support"
2567 +               depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE)
2568 +               default y
2569 +               ---help---
2570 +                 This option enables support for a userspace-based user interface
2571 +                 to TuxOnIce, which gives you a nice display while suspending
2572 +                 and resuming, and also enables features such as pressing Escape to
2573 +                 cancel a cycle and interactive debugging.
2574 +
2575 +       config TOI_USERUI_DEFAULT_PATH
2576 +               string "Default userui program location"
2577 +               default "/usr/local/sbin/tuxoniceui_text"
2578 +               depends on TOI_USERUI
2579 +               ---help---
2580 +                 This entry allows you to specify a default path to the userui binary.
2581 +
2582 +       config TOI_KEEP_IMAGE
2583 +               bool "Allow Keep Image Mode"
2584 +               depends on TOI_CORE
2585 +               ---help---
2586 +                 This option allows you to keep an image and reuse it. It is intended
2587 +                 __ONLY__ for use with systems where all filesystems are mounted read-
2588 +                 only (kiosks, for example). To use it, compile this option in and boot
2589 +                 normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend.
2590 +                 When you resume, the image will not be removed. You will be unable to turn
2591 +                 off swap partitions (assuming you are using the swap allocator), but future
2592 +                 suspends simply do a power-down. The image can be updated using the
2593 +                 kernel command line parameter suspend_act= to turn off the keep image
2594 +                 bit. Keep image mode is a little less user-friendly on purpose - it
2595 +                 should not be used without thought!
2596 +
2597 +       config TOI_REPLACE_SWSUSP
2598 +               bool "Replace swsusp by default"
2599 +               default y
2600 +               depends on TOI_CORE
2601 +               ---help---
2602 +                 TuxOnIce can replace swsusp. This option makes that the default state,
2603 +                 requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want
2604 +                 to use the vanilla kernel functionality. Note that your initrd/ramfs will
2605 +                 need to do this before trying to resume, too.
2606 +                 With overriding of swsusp enabled, echoing 'disk' to /sys/power/state will
2607 +                 start a TuxOnIce cycle. If resume= doesn't specify an allocator and both
2608 +                 the swap and file allocators are compiled in, the swap allocator will be
2609 +                 used by default.
2610 +
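
As a minimal userspace sketch of the toggle described above - the sysfs path
comes from the help text; everything else is invented for illustration - an
initrd/ramfs helper could restore vanilla swsusp behaviour like this:

	/* Hypothetical helper: write "0" to the replace_swsusp toggle so
	 * the vanilla kernel resume path is used, per the help text. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/power/tuxonice/replace_swsusp", "w");

		if (!f) {
			perror("replace_swsusp");
			return 1;
		}
		fputs("0", f);	/* 0 = don't replace swsusp */
		return fclose(f) ? 1 : 0;
	}
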
2611 +       config TOI_IGNORE_LATE_INITCALL
2612 +               bool "Wait for initrd/ramfs to run, by default"
2613 +               default n
2614 +               depends on TOI_CORE
2615 +               ---help---
2616 +                 When booting, TuxOnIce can check for an image and start to resume prior
2617 +                 to any initrd/ramfs running (via a late initcall).
2618 +
2619 +                 If you don't have an initrd/ramfs, this early resume is what you want
2620 +                 to happen - otherwise you won't be able to resume safely - so you
2621 +                 should set this option to 'No'.
2622 +
2623 +                 If, however, you want your initrd/ramfs to run anyway before resuming,
2624 +                 you need to tell TuxOnIce to ignore that earlier opportunity to resume.
2625 +                 This can be done either by using this compile-time option, or by
2626 +                 overriding this option with the boot-time parameter toi_initramfs_resume_only=1.
2627 +
2628 +                 Note that if TuxOnIce can't resume at the earlier opportunity, the
2629 +                 value of this option won't matter - the initramfs/initrd (if any) will
2630 +                 run anyway.
2631 +
2632 +       menuconfig TOI_CLUSTER
2633 +               tristate "Cluster support"
2634 +               default n
2635 +               depends on TOI_CORE && NET && BROKEN
2636 +               ---help---
2637 +                 Support for linking multiple machines in a cluster so that they suspend
2638 +                 and resume together.
2639 +
2640 +       config TOI_DEFAULT_CLUSTER_INTERFACE
2641 +               string "Default cluster interface"
2642 +               depends on TOI_CLUSTER
2643 +               ---help---
2644 +                 The default interface on which to communicate with other nodes in
2645 +                 the cluster.
2646 +
2647 +                 If no value is set here, cluster support will be disabled by default.
2648 +
2649 +       config TOI_DEFAULT_CLUSTER_KEY
2650 +               string "Default cluster key"
2651 +               default "Default"
2652 +               depends on TOI_CLUSTER
2653 +               ---help---
2654 +                 The default key used by this node. All nodes in the same cluster
2655 +                 have the same key. Multiple clusters may coexist on the same LAN
2656 +                 by using different values for this key.
2657 +
2658 +       config TOI_CLUSTER_IMAGE_TIMEOUT
2659 +               int "Timeout when checking for image"
2660 +               default 15
2661 +               depends on TOI_CLUSTER
2662 +               ---help---
2663 +                 Timeout (seconds) before continuing to boot when waiting to see
2664 +                 whether other nodes might have an image. Set to -1 to wait
2665 +                 indefinitely. If WAIT_UNTIL_NODES is non-zero, we might continue
2666 +                 booting sooner than this timeout.
2667 +
2668 +       config TOI_CLUSTER_WAIT_UNTIL_NODES
2669 +               int "Nodes without image before continuing"
2670 +               default 0
2671 +               depends on TOI_CLUSTER
2672 +               ---help---
2673 +                 When booting and no image is found, we wait to see if other nodes
2674 +                 have an image before continuing to boot. This value lets us
2675 +                 continue after seeing a certain number of nodes without an image,
2676 +                 instead of continuing to wait for the timeout. Set to 0 to only
2677 +                 use the timeout.
2678 +
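
Taken together, the two cluster options above imply a boot-time wait of
roughly the following shape. This is an illustrative C sketch only - the
identifiers are invented, and the real logic lives in tuxonice_cluster.c:

	/* Illustrative sketch only: stop waiting once enough peers report
	 * no image, or once the (non-negative) timeout expires. */
	static int should_stop_waiting(int waited_secs, int nodes_without_image)
	{
		int timeout = CONFIG_TOI_CLUSTER_IMAGE_TIMEOUT;   /* -1 = wait forever */
		int enough = CONFIG_TOI_CLUSTER_WAIT_UNTIL_NODES; /* 0 = timeout only */

		if (enough && nodes_without_image >= enough)
			return 1;
		return timeout >= 0 && waited_secs >= timeout;
	}
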
2679 +       config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE
2680 +               string "Default pre-hibernate script"
2681 +               depends on TOI_CLUSTER
2682 +               ---help---
2683 +                 The default script to be called when starting to hibernate.
2684 +
2685 +       config TOI_DEFAULT_CLUSTER_POST_HIBERNATE
2686 +               string "Default post-hibernate script"
2687 +               depends on TOI_CLUSTER
2688 +               ---help---
2689 +                 The default script to be called after resuming from hibernation.
2690 +
2691 +       config TOI_DEFAULT_WAIT
2692 +               int "Default waiting time for emergency boot messages"
2693 +               default "25"
2694 +               range -1 32768
2695 +               depends on TOI_CORE
2696 +               help
2697 +                 TuxOnIce can display warnings very early in the process of resuming,
2698 +                 if (for example) it appears that you have booted a kernel that doesn't
2699 +                 match an image on disk. It can then give you the opportunity to either
2700 +                 continue booting that kernel, or reboot the machine. This option can be
2701 +                 used to control how long to wait in such circumstances. -1 means wait
2702 +                 forever. 0 means don't wait at all (do the default action, which will
2703 +                 generally be to continue booting and remove the image). Values of 1 or
2704 +                 more indicate a number of seconds (up to 255) to wait before doing the
2705 +                 default.
2706 +
2707 +       config  TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE
2708 +               int "Default extra pages allowance"
2709 +               default "2000"
2710 +               range 500 32768
2711 +               depends on TOI_CORE
2712 +               help
2713 +                 This value controls the default for the allowance TuxOnIce makes for
2714 +                 drivers to allocate extra memory during the atomic copy. The default
2715 +                 value of 2000 will be okay in most cases. If you are using
2716 +                 DRI, the easiest way to find what value to use is to try to hibernate
2717 +                 and look at how many pages were actually needed in the sysfs entry
2718 +                 /sys/power/tuxonice/debug_info (first number on the last line), adding
2719 +                 a little extra because the value is not always the same.
2720 +
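
The help text above says to read the first number on the last line of
/sys/power/tuxonice/debug_info; a throwaway userspace sketch of doing
exactly that (the sysfs path is from the help text, the parsing is assumed):

	/* Throwaway sketch: print the first number on the last non-empty
	 * line of /sys/power/tuxonice/debug_info. */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256], last[256] = "";
		char *digits;
		long pages;
		FILE *f = fopen("/sys/power/tuxonice/debug_info", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (line[0] != '\n')
				strcpy(last, line);
		fclose(f);
		digits = strpbrk(last, "0123456789");
		if (digits && sscanf(digits, "%ld", &pages) == 1)
			printf("extra pages used: %ld\n", pages);
		return 0;
	}
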
2721 +       config TOI_CHECKSUM
2722 +               bool "Checksum pageset2"
2723 +               default n
2724 +               depends on TOI_CORE
2725 +               select CRYPTO
2726 +               select CRYPTO_ALGAPI
2727 +               select CRYPTO_MD4
2728 +               ---help---
2729 +                 Adds support for checksumming pageset2 pages, to ensure you really get an
2730 +                 atomic copy. Since some filesystems (XFS especially) change metadata even
2731 +                 when there's no other activity, we need this to check for pages that have
2732 +                 been changed while we were saving the page cache. If your debugging output
2733 +                 always says no pages were resaved, you may be able to safely disable this
2734 +                 option.
2735 +
2736 +config TOI
2737 +       bool
2738 +       depends on TOI_CORE!=n
2739 +       default y
2740 +
2741 +config TOI_EXPORTS
2742 +       bool
2743 +       depends on TOI_SWAP=m || TOI_FILE=m || \
2744 +               TOI_CRYPTO=m || TOI_CLUSTER=m || \
2745 +               TOI_USERUI=m || TOI_CORE=m
2746 +       default y
2747 +
2748  config APM_EMULATION
2749         tristate "Advanced Power Management Emulation"
2750         depends on PM && SYS_SUPPORTS_APM_EMULATION
2751 diff --git a/kernel/power/Makefile b/kernel/power/Makefile
2752 index 720ea4f..e797c0d 100644
2753 --- a/kernel/power/Makefile
2754 +++ b/kernel/power/Makefile
2755 @@ -3,6 +3,34 @@ ifeq ($(CONFIG_PM_DEBUG),y)
2756  EXTRA_CFLAGS   +=      -DDEBUG
2757  endif
2758  
2759 +tuxonice_core-objs := tuxonice_modules.o tuxonice_sysfs.o tuxonice_highlevel.o \
2760 +               tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \
2761 +               tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \
2762 +               tuxonice_power_off.o tuxonice_atomic_copy.o
2763 +
2764 +obj-$(CONFIG_TOI)              += tuxonice_builtin.o
2765 +
2766 +ifdef CONFIG_PM_DEBUG
2767 +tuxonice_core-objs             += tuxonice_alloc.o
2768 +endif
2769 +
2770 +ifdef CONFIG_TOI_CHECKSUM
2771 +tuxonice_core-objs             += tuxonice_checksum.o
2772 +endif
2773 +
2774 +ifdef CONFIG_NET
2775 +tuxonice_core-objs             += tuxonice_storage.o tuxonice_netlink.o
2776 +endif
2777 +
2778 +obj-$(CONFIG_TOI_CORE)         += tuxonice_core.o
2779 +obj-$(CONFIG_TOI_CRYPTO)       += tuxonice_compress.o
2780 +
2781 +obj-$(CONFIG_TOI_SWAP)         += tuxonice_block_io.o tuxonice_swap.o
2782 +obj-$(CONFIG_TOI_FILE)         += tuxonice_block_io.o tuxonice_file.o
2783 +obj-$(CONFIG_TOI_CLUSTER)      += tuxonice_cluster.o
2784 +
2785 +obj-$(CONFIG_TOI_USERUI)       += tuxonice_userui.o
2786 +
2787  obj-$(CONFIG_PM)               += main.o
2788  obj-$(CONFIG_PM_SLEEP)         += console.o
2789  obj-$(CONFIG_FREEZER)          += process.o
2790 diff --git a/kernel/power/disk.c b/kernel/power/disk.c
2791 index 5cb080e..4f82ed5 100644
2792 --- a/kernel/power/disk.c
2793 +++ b/kernel/power/disk.c
2794 @@ -25,11 +25,12 @@
2795  #include <scsi/scsi_scan.h>
2796  #include <asm/suspend.h>
2797  
2798 -#include "power.h"
2799 -
2800 +#include "tuxonice.h"
2801  
2802  static int noresume = 0;
2803 -static char resume_file[256] = CONFIG_PM_STD_PARTITION;
2804 +char resume_file[256] = CONFIG_PM_STD_PARTITION;
2805 +EXPORT_SYMBOL_GPL(resume_file);
2806 +
2807  dev_t swsusp_resume_device;
2808  sector_t swsusp_resume_block;
2809  
2810 @@ -115,55 +116,60 @@ static int hibernation_test(int level) { return 0; }
2811   *     hibernation
2812   */
2813  
2814 -static int platform_begin(int platform_mode)
2815 +int platform_begin(int platform_mode)
2816  {
2817         return (platform_mode && hibernation_ops) ?
2818                 hibernation_ops->begin() : 0;
2819  }
2820 +EXPORT_SYMBOL_GPL(platform_begin);
2821  
2822  /**
2823   *     platform_end - tell the platform driver that we've entered the
2824   *     working state
2825   */
2826  
2827 -static void platform_end(int platform_mode)
2828 +void platform_end(int platform_mode)
2829  {
2830         if (platform_mode && hibernation_ops)
2831                 hibernation_ops->end();
2832  }
2833 +EXPORT_SYMBOL_GPL(platform_end);
2834  
2835  /**
2836   *     platform_pre_snapshot - prepare the machine for hibernation using the
2837   *     platform driver if so configured and return an error code if it fails
2838   */
2839  
2840 -static int platform_pre_snapshot(int platform_mode)
2841 +int platform_pre_snapshot(int platform_mode)
2842  {
2843         return (platform_mode && hibernation_ops) ?
2844                 hibernation_ops->pre_snapshot() : 0;
2845  }
2846 +EXPORT_SYMBOL_GPL(platform_pre_snapshot);
2847  
2848  /**
2849   *     platform_leave - prepare the machine for switching to the normal mode
2850   *     of operation using the platform driver (called with interrupts disabled)
2851   */
2852  
2853 -static void platform_leave(int platform_mode)
2854 +void platform_leave(int platform_mode)
2855  {
2856         if (platform_mode && hibernation_ops)
2857                 hibernation_ops->leave();
2858  }
2859 +EXPORT_SYMBOL_GPL(platform_leave);
2860  
2861  /**
2862   *     platform_finish - switch the machine to the normal mode of operation
2863   *     using the platform driver (must be called after platform_prepare())
2864   */
2865  
2866 -static void platform_finish(int platform_mode)
2867 +void platform_finish(int platform_mode)
2868  {
2869         if (platform_mode && hibernation_ops)
2870                 hibernation_ops->finish();
2871  }
2872 +EXPORT_SYMBOL_GPL(platform_finish);
2873  
2874  /**
2875   *     platform_pre_restore - prepare the platform for the restoration from a
2876 @@ -171,11 +177,12 @@ static void platform_finish(int platform_mode)
2877   *     called, platform_restore_cleanup() must be called.
2878   */
2879  
2880 -static int platform_pre_restore(int platform_mode)
2881 +int platform_pre_restore(int platform_mode)
2882  {
2883         return (platform_mode && hibernation_ops) ?
2884                 hibernation_ops->pre_restore() : 0;
2885  }
2886 +EXPORT_SYMBOL_GPL(platform_pre_restore);
2887  
2888  /**
2889   *     platform_restore_cleanup - switch the platform to the normal mode of
2890 @@ -184,22 +191,24 @@ static int platform_pre_restore(int platform_mode)
2891   *     regardless of the result of platform_pre_restore().
2892   */
2893  
2894 -static void platform_restore_cleanup(int platform_mode)
2895 +void platform_restore_cleanup(int platform_mode)
2896  {
2897         if (platform_mode && hibernation_ops)
2898                 hibernation_ops->restore_cleanup();
2899  }
2900 +EXPORT_SYMBOL_GPL(platform_restore_cleanup);
2901  
2902  /**
2903   *     platform_recover - recover the platform from a failure to suspend
2904   *     devices.
2905   */
2906  
2907 -static void platform_recover(int platform_mode)
2908 +void platform_recover(int platform_mode)
2909  {
2910         if (platform_mode && hibernation_ops && hibernation_ops->recover)
2911                 hibernation_ops->recover();
2912  }
2913 +EXPORT_SYMBOL_GPL(platform_recover);
2914  
2915  /**
2916   *     create_image - freeze devices that need to be frozen with interrupts
2917 @@ -423,6 +432,7 @@ int hibernation_restore(int platform_mode)
2918         pm_restore_console();
2919         return error;
2920  }
2921 +EXPORT_SYMBOL_GPL(hibernation_platform_enter);
2922  
2923  /**
2924   *     hibernation_platform_enter - enter the hibernation state using the
2925 @@ -542,6 +552,9 @@ int hibernate(void)
2926  {
2927         int error;
2928  
2929 +       if (test_action_state(TOI_REPLACE_SWSUSP))
2930 +               return try_tuxonice_hibernate();
2931 +
2932         mutex_lock(&pm_mutex);
2933         /* The snapshot device should not be opened while we're running */
2934         if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
2935 @@ -619,10 +632,17 @@ int hibernate(void)
2936   *
2937   */
2938  
2939 -static int software_resume(void)
2940 +int software_resume(void)
2941  {
2942         int error;
2943         unsigned int flags;
2944 +       resume_attempted = 1;
2945 +
2946 +       /*
2947 +        * We can't know (until an image header - if any - is loaded) whether
2948 +        * we overrode swsusp. We therefore ensure that both are tried.
2949 +        */
2950 +       try_tuxonice_resume();
2951  
2952         /*
2953          * If the user said "noresume".. bail out early.
2954 @@ -947,6 +967,7 @@ static int __init resume_offset_setup(char *str)
2955  static int __init noresume_setup(char *str)
2956  {
2957         noresume = 1;
2958 +       set_toi_state(TOI_NORESUME_SPECIFIED);
2959         return 1;
2960  }
2961  
2962 diff --git a/kernel/power/main.c b/kernel/power/main.c
2963 index 8680282..ea50274 100644
2964 --- a/kernel/power/main.c
2965 +++ b/kernel/power/main.c
2966 @@ -26,6 +26,7 @@
2967  #include "power.h"
2968  
2969  DEFINE_MUTEX(pm_mutex);
2970 +EXPORT_SYMBOL_GPL(pm_mutex);
2971  
2972  unsigned int pm_flags;
2973  EXPORT_SYMBOL(pm_flags);
2974 @@ -34,7 +35,8 @@ EXPORT_SYMBOL(pm_flags);
2975  
2976  /* Routines for PM-transition notifications */
2977  
2978 -static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
2979 +BLOCKING_NOTIFIER_HEAD(pm_chain_head);
2980 +EXPORT_SYMBOL_GPL(pm_chain_head);
2981  
2982  int register_pm_notifier(struct notifier_block *nb)
2983  {
2984 @@ -204,6 +206,7 @@ void suspend_set_ops(struct platform_suspend_ops *ops)
2985         suspend_ops = ops;
2986         mutex_unlock(&pm_mutex);
2987  }
2988 +EXPORT_SYMBOL_GPL(pm_notifier_call_chain);
2989  
2990  /**
2991   * suspend_valid_only_mem - generic memory-only valid callback
2992 @@ -465,6 +468,7 @@ static int enter_state(suspend_state_t state)
2993         mutex_unlock(&pm_mutex);
2994         return error;
2995  }
2996 +EXPORT_SYMBOL_GPL(suspend_devices_and_enter);
2997  
2998  
2999  /**
3000 @@ -487,6 +491,7 @@ EXPORT_SYMBOL(pm_suspend);
3001  #endif /* CONFIG_SUSPEND */
3002  
3003  struct kobject *power_kobj;
3004 +EXPORT_SYMBOL_GPL(power_kobj);
3005  
3006  /**
3007   *     state - control system power state.
3008 diff --git a/kernel/power/power.h b/kernel/power/power.h
3009 index 46b5ec7..4cc59d5 100644
3010 --- a/kernel/power/power.h
3011 +++ b/kernel/power/power.h
3012 @@ -31,8 +31,12 @@ static inline char *check_image_kernel(struct swsusp_info *info)
3013         return arch_hibernation_header_restore(info) ?
3014                         "architecture specific data" : NULL;
3015  }
3016 +#else
3017 +extern char *check_image_kernel(struct swsusp_info *info);
3018  #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
3019 +extern int init_header(struct swsusp_info *info);
3020  
3021 +extern char resume_file[256];
3022  /*
3023   * Keep some memory free so that I/O operations can succeed without paging
3024   * [Might this be more than 4 MB?]
3025 @@ -49,6 +53,7 @@ static inline char *check_image_kernel(struct swsusp_info *info)
3026  extern int hibernation_snapshot(int platform_mode);
3027  extern int hibernation_restore(int platform_mode);
3028  extern int hibernation_platform_enter(void);
3029 +extern void platform_recover(int platform_mode);
3030  #endif
3031  
3032  extern int pfn_is_nosave(unsigned long);
3033 @@ -63,6 +68,8 @@ static struct kobj_attribute _name##_attr = { \
3034         .store  = _name##_store,                \
3035  }
3036  
3037 +extern struct pbe *restore_pblist;
3038 +
3039  /* Preferred image size in bytes (default 500 MB) */
3040  extern unsigned long image_size;
3041  extern int in_suspend;
3042 @@ -223,3 +230,86 @@ static inline void suspend_thaw_processes(void)
3043  {
3044  }
3045  #endif
3046 +
3047 +extern struct page *saveable_page(struct zone *z, unsigned long p);
3048 +#ifdef CONFIG_HIGHMEM
3049 +extern struct page *saveable_highmem_page(struct zone *z, unsigned long p);
3050 +#else
3051 +static
3052 +inline struct page *saveable_highmem_page(struct zone *z, unsigned long p)
3053 +{
3054 +       return NULL;
3055 +}
3056 +#endif
3057 +
3058 +#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe))
3059 +extern struct list_head nosave_regions;
3060 +
3061 +/**
3062 + *     This structure represents a range of page frames the contents of which
3063 + *     should not be saved during the suspend.
3064 + */
3065 +
3066 +struct nosave_region {
3067 +       struct list_head list;
3068 +       unsigned long start_pfn;
3069 +       unsigned long end_pfn;
3070 +};
3071 +
3072 +#ifndef PHYS_PFN_OFFSET
3073 +#define PHYS_PFN_OFFSET 0
3074 +#endif
3075 +
3076 +#define ZONE_START(thiszone) ((thiszone)->zone_start_pfn - PHYS_PFN_OFFSET)
3077 +
3078 +#define BM_END_OF_MAP  (~0UL)
3079 +
3080 +#define BM_BITS_PER_BLOCK      (PAGE_SIZE << 3)
3081 +
3082 +struct bm_block {
3083 +       struct list_head hook;          /* hook into a list of bitmap blocks */
3084 +       unsigned long start_pfn;        /* pfn represented by the first bit */
3085 +       unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
3086 +       unsigned long *data;    /* bitmap representing pages */
3087 +};
3088 +
3089 +/* struct bm_position is used for browsing memory bitmaps */
3090 +
3091 +struct bm_position {
3092 +       struct bm_block *block;
3093 +       int bit;
3094 +};
3095 +
3096 +struct memory_bitmap {
3097 +       struct list_head blocks;        /* list of bitmap blocks */
3098 +       struct linked_page *p_list;     /* list of pages used to store zone
3099 +                                        * bitmap objects and bitmap block
3100 +                                        * objects
3101 +                                        */
3102 +       struct bm_position cur;         /* most recently used bit position */
3103 +       struct bm_position iter;        /* most recently used bit position
3104 +                                        * when iterating over a bitmap.
3105 +                                        */
3106 +};
3107 +
3108 +extern int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
3109 +               int safe_needed);
3110 +extern void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
3111 +extern void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn);
3112 +extern void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn);
3113 +extern int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn);
3114 +extern unsigned long memory_bm_next_pfn(struct memory_bitmap *bm);
3115 +extern void memory_bm_position_reset(struct memory_bitmap *bm);
3116 +extern void memory_bm_clear(struct memory_bitmap *bm);
3117 +extern void memory_bm_copy(struct memory_bitmap *source,
3118 +               struct memory_bitmap *dest);
3119 +extern void memory_bm_dup(struct memory_bitmap *source,
3120 +               struct memory_bitmap *dest);
3121 +
3122 +#ifdef CONFIG_TOI
3123 +struct toi_module_ops;
3124 +extern int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
3125 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
3126 +extern int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
3127 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
3128 +#endif
3129 diff --git a/kernel/power/process.c b/kernel/power/process.c
3130 index ca63401..10ba50f 100644
3131 --- a/kernel/power/process.c
3132 +++ b/kernel/power/process.c
3133 @@ -13,6 +13,10 @@
3134  #include <linux/module.h>
3135  #include <linux/syscalls.h>
3136  #include <linux/freezer.h>
3137 +#include <linux/buffer_head.h>
3138 +
3139 +int freezer_state;
3140 +EXPORT_SYMBOL_GPL(freezer_state);
3141  
3142  /* 
3143   * Timeout for stopping processes
3144 @@ -86,7 +90,8 @@ static int try_to_freeze_tasks(bool sig_only)
3145                 do_each_thread(g, p) {
3146                         task_lock(p);
3147                         if (freezing(p) && !freezer_should_skip(p))
3148 -                               printk(KERN_ERR " %s\n", p->comm);
3149 +                               printk(KERN_ERR " %s (%d) failed to freeze.\n",
3150 +                                               p->comm, p->pid);
3151                         cancel_freezing(p);
3152                         task_unlock(p);
3153                 } while_each_thread(g, p);
3154 @@ -106,22 +111,31 @@ int freeze_processes(void)
3155  {
3156         int error;
3157  
3158 -       printk("Freezing user space processes ... ");
3159 +       printk(KERN_INFO "Stopping fuse filesystems.\n");
3160 +       freeze_filesystems(FS_FREEZER_FUSE);
3161 +       freezer_state = FREEZER_FILESYSTEMS_FROZEN;
3162 +       printk(KERN_INFO "Freezing user space processes ... ");
3163         error = try_to_freeze_tasks(true);
3164         if (error)
3165                 goto Exit;
3166         printk("done.\n");
3167  
3168 -       printk("Freezing remaining freezable tasks ... ");
3169 +       sys_sync();
3170 +       printk(KERN_INFO "Stopping normal filesystems.\n");
3171 +       freeze_filesystems(FS_FREEZER_NORMAL);
3172 +       freezer_state = FREEZER_USERSPACE_FROZEN;
3173 +       printk(KERN_INFO "Freezing remaining freezable tasks ... ");
3174         error = try_to_freeze_tasks(false);
3175         if (error)
3176                 goto Exit;
3177         printk("done.");
3178 +       freezer_state = FREEZER_FULLY_ON;
3179   Exit:
3180         BUG_ON(in_atomic());
3181         printk("\n");
3182         return error;
3183  }
3184 +EXPORT_SYMBOL_GPL(freeze_processes);
3185  
3186  static void thaw_tasks(bool nosig_only)
3187  {
3188 @@ -145,10 +159,35 @@ static void thaw_tasks(bool nosig_only)
3189  
3190  void thaw_processes(void)
3191  {
3192 -       printk("Restarting tasks ... ");
3193 -       thaw_tasks(true);
3194 +       int old_state = freezer_state;
3195 +
3196 +       if (old_state == FREEZER_OFF)
3197 +               return;
3198 +
3199 +       freezer_state = FREEZER_OFF;
3200 +
3201 +       printk(KERN_INFO "Restarting all filesystems ...\n");
3202 +       thaw_filesystems(FS_FREEZER_ALL);
3203 +
3204 +       printk(KERN_INFO "Restarting tasks ... ");
3205 +       if (old_state == FREEZER_FULLY_ON)
3206 +               thaw_tasks(true);
3207         thaw_tasks(false);
3208         schedule();
3209         printk("done.\n");
3210  }
3211 +EXPORT_SYMBOL_GPL(thaw_processes);
3212  
3213 +void thaw_kernel_threads(void)
3214 +{
3215 +       freezer_state = FREEZER_USERSPACE_FROZEN;
3216 +       printk(KERN_INFO "Restarting normal filesystems.\n");
3217 +       thaw_filesystems(FS_FREEZER_NORMAL);
3218 +       thaw_tasks(true);
3219 +}
3220 +
3221 +/*
3222 + * It's ugly putting this EXPORT down here, but it's necessary so that it
3223 + * doesn't matter whether the fs-freezing patch is applied or not.
3224 + */
3225 +EXPORT_SYMBOL_GPL(thaw_kernel_threads);
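
For reference, the freezer_state transitions implied by the hunks above
(the state names appear in the code; the enum itself is defined elsewhere
in this patch):

	/*
	 * freeze_processes() advances:
	 *   FREEZER_OFF
	 *     -> FREEZER_FILESYSTEMS_FROZEN  (fuse filesystems stopped)
	 *     -> FREEZER_USERSPACE_FROZEN    (sync, normal filesystems stopped)
	 *     -> FREEZER_FULLY_ON            (kernel threads frozen too)
	 *
	 * thaw_processes() drops straight back to FREEZER_OFF, while
	 * thaw_kernel_threads() only retreats to FREEZER_USERSPACE_FROZEN.
	 */
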
3226 diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
3227 index 33e2e4a..a78b7ef 100644
3228 --- a/kernel/power/snapshot.c
3229 +++ b/kernel/power/snapshot.c
3230 @@ -34,6 +34,8 @@
3231  #include <asm/io.h>
3232  
3233  #include "power.h"
3234 +#include "tuxonice_builtin.h"
3235 +#include "tuxonice_pagedir.h"
3236  
3237  static int swsusp_page_is_free(struct page *);
3238  static void swsusp_set_page_forbidden(struct page *);
3239 @@ -45,6 +47,10 @@ static void swsusp_unset_page_forbidden(struct page *);
3240   * directly to their "original" page frames.
3241   */
3242  struct pbe *restore_pblist;
3243 +EXPORT_SYMBOL_GPL(restore_pblist);
3244 +
3245 +int resume_attempted;
3246 +EXPORT_SYMBOL_GPL(resume_attempted);
3247  
3248  /* Pointer to an auxiliary buffer (1 page) */
3249  static void *buffer;
3250 @@ -87,6 +93,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed)
3251  
3252  unsigned long get_safe_page(gfp_t gfp_mask)
3253  {
3254 +       if (toi_running)
3255 +               return toi_get_nonconflicting_page();
3256 +
3257         return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
3258  }
3259  
3260 @@ -223,47 +232,22 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
3261   *     the represented memory area.
3262   */
3263  
3264 -#define BM_END_OF_MAP  (~0UL)
3265 -
3266 -#define BM_BITS_PER_BLOCK      (PAGE_SIZE << 3)
3267 -
3268 -struct bm_block {
3269 -       struct list_head hook;  /* hook into a list of bitmap blocks */
3270 -       unsigned long start_pfn;        /* pfn represented by the first bit */
3271 -       unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
3272 -       unsigned long *data;    /* bitmap representing pages */
3273 -};
3274 -
3275  static inline unsigned long bm_block_bits(struct bm_block *bb)
3276  {
3277         return bb->end_pfn - bb->start_pfn;
3278  }
3279  
3280 -/* strcut bm_position is used for browsing memory bitmaps */
3281 -
3282 -struct bm_position {
3283 -       struct bm_block *block;
3284 -       int bit;
3285 -};
3286 -
3287 -struct memory_bitmap {
3288 -       struct list_head blocks;        /* list of bitmap blocks */
3289 -       struct linked_page *p_list;     /* list of pages used to store zone
3290 -                                        * bitmap objects and bitmap block
3291 -                                        * objects
3292 -                                        */
3293 -       struct bm_position cur; /* most recently used bit position */
3294 -};
3295 -
3296  /* Functions that operate on memory bitmaps */
3297  
3298 -static void memory_bm_position_reset(struct memory_bitmap *bm)
3299 +void memory_bm_position_reset(struct memory_bitmap *bm)
3300  {
3301         bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
3302         bm->cur.bit = 0;
3303 -}
3304  
3305 -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
3306 +       bm->iter.block = list_entry(bm->blocks.next, struct bm_block, hook);
3307 +       bm->iter.bit = 0;
3308 +}
3309 +EXPORT_SYMBOL_GPL(memory_bm_position_reset);
3310  
3311  /**
3312   *     create_bm_block_list - create a list of block bitmap objects
3313 @@ -371,7 +355,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
3314  /**
3315    *    memory_bm_create - allocate memory for a memory bitmap
3316    */
3317 -static int
3318 +int
3319  memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
3320  {
3321         struct chain_allocator ca;
3322 @@ -427,11 +411,12 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
3323         memory_bm_free(bm, PG_UNSAFE_CLEAR);
3324         goto Exit;
3325  }
3326 +EXPORT_SYMBOL_GPL(memory_bm_create);
3327  
3328  /**
3329    *    memory_bm_free - free memory occupied by the memory bitmap @bm
3330    */
3331 -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
3332 +void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
3333  {
3334         struct bm_block *bb;
3335  
3336 @@ -443,6 +428,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
3337  
3338         INIT_LIST_HEAD(&bm->blocks);
3339  }
3340 +EXPORT_SYMBOL_GPL(memory_bm_free);
3341  
3342  /**
3343   *     memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
3344 @@ -481,7 +467,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
3345         return 0;
3346  }
3347  
3348 -static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
3349 +void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
3350  {
3351         void *addr;
3352         unsigned int bit;
3353 @@ -491,6 +477,7 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
3354         BUG_ON(error);
3355         set_bit(bit, addr);
3356  }
3357 +EXPORT_SYMBOL_GPL(memory_bm_set_bit);
3358  
3359  static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
3360  {
3361 @@ -504,7 +491,7 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
3362         return error;
3363  }
3364  
3365 -static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
3366 +void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
3367  {
3368         void *addr;
3369         unsigned int bit;
3370 @@ -514,8 +501,9 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
3371         BUG_ON(error);
3372         clear_bit(bit, addr);
3373  }
3374 +EXPORT_SYMBOL_GPL(memory_bm_clear_bit);
3375  
3376 -static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
3377 +int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
3378  {
3379         void *addr;
3380         unsigned int bit;
3381 @@ -525,6 +513,7 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
3382         BUG_ON(error);
3383         return test_bit(bit, addr);
3384  }
3385 +EXPORT_SYMBOL_GPL(memory_bm_test_bit);
3386  
3387  static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
3388  {
3389 @@ -543,43 +532,178 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
3390   *     this function.
3391   */
3392  
3393 -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
3394 +unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
3395  {
3396         struct bm_block *bb;
3397         int bit;
3398  
3399 -       bb = bm->cur.block;
3400 +       bb = bm->iter.block;
3401         do {
3402 -               bit = bm->cur.bit;
3403 +               bit = bm->iter.bit;
3404                 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
3405                 if (bit < bm_block_bits(bb))
3406                         goto Return_pfn;
3407  
3408                 bb = list_entry(bb->hook.next, struct bm_block, hook);
3409 -               bm->cur.block = bb;
3410 -               bm->cur.bit = 0;
3411 +               bm->iter.block = bb;
3412 +               bm->iter.bit = 0;
3413         } while (&bb->hook != &bm->blocks);
3414  
3415         memory_bm_position_reset(bm);
3416         return BM_END_OF_MAP;
3417  
3418   Return_pfn:
3419 -       bm->cur.bit = bit + 1;
3420 +       bm->iter.bit = bit + 1;
3421         return bb->start_pfn + bit;
3422  }
3423 +EXPORT_SYMBOL_GPL(memory_bm_next_pfn);
3424  
3425 -/**
3426 - *     This structure represents a range of page frames the contents of which
3427 - *     should not be saved during the suspend.
3428 - */
3429 +void memory_bm_clear(struct memory_bitmap *bm)
3430 +{
3431 +       unsigned long pfn;
3432  
3433 -struct nosave_region {
3434 -       struct list_head list;
3435 -       unsigned long start_pfn;
3436 -       unsigned long end_pfn;
3437 -};
3438 +       memory_bm_position_reset(bm);
3439 +       pfn = memory_bm_next_pfn(bm);
3440 +       while (pfn != BM_END_OF_MAP) {
3441 +               memory_bm_clear_bit(bm, pfn);
3442 +               pfn = memory_bm_next_pfn(bm);
3443 +       }
3444 +}
3445 +EXPORT_SYMBOL_GPL(memory_bm_clear);
3446 +
3447 +void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest)
3448 +{
3449 +       unsigned long pfn;
3450 +
3451 +       memory_bm_position_reset(source);
3452 +       pfn = memory_bm_next_pfn(source);
3453 +       while (pfn != BM_END_OF_MAP) {
3454 +               memory_bm_set_bit(dest, pfn);
3455 +               pfn = memory_bm_next_pfn(source);
3456 +       }
3457 +}
3458 +EXPORT_SYMBOL_GPL(memory_bm_copy);
3459 +
3460 +void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest)
3461 +{
3462 +       memory_bm_clear(dest);
3463 +       memory_bm_copy(source, dest);
3464 +}
3465 +EXPORT_SYMBOL_GPL(memory_bm_dup);
3466 +
3467 +#ifdef CONFIG_TOI
3468 +#define DEFINE_MEMORY_BITMAP(name) \
3469 +struct memory_bitmap *name; \
3470 +EXPORT_SYMBOL_GPL(name)
3471 +
3472 +DEFINE_MEMORY_BITMAP(pageset1_map);
3473 +DEFINE_MEMORY_BITMAP(pageset1_copy_map);
3474 +DEFINE_MEMORY_BITMAP(pageset2_map);
3475 +DEFINE_MEMORY_BITMAP(page_resave_map);
3476 +DEFINE_MEMORY_BITMAP(io_map);
3477 +DEFINE_MEMORY_BITMAP(nosave_map);
3478 +DEFINE_MEMORY_BITMAP(free_map);
3479 +
3480 +int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
3481 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
3482 +{
3483 +       int result = 0;
3484 +       unsigned int nr = 0;
3485 +       struct bm_block *bb;
3486 +
3487 +       if (!bm)
3488 +               return result;
3489  
3490 -static LIST_HEAD(nosave_regions);
3491 +       list_for_each_entry(bb, &bm->blocks, hook)
3492 +               nr++;
3493 +
3494 +       result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int));
3495 +       if (result)
3496 +               return result;
3497 +
3498 +       list_for_each_entry(bb, &bm->blocks, hook) {
3499 +               result = (*rw_chunk)(WRITE, NULL, (char *) &bb->start_pfn,
3500 +                               2 * sizeof(unsigned long));
3501 +               if (result)
3502 +                       return result;
3503 +
3504 +               result = (*rw_chunk)(WRITE, NULL, (char *) bb->data, PAGE_SIZE);
3505 +               if (result)
3506 +                       return result;
3507 +       }
3508 +
3509 +       return 0;
3510 +}
3511 +EXPORT_SYMBOL_GPL(memory_bm_write);
3512 +
3513 +int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
3514 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
3515 +{
3516 +       int result = 0;
3517 +       unsigned int nr, i;
3518 +       struct bm_block *bb;
3519 +
3520 +       if (!bm)
3521 +               return result;
3522 +
3523 +       result = memory_bm_create(bm, GFP_KERNEL, 0);
3524 +
3525 +       if (result)
3526 +               return result;
3527 +
3528 +       result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int));
3529 +       if (result)
3530 +               goto Free;
3531 +
3532 +       for (i = 0; i < nr; i++) {
3533 +               unsigned long pfn;
3534 +
3535 +               result = (*rw_chunk)(READ, NULL, (char *) &pfn,
3536 +                               sizeof(unsigned long));
3537 +               if (result)
3538 +                       goto Free;
3539 +
3540 +               list_for_each_entry(bb, &bm->blocks, hook)
3541 +                       if (bb->start_pfn == pfn)
3542 +                               break;
3543 +
3544 +               if (&bb->hook == &bm->blocks) {
3545 +                       printk(KERN_ERR
3546 +                               "TuxOnIce: Failed to load memory bitmap.\n");
3547 +                       result = -EINVAL;
3548 +                       goto Free;
3549 +               }
3550 +
3551 +               result = (*rw_chunk)(READ, NULL, (char *) &pfn,
3552 +                               sizeof(unsigned long));
3553 +               if (result)
3554 +                       goto Free;
3555 +
3556 +               if (pfn != bb->end_pfn) {
3557 +                       printk(KERN_ERR
3558 +                               "TuxOnIce: Failed to load memory bitmap. "
3559 +                               "End PFN doesn't match what was saved.\n");
3560 +                       result = -EINVAL;
3561 +                       goto Free;
3562 +               }
3563 +
3564 +               result = (*rw_chunk)(READ, NULL, (char *) bb->data, PAGE_SIZE);
3565 +
3566 +               if (result)
3567 +                       goto Free;
3568 +       }
3569 +
3570 +       return 0;
3571 +
3572 +Free:
3573 +       memory_bm_free(bm, PG_ANY);
3574 +       return result;
3575 +}
3576 +EXPORT_SYMBOL_GPL(memory_bm_read);
3577 +#endif
3578 +
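
The chunk stream that memory_bm_write() emits and memory_bm_read() consumes,
summarised from the code above (a descriptive note, not part of the patch):

	/*
	 * Per rw_chunk call, the bitmap stream is:
	 *   unsigned int nr;                     number of bm_blocks
	 *   then, nr times:
	 *     unsigned long start_pfn, end_pfn;  one 2*sizeof(long) chunk
	 *     char data[PAGE_SIZE];              the block's bitmap
	 * On load, blocks are matched by start_pfn against a freshly
	 * created bitmap, and end_pfn is verified before data is read.
	 */
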
3579 +LIST_HEAD(nosave_regions);
3580 +EXPORT_SYMBOL_GPL(nosave_regions);
3581  
3582  /**
3583   *     register_nosave_region - register a range of page frames the contents
3584 @@ -815,7 +939,7 @@ static unsigned int count_free_highmem_pages(void)
3585   *     We should save the page if it isn't Nosave or NosaveFree, or Reserved,
3586   *     and it isn't a part of a free chunk of pages.
3587   */
3588 -static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
3589 +struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
3590  {
3591         struct page *page;
3592  
3593 @@ -834,6 +958,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
3594  
3595         return page;
3596  }
3597 +EXPORT_SYMBOL_GPL(saveable_highmem_page);
3598  
3599  /**
3600   *     count_highmem_pages - compute the total number of saveable highmem
3601 @@ -859,11 +984,6 @@ unsigned int count_highmem_pages(void)
3602         }
3603         return n;
3604  }
3605 -#else
3606 -static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
3607 -{
3608 -       return NULL;
3609 -}
3610  #endif /* CONFIG_HIGHMEM */
3611  
3612  /**
3613 @@ -874,7 +994,7 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
3614   *     of pages statically defined as 'unsaveable', and it isn't a part of
3615   *     a free chunk of pages.
3616   */
3617 -static struct page *saveable_page(struct zone *zone, unsigned long pfn)
3618 +struct page *saveable_page(struct zone *zone, unsigned long pfn)
3619  {
3620         struct page *page;
3621  
3622 @@ -896,6 +1016,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
3623  
3624         return page;
3625  }
3626 +EXPORT_SYMBOL_GPL(saveable_page);
3627  
3628  /**
3629   *     count_data_pages - compute the total number of saveable non-highmem
3630 @@ -1210,6 +1331,9 @@ asmlinkage int swsusp_save(void)
3631  {
3632         unsigned int nr_pages, nr_highmem;
3633  
3634 +       if (toi_running)
3635 +               return toi_post_context_save();
3636 +
3637         printk(KERN_INFO "PM: Creating hibernation image: \n");
3638  
3639         drain_local_pages(NULL);
3640 @@ -1250,14 +1374,14 @@ asmlinkage int swsusp_save(void)
3641  }
3642  
3643  #ifndef CONFIG_ARCH_HIBERNATION_HEADER
3644 -static int init_header_complete(struct swsusp_info *info)
3645 +int init_header_complete(struct swsusp_info *info)
3646  {
3647         memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
3648         info->version_code = LINUX_VERSION_CODE;
3649         return 0;
3650  }
3651  
3652 -static char *check_image_kernel(struct swsusp_info *info)
3653 +char *check_image_kernel(struct swsusp_info *info)
3654  {
3655         if (info->version_code != LINUX_VERSION_CODE)
3656                 return "kernel version";
3657 @@ -1271,6 +1395,7 @@ static char *check_image_kernel(struct swsusp_info *info)
3658                 return "machine";
3659         return NULL;
3660  }
3661 +EXPORT_SYMBOL_GPL(check_image_kernel);
3662  #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
3663  
3664  unsigned long snapshot_get_image_size(void)
3665 @@ -1278,7 +1403,7 @@ unsigned long snapshot_get_image_size(void)
3666         return nr_copy_pages + nr_meta_pages + 1;
3667  }
3668  
3669 -static int init_header(struct swsusp_info *info)
3670 +int init_header(struct swsusp_info *info)
3671  {
3672         memset(info, 0, sizeof(struct swsusp_info));
3673         info->num_physpages = num_physpages;
3674 @@ -1288,6 +1413,7 @@ static int init_header(struct swsusp_info *info)
3675         info->size <<= PAGE_SHIFT;
3676         return init_header_complete(info);
3677  }
3678 +EXPORT_SYMBOL_GPL(init_header);
3679  
3680  /**
3681   *     pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
3682 diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h
3683 new file mode 100644
3684 index 0000000..1d6349d
3685 --- /dev/null
3686 +++ b/kernel/power/tuxonice.h
3687 @@ -0,0 +1,213 @@
3688 +/*
3689 + * kernel/power/tuxonice.h
3690 + *
3691 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
3692 + *
3693 + * This file is released under the GPLv2.
3694 + *
3695 + * It contains declarations used throughout swsusp.
3696 + *
3697 + */
3698 +
3699 +#ifndef KERNEL_POWER_TOI_H
3700 +#define KERNEL_POWER_TOI_H
3701 +
3702 +#include <linux/delay.h>
3703 +#include <linux/bootmem.h>
3704 +#include <linux/suspend.h>
3705 +#include <linux/fs.h>
3706 +#include <linux/kmod.h>
3707 +#include <asm/setup.h>
3708 +#include "tuxonice_pageflags.h"
3709 +#include "power.h"
3710 +
3711 +#define TOI_CORE_VERSION "3.0.1"
3712 +
3713 +#define MY_BOOT_KERNEL_DATA_VERSION 1
3714 +
3715 +struct toi_boot_kernel_data {
3716 +       int version;
3717 +       int size;
3718 +       unsigned long toi_action;
3719 +       unsigned long toi_debug_state;
3720 +       u32 toi_default_console_level;
3721 +       int toi_io_time[2][2];
3722 +       char toi_nosave_commandline[COMMAND_LINE_SIZE];
3723 +};
3724 +
3725 +extern struct toi_boot_kernel_data toi_bkd;
3726 +
3727 +/* Location of boot kernel data struct in the kernel being resumed */
3728 +extern unsigned long boot_kernel_data_buffer;
3729 +
3730 +/*              == Action states ==            */
3731 +
3732 +enum {
3733 +       TOI_REBOOT,
3734 +       TOI_PAUSE,
3735 +       TOI_LOGALL,
3736 +       TOI_CAN_CANCEL,
3737 +       TOI_KEEP_IMAGE,
3738 +       TOI_FREEZER_TEST,
3739 +       TOI_SINGLESTEP,
3740 +       TOI_PAUSE_NEAR_PAGESET_END,
3741 +       TOI_TEST_FILTER_SPEED,
3742 +       TOI_TEST_BIO,
3743 +       TOI_NO_PAGESET2,
3744 +       TOI_IGNORE_ROOTFS,
3745 +       TOI_REPLACE_SWSUSP,
3746 +       TOI_PAGESET2_FULL,
3747 +       TOI_ABORT_ON_RESAVE_NEEDED,
3748 +       TOI_NO_MULTITHREADED_IO,
3749 +       TOI_NO_DIRECT_LOAD,
3750 +       TOI_LATE_CPU_HOTPLUG,
3751 +       TOI_GET_MAX_MEM_ALLOCD,
3752 +       TOI_NO_FLUSHER_THREAD,
3753 +       TOI_NO_PS2_IF_UNNEEDED
3754 +};
3755 +
3756 +#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action))
3757 +
3758 +/*              == Result states ==            */
3759 +
3760 +enum {
3761 +       TOI_ABORTED,
3762 +       TOI_ABORT_REQUESTED,
3763 +       TOI_NOSTORAGE_AVAILABLE,
3764 +       TOI_INSUFFICIENT_STORAGE,
3765 +       TOI_FREEZING_FAILED,
3766 +       TOI_KEPT_IMAGE,
3767 +       TOI_WOULD_EAT_MEMORY,
3768 +       TOI_UNABLE_TO_FREE_ENOUGH_MEMORY,
3769 +       TOI_PM_SEM,
3770 +       TOI_DEVICE_REFUSED,
3771 +       TOI_SYSDEV_REFUSED,
3772 +       TOI_EXTRA_PAGES_ALLOW_TOO_SMALL,
3773 +       TOI_UNABLE_TO_PREPARE_IMAGE,
3774 +       TOI_FAILED_MODULE_INIT,
3775 +       TOI_FAILED_MODULE_CLEANUP,
3776 +       TOI_FAILED_IO,
3777 +       TOI_OUT_OF_MEMORY,
3778 +       TOI_IMAGE_ERROR,
3779 +       TOI_PLATFORM_PREP_FAILED,
3780 +       TOI_CPU_HOTPLUG_FAILED,
3781 +       TOI_ARCH_PREPARE_FAILED,
3782 +       TOI_RESAVE_NEEDED,
3783 +       TOI_CANT_SUSPEND,
3784 +       TOI_NOTIFIERS_PREPARE_FAILED,
3785 +       TOI_PRE_SNAPSHOT_FAILED,
3786 +       TOI_PRE_RESTORE_FAILED,
3787 +       TOI_USERMODE_HELPERS_ERR,
3788 +       TOI_CANT_USE_ALT_RESUME,
3789 +       TOI_HEADER_TOO_BIG,
3790 +       TOI_NUM_RESULT_STATES   /* Used in printing debug info only */
3791 +};
3792 +
3793 +extern unsigned long toi_result;
3794 +
3795 +#define set_result_state(bit) (test_and_set_bit(bit, &toi_result))
3796 +#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \
3797 +                               test_and_set_bit(bit, &toi_result))
3798 +#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result))
3799 +#define test_result_state(bit) (test_bit(bit, &toi_result))
3800 +
3801 +/*      == Debug sections and levels ==        */
3802 +
3803 +/* debugging levels. */
3804 +enum {
3805 +       TOI_STATUS = 0,
3806 +       TOI_ERROR = 2,
3807 +       TOI_LOW,
3808 +       TOI_MEDIUM,
3809 +       TOI_HIGH,
3810 +       TOI_VERBOSE,
3811 +};
3812 +
3813 +enum {
3814 +       TOI_ANY_SECTION,
3815 +       TOI_EAT_MEMORY,
3816 +       TOI_IO,
3817 +       TOI_HEADER,
3818 +       TOI_WRITER,
3819 +       TOI_MEMORY,
3820 +};
3821 +
3822 +#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state))
3823 +#define clear_debug_state(bit) \
3824 +       (test_and_clear_bit(bit, &toi_bkd.toi_debug_state))
3825 +#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state))
3826 +
3827 +/*             == Steps in hibernating ==      */
3828 +
3829 +enum {
3830 +       STEP_HIBERNATE_PREPARE_IMAGE,
3831 +       STEP_HIBERNATE_SAVE_IMAGE,
3832 +       STEP_HIBERNATE_POWERDOWN,
3833 +       STEP_RESUME_CAN_RESUME,
3834 +       STEP_RESUME_LOAD_PS1,
3835 +       STEP_RESUME_DO_RESTORE,
3836 +       STEP_RESUME_READ_PS2,
3837 +       STEP_RESUME_GO,
3838 +       STEP_RESUME_ALT_IMAGE,
3839 +       STEP_CLEANUP,
3840 +       STEP_QUIET_CLEANUP
3841 +};
3842 +
3843 +/*             == TuxOnIce states ==
3844 +       (see also include/linux/suspend.h)      */
3845 +
3846 +#define get_toi_state()  (toi_state)
3847 +#define restore_toi_state(saved_state) \
3848 +       do { toi_state = saved_state; } while (0)
3849 +
3850 +/*             == Module support ==            */
3851 +
3852 +struct toi_core_fns {
3853 +       int (*post_context_save)(void);
3854 +       unsigned long (*get_nonconflicting_page)(void);
3855 +       int (*try_hibernate)(void);
3856 +       void (*try_resume)(void);
3857 +};
3858 +
3859 +extern struct toi_core_fns *toi_core_fns;
3860 +
3861 +/*             == All else ==                  */
3862 +#define KB(x) ((x) << (PAGE_SHIFT - 10))
3863 +#define MB(x) ((x) >> (20 - PAGE_SHIFT))
3864 +
3865 +extern int toi_start_anything(int toi_or_resume);
3866 +extern void toi_finish_anything(int toi_or_resume);
3867 +
3868 +extern int save_image_part1(void);
3869 +extern int toi_atomic_restore(void);
3870 +
3871 +extern int toi_try_hibernate(void);
3872 +extern void toi_try_resume(void);
3873 +
3874 +extern int __toi_post_context_save(void);
3875 +
3876 +extern unsigned int nr_hibernates;
3877 +extern char alt_resume_param[256];
3878 +
3879 +extern void copyback_post(void);
3880 +extern int toi_hibernate(void);
3881 +extern long extra_pd1_pages_used;
3882 +
3883 +#define SECTOR_SIZE 512
3884 +
3885 +extern void toi_early_boot_message(int can_erase_image, int default_answer,
3886 +       char *warning_reason, ...);
3887 +
3888 +static inline int load_direct(struct page *page)
3889 +{
3890 +       return test_action_state(TOI_NO_DIRECT_LOAD) ? 0 :
3891 +               PagePageset1Copy(page);
3892 +}
3893 +
3894 +extern int do_check_can_resume(void);
3895 +extern int do_toi_step(int step);
3896 +extern int toi_launch_userspace_program(char *command, int channel_no,
3897 +               enum umh_wait wait, int debug);
3898 +
3899 +extern char *tuxonice_signature;
3900 +#endif
3901 diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c
3902 new file mode 100644
3903 index 0000000..0089ab8
3904 --- /dev/null
3905 +++ b/kernel/power/tuxonice_alloc.c
3906 @@ -0,0 +1,288 @@
3907 +/*
3908 + * kernel/power/tuxonice_alloc.c
3909 + *
3910 + * Copyright (C) 2008 Nigel Cunningham (nigel at tuxonice net)
3911 + *
3912 + * This file is released under the GPLv2.
3913 + *
3914 + */
3915 +
3916 +#ifdef CONFIG_PM_DEBUG
3917 +#include <linux/module.h>
3918 +#include <linux/slab.h>
3919 +#include "tuxonice_modules.h"
3920 +#include "tuxonice_alloc.h"
3921 +#include "tuxonice_sysfs.h"
3922 +#include "tuxonice.h"
3923 +
3924 +#define TOI_ALLOC_PATHS 39
3925 +
3926 +static DEFINE_MUTEX(toi_alloc_mutex);
3927 +
3928 +static struct toi_module_ops toi_alloc_ops;
3929 +
3930 +static int toi_fail_num;
3931 +static atomic_t toi_alloc_count[TOI_ALLOC_PATHS],
3932 +               toi_free_count[TOI_ALLOC_PATHS],
3933 +               toi_test_count[TOI_ALLOC_PATHS],
3934 +               toi_fail_count[TOI_ALLOC_PATHS];
3935 +static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS];
3936 +static int cur_allocd, max_allocd;
3937 +
3938 +static char *toi_alloc_desc[TOI_ALLOC_PATHS] = {
3939 +       "", /* 0 */
3940 +       "get_io_info_struct",
3941 +       "extent",
3942 +       "extent (loading chain)",
3943 +       "userui channel",
3944 +       "userui arg", /* 5 */
3945 +       "attention list metadata",
3946 +       "extra pagedir memory metadata",
3947 +       "bdev metadata",
3948 +       "extra pagedir memory",
3949 +       "header_locations_read", /* 10 */
3950 +       "bio queue",
3951 +       "prepare_readahead",
3952 +       "i/o buffer",
3953 +       "writer buffer in bio_init",
3954 +       "checksum buffer", /* 15 */
3955 +       "compression buffer",
3956 +       "filewriter signature op",
3957 +       "set resume param alloc1",
3958 +       "set resume param alloc2",
3959 +       "debugging info buffer", /* 20 */
3960 +       "check can resume buffer",
3961 +       "write module config buffer",
3962 +       "read module config buffer",
3963 +       "write image header buffer",
3964 +       "read pageset1 buffer", /* 25 */
3965 +       "get_have_image_data buffer",
3966 +       "checksum page",
3967 +       "worker rw loop",
3968 +       "get nonconflicting page",
3969 +       "ps1 load addresses", /* 30 */
3970 +       "remove swap image",
3971 +       "swap image exists",
3972 +       "swap parse sig location",
3973 +       "sysfs kobj",
3974 +       "swap mark resume attempted buffer", /* 35 */
3975 +       "cluster member",
3976 +       "boot kernel data buffer",
3977 +       "setting swap signature"
3978 +};
3979 +
3980 +#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \
3981 +       do { \
3982 +               BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \
3983 +               \
3984 +               if (FAIL_NUM == toi_fail_num) { \
3985 +                       atomic_inc(&toi_test_count[FAIL_NUM]); \
3986 +                       toi_fail_num = 0; \
3987 +                       return FAIL_VAL; \
3988 +               } \
3989 +       } while (0)
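+/*
+ * Illustrative use of the failure injection above, assuming the usual
+ * /sys/power/tuxonice sysfs root and the allocation debugging module
+ * enabled:
+ *
+ *	# Make the next allocation on path 13 ("i/o buffer") fail once:
+ *	echo 13 > /sys/power/tuxonice/alloc/failure_test
+ *
+ * MIGHT_FAIL() then returns FAIL_VAL for that one call and clears
+ * toi_fail_num, so subsequent allocations succeed again.
+ */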
3990 +
3991 +static void alloc_update_stats(int fail_num, void *result, int size)
3992 +{
3993 +       if (!result) {
3994 +               atomic_inc(&toi_fail_count[fail_num]);
3995 +               return;
3996 +       }
3997 +
3998 +       atomic_inc(&toi_alloc_count[fail_num]);
3999 +       if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
4000 +               mutex_lock(&toi_alloc_mutex);
4001 +               toi_cur_allocd[fail_num]++;
4002 +               cur_allocd += size;
4003 +               if (unlikely(cur_allocd > max_allocd)) {
4004 +                       int i;
4005 +
4006 +                       for (i = 0; i < TOI_ALLOC_PATHS; i++)
4007 +                               toi_max_allocd[i] = toi_cur_allocd[i];
4008 +                       max_allocd = cur_allocd;
4009 +               }
4010 +               mutex_unlock(&toi_alloc_mutex);
4011 +       }
4012 +}
4013 +
4014 +static void free_update_stats(int fail_num, int size)
4015 +{
4016 +       BUG_ON(fail_num >= TOI_ALLOC_PATHS);
4017 +       atomic_inc(&toi_free_count[fail_num]);
4018 +       if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
4019 +               mutex_lock(&toi_alloc_mutex);
4020 +               cur_allocd -= size;
4021 +               toi_cur_allocd[fail_num]--;
4022 +               mutex_unlock(&toi_alloc_mutex);
4023 +       }
4024 +}
4025 +
4026 +void *toi_kzalloc(int fail_num, size_t size, gfp_t flags)
4027 +{
4028 +       void *result;
4029 +
4030 +       if (toi_alloc_ops.enabled)
4031 +               MIGHT_FAIL(fail_num, NULL);
4032 +       result = kzalloc(size, flags);
4033 +       if (toi_alloc_ops.enabled)
4034 +               alloc_update_stats(fail_num, result, size);
4035 +       return result;
4036 +}
4037 +EXPORT_SYMBOL_GPL(toi_kzalloc);
4038 +
4039 +unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
4040 +               unsigned int order)
4041 +{
4042 +       unsigned long result;
4043 +
4044 +       if (toi_alloc_ops.enabled)
4045 +               MIGHT_FAIL(fail_num, 0);
4046 +       result = __get_free_pages(mask, order);
4047 +       if (toi_alloc_ops.enabled)
4048 +               alloc_update_stats(fail_num, (void *) result,
4049 +                               PAGE_SIZE << order);
4050 +       return result;
4051 +}
4052 +EXPORT_SYMBOL_GPL(toi_get_free_pages);
4053 +
4054 +struct page *toi_alloc_page(int fail_num, gfp_t mask)
4055 +{
4056 +       struct page *result;
4057 +
4058 +       if (toi_alloc_ops.enabled)
4059 +               MIGHT_FAIL(fail_num, NULL);
4060 +       result = alloc_page(mask);
4061 +       if (toi_alloc_ops.enabled)
4062 +               alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
4063 +       return result;
4064 +}
4065 +EXPORT_SYMBOL_GPL(toi_alloc_page);
4066 +
4067 +unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask)
4068 +{
4069 +       unsigned long result;
4070 +
4071 +       if (toi_alloc_ops.enabled)
4072 +               MIGHT_FAIL(fail_num, 0);
4073 +       result = get_zeroed_page(mask);
4074 +       if (toi_alloc_ops.enabled)
4075 +               alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
4076 +       return result;
4077 +}
4078 +EXPORT_SYMBOL_GPL(toi_get_zeroed_page);
4079 +
4080 +void toi_kfree(int fail_num, const void *arg, int size)
4081 +{
4082 +       if (arg && toi_alloc_ops.enabled)
4083 +               free_update_stats(fail_num, size);
4084 +
4085 +       kfree(arg);
4086 +}
4087 +EXPORT_SYMBOL_GPL(toi_kfree);
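+/*
+ * Minimal usage sketch (not part of the original code): an allocation and
+ * its matching free must quote the same path number and size so that the
+ * per-path counters above balance.
+ *
+ *	char *buf = toi_kzalloc(20, PAGE_SIZE, TOI_WAIT_GFP);
+ *	if (buf) {
+ *		... use buf ...
+ *		toi_kfree(20, buf, PAGE_SIZE);
+ *	}
+ */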
4088 +
4089 +void toi_free_page(int fail_num, unsigned long virt)
4090 +{
4091 +       if (virt && toi_alloc_ops.enabled)
4092 +               free_update_stats(fail_num, PAGE_SIZE);
4093 +
4094 +       free_page(virt);
4095 +}
4096 +EXPORT_SYMBOL_GPL(toi_free_page);
4097 +
4098 +void toi__free_page(int fail_num, struct page *page)
4099 +{
4100 +       if (page && toi_alloc_ops.enabled)
4101 +               free_update_stats(fail_num, PAGE_SIZE);
4102 +
4103 +       __free_page(page);
4104 +}
4105 +EXPORT_SYMBOL_GPL(toi__free_page);
4106 +
4107 +void toi_free_pages(int fail_num, struct page *page, int order)
4108 +{
4109 +       if (page && toi_alloc_ops.enabled)
4110 +               free_update_stats(fail_num, PAGE_SIZE << order);
4111 +
4112 +       __free_pages(page, order);
4113 +}
4114 +
4115 +void toi_alloc_print_debug_stats(void)
4116 +{
4117 +       int i, header_done = 0;
4118 +
4119 +       if (!toi_alloc_ops.enabled)
4120 +               return;
4121 +
4122 +       for (i = 0; i < TOI_ALLOC_PATHS; i++)
4123 +               if (atomic_read(&toi_alloc_count[i]) !=
4124 +                   atomic_read(&toi_free_count[i])) {
4125 +                       if (!header_done) {
4126 +                               printk(KERN_INFO "Idx  Allocs   Frees   Tests "
4127 +                                       "  Fails Max     Description\n");
4128 +                               header_done = 1;
4129 +                       }
4130 +
4131 +                       printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i,
4132 +                               atomic_read(&toi_alloc_count[i]),
4133 +                               atomic_read(&toi_free_count[i]),
4134 +                               atomic_read(&toi_test_count[i]),
4135 +                               atomic_read(&toi_fail_count[i]),
4136 +                               toi_max_allocd[i],
4137 +                               toi_alloc_desc[i]);
4138 +               }
4139 +}
4140 +EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats);
4141 +
4142 +static int toi_alloc_initialise(int starting_cycle)
4143 +{
4144 +       int i;
4145 +
4146 +       if (starting_cycle && toi_alloc_ops.enabled) {
4147 +               for (i = 0; i < TOI_ALLOC_PATHS; i++) {
4148 +                       atomic_set(&toi_alloc_count[i], 0);
4149 +                       atomic_set(&toi_free_count[i], 0);
4150 +                       atomic_set(&toi_test_count[i], 0);
4151 +                       atomic_set(&toi_fail_count[i], 0);
4152 +                       toi_cur_allocd[i] = 0;
4153 +                       toi_max_allocd[i] = 0;
4154 +               }
4155 +               max_allocd = 0;
4156 +               cur_allocd = 0;
4157 +       }
4158 +
4159 +       return 0;
4160 +}
4161 +
4162 +static struct toi_sysfs_data sysfs_params[] = {
4163 +       SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL),
4164 +       SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action,
4165 +                       TOI_GET_MAX_MEM_ALLOCD, 0),
4166 +       SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0,
4167 +                       NULL)
4168 +};
4169 +
4170 +static struct toi_module_ops toi_alloc_ops = {
4171 +       .type                                   = MISC_HIDDEN_MODULE,
4172 +       .name                                   = "allocation debugging",
4173 +       .directory                              = "alloc",
4174 +       .module                                 = THIS_MODULE,
4175 +       .early                                  = 1,
4176 +       .initialise                             = toi_alloc_initialise,
4177 +
4178 +       .sysfs_data             = sysfs_params,
4179 +       .num_sysfs_entries      = ARRAY_SIZE(sysfs_params),
4181 +};
4182 +
4183 +int toi_alloc_init(void)
4184 +{
4185 +       int result = toi_register_module(&toi_alloc_ops);
4186 +       toi_alloc_ops.enabled = 0;
4187 +       return result;
4188 +}
4189 +
4190 +void toi_alloc_exit(void)
4191 +{
4192 +       toi_unregister_module(&toi_alloc_ops);
4193 +}
4194 +#endif
4195 diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h
4196 new file mode 100644
4197 index 0000000..6069dfa
4198 --- /dev/null
4199 +++ b/kernel/power/tuxonice_alloc.h
4200 @@ -0,0 +1,51 @@
4201 +/*
4202 + * kernel/power/tuxonice_alloc.h
4203 + *
4204 + * Copyright (C) 2008 Nigel Cunningham (nigel at tuxonice net)
4205 + *
4206 + * This file is released under the GPLv2.
4207 + *
4208 + */
4209 +
4210 +#define TOI_WAIT_GFP (GFP_KERNEL | __GFP_NOWARN)
4211 +#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN)
4212 +
4213 +#ifdef CONFIG_PM_DEBUG
4214 +extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags);
4215 +extern void toi_kfree(int fail_num, const void *arg, int size);
4216 +
4217 +extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
4218 +               unsigned int order);
4219 +#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0)
4220 +extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask);
4221 +extern void toi_free_page(int fail_num, unsigned long buf);
4222 +extern void toi__free_page(int fail_num, struct page *page);
4223 +extern void toi_free_pages(int fail_num, struct page *page, int order);
4224 +extern struct page *toi_alloc_page(int fail_num, gfp_t mask);
4225 +extern int toi_alloc_init(void);
4226 +extern void toi_alloc_exit(void);
4227 +
4228 +extern void toi_alloc_print_debug_stats(void);
4229 +
4230 +#else /* CONFIG_PM_DEBUG */
4231 +
4232 +#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS))
4233 +#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN))
4234 +
4235 +#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER)
4236 +#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS)
4237 +#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS)
4238 +#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0)
4239 +#define toi__free_page(FAIL, PAGE) __free_page(PAGE)
4240 +#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER)
4241 +#define toi_alloc_page(FAIL, MASK) alloc_page(MASK)
4242 +static inline int toi_alloc_init(void)
4243 +{
4244 +       return 0;
4245 +}
4246 +
4247 +static inline void toi_alloc_exit(void) { }
4248 +
4249 +static inline void toi_alloc_print_debug_stats(void) { }
4250 +
4251 +#endif
4252 diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c
4253 new file mode 100644
4254 index 0000000..d6ef2b0
4255 --- /dev/null
4256 +++ b/kernel/power/tuxonice_atomic_copy.c
4257 @@ -0,0 +1,415 @@
4258 +/*
4259 + * kernel/power/tuxonice_atomic_copy.c
4260 + *
4261 + * Copyright 2004-2008 Nigel Cunningham (nigel at tuxonice net)
4262 + * Copyright (C) 2006 Red Hat, inc.
4263 + *
4264 + * Distributed under GPLv2.
4265 + *
4266 + * Routines for doing the atomic save/restore.
4267 + */
4268 +
4269 +#include <linux/suspend.h>
4270 +#include <linux/highmem.h>
4271 +#include <linux/cpu.h>
4272 +#include <linux/freezer.h>
4273 +#include <linux/console.h>
4274 +#include <asm/suspend.h>
4275 +#include "tuxonice.h"
4276 +#include "tuxonice_storage.h"
4277 +#include "tuxonice_power_off.h"
4278 +#include "tuxonice_ui.h"
4279 +#include "tuxonice_io.h"
4280 +#include "tuxonice_prepare_image.h"
4281 +#include "tuxonice_pageflags.h"
4282 +#include "tuxonice_checksum.h"
4283 +#include "tuxonice_builtin.h"
4284 +#include "tuxonice_atomic_copy.h"
4285 +#include "tuxonice_alloc.h"
4286 +
4287 +long extra_pd1_pages_used;
4288 +
4289 +/**
4290 + * free_pbe_list - free page backup entries used by the atomic copy code.
4291 + * @list:      List to free.
4292 + * @highmem:   Whether the list is in highmem.
4293 + *
4294 + * Normally, this function isn't used. If, however, we need to abort before
4295 + * doing the atomic copy, we use this to free the pbes previously allocated.
4296 + **/
4297 +static void free_pbe_list(struct pbe **list, int highmem)
4298 +{
4299 +       while (*list) {
4300 +               int i;
4301 +               struct pbe *free_pbe, *next_page = NULL;
4302 +               struct page *page;
4303 +
4304 +               if (highmem) {
4305 +                       page = (struct page *) *list;
4306 +                       free_pbe = (struct pbe *) kmap(page);
4307 +               } else {
4308 +                       page = virt_to_page(*list);
4309 +                       free_pbe = *list;
4310 +               }
4311 +
4312 +               for (i = 0; i < PBES_PER_PAGE; i++) {
4313 +                       if (!free_pbe)
4314 +                               break;
4315 +                       if (highmem)
4316 +                               toi__free_page(29, free_pbe->address);
4317 +                       else
4318 +                               toi_free_page(29,
4319 +                                       (unsigned long) free_pbe->address);
4320 +                       free_pbe = free_pbe->next;
4321 +               }
4322 +
4323 +               if (highmem) {
4324 +                       if (free_pbe)
4325 +                               next_page = free_pbe;
4326 +                       kunmap(page);
4327 +               } else {
4328 +                       if (free_pbe)
4329 +                               next_page = free_pbe;
4330 +               }
4331 +
4332 +               toi__free_page(29, page);
4333 +               *list = (struct pbe *) next_page;
4334 +       }
4335 +}
4336 +
4337 +/**
4338 + * copyback_post - post atomic-restore actions
4339 + *
4340 + * After doing the atomic restore, we have a few more things to do:
4341 + *     1) We want to retain some values across the restore, so we now copy
4342 + *     these from the nosave variables to the normal ones.
4343 + *     2) Set the status flags.
4344 + *     3) Resume devices.
4345 + *     4) Tell userui so it can redraw & restore settings.
4346 + *     5) Reread the page cache.
4347 + **/
4348 +void copyback_post(void)
4349 +{
4350 +       struct toi_boot_kernel_data *bkd =
4351 +               (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
4352 +
4353 +       /*
4354 +        * The boot kernel's data may be larger (newer version) or
4355 +        * smaller (older version) than ours. Copy the minimum
4356 +        * of the two sizes, so that we don't overwrite valid values
4357 +        * from pre-atomic copy.
4358 +        */
4359 +
4360 +       memcpy(&toi_bkd, (char *) boot_kernel_data_buffer,
4361 +                       min_t(int, sizeof(struct toi_boot_kernel_data),
4362 +                               bkd->size));
4363 +
4364 +       if (toi_activate_storage(1))
4365 +               panic("Failed to reactivate our storage.");
4366 +
4367 +       toi_ui_post_atomic_restore();
4368 +
4369 +       toi_cond_pause(1, "About to reload secondary pagedir.");
4370 +
4371 +       if (read_pageset2(0))
4372 +               panic("Unable to successfully reread the page cache.");
4373 +
4374 +       /*
4375 +        * If the user wants to sleep again after resuming from full-off,
4376 +        * it's most likely to be in order to suspend to ram, so we'll
4377 +        * do this check after loading pageset2, to give them the fastest
4378 +        * wakeup when they are ready to use the computer again.
4379 +        */
4380 +       toi_check_resleep();
4381 +}
4382 +
4383 +/**
4384 + * toi_copy_pageset1 - do the atomic copy of pageset1
4385 + *
4386 + * Make the atomic copy of pageset1. We can't use copy_page (as we once did)
4387 + * because we can't be sure what side effects it has. On my old Duron, with
4388 + * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt
4389 + * count at resume time 4 instead of 3.
4390 + *
4391 + * We don't want to call kmap_atomic unconditionally because it has the side
4392 + * effect of incrementing the preempt count, which will leave it one too high
4393 + * post resume (the page containing the preempt count will be copied after
4394 + * it's incremented). This is essentially the same problem.
4395 + **/
4396 +void toi_copy_pageset1(void)
4397 +{
4398 +       int i;
4399 +       unsigned long source_index, dest_index;
4400 +
4401 +       memory_bm_position_reset(pageset1_map);
4402 +       memory_bm_position_reset(pageset1_copy_map);
4403 +
4404 +       source_index = memory_bm_next_pfn(pageset1_map);
4405 +       dest_index = memory_bm_next_pfn(pageset1_copy_map);
4406 +
4407 +       for (i = 0; i < pagedir1.size; i++) {
4408 +               unsigned long *origvirt, *copyvirt;
4409 +               struct page *origpage, *copypage;
4410 +               int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1,
4411 +                   was_present1, was_present2;
4412 +
4413 +               origpage = pfn_to_page(source_index);
4414 +               copypage = pfn_to_page(dest_index);
4415 +
4416 +               origvirt = PageHighMem(origpage) ?
4417 +                       kmap_atomic(origpage, KM_USER0) :
4418 +                       page_address(origpage);
4419 +
4420 +               copyvirt = PageHighMem(copypage) ?
4421 +                       kmap_atomic(copypage, KM_USER1) :
4422 +                       page_address(copypage);
4423 +
4424 +               was_present1 = kernel_page_present(origpage);
4425 +               if (!was_present1)
4426 +                       kernel_map_pages(origpage, 1, 1);
4427 +
4428 +               was_present2 = kernel_page_present(copypage);
4429 +               if (!was_present2)
4430 +                       kernel_map_pages(copypage, 1, 1);
4431 +
4432 +               while (loop >= 0) {
4433 +                       *(copyvirt + loop) = *(origvirt + loop);
4434 +                       loop--;
4435 +               }
4436 +
4437 +               if (!was_present1)
4438 +                       kernel_map_pages(origpage, 1, 0);
4439 +
4440 +               if (!was_present2)
4441 +                       kernel_map_pages(copypage, 1, 0);
4442 +
4443 +               if (PageHighMem(origpage))
4444 +                       kunmap_atomic(origvirt, KM_USER0);
4445 +
4446 +               if (PageHighMem(copypage))
4447 +                       kunmap_atomic(copyvirt, KM_USER1);
4448 +
4449 +               source_index = memory_bm_next_pfn(pageset1_map);
4450 +               dest_index = memory_bm_next_pfn(pageset1_copy_map);
4451 +       }
4452 +}
4453 +
4454 +/**
4455 + * __toi_post_context_save - steps after saving the cpu context
4456 + *
4457 + * Steps taken after saving the CPU state to make the actual
4458 + * atomic copy.
4459 + *
4460 + * Called from swsusp_save in snapshot.c via toi_post_context_save.
4461 + **/
4462 +int __toi_post_context_save(void)
4463 +{
4464 +       long old_ps1_size = pagedir1.size;
4465 +
4466 +       check_checksums();
4467 +
4468 +       free_checksum_pages();
4469 +
4470 +       toi_recalculate_image_contents(1);
4471 +
4472 +       extra_pd1_pages_used = pagedir1.size - old_ps1_size;
4473 +
4474 +       if (extra_pd1_pages_used > extra_pd1_pages_allowance) {
4475 +               printk(KERN_INFO "Pageset1 has grown by %ld pages. "
4476 +                       "extra_pages_allowance is currently only %lu.\n",
4477 +                       pagedir1.size - old_ps1_size,
4478 +                       extra_pd1_pages_allowance);
4479 +
4480 +               /*
4481 +                * Highlevel code will see this, clear the state and
4482 +                * retry if we haven't already done so twice.
4483 +                */
4484 +               set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
4485 +               return 1;
4486 +       }
4487 +
4488 +       if (!test_action_state(TOI_TEST_FILTER_SPEED) &&
4489 +           !test_action_state(TOI_TEST_BIO))
4490 +               toi_copy_pageset1();
4491 +
4492 +       return 0;
4493 +}
4494 +
4495 +/**
4496 + * toi_hibernate - high level code for doing the atomic copy
4497 + *
4498 + * High-level code which prepares to do the atomic copy. Loosely based
4499 + * on the swsusp version, but with the following twists:
4500 + *     - We set toi_running so the swsusp code uses our code paths.
4501 + *     - We give better feedback regarding what goes wrong if there is a
4502 + *       problem.
4503 + *     - We use an extra function to call the assembly, just in case this code
4504 + *       is in a module (return address).
4505 + **/
4506 +int toi_hibernate(void)
4507 +{
4508 +       int error;
4509 +
4510 +       toi_running = 1; /* For the swsusp code we use :< */
4511 +
4512 +       error = toi_lowlevel_builtin();
4513 +
4514 +       toi_running = 0;
4515 +       return error;
4516 +}
4517 +
4518 +/**
4519 + * toi_atomic_restore - prepare to do the atomic restore
4520 + *
4521 + * Get ready to do the atomic restore. This part gets us into the same
4522 + * state we are in prior to calling do_toi_lowlevel while
4523 + * hibernating: hot-unplugging secondary CPUs and freezing processes,
4524 + * before starting the thread that will do the restore.
4525 + **/
4526 +int toi_atomic_restore(void)
4527 +{
4528 +       int error;
4529 +
4530 +       toi_running = 1;
4531 +
4532 +       toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore.");
4533 +
4534 +       memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line,
4535 +               strlen(saved_command_line));
4536 +
4537 +       if (add_boot_kernel_data_pbe())
4538 +               goto Failed;
4539 +
4540 +       toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
4541 +
4542 +       if (toi_go_atomic(PMSG_QUIESCE, 0))
4543 +               goto Failed;
4544 +
4545 +       /* We'll ignore saved state, but this gets preempt count (etc) right */
4546 +       save_processor_state();
4547 +
4548 +       error = swsusp_arch_resume();
4549 +       /*
4550 +        * Code below is only ever reached in case of failure. Otherwise
4551 + * execution continues at the place where swsusp_arch_suspend was called.
4552 +        *
4553 +        * We don't know whether it's safe to continue (this shouldn't happen),
4554 + * so let's err on the side of caution.
4555 +        */
4556 +       BUG();
4557 +
4558 +Failed:
4559 +       free_pbe_list(&restore_pblist, 0);
4560 +#ifdef CONFIG_HIGHMEM
4561 +       free_pbe_list(&restore_highmem_pblist, 1);
4562 +#endif
4563 +       toi_running = 0;
4564 +       return 1;
4565 +}
4566 +
4567 +/**
4568 + * toi_go_atomic - do the actual atomic copy/restore
4569 + * @state:             The state to use for device_suspend & power_down calls.
4570 + * @suspend_time:      Whether we're suspending or resuming.
4571 + **/
4572 +int toi_go_atomic(pm_message_t state, int suspend_time)
4573 +{
4574 +       if (suspend_time && platform_begin(1)) {
4575 +               set_abort_result(TOI_PLATFORM_PREP_FAILED);
4576 +               return 1;
4577 +       }
4578 +
4579 +       suspend_console();
4580 +
4581 +       if (device_suspend(state)) {
4582 +               set_abort_result(TOI_DEVICE_REFUSED);
4583 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
4584 +               return 1;
4585 +       }
4586 +
4587 +       if (suspend_time && arch_prepare_suspend()) {
4588 +               set_abort_result(TOI_ARCH_PREPARE_FAILED);
4589 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1);
4590 +               return 1;
4591 +       }
4592 +
4593 +       /* At this point, device_suspend() has been called, but *not*
4594 +        * device_power_down(). We *must* device_power_down() now.
4595 +        * Otherwise, drivers for some devices (e.g. interrupt controllers)
4596 +        * become desynchronized with the actual state of the hardware
4597 +        * at resume time, and evil weirdness ensues.
4598 +        */
4599 +
4600 +       if (device_power_down(state)) {
4601 +               set_abort_result(TOI_DEVICE_REFUSED);
4602 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1);
4603 +               return 1;
4604 +       }
4605 +
4606 +       if (suspend_time && platform_pre_snapshot(1)) {
4607 +               set_abort_result(TOI_PRE_SNAPSHOT_FAILED);
4608 +               toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1);
4609 +               return 1;
4610 +       }
4611 +
4612 +       if (!suspend_time && platform_pre_restore(1)) {
4613 +               set_abort_result(TOI_PRE_RESTORE_FAILED);
4614 +               toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1);
4615 +               return 1;
4616 +       }
4617 +
4618 +       if (test_action_state(TOI_LATE_CPU_HOTPLUG)) {
4619 +               if (disable_nonboot_cpus()) {
4620 +                       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
4621 +                       toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG,
4622 +                                       suspend_time, 1);
4623 +                       return 1;
4624 +               }
4625 +       }
4626 +
4627 +       local_irq_disable();
4628 +
4629 +       if (sysdev_suspend(state)) {
4630 +               set_abort_result(TOI_SYSDEV_REFUSED);
4631 +               toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1);
4632 +               return 1;
4633 +       }
4634 +
4635 +       return 0;
4636 +}
4637 +
4638 +/**
4639 + * toi_end_atomic - post atomic copy/restore routines
4640 + * @stage:             What step to start at.
4641 + * @suspend_time:      Whether we're suspending or resuming.
4642 + * @error:             Whether we're recovering from an error.
4643 + **/
4644 +void toi_end_atomic(int stage, int suspend_time, int error)
4645 +{
4646 +       switch (stage) {
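+       /* Deliberate fall-through: each stage also performs all later ones. */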
4647 +       case ATOMIC_ALL_STEPS:
4648 +               if (!suspend_time)
4649 +                       platform_leave(1);
4650 +               sysdev_resume();
4651 +       case ATOMIC_STEP_IRQS:
4652 +               local_irq_enable();
4653 +       case ATOMIC_STEP_CPU_HOTPLUG:
4654 +               if (test_action_state(TOI_LATE_CPU_HOTPLUG))
4655 +                       enable_nonboot_cpus();
4656 +               platform_restore_cleanup(1);
4657 +       case ATOMIC_STEP_PLATFORM_FINISH:
4658 +               platform_finish(1);
4659 +               device_power_up(suspend_time ?
4660 +                       (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
4661 +       case ATOMIC_STEP_DEVICE_RESUME:
4662 +               if (suspend_time && (error & 2))
4663 +                       platform_recover(1);
4664 +               device_resume(suspend_time ?
4665 +                       ((error & 1) ? PMSG_RECOVER : PMSG_THAW) :
4666 +                       PMSG_RESTORE);
4667 +               resume_console();
4668 +               platform_end(1);
4669 +
4670 +               toi_prepare_status(DONT_CLEAR_BAR, "Post atomic.");
4671 +       }
4672 +}
4673 diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h
4674 new file mode 100644
4675 index 0000000..a428f4c
4676 --- /dev/null
4677 +++ b/kernel/power/tuxonice_atomic_copy.h
4678 @@ -0,0 +1,20 @@
4679 +/*
4680 + * kernel/power/tuxonice_atomic_copy.h
4681 + *
4682 + * Copyright 2008 Nigel Cunningham (nigel at tuxonice net)
4683 + *
4684 + * Distributed under GPLv2.
4685 + *
4686 + * Routines for doing the atomic save/restore.
4687 + */
4688 +
4689 +enum {
4690 +       ATOMIC_ALL_STEPS,
4691 +       ATOMIC_STEP_IRQS,
4692 +       ATOMIC_STEP_CPU_HOTPLUG,
4693 +       ATOMIC_STEP_PLATFORM_FINISH,
4694 +       ATOMIC_STEP_DEVICE_RESUME,
4695 +};
4696 +
4697 +int toi_go_atomic(pm_message_t state, int suspend_time);
4698 +void toi_end_atomic(int stage, int suspend_time, int error);
4699 diff --git a/kernel/power/tuxonice_block_io.c b/kernel/power/tuxonice_block_io.c
4700 new file mode 100644
4701 index 0000000..12fa249
4702 --- /dev/null
4703 +++ b/kernel/power/tuxonice_block_io.c
4704 @@ -0,0 +1,1338 @@
4705 +/*
4706 + * kernel/power/tuxonice_block_io.c
4707 + *
4708 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
4709 + *
4710 + * Distributed under GPLv2.
4711 + *
4712 + * This file contains block io functions for TuxOnIce. These are
4713 + * used by the swapwriter and it is planned that they will also
4714 + * be used by the NFSwriter.
4715 + *
4716 + */
4717 +
4718 +#include <linux/blkdev.h>
4719 +#include <linux/syscalls.h>
4720 +#include <linux/suspend.h>
4721 +
4722 +#include "tuxonice.h"
4723 +#include "tuxonice_sysfs.h"
4724 +#include "tuxonice_modules.h"
4725 +#include "tuxonice_prepare_image.h"
4726 +#include "tuxonice_block_io.h"
4727 +#include "tuxonice_ui.h"
4728 +#include "tuxonice_alloc.h"
4729 +#include "tuxonice_io.h"
4730 +
4731 +#define MEMORY_ONLY 1
4732 +#define THROTTLE_WAIT 2
4733 +
4734 +/* #define MEASURE_MUTEX_CONTENTION */
4735 +#ifndef MEASURE_MUTEX_CONTENTION
4736 +#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
4737 +#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
4738 +#else
4739 +unsigned long mutex_times[2][2][NR_CPUS];
4740 +#define my_mutex_lock(index, the_lock) do { \
4741 +       int have_mutex; \
4742 +       have_mutex = mutex_trylock(the_lock); \
4743 +       if (!have_mutex) { \
4744 +               mutex_lock(the_lock); \
4745 +               mutex_times[index][0][smp_processor_id()]++; \
4746 +       } else { \
4747 +               mutex_times[index][1][smp_processor_id()]++; \
4748 +       }
4749 +
4750 +#define my_mutex_unlock(index, the_lock) \
4751 +       mutex_unlock(the_lock); \
4752 +} while (0)
4753 +#endif
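+/*
+ * Note that in the MEASURE_MUTEX_CONTENTION variant, the do { opened in
+ * my_mutex_lock() is only closed by the } while (0) in my_mutex_unlock(),
+ * so the two macros must always be used as a matched pair in one scope.
+ */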
4754 +
4755 +static int target_outstanding_io = 1024;
4756 +static int max_outstanding_writes, max_outstanding_reads;
4757 +
4758 +static struct page *bio_queue_head, *bio_queue_tail;
4759 +static atomic_t toi_bio_queue_size;
4760 +static DEFINE_SPINLOCK(bio_queue_lock);
4761 +
4762 +static int free_mem_throttle, throughput_throttle;
4763 +static int more_readahead = 1;
4764 +static struct page *readahead_list_head, *readahead_list_tail;
4765 +static DECLARE_WAIT_QUEUE_HEAD(readahead_list_wait);
4766 +
4767 +static struct page *waiting_on;
4768 +
4769 +static atomic_t toi_io_in_progress, toi_io_done;
4770 +static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
4771 +
4772 +static int extra_page_forward;
4773 +
4774 +static int current_stream;
4775 +/* 0 = Header, 1 = Pageset1, 2 = Pageset2, 3 = End of PS1 */
4776 +struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4];
4777 +EXPORT_SYMBOL_GPL(toi_writer_posn_save);
4778 +
4779 +/* Pointer to current entry being loaded/saved. */
4780 +struct toi_extent_iterate_state toi_writer_posn;
4781 +EXPORT_SYMBOL_GPL(toi_writer_posn);
4782 +
4783 +/* Not static, so that the allocators can set up and complete
4784 + * writing the header */
4785 +char *toi_writer_buffer;
4786 +EXPORT_SYMBOL_GPL(toi_writer_buffer);
4787 +
4788 +int toi_writer_buffer_posn;
4789 +EXPORT_SYMBOL_GPL(toi_writer_buffer_posn);
4790 +
4791 +static struct toi_bdev_info *toi_devinfo;
4792 +
4793 +static DEFINE_MUTEX(toi_bio_mutex);
4794 +static DEFINE_MUTEX(toi_bio_readahead_mutex);
4795 +
4796 +static struct task_struct *toi_queue_flusher;
4797 +static int toi_bio_queue_flush_pages(int dedicated_thread);
4798 +
4799 +#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \
4800 +              atomic_read(&toi_bio_queue_size))
4801 +
4802 +/**
4803 + * set_free_mem_throttle - set the point where we pause to avoid oom.
4804 + *
4805 + * Initially, this value is zero, but when we first fail to allocate memory,
4806 + * we set it (plus a buffer) and thereafter throttle i/o once that limit is
4807 + * reached.
4808 + **/
4809 +static void set_free_mem_throttle(void)
4810 +{
4811 +       int new_throttle = nr_unallocated_buffer_pages() + 256;
4812 +
4813 +       if (new_throttle > free_mem_throttle)
4814 +               free_mem_throttle = new_throttle;
4815 +}
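+/*
+ * For example (illustrative numbers): if an allocation first fails with 100
+ * buffer pages still unallocated, the throttle point becomes 356, and while
+ * I/O is in flight we'll pause whenever free buffer pages drop below that.
+ */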
4816 +
4817 +#define NUM_REASONS 7
4818 +static atomic_t reasons[NUM_REASONS];
4819 +static char *reason_name[NUM_REASONS] = {
4820 +       "readahead not ready",
4821 +       "bio allocation",
4822 +       "synchronous I/O",
4823 +       "toi_bio_get_new_page",
4824 +       "memory low",
4825 +       "readahead buffer allocation",
4826 +       "throughput_throttle",
4827 +};
4828 +
4829 +/**
4830 + * do_bio_wait - wait for some TuxOnIce I/O to complete
4831 + * @reason: The array index of the reason we're waiting.
4832 + *
4833 + * Wait for a particular page of I/O if we're after a particular page.
4834 + * If we're not after a particular page, wait instead for all in flight
4835 + * I/O to be completed or for us to have enough free memory to be able
4836 + * to submit more I/O.
4837 + *
4838 + * If we wait, we also update our statistics regarding why we waited.
4839 + **/
4840 +static void do_bio_wait(int reason)
4841 +{
4842 +       struct page *was_waiting_on = waiting_on;
4843 +
4844 +       /* On SMP, waiting_on can be reset, so we make a copy */
4845 +       if (was_waiting_on) {
4846 +               if (PageLocked(was_waiting_on)) {
4847 +                       wait_on_page_bit(was_waiting_on, PG_locked);
4848 +                       atomic_inc(&reasons[reason]);
4849 +               }
4850 +       } else {
4851 +               atomic_inc(&reasons[reason]);
4852 +
4853 +               wait_event(num_in_progress_wait,
4854 +                       !atomic_read(&toi_io_in_progress) ||
4855 +                       nr_unallocated_buffer_pages() > free_mem_throttle);
4856 +       }
4857 +}
4858 +
4859 +/**
4860 + * throttle_if_needed - wait for I/O completion if throttle points are reached
4861 + * @flags: What to check and how to act.
4862 + *
4863 + * Check whether we need to wait for some I/O to complete. We always check
4864 + * whether we have enough memory available, but may also (depending upon
4865 + * @flags) check if the throughput throttle limit has been reached.
4866 + **/
4867 +static int throttle_if_needed(int flags)
4868 +{
4869 +       int free_pages = nr_unallocated_buffer_pages();
4870 +
4871 +       /* Getting low on memory and I/O is in progress? */
4872 +       while (unlikely(free_pages < free_mem_throttle) &&
4873 +                       atomic_read(&toi_io_in_progress)) {
4874 +               if (!(flags & THROTTLE_WAIT))
4875 +                       return -ENOMEM;
4876 +               do_bio_wait(4);
4877 +               free_pages = nr_unallocated_buffer_pages();
4878 +       }
4879 +
4880 +       while (!(flags & MEMORY_ONLY) && throughput_throttle &&
4881 +               TOTAL_OUTSTANDING_IO >= throughput_throttle) {
4882 +               int result = toi_bio_queue_flush_pages(0);
4883 +               if (result)
4884 +                       return result;
4885 +               atomic_inc(&reasons[6]);
4886 +               wait_event(num_in_progress_wait,
4887 +                       !atomic_read(&toi_io_in_progress) ||
4888 +                       TOTAL_OUTSTANDING_IO < throughput_throttle);
4889 +       }
4890 +
4891 +       return 0;
4892 +}
4893 +
4894 +/**
4895 + * update_throughput_throttle - update the raw throughput throttle
4896 + * @jif_index: The number of times this function has been called.
4897 + *
4898 + * This function is called twice per second by the core, and used to limit the
4899 + * amount of I/O we submit at once, spreading out our waiting through the
4900 + * whole job and letting userui get an opportunity to do its work.
4901 + *
4902 + * We don't start limiting I/O until 1/2s has passed, so that we get a
4903 + * decent sample for our initial limit, and keep updating it because
4904 + * throughput may vary (on rotating media, eg) with our block number.
4905 + *
4906 + * We throttle to 1/10s worth of I/O.
4907 + **/
4908 +static void update_throughput_throttle(int jif_index)
4909 +{
4910 +       int done = atomic_read(&toi_io_done);
4911 +       throughput_throttle = done / jif_index / 5;
4912 +}
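+/*
+ * Worked example (illustrative figures): since we're called twice per
+ * second, jif_index counts elapsed half-seconds. If 2000 pages of I/O have
+ * completed after 4 half-seconds, throughput is 500 pages per half-second,
+ * and 500 / 5 = 100 pages becomes the new limit - roughly 1/10s worth.
+ */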
4913 +
4914 +/**
4915 + * toi_finish_all_io - wait for all outstanding i/o to complete
4916 + *
4917 + * Flush any queued but unsubmitted I/O and wait for it all to complete.
4918 + **/
4919 +static int toi_finish_all_io(void)
4920 +{
4921 +       int result = toi_bio_queue_flush_pages(0);
4922 +       wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO);
4923 +       return result;
4924 +}
4925 +
4926 +/**
4927 + * toi_end_bio - bio completion function.
4928 + * @bio: bio that has completed.
4929 + * @err: Error value. Yes, like end_swap_bio_read, we ignore it.
4930 + *
4931 + * Function called by the block driver from interrupt context when I/O is
4932 + * completed. If we were writing the page, we want to free it and will have
4933 + * set bio->bi_private to the parameter we should use in telling the page
4934 + * allocation accounting code what the page was allocated for. If we're
4935 + * reading the page, it will be in the singly linked list made from
4936 + * page->private pointers.
4937 + **/
4938 +static void toi_end_bio(struct bio *bio, int err)
4939 +{
4940 +       struct page *page = bio->bi_io_vec[0].bv_page;
4941 +
4942 +       BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
4943 +
4944 +       unlock_page(page);
4945 +       bio_put(bio);
4946 +
4947 +       if (waiting_on == page)
4948 +               waiting_on = NULL;
4949 +
4950 +       put_page(page);
4951 +
4952 +       if (bio->bi_private)
4953 +               toi__free_page((int) ((unsigned long) bio->bi_private), page);
4954 +
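+       /* The second put balances the bio_get() taken in submit(). */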
4955 +       bio_put(bio);
4956 +
4957 +       atomic_dec(&toi_io_in_progress);
4958 +       atomic_inc(&toi_io_done);
4959 +
4960 +       wake_up(&num_in_progress_wait);
4961 +}
4962 +
4963 +/**
4964 + * submit - submit BIO request
4965 + * @writing: READ or WRITE.
4966 + * @dev: The block device we're using.
4967 + * @first_block: The first sector we're using.
4968 + * @page: The page being used for I/O.
4969 + * @free_group: If writing, the group that was used in allocating the page
4970 + *     and which will be used in freeing the page from the completion
4971 + *     routine.
4972 + *
4973 + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
4974 + * textbook - allocate and initialize the bio. If we're writing, make sure
4975 + * the page is marked as dirty. Then submit it and carry on."
4976 + *
4977 + * If we're just testing the speed of our own code, we fake having done all
4978 + * the hard work and all toi_end_bio immediately.
4979 + **/
4980 +static int submit(int writing, struct block_device *dev, sector_t first_block,
4981 +               struct page *page, int free_group)
4982 +{
4983 +       struct bio *bio = NULL;
4984 +       int cur_outstanding_io, result;
4985 +
4986 +       /*
4987 +        * Shouldn't throttle if reading - can deadlock in the single
4988 +        * threaded case as pages are only freed when we use the
4989 +        * readahead.
4990 +        */
4991 +       if (writing) {
4992 +               result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT);
4993 +               if (result)
4994 +                       return result;
4995 +       }
4996 +
4997 +       while (!bio) {
4998 +               bio = bio_alloc(TOI_ATOMIC_GFP, 1);
4999 +               if (!bio) {
5000 +                       set_free_mem_throttle();
5001 +                       do_bio_wait(1);
5002 +               }
5003 +       }
5004 +
5005 +       bio->bi_bdev = dev;
5006 +       bio->bi_sector = first_block;
5007 +       bio->bi_private = (void *) ((unsigned long) free_group);
5008 +       bio->bi_end_io = toi_end_bio;
5009 +
5010 +       if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
5011 +               printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n",
5012 +                               (unsigned long long) first_block);
5013 +               bio_put(bio);
5014 +               return -EFAULT;
5015 +       }
5016 +
5017 +       bio_get(bio);
5018 +
5019 +       cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
5020 +       if (writing) {
5021 +               if (cur_outstanding_io > max_outstanding_writes)
5022 +                       max_outstanding_writes = cur_outstanding_io;
5023 +       } else {
5024 +               if (cur_outstanding_io > max_outstanding_reads)
5025 +                       max_outstanding_reads = cur_outstanding_io;
5026 +       }
5027 +
5029 +       if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED))) {
5030 +               /* Fake having done the hard work */
5031 +               set_bit(BIO_UPTODATE, &bio->bi_flags);
5032 +               toi_end_bio(bio, 0);
5033 +       } else
5034 +               submit_bio(writing | (1 << BIO_RW_SYNCIO) |
5035 +                               (1 << BIO_RW_UNPLUG), bio);
5036 +
5037 +       return 0;
5038 +}
5039 +
5040 +/**
5041 + * toi_do_io - prepare to do some i/o on a page and submit or batch it
5042 + *
5043 + * @writing: Whether reading or writing.
5044 + * @bdev: The block device which we're using.
5045 + * @block0: The first sector we're reading or writing.
5046 + * @page: The page on which I/O is being done.
5047 + * @is_readahead: Whether we're doing readahead.
5048 + * @syncio: Whether the i/o is being done synchronously.
+ * @free_group: If writing, the group used in allocating @page, which the
+ *	completion routine will use when freeing it.
5049 + *
5050 + * Prepare and start a read or write operation.
5051 + *
5052 + * Note that we always work with our own page. If writing, we might be given a
5053 + * compression buffer that will immediately be used to start compressing the
5054 + * next page. For reading, we do readahead and therefore don't know the final
5055 + * address where the data needs to go.
5056 + **/
5057 +static int toi_do_io(int writing, struct block_device *bdev, long block0,
5058 +       struct page *page, int is_readahead, int syncio, int free_group)
5059 +{
5060 +       page->private = 0;
5061 +
5062 +       /* Done here so we don't race against toi_bio_get_next_page_read */
5063 +       lock_page(page);
5064 +
5065 +       if (is_readahead) {
5066 +               if (readahead_list_head)
5067 +                       readahead_list_tail->private = (unsigned long) page;
5068 +               else
5069 +                       readahead_list_head = page;
5070 +
5071 +               readahead_list_tail = page;
5072 +               wake_up(&readahead_list_wait);
5073 +       }
5074 +
5075 +       /* Done before submitting to avoid races. */
5076 +       if (syncio)
5077 +               waiting_on = page;
5078 +
5079 +       /* Submit the page */
5080 +       get_page(page);
5081 +
5082 +       if (submit(writing, bdev, block0, page, free_group))
5083 +               return -EFAULT;
5084 +
5085 +       if (syncio)
5086 +               do_bio_wait(2);
5087 +
5088 +       return 0;
5089 +}
5090 +
5091 +/**
5092 + * toi_bdev_page_io - simpler interface to do i/o directly on a single page
5093 + * @writing: Whether reading or writing.
5094 + * @bdev: Block device on which we're operating.
5095 + * @pos: Sector at which page to read or write starts.
5096 + * @page: Page to be read/written.
5097 + *
5098 + * A simple interface to submit a page of I/O and wait for its completion.
5099 + * The caller must free the page used.
5100 + **/
5101 +static int toi_bdev_page_io(int writing, struct block_device *bdev,
5102 +               long pos, struct page *page)
5103 +{
5104 +       return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
5105 +}
5106 +
5107 +/**
5108 + * toi_bio_memory_needed - report the amount of memory needed for block i/o
5109 + *
5110 + * We want to have at least enough memory so as to have target_outstanding_io
5111 + * or more transactions on the fly at once. If we can do more, fine.
5112 + **/
5113 +static int toi_bio_memory_needed(void)
5114 +{
5115 +       return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) +
5116 +                               sizeof(struct bio));
5117 +}
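+/*
+ * Rough example (structure sizes are illustrative, not exact): with 4 KiB
+ * pages and a few hundred bytes for each request and bio, the default
+ * target_outstanding_io of 1024 asks for about
+ * 1024 * (4096 + ~400) bytes, i.e. on the order of 4.5 MiB.
+ */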
5118 +
5119 +/**
5120 + * toi_bio_print_debug_stats - put out debugging info in the buffer provided
5121 + * @buffer: A buffer of size @size into which text should be placed.
5122 + * @size: The size of @buffer.
5123 + *
5124 + * Fill a buffer with debugging info. This is used both for our debug_info
5125 + * sysfs entry and for recording the same info in dmesg.
5126 + **/
5127 +static int toi_bio_print_debug_stats(char *buffer, int size)
5128 +{
5129 +       int len = scnprintf(buffer, size, "- Max outstanding reads %d. Max "
5130 +                       "writes %d.\n", max_outstanding_reads,
5131 +                       max_outstanding_writes);
5132 +
5133 +       len += scnprintf(buffer + len, size - len,
5134 +               "  Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
5135 +               target_outstanding_io,
5136 +               PAGE_SIZE, (unsigned int) sizeof(struct request),
5137 +               (unsigned int) sizeof(struct bio), toi_bio_memory_needed());
5138 +
5139 +#ifdef MEASURE_MUTEX_CONTENTION
5140 +       {
5141 +       int i;
5142 +
5143 +       len += scnprintf(buffer + len, size - len,
5144 +               "  Mutex contention while reading:\n  Contended      Free\n");
5145 +
5146 +       for_each_online_cpu(i)
5147 +               len += scnprintf(buffer + len, size - len,
5148 +               "  %9lu %9lu\n",
5149 +               mutex_times[0][0][i], mutex_times[0][1][i]);
5150 +
5151 +       len += scnprintf(buffer + len, size - len,
5152 +               "  Mutex contention while writing:\n  Contended      Free\n");
5153 +
5154 +       for_each_online_cpu(i)
5155 +               len += scnprintf(buffer + len, size - len,
5156 +               "  %9lu %9lu\n",
5157 +               mutex_times[1][0][i], mutex_times[1][1][i]);
5158 +
5159 +       }
5160 +#endif
5161 +
5162 +       return len + scnprintf(buffer + len, size - len,
5163 +               "  Free mem throttle point reached %d.\n", free_mem_throttle);
5164 +}
5165 +
5166 +/**
5167 + * toi_set_devinfo - set the bdev info used for i/o
5168 + * @info: Pointer to an array of struct toi_bdev_info - the list of
5169 + * bdevs and blocks on them in which the image is stored.
5170 + *
5171 + * Set the list of bdevs and blocks in which the image will be stored.
5172 + * Think of them (all together) as one long tape on which the data will be
5173 + * stored.
5174 + **/
5175 +static void toi_set_devinfo(struct toi_bdev_info *info)
5176 +{
5177 +       toi_devinfo = info;
5178 +}
5179 +
5180 +/**
5181 + * dump_block_chains - print the contents of the bdev info array.
5182 + **/
5183 +static void dump_block_chains(void)
5184 +{
5185 +       int i;
5186 +
5187 +       for (i = 0; i < toi_writer_posn.num_chains; i++) {
5188 +               struct hibernate_extent *this;
5189 +
5190 +               this = (toi_writer_posn.chains + i)->first;
5191 +
5192 +               if (!this)
5193 +                       continue;
5194 +
5195 +               printk(KERN_DEBUG "Chain %d:", i);
5196 +
5197 +               while (this) {
5198 +                       printk(" [%lu-%lu]%s", this->start,
5199 +                                       this->end, this->next ? "," : "");
5200 +                       this = this->next;
5201 +               }
5202 +
5203 +               printk("\n");
5204 +       }
5205 +
5206 +       for (i = 0; i < 4; i++)
5207 +               printk(KERN_DEBUG "Posn %d: Chain %d, extent %d, offset %lu.\n",
5208 +                               i, toi_writer_posn_save[i].chain_num,
5209 +                               toi_writer_posn_save[i].extent_num,
5210 +                               toi_writer_posn_save[i].offset);
5211 +}
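+/*
+ * Example output (illustrative values):
+ *
+ *	Chain 0: [2048-2559],[3072-3199]
+ *	Posn 0: Chain 0, extent 1, offset 3135.
+ */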
5212 +
5213 +static int total_header_bytes;
5214 +static int unowned;
5215 +
5216 +static int debug_broken_header(void)
5217 +{
5218 +       printk(KERN_DEBUG "Image header too big for size allocated!\n");
5219 +       print_toi_header_storage_for_modules();
5220 +       printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed());
5221 +       printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header));
5222 +       printk(KERN_DEBUG "Total unowned : %d.\n", unowned);
5223 +       printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes,
5224 +                       DIV_ROUND_UP(total_header_bytes, PAGE_SIZE));
5225 +       printk(KERN_DEBUG "Space needed now : %ld.\n",
5226 +                       get_header_storage_needed());
5227 +       dump_block_chains();
5228 +       abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small.");
5229 +       return -EIO;
5230 +}
5231 +
5232 +/**
5233 + * go_next_page - skip blocks to the start of the next page
5234 + * @writing: Whether we're reading or writing the image.
+ * @section_barrier: Whether to stop on reaching the end of the current
+ *	section (stream) of the image.
5235 + *
5236 + * Go forward one page, or two if extra_page_forward is set. It only gets
5237 + * set at the start of reading the image header, to skip the first page
5238 + * of the header, which is read without using the extent chains.
5239 + **/
5240 +static int go_next_page(int writing, int section_barrier)
5241 +{
5242 +       int i, chain_num = toi_writer_posn.current_chain,
5243 +         max = (chain_num == -1) ? 1 : toi_devinfo[chain_num].blocks_per_page,
5244 +         compare_to = 0, compare_chain, compare_offset;
5245 +
5246 +       /* Have we already used the last page of the stream? */
5247 +       switch (current_stream) {
5248 +       case 0:
5249 +               compare_to = 2;
5250 +               break;
5251 +       case 1:
5252 +               compare_to = 3;
5253 +               break;
5254 +       case 2:
5255 +               compare_to = 1;
5256 +               break;
5257 +       }
5258 +
5259 +       compare_chain = toi_writer_posn_save[compare_to].chain_num;
5260 +       compare_offset = toi_writer_posn_save[compare_to].offset;
5261 +
5262 +       if (section_barrier && chain_num == compare_chain &&
5263 +           toi_writer_posn.current_offset == compare_offset) {
5264 +               if (writing) {
5265 +                       if (!current_stream)
5266 +                               return debug_broken_header();
5267 +               } else {
5268 +                       more_readahead = 0;
5269 +                       return -ENODATA;
5270 +               }
5271 +       }
5272 +
5273 +       /* Nope. Go forward a page - or maybe two */
5274 +       for (i = 0; i < max; i++)
5275 +               toi_extent_state_next(&toi_writer_posn);
5276 +
5277 +       if (toi_extent_state_eof(&toi_writer_posn)) {
5278 +               /* Don't complain if readahead falls off the end */
5279 +               if (writing && section_barrier) {
5280 +                       printk(KERN_DEBUG "Extent state eof. "
5281 +                               "Expected compression ratio too optimistic?\n");
5282 +                       dump_block_chains();
5283 +               }
5284 +               return -ENODATA;
5285 +       }
5286 +
5287 +       if (extra_page_forward) {
5288 +               extra_page_forward = 0;
5289 +               return go_next_page(writing, section_barrier);
5290 +       }
5291 +
5292 +       return 0;
5293 +}
5294 +
5295 +/**
5296 + * set_extra_page_forward - make us skip an extra page on next go_next_page
5297 + *
5298 + * Used in reading header, to jump to 2nd page after getting 1st page
5299 + * direct from image header.
5300 + **/
5301 +static void set_extra_page_forward(void)
5302 +{
5303 +       extra_page_forward = 1;
5304 +}
5305 +
5306 +/**
5307 + * toi_bio_rw_page - do i/o on the next disk page in the image
5308 + * @writing: Whether reading or writing.
5309 + * @page: Page to do i/o on.
5310 + * @is_readahead: Whether we're doing readahead
5311 + * @free_group: The group used in allocating the page
5312 + *
5313 + * Submit a page for reading or writing, possibly readahead.
5314 + * Pass the group used in allocating the page as well, as it should
5315 + * be freed on completion of the bio if we're writing the page.
5316 + **/
5317 +static int toi_bio_rw_page(int writing, struct page *page,
5318 +               int is_readahead, int free_group)
5319 +{
5320 +       struct toi_bdev_info *dev_info;
5321 +       int result = go_next_page(writing, 1);
5322 +
5323 +       if (result)
5324 +               return result;
5325 +
5326 +       dev_info = &toi_devinfo[toi_writer_posn.current_chain];
5327 +
5328 +       return toi_do_io(writing, dev_info->bdev,
5329 +               toi_writer_posn.current_offset <<
5330 +                       dev_info->bmap_shift,
5331 +               page, is_readahead, 0, free_group);
5332 +}
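+/*
+ * Example (illustrative): on a device using 4 KiB blocks with 512-byte
+ * sectors, bmap_shift would be 3 (block N starts at sector N << 3) and
+ * blocks_per_page would be 1 with 4 KiB pages.
+ */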
5333 +
5334 +/**
5335 + * toi_rw_init - prepare to read or write a stream in the image
5336 + * @writing: Whether reading or writing.
5337 + * @stream_number: Section of the image being processed.
5338 + *
5339 + * Prepare to read or write a section ('stream') in the image.
5340 + **/
5341 +static int toi_rw_init(int writing, int stream_number)
5342 +{
5343 +       if (stream_number)
5344 +               toi_extent_state_restore(&toi_writer_posn,
5345 +                               &toi_writer_posn_save[stream_number]);
5346 +       else
5347 +               toi_extent_state_goto_start(&toi_writer_posn);
5348 +
5349 +       atomic_set(&toi_io_done, 0);
5350 +       toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
5351 +       toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;
5352 +
5353 +       current_stream = stream_number;
5354 +
5355 +       more_readahead = 1;
5356 +
5357 +       return toi_writer_buffer ? 0 : -ENOMEM;
5358 +}
5359 +
5360 +/**
5361 + * toi_read_header_init - prepare to read the image header
5362 + *
5363 + * Reset readahead indices prior to starting to read a section of the image.
5364 + **/
5365 +static void toi_read_header_init(void)
5366 +{
5367 +       toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
5368 +       more_readahead = 1;
5369 +}
5370 +
5371 +/**
5372 + * toi_bio_queue_write - queue a page for writing
5373 + * @full_buffer: Pointer to a page to be queued
5374 + *
5375 + * Add a page to the queue to be submitted. If we're the queue flusher,
5376 + * we'll do this once we've dropped toi_bio_mutex, so other threads can
5377 + * continue to submit I/O while we're on the slow path doing the actual
5378 + * submission.
5379 + **/
5380 +static void toi_bio_queue_write(char **full_buffer)
5381 +{
5382 +       struct page *page = virt_to_page(*full_buffer);
5383 +       unsigned long flags;
5384 +
5385 +       page->private = 0;
5386 +
5387 +       spin_lock_irqsave(&bio_queue_lock, flags);
5388 +       if (!bio_queue_head)
5389 +               bio_queue_head = page;
5390 +       else
5391 +               bio_queue_tail->private = (unsigned long) page;
5392 +
5393 +       bio_queue_tail = page;
5394 +       atomic_inc(&toi_bio_queue_size);
5395 +
5396 +       spin_unlock_irqrestore(&bio_queue_lock, flags);
5397 +       wake_up(&toi_io_queue_flusher);
5398 +
5399 +       *full_buffer = NULL;
5400 +}
5401 +
5402 +/**
5403 + * toi_rw_cleanup - Cleanup after i/o.
5404 + * @writing: Whether we were reading or writing.
5405 + *
5406 + * Flush all I/O and clean everything up after reading or writing a
5407 + * section of the image.
5408 + **/
5409 +static int toi_rw_cleanup(int writing)
5410 +{
5411 +       int i, result;
5412 +
5413 +       if (writing) {
5416 +               if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED))
5417 +                       toi_bio_queue_write(&toi_writer_buffer);
5418 +
5419 +               result = toi_bio_queue_flush_pages(0);
5420 +
5421 +               if (result)
5422 +                       return result;
5423 +
5424 +               if (current_stream == 2)
5425 +                       toi_extent_state_save(&toi_writer_posn,
5426 +                                       &toi_writer_posn_save[1]);
5427 +               else if (current_stream == 1)
5428 +                       toi_extent_state_save(&toi_writer_posn,
5429 +                                       &toi_writer_posn_save[3]);
5430 +       }
5431 +
5432 +       result = toi_finish_all_io();
5433 +
5434 +       while (readahead_list_head) {
5435 +               void *next = (void *) readahead_list_head->private;
5436 +               toi__free_page(12, readahead_list_head);
5437 +               readahead_list_head = next;
5438 +       }
5439 +
5440 +       readahead_list_tail = NULL;
5441 +
5442 +       if (!current_stream)
5443 +               return result;
5444 +
5445 +       for (i = 0; i < NUM_REASONS; i++) {
5446 +               if (!atomic_read(&reasons[i]))
5447 +                       continue;
5448 +               printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n",
5449 +                               reason_name[i], atomic_read(&reasons[i]));
5450 +               atomic_set(&reasons[i], 0);
5451 +       }
5452 +
5453 +       current_stream = 0;
5454 +       return result;
5455 +}
5456 +
5457 +/**
5458 + * toi_start_one_readahead - start one page of readahead
5459 + * @dedicated_thread: Is this a thread dedicated to doing readahead?
5460 + *
5461 + * Start one new page of readahead. If this is being called by a thread
5462 + * whose only job is to submit readahead, don't quit because we failed
5463 + * to allocate a page.
5464 + **/
5465 +static int toi_start_one_readahead(int dedicated_thread)
5466 +{
5467 +       char *buffer = NULL;
5468 +       int oom = 0, result;
5469 +
5470 +       result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0);
5471 +       if (result)
5472 +               return result;
5473 +
5474 +       mutex_lock(&toi_bio_readahead_mutex);
5475 +
5476 +       while (!buffer) {
5477 +               buffer = (char *) toi_get_zeroed_page(12,
5478 +                               TOI_ATOMIC_GFP);
5479 +               if (!buffer) {
5480 +                       if (oom && !dedicated_thread) {
5481 +                               mutex_unlock(&toi_bio_readahead_mutex);
5482 +                               return -ENOMEM;
5483 +                       }
5484 +
5485 +                       oom = 1;
5486 +                       set_free_mem_throttle();
5487 +                       do_bio_wait(5);
5488 +               }
5489 +       }
5490 +
5491 +       result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
5492 +       mutex_unlock(&toi_bio_readahead_mutex);
5493 +       return result;
5494 +}
5495 +
5496 +/**
5497 + * toi_start_new_readahead - start new readahead
5498 + * @dedicated_thread: Are we dedicated to this task?
5499 + *
5500 + * Start readahead of image pages.
5501 + *
5502 + * We can be called as a thread dedicated to this task (may be helpful on
5503 + * systems with lots of CPUs), in which case we don't exit until there's no
5504 + * more readahead.
5505 + *
5506 + * If this is not called by a dedicated thread, we top up our queue until
5507 + * there's no more readahead to submit, until we've submitted the number
5508 + * given in target_outstanding_io, or until the number in progress exceeds
5509 + * the target outstanding I/O value.
5510 + *
5511 + * No mutex needed because this is only ever called by the first CPU.
5512 + **/
5513 +static int toi_start_new_readahead(int dedicated_thread)
5514 +{
5515 +       int last_result, num_submitted = 0;
5516 +
5517 +       /* Start a new readahead? */
5518 +       if (!more_readahead)
5519 +               return 0;
5520 +
5521 +       do {
5522 +               last_result = toi_start_one_readahead(dedicated_thread);
5523 +
5524 +               if (last_result) {
5525 +                       if (last_result == -ENOMEM || last_result == -ENODATA)
5526 +                               return 0;
5527 +
5528 +                       printk(KERN_DEBUG
5529 +                               "Begin read chunk returned %d.\n",
5530 +                               last_result);
5531 +               } else
5532 +                       num_submitted++;
5533 +
5534 +       } while (more_readahead && !last_result &&
5535 +                (dedicated_thread ||
5536 +                 (num_submitted < target_outstanding_io &&
5537 +                  atomic_read(&toi_io_in_progress) < target_outstanding_io)));
5538 +
5539 +       return last_result;
5540 +}
5541 +
5542 +/**
5543 + * bio_io_flusher - start the dedicated I/O flushing routine
5544 + * @writing: Whether we're writing the image.
5545 + **/
5546 +static int bio_io_flusher(int writing)
5547 +{
5548 +
5549 +       if (writing)
5550 +               return toi_bio_queue_flush_pages(1);
5551 +       else
5552 +               return toi_start_new_readahead(1);
5553 +}
5554 +
5555 +/**
5556 + * toi_bio_get_next_page_read - read a disk page, perhaps with readahead
5557 + * @no_readahead: Whether to avoid using readahead
5558 + *
5559 + * Read a page from disk, submitting readahead and cleaning up finished i/o
5560 + * while we wait for the page we're after.
5561 + **/
5562 +static int toi_bio_get_next_page_read(int no_readahead)
5563 +{
5564 +       unsigned long *virt;
5565 +       struct page *next;
5566 +
5567 +       /*
5568 +        * When reading the second page of the header, we have to
5569 +        * delay submitting the read until after we've gotten the
5570 +        * extents out of the first page.
5571 +        */
5572 +       if (unlikely(no_readahead && toi_start_one_readahead(0))) {
5573 +               printk(KERN_DEBUG "No readahead and toi_start_one_readahead "
5574 +                               "returned non-zero.\n");
5575 +               return -EIO;
5576 +       }
5577 +
5578 +       if (unlikely(!readahead_list_head)) {
5579 +               BUG_ON(!more_readahead);
5580 +               if (unlikely(toi_start_one_readahead(0))) {
5581 +                       printk(KERN_DEBUG "No readahead and "
5582 +                        "toi_start_one_readahead returned non-zero.\n");
5583 +                       return -EIO;
5584 +               }
5585 +       }
5586 +
5587 +       if (PageLocked(readahead_list_head)) {
5588 +               waiting_on = readahead_list_head;
5589 +               do_bio_wait(0);
5590 +       }
5591 +
5592 +       virt = page_address(readahead_list_head);
5593 +       memcpy(toi_writer_buffer, virt, PAGE_SIZE);
5594 +
5595 +       next = (struct page *) readahead_list_head->private;
5596 +       toi__free_page(12, readahead_list_head);
5597 +       readahead_list_head = next;
5598 +       return 0;
5599 +}
5600 +
5601 +/**
5602 + * toi_bio_queue_flush_pages - flush the queue of pages queued for writing
5603 + * @dedicated_thread: Whether we're a dedicated thread
5604 + *
5605 + * Flush the queue of pages ready to be written to disk.
5606 + *
5607 + * If we're a dedicated thread, stay in here until told to leave,
5608 + * sleeping in wait_event.
5609 + *
5610 + * The first thread is normally the only one to come in here. Another
5611 + * thread can enter this routine too, though, via throttle_if_needed.
5612 + * Since that's the case, we must be careful to only have one thread
5613 + * doing this work at a time. Otherwise we have a race and could save
5614 + * pages out of order.
5615 + *
5616 + * If an error occurs, free all remaining pages without submitting them
5617 + * for I/O.
5618 + **/
5619 +
5620 +int toi_bio_queue_flush_pages(int dedicated_thread)
5621 +{
5622 +       unsigned long flags;
5623 +       int result = 0;
5624 +       static int busy;
5625 +
5626 +       if (busy)
5627 +               return 0;
5628 +
5629 +       busy = 1;
5630 +
5631 +top:
5632 +       spin_lock_irqsave(&bio_queue_lock, flags);
5633 +       while (bio_queue_head) {
5634 +               struct page *page = bio_queue_head;
5635 +               bio_queue_head = (struct page *) page->private;
5636 +               if (bio_queue_tail == page)
5637 +                       bio_queue_tail = NULL;
5638 +               atomic_dec(&toi_bio_queue_size);
5639 +               spin_unlock_irqrestore(&bio_queue_lock, flags);
5640 +               if (!result)
5641 +                       result = toi_bio_rw_page(WRITE, page, 0, 11);
5642 +               if (result)
5643 +                       toi__free_page(11, page);
5644 +               spin_lock_irqsave(&bio_queue_lock, flags);
5645 +       }
5646 +       spin_unlock_irqrestore(&bio_queue_lock, flags);
5647 +
5648 +       if (dedicated_thread) {
5649 +               wait_event(toi_io_queue_flusher, bio_queue_head ||
5650 +                               toi_bio_queue_flusher_should_finish);
5651 +               if (likely(!toi_bio_queue_flusher_should_finish))
5652 +                       goto top;
5653 +               toi_bio_queue_flusher_should_finish = 0;
5654 +       }
5655 +
5656 +       busy = 0;
5657 +       return result;
5658 +}
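/*
 * Note (an inference from the code above, not an original comment):
 * bio_queue_lock appears to be dropped around each toi_bio_rw_page()
 * call because submitting block I/O may sleep, which is not allowed
 * while holding a spinlock with interrupts disabled; the lock is only
 * retaken to dequeue the next page.
 */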
5659 +
5660 +/**
5661 + * toi_bio_get_new_page - get a new page for I/O
5662 + * @full_buffer: Pointer to a page to allocate.
5663 + **/
5664 +static int toi_bio_get_new_page(char **full_buffer)
5665 +{
5666 +       int result = throttle_if_needed(THROTTLE_WAIT);
5667 +       if (result)
5668 +               return result;
5669 +
5670 +       while (!*full_buffer) {
5671 +               *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
5672 +               if (!*full_buffer) {
5673 +                       set_free_mem_throttle();
5674 +                       do_bio_wait(3);
5675 +               }
5676 +       }
5677 +
5678 +       return 0;
5679 +}
5680 +
5681 +/**
5682 + * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O
5683 + * @writing:           Bool - whether writing (or reading).
5684 + * @buffer:            The start of the buffer to write or fill.
5685 + * @buffer_size:       The size of the buffer to write or fill.
5686 + * @no_readahead:      Don't try to start readahead (when getting extents).
5687 + **/
5688 +static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
5689 +               int no_readahead)
5690 +{
5691 +       int bytes_left = buffer_size, result = 0;
5692 +
5693 +       while (bytes_left) {
5694 +               char *source_start = buffer + buffer_size - bytes_left;
5695 +               char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
5696 +               int capacity = PAGE_SIZE - toi_writer_buffer_posn;
5697 +               char *to = writing ? dest_start : source_start;
5698 +               char *from = writing ? source_start : dest_start;
5699 +
5700 +               if (bytes_left <= capacity) {
5701 +                       memcpy(to, from, bytes_left);
5702 +                       toi_writer_buffer_posn += bytes_left;
5703 +                       return 0;
5704 +               }
5705 +
5706 +               /* Complete this page and start a new one */
5707 +               memcpy(to, from, capacity);
5708 +               bytes_left -= capacity;
5709 +
5710 +               if (!writing) {
5711 +                       /*
5712 +                        * Perform actual I/O:
5713 +                        * read readahead_list_head into toi_writer_buffer
5714 +                        */
5715 +                       int result = toi_bio_get_next_page_read(no_readahead);
5716 +                       if (result)
5717 +                               return result;
5718 +               } else {
5719 +                       toi_bio_queue_write(&toi_writer_buffer);
5720 +                       result = toi_bio_get_new_page(&toi_writer_buffer);
5721 +                       if (result)
5722 +                               return result;
5723 +               }
5724 +
5725 +               toi_writer_buffer_posn = 0;
5726 +               toi_cond_pause(0, NULL);
5727 +       }
5728 +
5729 +       return 0;
5730 +}
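/*
 * Hypothetical usage sketch (not part of the original code): callers can
 * stream values smaller than a page, and toi_rw_buffer() coalesces them
 * into PAGE_SIZE submissions. For example, when writing:
 *
 *	unsigned long pfn = 42;
 *	unsigned int size = PAGE_SIZE;
 *
 *	toi_rw_buffer(WRITE, (char *) &pfn, sizeof(pfn), 0);
 *	toi_rw_buffer(WRITE, (char *) &size, sizeof(size), 0);
 *
 * Both values accumulate in toi_writer_buffer; actual I/O is queued only
 * once toi_writer_buffer_posn reaches PAGE_SIZE.
 */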
5731 +
5732 +/**
5733 + * toi_bio_read_page - read a page of the image
5734 + * @pfn:               The pfn where the data belongs.
5735 + * @buffer_page:       The page containing the (possibly compressed) data.
5736 + * @buf_size:          The number of bytes on @buffer_page used (PAGE_SIZE).
5737 + *
5738 + * Read a (possibly compressed) page from the image, into buffer_page,
5739 + * returning its pfn and the buffer size.
5740 + **/
5741 +static int toi_bio_read_page(unsigned long *pfn, struct page *buffer_page,
5742 +               unsigned int *buf_size)
5743 +{
5744 +       int result = 0;
5745 +       char *buffer_virt = kmap(buffer_page);
5746 +
5747 +       /*
5748 +        * Only call start_new_readahead if we don't have a dedicated thread
5749 +        * and we're the queue flusher.
5750 +        */
5751 +       if (current == toi_queue_flusher) {
5752 +               int result2 = toi_start_new_readahead(0);
5753 +               if (result2) {
5754 +                       printk(KERN_DEBUG "Queue flusher and "
5755 +                        "toi_start_new_readahead returned non-zero.\n");
5756 +                       result = -EIO;
5757 +                       goto out;
5758 +               }
5759 +       }
5760 +
5761 +       my_mutex_lock(0, &toi_bio_mutex);
5762 +
5763 +       /*
5764 +        * Structure in the image:
5765 +        *      [destination pfn|page size|page data]
5766 +        * buf_size is PAGE_SIZE
5767 +        */
5768 +       if (toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) ||
5769 +           toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) ||
5770 +           toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
5771 +               abort_hibernate(TOI_FAILED_IO, "Read of data failed.");
5772 +               result = 1;
5773 +       }
5774 +
5775 +       my_mutex_unlock(0, &toi_bio_mutex);
5776 +out:
5777 +       kunmap(buffer_page);
5778 +       return result;
5779 +}
5780 +
5781 +/**
5782 + * toi_bio_write_page - write a page of the image
5783 + * @pfn:               The pfn where the data belongs.
5784 + * @buffer_page:       The page containing the (possibly compressed) data.
5785 + * @buf_size:  The number of bytes on @buffer_page used.
5786 + *
5787 + * Write a (possibly compressed) page to the image from the buffer, together
5788 + * with its index and buffer size.
5789 + **/
5790 +static int toi_bio_write_page(unsigned long pfn, struct page *buffer_page,
5791 +               unsigned int buf_size)
5792 +{
5793 +       char *buffer_virt;
5794 +       int result = 0, result2 = 0;
5795 +
5796 +       if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
5797 +               return 0;
5798 +
5799 +       my_mutex_lock(1, &toi_bio_mutex);
5800 +
5801 +       if (test_result_state(TOI_ABORTED)) {
5802 +               my_mutex_unlock(1, &toi_bio_mutex);
5803 +               return -EIO;
5804 +       }
5805 +
5806 +       buffer_virt = kmap(buffer_page);
5807 +
5808 +       /*
5809 +        * Structure in the image:
5810 +        *      [destination pfn|page size|page data]
5811 +        * buf_size is PAGE_SIZE
5812 +        */
5813 +       if (toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) ||
5814 +           toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) ||
5815 +           toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
5816 +               printk(KERN_DEBUG "toi_rw_buffer returned non-zero to "
5817 +                               "toi_bio_write_page.\n");
5818 +               result = -EIO;
5819 +       }
5820 +
5821 +       kunmap(buffer_page);
5822 +       my_mutex_unlock(1, &toi_bio_mutex);
5823 +
5824 +       if (current == toi_queue_flusher)
5825 +               result2 = toi_bio_queue_flush_pages(0);
5826 +
5827 +       return result ? result : result2;
5828 +}
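/*
 * Illustrative layout (an equivalence, not a struct used by the code):
 * the record that toi_bio_read_page() and toi_bio_write_page() move
 * through the stream is written field by field, but amounts to:
 *
 *	struct toi_page_record {
 *		unsigned long pfn;	destination pfn
 *		unsigned int size;	bytes of (possibly compressed) data
 *		char data[];		up to PAGE_SIZE bytes
 *	};
 */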
5829 +
5830 +/**
5831 + * _toi_rw_header_chunk - read or write a portion of the image header
5832 + * @writing:           Whether reading or writing.
5833 + * @owner:             The module for which we're writing.
5834 + *                     Used for confirming that modules
5835 + *                     don't use more header space than they asked for.
5836 + * @buffer:            Address of the data to write.
5837 + * @buffer_size:       Size of the data buffer.
5838 + * @no_readahead:      Don't try to start readahead (when getting extents).
5839 + *
5840 + * Perform PAGE_SIZE I/O. Start readahead if needed.
5841 + **/
5842 +static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
5843 +               char *buffer, int buffer_size, int no_readahead)
5844 +{
5845 +       int result = 0;
5846 +
5847 +       if (owner) {
5848 +               owner->header_used += buffer_size;
5849 +               toi_message(TOI_HEADER, TOI_LOW, 1,
5850 +                       "Header: %s : %d bytes (%d/%d).\n",
5851 +                       owner->name,
5852 +                       buffer_size, owner->header_used,
5853 +                       owner->header_requested);
5854 +               if (owner->header_used > owner->header_requested) {
5855 +                       printk(KERN_EMERG "TuxOnIce module %s is using more "
5856 +                               "header space (%u) than it requested (%u).\n",
5857 +                               owner->name,
5858 +                               owner->header_used,
5859 +                               owner->header_requested);
5860 +                       return buffer_size;
5861 +               }
5862 +       } else {
5863 +               unowned += buffer_size;
5864 +               toi_message(TOI_HEADER, TOI_LOW, 1,
5865 +                       "Header: (No owner): %d bytes (%d total so far)\n",
5866 +                       buffer_size, unowned);
5867 +       }
5868 +
5869 +       if (!writing && !no_readahead)
5870 +               result = toi_start_new_readahead(0);
5871 +
5872 +       if (!result)
5873 +               result = toi_rw_buffer(writing, buffer, buffer_size,
5874 +                               no_readahead);
5875 +
5876 +       total_header_bytes += buffer_size;
5877 +       return result;
5878 +}
5879 +
5880 +static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
5881 +               char *buffer, int size)
5882 +{
5883 +       return _toi_rw_header_chunk(writing, owner, buffer, size, 0);
5884 +}
5885 +
5886 +static int toi_rw_header_chunk_noreadahead(int writing,
5887 +               struct toi_module_ops *owner, char *buffer, int size)
5888 +{
5889 +       return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
5890 +}
5891 +
5892 +/**
5893 + * write_header_chunk_finish - flush any buffered header data
5894 + **/
5895 +static int write_header_chunk_finish(void)
5896 +{
5897 +       int result = 0;
5898 +
5899 +       if (toi_writer_buffer_posn)
5900 +               toi_bio_queue_write(&toi_writer_buffer);
5901 +
5902 +       result = toi_finish_all_io();
5903 +
5904 +       unowned = 0;
5905 +       total_header_bytes = 0;
5906 +       return result;
5907 +}
5908 +
5909 +/**
5910 + * toi_bio_storage_needed - get the amount of storage needed for my fns
5911 + **/
5912 +static int toi_bio_storage_needed(void)
5913 +{
5914 +       return sizeof(int);
5915 +}
5916 +
5917 +/**
5918 + * toi_bio_save_config_info - save block I/O config to image header
5919 + * @buf:       PAGE_SIZE'd buffer into which data should be saved.
5920 + **/
5921 +static int toi_bio_save_config_info(char *buf)
5922 +{
5923 +       int *ints = (int *) buf;
5924 +       ints[0] = target_outstanding_io;
5925 +       return sizeof(int);
5926 +}
5927 +
5928 +/**
5929 + * toi_bio_load_config_info - restore block I/O config
5930 + * @buf:       Data to be reloaded.
5931 + * @size:      Size of the buffer saved.
5932 + **/
5933 +static void toi_bio_load_config_info(char *buf, int size)
5934 +{
5935 +       int *ints = (int *) buf;
5936 +       target_outstanding_io  = ints[0];
5937 +}
5938 +
5939 +/**
5940 + * toi_bio_initialise - initialise bio code at start of some action
5941 + * @starting_cycle:    Whether starting a hibernation cycle, or just reading or
5942 + *                     writing a sysfs value.
5943 + **/
5944 +static int toi_bio_initialise(int starting_cycle)
5945 +{
5946 +       if (starting_cycle) {
5947 +               max_outstanding_writes = 0;
5948 +               max_outstanding_reads = 0;
5949 +               toi_queue_flusher = current;
5950 +#ifdef MEASURE_MUTEX_CONTENTION
5951 +               {
5952 +               int i, j, k;
5953 +
5954 +               for (i = 0; i < 2; i++)
5955 +                       for (j = 0; j < 2; j++)
5956 +                               for_each_online_cpu(k)
5957 +                                       mutex_times[i][j][k] = 0;
5958 +               }
5959 +#endif
5960 +       }
5961 +
5962 +       return 0;
5963 +}
5964 +
5965 +/**
5966 + * toi_bio_cleanup - cleanup after some action
5967 + * @finishing_cycle:   Whether completing a cycle.
5968 + **/
5969 +static void toi_bio_cleanup(int finishing_cycle)
5970 +{
5971 +       if (toi_writer_buffer) {
5972 +               toi_free_page(11, (unsigned long) toi_writer_buffer);
5973 +               toi_writer_buffer = NULL;
5974 +       }
5975 +}
5976 +
5977 +struct toi_bio_ops toi_bio_ops = {
5978 +       .bdev_page_io = toi_bdev_page_io,
5979 +       .finish_all_io = toi_finish_all_io,
5980 +       .update_throughput_throttle = update_throughput_throttle,
5981 +       .forward_one_page = go_next_page,
5982 +       .set_extra_page_forward = set_extra_page_forward,
5983 +       .set_devinfo = toi_set_devinfo,
5984 +       .read_page = toi_bio_read_page,
5985 +       .write_page = toi_bio_write_page,
5986 +       .rw_init = toi_rw_init,
5987 +       .rw_cleanup = toi_rw_cleanup,
5988 +       .read_header_init = toi_read_header_init,
5989 +       .rw_header_chunk = toi_rw_header_chunk,
5990 +       .rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead,
5991 +       .write_header_chunk_finish = write_header_chunk_finish,
5992 +       .io_flusher = bio_io_flusher,
5993 +};
5994 +EXPORT_SYMBOL_GPL(toi_bio_ops);
5995 +
5996 +static struct toi_sysfs_data sysfs_params[] = {
5997 +       SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io,
5998 +                       0, 16384, 0, NULL),
5999 +};
6000 +
6001 +static struct toi_module_ops toi_blockwriter_ops = {
6002 +       .name                                   = "lowlevel i/o",
6003 +       .type                                   = MISC_HIDDEN_MODULE,
6004 +       .directory                              = "block_io",
6005 +       .module                                 = THIS_MODULE,
6006 +       .print_debug_info                       = toi_bio_print_debug_stats,
6007 +       .memory_needed                          = toi_bio_memory_needed,
6008 +       .storage_needed                         = toi_bio_storage_needed,
6009 +       .save_config_info                       = toi_bio_save_config_info,
6010 +       .load_config_info                       = toi_bio_load_config_info,
6011 +       .initialise                             = toi_bio_initialise,
6012 +       .cleanup                                = toi_bio_cleanup,
6013 +
6014 +       .sysfs_data             = sysfs_params,
6015 +       .num_sysfs_entries      = sizeof(sysfs_params) /
6016 +               sizeof(struct toi_sysfs_data),
6017 +};
6018 +
6019 +/**
6020 + * toi_block_io_load - load time routine for block I/O module
6021 + *
6022 + * Register block i/o ops and sysfs entries.
6023 + **/
6024 +static __init int toi_block_io_load(void)
6025 +{
6026 +       return toi_register_module(&toi_blockwriter_ops);
6027 +}
6028 +
6029 +#ifdef MODULE
6030 +static __exit void toi_block_io_unload(void)
6031 +{
6032 +       toi_unregister_module(&toi_blockwriter_ops);
6033 +}
6034 +
6035 +module_init(toi_block_io_load);
6036 +module_exit(toi_block_io_unload);
6037 +MODULE_LICENSE("GPL");
6038 +MODULE_AUTHOR("Nigel Cunningham");
6039 +MODULE_DESCRIPTION("TuxOnIce block io functions");
6040 +#else
6041 +late_initcall(toi_block_io_load);
6042 +#endif
6043 diff --git a/kernel/power/tuxonice_block_io.h b/kernel/power/tuxonice_block_io.h
6044 new file mode 100644
6045 index 0000000..b18298c
6046 --- /dev/null
6047 +++ b/kernel/power/tuxonice_block_io.h
6048 @@ -0,0 +1,59 @@
6049 +/*
6050 + * kernel/power/tuxonice_block_io.h
6051 + *
6052 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
6053 + * Copyright (C) 2006 Red Hat, inc.
6054 + *
6055 + * Distributed under GPLv2.
6056 + *
6057 + * This file contains declarations for functions exported from
6058 + * tuxonice_block_io.c, which contains low-level I/O functions.
6059 + */
6060 +
6061 +#include <linux/buffer_head.h>
6062 +#include "tuxonice_extent.h"
6063 +
6064 +struct toi_bdev_info {
6065 +       struct block_device *bdev;
6066 +       dev_t dev_t;
6067 +       int bmap_shift;
6068 +       int blocks_per_page;
6069 +       int ignored;
6070 +};
6071 +
6072 +/*
6073 + * Our exported interface so the swapwriter and filewriter don't
6074 + * need these functions duplicated.
6075 + */
6076 +struct toi_bio_ops {
6077 +       int (*bdev_page_io) (int rw, struct block_device *bdev, long pos,
6078 +                       struct page *page);
6079 +       void (*check_io_stats) (void);
6080 +       void (*reset_io_stats) (void);
6081 +       void (*update_throughput_throttle) (int jif_index);
6082 +       int (*finish_all_io) (void);
6083 +       int (*forward_one_page) (int writing, int section_barrier);
6084 +       void (*set_extra_page_forward) (void);
6085 +       void (*set_devinfo) (struct toi_bdev_info *info);
6086 +       int (*read_page) (unsigned long *index, struct page *buffer_page,
6087 +                       unsigned int *buf_size);
6088 +       int (*write_page) (unsigned long index, struct page *buffer_page,
6089 +                       unsigned int buf_size);
6090 +       void (*read_header_init) (void);
6091 +       int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
6092 +                       char *buffer, int buffer_size);
6093 +       int (*rw_header_chunk_noreadahead) (int rw,
6094 +                       struct toi_module_ops *owner,
6095 +                       char *buffer, int buffer_size);
6096 +       int (*write_header_chunk_finish) (void);
6097 +       int (*rw_init) (int rw, int stream_number);
6098 +       int (*rw_cleanup) (int rw);
6099 +       int (*io_flusher) (int rw);
6100 +};
6101 +
6102 +extern struct toi_bio_ops toi_bio_ops;
6103 +
6104 +extern char *toi_writer_buffer;
6105 +extern int toi_writer_buffer_posn;
6106 +extern struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4];
6107 +extern struct toi_extent_iterate_state toi_writer_posn;
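/*
 * Hypothetical caller sketch (not part of the original header): a writer
 * module such as the swapwriter would drive this interface roughly as
 * follows when saving one stream of the image.
 */
static inline int example_save_stream(int stream, struct page *page,
		unsigned long pfn)
{
	int ret = toi_bio_ops.rw_init(WRITE, stream);

	if (ret)
		return ret;

	/* One call per page; the pfn and length travel with the data. */
	ret = toi_bio_ops.write_page(pfn, page, PAGE_SIZE);

	/* Flush queued I/O and record where this stream ended. */
	if (toi_bio_ops.rw_cleanup(WRITE) && !ret)
		ret = -EIO;

	return ret;
}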
6108 diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c
6109 new file mode 100644
6110 index 0000000..97472d5
6111 --- /dev/null
6112 +++ b/kernel/power/tuxonice_builtin.c
6113 @@ -0,0 +1,313 @@
6114 +/*
6115 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
6116 + *
6117 + * This file is released under the GPLv2.
6118 + */
6119 +#include <linux/resume-trace.h>
6120 +#include <linux/kernel.h>
6121 +#include <linux/swap.h>
6122 +#include <linux/syscalls.h>
6123 +#include <linux/bio.h>
6124 +#include <linux/root_dev.h>
6125 +#include <linux/freezer.h>
6126 +#include <linux/reboot.h>
6127 +#include <linux/writeback.h>
6128 +#include <linux/tty.h>
6129 +#include <linux/crypto.h>
6130 +#include <linux/cpu.h>
6131 +#include <linux/ctype.h>
6132 +#include "tuxonice_io.h"
6133 +#include "tuxonice.h"
6134 +#include "tuxonice_extent.h"
6135 +#include "tuxonice_netlink.h"
6136 +#include "tuxonice_prepare_image.h"
6137 +#include "tuxonice_ui.h"
6138 +#include "tuxonice_sysfs.h"
6139 +#include "tuxonice_pagedir.h"
6140 +#include "tuxonice_modules.h"
6141 +#include "tuxonice_builtin.h"
6142 +#include "tuxonice_power_off.h"
6143 +
6144 +/*
6145 + * Highmem related functions (x86 only).
6146 + */
6147 +
6148 +#ifdef CONFIG_HIGHMEM
6149 +
6150 +/**
6151 + * copyback_high: Restore highmem pages.
6152 + *
6153 + * Highmem data and pbe lists are/can be stored in highmem.
6154 + * The format is slightly different to the lowmem pbe lists
6155 + * used for the assembly code: the last pbe in each page is
6156 + * a struct page * instead of struct pbe *, pointing to the
6157 + * next page where pbes are stored (or NULL if it happens to be
6158 + * the end of the list). Since we don't want to generate
6159 + * unnecessary deltas against swsusp code, we use a cast
6160 + * instead of a union.
6161 + **/
6162 +
6163 +static void copyback_high(void)
6164 +{
6165 +       struct page *pbe_page = (struct page *) restore_highmem_pblist;
6166 +       struct pbe *this_pbe, *first_pbe;
6167 +       unsigned long *origpage, *copypage;
6168 +       int pbe_index = 1;
6169 +
6170 +       if (!pbe_page)
6171 +               return;
6172 +
6173 +       this_pbe = (struct pbe *) kmap_atomic(pbe_page, KM_BOUNCE_READ);
6174 +       first_pbe = this_pbe;
6175 +
6176 +       while (this_pbe) {
6177 +               int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1;
6178 +
6179 +               origpage = kmap_atomic((struct page *) this_pbe->orig_address,
6180 +                       KM_BIO_DST_IRQ);
6181 +               copypage = kmap_atomic((struct page *) this_pbe->address,
6182 +                       KM_BIO_SRC_IRQ);
6183 +
6184 +               while (loop >= 0) {
6185 +                       *(origpage + loop) = *(copypage + loop);
6186 +                       loop--;
6187 +               }
6188 +
6189 +               kunmap_atomic(origpage, KM_BIO_DST_IRQ);
6190 +               kunmap_atomic(copypage, KM_BIO_SRC_IRQ);
6191 +
6192 +               if (!this_pbe->next)
6193 +                       break;
6194 +
6195 +               if (pbe_index < PBES_PER_PAGE) {
6196 +                       this_pbe++;
6197 +                       pbe_index++;
6198 +               } else {
6199 +                       pbe_page = (struct page *) this_pbe->next;
6200 +                       kunmap_atomic(first_pbe, KM_BOUNCE_READ);
6201 +                       if (!pbe_page)
6202 +                               return;
6203 +                       this_pbe = (struct pbe *) kmap_atomic(pbe_page,
6204 +                                       KM_BOUNCE_READ);
6205 +                       first_pbe = this_pbe;
6206 +                       pbe_index = 1;
6207 +               }
6208 +       }
6209 +       kunmap_atomic(first_pbe, KM_BOUNCE_READ);
6210 +}
6211 +
6212 +#else /* CONFIG_HIGHMEM */
6213 +static void copyback_high(void) { }
6214 +#endif
6215 +
6216 +char toi_wait_for_keypress_dev_console(int timeout)
6217 +{
6218 +       int fd, this_timeout = 255;
6219 +       char key = '\0';
6220 +       struct termios t, t_backup;
6221 +
6222 +       /* We should be guaranteed /dev/console exists after populate_rootfs()
6223 +        * in init/main.c.
6224 +        */
6225 +       fd = sys_open("/dev/console", O_RDONLY, 0);
6226 +       if (fd < 0) {
6227 +               printk(KERN_INFO "Couldn't open /dev/console.\n");
6228 +               return key;
6229 +       }
6230 +
6231 +       if (sys_ioctl(fd, TCGETS, (long)&t) < 0)
6232 +               goto out_close;
6233 +
6234 +       memcpy(&t_backup, &t, sizeof(t));
6235 +
6236 +       t.c_lflag &= ~(ISIG|ICANON|ECHO);
6237 +       t.c_cc[VMIN] = 0;
6238 +
6239 +new_timeout:
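       /*
        * Note: c_cc[VTIME] is a single byte measured in tenths of a
        * second (max 255), so longer waits are consumed in chunks of at
        * most 25 seconds (250 deciseconds) per read below.
        */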
6240 +       if (timeout > 0) {
6241 +               this_timeout = timeout < 26 ? timeout : 25;
6242 +               timeout -= this_timeout;
6243 +               this_timeout *= 10;
6244 +       }
6245 +
6246 +       t.c_cc[VTIME] = this_timeout;
6247 +
6248 +       if (sys_ioctl(fd, TCSETS, (long)&t) < 0)
6249 +               goto out_restore;
6250 +
6251 +       while (1) {
6252 +               if (sys_read(fd, &key, 1) <= 0) {
6253 +                       if (timeout)
6254 +                               goto new_timeout;
6255 +                       key = '\0';
6256 +                       break;
6257 +               }
6258 +               key = tolower(key);
6259 +               if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) {
6260 +                       if (key == 'c') {
6261 +                               set_toi_state(TOI_CONTINUE_REQ);
6262 +                               break;
6263 +                       } else if (key == ' ')
6264 +                               break;
6265 +               } else
6266 +                       break;
6267 +       }
6268 +
6269 +out_restore:
6270 +       sys_ioctl(fd, TCSETS, (long)&t_backup);
6271 +out_close:
6272 +       sys_close(fd);
6273 +
6274 +       return key;
6275 +}
6276 +EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console);
6277 +
6278 +struct toi_boot_kernel_data toi_bkd __nosavedata
6279 +               __attribute__((aligned(PAGE_SIZE))) = {
6280 +       MY_BOOT_KERNEL_DATA_VERSION,
6281 +       0,
6282 +#ifdef CONFIG_TOI_REPLACE_SWSUSP
6283 +       (1 << TOI_REPLACE_SWSUSP) |
6284 +#endif
6285 +       (1 << TOI_NO_FLUSHER_THREAD) |
6286 +       (1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG),
6287 +};
6288 +EXPORT_SYMBOL_GPL(toi_bkd);
6289 +
6290 +struct block_device *toi_open_by_devnum(dev_t dev, fmode_t mode)
6291 +{
6292 +       struct block_device *bdev = bdget(dev);
6293 +       int err = -ENOMEM;
6294 +       if (bdev)
6295 +               err = blkdev_get(bdev, mode);
6296 +       return err ? ERR_PTR(err) : bdev;
6297 +}
6298 +EXPORT_SYMBOL_GPL(toi_open_by_devnum);
6299 +
6300 +int toi_wait = CONFIG_TOI_DEFAULT_WAIT;
6301 +EXPORT_SYMBOL_GPL(toi_wait);
6302 +
6303 +struct toi_core_fns *toi_core_fns;
6304 +EXPORT_SYMBOL_GPL(toi_core_fns);
6305 +
6306 +unsigned long toi_result;
6307 +EXPORT_SYMBOL_GPL(toi_result);
6308 +
6309 +struct pagedir pagedir1 = {1};
6310 +EXPORT_SYMBOL_GPL(pagedir1);
6311 +
6312 +unsigned long toi_get_nonconflicting_page(void)
6313 +{
6314 +       return toi_core_fns->get_nonconflicting_page();
6315 +}
6316 +
6317 +int toi_post_context_save(void)
6318 +{
6319 +       return toi_core_fns->post_context_save();
6320 +}
6321 +
6322 +int try_tuxonice_hibernate(void)
6323 +{
6324 +       if (!toi_core_fns)
6325 +               return -ENODEV;
6326 +
6327 +       return toi_core_fns->try_hibernate();
6328 +}
6329 +
6330 +static int num_resume_calls;
6331 +#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL
6332 +static int ignore_late_initcall = 1;
6333 +#else
6334 +static int ignore_late_initcall;
6335 +#endif
6336 +
6337 +void try_tuxonice_resume(void)
6338 +{
6339 +       /* Don't let the counter eventually wrap around */
6340 +       if (num_resume_calls < 2)
6341 +               num_resume_calls++;
6342 +
6343 +       if (num_resume_calls == 1 && ignore_late_initcall) {
6344 +               printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n");
6345 +               return;
6346 +       }
6347 +
6348 +       if (toi_core_fns)
6349 +               toi_core_fns->try_resume();
6350 +       else
6351 +               printk(KERN_INFO "TuxOnIce core not loaded yet.\n");
6352 +}
6353 +
6354 +int toi_lowlevel_builtin(void)
6355 +{
6356 +       int error = 0;
6357 +
6358 +       save_processor_state();
6359 +       error = swsusp_arch_suspend();
6360 +       if (error)
6361 +               printk(KERN_ERR "Error %d hibernating\n", error);
6362 +
6363 +       /* Restore control flow appears here */
6364 +       if (!toi_in_hibernate) {
6365 +               copyback_high();
6366 +               set_toi_state(TOI_NOW_RESUMING);
6367 +       }
6368 +
6369 +       restore_processor_state();
6370 +
6371 +       return error;
6372 +}
6373 +EXPORT_SYMBOL_GPL(toi_lowlevel_builtin);
6374 +
6375 +unsigned long toi_compress_bytes_in;
6376 +EXPORT_SYMBOL_GPL(toi_compress_bytes_in);
6377 +
6378 +unsigned long toi_compress_bytes_out;
6379 +EXPORT_SYMBOL_GPL(toi_compress_bytes_out);
6380 +
6381 +unsigned long toi_state = ((1 << TOI_BOOT_TIME) |
6382 +               (1 << TOI_IGNORE_LOGLEVEL) |
6383 +               (1 << TOI_IO_STOPPED));
6384 +EXPORT_SYMBOL_GPL(toi_state);
6385 +
6386 +/* The number of hibernates we have started (some may have been cancelled) */
6387 +unsigned int nr_hibernates;
6388 +EXPORT_SYMBOL_GPL(nr_hibernates);
6389 +
6390 +int toi_running;
6391 +EXPORT_SYMBOL_GPL(toi_running);
6392 +
6393 +__nosavedata int toi_in_hibernate;
6394 +EXPORT_SYMBOL_GPL(toi_in_hibernate);
6395 +
6396 +__nosavedata struct pbe *restore_highmem_pblist;
6397 +EXPORT_SYMBOL_GPL(restore_highmem_pblist);
6398 +
6399 +static int __init toi_wait_setup(char *str)
6400 +{
6401 +       int value;
6402 +
6403 +       if (sscanf(str, "=%d", &value)) {
6404 +               if (value < -1 || value > 255)
6405 +                       printk(KERN_INFO "TuxOnIce_wait outside range -1 to "
6406 +                                       "255.\n");
6407 +               else
6408 +                       toi_wait = value;
6409 +       }
6410 +
6411 +       return 1;
6412 +}
6413 +
6414 +__setup("toi_wait", toi_wait_setup);
6415 +
6416 +static int __init toi_ignore_late_initcall_setup(char *str)
6417 +{
6418 +       int value;
6419 +
6420 +       if (sscanf(str, "=%d", &value))
6421 +               ignore_late_initcall = value;
6422 +
6423 +       return 1;
6424 +}
6425 +
6426 +__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup);
6427 diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h
6428 new file mode 100644
6429 index 0000000..49b25b7
6430 --- /dev/null
6431 +++ b/kernel/power/tuxonice_builtin.h
6432 @@ -0,0 +1,27 @@
6433 +/*
6434 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
6435 + *
6436 + * This file is released under the GPLv2.
6437 + */
6438 +#include <asm/setup.h>
6439 +
6440 +extern struct toi_core_fns *toi_core_fns;
6441 +extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
6442 +extern unsigned int nr_hibernates;
6443 +extern int toi_in_hibernate;
6444 +
6445 +extern __nosavedata struct pbe *restore_highmem_pblist;
6446 +
6447 +int toi_lowlevel_builtin(void);
6448 +
6449 +#ifdef CONFIG_HIGHMEM
6450 +extern __nosavedata struct zone_data *toi_nosave_zone_list;
6451 +extern __nosavedata unsigned long toi_nosave_max_pfn;
6452 +#endif
6453 +
6454 +extern unsigned long toi_get_nonconflicting_page(void);
6455 +extern int toi_post_context_save(void);
6456 +
6457 +extern char toi_wait_for_keypress_dev_console(int timeout);
6458 +extern struct block_device *toi_open_by_devnum(dev_t dev, fmode_t mode);
6459 +extern int toi_wait;
6460 diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c
6461 new file mode 100644
6462 index 0000000..b0adc17
6463 --- /dev/null
6464 +++ b/kernel/power/tuxonice_checksum.c
6465 @@ -0,0 +1,375 @@
6466 +/*
6467 + * kernel/power/tuxonice_checksum.c
6468 + *
6469 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
6470 + * Copyright (C) 2006 Red Hat, inc.
6471 + *
6472 + * This file is released under the GPLv2.
6473 + *
6474 + * This file contains data checksum routines for TuxOnIce,
6475 + * using cryptoapi. They are used to locate any modifications
6476 + * made to pageset 2 while we're saving it.
6477 + */
6478 +
6479 +#include <linux/suspend.h>
6480 +#include <linux/highmem.h>
6481 +#include <linux/vmalloc.h>
6482 +#include <linux/crypto.h>
6483 +#include <linux/scatterlist.h>
6484 +
6485 +#include "tuxonice.h"
6486 +#include "tuxonice_modules.h"
6487 +#include "tuxonice_sysfs.h"
6488 +#include "tuxonice_io.h"
6489 +#include "tuxonice_pageflags.h"
6490 +#include "tuxonice_checksum.h"
6491 +#include "tuxonice_pagedir.h"
6492 +#include "tuxonice_alloc.h"
6493 +
6494 +static struct toi_module_ops toi_checksum_ops;
6495 +
6496 +/* Constant for the moment, but I might allow tuning later */
6497 +static char toi_checksum_name[32] = "md4";
6498 +/* Bytes per checksum */
6499 +#define CHECKSUM_SIZE (16)
6500 +
6501 +#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE)
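/*
 * Note: each checksum page reserves sizeof(void *) bytes at its start for
 * a link to the next page in the chain (see allocate_checksum_pages()
 * below). Assuming 4 KB pages, 16-byte digests and 8-byte pointers, that
 * gives (4096 - 8) / 16 = 255 checksums per page.
 */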
6502 +
6503 +struct cpu_context {
6504 +       struct crypto_hash *transform;
6505 +       struct hash_desc desc;
6506 +       struct scatterlist sg[2];
6507 +       char *buf;
6508 +};
6509 +
6510 +static DEFINE_PER_CPU(struct cpu_context, contexts);
6511 +static int pages_allocated;
6512 +static unsigned long page_list;
6513 +
6514 +static int toi_num_resaved;
6515 +
6516 +static unsigned long this_checksum, next_page;
6517 +static int checksum_index;
6518 +
6519 +static inline int checksum_pages_needed(void)
6520 +{
6521 +       return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE);
6522 +}
6523 +
6524 +/* ---- Local buffer management ---- */
6525 +
6526 +/*
6527 + * toi_checksum_cleanup
6528 + *
6529 + * Frees memory allocated for our labours.
6530 + */
6531 +static void toi_checksum_cleanup(int ending_cycle)
6532 +{
6533 +       int cpu;
6534 +
6535 +       if (ending_cycle) {
6536 +               for_each_online_cpu(cpu) {
6537 +                       struct cpu_context *this = &per_cpu(contexts, cpu);
6538 +                       if (this->transform) {
6539 +                               crypto_free_hash(this->transform);
6540 +                               this->transform = NULL;
6541 +                               this->desc.tfm = NULL;
6542 +                       }
6543 +
6544 +                       if (this->buf) {
6545 +                               toi_free_page(27, (unsigned long) this->buf);
6546 +                               this->buf = NULL;
6547 +                       }
6548 +               }
6549 +       }
6550 +}
6551 +
6552 +/*
6553 + * toi_checksum_initialise
6554 + *
6555 + * Prepare to do some work by allocating buffers and transforms.
6556 + * Returns: Int: Zero. Even if we can't set up checksum, we still
6557 + * seek to hibernate.
6558 + */
6559 +static int toi_checksum_initialise(int starting_cycle)
6560 +{
6561 +       int cpu;
6562 +
6563 +       if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled)
6564 +               return 0;
6565 +
6566 +       if (!*toi_checksum_name) {
6567 +               printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n");
6568 +               return 1;
6569 +       }
6570 +
6571 +       for_each_online_cpu(cpu) {
6572 +               struct cpu_context *this = &per_cpu(contexts, cpu);
6573 +               struct page *page;
6574 +
6575 +               this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0);
6576 +               if (IS_ERR(this->transform)) {
6577 +                       printk(KERN_INFO "TuxOnIce: Failed to initialise the "
6578 +                               "%s checksum algorithm: %ld.\n",
6579 +                               toi_checksum_name, (long) this->transform);
6580 +                       this->transform = NULL;
6581 +                       return 1;
6582 +               }
6583 +
6584 +               this->desc.tfm = this->transform;
6585 +               this->desc.flags = 0;
6586 +
6587 +               page = toi_alloc_page(27, GFP_KERNEL);
6588 +               if (!page)
6589 +                       return 1;
6590 +               this->buf = page_address(page);
6591 +               sg_init_one(&this->sg[0], this->buf, PAGE_SIZE);
6592 +       }
6593 +       return 0;
6594 +}
6595 +
6596 +/*
6597 + * toi_checksum_print_debug_stats
6598 + * @buffer: Pointer to a buffer into which the debug info will be printed.
6599 + * @size: Size of the buffer.
6600 + *
6601 + * Print information to be recorded for debugging purposes into a buffer.
6602 + * Returns: Number of characters written to the buffer.
6603 + */
6604 +
6605 +static int toi_checksum_print_debug_stats(char *buffer, int size)
6606 +{
6607 +       int len;
6608 +
6609 +       if (!toi_checksum_ops.enabled)
6610 +               return scnprintf(buffer, size,
6611 +                       "- Checksumming disabled.\n");
6612 +
6613 +       len = scnprintf(buffer, size, "- Checksum method is '%s'.\n",
6614 +                       toi_checksum_name);
6615 +       len += scnprintf(buffer + len, size - len,
6616 +               "  %d pages resaved in atomic copy.\n", toi_num_resaved);
6617 +       return len;
6618 +}
6619 +
6620 +static int toi_checksum_memory_needed(void)
6621 +{
6622 +       return toi_checksum_ops.enabled ?
6623 +               checksum_pages_needed() << PAGE_SHIFT : 0;
6624 +}
6625 +
6626 +static int toi_checksum_storage_needed(void)
6627 +{
6628 +       if (toi_checksum_ops.enabled)
6629 +               return strlen(toi_checksum_name) + sizeof(int) + 1;
6630 +       else
6631 +               return 0;
6632 +}
6633 +
6634 +/*
6635 + * toi_checksum_save_config_info
6636 + * @buffer: Pointer to a buffer of size PAGE_SIZE.
6637 + *
6638 + * Save information needed when reloading the image at resume time.
6639 + * Returns: Number of bytes used for saving our data.
6640 + */
6641 +static int toi_checksum_save_config_info(char *buffer)
6642 +{
6643 +       int namelen = strlen(toi_checksum_name) + 1;
6644 +       int total_len;
6645 +
6646 +       *((unsigned int *) buffer) = namelen;
6647 +       strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen);
6648 +       total_len = sizeof(unsigned int) + namelen;
6649 +       return total_len;
6650 +}
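/*
 * Worked example (assuming a 4-byte unsigned int): for the default
 * algorithm name "md4", namelen is 4 (three characters plus the
 * terminating NUL), so 4 + 4 = 8 bytes are saved:
 * [04 00 00 00]['m']['d']['4']['\0'] on a little-endian machine.
 */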
6651 +
6652 +/* toi_checksum_load_config_info
6653 + * @buffer: Pointer to the start of the data.
6654 + * @size: Number of bytes that were saved.
6655 + *
6656 + * Description:        Reload information needed for verifying image checksums at
6657 + * resume time.
6658 + */
6659 +static void toi_checksum_load_config_info(char *buffer, int size)
6660 +{
6661 +       int namelen;
6662 +
6663 +       namelen = *((unsigned int *) (buffer));
6664 +       strncpy(toi_checksum_name, buffer + sizeof(unsigned int),
6665 +                       namelen);
6666 +       return;
6667 +}
6668 +
6669 +/*
6670 + * Free Checksum Memory
6671 + */
6672 +
6673 +void free_checksum_pages(void)
6674 +{
6675 +       while (pages_allocated) {
6676 +               unsigned long next = *((unsigned long *) page_list);
6677 +               ClearPageNosave(virt_to_page(page_list));
6678 +               toi_free_page(15, (unsigned long) page_list);
6679 +               page_list = next;
6680 +               pages_allocated--;
6681 +       }
6682 +}
6683 +
6684 +/*
6685 + * Allocate Checksum Memory
6686 + */
6687 +
6688 +int allocate_checksum_pages(void)
6689 +{
6690 +       int pages_needed = checksum_pages_needed();
6691 +
6692 +       if (!toi_checksum_ops.enabled)
6693 +               return 0;
6694 +
6695 +       while (pages_allocated < pages_needed) {
6696 +               unsigned long *new_page =
6697 +                 (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP);
6698 +               if (!new_page) {
6699 +                       printk(KERN_ERR "Unable to allocate checksum pages.\n");
6700 +                       return -ENOMEM;
6701 +               }
6702 +               SetPageNosave(virt_to_page(new_page));
6703 +               (*new_page) = page_list;
6704 +               page_list = (unsigned long) new_page;
6705 +               pages_allocated++;
6706 +       }
6707 +
6708 +       next_page = (unsigned long) page_list;
6709 +       checksum_index = 0;
6710 +
6711 +       return 0;
6712 +}
6713 +
6714 +char *tuxonice_get_next_checksum(void)
6715 +{
6716 +       if (!toi_checksum_ops.enabled)
6717 +               return NULL;
6718 +
6719 +       if (checksum_index % CHECKSUMS_PER_PAGE)
6720 +               this_checksum += CHECKSUM_SIZE;
6721 +       else {
6722 +               this_checksum = next_page + sizeof(void *);
6723 +               next_page = *((unsigned long *) next_page);
6724 +       }
6725 +
6726 +       checksum_index++;
6727 +       return (char *) this_checksum;
6728 +}
6729 +
6730 +int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
6731 +{
6732 +       char *pa;
6733 +       int result, cpu = smp_processor_id();
6734 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
6735 +
6736 +       if (!toi_checksum_ops.enabled)
6737 +               return 0;
6738 +
6739 +       pa = kmap(page);
6740 +       memcpy(ctx->buf, pa, PAGE_SIZE);
6741 +       kunmap(page);
6742 +       result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
6743 +                                               checksum_locn);
6744 +       return result;
6745 +}
6746 +/*
6747 + * Calculate checksums
6748 + */
6749 +
6750 +void check_checksums(void)
6751 +{
6752 +       int pfn, index = 0, cpu = smp_processor_id();
6753 +       char current_checksum[CHECKSUM_SIZE];
6754 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
6755 +
6756 +       if (!toi_checksum_ops.enabled)
6757 +               return;
6758 +
6759 +       next_page = (unsigned long) page_list;
6760 +
6761 +       toi_num_resaved = 0;
6762 +       this_checksum = 0;
6763 +
6764 +       memory_bm_position_reset(pageset2_map);
6765 +       for (pfn = memory_bm_next_pfn(pageset2_map); pfn != BM_END_OF_MAP;
6766 +                       pfn = memory_bm_next_pfn(pageset2_map)) {
6767 +               int ret;
6768 +               char *pa;
6769 +               struct page *page = pfn_to_page(pfn);
6770 +
6771 +               if (index % CHECKSUMS_PER_PAGE) {
6772 +                       this_checksum += CHECKSUM_SIZE;
6773 +               } else {
6774 +                       this_checksum = next_page + sizeof(void *);
6775 +                       next_page = *((unsigned long *) next_page);
6776 +               }
6777 +
6778 +               /* Done when IRQs disabled so must be atomic */
6779 +               pa = kmap_atomic(page, KM_USER1);
6780 +               memcpy(ctx->buf, pa, PAGE_SIZE);
6781 +               kunmap_atomic(pa, KM_USER1);
6782 +               ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
6783 +                                                       current_checksum);
6784 +
6785 +               if (ret) {
6786 +                       printk(KERN_INFO "Digest failed. Returned %d.\n", ret);
6787 +                       return;
6788 +               }
6789 +
6790 +               if (memcmp(current_checksum, (char *) this_checksum,
6791 +                                                       CHECKSUM_SIZE)) {
6792 +                       SetPageResave(pfn_to_page(pfn));
6793 +                       toi_num_resaved++;
6794 +                       if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED))
6795 +                               set_abort_result(TOI_RESAVE_NEEDED);
6796 +               }
6797 +
6798 +               index++;
6799 +       }
6800 +}
6801 +
6802 +static struct toi_sysfs_data sysfs_params[] = {
6803 +       SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0,
6804 +                       NULL),
6805 +       SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action,
6806 +                       TOI_ABORT_ON_RESAVE_NEEDED, 0)
6807 +};
6808 +
6809 +/*
6810 + * Ops structure.
6811 + */
6812 +static struct toi_module_ops toi_checksum_ops = {
6813 +       .type                   = MISC_MODULE,
6814 +       .name                   = "checksumming",
6815 +       .directory              = "checksum",
6816 +       .module                 = THIS_MODULE,
6817 +       .initialise             = toi_checksum_initialise,
6818 +       .cleanup                = toi_checksum_cleanup,
6819 +       .print_debug_info       = toi_checksum_print_debug_stats,
6820 +       .save_config_info       = toi_checksum_save_config_info,
6821 +       .load_config_info       = toi_checksum_load_config_info,
6822 +       .memory_needed          = toi_checksum_memory_needed,
6823 +       .storage_needed         = toi_checksum_storage_needed,
6824 +
6825 +       .sysfs_data             = sysfs_params,
6826 +       .num_sysfs_entries      = sizeof(sysfs_params) /
6827 +               sizeof(struct toi_sysfs_data),
6828 +};
6829 +
6830 +/* ---- Registration ---- */
6831 +int toi_checksum_init(void)
6832 +{
6833 +       int result = toi_register_module(&toi_checksum_ops);
6834 +       return result;
6835 +}
6836 +
6837 +void toi_checksum_exit(void)
6838 +{
6839 +       toi_unregister_module(&toi_checksum_ops);
6840 +}
6841 diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h
6842 new file mode 100644
6843 index 0000000..84a9174
6844 --- /dev/null
6845 +++ b/kernel/power/tuxonice_checksum.h
6846 @@ -0,0 +1,32 @@
6847 +/*
6848 + * kernel/power/tuxonice_checksum.h
6849 + *
6850 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
6851 + * Copyright (C) 2006 Red Hat, inc.
6852 + *
6853 + * This file is released under the GPLv2.
6854 + *
6855 + * This file contains data checksum routines for TuxOnIce,
6856 + * using cryptoapi. They are used to locate any modifications
6857 + * made to pageset 2 while we're saving it.
6858 + */
6859 +
6860 +#if defined(CONFIG_TOI_CHECKSUM)
6861 +extern int toi_checksum_init(void);
6862 +extern void toi_checksum_exit(void);
6863 +void check_checksums(void);
6864 +int allocate_checksum_pages(void);
6865 +void free_checksum_pages(void);
6866 +char *tuxonice_get_next_checksum(void);
6867 +int tuxonice_calc_checksum(struct page *page, char *checksum_locn);
6868 +#else
6869 +static inline int toi_checksum_init(void) { return 0; }
6870 +static inline void toi_checksum_exit(void) { }
6871 +static inline void check_checksums(void) { }
6872 +static inline int allocate_checksum_pages(void) { return 0; }
6873 +static inline void free_checksum_pages(void) { }
6874 +static inline char *tuxonice_get_next_checksum(void) { return NULL; }
6875 +static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
6876 +       { return 0; }
6877 +#endif
6878 +
6879 diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c
6880 new file mode 100644
6881 index 0000000..671006d
6882 --- /dev/null
6883 +++ b/kernel/power/tuxonice_cluster.c
6884 @@ -0,0 +1,1069 @@
6885 +/*
6886 + * kernel/power/tuxonice_cluster.c
6887 + *
6888 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
6889 + *
6890 + * This file is released under the GPLv2.
6891 + *
6892 + * This file contains routines for cluster hibernation support.
6893 + *
6894 + * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
6895 + *
6896 + * How does it work?
6897 + *
6898 + * There is no 'master' node that tells everyone else what to do. All nodes
6899 + * send messages to the broadcast address/port, maintain a list of peers
6900 + * and figure out when to progress to the next step in hibernating or resuming.
6901 + * This makes us more fault tolerant when it comes to nodes coming and going
6902 + * (which may be more of an issue if we're hibernating when power supplies
6903 + * are being unreliable).
6904 + *
6905 + * At boot time, we start a ktuxonice thread that handles communication with
6906 + * other nodes. This node maintains a state machine that controls our progress
6907 + * through hibernating and resuming, keeping us in step with other nodes. Nodes
6908 + * are identified by their hw address.
6909 + *
6910 + * On startup, the node sends CLUSTER_PING on the configured interface's
6911 + * broadcast address, port $toi_cluster_port (see below) and begins to listen
6912 + * for other broadcast messages. CLUSTER_PING messages are repeated at
6913 + * intervals of 5 minutes, with a random offset to spread traffic out.
6914 + *
6915 + * A hibernation cycle is initiated from any node via
6916 + *
6917 + * echo > /sys/power/tuxonice/do_hibernate
6918 + *
6919 + * and (possibly) the hibernate script. At each step of the process, the node
6920 + * completes its work, and waits for all other nodes to signal completion of
6921 + * their work (or timeout) before progressing to the next step.
6922 + *
6923 + * Request/state  Action before reply   Possible reply   Next state
6924 + * HIBERNATE      capable, pre-script   HIBERNATE|ACK    NODE_PREP
6925 + *                                      HIBERNATE|NACK   INIT_0
6926 + *
6927 + * PREP           prepare_image         PREP|ACK         IMAGE_WRITE
6928 + *                                      PREP|NACK        INIT_0
6929 + *                                      ABORT            RUNNING
6930 + *
6931 + * IO             write image           IO|ACK           power off
6932 + *                                      ABORT            POST_RESUME
6933 + *
6934 + * (Boot time)    check for image       IMAGE|ACK        RESUME_PREP
6935 + *                                      (Note 1)
6936 + *                                      IMAGE|NACK       (Note 2)
6937 + *
6938 + * PREP           prepare read image    PREP|ACK         IMAGE_READ
6939 + *                                      PREP|NACK        (As NACK_IMAGE)
6940 + *
6941 + * IO             read image            IO|ACK           POST_RESUME
6942 + *
6943 + * POST_RESUME    thaw, post-script                      RUNNING
6944 + *
6945 + * INIT_0         init 0
6946 + *
6947 + * Other messages:
6948 + *
6949 + * - PING: Request for all other live nodes to send a PONG. Used at startup to
6950 + *   announce presence, when a node is suspected dead and periodically, in case
6951 + *   segments of the network are [un]plugged.
6952 + *
6953 + * - PONG: Response to a PING.
6954 + *
6955 + * - ABORT: Request to cancel writing an image.
6956 + *
6957 + * - BYE: Notification that this node is shutting down.
6958 + *
6959 + * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
6960 + * nodes which are slower to start up can get state synchronised. If a node
6961 + * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
6962 + * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
6963 + * must invalidate its image (if any) and boot normally.
6964 + *
6965 + * Note 2: May occur when one node lost power or powered off while others
6966 + * hibernated. This node waits for others to complete resuming (ACK_READ)
6967 + * before completing its boot, so that it appears as a failed node restarting.
6968 + *
6969 + * If any node has an image, then it also has a list of nodes that hibernated
6970 + * in synchronisation with it. The node will wait for other nodes to appear
6971 + * or timeout before beginning its restoration.
6972 + *
6973 + * If a node has no image, it needs to wait, in case other nodes which do have
6974 + * an image are going to resume, but are taking longer to announce their
6975 + * presence. For this reason, the user can specify a timeout value and a number
6976 + * of nodes detected before we just continue. (We might want to assume in a
6977 + * cluster of, say, 15 nodes, if 8 others have booted without finding an image,
6978 + * the remaining nodes will too. This might help in situations where some nodes
6979 + * are much slower to boot, or more subject to hardware failures, and the like).
6980 + */
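A minimal sketch of that continue-or-timeout rule, assuming hypothetical names (illustrative only, not part of the patch):

/* Illustrative only: a booting node without an image continues once
 * enough peers report having no image, or once a timeout expires. */
struct boot_decision {
        int peers_without_image;        /* IMAGE|NACK replies seen so far */
        int peers_needed;               /* user threshold, e.g. 8 of 15 */
        unsigned long start;            /* jiffies when we began listening */
        unsigned long timeout;          /* user-configured limit, in jiffies */
};

static int may_continue_boot(const struct boot_decision *d)
{
        return d->peers_without_image >= d->peers_needed ||
                time_after(jiffies, d->start + d->timeout);
}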
6981 +
6982 +#include <linux/suspend.h>
6983 +#include <linux/module.h>
6984 +#include <linux/moduleparam.h>
6985 +#include <linux/if.h>
6986 +#include <linux/rtnetlink.h>
6987 +#include <linux/ip.h>
6988 +#include <linux/udp.h>
6989 +#include <linux/in.h>
6990 +#include <linux/if_arp.h>
6991 +#include <linux/kthread.h>
6992 +#include <linux/wait.h>
6993 +#include <linux/netdevice.h>
6994 +#include <net/ip.h>
6995 +
6996 +#include "tuxonice.h"
6997 +#include "tuxonice_modules.h"
6998 +#include "tuxonice_sysfs.h"
6999 +#include "tuxonice_alloc.h"
7000 +#include "tuxonice_io.h"
7001 +
7002 +#if 1
7003 +#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
7004 +#else
7005 +#define PRINTK(a, b...) do { } while (0)
7006 +#endif
7007 +
7008 +static int loopback_mode;
7009 +static int num_local_nodes = 1;
7010 +#define MAX_LOCAL_NODES 8
7011 +#define SADDR (loopback_mode ? b->sid : h->saddr) /* uses locals b, h */
7012 +
7013 +#define MYNAME "TuxOnIce Clustering"
7014 +
7015 +enum cluster_message {
7016 +       MSG_ACK = 1,
7017 +       MSG_NACK = 2,
7018 +       MSG_PING = 4,
7019 +       MSG_ABORT = 8,
7020 +       MSG_BYE = 16,
7021 +       MSG_HIBERNATE = 32,
7022 +       MSG_IMAGE = 64,
7023 +       MSG_IO = 128,
7024 +       MSG_RUNNING = 256
7025 +};
7026 +
7027 +static char *str_message(int message)
7028 +{
7029 +       switch (message) {
7030 +       case 4:
7031 +               return "Ping";
7032 +       case 8:
7033 +               return "Abort";
7034 +       case 9:
7035 +               return "Abort acked";
7036 +       case 10:
7037 +               return "Abort nacked";
7038 +       case 16:
7039 +               return "Bye";
7040 +       case 17:
7041 +               return "Bye acked";
7042 +       case 18:
7043 +               return "Bye nacked";
7044 +       case 32:
7045 +               return "Hibernate request";
7046 +       case 33:
7047 +               return "Hibernate ack";
7048 +       case 34:
7049 +               return "Hibernate nack";
7050 +       case 64:
7051 +               return "Image exists?";
7052 +       case 65:
7053 +               return "Image does exist";
7054 +       case 66:
7055 +               return "No image here";
7056 +       case 128:
7057 +               return "I/O";
7058 +       case 129:
7059 +               return "I/O okay";
7060 +       case 130:
7061 +               return "I/O failed";
7062 +       case 256:
7063 +               return "Running";
7064 +       default:
7065 +               printk(KERN_ERR "Unrecognised message %d.\n", message);
7066 +               return "Unrecognised message (see dmesg)";
7067 +       }
7068 +}
7069 +
7070 +#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
7071 +#define MSG_STATE_MASK (~MSG_ACK_MASK)
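As a worked example of the encoding: 33 is MSG_HIBERNATE | MSG_ACK ("Hibernate ack" in str_message() above), and the two masks recover each half. A sketch:

/* Sketch: splitting a combined message value with the masks above. */
static inline void split_message_example(void)
{
        int message = MSG_HIBERNATE | MSG_ACK; /* 33, "Hibernate ack" */
        int state = message & MSG_STATE_MASK;  /* 32, MSG_HIBERNATE */
        int ack = message & MSG_ACK_MASK;      /* 1, MSG_ACK */
        (void)state;
        (void)ack;
}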
7072 +
7073 +struct node_info {
7074 +       struct list_head member_list;
7075 +       wait_queue_head_t member_events;
7076 +       spinlock_t member_list_lock;
7077 +       spinlock_t receive_lock;
7078 +       int peer_count, ignored_peer_count;
7079 +       struct toi_sysfs_data sysfs_data;
7080 +       enum cluster_message current_message;
7081 +};
7082 +
7083 +struct node_info node_array[MAX_LOCAL_NODES];
7084 +
7085 +struct cluster_member {
7086 +       __be32 addr;
7087 +       enum cluster_message message;
7088 +       struct list_head list;
7089 +       int ignore;
7090 +};
7091 +
7092 +#define toi_cluster_port_send 3501
7093 +#define toi_cluster_port_recv 3502
7094 +
7095 +static struct net_device *net_dev;
7096 +static struct toi_module_ops toi_cluster_ops;
7097 +
7098 +static int toi_recv(struct sk_buff *skb, struct net_device *dev,
7099 +               struct packet_type *pt, struct net_device *orig_dev);
7100 +
7101 +static struct packet_type toi_cluster_packet_type = {
7102 +       .type = __constant_htons(ETH_P_IP),
7103 +       .func = toi_recv,
7104 +};
7105 +
7106 +struct toi_pkt {               /* BOOTP packet format */
7107 +       struct iphdr iph;       /* IP header */
7108 +       struct udphdr udph;     /* UDP header */
7109 +       u8 htype;               /* HW address type */
7110 +       u8 hlen;                /* HW address length */
7111 +       __be32 xid;             /* Transaction ID */
7112 +       __be16 secs;            /* Seconds since we started */
7113 +       __be16 flags;           /* Just what it says */
7114 +       u8 hw_addr[16];         /* Sender's HW address */
7115 +       u16 message;            /* Message */
7116 +       unsigned long sid;      /* Source ID for loopback testing */
7117 +};
7118 +
7119 +static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;
7120 +
7121 +static int added_pack;
7122 +
7123 +static int others_have_image;
7124 +
7125 +/* Key used to allow multiple clusters on the same lan */
7126 +static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
7127 +static char pre_hibernate_script[255] =
7128 +       CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
7129 +static char post_hibernate_script[255] =
7130 +       CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;
7131 +
7132 +/*                     List of cluster members                 */
7133 +static unsigned long continue_delay = 5 * HZ;
7134 +static unsigned long cluster_message_timeout = 3 * HZ;
7135 +
7136 +/*             === Membership list ===         */
7137 +
7138 +static void print_member_info(int index)
7139 +{
7140 +       struct cluster_member *this;
7141 +
7142 +       printk(KERN_INFO "==> Dumping node %d.\n", index);
7143 +
7144 +       list_for_each_entry(this, &node_array[index].member_list, list)
7145 +               printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
7146 +                               NIPQUAD(this->addr),
7147 +                               str_message(this->message),
7148 +                               this->ignore ? "(Ignored)" : "");
7149 +       printk(KERN_INFO "== Done ==\n");
7150 +}
7151 +
7152 +static struct cluster_member *__find_member(int index, __be32 addr)
7153 +{
7154 +       struct cluster_member *this;
7155 +
7156 +       list_for_each_entry(this, &node_array[index].member_list, list) {
7157 +               if (this->addr != addr)
7158 +                       continue;
7159 +
7160 +               return this;
7161 +       }
7162 +
7163 +       return NULL;
7164 +}
7165 +
7166 +static void set_ignore(int index, __be32 addr, struct cluster_member *this)
7167 +{
7168 +       if (this->ignore) {
7169 +               PRINTK("Node %d already ignoring %d.%d.%d.%d.\n",
7170 +                               index, NIPQUAD(addr));
7171 +               return;
7172 +       }
7173 +
7174 +       PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n",
7175 +                               index, NIPQUAD(addr));
7176 +       this->ignore = 1;
7177 +       node_array[index].ignored_peer_count++;
7178 +}
7179 +
7180 +static int __add_update_member(int index, __be32 addr, int message)
7181 +{
7182 +       struct cluster_member *this;
7183 +
7184 +       this = __find_member(index, addr);
7185 +       if (this) {
7186 +               if (this->message != message) {
7187 +                       this->message = message;
7188 +                       if ((message & MSG_NACK) &&
7189 +                           (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
7190 +                               set_ignore(index, addr, this);
7191 +                       PRINTK("Node %d sees node %d.%d.%d.%d now sending "
7192 +                                       "%s.\n", index, NIPQUAD(addr),
7193 +                                       str_message(message));
7194 +                       wake_up(&node_array[index].member_events);
7195 +               }
7196 +               return 0;
7197 +       }
7198 +
7199 +       this = (struct cluster_member *) toi_kzalloc(36,
7200 +                       sizeof(struct cluster_member), GFP_KERNEL);
7201 +
7202 +       if (!this)
7203 +               return -1;
7204 +
7205 +       this->addr = addr;
7206 +       this->message = message;
7207 +       this->ignore = 0;
7208 +       INIT_LIST_HEAD(&this->list);
7209 +
7210 +       node_array[index].peer_count++;
7211 +
7212 +       PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
7213 +                       NIPQUAD(addr), str_message(message));
7214 +
7215 +       if ((message & MSG_NACK) &&
7216 +           (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
7217 +               set_ignore(index, addr, this);
7218 +       list_add_tail(&this->list, &node_array[index].member_list);
7219 +       return 1;
7220 +}
7221 +
7222 +static int add_update_member(int index, __be32 addr, int message)
7223 +{
7224 +       int result;
7225 +       unsigned long flags;
7226 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7227 +       result = __add_update_member(index, addr, message);
7228 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7229 +
7230 +       print_member_info(index);
7231 +
7232 +       wake_up(&node_array[index].member_events);
7233 +
7234 +       return result;
7235 +}
7236 +
7237 +static void del_member(int index, __be32 addr)
7238 +{
7239 +       struct cluster_member *this;
7240 +       unsigned long flags;
7241 +
7242 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7243 +       this = __find_member(index, addr);
7244 +
7245 +       if (this) {
7246 +               list_del_init(&this->list);
7247 +               toi_kfree(36, this, sizeof(*this));
7248 +               node_array[index].peer_count--;
7249 +       }
7250 +
7251 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7252 +}
7253 +
7254 +/*             === Message transmission ===    */
7255 +
7256 +static void toi_send_if(int message, unsigned long my_id);
7257 +
7258 +/*
7259 + *  Process received TOI packet.
7260 + */
7261 +static int toi_recv(struct sk_buff *skb, struct net_device *dev,
7262 +               struct packet_type *pt, struct net_device *orig_dev)
7263 +{
7264 +       struct toi_pkt *b;
7265 +       struct iphdr *h;
7266 +       int len, result, index;
7267 +       unsigned long addr, message, ack;
7268 +
7269 +       /* Perform verifications before taking the lock.  */
7270 +       if (skb->pkt_type == PACKET_OTHERHOST)
7271 +               goto drop;
7272 +
7273 +       if (dev != net_dev)
7274 +               goto drop;
7275 +
7276 +       skb = skb_share_check(skb, GFP_ATOMIC);
7277 +       if (!skb)
7278 +               return NET_RX_DROP;
7279 +
7280 +       if (!pskb_may_pull(skb,
7281 +                          sizeof(struct iphdr) +
7282 +                          sizeof(struct udphdr)))
7283 +               goto drop;
7284 +
7285 +       b = (struct toi_pkt *)skb_network_header(skb);
7286 +       h = &b->iph;
7287 +
7288 +       if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
7289 +               goto drop;
7290 +
7291 +       /* Fragments are not supported */
7292 +       if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
7293 +               if (net_ratelimit())
7294 +                       printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
7295 +                              "cluster message.\n");
7296 +               goto drop;
7297 +       }
7298 +
7299 +       if (skb->len < ntohs(h->tot_len))
7300 +               goto drop;
7301 +
7302 +       if (ip_fast_csum((char *) h, h->ihl))
7303 +               goto drop;
7304 +
7305 +       if (b->udph.source != htons(toi_cluster_port_send) ||
7306 +           b->udph.dest != htons(toi_cluster_port_recv))
7307 +               goto drop;
7308 +
7309 +       if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
7310 +               goto drop;
7311 +
7312 +       len = ntohs(b->udph.len) - sizeof(struct udphdr);
7313 +
7314 +       /* Ok the front looks good, make sure we can get at the rest.  */
7315 +       if (!pskb_may_pull(skb, skb->len))
7316 +               goto drop;
7317 +
7318 +       b = (struct toi_pkt *)skb_network_header(skb);
7319 +       h = &b->iph;
7320 +
7321 +       addr = SADDR;
7322 +       PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
7323 +                       str_message(b->message), NIPQUAD(addr));
7324 +
7325 +       message = b->message & MSG_STATE_MASK;
7326 +       ack = b->message & MSG_ACK_MASK;
7327 +
7328 +       for (index = 0; index < num_local_nodes; index++) {
7329 +               int new_message = node_array[index].current_message,
7330 +                   old_message = new_message;
7331 +
7332 +               if (index == SADDR || !old_message) {
7333 +                       PRINTK("Ignoring node %d (offline or self).\n", index);
7334 +                       continue;
7335 +               }
7336 +
7337 +               /* One message at a time, please. */
7338 +               spin_lock(&node_array[index].receive_lock);
7339 +
7340 +               result = add_update_member(index, SADDR, b->message);
7341 +               if (result == -1) {
7342 +                       printk(KERN_INFO "Failed to add new cluster member "
7343 +                                       NIPQUAD_FMT ".\n",
7344 +                                       NIPQUAD(addr));
7345 +                       goto drop_unlock;
7346 +               }
7347 +
7348 +               switch (b->message & MSG_STATE_MASK) {
7349 +               case MSG_PING:
7350 +                       break;
7351 +               case MSG_ABORT:
7352 +                       break;
7353 +               case MSG_BYE:
7354 +                       break;
7355 +               case MSG_HIBERNATE:
7356 +                       /* Can I hibernate? */
7357 +                       new_message = MSG_HIBERNATE |
7358 +                               ((index & 1) ? MSG_NACK : MSG_ACK);
7359 +                       break;
7360 +               case MSG_IMAGE:
7361 +                       /* Can I resume? */
7362 +                       new_message = MSG_IMAGE |
7363 +                               ((index & 1) ? MSG_NACK : MSG_ACK);
7364 +                       if (new_message != old_message)
7365 +                               printk(KERN_ERR "Setting whether I can resume "
7366 +                                               "to %d.\n", new_message);
7367 +                       break;
7368 +               case MSG_IO:
7369 +                       new_message = MSG_IO | MSG_ACK;
7370 +                       break;
7371 +               case MSG_RUNNING:
7372 +                       break;
7373 +               default:
7374 +                       if (net_ratelimit())
7375 +                               printk(KERN_ERR "Unrecognised TuxOnIce cluster"
7376 +                                       " message %d from " NIPQUAD_FMT ".\n",
7377 +                                       b->message, NIPQUAD(addr));
7378 +               }
7379 +
7380 +               if (old_message != new_message) {
7381 +                       node_array[index].current_message = new_message;
7382 +                       printk(KERN_INFO ">>> Sending new message for node "
7383 +                                       "%d.\n", index);
7384 +                       toi_send_if(new_message, index);
7385 +               } else if (!ack) {
7386 +                       printk(KERN_INFO ">>> Resending message for node %d.\n",
7387 +                                       index);
7388 +                       toi_send_if(new_message, index);
7389 +               }
7390 +drop_unlock:
7391 +               spin_unlock(&node_array[index].receive_lock);
7392 +       }
7393 +
7394 +drop:
7395 +       /* Throw the packet out. */
7396 +       kfree_skb(skb);
7397 +
7398 +       return 0;
7399 +}
7400 +
7401 +/*
7402 + *  Send cluster message to single interface.
7403 + */
7404 +static void toi_send_if(int message, unsigned long my_id)
7405 +{
7406 +       struct sk_buff *skb;
7407 +       struct toi_pkt *b;
7408 +       int hh_len = LL_RESERVED_SPACE(net_dev);
7409 +       struct iphdr *h;
7410 +
7411 +       /* Allocate packet */
7412 +       skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL);
7413 +       if (!skb)
7414 +               return;
7415 +       skb_reserve(skb, hh_len);
7416 +       b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt));
7417 +       memset(b, 0, sizeof(struct toi_pkt));
7418 +
7419 +       /* Construct IP header */
7420 +       skb_reset_network_header(skb);
7421 +       h = ip_hdr(skb);
7422 +       h->version = 4;
7423 +       h->ihl = 5;
7424 +       h->tot_len = htons(sizeof(struct toi_pkt));
7425 +       h->frag_off = htons(IP_DF);
7426 +       h->ttl = 64;
7427 +       h->protocol = IPPROTO_UDP;
7428 +       h->daddr = htonl(INADDR_BROADCAST);
7429 +       h->check = ip_fast_csum((unsigned char *) h, h->ihl);
7430 +
7431 +       /* Construct UDP header */
7432 +       b->udph.source = htons(toi_cluster_port_send);
7433 +       b->udph.dest = htons(toi_cluster_port_recv);
7434 +       b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
7435 +       /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
7436 +
7437 +       /* Construct message */
7438 +       b->message = message;
7439 +       b->sid = my_id;
7440 +       b->htype = net_dev->type; /* dev->type is u16; truncated to u8 here */
7441 +       b->hlen = net_dev->addr_len;
7442 +       memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len);
7443 +       b->secs = htons(3); /* 3 seconds */
7444 +
7445 +       /* Chain packet down the line... */
7446 +       skb->dev = net_dev;
7447 +       skb->protocol = htons(ETH_P_IP);
7448 +       if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol),
7449 +                    net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) ||
7450 +                       dev_queue_xmit(skb) < 0)
7451 +               printk(KERN_INFO "E");
7452 +}
7453 +
7454 +/*     =========================================               */
7455 +
7456 +/*                     kTOICluster                     */
7457 +
7458 +static atomic_t num_cluster_threads;
7459 +static DECLARE_WAIT_QUEUE_HEAD(clusterd_events);
7460 +
7461 +static int kTOICluster(void *data)
7462 +{
7463 +       unsigned long my_id;
7464 +
7465 +       my_id = atomic_add_return(1, &num_cluster_threads) - 1;
7466 +       node_array[my_id].current_message = (unsigned long) data;
7467 +
7468 +       PRINTK("kTOICluster daemon %lu starting.\n", my_id);
7469 +
7470 +       current->flags |= PF_NOFREEZE;
7471 +
7472 +       while (node_array[my_id].current_message) {
7473 +               toi_send_if(node_array[my_id].current_message, my_id);
7474 +               sleep_on_timeout(&clusterd_events,
7475 +                               cluster_message_timeout); /* note: sleep_on_* is racy */
7476 +               PRINTK("Link state %lu is %d.\n", my_id,
7477 +                               node_array[my_id].current_message);
7478 +       }
7479 +
7480 +       toi_send_if(MSG_BYE, my_id);
7481 +       atomic_dec(&num_cluster_threads);
7482 +       wake_up(&clusterd_events);
7483 +
7484 +       PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
7485 +       __set_current_state(TASK_RUNNING);
7486 +       return 0;
7487 +}
7488 +
7489 +static void kill_clusterd(void)
7490 +{
7491 +       int i;
7492 +
7493 +       for (i = 0; i < num_local_nodes; i++) {
7494 +               if (node_array[i].current_message) {
7495 +                       PRINTK("Seeking to kill clusterd %d.\n", i);
7496 +                       node_array[i].current_message = 0;
7497 +               }
7498 +       }
7499 +       wait_event(clusterd_events,
7500 +                       !atomic_read(&num_cluster_threads));
7501 +       PRINTK("All cluster daemons have exited.\n");
7502 +}
7503 +
7504 +static int peers_not_in_message(int index, int message, int precise)
7505 +{
7506 +       struct cluster_member *this;
7507 +       unsigned long flags;
7508 +       int result = 0;
7509 +
7510 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7511 +       list_for_each_entry(this, &node_array[index].member_list, list) {
7512 +               if (this->ignore)
7513 +                       continue;
7514 +
7515 +               PRINTK("Peer %d.%d.%d.%d sending %s. "
7516 +                       "Seeking %s.\n",
7517 +                       NIPQUAD(this->addr),
7518 +                       str_message(this->message), str_message(message));
7519 +               if ((precise ? this->message :
7520 +                                       this->message & MSG_STATE_MASK) !=
7521 +                                       message)
7522 +                       result++;
7523 +       }
7524 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7525 +       PRINTK("%d peers not in sought message.\n", result);
7526 +       return result;
7527 +}
7528 +
7529 +static void reset_ignored(int index)
7530 +{
7531 +       struct cluster_member *this;
7532 +       unsigned long flags;
7533 +
7534 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7535 +       list_for_each_entry(this, &node_array[index].member_list, list)
7536 +               this->ignore = 0;
7537 +       node_array[index].ignored_peer_count = 0;
7538 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7539 +}
7540 +
7541 +static int peers_in_message(int index, int message, int precise)
7542 +{
7543 +       return node_array[index].peer_count -
7544 +               node_array[index].ignored_peer_count -
7545 +               peers_not_in_message(index, message, precise);
7546 +}
7547 +
7548 +static int time_to_continue(int index, unsigned long start, int message)
7549 +{
7550 +       int first = peers_not_in_message(index, message, 0);
7551 +       int second = peers_in_message(index, message, 1);
7552 +
7553 +       PRINTK("First part returns %d, second returns %d.\n", first, second);
7554 +
7555 +       if (!first && !second) {
7556 +               PRINTK("All peers answered message %d.\n",
7557 +                       message);
7558 +               return 1;
7559 +       }
7560 +
7561 +       if (time_after(jiffies, start + continue_delay)) {
7562 +               PRINTK("Timeout reached.\n");
7563 +               return 1;
7564 +       }
7565 +
7566 +       PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies,
7567 +                       start + continue_delay);
7568 +       return 0;
7569 +}
7570 +
7571 +void toi_initiate_cluster_hibernate(void)
7572 +{
7573 +       int result;
7574 +       unsigned long start;
7575 +
7576 +       result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
7577 +       if (result)
7578 +               return;
7579 +
7580 +       toi_send_if(MSG_HIBERNATE, 0);
7581 +
7582 +       start = jiffies;
7583 +       wait_event(node_array[0].member_events,
7584 +                       time_to_continue(0, start, MSG_HIBERNATE));
7585 +
7586 +       if (test_action_state(TOI_FREEZER_TEST)) {
7587 +               toi_send_if(MSG_ABORT, 0);
7588 +
7589 +               start = jiffies;
7590 +               wait_event(node_array[0].member_events,
7591 +                       time_to_continue(0, start, MSG_RUNNING));
7592 +
7593 +               do_toi_step(STEP_QUIET_CLEANUP);
7594 +               return;
7595 +       }
7596 +
7597 +       toi_send_if(MSG_IO, 0);
7598 +
7599 +       result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
7600 +       if (result)
7601 +               return;
7602 +
7603 +       /* This code runs at resume time too! */
7604 +       if (toi_in_hibernate)
7605 +               result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
7606 +}
7607 +EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate);
7608 +
7609 +/* toi_cluster_print_debug_stats
7610 + *
7611 + * Description:        Print information to be recorded for debugging purposes into a
7612 + *             buffer.
7613 + * Arguments:  buffer: Pointer to a buffer into which the debug info will be
7614 + *                     printed.
7615 + *             size:   Size of the buffer.
7616 + * Returns:    Number of characters written to the buffer.
7617 + */
7618 +static int toi_cluster_print_debug_stats(char *buffer, int size)
7619 +{
7620 +       int len;
7621 +
7622 +       if (strlen(toi_cluster_iface))
7623 +               len = scnprintf(buffer, size,
7624 +                               "- Cluster interface is '%s'.\n",
7625 +                               toi_cluster_iface);
7626 +       else
7627 +               len = scnprintf(buffer, size,
7628 +                               "- Cluster support is disabled.\n");
7629 +       return len;
7630 +}
7631 +
7632 +/* cluster_memory_needed
7633 + *
7634 + * Description:        Tell the caller how much memory we need to operate during
7635 + *             hibernate/resume.
7636 + * Returns:    Unsigned long. Maximum number of bytes of memory required for
7637 + *             operation.
7638 + */
7639 +static int toi_cluster_memory_needed(void)
7640 +{
7641 +       return 0;
7642 +}
7643 +
7644 +static int toi_cluster_storage_needed(void)
7645 +{
7646 +       return 1 + strlen(toi_cluster_iface);
7647 +}
7648 +
7649 +/* toi_cluster_save_config_info
7650 + *
7651 + * Description:        Save information needed when reloading the image at resume time.
7652 + * Arguments:  Buffer:         Pointer to a buffer of size PAGE_SIZE.
7653 + * Returns:    Number of bytes used for saving our data.
7654 + */
7655 +static int toi_cluster_save_config_info(char *buffer)
7656 +{
7657 +       strcpy(buffer, toi_cluster_iface);
7658 +       return strlen(toi_cluster_iface) + 1; /* include the trailing NUL */
7659 +}
7660 +
7661 +/* toi_cluster_load_config_info
7662 + *
7663 + * Description:        Reload information needed for declustering the image at
7664 + *             resume time.
7665 + * Arguments:  Buffer:         Pointer to the start of the data.
7666 + *             Size:           Number of bytes that were saved.
7667 + */
7668 +static void toi_cluster_load_config_info(char *buffer, int size)
7669 +{
7670 +       strncpy(toi_cluster_iface, buffer, size);
7671 +       return;
7672 +}
7673 +
7674 +static void cluster_startup(void)
7675 +{
7676 +       int have_image = do_check_can_resume(), i;
7677 +       unsigned long start = jiffies, initial_message;
7678 +       struct task_struct *p;
7679 +
7680 +       initial_message = MSG_IMAGE;
7681 +
7682 +       have_image = 1; /* overrides do_check_can_resume() above - debug leftover? */
7683 +
7684 +       for (i = 0; i < num_local_nodes; i++) {
7685 +               PRINTK("Starting ktoiclusterd %d.\n", i);
7686 +               p = kthread_create(kTOICluster, (void *) initial_message,
7687 +                               "ktoiclusterd/%d", i);
7688 +               if (IS_ERR(p)) {
7689 +                       printk(KERN_ERR "Failed to start ktoiclusterd.\n");
7690 +                       return;
7691 +               }
7692 +
7693 +               wake_up_process(p);
7694 +       }
7695 +
7696 +       /* Wait for delay or someone else sending first message */
7697 +       wait_event(node_array[0].member_events, time_to_continue(0, start,
7698 +                               MSG_IMAGE));
7699 +
7700 +       others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);
7701 +
7702 +       printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
7703 +               " %d.\n", have_image ? "" : "don't ", others_have_image);
7704 +
7705 +       if (have_image) {
7706 +               int result;
7707 +
7708 +               /* Start to resume */
7709 +               printk(KERN_INFO "  === Starting to resume ===  \n");
7710 +               node_array[0].current_message = MSG_IO;
7711 +               toi_send_if(MSG_IO, 0);
7712 +
7713 +               /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
7714 +               result = 0;
7715 +
7716 +               if (!result) {
7717 +                       /*
7718 +                        * Atomic restore - we'll come back in the hibernation
7719 +                        * path.
7720 +                        */
7721 +
7722 +                       /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
7723 +                       result = 0;
7724 +
7725 +                       /* do_toi_step(STEP_QUIET_CLEANUP); */
7726 +               }
7727 +
7728 +               node_array[0].current_message |= MSG_NACK;
7729 +
7730 +               /* For debugging - disable for real life? */
7731 +               wait_event(node_array[0].member_events,
7732 +                               time_to_continue(0, start, MSG_IO));
7733 +       }
7734 +
7735 +       if (others_have_image) {
7736 +               /* Wait for them to resume */
7737 +               printk(KERN_INFO "Waiting for other nodes to resume.\n");
7738 +               start = jiffies;
7739 +               wait_event(node_array[0].member_events,
7740 +                               time_to_continue(0, start, MSG_RUNNING));
7741 +               if (peers_not_in_message(0, MSG_RUNNING, 0))
7742 +                       printk(KERN_INFO "Timed out while waiting for other "
7743 +                                       "nodes to resume.\n");
7744 +       }
7745 +
7746 +       /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
7747 +        * as appropriate.
7748 +        *
7749 +        * If we don't have an image:
7750 +        * - Wait until someone else says they have one, or conditions are met
7751 +        *   for continuing to boot (n machines or t seconds).
7752 +        * - If anyone has an image, wait for them to resume before continuing
7753 +        *   to boot.
7754 +        *
7755 +        * If we have an image:
7756 +        * - Wait until conditions are met before continuing to resume (n
7757 +        *   machines or t seconds). Send RESUME_PREP and freeze processes.
7758 +        *   NACK_PREP if freezing fails (shouldn't) and follow logic for
7759 +        *   us having no image above. On success, wait for [N]ACK_PREP from
7760 +        *   other machines. Read image (including atomic restore) until done.
7761 +        *   Wait for ACK_READ from others (should never fail). Thaw processes
7762 +        *   and do post-resume. (The section after the atomic restore is done
7763 +        *   via the code for hibernating).
7764 +        */
7765 +
7766 +       node_array[0].current_message = MSG_RUNNING;
7767 +}
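The block comment above describes behaviour that is only partially wired up in cluster_startup(); a condensed sketch of the intended flow, using hypothetical helpers, might read:

/* Sketch of the negotiation described above. check_local_image() and
 * the wait steps are hypothetical placeholders. */
static void startup_negotiation_sketch(void)
{
        if (!check_local_image()) {
                toi_send_if(MSG_IMAGE | MSG_NACK, 0);
                /* Wait for a peer with an image or for the continue
                 * conditions (n machines / t seconds), then boot. */
                return;
        }

        toi_send_if(MSG_IMAGE | MSG_ACK, 0);
        /* Wait for peers, freeze processes (NACK on failure), read the
         * image, wait for peers' IO|ACK, then thaw and run post-resume. */
}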
7768 +
7769 +/* toi_cluster_open_iface
7770 + *
7771 + * Description:        Prepare to use an interface.
7772 + */
7773 +
7774 +static int toi_cluster_open_iface(void)
7775 +{
7776 +       struct net_device *dev;
7777 +
7778 +       rtnl_lock();
7779 +
7780 +       for_each_netdev(&init_net, dev) {
7781 +               if (/* dev == &init_net.loopback_dev || */
7782 +                   strcmp(dev->name, toi_cluster_iface))
7783 +                       continue;
7784 +
7785 +               net_dev = dev;
7786 +               break;
7787 +       }
7788 +
7789 +       rtnl_unlock();
7790 +
7791 +       if (!net_dev) {
7792 +               printk(KERN_ERR MYNAME ": Device %s not found.\n",
7793 +                               toi_cluster_iface);
7794 +               return -ENODEV;
7795 +       }
7796 +
7797 +       dev_add_pack(&toi_cluster_packet_type);
7798 +       added_pack = 1;
7799 +
7800 +       loopback_mode = (net_dev == init_net.loopback_dev);
7801 +       num_local_nodes = loopback_mode ? 8 : 1;
7802 +
7803 +       PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
7804 +                       loopback_mode ? "on" : "off", num_local_nodes);
7805 +
7806 +       cluster_startup();
7807 +       return 0;
7808 +}
7809 +
7810 +/* toi_cluster_close_iface
7811 + *
7812 + * Description: Stop using an interface.
7813 + */
7814 +
7815 +static int toi_cluster_close_iface(void)
7816 +{
7817 +       kill_clusterd();
7818 +       if (added_pack) {
7819 +               dev_remove_pack(&toi_cluster_packet_type);
7820 +               added_pack = 0;
7821 +       }
7822 +       return 0;
7823 +}
7824 +
7825 +static void write_side_effect(void)
7826 +{
7827 +       if (toi_cluster_ops.enabled) {
7828 +               toi_cluster_open_iface();
7829 +               set_toi_state(TOI_CLUSTER_MODE);
7830 +       } else {
7831 +               toi_cluster_close_iface();
7832 +               clear_toi_state(TOI_CLUSTER_MODE);
7833 +       }
7834 +}
7835 +
7836 +static void node_write_side_effect(void)
7837 +{
7838 +}
7839 +
7840 +/*
7841 + * data for our sysfs entries.
7842 + */
7843 +static struct toi_sysfs_data sysfs_params[] = {
7844 +       SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0,
7845 +                       NULL),
7846 +       SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0,
7847 +                       write_side_effect),
7848 +       SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL),
7849 +       SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script,
7850 +                       256, 0, NULL),
7851 +       SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script,
7852 +                       256, 0, STRING),
7853 +       SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ,
7854 +                       0)
7855 +};
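Assuming the standard TuxOnIce sysfs layout, these entries appear under /sys/power/tuxonice/cluster/, so clustering can be toggled with, for example, echo 1 > /sys/power/tuxonice/cluster/enabled, which fires write_side_effect() defined above.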
7856 +
7857 +/*
7858 + * Ops structure.
7859 + */
7860 +
7861 +static struct toi_module_ops toi_cluster_ops = {
7862 +       .type                   = FILTER_MODULE,
7863 +       .name                   = "Cluster",
7864 +       .directory              = "cluster",
7865 +       .module                 = THIS_MODULE,
7866 +       .memory_needed          = toi_cluster_memory_needed,
7867 +       .print_debug_info       = toi_cluster_print_debug_stats,
7868 +       .save_config_info       = toi_cluster_save_config_info,
7869 +       .load_config_info       = toi_cluster_load_config_info,
7870 +       .storage_needed         = toi_cluster_storage_needed,
7871 +
7872 +       .sysfs_data             = sysfs_params,
7873 +       .num_sysfs_entries      = sizeof(sysfs_params) /
7874 +               sizeof(struct toi_sysfs_data),
7875 +};
7876 +
7877 +/* ---- Registration ---- */
7878 +
7879 +#ifdef MODULE
7880 +#define INIT static __init
7881 +#define EXIT static __exit
7882 +#else
7883 +#define INIT
7884 +#define EXIT
7885 +#endif
7886 +
7887 +INIT int toi_cluster_init(void)
7888 +{
7889 +       int temp = toi_register_module(&toi_cluster_ops), i;
7890 +       struct kobject *kobj = toi_cluster_ops.dir_kobj;
7891 +
7892 +       for (i = 0; i < MAX_LOCAL_NODES; i++) {
7893 +               node_array[i].current_message = 0;
7894 +               INIT_LIST_HEAD(&node_array[i].member_list);
7895 +               init_waitqueue_head(&node_array[i].member_events);
7896 +               spin_lock_init(&node_array[i].member_list_lock);
7897 +               spin_lock_init(&node_array[i].receive_lock);
7898 +
7899 +               /* Set up sysfs entry */
7900 +               node_array[i].sysfs_data.attr.name = toi_kzalloc(8,
7901 +                               sizeof("node_") + 2, /* fits "node_N" + NUL */
7902 +                               GFP_KERNEL);
7903 +               sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d",
7904 +                               i);
7905 +               node_array[i].sysfs_data.attr.mode = SYSFS_RW;
7906 +               node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
7907 +               node_array[i].sysfs_data.flags = 0;
7908 +               node_array[i].sysfs_data.data.integer.variable =
7909 +                       (int *) &node_array[i].current_message;
7910 +               node_array[i].sysfs_data.data.integer.minimum = 0;
7911 +               node_array[i].sysfs_data.data.integer.maximum = INT_MAX;
7912 +               node_array[i].sysfs_data.write_side_effect =
7913 +                       node_write_side_effect;
7914 +               toi_register_sysfs_file(kobj, &node_array[i].sysfs_data);
7915 +       }
7916 +
7917 +       toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
7918 +
7919 +       if (toi_cluster_ops.enabled)
7920 +               toi_cluster_open_iface();
7921 +
7922 +       return temp;
7923 +}
7924 +
7925 +EXIT void toi_cluster_exit(void)
7926 +{
7927 +       int i;
7928 +       toi_cluster_close_iface();
7929 +
7930 +       for (i = 0; i < MAX_LOCAL_NODES; i++)
7931 +               toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj,
7932 +                               &node_array[i].sysfs_data);
7933 +       toi_unregister_module(&toi_cluster_ops);
7934 +}
7935 +
7936 +static int __init toi_cluster_iface_setup(char *iface)
7937 +{
7938 +       toi_cluster_ops.enabled = (*iface && strcmp(iface, "off"));
7939 +
7940 +       if (toi_cluster_ops.enabled)
7941 +               strlcpy(toi_cluster_iface, iface, IFNAMSIZ);
7942 +       return 1;
7943 +}
7944 +
7945 +__setup("toi_cluster=", toi_cluster_iface_setup);
7946 +
7947 +#ifdef MODULE
7948 +MODULE_LICENSE("GPL");
7949 +module_init(toi_cluster_init);
7950 +module_exit(toi_cluster_exit);
7951 +MODULE_AUTHOR("Nigel Cunningham");
7952 +MODULE_DESCRIPTION("Cluster Support for TuxOnIce");
7953 +#endif
7954 diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h
7955 new file mode 100644
7956 index 0000000..b0f8918
7957 --- /dev/null
7958 +++ b/kernel/power/tuxonice_cluster.h
7959 @@ -0,0 +1,19 @@
7960 +/*
7961 + * kernel/power/tuxonice_cluster.h
7962 + *
7963 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
7964 + * Copyright (C) 2006 Red Hat, inc.
7965 + *
7966 + * This file is released under the GPLv2.
7967 + */
7968 +
7969 +#ifdef CONFIG_TOI_CLUSTER
7970 +extern int toi_cluster_init(void);
7971 +extern void toi_cluster_exit(void);
7972 +extern void toi_initiate_cluster_hibernate(void);
7973 +#else
7974 +static inline int toi_cluster_init(void) { return 0; }
7975 +static inline void toi_cluster_exit(void) { }
7976 +static inline void toi_initiate_cluster_hibernate(void) { }
7977 +#endif
7978 +
7979 diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c
7980 new file mode 100644
7981 index 0000000..8acdf65
7982 --- /dev/null
7983 +++ b/kernel/power/tuxonice_compress.c
7984 @@ -0,0 +1,447 @@
7985 +/*
7986 + * kernel/power/tuxonice_compress.c
7987 + *
7988 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
7989 + *
7990 + * This file is released under the GPLv2.
7991 + *
7992 + * This file contains data compression routines for TuxOnIce,
7993 + * using cryptoapi.
7994 + */
7995 +
7996 +#include <linux/suspend.h>
7997 +#include <linux/highmem.h>
7998 +#include <linux/vmalloc.h>
7999 +#include <linux/crypto.h>
8000 +
8001 +#include "tuxonice_builtin.h"
8002 +#include "tuxonice.h"
8003 +#include "tuxonice_modules.h"
8004 +#include "tuxonice_sysfs.h"
8005 +#include "tuxonice_io.h"
8006 +#include "tuxonice_ui.h"
8007 +#include "tuxonice_alloc.h"
8008 +
8009 +static int toi_expected_compression;
8010 +
8011 +static struct toi_module_ops toi_compression_ops;
8012 +static struct toi_module_ops *next_driver;
8013 +
8014 +static char toi_compressor_name[32] = "lzo";
8015 +
8016 +static DEFINE_MUTEX(stats_lock);
8017 +
8018 +struct cpu_context {
8019 +       u8 *page_buffer;
8020 +       struct crypto_comp *transform;
8021 +       unsigned int len;
8022 +       char *buffer_start;
8023 +       char *output_buffer;
8024 +};
8025 +
8026 +static DEFINE_PER_CPU(struct cpu_context, contexts);
8027 +
8028 +static int toi_compress_prepare_result;
8029 +
8030 +/*
8031 + * toi_compress_cleanup
8032 + *
8033 + * Frees memory allocated for our labours.
8034 + */
8035 +static void toi_compress_cleanup(int toi_or_resume)
8036 +{
8037 +       int cpu;
8038 +
8039 +       if (!toi_or_resume)
8040 +               return;
8041 +
8042 +       for_each_online_cpu(cpu) {
8043 +               struct cpu_context *this = &per_cpu(contexts, cpu);
8044 +               if (this->transform) {
8045 +                       crypto_free_comp(this->transform);
8046 +                       this->transform = NULL;
8047 +               }
8048 +
8049 +               if (this->page_buffer)
8050 +                       toi_free_page(16, (unsigned long) this->page_buffer);
8051 +
8052 +               this->page_buffer = NULL;
8053 +
8054 +               if (this->output_buffer)
8055 +                       vfree(this->output_buffer);
8056 +
8057 +               this->output_buffer = NULL;
8058 +       }
8059 +}
8060 +
8061 +/*
8062 + * toi_crypto_prepare
8063 + *
8064 + * Prepare to do some work by allocating buffers and transforms.
8065 + */
8066 +static int toi_compress_crypto_prepare(void)
8067 +{
8068 +       int cpu;
8069 +
8070 +       if (!*toi_compressor_name) {
8071 +               printk(KERN_INFO "TuxOnIce: Compression enabled but no "
8072 +                               "compressor name set.\n");
8073 +               return 1;
8074 +       }
8075 +
8076 +       for_each_online_cpu(cpu) {
8077 +               struct cpu_context *this = &per_cpu(contexts, cpu);
8078 +               this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0);
8079 +               if (IS_ERR(this->transform)) {
8080 +                       printk(KERN_INFO "TuxOnIce: Failed to initialise the "
8081 +                                       "%s compression transform.\n",
8082 +                                       toi_compressor_name);
8083 +                       this->transform = NULL;
8084 +                       return 1;
8085 +               }
8086 +
8087 +               this->page_buffer =
8088 +                       (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
8089 +
8090 +               if (!this->page_buffer) {
8091 +                       printk(KERN_ERR
8092 +                         "Failed to allocate a page buffer for TuxOnIce "
8093 +                         "compression driver.\n");
8094 +                       return -ENOMEM;
8095 +               }
8096 +
8097 +               this->output_buffer =
8098 +                       (char *) vmalloc_32(2 * PAGE_SIZE);
8099 +
8100 +               if (!this->output_buffer) {
8101 +                       printk(KERN_ERR
8102 +                         "Failed to allocate a output buffer for TuxOnIce "
8103 +                         "compression driver.\n");
8104 +                       return -ENOMEM;
8105 +               }
8106 +
8107 +       }
8108 +
8109 +       return 0;
8110 +}
8111 +
8112 +/*
8113 + * toi_compress_init
8114 + */
8115 +
8116 +static int toi_compress_init(int toi_or_resume)
8117 +{
8118 +       if (!toi_or_resume)
8119 +               return 0;
8120 +
8121 +       toi_compress_bytes_in = 0;
8122 +       toi_compress_bytes_out = 0;
8123 +
8124 +       next_driver = toi_get_next_filter(&toi_compression_ops);
8125 +
8126 +       if (!next_driver)
8127 +               return -ECHILD;
8128 +
8129 +       toi_compress_prepare_result = toi_compress_crypto_prepare();
8130 +
8131 +       return 0;
8132 +}
8133 +
8134 +/*
8135 + * toi_compress_rw_init()
8136 + */
8137 +
8138 +static int toi_compress_rw_init(int rw, int stream_number)
8139 +{
8140 +       if (toi_compress_prepare_result) {
8141 +               printk(KERN_ERR "Failed to initialise compression "
8142 +                               "algorithm.\n");
8143 +               if (rw == READ) {
8144 +                       printk(KERN_INFO "Unable to read the image.\n");
8145 +                       return -ENODEV;
8146 +               } else {
8147 +                       printk(KERN_INFO "Continuing without "
8148 +                               "compressing the image.\n");
8149 +                       toi_compression_ops.enabled = 0;
8150 +               }
8151 +       }
8152 +
8153 +       return 0;
8154 +}
8155 +
8156 +/*
8157 + * toi_compress_write_page()
8158 + *
8159 + * Compress a page of data, buffering output and passing on filled
8160 + * pages to the next module in the pipeline.
8161 + *
8162 + * Buffer_page:        Pointer to a buffer of size PAGE_SIZE, containing
8163 + * data to be compressed.
8164 + *
8165 + * Returns:    0 on success. Otherwise the error is that returned by later
8166 + *             modules, -ECHILD if we have a broken pipeline or -EIO if
8167 + *             the compressor fails.
8168 + */
8169 +static int toi_compress_write_page(unsigned long index,
8170 +               struct page *buffer_page, unsigned int buf_size)
8171 +{
8172 +       int ret, cpu = smp_processor_id();
8173 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
8174 +
8175 +       if (!ctx->transform)
8176 +               return next_driver->write_page(index, buffer_page, buf_size);
8177 +
8178 +       ctx->buffer_start = kmap(buffer_page);
8179 +
8180 +       ctx->len = buf_size;
8181 +
8182 +       ret = crypto_comp_compress(ctx->transform,
8183 +                       ctx->buffer_start, buf_size,
8184 +                       ctx->output_buffer, &ctx->len);
8185 +
8186 +       kunmap(buffer_page);
8187 +
8188 +       mutex_lock(&stats_lock);
8189 +       toi_compress_bytes_in += buf_size;
8190 +       toi_compress_bytes_out += ctx->len;
8191 +       mutex_unlock(&stats_lock);
8192 +
8193 +       if (!ret && ctx->len < buf_size) { /* some compression */
8194 +               memcpy(ctx->page_buffer, ctx->output_buffer, ctx->len);
8195 +               return next_driver->write_page(index,
8196 +                               virt_to_page(ctx->page_buffer),
8197 +                               ctx->len);
8198 +       } else
8199 +               return next_driver->write_page(index, buffer_page, buf_size);
8200 +}
8201 +
8202 +/*
8203 + * toi_compress_read_page()
8204 + * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
8205 + *
8206 + * Retrieve data from later modules and decompress it until the input buffer
8207 + * is filled.
8208 + * Returns: Zero if successful. Error condition from me or from downstream on failure.
8209 + */
8210 +static int toi_compress_read_page(unsigned long *index,
8211 +               struct page *buffer_page, unsigned int *buf_size)
8212 +{
8213 +       int ret, cpu = smp_processor_id();
8214 +       unsigned int len;
8215 +       unsigned int outlen = PAGE_SIZE;
8216 +       char *buffer_start;
8217 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
8218 +
8219 +       if (!ctx->transform)
8220 +               return next_driver->read_page(index, buffer_page, buf_size);
8221 +
8222 +       /*
8223 +        * All our reads must be synchronous - we can't decompress
8224 +        * data that hasn't been read yet.
8225 +        */
8226 +
8227 +       *buf_size = PAGE_SIZE;
8228 +
8229 +       ret = next_driver->read_page(index, buffer_page, &len);
8230 +
8231 +       /* Error or uncompressed data */
8232 +       if (ret || len == PAGE_SIZE)
8233 +               return ret;
8234 +
8235 +       buffer_start = kmap(buffer_page);
8236 +       memcpy(ctx->page_buffer, buffer_start, len);
8237 +       ret = crypto_comp_decompress(
8238 +                       ctx->transform,
8239 +                       ctx->page_buffer,
8240 +                       len, buffer_start, &outlen);
8241 +       if (ret)
8242 +               abort_hibernate(TOI_FAILED_IO,
8243 +                       "Compress_read returned %d.\n", ret);
8244 +       else if (outlen != PAGE_SIZE) {
8245 +               abort_hibernate(TOI_FAILED_IO,
8246 +                       "Decompression yielded %d bytes instead of %ld.\n",
8247 +                       outlen, PAGE_SIZE);
8248 +               printk(KERN_ERR "Decompression yielded %d bytes instead of "
8249 +                               "%ld.\n", outlen, PAGE_SIZE);
8250 +               ret = -EIO;
8251 +               *buf_size = outlen;
8252 +       }
8253 +       kunmap(buffer_page);
8254 +       return ret;
8255 +}
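For contrast with the compression filter, a minimal pass-through filter in the same pipeline style would simply forward both calls to the next driver. A sketch (the null_* names are illustrative):

/* Sketch: a do-nothing filter module. 'null_next' would be obtained
 * via toi_get_next_filter() at initialisation, as above. */
static struct toi_module_ops *null_next;

static int null_write_page(unsigned long index, struct page *buffer_page,
                unsigned int buf_size)
{
        return null_next->write_page(index, buffer_page, buf_size);
}

static int null_read_page(unsigned long *index, struct page *buffer_page,
                unsigned int *buf_size)
{
        return null_next->read_page(index, buffer_page, buf_size);
}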
8256 +
8257 +/*
8258 + * toi_compress_print_debug_stats
8259 + * @buffer: Pointer to a buffer into which the debug info will be printed.
8260 + * @size: Size of the buffer.
8261 + *
8262 + * Print information to be recorded for debugging purposes into a buffer.
8263 + * Returns: Number of characters written to the buffer.
8264 + */
8265 +
8266 +static int toi_compress_print_debug_stats(char *buffer, int size)
8267 +{
8268 +       unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT,
8269 +                     pages_out = toi_compress_bytes_out >> PAGE_SHIFT;
8270 +       int len;
8271 +
8272 +       /* Output the compression ratio achieved. */
8273 +       if (*toi_compressor_name)
8274 +               len = scnprintf(buffer, size, "- Compressor is '%s'.\n",
8275 +                               toi_compressor_name);
8276 +       else
8277 +               len = scnprintf(buffer, size, "- Compressor is not set.\n");
8278 +
8279 +       if (pages_in)
8280 +               len += scnprintf(buffer+len, size - len, "  Compressed "
8281 +                       "%lu bytes into %lu (%ld percent compression).\n",
8282 +                 toi_compress_bytes_in,
8283 +                 toi_compress_bytes_out,
8284 +                 (pages_in - pages_out) * 100 / pages_in);
8285 +       return len;
8286 +}
8287 +
8288 +/*
8289 + * toi_compress_compression_memory_needed
8290 + *
8291 + * Tell the caller how much memory we need to operate during hibernate/resume.
8292 + * Returns: Unsigned long. Maximum number of bytes of memory required for
8293 + * operation.
8294 + */
8295 +static int toi_compress_memory_needed(void)
8296 +{
8297 +       return 2 * PAGE_SIZE;
8298 +}
8299 +
8300 +static int toi_compress_storage_needed(void)
8301 +{
8302 +       return 4 * sizeof(unsigned long) + strlen(toi_compressor_name) + 1;
8303 +}
8304 +
8305 +/*
8306 + * toi_compress_save_config_info
8307 + * @buffer: Pointer to a buffer of size PAGE_SIZE.
8308 + *
8309 + * Save information needed when reloading the image at resume time.
8310 + * Returns: Number of bytes used for saving our data.
8311 + */
8312 +static int toi_compress_save_config_info(char *buffer)
8313 +{
8314 +       int namelen = strlen(toi_compressor_name) + 1;
8315 +       int total_len;
8316 +
8317 +       *((unsigned long *) buffer) = toi_compress_bytes_in;
8318 +       *((unsigned long *) (buffer + 1 * sizeof(unsigned long))) =
8319 +               toi_compress_bytes_out;
8320 +       *((unsigned long *) (buffer + 2 * sizeof(unsigned long))) =
8321 +               toi_expected_compression;
8322 +       *((unsigned long *) (buffer + 3 * sizeof(unsigned long))) = namelen;
8323 +       strncpy(buffer + 4 * sizeof(unsigned long), toi_compressor_name,
8324 +                                                               namelen);
8325 +       total_len = 4 * sizeof(unsigned long) + namelen;
8326 +       return total_len;
8327 +}
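Viewed as a struct, the blob written above is four unsigned longs followed by the NUL-terminated compressor name (an illustrative view only; the code addresses the buffer by offset):

/* Illustrative layout of the saved config blob. */
struct compress_config_blob {
        unsigned long bytes_in;
        unsigned long bytes_out;
        unsigned long expected_compression;
        unsigned long namelen;          /* strlen(name) + 1 */
        char name[];                    /* NUL-terminated compressor name */
};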
8328 +
8329 +/* toi_compress_load_config_info
8330 + * @buffer: Pointer to the start of the data.
8331 + * @size: Number of bytes that were saved.
8332 + *
8333 + * Description:        Reload information needed for decompressing the image at
8334 + * resume time.
8335 + */
8336 +static void toi_compress_load_config_info(char *buffer, int size)
8337 +{
8338 +       int namelen;
8339 +
8340 +       toi_compress_bytes_in = *((unsigned long *) buffer);
8341 +       toi_compress_bytes_out = *((unsigned long *) (buffer + 1 *
8342 +                               sizeof(unsigned long)));
8343 +       toi_expected_compression = *((unsigned long *) (buffer + 2 *
8344 +                               sizeof(unsigned long)));
8345 +       namelen = *((unsigned long *) (buffer + 3 * sizeof(unsigned long)));
8346 +       if (strncmp(toi_compressor_name, buffer + 4 * sizeof(unsigned long),
8347 +                               namelen)) {
8348 +               toi_compress_cleanup(1);
8349 +               strncpy(toi_compressor_name, buffer + 4 * sizeof(unsigned long),
8350 +                       namelen);
8351 +               toi_compress_crypto_prepare();
8352 +       }
8353 +       return;
8354 +}
8355 +
8356 +/*
8357 + * toi_expected_compression_ratio
8358 + *
8359 + * Description:        Returns the expected ratio between data passed into this module
8360 + *             and the amount of data output when writing.
8361 + * Returns:    100 if the module is disabled. Otherwise 100 minus the
8362 + *             expected percentage saving set via our sysfs entry.
8363 + */
8364 +
8365 +static int toi_compress_expected_ratio(void)
8366 +{
8367 +       if (!toi_compression_ops.enabled)
8368 +               return 100;
8369 +       else
8370 +               return 100 - toi_expected_compression;
8371 +}
8372 +
8373 +/*
8374 + * data for our sysfs entries.
8375 + */
8376 +static struct toi_sysfs_data sysfs_params[] = {
8377 +       SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression,
8378 +                       0, 99, 0, NULL),
8379 +       SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0,
8380 +                       NULL),
8381 +       SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL),
8382 +};
8383 +
8384 +/*
8385 + * Ops structure.
8386 + */
8387 +static struct toi_module_ops toi_compression_ops = {
8388 +       .type                   = FILTER_MODULE,
8389 +       .name                   = "compression",
8390 +       .directory              = "compression",
8391 +       .module                 = THIS_MODULE,
8392 +       .initialise             = toi_compress_init,
8393 +       .cleanup                = toi_compress_cleanup,
8394 +       .memory_needed          = toi_compress_memory_needed,
8395 +       .print_debug_info       = toi_compress_print_debug_stats,
8396 +       .save_config_info       = toi_compress_save_config_info,
8397 +       .load_config_info       = toi_compress_load_config_info,
8398 +       .storage_needed         = toi_compress_storage_needed,
8399 +       .expected_compression   = toi_compress_expected_ratio,
8400 +
8401 +       .rw_init                = toi_compress_rw_init,
8402 +
8403 +       .write_page             = toi_compress_write_page,
8404 +       .read_page              = toi_compress_read_page,
8405 +
8406 +       .sysfs_data             = sysfs_params,
8407 +       .num_sysfs_entries      = sizeof(sysfs_params) /
8408 +               sizeof(struct toi_sysfs_data),
8409 +};
8410 +
8411 +/* ---- Registration ---- */
8412 +
8413 +static __init int toi_compress_load(void)
8414 +{
8415 +       return toi_register_module(&toi_compression_ops);
8416 +}
8417 +
8418 +#ifdef MODULE
8419 +static __exit void toi_compress_unload(void)
8420 +{
8421 +       toi_unregister_module(&toi_compression_ops);
8422 +}
8423 +
8424 +module_init(toi_compress_load);
8425 +module_exit(toi_compress_unload);
8426 +MODULE_LICENSE("GPL");
8427 +MODULE_AUTHOR("Nigel Cunningham");
8428 +MODULE_DESCRIPTION("Compression Support for TuxOnIce");
8429 +#else
8430 +late_initcall(toi_compress_load);
8431 +#endif
8432 diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c
8433 new file mode 100644
8434 index 0000000..28c421b
8435 --- /dev/null
8436 +++ b/kernel/power/tuxonice_extent.c
8437 @@ -0,0 +1,313 @@
8438 +/*
8439 + * kernel/power/tuxonice_extent.c
8440 + *
8441 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
8442 + *
8443 + * Distributed under GPLv2.
8444 + *
8445 + * These functions encapsulate the manipulation of storage metadata.
8446 + */
8447 +
8448 +#include <linux/suspend.h>
8449 +#include "tuxonice_modules.h"
8450 +#include "tuxonice_extent.h"
8451 +#include "tuxonice_alloc.h"
8452 +#include "tuxonice_ui.h"
8453 +#include "tuxonice.h"
8454 +
8455 +/**
8456 + * toi_get_extent - return a free extent
8457 + *
8458 + * May fail, returning NULL instead.
8459 + **/
8460 +static struct hibernate_extent *toi_get_extent(void)
8461 +{
8462 +       return (struct hibernate_extent *) toi_kzalloc(2,
8463 +                       sizeof(struct hibernate_extent), TOI_ATOMIC_GFP);
8464 +}
8465 +
8466 +/**
8467 + * toi_put_extent_chain - free a whole chain of extents
8468 + * @chain:     Chain to free.
8469 + **/
8470 +void toi_put_extent_chain(struct hibernate_extent_chain *chain)
8471 +{
8472 +       struct hibernate_extent *this;
8473 +
8474 +       this = chain->first;
8475 +
8476 +       while (this) {
8477 +               struct hibernate_extent *next = this->next;
8478 +               toi_kfree(2, this, sizeof(*this));
8479 +               chain->num_extents--;
8480 +               this = next;
8481 +       }
8482 +
8483 +       chain->first = NULL;
8484 +       chain->last_touched = NULL;
8485 +       chain->size = 0;
8486 +}
8487 +EXPORT_SYMBOL_GPL(toi_put_extent_chain);
8488 +
8489 +/**
8490 + * toi_add_to_extent_chain - add an extent to an existing chain
8491 + * @chain:     Chain to which the extent should be added
8492 + * @start:     Start of the extent (first physical block)
8493 + * @end:       End of the extent (last physical block)
8494 + *
8495 + * The chain information is updated if the insertion is successful.
8496 + **/
8497 +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
8498 +               unsigned long start, unsigned long end)
8499 +{
8500 +       struct hibernate_extent *new_ext = NULL, *cur_ext = NULL;
8501 +
8502 +       /* Find the right place in the chain */
8503 +       if (chain->last_touched && chain->last_touched->start < start)
8504 +               cur_ext = chain->last_touched;
8505 +       else if (chain->first && chain->first->start < start)
8506 +               cur_ext = chain->first;
8507 +
8508 +       if (cur_ext) {
8509 +               while (cur_ext->next && cur_ext->next->start < start)
8510 +                       cur_ext = cur_ext->next;
8511 +
8512 +               if (cur_ext->end == (start - 1)) {
8513 +                       struct hibernate_extent *next_ext = cur_ext->next;
8514 +                       cur_ext->end = end;
8515 +
8516 +                       /* Merge with the following one? */
8517 +                       if (next_ext && cur_ext->end + 1 == next_ext->start) {
8518 +                               cur_ext->end = next_ext->end;
8519 +                               cur_ext->next = next_ext->next;
8520 +                               toi_kfree(2, next_ext, sizeof(*next_ext));
8521 +                               chain->num_extents--;
8522 +                       }
8523 +
8524 +                       chain->last_touched = cur_ext;
8525 +                       chain->size += (end - start + 1);
8526 +
8527 +                       return 0;
8528 +               }
8529 +       }
8530 +
8531 +       new_ext = toi_get_extent();
8532 +       if (!new_ext) {
8533 +               printk(KERN_INFO "Error: unable to append a new extent to the "
8534 +                               "chain.\n");
8535 +               return -ENOMEM;
8536 +       }
8537 +
8538 +       chain->num_extents++;
8539 +       chain->size += (end - start + 1);
8540 +       new_ext->start = start;
8541 +       new_ext->end = end;
8542 +
8543 +       chain->last_touched = new_ext;
8544 +
8545 +       if (cur_ext) {
8546 +               new_ext->next = cur_ext->next;
8547 +               cur_ext->next = new_ext;
8548 +       } else {
8549 +               if (chain->first)
8550 +                       new_ext->next = chain->first;
8551 +               chain->first = new_ext;
8552 +       }
8553 +
8554 +       return 0;
8555 +}
8556 +EXPORT_SYMBOL_GPL(toi_add_to_extent_chain);
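
The insertion above either grows an existing extent (merging with its successor when the gap between them closes) or splices a new extent in at its sorted position; last_touched caches the most recent insertion point so in-order additions stay cheap. A minimal userspace sketch of the same merge rule, with plain calloc/free standing in for toi_kzalloc/toi_kfree and the last_touched fast path omitted:

    #include <stdio.h>
    #include <stdlib.h>

    struct extent { unsigned long start, end; struct extent *next; };
    struct chain { int size, num_extents; struct extent *first; };

    /* Same rule as toi_add_to_extent_chain: extend/merge when contiguous,
     * otherwise insert a new extent at the sorted position. */
    static int add_extent(struct chain *c, unsigned long start, unsigned long end)
    {
        struct extent *cur = NULL, *new_ext;

        if (c->first && c->first->start < start)
            cur = c->first;

        if (cur) {
            while (cur->next && cur->next->start < start)
                cur = cur->next;

            if (cur->end == start - 1) {
                struct extent *next = cur->next;

                cur->end = end;
                /* Did we close the gap to the following extent? */
                if (next && cur->end + 1 == next->start) {
                    cur->end = next->end;
                    cur->next = next->next;
                    free(next);
                    c->num_extents--;
                }
                c->size += end - start + 1;
                return 0;
            }
        }

        new_ext = calloc(1, sizeof(*new_ext));
        if (!new_ext)
            return -1;

        c->num_extents++;
        c->size += end - start + 1;
        new_ext->start = start;
        new_ext->end = end;

        if (cur) {
            new_ext->next = cur->next;
            cur->next = new_ext;
        } else {
            new_ext->next = c->first;
            c->first = new_ext;
        }
        return 0;
    }

    int main(void)
    {
        struct chain c = { 0, 0, NULL };
        struct extent *e;

        add_extent(&c, 0, 3);
        add_extent(&c, 8, 11);
        add_extent(&c, 4, 7);        /* bridges the two extents above */

        for (e = c.first; e; e = e->next)
            printf("%lu-%lu\n", e->start, e->end);          /* prints 0-11 */
        printf("extents=%d size=%d\n", c.num_extents, c.size);  /* 1, 12 */
        return 0;
    }
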
8557 +
8558 +/**
8559 + * toi_serialise_extent_chain - write a chain in the image
8560 + * @owner:     Module writing the chain.
8561 + * @chain:     Chain to write.
8562 + **/
8563 +int toi_serialise_extent_chain(struct toi_module_ops *owner,
8564 +               struct hibernate_extent_chain *chain)
8565 +{
8566 +       struct hibernate_extent *this;
8567 +       int ret, i = 0;
8568 +
8569 +       ret = toiActiveAllocator->rw_header_chunk(WRITE, owner, (char *) chain,
8570 +                       2 * sizeof(int));
8571 +       if (ret)
8572 +               return ret;
8573 +
8574 +       this = chain->first;
8575 +       while (this) {
8576 +               ret = toiActiveAllocator->rw_header_chunk(WRITE, owner,
8577 +                               (char *) this, 2 * sizeof(unsigned long));
8578 +               if (ret)
8579 +                       return ret;
8580 +               this = this->next;
8581 +               i++;
8582 +       }
8583 +
8584 +       if (i != chain->num_extents) {
8585 +               printk(KERN_EMERG "Saved %d extents but chain metadata says "
8586 +                       "there should be %d.\n", i, chain->num_extents);
8587 +               return 1;
8588 +       }
8589 +
8590 +       return ret;
8591 +}
8592 +EXPORT_SYMBOL_GPL(toi_serialise_extent_chain);
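
Each chain therefore costs 2 * sizeof(int) bytes in the header (the leading size and num_extents fields) plus 2 * sizeof(unsigned long) per extent (start and end; the next pointer is never written). A hedged userspace sketch that packs a chain into a flat buffer in the same field order, just to make the layout explicit:

    #include <stddef.h>
    #include <string.h>

    struct extent { unsigned long start, end; struct extent *next; };
    struct chain { int size, num_extents; struct extent *first; };

    /* Illustrative only: same field order as toi_serialise_extent_chain,
     * writing to memory instead of the image header. */
    static size_t pack_chain(const struct chain *c, char *buf)
    {
        size_t off = 0;
        const struct extent *e;

        memcpy(buf + off, c, 2 * sizeof(int));      /* size, num_extents */
        off += 2 * sizeof(int);

        for (e = c->first; e; e = e->next) {
            memcpy(buf + off, e, 2 * sizeof(unsigned long)); /* start, end */
            off += 2 * sizeof(unsigned long);
        }
        return off;     /* a loader reads the fields back in this order */
    }
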
8593 +
8594 +/**
8595 + * toi_load_extent_chain - read back a chain saved in the image
8596 + * @chain:     Chain to load
8597 + *
8598 + * The linked list of extents is reconstructed from disk; @chain will point
8599 + * to the first entry.
8600 + **/
8601 +int toi_load_extent_chain(struct hibernate_extent_chain *chain)
8602 +{
8603 +       struct hibernate_extent *this, *last = NULL;
8604 +       int i, ret;
8605 +
8606 +       /* Get the next page */
8607 +       ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
8608 +                       (char *) chain, 2 * sizeof(int));
8609 +       if (ret) {
8610 +               printk(KERN_ERR "Failed to read the size of the extent chain.\n");
8611 +               return 1;
8612 +       }
8613 +
8614 +       for (i = 0; i < chain->num_extents; i++) {
8615 +               this = toi_kzalloc(3, sizeof(struct hibernate_extent),
8616 +                               TOI_ATOMIC_GFP);
8617 +               if (!this) {
8618 +                       printk(KERN_INFO "Failed to allocate a new extent.\n");
8619 +                       return -ENOMEM;
8620 +               }
8621 +               this->next = NULL;
8622 +               /* Get the next page */
8623 +               ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
8624 +                               NULL, (char *) this, 2 * sizeof(unsigned long));
8625 +               if (ret) {
8626 +                       printk(KERN_INFO "Failed to read an extent.\n");
8627 +                       return 1;
8628 +               }
8629 +               if (last)
8630 +                       last->next = this;
8631 +               else
8632 +                       chain->first = this;
8633 +               last = this;
8634 +       }
8635 +       return 0;
8636 +}
8637 +EXPORT_SYMBOL_GPL(toi_load_extent_chain);
8638 +
8639 +/**
8640 + * toi_extent_state_next - go to the next extent
8641 + *
8642 + * Given a state, progress to the next valid entry. We may begin in an
8643 + * invalid state, as we do when invoked after extent_state_goto_start below.
8644 + *
8645 + * When using compression and expected_compression > 0, we let the image size
8646 + * be larger than storage, so we can validly run out of data to return.
8647 + **/
8648 +unsigned long toi_extent_state_next(struct toi_extent_iterate_state *state)
8649 +{
8650 +       if (state->current_chain == state->num_chains)
8651 +               return 0;
8652 +
8653 +       if (state->current_extent) {
8654 +               if (state->current_offset == state->current_extent->end) {
8655 +                       if (state->current_extent->next) {
8656 +                               state->current_extent =
8657 +                                       state->current_extent->next;
8658 +                               state->current_offset =
8659 +                                       state->current_extent->start;
8660 +                       } else {
8661 +                               state->current_extent = NULL;
8662 +                               state->current_offset = 0;
8663 +                       }
8664 +               } else
8665 +                       state->current_offset++;
8666 +       }
8667 +
8668 +       while (!state->current_extent) {
8669 +               int chain_num = ++(state->current_chain);
8670 +
8671 +               if (chain_num == state->num_chains)
8672 +                       return 0;
8673 +
8674 +               state->current_extent = (state->chains + chain_num)->first;
8675 +
8676 +               if (!state->current_extent)
8677 +                       continue;
8678 +
8679 +               state->current_offset = state->current_extent->start;
8680 +       }
8681 +
8682 +       return state->current_offset;
8683 +}
8684 +EXPORT_SYMBOL_GPL(toi_extent_state_next);
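
A sketch of the usual iteration pattern built on these helpers, in the style of the surrounding kernel code; it assumes the declarations from tuxonice_extent.h and a hypothetical consume_block() callback:

    /* Visit every storage block covered by all chains in @state. */
    static void visit_all_blocks(struct toi_extent_iterate_state *state)
    {
            unsigned long block;

            toi_extent_state_goto_start(state);
            for (block = toi_extent_state_next(state);
                 !toi_extent_state_eof(state);
                 block = toi_extent_state_next(state))
                    consume_block(block);       /* hypothetical consumer */
    }
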
8685 +
8686 +/**
8687 + * toi_extent_state_goto_start - reinitialize an extent chain iterator
8688 + * @state:     Iterator to reinitialize
8689 + **/
8690 +void toi_extent_state_goto_start(struct toi_extent_iterate_state *state)
8691 +{
8692 +       state->current_chain = -1;
8693 +       state->current_extent = NULL;
8694 +       state->current_offset = 0;
8695 +}
8696 +EXPORT_SYMBOL_GPL(toi_extent_state_goto_start);
8697 +
8698 +/**
8699 + * toi_extent_state_save - save state of the iterator
8700 + * @state:             Current state of the chain
8701 + * @saved_state:       Iterator to populate
8702 + *
8703 + * Given a state and a struct hibernate_extent_state_store, save the current
8704 + * position in a format that can be used with relocated chains (at
8705 + * resume time).
8706 + **/
8707 +void toi_extent_state_save(struct toi_extent_iterate_state *state,
8708 +               struct hibernate_extent_iterate_saved_state *saved_state)
8709 +{
8710 +       struct hibernate_extent *extent;
8711 +
8712 +       saved_state->chain_num = state->current_chain;
8713 +       saved_state->extent_num = 0;
8714 +       saved_state->offset = state->current_offset;
8715 +
8716 +       if (saved_state->chain_num == -1)
8717 +               return;
8718 +
8719 +       extent = (state->chains + state->current_chain)->first;
8720 +
8721 +       while (extent != state->current_extent) {
8722 +               saved_state->extent_num++;
8723 +               extent = extent->next;
8724 +       }
8725 +}
8726 +EXPORT_SYMBOL_GPL(toi_extent_state_save);
8727 +
8728 +/**
8729 + * toi_extent_state_restore - restore the position saved by extent_state_save
8730 + * @state:             State to populate
8731 + * @saved_state:       Iterator saved to restore
8732 + **/
8733 +void toi_extent_state_restore(struct toi_extent_iterate_state *state,
8734 +               struct hibernate_extent_iterate_saved_state *saved_state)
8735 +{
8736 +       int posn = saved_state->extent_num;
8737 +
8738 +       if (saved_state->chain_num == -1) {
8739 +               toi_extent_state_goto_start(state);
8740 +               return;
8741 +       }
8742 +
8743 +       state->current_chain = saved_state->chain_num;
8744 +       state->current_extent = (state->chains + state->current_chain)->first;
8745 +       state->current_offset = saved_state->offset;
8746 +
8747 +       while (posn--)
8748 +               state->current_extent = state->current_extent->next;
8749 +}
8750 +EXPORT_SYMBOL_GPL(toi_extent_state_restore);
8751 diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h
8752 new file mode 100644
8753 index 0000000..22ffb9b
8754 --- /dev/null
8755 +++ b/kernel/power/tuxonice_extent.h
8756 @@ -0,0 +1,72 @@
8757 +/*
8758 + * kernel/power/tuxonice_extent.h
8759 + *
8760 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
8761 + *
8762 + * This file is released under the GPLv2.
8763 + *
8764 + * It contains declarations related to extents. Extents are
8765 + * TuxOnIce's method of storing some of the metadata for the image.
8766 + * See tuxonice_extent.c for more info.
8767 + *
8768 + */
8769 +
8770 +#include "tuxonice_modules.h"
8771 +
8772 +#ifndef EXTENT_H
8773 +#define EXTENT_H
8774 +
8775 +struct hibernate_extent {
8776 +       unsigned long start, end;
8777 +       struct hibernate_extent *next;
8778 +};
8779 +
8780 +struct hibernate_extent_chain {
8781 +       int size; /* size of the chain, i.e. sum of (end - start + 1) */
8782 +       int num_extents;
8783 +       struct hibernate_extent *first, *last_touched;
8784 +};
8785 +
8786 +struct toi_extent_iterate_state {
8787 +       struct hibernate_extent_chain *chains;
8788 +       int num_chains;
8789 +       int current_chain;
8790 +       struct hibernate_extent *current_extent;
8791 +       unsigned long current_offset;
8792 +};
8793 +
8794 +struct hibernate_extent_iterate_saved_state {
8795 +       int chain_num;
8796 +       int extent_num;
8797 +       unsigned long offset;
8798 +};
8799 +
8800 +#define toi_extent_state_eof(state) \
8801 +       ((state)->num_chains == (state)->current_chain)
8802 +
8803 +/* Simplify iterating through all the values in an extent chain */
8804 +#define toi_extent_for_each(extent_chain, extentpointer, value) \
8805 +if ((extent_chain)->first) \
8806 +       for ((extentpointer) = (extent_chain)->first, (value) = \
8807 +                       (extentpointer)->start; \
8808 +            ((extentpointer) && ((extentpointer)->next || (value) <= \
8809 +                                (extentpointer)->end)); \
8810 +            (((value) == (extentpointer)->end) ? \
8811 +               ((extentpointer) = (extentpointer)->next, (value) = \
8812 +                ((extentpointer) ? (extentpointer)->start : 0)) : \
8813 +                       (value)++))
8814 +
8815 +void toi_put_extent_chain(struct hibernate_extent_chain *chain);
8816 +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
8817 +               unsigned long start, unsigned long end);
8818 +int toi_serialise_extent_chain(struct toi_module_ops *owner,
8819 +               struct hibernate_extent_chain *chain);
8820 +int toi_load_extent_chain(struct hibernate_extent_chain *chain);
8821 +
8822 +void toi_extent_state_save(struct toi_extent_iterate_state *state,
8823 +               struct hibernate_extent_iterate_saved_state *saved_state);
8824 +void toi_extent_state_restore(struct toi_extent_iterate_state *state,
8825 +               struct hibernate_extent_iterate_saved_state *saved_state);
8826 +void toi_extent_state_goto_start(struct toi_extent_iterate_state *state);
8827 +unsigned long toi_extent_state_next(struct toi_extent_iterate_state *state);
8828 +#endif
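
For illustration, the toi_extent_for_each macro above expands to a loop over every value covered by the chain; a sketch of its use, assuming a populated chain:

    /* Print every block number covered by @chain. */
    static void dump_chain(struct hibernate_extent_chain *chain)
    {
            struct hibernate_extent *ext;
            unsigned long block;

            toi_extent_for_each(chain, ext, block)
                    printk(KERN_DEBUG "block %lu\n", block);
    }
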
8829 diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c
8830 new file mode 100644
8831 index 0000000..0458a0c
8832 --- /dev/null
8833 +++ b/kernel/power/tuxonice_file.c
8834 @@ -0,0 +1,1248 @@
8835 +/*
8836 + * kernel/power/tuxonice_file.c
8837 + *
8838 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
8839 + *
8840 + * Distributed under GPLv2.
8841 + *
8842 + * This file encapsulates functions for usage of a simple file as a
8843 + * backing store. It is based upon the swapallocator, and shares the
8844 + * same basic working. Here, though, we have nothing to do with
8845 + * swapspace, and only one device to worry about.
8846 + *
8847 + * The user can just
8848 + *
8849 + * echo TuxOnIce > /path/to/my_file
8850 + *
8851 + * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file
8852 + *
8853 + * and
8854 + *
8855 + * echo /path/to/my_file > /sys/power/tuxonice/file/target
8856 + *
8857 + * then put what they find in /sys/power/tuxonice/resume
8858 + * as their resume= parameter in lilo.conf (and rerun lilo if using it).
8859 + *
8860 + * Having done this, they're ready to hibernate and resume.
8861 + *
8862 + * TODO:
8863 + * - File resizing.
8864 + */
8865 +
8866 +#include <linux/suspend.h>
8867 +#include <linux/blkdev.h>
8868 +#include <linux/file.h>
8869 +#include <linux/stat.h>
8870 +#include <linux/mount.h>
8871 +#include <linux/statfs.h>
8872 +#include <linux/syscalls.h>
8873 +#include <linux/namei.h>
8874 +#include <linux/fs.h>
8875 +#include <linux/root_dev.h>
8876 +
8877 +#include "tuxonice.h"
8878 +#include "tuxonice_sysfs.h"
8879 +#include "tuxonice_modules.h"
8880 +#include "tuxonice_ui.h"
8881 +#include "tuxonice_extent.h"
8882 +#include "tuxonice_io.h"
8883 +#include "tuxonice_storage.h"
8884 +#include "tuxonice_block_io.h"
8885 +#include "tuxonice_alloc.h"
8886 +#include "tuxonice_builtin.h"
8887 +
8888 +static struct toi_module_ops toi_fileops;
8889 +
8890 +/* Details of our target.  */
8891 +
8892 +static char toi_file_target[256];
8893 +static struct inode *target_inode;
8894 +static struct file *target_file;
8895 +static struct block_device *toi_file_target_bdev;
8896 +static dev_t resume_file_dev_t;
8897 +static int used_devt;
8898 +static int setting_toi_file_target;
8899 +static sector_t target_firstblock, target_header_start;
8900 +static int target_storage_available;
8901 +static int target_claim;
8902 +
8903 +/* Old signatures */
8904 +static char HaveImage[] = "HaveImage\n";
8905 +static char NoImage[] =   "TuxOnIce\n";
8906 +#define sig_size (sizeof(HaveImage) + 1)
8907 +
8908 +struct toi_file_header {
8909 +       char sig[sig_size];
8910 +       int resumed_before;
8911 +       unsigned long first_header_block;
8912 +       int have_image;
8913 +};
8914 +
8915 +/* Header Page Information */
8916 +static int header_pages_reserved;
8917 +
8918 +/* Main Storage Pages */
8919 +static int main_pages_allocated, main_pages_requested;
8920 +
8921 +#define target_is_normal_file() (S_ISREG(target_inode->i_mode))
8922 +
8923 +static struct toi_bdev_info devinfo;
8924 +
8925 +/* Extent chain for blocks */
8926 +static struct hibernate_extent_chain block_chain;
8927 +
8928 +/* Signature operations */
8929 +enum {
8930 +       GET_IMAGE_EXISTS,
8931 +       INVALIDATE,
8932 +       MARK_RESUME_ATTEMPTED,
8933 +       UNMARK_RESUME_ATTEMPTED,
8934 +};
8935 +
8936 +/**
8937 + * set_devinfo - populate device information
8938 + * @bdev:              Block device on which the file is.
8939 + * @target_blkbits:    Number of bits in the page block size of the target
8940 + *                     file inode.
8941 + *
8942 + * Populate the devinfo structure about the target device.
8943 + *
8944 + * Background: a sector represents a fixed amount of data (generally 512 bytes).
8945 + * The hard drive sector size and the filesystem block size may be different.
8946 + * If fs_blkbits is log2 of the filesystem block size and hd_blkbits is log2
8947 + * of the hard drive sector size:
8948 + *
8949 + * sector >> (fs_blkbits - hd_blkbits) converts a hd sector into a fs block
8950 + * fs_block << (fs_blkbits - hd_blkbits) converts a fs block into a hd sector
8951 + *
8952 + * Here target_blkbits == fs_blkbits and hd_blkbits == 9, hence:
8953 + *
8954 + *     (fs_blkbits - hd_blkbits) == devinfo.bmap_shift
8955 + *
8956 + * The memory page size is defined by PAGE_SHIFT. devinfo.blocks_per_page is the
8957 + * number of filesystem blocks per memory page.
8958 + *
8959 + * Note that blocks are stored after >>. They are used after being <<.
8960 + * We always only use PAGE_SIZE aligned blocks.
8961 + *
8962 + * Side effects:
8963 + *     devinfo.bdev, devinfo.bmap_shift and devinfo.blocks_per_page are set.
8964 + */
8965 +static void set_devinfo(struct block_device *bdev, int target_blkbits)
8966 +{
8967 +       devinfo.bdev = bdev;
8968 +       if (!target_blkbits) {
8969 +               devinfo.bmap_shift = 0;
8970 +               devinfo.blocks_per_page = 0;
8971 +       } else {
8972 +               /* We are assuming a hard disk with 512 (2^9) bytes/sector */
8973 +               devinfo.bmap_shift = target_blkbits - 9;
8974 +               devinfo.blocks_per_page = (1 << (PAGE_SHIFT - target_blkbits));
8975 +       }
8976 +}
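
As a worked example of the shifts above (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12, and a 512-byte-sector disk): a filesystem with 1 KiB blocks gives target_blkbits == 10, so bmap_shift == 1 and blocks_per_page == 4, and fs block 100 starts at sector 100 << 1 == 200. The same arithmetic in a standalone snippet:

    #include <stdio.h>

    #define PAGE_SHIFT 12                   /* assumption: 4 KiB pages */

    int main(void)
    {
            int target_blkbits = 10;        /* 1 KiB filesystem blocks */
            int bmap_shift = target_blkbits - 9;
            int blocks_per_page = 1 << (PAGE_SHIFT - target_blkbits);
            unsigned long fs_block = 100;

            printf("bmap_shift=%d blocks_per_page=%d\n",
                   bmap_shift, blocks_per_page);            /* 1, 4 */
            printf("first sector of fs block %lu = %lu\n",
                   fs_block, fs_block << bmap_shift);       /* 200 */
            return 0;
    }
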
8977 +
8978 +static long raw_to_real(long raw)
8979 +{
8980 +       long result;
8981 +
8982 +       result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) +
8983 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
8984 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
8985 +
8986 +       return result < 0 ? 0 : result;
8987 +}
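
raw_to_real discounts the index metadata: every stored page costs an extra sizeof(unsigned long) + sizeof(int) bytes of bookkeeping (12 bytes on 64-bit), and the expression rounds the loss upwards. A worked example, assuming 4 KiB pages and 64-bit longs:

    #include <stdio.h>

    int main(void)
    {
            long raw = 1000, meta = 8 + 4, page = 4096; /* assumptions */
            long result = raw -
                    (raw * meta + (page + meta + 1)) / (page + meta);

            printf("%ld raw pages -> %ld usable data pages\n", raw, result);
            /* prints: 1000 raw pages -> 997 usable data pages */
            return 0;
    }
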
8988 +
8989 +static int toi_file_storage_available(void)
8990 +{
8991 +       int result = 0;
8992 +       struct block_device *bdev = toi_file_target_bdev;
8993 +
8994 +       if (!target_inode)
8995 +               return 0;
8996 +
8997 +       switch (target_inode->i_mode & S_IFMT) {
8998 +       case S_IFSOCK:
8999 +       case S_IFCHR:
9000 +       case S_IFIFO: /* Socket, Char, Fifo */
9001 +               return -1;
9002 +       case S_IFREG: /* Regular file: current size - holes + free
9003 +                        space on the partition */
9004 +               result = target_storage_available;
9005 +               break;
9006 +       case S_IFBLK: /* Block device */
9007 +               if (!bdev->bd_disk) {
9008 +                       printk(KERN_INFO "bdev->bd_disk null.\n");
9009 +                       return 0;
9010 +               }
9011 +
9012 +               result = (bdev->bd_part ?
9013 +                       bdev->bd_part->nr_sects :
9014 +                       get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9);
9015 +       }
9016 +
9017 +       return raw_to_real(result);
9018 +}
9019 +
9020 +static int has_contiguous_blocks(int page_num)
9021 +{
9022 +       int j;
9023 +       sector_t last = 0;
9024 +
9025 +       for (j = 0; j < devinfo.blocks_per_page; j++) {
9026 +               sector_t this = bmap(target_inode,
9027 +                               page_num * devinfo.blocks_per_page + j);
9028 +
9029 +               if (!this || (last && (last + 1) != this))
9030 +                       break;
9031 +
9032 +               last = this;
9033 +       }
9034 +
9035 +       return j == devinfo.blocks_per_page;
9036 +}
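
A page is only usable when all of its filesystem blocks sit back-to-back on the device, so one memory page can be transferred as a single contiguous I/O. A userspace sketch of the same check, with a hypothetical fake_bmap() standing in for bmap():

    #include <stdio.h>

    /* Hypothetical stand-in for bmap(): block mapping of a small file. */
    static unsigned long fake_bmap(int fs_block)
    {
            /* file blocks 0-3 map to device blocks 100,101,102,104 */
            static const unsigned long map[] = { 100, 101, 102, 104 };
            return map[fs_block];
    }

    int main(void)
    {
            int blocks_per_page = 4, j;
            unsigned long last = 0;

            for (j = 0; j < blocks_per_page; j++) {
                    unsigned long this = fake_bmap(j);

                    if (!this || (last && last + 1 != this))
                            break;
                    last = this;
            }
            printf("page 0 %s\n", j == blocks_per_page ?
                   "is contiguous on disk" : "has a discontiguity; skipped");
            return 0;
    }
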
9037 +
9038 +static int size_ignoring_ignored_pages(void)
9039 +{
9040 +       int mappable = 0, i;
9041 +
9042 +       if (!target_is_normal_file())
9043 +               return toi_file_storage_available();
9044 +
9045 +       for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++)
9046 +               if (has_contiguous_blocks(i))
9047 +                       mappable++;
9048 +
9049 +       return mappable;
9050 +}
9051 +
9052 +/**
9053 + * __populate_block_list - add an extent to the chain
9054 + * @min:       Start of the extent (first filesystem block)
9055 + * @max:       End of the extent (last filesystem block)
9056 + *
9057 + * If TOI_TEST_BIO is set, print a debug message giving the extent bounds
9058 + * converted to device sectors.
9059 + **/
9060 +static int __populate_block_list(int min, int max)
9061 +{
9062 +       if (test_action_state(TOI_TEST_BIO))
9063 +               printk(KERN_INFO "Adding extent %d-%d.\n",
9064 +                       min << devinfo.bmap_shift,
9065 +                       ((max + 1) << devinfo.bmap_shift) - 1);
9066 +
9067 +       return toi_add_to_extent_chain(&block_chain, min, max);
9068 +}
9069 +
9070 +static int apply_header_reservation(void)
9071 +{
9072 +       int i;
9073 +
9074 +       /* Apply header space reservation */
9075 +       toi_extent_state_goto_start(&toi_writer_posn);
9076 +
9077 +       for (i = 0; i < header_pages_reserved; i++)
9078 +               if (toi_bio_ops.forward_one_page(1, 0))
9079 +                       return -ENOSPC;
9080 +
9081 +       /* The end of header pages will be the start of pageset 2 */
9082 +       toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]);
9083 +
9084 +       return 0;
9085 +}
9086 +
9087 +static int populate_block_list(void)
9088 +{
9089 +       int i, extent_min = -1, extent_max = -1, got_header = 0, result = 0;
9090 +
9091 +       if (block_chain.first)
9092 +               toi_put_extent_chain(&block_chain);
9093 +
9094 +       if (!target_is_normal_file()) {
9095 +               result = (target_storage_available > 0) ?
9096 +                       __populate_block_list(devinfo.blocks_per_page,
9097 +                               (target_storage_available + 1) *
9098 +                               devinfo.blocks_per_page - 1) : 0;
9099 +               if (result)
9100 +                       return result;
9101 +               goto out;
9102 +       }
9103 +
9104 +       for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) {
9105 +               sector_t new_sector;
9106 +
9107 +               if (!has_contiguous_blocks(i))
9108 +                       continue;
9109 +
9110 +               new_sector = bmap(target_inode, (i * devinfo.blocks_per_page));
9111 +
9112 +               /*
9113 +                * Ignore the first block in the file.
9114 +                * It gets the header.
9115 +                */
9116 +               if (new_sector == target_firstblock >> devinfo.bmap_shift) {
9117 +                       got_header = 1;
9118 +                       continue;
9119 +               }
9120 +
9121 +               /*
9122 +                * I'd love to be able to fill in holes and resize
9123 +                * files, but not yet...
9124 +                */
9125 +
9126 +               if (new_sector == extent_max + 1)
9127 +                       extent_max += devinfo.blocks_per_page;
9128 +               else {
9129 +                       if (extent_min > -1) {
9130 +                               result = __populate_block_list(extent_min,
9131 +                                               extent_max);
9132 +                               if (result)
9133 +                                       return result;
9134 +                       }
9135 +
9136 +                       extent_min = new_sector;
9137 +                       extent_max = extent_min +
9138 +                               devinfo.blocks_per_page - 1;
9139 +               }
9140 +       }
9141 +
9142 +       if (extent_min > -1) {
9143 +               result = __populate_block_list(extent_min, extent_max);
9144 +               if (result)
9145 +                       return result;
9146 +       }
9147 +
9148 +out:
9149 +       return apply_header_reservation();
9150 +}
9151 +
9152 +static void toi_file_cleanup(int finishing_cycle)
9153 +{
9154 +       if (toi_file_target_bdev) {
9155 +               if (target_claim) {
9156 +                       bd_release(toi_file_target_bdev);
9157 +                       target_claim = 0;
9158 +               }
9159 +
9160 +               if (used_devt) {
9161 +                       blkdev_put(toi_file_target_bdev,
9162 +                                       FMODE_READ | FMODE_NDELAY);
9163 +                       used_devt = 0;
9164 +               }
9165 +               toi_file_target_bdev = NULL;
9166 +               target_inode = NULL;
9167 +               set_devinfo(NULL, 0);
9168 +               target_storage_available = 0;
9169 +       }
9170 +
9171 +       if (target_file && !IS_ERR(target_file))
9172 +               filp_close(target_file, NULL);
9173 +
9174 +       target_file = NULL;
9175 +}
9176 +
9177 +/**
9178 + * reopen_resume_devt - reset the devinfo struct
9179 + *
9180 + * Having opened resume= once, we remember the major and
9181 + * minor nodes and use them to reopen the bdev for checking
9182 + * whether an image exists (possibly when starting a resume).
9183 + **/
9184 +static void reopen_resume_devt(void)
9185 +{
9186 +       toi_file_target_bdev = toi_open_by_devnum(resume_file_dev_t,
9187 +                       FMODE_READ | FMODE_NDELAY);
9188 +       if (IS_ERR(toi_file_target_bdev)) {
9189 +               printk(KERN_INFO "Got a dev_num (%lx) but failed to open it.\n",
9190 +                               (unsigned long) resume_file_dev_t);
9191 +               return;
9192 +       }
9193 +       target_inode = toi_file_target_bdev->bd_inode;
9194 +       set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
9195 +}
9196 +
9197 +static void toi_file_get_target_info(char *target, int get_size,
9198 +               int resume_param)
9199 +{
9200 +       if (target_file)
9201 +               toi_file_cleanup(0);
9202 +
9203 +       if (!target || !strlen(target))
9204 +               return;
9205 +
9206 +       target_file = filp_open(target, O_RDONLY|O_LARGEFILE, 0);
9207 +
9208 +       if (IS_ERR(target_file) || !target_file) {
9209 +
9210 +               if (!resume_param) {
9211 +                       printk(KERN_INFO "Open file %s returned %p.\n",
9212 +                                       target, target_file);
9213 +                       target_file = NULL;
9214 +                       return;
9215 +               }
9216 +
9217 +               target_file = NULL;
9218 +               wait_for_device_probe();
9219 +               resume_file_dev_t = name_to_dev_t(target);
9220 +               if (!resume_file_dev_t) {
9221 +                       struct kstat stat;
9222 +                       int error = vfs_stat(target, &stat);
9223 +                       printk(KERN_INFO "Open file %s returned %p and "
9224 +                                       "name_to_devt failed.\n", target,
9225 +                                       target_file);
9226 +                       if (error)
9227 +                               printk(KERN_INFO "Statting the file also failed."
9228 +                                       " Nothing more we can do.\n");
9229 +                       else
9230 +                               resume_file_dev_t = stat.rdev;
9231 +                       return;
9232 +               }
9233 +
9234 +               toi_file_target_bdev = toi_open_by_devnum(resume_file_dev_t,
9235 +                               FMODE_READ | FMODE_NDELAY);
9236 +               if (IS_ERR(toi_file_target_bdev)) {
9237 +                       printk(KERN_INFO "Got a dev_num (%lx) but failed to "
9238 +                                       "open it.\n",
9239 +                                       (unsigned long) resume_file_dev_t);
9240 +                       return;
9241 +               }
9242 +               used_devt = 1;
9243 +               target_inode = toi_file_target_bdev->bd_inode;
9244 +       } else
9245 +               target_inode = target_file->f_mapping->host;
9246 +
9247 +       if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) ||
9248 +           S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) {
9249 +               printk(KERN_INFO "File support works with regular files,"
9250 +                               " character devices and block devices.\n");
9251 +               goto cleanup;
9252 +       }
9253 +
9254 +       if (!used_devt) {
9255 +               if (S_ISBLK(target_inode->i_mode)) {
9256 +                       toi_file_target_bdev = I_BDEV(target_inode);
9257 +                       if (!bd_claim(toi_file_target_bdev, &toi_fileops))
9258 +                               target_claim = 1;
9259 +               } else
9260 +                       toi_file_target_bdev = target_inode->i_sb->s_bdev;
9261 +               resume_file_dev_t = toi_file_target_bdev->bd_dev;
9262 +       }
9263 +
9264 +       set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
9265 +
9266 +       if (get_size)
9267 +               target_storage_available = size_ignoring_ignored_pages();
9268 +
9269 +       if (!resume_param)
9270 +               target_firstblock = bmap(target_inode, 0) << devinfo.bmap_shift;
9271 +
9272 +       return;
9273 +cleanup:
9274 +       target_inode = NULL;
9275 +       if (target_file) {
9276 +               filp_close(target_file, NULL);
9277 +               target_file = NULL;
9278 +       }
9279 +       set_devinfo(NULL, 0);
9280 +       target_storage_available = 0;
9281 +}
9282 +
9283 +static void toi_file_noresume_reset(void)
9284 +{
9285 +       toi_bio_ops.rw_cleanup(READ);
9286 +}
9287 +
9288 +/**
9289 + * parse_signature - check if the file is suitable for resuming
9290 + * @header:    Signature of the file
9291 + *
9292 + * Given a file header, check the content of the file. Return true if it
9293 + * contains a valid hibernate image.
9294 + * TOI_RESUMED_BEFORE is set accordingly.
9295 + **/
9296 +static int parse_signature(struct toi_file_header *header)
9297 +{
9298 +       int have_image = !memcmp(HaveImage, header->sig, sizeof(HaveImage) - 1);
9299 +       int no_image_header = !memcmp(NoImage, header->sig,
9300 +                       sizeof(NoImage) - 1);
9301 +       int binary_sig = !memcmp(tuxonice_signature, header->sig,
9302 +                       sizeof(tuxonice_signature));
9303 +
9304 +       if (no_image_header || (binary_sig && !header->have_image))
9305 +               return 0;
9306 +
9307 +       if (!have_image && !binary_sig)
9308 +               return -1;
9309 +
9310 +       if (header->resumed_before)
9311 +               set_toi_state(TOI_RESUMED_BEFORE);
9312 +       else
9313 +               clear_toi_state(TOI_RESUMED_BEFORE);
9314 +
9315 +       target_header_start = header->first_header_block;
9316 +       return 1;
9317 +}
9318 +
9319 +/**
9320 + * prepare_signature - populate the signature structure
9321 + * @current_header:    Signature structure to populate
9322 + * @first_header_block:        Sector with the header containing the extents
9323 + **/
9324 +static int prepare_signature(struct toi_file_header *current_header,
9325 +               unsigned long first_header_block)
9326 +{
9327 +       memcpy(current_header->sig, tuxonice_signature,
9328 +                       sizeof(tuxonice_signature));
9329 +       current_header->resumed_before = 0;
9330 +       current_header->first_header_block = first_header_block;
9331 +       current_header->have_image = 1;
9332 +       return 0;
9333 +}
9334 +
9335 +static int toi_file_storage_allocated(void)
9336 +{
9337 +       if (!target_inode)
9338 +               return 0;
9339 +
9340 +       if (target_is_normal_file())
9341 +               return (int) raw_to_real(target_storage_available);
9342 +       else
9343 +               return (int) raw_to_real(main_pages_requested);
9344 +}
9345 +
9346 +/**
9347 + * toi_file_release_storage - deallocate the block chain
9348 + **/
9349 +static int toi_file_release_storage(void)
9350 +{
9351 +       toi_put_extent_chain(&block_chain);
9352 +
9353 +       header_pages_reserved = 0;
9354 +       main_pages_allocated = 0;
9355 +       main_pages_requested = 0;
9356 +       return 0;
9357 +}
9358 +
9359 +static void toi_file_reserve_header_space(int request)
9360 +{
9361 +       header_pages_reserved = request;
9362 +}
9363 +
9364 +static int toi_file_allocate_storage(int main_space_requested)
9365 +{
9366 +       int result = 0;
9367 +
9368 +       int extra_pages = DIV_ROUND_UP(main_space_requested *
9369 +                       (sizeof(unsigned long) + sizeof(int)), PAGE_SIZE);
9370 +       int pages_to_get = main_space_requested + extra_pages +
9371 +               header_pages_reserved;
9372 +       int blocks_to_get = pages_to_get - block_chain.size;
9373 +
9374 +       /* Only release_storage reduces the size */
9375 +       if (blocks_to_get < 1)
9376 +               return apply_header_reservation();
9377 +
9378 +       result = populate_block_list();
9379 +
9380 +       if (result)
9381 +               return result;
9382 +
9383 +       toi_message(TOI_WRITER, TOI_MEDIUM, 0,
9384 +               "Finished with block_chain.size == %d.\n",
9385 +               block_chain.size);
9386 +
9387 +       if (block_chain.size < pages_to_get) {
9388 +               printk(KERN_INFO "Block chain size (%d) < header pages (%d) + "
9389 +                                "extra pages (%d) + main pages (%d) (=%d "
9390 +                                "pages).\n",
9391 +                                block_chain.size, header_pages_reserved,
9392 +                                extra_pages, main_space_requested,
9393 +                                pages_to_get);
9394 +               result = -ENOSPC;
9395 +       }
9396 +
9397 +       main_pages_requested = main_space_requested;
9398 +       main_pages_allocated = main_space_requested + extra_pages;
9399 +       return result;
9400 +}
9401 +
9402 +/**
9403 + * toi_file_write_header_init - save the header on the image
9404 + **/
9405 +static int toi_file_write_header_init(void)
9406 +{
9407 +       int result;
9408 +
9409 +       toi_bio_ops.rw_init(WRITE, 0);
9410 +       toi_writer_buffer_posn = 0;
9411 +
9412 +       /* Info needed to bootstrap goes at the start of the header.
9413 +        * First we save the basic info needed for reading, including the number
9414 +        * of header pages. Then we save the structs containing data needed
9415 +        * for reading the header pages back.
9416 +        * Note that even if header pages take more than one page, when we
9417 +        * read back the info, we will have restored the location of the
9418 +        * next header page by the time we go to use it.
9419 +        */
9420 +
9421 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops,
9422 +                       (char *) &toi_writer_posn_save,
9423 +                       sizeof(toi_writer_posn_save));
9424 +
9425 +       if (result)
9426 +               return result;
9427 +
9428 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops,
9429 +                       (char *) &devinfo, sizeof(devinfo));
9430 +
9431 +       if (result)
9432 +               return result;
9433 +
9434 +       /* Flush the chain */
9435 +       toi_serialise_extent_chain(&toi_fileops, &block_chain);
9436 +
9437 +       return 0;
9438 +}
9439 +
9440 +static int toi_file_write_header_cleanup(void)
9441 +{
9442 +       struct toi_file_header *header;
9443 +       int result, result2;
9444 +       unsigned long sig_page = toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
9445 +
9446 +       /* Write any unsaved data */
9447 +       result = toi_bio_ops.write_header_chunk_finish();
9448 +
9449 +       if (result)
9450 +               goto out;
9451 +
9452 +       toi_extent_state_goto_start(&toi_writer_posn);
9453 +       toi_bio_ops.forward_one_page(1, 1);
9454 +
9455 +       /* Adjust image header */
9456 +       result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
9457 +                       target_firstblock,
9458 +                       virt_to_page(sig_page));
9459 +       if (result)
9460 +               goto out;
9461 +
9462 +       header = (struct toi_file_header *) sig_page;
9463 +
9464 +       prepare_signature(header,
9465 +                       toi_writer_posn.current_offset <<
9466 +                       devinfo.bmap_shift);
9467 +
9468 +       result = toi_bio_ops.bdev_page_io(WRITE, toi_file_target_bdev,
9469 +                       target_firstblock,
9470 +                       virt_to_page(sig_page));
9471 +
9472 +out:
9473 +       result2 = toi_bio_ops.finish_all_io();
9474 +       toi_free_page(38, sig_page);
9475 +
9476 +       return result ? result : result2;
9477 +}
9478 +
9479 +/* HEADER READING */
9480 +
9481 +/**
9482 + * toi_file_read_header_init - check content of signature
9483 + *
9484 + * Entry point of the resume path.
9485 + * 1. Attempt to read the device specified with resume=.
9486 + * 2. Check the contents of the header for our signature.
9487 + * 3. Warn, ignore, reset and/or continue as appropriate.
9488 + * 4. If continuing, read the toi_file configuration section
9489 + *    of the header and set up block device info so we can read
9490 + *    the rest of the header & image.
9491 + *
9492 + * Returns:
9493 + *     May not return if the user chooses to reboot at a warning.
9494 + *     -EINVAL if we cannot resume at this time; booting should continue
9495 + *     normally.
9496 + **/
9497 +static int toi_file_read_header_init(void)
9498 +{
9499 +       int result;
9500 +       struct block_device *tmp;
9501 +
9502 +       /* Allocate toi_writer_buffer */
9503 +       toi_bio_ops.read_header_init();
9504 +
9505 +       /*
9506 +        * Read toi_file configuration (header containing metadata).
9507 +        * target_header_start is the first sector of the header. It has been
9508 +        * set when checking if the file was suitable for resuming, see
9509 +        * do_toi_step(STEP_RESUME_CAN_RESUME).
9510 +        */
9511 +       result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
9512 +                       target_header_start,
9513 +                       virt_to_page((unsigned long) toi_writer_buffer));
9514 +
9515 +       if (result) {
9516 +               printk(KERN_ERR "FileAllocator read header init: Failed to "
9517 +                               "initialise reading the first page of data.\n");
9518 +               toi_bio_ops.rw_cleanup(READ);
9519 +               return result;
9520 +       }
9521 +
9522 +       /* toi_writer_posn_save[0] contains the header */
9523 +       memcpy(&toi_writer_posn_save, toi_writer_buffer,
9524 +              sizeof(toi_writer_posn_save));
9525 +
9526 +       /* Save the position in the buffer */
9527 +       toi_writer_buffer_posn = sizeof(toi_writer_posn_save);
9528 +
9529 +       tmp = devinfo.bdev;
9530 +
9531 +       /* See tuxonice_block_io.h */
9532 +       memcpy(&devinfo,
9533 +              toi_writer_buffer + toi_writer_buffer_posn,
9534 +              sizeof(devinfo));
9535 +
9536 +       devinfo.bdev = tmp;
9537 +       toi_writer_buffer_posn += sizeof(devinfo);
9538 +
9539 +       /* Reinitialize the extent pointer */
9540 +       toi_extent_state_goto_start(&toi_writer_posn);
9541 +       /* Jump to the next page */
9542 +       toi_bio_ops.set_extra_page_forward();
9543 +
9544 +       /* Bring back the chain from disk: this will read
9545 +        * all extents.
9546 +        */
9547 +       return toi_load_extent_chain(&block_chain);
9548 +}
9549 +
9550 +static int toi_file_read_header_cleanup(void)
9551 +{
9552 +       toi_bio_ops.rw_cleanup(READ);
9553 +       return 0;
9554 +}
9555 +
9556 +/**
9557 + * toi_file_signature_op - perform an operation on the file signature
9558 + * @op:        operation to perform
9559 + *
9560 + * op is either GET_IMAGE_EXISTS, INVALIDATE, MARK_RESUME_ATTEMPTED or
9561 + * UNMARK_RESUME_ATTEMPTED.
9562 + * If the signature is changed, an I/O operation is performed.
9563 + * The signature exists iff toi_file_signature_op(GET_IMAGE_EXISTS)>-1.
9564 + **/
9565 +static int toi_file_signature_op(int op)
9566 +{
9567 +       char *cur;
9568 +       int result = 0, result2, changed = 0;
9569 +       struct toi_file_header *header;
9570 +
9571 +       if (!toi_file_target_bdev || IS_ERR(toi_file_target_bdev))
9572 +               return -1;
9573 +
9574 +       cur = (char *) toi_get_zeroed_page(17, TOI_ATOMIC_GFP);
9575 +       if (!cur) {
9576 +               printk(KERN_INFO "Unable to allocate a page for reading the "
9577 +                                "image signature.\n");
9578 +               return -ENOMEM;
9579 +       }
9580 +
9581 +       result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
9582 +                       target_firstblock,
9583 +                       virt_to_page(cur));
9584 +
9585 +       if (result)
9586 +               goto out;
9587 +
9588 +       header = (struct toi_file_header *) cur;
9589 +       result = parse_signature(header);
9590 +
9591 +       switch (op) {
9592 +       case INVALIDATE:
9593 +               if (result == -1)
9594 +                       goto out;
9595 +
9596 +               memcpy(header->sig, tuxonice_signature,
9597 +                               sizeof(tuxonice_signature));
9598 +               header->resumed_before = 0;
9599 +               header->have_image = 0;
9600 +               result = 1;
9601 +               changed = 1;
9602 +               break;
9603 +       case MARK_RESUME_ATTEMPTED:
9604 +               if (result == 1) {
9605 +                       header->resumed_before = 1;
9606 +                       changed = 1;
9607 +               }
9608 +               break;
9609 +       case UNMARK_RESUME_ATTEMPTED:
9610 +               if (result == 1) {
9611 +                       header->resumed_before = 0;
9612 +                       changed = 1;
9613 +               }
9614 +               break;
9615 +       }
9616 +
9617 +       if (changed) {
9618 +               int io_result = toi_bio_ops.bdev_page_io(WRITE,
9619 +                               toi_file_target_bdev, target_firstblock,
9620 +                               virt_to_page(cur));
9621 +               if (io_result)
9622 +                       result = io_result;
9623 +       }
9624 +
9625 +out:
9626 +       result2 = toi_bio_ops.finish_all_io();
9627 +       toi_free_page(17, (unsigned long) cur);
9628 +       return result ? result : result2;
9629 +}
9630 +
9631 +/**
9632 + * toi_file_print_debug_stats - print debug info
9633 + * @buffer:    Buffer to data to populate
9634 + * @size:      Size of the buffer
9635 + **/
9636 +static int toi_file_print_debug_stats(char *buffer, int size)
9637 +{
9638 +       int len = 0;
9639 +
9640 +       if (toiActiveAllocator != &toi_fileops) {
9641 +               len = scnprintf(buffer, size,
9642 +                               "- FileAllocator inactive.\n");
9643 +               return len;
9644 +       }
9645 +
9646 +       len = scnprintf(buffer, size, "- FileAllocator active.\n");
9647 +
9648 +       len += scnprintf(buffer+len, size-len, "  Storage available for "
9649 +                       "image: %d pages.\n",
9650 +                       toi_file_storage_allocated());
9651 +
9652 +       return len;
9653 +}
9654 +
9655 +/**
9656 + * toi_file_storage_needed - storage needed
9657 + *
9658 + * Returns amount of space in the image header required
9659 + * for the toi_file's data.
9660 + *
9661 + * We ensure the space is allocated, but actually save the
9662 + * data from write_header_init and therefore don't also define a
9663 + * save_config_info routine.
9664 + **/
9665 +static int toi_file_storage_needed(void)
9666 +{
9667 +       return strlen(toi_file_target) + 1 +
9668 +               sizeof(toi_writer_posn_save) +
9669 +               sizeof(devinfo) +
9670 +               2 * sizeof(int) +
9671 +               (2 * sizeof(unsigned long) * block_chain.num_extents);
9672 +}
9673 +
9674 +/**
9675 + * toi_file_remove_image - invalidate the image
9676 + **/
9677 +static int toi_file_remove_image(void)
9678 +{
9679 +       toi_file_release_storage();
9680 +       return toi_file_signature_op(INVALIDATE);
9681 +}
9682 +
9683 +/**
9684 + * toi_file_image_exists - test if an image exists
9685 + *
9686 + * Repopulate toi_file_target_bdev if needed.
9687 + **/
9688 +static int toi_file_image_exists(int quiet)
9689 +{
9690 +       if (!toi_file_target_bdev)
9691 +               reopen_resume_devt();
9692 +       return toi_file_signature_op(GET_IMAGE_EXISTS);
9693 +}
9694 +
9695 +/**
9696 + * toi_file_mark_resume_attempted - mark resume attempted if so
9697 + * @mark:      attempted flag
9698 + *
9699 + * Record that we tried to resume from this image. Resuming
9700 + * multiple times from the same image may be dangerous
9701 + * (possible filesystem corruption).
9702 + **/
9703 +static int toi_file_mark_resume_attempted(int mark)
9704 +{
9705 +       return toi_file_signature_op(mark ? MARK_RESUME_ATTEMPTED :
9706 +               UNMARK_RESUME_ATTEMPTED);
9707 +}
9708 +
9709 +/**
9710 + * toi_file_set_resume_param - validate the specified resume file
9711 + *
9712 + * Given a target filename, populate the resume parameter. This is
9713 + * meant to be used by the user to populate the kernel command line.
9714 + * By setting /sys/power/tuxonice/file/target, the valid resume
9715 + * parameter to use is set and accessible through
9716 + * /sys/power/tuxonice/resume.
9717 + *
9718 + * If the file could be located, we check if it contains a valid
9719 + * signature.
9720 + **/
9721 +static void toi_file_set_resume_param(void)
9722 +{
9723 +       char *buffer = (char *) toi_get_zeroed_page(18, TOI_ATOMIC_GFP);
9724 +       char *buffer2 = (char *) toi_get_zeroed_page(19, TOI_ATOMIC_GFP);
9725 +       unsigned long sector = bmap(target_inode, 0);
9726 +       int offset = 0;
9727 +
9728 +       if (!buffer || !buffer2) {
9729 +               if (buffer)
9730 +                       toi_free_page(18, (unsigned long) buffer);
9731 +               if (buffer2)
9732 +                       toi_free_page(19, (unsigned long) buffer2);
9733 +               printk(KERN_ERR "TuxOnIce: Failed to allocate memory while "
9734 +                               "setting resume= parameter.\n");
9735 +               return;
9736 +       }
9737 +
9738 +       if (toi_file_target_bdev) {
9739 +               set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
9740 +
9741 +               bdevname(toi_file_target_bdev, buffer2);
9742 +               offset += snprintf(buffer + offset, PAGE_SIZE - offset,
9743 +                               "/dev/%s", buffer2);
9744 +
9745 +               if (sector)
9746 +                       /* The offset is: sector << (inode->i_blkbits - 9) */
9747 +                       offset += snprintf(buffer + offset, PAGE_SIZE - offset,
9748 +                               ":0x%lx", sector << devinfo.bmap_shift);
9749 +       } else
9750 +               offset += snprintf(buffer + offset, PAGE_SIZE - offset,
9751 +                               "%s is not a valid target.", toi_file_target);
9752 +
9753 +       sprintf(resume_file, "file:%s", buffer);
9754 +
9755 +       toi_free_page(18, (unsigned long) buffer);
9756 +       toi_free_page(19, (unsigned long) buffer2);
9757 +
9758 +       toi_attempt_to_parse_resume_device(1);
9759 +}
9760 +
9761 +/**
9762 + * __test_toi_file_target - is the file target valid for hibernating?
9763 + * @target:            target file
9764 + * @resume_param:      whether resume= has been specified
9765 + * @quiet:             quiet flag
9766 + *
9767 + * Test whether the file target can be used for hibernating: valid target
9768 + * and signature.
9769 + * The resume parameter is set if needed.
9770 + **/
9771 +static int __test_toi_file_target(char *target, int resume_param, int quiet)
9772 +{
9773 +       toi_file_get_target_info(target, 0, resume_param);
9774 +       if (toi_file_signature_op(GET_IMAGE_EXISTS) > -1) {
9775 +               if (!quiet)
9776 +                       printk(KERN_INFO "TuxOnIce: FileAllocator: File "
9777 +                                        "signature found.\n");
9778 +               if (!resume_param)
9779 +                       toi_file_set_resume_param();
9780 +
9781 +               toi_bio_ops.set_devinfo(&devinfo);
9782 +               toi_writer_posn.chains = &block_chain;
9783 +               toi_writer_posn.num_chains = 1;
9784 +
9785 +               if (!resume_param)
9786 +                       set_toi_state(TOI_CAN_HIBERNATE);
9787 +               return 0;
9788 +       }
9789 +
9790 +       /*
9791 +        * Target inaccessible or no signature found.
9792 +        * Most errors have already been reported.
9793 +        */
9794 +
9795 +       clear_toi_state(TOI_CAN_HIBERNATE);
9796 +
9797 +       if (quiet)
9798 +               return 1;
9799 +
9800 +       if (*target)
9801 +               printk(KERN_INFO "TuxOnIce: FileAllocator: Sorry. No signature "
9802 +                                "found at %s.\n", target);
9803 +       else
9804 +               if (!resume_param)
9805 +                       printk(KERN_INFO "TuxOnIce: FileAllocator: Sorry. "
9806 +                                       "Target is not set for hibernating.\n");
9807 +
9808 +       return 1;
9809 +}
9810 +
9811 +/**
9812 + * test_toi_file_target - sysfs callback for /sys/power/tuxonice/file/target
9813 + *
9814 + * Test whether the target file is valid for hibernating.
9815 + **/
9816 +static void test_toi_file_target(void)
9817 +{
9818 +       setting_toi_file_target = 1;
9819 +
9820 +       printk(KERN_INFO "TuxOnIce: Hibernating %sabled.\n",
9821 +                       __test_toi_file_target(toi_file_target, 0, 1) ?
9822 +                       "dis" : "en");
9823 +
9824 +       setting_toi_file_target = 0;
9825 +}
9826 +
9827 +/**
9828 + * toi_file_parse_sig_location - parse image location
9829 + * @commandline:       the resume parameter
9830 + * @only_writer:       whether this is the only allocator (then parse even without the "file:" prefix)
9831 + * @quiet:             quiet flag
9832 + *
9833 + * Attempt to parse a resume= parameter.
9834 + * File Allocator accepts:
9835 + *     resume=file:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
9836 + *
9837 + * Where:
9838 + *     DEVNAME is convertible to a dev_t by name_to_dev_t,
9839 + *     FIRSTBLOCK is the location of the first block in the file,
9840 + *     BLOCKSIZE is the logical block size of the device, with
9841 + *             SECTOR_SIZE <= BLOCKSIZE <= PAGE_SIZE and
9842 + *             BLOCKSIZE mod SECTOR_SIZE == 0.
9843 + *
9844 + * Data is validated by attempting to read a header from the
9845 + * location given. Failure will result in toi_file refusing to
9846 + * save an image, and a reboot with correct parameters will be
9847 + * necessary.
9848 + **/
9849 +static int toi_file_parse_sig_location(char *commandline,
9850 +               int only_writer, int quiet)
9851 +{
9852 +       char *thischar, *devstart = NULL, *colon = NULL, *at_symbol = NULL;
9853 +       int result = -EINVAL, target_blocksize = 0;
9854 +
9855 +       if (strncmp(commandline, "file:", 5)) {
9856 +               if (!only_writer)
9857 +                       return 1;
9858 +       } else
9859 +               commandline += 5;
9860 +
9861 +       /*
9862 +        * Don't check signature again if we're beginning a cycle. If we already
9863 +        * did the initialisation successfully, assume we'll be okay when it
9864 +        * comes to resuming.
9865 +        */
9866 +       if (toi_file_target_bdev)
9867 +               return 0;
9868 +
9869 +       devstart = commandline;
9870 +       thischar = commandline;
9871 +       while ((*thischar != ':') && (*thischar != '@') &&
9872 +               ((thischar - commandline) < 250) && (*thischar))
9873 +               thischar++;
9874 +
9875 +       if (*thischar == ':') {
9876 +               colon = thischar;
9877 +               *colon = 0;
9878 +               thischar++;
9879 +       }
9880 +
9881 +       while ((*thischar != '@') && ((thischar - commandline) < 250)
9882 +                       && (*thischar))
9883 +               thischar++;
9884 +
9885 +       if (*thischar == '@') {
9886 +               at_symbol = thischar;
9887 +               *at_symbol = 0;
9888 +       }
9889 +
9890 +       /*
9891 +        * With toi_file it is possible to be able to resume but not
9892 +        * hibernate: resume= may be set correctly while the
9893 +        * toi_file_target isn't.
9894 +        *
9895 +        * We may have come here as a result of setting resume= or
9896 +        * toi_file_target. We only test the toi_file target in the
9897 +        * former case (it's already done in the latter), and we do it
9898 +        * before setting the block number ourselves, as the test would
9899 +        * otherwise overwrite the values given on the command line.
9900 +        */
9901 +
9902 +       if (!setting_toi_file_target) /* Concurrent write via /sys? */
9903 +               __test_toi_file_target(toi_file_target, 1, 0);
9904 +
9905 +       if (colon) {
9906 +               unsigned long block;
9907 +               result = strict_strtoul(colon + 1, 0, &block);
9908 +               if (result)
9909 +                       goto out;
9910 +               target_firstblock = (int) block;
9911 +       } else
9912 +               target_firstblock = 0;
9913 +
9914 +       if (at_symbol) {
9915 +               unsigned long block_size;
9916 +               result = strict_strtoul(at_symbol + 1, 0, &block_size);
9917 +               if (result)
9918 +                       goto out;
9919 +               target_blocksize = (int) block_size;
9920 +               if (target_blocksize & (SECTOR_SIZE - 1)) {
9921 +                       printk(KERN_INFO "FileAllocator: Blocksizes are "
9922 +                                        "multiples of %d.\n", SECTOR_SIZE);
9923 +                       result = -EINVAL;
9924 +                       goto out;
9925 +               }
9926 +       }
9927 +
9928 +       if (!quiet)
9929 +               printk(KERN_INFO "TuxOnIce FileAllocator: Testing whether you "
9930 +                                "can resume:\n");
9931 +
9932 +       toi_file_get_target_info(commandline, 0, 1);
9933 +
9934 +       if (!toi_file_target_bdev || IS_ERR(toi_file_target_bdev)) {
9935 +               toi_file_target_bdev = NULL;
9936 +               result = -1;
9937 +               goto out;
9938 +       }
9939 +
9940 +       if (target_blocksize)
9941 +               set_devinfo(toi_file_target_bdev, ffs(target_blocksize));
9942 +
9943 +       result = __test_toi_file_target(commandline, 1, quiet);
9944 +
9945 +out:
9946 +       if (result)
9947 +               clear_toi_state(TOI_CAN_HIBERNATE);
9948 +
9949 +       if (!quiet)
9950 +               printk(KERN_INFO "Resuming %sabled.\n",  result ? "dis" : "en");
9951 +
9952 +       if (colon)
9953 +               *colon = ':';
9954 +       if (at_symbol)
9955 +               *at_symbol = '@';
9956 +
9957 +       return result;
9958 +}
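+
+/*
+ * Editorial illustration (not part of the original patch): the parser above
+ * accepts strings of the form
+ *
+ *     file:/path/to/target[:<first block>][@<block size>]
+ *
+ * e.g. a hypothetical "resume=file:/dev/sda2:0x4000@4096". The numbers are
+ * parsed with strict_strtoul() in base 0, so decimal, octal and hex all
+ * work, and the block size must be a multiple of SECTOR_SIZE.
+ */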
9959 +
9960 +/**
9961 + * toi_file_save_config_info - populate toi_file_target
9962 + * @buffer:    Pointer to a buffer of size PAGE_SIZE.
9963 + *
9964 + * Save the target's name, not for resume time, but for
9965 + * all_settings.
9966 + * Returns:
9967 + *     Number of bytes used for saving our data.
9968 + **/
9969 +static int toi_file_save_config_info(char *buffer)
9970 +{
9971 +       strcpy(buffer, toi_file_target);
9972 +       return strlen(toi_file_target) + 1;
9973 +}
9974 +
9975 +/**
9976 + * toi_file_load_config_info - reload target's name
9977 + * @buffer:    pointer to the start of the data
9978 + * @size:      number of bytes that were saved
9979 + *
9980 + * toi_file_target is set to buffer.
9981 + **/
9982 +static void toi_file_load_config_info(char *buffer, int size)
9983 +{
9984 +       strlcpy(toi_file_target, buffer, size);
9985 +}
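+
+/*
+ * (Editorial note) The two helpers above round-trip through the image
+ * header: toi_file_save_config_info() stores the NUL-terminated target
+ * name, and toi_file_load_config_info() restores it with strlcpy(), which
+ * bounds the copy by the saved size and guarantees NUL termination.
+ */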
9986 +
9987 +static int toi_file_initialise(int starting_cycle)
9988 +{
9989 +       if (starting_cycle) {
9990 +               if (toiActiveAllocator != &toi_fileops)
9991 +                       return 0;
9992 +
9993 +               if (starting_cycle & SYSFS_HIBERNATE && !*toi_file_target) {
9994 +                       printk(KERN_INFO "FileAllocator is the active writer,  "
9995 +                                       "but no filename has been set.\n");
9996 +                       return 1;
9997 +               }
9998 +       }
9999 +
10000 +       if (*toi_file_target)
10001 +               toi_file_get_target_info(toi_file_target, starting_cycle, 0);
10002 +
10003 +       if (starting_cycle && (toi_file_image_exists(1) == -1)) {
10004 +               printk("%s is does not have a valid signature for "
10005 +                               "hibernating.\n", toi_file_target);
10006 +               return 1;
10007 +       }
10008 +
10009 +       return 0;
10010 +}
10011 +
10012 +static struct toi_sysfs_data sysfs_params[] = {
10013 +
10014 +       SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256,
10015 +               SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target),
10016 +       SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0,
10017 +               attempt_to_parse_resume_device2)
10018 +};
10019 +
10020 +static struct toi_module_ops toi_fileops = {
10021 +       .type                                   = WRITER_MODULE,
10022 +       .name                                   = "file storage",
10023 +       .directory                              = "file",
10024 +       .module                                 = THIS_MODULE,
10025 +       .print_debug_info                       = toi_file_print_debug_stats,
10026 +       .save_config_info                       = toi_file_save_config_info,
10027 +       .load_config_info                       = toi_file_load_config_info,
10028 +       .storage_needed                         = toi_file_storage_needed,
10029 +       .initialise                             = toi_file_initialise,
10030 +       .cleanup                                = toi_file_cleanup,
10031 +
10032 +       .noresume_reset         = toi_file_noresume_reset,
10033 +       .storage_available      = toi_file_storage_available,
10034 +       .storage_allocated      = toi_file_storage_allocated,
10035 +       .reserve_header_space   = toi_file_reserve_header_space,
10036 +       .allocate_storage       = toi_file_allocate_storage,
10037 +       .image_exists           = toi_file_image_exists,
10038 +       .mark_resume_attempted  = toi_file_mark_resume_attempted,
10039 +       .write_header_init      = toi_file_write_header_init,
10040 +       .write_header_cleanup   = toi_file_write_header_cleanup,
10041 +       .read_header_init       = toi_file_read_header_init,
10042 +       .read_header_cleanup    = toi_file_read_header_cleanup,
10043 +       .remove_image           = toi_file_remove_image,
10044 +       .parse_sig_location     = toi_file_parse_sig_location,
10045 +
10046 +       .sysfs_data             = sysfs_params,
10047 +       .num_sysfs_entries      = sizeof(sysfs_params) /
10048 +               sizeof(struct toi_sysfs_data),
10049 +};
10050 +
10051 +/* ---- Registration ---- */
10052 +static __init int toi_file_load(void)
10053 +{
10054 +       toi_fileops.rw_init = toi_bio_ops.rw_init;
10055 +       toi_fileops.rw_cleanup = toi_bio_ops.rw_cleanup;
10056 +       toi_fileops.read_page = toi_bio_ops.read_page;
10057 +       toi_fileops.write_page = toi_bio_ops.write_page;
10058 +       toi_fileops.rw_header_chunk = toi_bio_ops.rw_header_chunk;
10059 +       toi_fileops.rw_header_chunk_noreadahead =
10060 +               toi_bio_ops.rw_header_chunk_noreadahead;
10061 +       toi_fileops.io_flusher = toi_bio_ops.io_flusher;
10062 +       toi_fileops.update_throughput_throttle =
10063 +               toi_bio_ops.update_throughput_throttle;
10064 +       toi_fileops.finish_all_io = toi_bio_ops.finish_all_io;
10065 +
10066 +       return toi_register_module(&toi_fileops);
10067 +}
10068 +
10069 +#ifdef MODULE
10070 +static __exit void toi_file_unload(void)
10071 +{
10072 +       toi_unregister_module(&toi_fileops);
10073 +}
10074 +
10075 +module_init(toi_file_load);
10076 +module_exit(toi_file_unload);
10077 +MODULE_LICENSE("GPL");
10078 +MODULE_AUTHOR("Nigel Cunningham");
10079 +MODULE_DESCRIPTION("TuxOnIce FileAllocator");
10080 +#else
10081 +late_initcall(toi_file_load);
10082 +#endif
10083 diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c
10084 new file mode 100644
10085 index 0000000..c1e99fd
10086 --- /dev/null
10087 +++ b/kernel/power/tuxonice_highlevel.c
10088 @@ -0,0 +1,1305 @@
10089 +/*
10090 + * kernel/power/tuxonice_highlevel.c
10091 + */
10092 +/** \mainpage TuxOnIce.
10093 + *
10094 + * TuxOnIce provides support for saving and restoring an image of
10095 + * system memory to an arbitrary storage device, either on the local computer,
10096 + * or across some network. The support is entirely OS based, so TuxOnIce
10097 + * works without requiring BIOS, APM or ACPI support. The vast majority of the
10098 + * code is also architecture independent, so it should be very easy to port
10099 + * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem
10100 + * and preemption. Initramfses and initrds are also supported.
10101 + *
10102 + * TuxOnIce uses a modular design, in which the method of storing the image is
10103 + * completely abstracted from the core code, as are transformations on the data
10104 + * such as compression and/or encryption (multiple 'modules' can be used to
10105 + * provide arbitrary combinations of functionality). The user interface is also
10106 + * modular, so that arbitrarily simple or complex interfaces can be used to
10107 + * provide anything from debugging information through to eye candy.
10108 + *
10109 + * \section Copyright
10110 + *
10111 + * TuxOnIce is released under the GPLv2.
10112 + *
10113 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR>
10114 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR>
10115 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR>
10116 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)<BR>
10117 + *
10118 + * \section Credits
10119 + *
10120 + * Nigel would like to thank the following people for their work:
10121 + *
10122 + * Bernard Blackham <bernard@blackham.com.au><BR>
10123 + * Web page & Wiki administration, some coding. A person without whom
10124 + * TuxOnIce would not be where it is.
10125 + *
10126 + * Michael Frank <mhf@linuxmail.org><BR>
10127 + * Extensive testing and help with improving stability. I was constantly
10128 + * amazed by the quality and quantity of Michael's help.
10129 + *
10130 + * Pavel Machek <pavel@ucw.cz><BR>
10131 + * Modifications, defectiveness pointing, being with Gabor at the very
10132 + * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and
10133 + * 2.5.17. Even though Pavel and I disagree on the direction suspend to
10134 + * disk should take, I appreciate the valuable work he did in helping Gabor
10135 + * get the concept working.
10136 + *
10137 + * ..and of course the myriads of TuxOnIce users who have helped diagnose
10138 + * and fix bugs, made suggestions on how to improve the code, proofread
10139 + * documentation, and donated time and money.
10140 + *
10141 + * Thanks also to corporate sponsors:
10142 + *
10143 + * <B>Redhat.</B>Sometime employer from May 2006 (my fault, not Redhat's!).
10144 + *
10145 + * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who
10146 + * allowed him to work on TuxOnIce and PM related issues on company time.
10147 + *
10148 + * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct
10149 + * 2003 to Jan 2004.
10150 + *
10151 + * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing
10152 + * maintenance of SMP and Highmem support.
10153 + *
10154 + * <B>OSDL.</B> Provided access to various hardware configurations, make
10155 + * occasional small donations to the project.
10156 + */
10157 +
10158 +#include <linux/suspend.h>
10159 +#include <linux/freezer.h>
10160 +#include <linux/utsrelease.h>
10161 +#include <linux/cpu.h>
10162 +#include <linux/console.h>
10163 +#include <linux/writeback.h>
10164 +#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */
10165 +
10166 +#include "tuxonice.h"
10167 +#include "tuxonice_modules.h"
10168 +#include "tuxonice_sysfs.h"
10169 +#include "tuxonice_prepare_image.h"
10170 +#include "tuxonice_io.h"
10171 +#include "tuxonice_ui.h"
10172 +#include "tuxonice_power_off.h"
10173 +#include "tuxonice_storage.h"
10174 +#include "tuxonice_checksum.h"
10175 +#include "tuxonice_builtin.h"
10176 +#include "tuxonice_atomic_copy.h"
10177 +#include "tuxonice_alloc.h"
10178 +#include "tuxonice_cluster.h"
10179 +
10180 +/*! Pageset metadata. */
10181 +struct pagedir pagedir2 = {2};
10182 +EXPORT_SYMBOL_GPL(pagedir2);
10183 +
10184 +static mm_segment_t oldfs;
10185 +static DEFINE_MUTEX(tuxonice_in_use);
10186 +static int block_dump_save;
10187 +
10188 +/* Binary signature if an image is present */
10189 +char *tuxonice_signature = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c";
10190 +EXPORT_SYMBOL_GPL(tuxonice_signature);
10191 +
10192 +unsigned long boot_kernel_data_buffer;
10193 +
10194 +static char *result_strings[] = {
10195 +       "Hiberation was aborted",
10196 +       "The user requested that we cancel the hibernation",
10197 +       "No storage was available",
10198 +       "Insufficient storage was available",
10199 +       "Freezing filesystems and/or tasks failed",
10200 +       "A pre-existing image was used",
10201 +       "We would free memory, but image size limit doesn't allow this",
10202 +       "Unable to free enough memory to hibernate",
10203 +       "Unable to obtain the Power Management Semaphore",
10204 +       "A device suspend/resume returned an error",
10205 +       "A system device suspend/resume returned an error",
10206 +       "The extra pages allowance is too small",
10207 +       "We were unable to successfully prepare an image",
10208 +       "TuxOnIce module initialisation failed",
10209 +       "TuxOnIce module cleanup failed",
10210 +       "I/O errors were encountered",
10211 +       "Ran out of memory",
10212 +       "An error was encountered while reading the image",
10213 +       "Platform preparation failed",
10214 +       "CPU Hotplugging failed",
10215 +       "Architecture specific preparation failed",
10216 +       "Pages needed resaving, but we were told to abort if this happens",
10217 +       "We can't hibernate at the moment (invalid resume= or filewriter "
10218 +               "target?)",
10219 +       "A hibernation preparation notifier chain member cancelled the "
10220 +               "hibernation",
10221 +       "Pre-snapshot preparation failed",
10222 +       "Pre-restore preparation failed",
10223 +       "Failed to disable usermode helpers",
10224 +       "Can't resume from alternate image",
10225 +       "Header reservation too small",
10226 +};
10227 +
10228 +/**
10229 + * toi_finish_anything - cleanup after doing anything
10230 + * @hibernate_or_resume:       Whether finishing a cycle or attempt at
10231 + *                             resuming.
10232 + *
10233 + * This is our basic clean-up routine, matching start_anything below. We
10234 + * call cleanup routines, drop module references and restore process fs and
10235 + * cpus allowed masks, together with the global block_dump variable's value.
10236 + **/
10237 +void toi_finish_anything(int hibernate_or_resume)
10238 +{
10239 +       toi_cleanup_modules(hibernate_or_resume);
10240 +       toi_put_modules();
10241 +       if (hibernate_or_resume) {
10242 +               block_dump = block_dump_save;
10243 +               set_cpus_allowed(current, CPU_MASK_ALL);
10244 +               toi_alloc_print_debug_stats();
10245 +               atomic_inc(&snapshot_device_available);
10246 +               mutex_unlock(&pm_mutex);
10247 +       }
10248 +
10249 +       set_fs(oldfs);
10250 +       mutex_unlock(&tuxonice_in_use);
10251 +}
10252 +
10253 +/**
10254 + * toi_start_anything - basic initialisation for TuxOnIce
10255 + * @hibernate_or_resume:      Whether starting a cycle or attempt at resuming.
10256 + *
10257 + * Our basic initialisation routine. Take references on modules, use the
10258 + * kernel segment, recheck resume= if no active allocator is set, initialise
10259 + * modules, save and reset block_dump and ensure we're running on CPU0.
10260 + **/
10261 +int toi_start_anything(int hibernate_or_resume)
10262 +{
10263 +       mutex_lock(&tuxonice_in_use);
10264 +
10265 +       oldfs = get_fs();
10266 +       set_fs(KERNEL_DS);
10267 +
10268 +       if (hibernate_or_resume) {
10269 +               mutex_lock(&pm_mutex);
10270 +
10271 +               if (!atomic_add_unless(&snapshot_device_available, -1, 0))
10272 +                       goto snapshotdevice_unavailable;
10273 +       }
10274 +
10275 +       if (hibernate_or_resume == SYSFS_HIBERNATE)
10276 +               toi_print_modules();
10277 +
10278 +       if (toi_get_modules()) {
10279 +               printk(KERN_INFO "TuxOnIce: Get modules failed!\n");
10280 +               goto prehibernate_err;
10281 +       }
10282 +
10283 +       if (hibernate_or_resume) {
10284 +               block_dump_save = block_dump;
10285 +               block_dump = 0;
10286 +               set_cpus_allowed(current,
10287 +                               cpumask_of_cpu(first_cpu(cpu_online_map)));
10288 +       }
10289 +
10290 +       if (toi_initialise_modules_early(hibernate_or_resume))
10291 +               goto early_init_err;
10292 +
10293 +       if (!toiActiveAllocator)
10294 +               toi_attempt_to_parse_resume_device(!hibernate_or_resume);
10295 +
10296 +       if (!toi_initialise_modules_late(hibernate_or_resume))
10297 +               return 0;
10298 +
10299 +       toi_cleanup_modules(hibernate_or_resume);
10300 +early_init_err:
10301 +       if (hibernate_or_resume) {
10302 +               block_dump = block_dump_save; /* restore the value saved above */
10303 +               set_cpus_allowed(current, CPU_MASK_ALL);
10304 +       }
10305 +prehibernate_err:
10306 +       if (hibernate_or_resume)
10307 +               atomic_inc(&snapshot_device_available);
10308 +snapshotdevice_unavailable:
10309 +       if (hibernate_or_resume)
10310 +               mutex_unlock(&pm_mutex);
10311 +       set_fs(oldfs);
10312 +       mutex_unlock(&tuxonice_in_use);
10313 +       return -EBUSY;
10314 +}
10315 +
10316 +/*
10317 + * Nosave page tracking.
10318 + *
10319 + * Here rather than in prepare_image because we want to do it once only at the
10320 + * start of a cycle.
10321 + */
10322 +
10323 +/**
10324 + * mark_nosave_pages - set up our Nosave bitmap
10325 + *
10326 + * Build a bitmap of Nosave pages from the list. The bitmap allows faster
10327 + * use when preparing the image.
10328 + **/
10329 +static void mark_nosave_pages(void)
10330 +{
10331 +       struct nosave_region *region;
10332 +
10333 +       list_for_each_entry(region, &nosave_regions, list) {
10334 +               unsigned long pfn;
10335 +
10336 +               for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
10337 +                       if (pfn_valid(pfn))
10338 +                               SetPageNosave(pfn_to_page(pfn));
10339 +       }
10340 +}
10341 +
10342 +static int alloc_a_bitmap(struct memory_bitmap **bm)
10343 +{
10344 +       int result = 0;
10345 +
10346 +       *bm = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
10347 +       if (!*bm) {
10348 +               printk(KERN_ERR "Failed to kzalloc memory for a bitmap.\n");
10349 +               return -ENOMEM;
10350 +       }
10351 +
10352 +       result = memory_bm_create(*bm, GFP_KERNEL, 0);
10353 +
10354 +       if (result) {
10355 +               printk(KERN_ERR "Failed to create a bitmap.\n");
10356 +               kfree(*bm);
10357 +       }
10358 +
10359 +       return result;
10360 +}
10361 +
10362 +/**
10363 + * allocate_bitmaps - allocate bitmaps used to record page states
10364 + *
10365 + * Allocate the bitmaps we use to record the various TuxOnIce related
10366 + * page states.
10367 + **/
10368 +static int allocate_bitmaps(void)
10369 +{
10370 +       if (alloc_a_bitmap(&pageset1_map) ||
10371 +           alloc_a_bitmap(&pageset1_copy_map) ||
10372 +           alloc_a_bitmap(&pageset2_map) ||
10373 +           alloc_a_bitmap(&io_map) ||
10374 +           alloc_a_bitmap(&nosave_map) ||
10375 +           alloc_a_bitmap(&free_map) ||
10376 +           alloc_a_bitmap(&page_resave_map))
10377 +               return 1;
10378 +
10379 +       return 0;
10380 +}
10381 +
10382 +static void free_a_bitmap(struct memory_bitmap **bm)
10383 +{
10384 +       if (!*bm)
10385 +               return;
10386 +
10387 +       memory_bm_free(*bm, 0);
10388 +       kfree(*bm);
10389 +       *bm = NULL;
10390 +}
10391 +
10392 +/**
10393 + * free_bitmaps - free the bitmaps used to record page states
10394 + *
10395 + * Free the bitmaps allocated above. It is not an error to call
10396 + * memory_bm_free on a bitmap that isn't currently allocated.
10397 + **/
10398 +static void free_bitmaps(void)
10399 +{
10400 +       free_a_bitmap(&pageset1_map);
10401 +       free_a_bitmap(&pageset1_copy_map);
10402 +       free_a_bitmap(&pageset2_map);
10403 +       free_a_bitmap(&io_map);
10404 +       free_a_bitmap(&nosave_map);
10405 +       free_a_bitmap(&free_map);
10406 +       free_a_bitmap(&page_resave_map);
10407 +}
10408 +
10409 +/**
10410 + * io_MB_per_second - return the number of MB/s read or written
10411 + * @write:     Whether to return the speed at which we wrote.
10412 + *
10413 + * Calculate the number of megabytes per second that were read or written.
10414 + **/
10415 +static int io_MB_per_second(int write)
10416 +{
10417 +       return (toi_bkd.toi_io_time[write][1]) ?
10418 +               MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ /
10419 +               toi_bkd.toi_io_time[write][1] : 0;
10420 +}
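+
+/*
+ * Editorial worked example (assuming MB() converts a page count to
+ * megabytes): toi_io_time[write][0] accumulates pages transferred and
+ * toi_io_time[write][1] the jiffies taken, so with 4KB pages and HZ == 250,
+ * writing 51200 pages (200MB) in 500 jiffies (two seconds) gives
+ * 200 * 250 / 500 == 100 MB/s.
+ */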
10421 +
10422 +#define SNPRINTF(a...)         do { len += scnprintf(((char *) buffer) + len, \
10423 +               count - len - 1, ## a); } while (0)
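+
+/*
+ * (Editorial note) SNPRINTF appends to `buffer' while keeping `len' within
+ * `count', so a sequence of calls such as
+ *
+ *     SNPRINTF("TuxOnIce debugging info:\n");
+ *     SNPRINTF("- Attempt number : %d\n", nr_hibernates);
+ *
+ * builds up the report in get_toi_debug_info() without overrunning the
+ * page-sized buffer.
+ */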
10424 +
10425 +/**
10426 + * get_toi_debug_info - fill a buffer with debugging information
10427 + * @buffer:    The buffer to be filled.
10428 + * @count:     The size of the buffer, in bytes.
10429 + *
10430 + * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
10431 + * either printk or return via sysfs.
10432 + **/
10433 +static int get_toi_debug_info(const char *buffer, int count)
10434 +{
10435 +       int len = 0, i, first_result = 1;
10436 +
10437 +       SNPRINTF("TuxOnIce debugging info:\n");
10438 +       SNPRINTF("- TuxOnIce core  : " TOI_CORE_VERSION "\n");
10439 +       SNPRINTF("- Kernel Version : " UTS_RELEASE "\n");
10440 +       SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__);
10441 +       SNPRINTF("- Attempt number : %d\n", nr_hibernates);
10442 +       SNPRINTF("- Parameters     : %ld %ld %ld %d %d %ld\n",
10443 +                       toi_result,
10444 +                       toi_bkd.toi_action,
10445 +                       toi_bkd.toi_debug_state,
10446 +                       toi_bkd.toi_default_console_level,
10447 +                       image_size_limit,
10448 +                       toi_poweroff_method);
10449 +       SNPRINTF("- Overall expected compression percentage: %d.\n",
10450 +                       100 - toi_expected_compression_ratio());
10451 +       len += toi_print_module_debug_info(((char *) buffer) + len,
10452 +                       count - len - 1);
10453 +       if (toi_bkd.toi_io_time[0][1]) {
10454 +               if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) {
10455 +                       SNPRINTF("- I/O speed: Write %ld KB/s",
10456 +                         (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
10457 +                         toi_bkd.toi_io_time[0][1]));
10458 +                       if (toi_bkd.toi_io_time[1][1])
10459 +                               SNPRINTF(", Read %ld KB/s",
10460 +                                 (KB((unsigned long)
10461 +                                     toi_bkd.toi_io_time[1][0]) * HZ /
10462 +                                 toi_bkd.toi_io_time[1][1]));
10463 +               } else {
10464 +                       SNPRINTF("- I/O speed: Write %ld MB/s",
10465 +                        (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
10466 +                         toi_bkd.toi_io_time[0][1]));
10467 +                       if (toi_bkd.toi_io_time[1][1])
10468 +                               SNPRINTF(", Read %ld MB/s",
10469 +                                (MB((unsigned long)
10470 +                                    toi_bkd.toi_io_time[1][0]) * HZ /
10471 +                                 toi_bkd.toi_io_time[1][1]));
10472 +               }
10473 +               SNPRINTF(".\n");
10474 +       } else
10475 +               SNPRINTF("- No I/O speed stats available.\n");
10476 +       SNPRINTF("- Extra pages    : %ld used/%ld.\n",
10477 +                       extra_pd1_pages_used, extra_pd1_pages_allowance);
10478 +
10479 +       for (i = 0; i < TOI_NUM_RESULT_STATES; i++)
10480 +               if (test_result_state(i)) {
10481 +                       SNPRINTF("%s: %s.\n", first_result ?
10482 +                                       "- Result         " :
10483 +                                       "                 ",
10484 +                                       result_strings[i]);
10485 +                       first_result = 0;
10486 +               }
10487 +       if (first_result)
10488 +               SNPRINTF("- Result         : %s.\n", nr_hibernates ?
10489 +                       "Succeeded" :
10490 +                       "No hibernation attempts so far");
10491 +       return len;
10492 +}
10493 +
10494 +/**
10495 + * do_cleanup - cleanup after attempting to hibernate or resume
10496 + * @get_debug_info:    Whether to allocate and return debugging info.
+ * @restarting:         Whether we are restarting the cycle (if so, the
+ *                      console and storage are left set up for reuse).
10497 + *
10498 + * Cleanup after attempting to hibernate or resume, possibly getting
10499 + * debugging info as we do so.
10500 + **/
10501 +static void do_cleanup(int get_debug_info, int restarting)
10502 +{
10503 +       int i = 0;
10504 +       char *buffer = NULL;
10505 +
10506 +       if (get_debug_info)
10507 +               toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up...");
10508 +
10509 +       free_checksum_pages();
10510 +
10511 +       if (get_debug_info)
10512 +               buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP);
10513 +
10514 +       if (buffer)
10515 +               i = get_toi_debug_info(buffer, PAGE_SIZE);
10516 +
10517 +       toi_free_extra_pagedir_memory();
10518 +
10519 +       pagedir1.size = 0;
10520 +       pagedir2.size = 0;
10521 +       set_highmem_size(pagedir1, 0);
10522 +       set_highmem_size(pagedir2, 0);
10523 +
10524 +       if (boot_kernel_data_buffer) {
10525 +               if (!test_toi_state(TOI_BOOT_KERNEL))
10526 +                       toi_free_page(37, boot_kernel_data_buffer);
10527 +               boot_kernel_data_buffer = 0;
10528 +       }
10529 +
10530 +       clear_toi_state(TOI_BOOT_KERNEL);
10531 +       thaw_processes();
10532 +
10533 +       if (test_action_state(TOI_KEEP_IMAGE) &&
10534 +           !test_result_state(TOI_ABORTED)) {
10535 +               toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
10536 +                       "TuxOnIce: Not invalidating the image due "
10537 +                       "to Keep Image being enabled.\n");
10538 +               set_result_state(TOI_KEPT_IMAGE);
10539 +       } else
10540 +               if (toiActiveAllocator)
10541 +                       toiActiveAllocator->remove_image();
10542 +
10543 +       free_bitmaps();
10544 +       usermodehelper_enable();
10545 +
10546 +       if (test_toi_state(TOI_NOTIFIERS_PREPARE)) {
10547 +               pm_notifier_call_chain(PM_POST_HIBERNATION);
10548 +               clear_toi_state(TOI_NOTIFIERS_PREPARE);
10549 +       }
10550 +
10551 +       if (buffer && i) {
10552 +               /* Printk can only handle 1023 bytes, including
10553 +                * its level mangling. */
10554 +               for (i = 0; i < 3; i++)
10555 +                       printk(KERN_ERR "%s", buffer + (1023 * i));
10556 +               toi_free_page(20, (unsigned long) buffer);
10557 +       }
10558 +
10559 +       if (!test_action_state(TOI_LATE_CPU_HOTPLUG))
10560 +               enable_nonboot_cpus();
10561 +
10562 +       if (!restarting)
10563 +               toi_cleanup_console();
10564 +
10565 +       free_attention_list();
10566 +
10567 +       if (!restarting)
10568 +               toi_deactivate_storage(0);
10569 +
10570 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
10571 +       clear_toi_state(TOI_TRYING_TO_RESUME);
10572 +       clear_toi_state(TOI_NOW_RESUMING);
10573 +}
10574 +
10575 +/**
10576 + * check_still_keeping_image - we kept an image; check whether to reuse it.
10577 + *
10578 + * We enter this routine when we have kept an image. If the user has said they
10579 + * want to still keep it, all we need to do is power down. If powering down
10580 + * means hibernating to RAM and the power doesn't run out, we'll return 1.
10581 + * If we do power off properly or the battery runs out, we'll resume via the
10582 + * normal paths.
10583 + *
10584 + * If the user has said they want to remove the previously kept image, we
10585 + * remove it, and return 0. We'll then store a new image.
10586 + **/
10587 +static int check_still_keeping_image(void)
10588 +{
10589 +       if (test_action_state(TOI_KEEP_IMAGE)) {
10590 +               printk(KERN_INFO "Image already stored: powering down "
10591 +                               "immediately.");
10592 +               do_toi_step(STEP_HIBERNATE_POWERDOWN);
10593 +               return 1;       /* Just in case we're using S3 */
10594 +       }
10595 +
10596 +       printk(KERN_INFO "Invalidating previous image.\n");
10597 +       toiActiveAllocator->remove_image();
10598 +
10599 +       return 0;
10600 +}
10601 +
10602 +/**
10603 + * toi_init - prepare to hibernate to disk
+ * @restarting:        Whether we are restarting the cycle (if so, the console
+ *                     has already been prepared).
10604 + *
10605 + * Initialise variables & data structures, in preparation for
10606 + * hibernating to disk.
10607 + **/
10608 +static int toi_init(int restarting)
10609 +{
10610 +       int result, i, j;
10611 +
10612 +       toi_result = 0;
10613 +
10614 +       printk(KERN_INFO "Initiating a hibernation cycle.\n");
10615 +
10616 +       nr_hibernates++;
10617 +
10618 +       for (i = 0; i < 2; i++)
10619 +               for (j = 0; j < 2; j++)
10620 +                       toi_bkd.toi_io_time[i][j] = 0;
10621 +
10622 +       if (!test_toi_state(TOI_CAN_HIBERNATE) ||
10623 +           allocate_bitmaps())
10624 +               return 1;
10625 +
10626 +       mark_nosave_pages();
10627 +
10628 +       if (!restarting)
10629 +               toi_prepare_console();
10630 +
10631 +       result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
10632 +       if (result) {
10633 +               set_result_state(TOI_NOTIFIERS_PREPARE_FAILED);
10634 +               return 1;
10635 +       }
10636 +       set_toi_state(TOI_NOTIFIERS_PREPARE);
10637 +
10638 +       result = usermodehelper_disable();
10639 +       if (result) {
10640 +               printk(KERN_ERR "TuxOnIce: Failed to disable usermode "
10641 +                               "helpers\n");
10642 +               set_result_state(TOI_USERMODE_HELPERS_ERR);
10643 +               return 1;
10644 +       }
10645 +
10646 +       boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP);
10647 +       if (!boot_kernel_data_buffer) {
10648 +               printk(KERN_ERR "TuxOnIce: Failed to allocate "
10649 +                               "boot_kernel_data_buffer.\n");
10650 +               set_result_state(TOI_OUT_OF_MEMORY);
10651 +               return 1;
10652 +       }
10653 +
10654 +       if (test_action_state(TOI_LATE_CPU_HOTPLUG) ||
10655 +                       !disable_nonboot_cpus())
10656 +               return 1;
10657 +
10658 +       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
10659 +       return 0;
10660 +}
10661 +
10662 +/**
10663 + * can_hibernate - perform basic 'Can we hibernate?' tests
10664 + *
10665 + * Perform basic tests that must pass if we're going to be able to hibernate:
10666 + * Can we get the pm_mutex? Is resume= valid (we need to know where to write
10667 + * the image header).
10668 + **/
10669 +static int can_hibernate(void)
10670 +{
10671 +       if (!test_toi_state(TOI_CAN_HIBERNATE))
10672 +               toi_attempt_to_parse_resume_device(0);
10673 +
10674 +       if (!test_toi_state(TOI_CAN_HIBERNATE)) {
10675 +               printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n"
10676 +                       "This may be because you haven't put something along "
10677 +                       "the lines of\n\nresume=swap:/dev/hda1\n\n"
10678 +                       "in lilo.conf or equivalent. (Where /dev/hda1 is your "
10679 +                       "swap partition).\n");
10680 +               set_abort_result(TOI_CANT_SUSPEND);
10681 +               return 0;
10682 +       }
10683 +
10684 +       if (strlen(alt_resume_param)) {
10685 +               attempt_to_parse_alt_resume_param();
10686 +
10687 +               if (!strlen(alt_resume_param)) {
10688 +                       printk(KERN_INFO "Alternate resume parameter now "
10689 +                                       "invalid. Aborting.\n");
10690 +                       set_abort_result(TOI_CANT_USE_ALT_RESUME);
10691 +                       return 0;
10692 +               }
10693 +       }
10694 +
10695 +       return 1;
10696 +}
10697 +
10698 +/**
10699 + * do_post_image_write - having written an image, figure out what to do next
10700 + *
10701 + * After writing an image, we might load an alternate image or power down.
10702 + * Powering down might involve hibernating to ram, in which case we also
10703 + * need to handle reloading pageset2.
10704 + **/
10705 +static int do_post_image_write(void)
10706 +{
10707 +       /* If switching images fails, do normal powerdown */
10708 +       if (alt_resume_param[0])
10709 +               do_toi_step(STEP_RESUME_ALT_IMAGE);
10710 +
10711 +       toi_power_down();
10712 +
10713 +       barrier();
10714 +       mb();
10715 +       return 0;
10716 +}
10717 +
10718 +/**
10719 + * __save_image - do the hard work of saving the image
10720 + *
10721 + * High level routine for getting the image saved. The key assumptions made
10722 + * are that processes have been frozen and sufficient memory is available.
10723 + *
10724 + * We also exit through here at resume time, coming back from toi_hibernate
10725 + * after the atomic restore. This is the reason for the toi_in_hibernate
10726 + * test.
10727 + **/
10728 +static int __save_image(void)
10729 +{
10730 +       int temp_result, did_copy = 0;
10731 +
10732 +       toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image..");
10733 +
10734 +       toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
10735 +               " - Final values: %d and %d.\n",
10736 +               pagedir1.size, pagedir2.size);
10737 +
10738 +       toi_cond_pause(1, "About to write pagedir2.");
10739 +
10740 +       temp_result = write_pageset(&pagedir2);
10741 +
10742 +       if (temp_result == -1 || test_result_state(TOI_ABORTED))
10743 +               return 1;
10744 +
10745 +       toi_cond_pause(1, "About to copy pageset 1.");
10746 +
10747 +       if (test_result_state(TOI_ABORTED))
10748 +               return 1;
10749 +
10750 +       toi_deactivate_storage(1);
10751 +
10752 +       toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
10753 +
10754 +       toi_in_hibernate = 1;
10755 +
10756 +       if (toi_go_atomic(PMSG_FREEZE, 1))
10757 +               goto Failed;
10758 +
10759 +       temp_result = toi_hibernate();
10760 +       if (!temp_result)
10761 +               did_copy = 1;
10762 +
10763 +       /* We return here at resume time too! */
10764 +       toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result);
10765 +
10766 +Failed:
10767 +       if (toi_activate_storage(1))
10768 +               panic("Failed to reactivate our storage.");
10769 +
10770 +       /* Resume time? */
10771 +       if (!toi_in_hibernate) {
10772 +               copyback_post();
10773 +               return 0;
10774 +       }
10775 +
10776 +       /* Nope. Hibernating. So, see if we can save the image... */
10777 +
10778 +       if (temp_result || test_result_state(TOI_ABORTED)) {
10779 +               if (did_copy)
10780 +                       goto abort_reloading_pagedir_two;
10781 +               else
10782 +                       return 1;
10783 +       }
10784 +
10785 +       toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size,
10786 +                       NULL);
10787 +
10788 +       if (test_result_state(TOI_ABORTED))
10789 +               goto abort_reloading_pagedir_two;
10790 +
10791 +       toi_cond_pause(1, "About to write pageset1.");
10792 +
10793 +       toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1\n");
10794 +
10795 +       temp_result = write_pageset(&pagedir1);
10796 +
10797 +       /* We didn't overwrite any memory, so no reread needs to be done. */
10798 +       if (test_action_state(TOI_TEST_FILTER_SPEED))
10799 +               return 1;
10800 +
10801 +       if (temp_result == 1 || test_result_state(TOI_ABORTED))
10802 +               goto abort_reloading_pagedir_two;
10803 +
10804 +       toi_cond_pause(1, "About to write header.");
10805 +
10806 +       if (test_result_state(TOI_ABORTED))
10807 +               goto abort_reloading_pagedir_two;
10808 +
10809 +       temp_result = write_image_header();
10810 +
10811 +       if (test_action_state(TOI_TEST_BIO))
10812 +               return 1;
10813 +
10814 +       if (!temp_result && !test_result_state(TOI_ABORTED))
10815 +               return 0;
10816 +
10817 +abort_reloading_pagedir_two:
10818 +       temp_result = read_pageset2(1);
10819 +
10820 +       /* If that failed, we're sunk. Panic! */
10821 +       if (temp_result)
10822 +               panic("Attempt to reload pagedir 2 while aborting "
10823 +                               "a hibernate failed.");
10824 +
10825 +       return 1;
10826 +}
10827 +
10828 +static void map_ps2_pages(int enable)
10829 +{
10830 +       unsigned long pfn = 0;
10831 +
10832 +       pfn = memory_bm_next_pfn(pageset2_map);
10833 +
10834 +       while (pfn != BM_END_OF_MAP) {
10835 +               struct page *page = pfn_to_page(pfn);
10836 +               kernel_map_pages(page, 1, enable);
10837 +               pfn = memory_bm_next_pfn(pageset2_map);
10838 +       }
10839 +}
10840 +
10841 +/**
10842 + * do_save_image - save the image and handle the result
10843 + *
10844 + * Save the prepared image. If we fail or we're in the path returning
10845 + * from the atomic restore, cleanup.
10846 + **/
10847 +static int do_save_image(void)
10848 +{
10849 +       int result;
10850 +       map_ps2_pages(0);
10851 +       result = __save_image();
10852 +       map_ps2_pages(1);
10853 +       return result;
10854 +}
10855 +
10856 +/**
10857 + * do_prepare_image - try to prepare an image
10858 + *
10859 + * Seek to initialise and prepare an image to be saved. On failure,
10860 + * cleanup.
10861 + **/
10862 +static int do_prepare_image(void)
10863 +{
10864 +       int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
10865 +
10866 +       if (!restarting && toi_activate_storage(0))
10867 +               return 1;
10868 +
10869 +       /*
10870 +        * If we kept an image, are still keeping it, and powering down means
10871 +        * hibernating to RAM, we will return 1 after hibernating and resuming
10872 +        * (provided the power doesn't run out), and skip straight to cleanup.
10873 +        */
10874 +
10875 +       if (!can_hibernate() ||
10876 +           (test_result_state(TOI_KEPT_IMAGE) &&
10877 +            check_still_keeping_image()))
10878 +               return 1;
10879 +
10880 +       if (toi_init(restarting) && !toi_prepare_image() &&
10881 +                       !test_result_state(TOI_ABORTED))
10882 +               return 0;
10883 +
10884 +       return 1;
10885 +}
10886 +
10887 +/**
10888 + * do_check_can_resume - find out whether an image has been stored
10889 + *
10890 + * Read whether an image exists. We use the same routine as the
10891 + * image_exists sysfs entry, and just look to see whether the
10892 + * first character in the resulting buffer is a '1'.
10893 + **/
10894 +int do_check_can_resume(void)
10895 +{
10896 +       char *buf = (char *) toi_get_zeroed_page(21, TOI_ATOMIC_GFP);
10897 +       int result = 0;
10898 +
10899 +       if (!buf)
10900 +               return 0;
10901 +
10902 +       /* Only interested in first byte, so throw away return code. */
10903 +       image_exists_read(buf, PAGE_SIZE);
10904 +
10905 +       if (buf[0] == '1')
10906 +               result = 1;
10907 +
10908 +       toi_free_page(21, (unsigned long) buf);
10909 +       return result;
10910 +}
10911 +EXPORT_SYMBOL_GPL(do_check_can_resume);
10912 +
10913 +/**
10914 + * do_load_atomic_copy - load the first part of an image, if it exists
10915 + *
10916 + * Check whether we have an image. If one exists, do sanity checking
10917 + * (possibly invalidating the image or even rebooting if the user
10918 + * requests that) before loading it into memory in preparation for the
10919 + * atomic restore.
10920 + *
10921 + * If and only if we have an image loaded and ready to restore, we return 1.
10922 + **/
10923 +static int do_load_atomic_copy(void)
10924 +{
10925 +       int read_image_result = 0;
10926 +
10927 +       if (sizeof(swp_entry_t) != sizeof(long)) {
10928 +               printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size"
10929 +                       " of long. Please report this!\n");
10930 +               return 1;
10931 +       }
10932 +
10933 +       if (!resume_file[0])
10934 +               printk(KERN_WARNING "TuxOnIce: "
10935 +                       "You need to use a resume= command line parameter to "
10936 +                       "tell TuxOnIce where to look for an image.\n");
10937 +
10938 +       toi_activate_storage(0);
10939 +
10940 +       if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) &&
10941 +               !toi_attempt_to_parse_resume_device(0)) {
10942 +               /*
10943 +                * Without a usable storage device we can do nothing -
10944 +                * even if noresume is given
10945 +                */
10946 +
10947 +               if (!toiNumAllocators)
10948 +                       printk(KERN_ALERT "TuxOnIce: "
10949 +                         "No storage allocators have been registered.\n");
10950 +               else
10951 +                       printk(KERN_ALERT "TuxOnIce: "
10952 +                               "Missing or invalid storage location "
10953 +                               "(resume= parameter). Please correct and "
10954 +                               "rerun lilo (or equivalent) before "
10955 +                               "hibernating.\n");
10956 +               toi_deactivate_storage(0);
10957 +               return 1;
10958 +       }
10959 +
10960 +       if (allocate_bitmaps())
10961 +               return 1;
10962 +
10963 +       read_image_result = read_pageset1(); /* non fatal error ignored */
10964 +
10965 +       if (test_toi_state(TOI_NORESUME_SPECIFIED))
10966 +               clear_toi_state(TOI_NORESUME_SPECIFIED);
10967 +
10968 +       toi_deactivate_storage(0);
10969 +
10970 +       if (read_image_result)
10971 +               return 1;
10972 +
10973 +       return 0;
10974 +}
10975 +
10976 +/**
10977 + * prepare_restore_load_alt_image - save & restore alt image variables
10978 + *
10979 + * Save and restore the pageset1 maps, when loading an alternate image.
10980 + **/
10981 +static void prepare_restore_load_alt_image(int prepare)
10982 +{
10983 +       static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save;
10984 +
10985 +       if (prepare) {
10986 +               pageset1_map_save = pageset1_map;
10987 +               pageset1_map = NULL;
10988 +               pageset1_copy_map_save = pageset1_copy_map;
10989 +               pageset1_copy_map = NULL;
10990 +               set_toi_state(TOI_LOADING_ALT_IMAGE);
10991 +               toi_reset_alt_image_pageset2_pfn();
10992 +       } else {
10993 +               memory_bm_free(pageset1_map, 0);
10994 +               pageset1_map = pageset1_map_save;
10995 +               memory_bm_free(pageset1_copy_map, 0);
10996 +               pageset1_copy_map = pageset1_copy_map_save;
10997 +               clear_toi_state(TOI_NOW_RESUMING);
10998 +               clear_toi_state(TOI_LOADING_ALT_IMAGE);
10999 +       }
11000 +}
11001 +
11002 +/**
11003 + * do_toi_step - perform a step in hibernating or resuming
11004 + *
11005 + * Perform a step in hibernating or resuming an image. This abstraction
11006 + * is in preparation for implementing cluster support, and perhaps replacing
11007 + * uswsusp too (haven't looked whether that's possible yet).
11008 + **/
11009 +int do_toi_step(int step)
11010 +{
11011 +       switch (step) {
11012 +       case STEP_HIBERNATE_PREPARE_IMAGE:
11013 +               return do_prepare_image();
11014 +       case STEP_HIBERNATE_SAVE_IMAGE:
11015 +               return do_save_image();
11016 +       case STEP_HIBERNATE_POWERDOWN:
11017 +               return do_post_image_write();
11018 +       case STEP_RESUME_CAN_RESUME:
11019 +               return do_check_can_resume();
11020 +       case STEP_RESUME_LOAD_PS1:
11021 +               return do_load_atomic_copy();
11022 +       case STEP_RESUME_DO_RESTORE:
11023 +               /*
11024 +                * If we succeed, this doesn't return.
11025 +                * Instead, we return from do_save_image() in the
11026 +                * hibernated kernel.
11027 +                */
11028 +               return toi_atomic_restore();
11029 +       case STEP_RESUME_ALT_IMAGE:
11030 +               printk(KERN_INFO "Trying to resume alternate image.\n");
11031 +               toi_in_hibernate = 0;
11032 +               save_restore_alt_param(SAVE, NOQUIET);
11033 +               prepare_restore_load_alt_image(1);
11034 +               if (!do_check_can_resume()) {
11035 +                       printk(KERN_INFO "Nothing to resume from.\n");
11036 +                       goto out;
11037 +               }
11038 +               if (!do_load_atomic_copy())
11039 +                       toi_atomic_restore();
11040 +
11041 +               printk(KERN_INFO "Failed to load image.\n");
11042 +out:
11043 +               prepare_restore_load_alt_image(0);
11044 +               save_restore_alt_param(RESTORE, NOQUIET);
11045 +               break;
11046 +       case STEP_CLEANUP:
11047 +               do_cleanup(1, 0);
11048 +               break;
11049 +       case STEP_QUIET_CLEANUP:
11050 +               do_cleanup(0, 0);
11051 +               break;
11052 +       }
11053 +
11054 +       return 0;
11055 +}
11056 +EXPORT_SYMBOL_GPL(do_toi_step);
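+
+/*
+ * Editorial sketch (not from the original patch): toi_try_hibernate() below
+ * drives a full cycle through these steps; stripped of cluster support and
+ * the retry logic, it amounts to:
+ *
+ *     if (!do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE) &&
+ *         !do_toi_step(STEP_HIBERNATE_SAVE_IMAGE) &&
+ *         toi_in_hibernate)
+ *             do_toi_step(STEP_HIBERNATE_POWERDOWN);
+ *     do_toi_step(STEP_CLEANUP);
+ */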
11057 +
11058 +/* -- Functions for kickstarting a hibernate or resume --- */
11059 +
11060 +/**
11061 + * toi_try_resume - try to do the steps in resuming
11062 + *
11063 + * Check if we have an image and if so try to resume. Clear the status
11064 + * flags too.
11065 + **/
11066 +void toi_try_resume(void)
11067 +{
11068 +       set_toi_state(TOI_TRYING_TO_RESUME);
11069 +       resume_attempted = 1;
11070 +
11071 +       current->flags |= PF_MEMALLOC;
11072 +
11073 +       if (do_toi_step(STEP_RESUME_CAN_RESUME) &&
11074 +                       !do_toi_step(STEP_RESUME_LOAD_PS1))
11075 +               do_toi_step(STEP_RESUME_DO_RESTORE);
11076 +
11077 +       do_cleanup(0, 0);
11078 +
11079 +       current->flags &= ~PF_MEMALLOC;
11080 +
11081 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
11082 +       clear_toi_state(TOI_TRYING_TO_RESUME);
11083 +       clear_toi_state(TOI_NOW_RESUMING);
11084 +}
11085 +
11086 +/**
11087 + * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume
11088 + *
11089 + * Wrapper for when toi_try_resume is called from the swsusp resume path,
11090 + * rather than from echo > /sys/power/tuxonice/do_resume.
11091 + **/
11092 +static void toi_sys_power_disk_try_resume(void)
11093 +{
11094 +       resume_attempted = 1;
11095 +
11096 +       /*
11097 +        * There's a comment in kernel/power/disk.c that indicates
11098 +        * we should be able to use mutex_lock_nested below. That
11099 +        * doesn't seem to cut it, though, so let's just turn lockdep
11100 +        * off for now.
11101 +        */
11102 +       lockdep_off();
11103 +
11104 +       if (toi_start_anything(SYSFS_RESUMING))
11105 +               goto out;
11106 +
11107 +       toi_try_resume();
11108 +
11109 +       /*
11110 +        * For initramfs, we have to clear the boot time
11111 +        * flag after trying to resume
11112 +        */
11113 +       clear_toi_state(TOI_BOOT_TIME);
11114 +
11115 +       toi_finish_anything(SYSFS_RESUMING);
11116 +out:
11117 +       lockdep_on();
11118 +}
11119 +
11120 +/**
11121 + * toi_try_hibernate - try to start a hibernation cycle
11122 + *
11123 + * Start a hibernation cycle, coming in from either
11124 + * echo > /sys/power/tuxonice/do_suspend
11125 + *
11126 + * or
11127 + *
11128 + * echo disk > /sys/power/state
11129 + *
11130 + * In the latter case, we come in without pm_mutex taken; in the
11131 + * former, it has been taken.
11132 + **/
11133 +int toi_try_hibernate(void)
11134 +{
11135 +       int result = 0, sys_power_disk = 0, retries = 0;
11136 +
11137 +       if (!mutex_is_locked(&tuxonice_in_use)) {
11138 +               /* Came in via /sys/power/disk */
11139 +               if (toi_start_anything(SYSFS_HIBERNATING))
11140 +                       return -EBUSY;
11141 +               sys_power_disk = 1;
11142 +       }
11143 +
11144 +       current->flags |= PF_MEMALLOC;
11145 +
11146 +       if (test_toi_state(TOI_CLUSTER_MODE)) {
11147 +               toi_initiate_cluster_hibernate();
11148 +               goto out;
11149 +       }
11150 +
11151 +prepare:
11152 +       result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
11153 +
11154 +       if (result || test_action_state(TOI_FREEZER_TEST))
11155 +               goto out;
11156 +
11157 +       result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
11158 +
11159 +       if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) {
11160 +               if (retries < 2) {
11161 +                       do_cleanup(0, 1);
11162 +                       retries++;
11163 +                       clear_result_state(TOI_ABORTED);
11164 +                       extra_pd1_pages_allowance = extra_pd1_pages_used + 500;
11165 +                       printk(KERN_INFO "Automatically adjusting the extra"
11166 +                               " pages allowance to %ld and restarting.\n",
11167 +                               extra_pd1_pages_allowance);
11168 +                       goto prepare;
11169 +               }
11170 +
11171 +               printk(KERN_INFO "Adjusted extra pages allowance twice and "
11172 +                       "still couldn't hibernate successfully. Giving up.");
11173 +       }
11174 +
11175 +       /* This code runs at resume time too! */
11176 +       if (!result && toi_in_hibernate)
11177 +               result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
11178 +out:
11179 +       do_cleanup(1, 0);
11180 +       current->flags &= ~PF_MEMALLOC;
11181 +
11182 +       if (sys_power_disk)
11183 +               toi_finish_anything(SYSFS_HIBERNATING);
11184 +
11185 +       return result;
11186 +}
11187 +
11188 +/*
11189 + * channel_no: If !0, -c <channel_no> is added to args (userui).
11190 + */
11191 +int toi_launch_userspace_program(char *command, int channel_no,
11192 +               enum umh_wait wait, int debug)
11193 +{
11194 +       int retval;
11195 +       static char *envp[] = {
11196 +                       "HOME=/",
11197 +                       "TERM=linux",
11198 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
11199 +                       NULL };
11200 +       static char *argv[] =
11201 +               { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
11202 +       char *channel = NULL;
11203 +       int arg = 0, size;
11204 +       char test_read[255];
11205 +       char *orig_posn = command;
11206 +
11207 +       if (!strlen(orig_posn))
11208 +               return 1;
11209 +
11210 +       if (channel_no) {
11211 +               channel = toi_kzalloc(4, 6, GFP_KERNEL);
11212 +               if (!channel) {
11213 +                       printk(KERN_INFO "Failed to allocate memory in "
11214 +                               "preparing to launch userspace program.\n");
11215 +                       return 1;
11216 +               }
11217 +       }
11218 +
11219 +       /* Up to 6 args supported */
11220 +       while (arg < 6) {
11221 +               sscanf(orig_posn, "%s", test_read);
11222 +               size = strlen(test_read);
11223 +               if (!(size))
11224 +                       break;
11225 +               argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP);
11226 +               strcpy(argv[arg], test_read);
11227 +               orig_posn += size + 1;
11228 +               *test_read = 0;
11229 +               arg++;
11230 +       }
11231 +
11232 +       if (channel_no) {
11233 +               sprintf(channel, "-c%d", channel_no);
11234 +               argv[arg] = channel;
11235 +       } else
11236 +               arg--;
11237 +
11238 +       if (debug) {
11239 +               argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP);
11240 +               strcpy(argv[arg], "--debug");
11241 +       }
11242 +
11243 +       retval = call_usermodehelper(argv[0], argv, envp, wait);
11244 +
11245 +       /*
11246 +        * If the program reports an error, retval = 256. Don't complain
11247 +        * about that here.
11248 +        */
11249 +       if (retval && retval != 256)
11250 +               printk(KERN_ERR "Failed to launch userspace program '%s': "
11251 +                               "Error %d\n", command, retval);
11252 +
11253 +       {
11254 +               int i;
11255 +               for (i = 0; i < arg; i++)
11256 +                       if (argv[i] && argv[i] != channel)
11257 +                               toi_kfree(5, argv[i], sizeof (*argv[i]));
11258 +       }
11259 +
11260 +       toi_kfree(4, channel, sizeof(*channel));
11261 +
11262 +       return retval;
11263 +}
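+
+/*
+ * Editorial example (hypothetical helper path, not from the original patch):
+ *
+ *     toi_launch_userspace_program("/usr/sbin/mytoi_ui", 1,
+ *                                  UMH_WAIT_EXEC, 0);
+ *
+ * splits the command on whitespace into at most six argv entries, appends
+ * "-c1" because channel_no == 1, and would add "--debug" if debug were
+ * non-zero, before handing everything to call_usermodehelper().
+ */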
11264 +
11265 +/*
11266 + * This array contains entries that are automatically registered at
11267 + * boot. Modules and the console code register their own entries separately.
11268 + */
11269 +static struct toi_sysfs_data sysfs_params[] = {
11270 +       SYSFS_LONG("extra_pages_allowance", SYSFS_RW,
11271 +                       &extra_pd1_pages_allowance, 0, LONG_MAX, 0),
11272 +       SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read,
11273 +                       image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL),
11274 +       SYSFS_STRING("resume", SYSFS_RW, resume_file, 255,
11275 +                       SYSFS_NEEDS_SM_FOR_WRITE,
11276 +                       attempt_to_parse_resume_device2),
11277 +       SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255,
11278 +                       SYSFS_NEEDS_SM_FOR_WRITE,
11279 +                       attempt_to_parse_alt_resume_param),
11280 +       SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0,
11281 +                       NULL),
11282 +       SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action,
11283 +                       TOI_IGNORE_ROOTFS, 0),
11284 +       SYSFS_INT("image_size_limit", SYSFS_RW, &image_size_limit, -2,
11285 +                       INT_MAX, 0, NULL),
11286 +       SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0),
11287 +       SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action,
11288 +                       TOI_NO_MULTITHREADED_IO, 0),
11289 +       SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action,
11290 +                       TOI_NO_FLUSHER_THREAD, 0),
11291 +       SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action,
11292 +                       TOI_PAGESET2_FULL, 0),
11293 +       SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0),
11294 +       SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action,
11295 +                       TOI_REPLACE_SWSUSP, 0),
11296 +       SYSFS_STRING("resume_commandline", SYSFS_RW,
11297 +                       toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0,
11298 +                       NULL),
11299 +       SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL),
11300 +       SYSFS_BIT("no_load_direct", SYSFS_RW, &toi_bkd.toi_action,
11301 +                       TOI_NO_DIRECT_LOAD, 0),
11302 +       SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action,
11303 +                       TOI_FREEZER_TEST, 0),
11304 +       SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0),
11305 +       SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action,
11306 +                       TOI_TEST_FILTER_SPEED, 0),
11307 +       SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action,
11308 +                       TOI_NO_PAGESET2, 0),
11309 +       SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action,
11310 +                       TOI_NO_PS2_IF_UNNEEDED, 0),
11311 +       SYSFS_BIT("late_cpu_hotplug", SYSFS_RW, &toi_bkd.toi_action,
11312 +                       TOI_LATE_CPU_HOTPLUG, 0),
11313 +#ifdef CONFIG_TOI_KEEP_IMAGE
11314 +       SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE,
11315 +                       0),
11316 +#endif
11317 +};
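+
+/*
+ * Editorial illustration: once registered by core_load() below, these
+ * entries appear under /sys/power/tuxonice/. For example (device name
+ * hypothetical):
+ *
+ *     echo swap:/dev/sda2 > /sys/power/tuxonice/resume
+ *     cat /sys/power/tuxonice/image_exists
+ *     cat /sys/power/tuxonice/debug_info
+ */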
11318 +
11319 +static struct toi_core_fns my_fns = {
11320 +       .get_nonconflicting_page = __toi_get_nonconflicting_page,
11321 +       .post_context_save = __toi_post_context_save,
11322 +       .try_hibernate = toi_try_hibernate,
11323 +       .try_resume = toi_sys_power_disk_try_resume,
11324 +};
11325 +
11326 +/**
11327 + * core_load - initialisation of TuxOnIce core
11328 + *
11329 + * Initialise the core, beginning with sysfs. Checksum and so on are part of
11330 + * the core, but have their own initialisation routines because they either
11331 + * aren't compiled in all the time or have their own subdirectories.
11332 + **/
11333 +static __init int core_load(void)
11334 +{
11335 +       int i,
11336 +           numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
11337 +
11338 +       printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION
11339 +                       " (http://tuxonice.net)\n");
11340 +
11341 +       if (toi_sysfs_init())
11342 +               return 1;
11343 +
11344 +       for (i = 0; i < numfiles; i++)
11345 +               toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
11346 +
11347 +       toi_core_fns = &my_fns;
11348 +
11349 +       if (toi_alloc_init())
11350 +               return 1;
11351 +       if (toi_checksum_init())
11352 +               return 1;
11353 +       if (toi_usm_init())
11354 +               return 1;
11355 +       if (toi_ui_init())
11356 +               return 1;
11357 +       if (toi_poweroff_init())
11358 +               return 1;
11359 +       if (toi_cluster_init())
11360 +               return 1;
11361 +
11362 +       return 0;
11363 +}
11364 +
11365 +#ifdef MODULE
11366 +/**
11367 + * core_unload - prepare to unload the core code
11368 + **/
11369 +static __exit void core_unload(void)
11370 +{
11371 +       int i,
11372 +           numfiles = ARRAY_SIZE(sysfs_params);
11373 +
11374 +       toi_alloc_exit();
11375 +       toi_checksum_exit();
11376 +       toi_poweroff_exit();
11377 +       toi_ui_exit();
11378 +       toi_usm_exit();
11379 +       toi_cluster_exit();
11380 +
11381 +       for (i = 0; i < numfiles; i++)
11382 +               toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
11383 +
11384 +       toi_core_fns = NULL;
11385 +
11386 +       toi_sysfs_exit();
11387 +}
11388 +MODULE_LICENSE("GPL");
11389 +module_init(core_load);
11390 +module_exit(core_unload);
11391 +#else
11392 +late_initcall(core_load);
11393 +#endif
11394 diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c
11395 new file mode 100644
11396 index 0000000..ca21958
11397 --- /dev/null
11398 +++ b/kernel/power/tuxonice_io.c
11399 @@ -0,0 +1,1536 @@
11400 +/*
11401 + * kernel/power/tuxonice_io.c
11402 + *
11403 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
11404 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
11405 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
11406 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
11407 + *
11408 + * This file is released under the GPLv2.
11409 + *
11410 + * It contains high level IO routines for hibernating.
11411 + *
11412 + */
11413 +
11414 +#include <linux/suspend.h>
11415 +#include <linux/version.h>
11416 +#include <linux/utsname.h>
11417 +#include <linux/mount.h>
11418 +#include <linux/highmem.h>
11419 +#include <linux/kthread.h>
11420 +#include <linux/cpu.h>
11421 +#include <linux/fs_struct.h>
11422 +#include <asm/tlbflush.h>
11423 +
11424 +#include "tuxonice.h"
11425 +#include "tuxonice_modules.h"
11426 +#include "tuxonice_pageflags.h"
11427 +#include "tuxonice_io.h"
11428 +#include "tuxonice_ui.h"
11429 +#include "tuxonice_storage.h"
11430 +#include "tuxonice_prepare_image.h"
11431 +#include "tuxonice_extent.h"
11432 +#include "tuxonice_sysfs.h"
11433 +#include "tuxonice_builtin.h"
11434 +#include "tuxonice_checksum.h"
11435 +#include "tuxonice_alloc.h"
11436 +char alt_resume_param[256];
11437 +
11438 +/* Variables shared between threads and updated under the mutex */
11439 +static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
11440 +static int io_index, io_nextupdate, io_pc, io_pc_step;
11441 +static DEFINE_MUTEX(io_mutex);
11442 +static DEFINE_PER_CPU(struct page *, last_sought);
11443 +static DEFINE_PER_CPU(struct page *, last_high_page);
11444 +static DEFINE_PER_CPU(char *, checksum_locn);
11445 +static DEFINE_PER_CPU(struct pbe *, last_low_page);
11446 +static atomic_t io_count;
11447 +atomic_t toi_io_workers;
11448 +EXPORT_SYMBOL_GPL(toi_io_workers);
11449 +
11450 +DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
11451 +EXPORT_SYMBOL_GPL(toi_io_queue_flusher);
11452 +
11453 +int toi_bio_queue_flusher_should_finish;
11454 +EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish);
11455 +
11456 +/* Indicates that this thread should be used for checking throughput */
11457 +#define MONITOR ((void *) 1)
11458 +
11459 +/**
11460 + * toi_attempt_to_parse_resume_device - determine whether we can hibernate
+ * @quiet: Whether to suppress informational messages.
11461 + *
11462 + * Can we hibernate, using the current resume= parameter?
11463 + **/
11464 +int toi_attempt_to_parse_resume_device(int quiet)
11465 +{
11466 +       struct list_head *Allocator;
11467 +       struct toi_module_ops *thisAllocator;
11468 +       int result, returning = 0;
11469 +
11470 +       if (toi_activate_storage(0))
11471 +               return 0;
11472 +
11473 +       toiActiveAllocator = NULL;
11474 +       clear_toi_state(TOI_RESUME_DEVICE_OK);
11475 +       clear_toi_state(TOI_CAN_RESUME);
11476 +       clear_result_state(TOI_ABORTED);
11477 +
11478 +       if (!toiNumAllocators) {
11479 +               if (!quiet)
11480 +                       printk(KERN_INFO "TuxOnIce: No storage allocators have "
11481 +                               "been registered. Hibernating will be "
11482 +                               "disabled.\n");
11483 +               goto cleanup;
11484 +       }
11485 +
11486 +       if (!resume_file[0]) {
11487 +               if (!quiet)
11488 +                       printk(KERN_INFO "TuxOnIce: Resume= parameter is empty."
11489 +                               " Hibernating will be disabled.\n");
11490 +               goto cleanup;
11491 +       }
11492 +
11493 +       list_for_each(Allocator, &toiAllocators) {
11494 +               thisAllocator = list_entry(Allocator, struct toi_module_ops,
11495 +                                                               type_list);
11496 +
11497 +               /*
11498 +                * Not sure why you'd want to disable an allocator, but
11499 +                * we should honour the flag if we're providing it
11500 +                */
11501 +               if (!thisAllocator->enabled)
11502 +                       continue;
11503 +
11504 +               result = thisAllocator->parse_sig_location(
11505 +                               resume_file, (toiNumAllocators == 1),
11506 +                               quiet);
11507 +
11508 +               switch (result) {
11509 +               case -EINVAL:
11510 +                       /* For this allocator, but not a valid
11511 +                        * configuration. Error already printed. */
11512 +                       goto cleanup;
11513 +
11514 +               case 0:
11515 +                       /* For this allocator and valid. */
11516 +                       toiActiveAllocator = thisAllocator;
11517 +
11518 +                       set_toi_state(TOI_RESUME_DEVICE_OK);
11519 +                       set_toi_state(TOI_CAN_RESUME);
11520 +                       returning = 1;
11521 +                       goto cleanup;
11522 +               }
11523 +       }
11524 +       if (!quiet)
11525 +               printk(KERN_INFO "TuxOnIce: No matching enabled allocator "
11526 +                               "found. Resuming disabled.\n");
11527 +cleanup:
11528 +       toi_deactivate_storage(0);
11529 +       return returning;
11530 +}
11531 +EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device);
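+
+/*
+ * Illustrative note (an assumption, based on parse_sig_location() above):
+ * with the swap allocator enabled, resume_file would hold something like
+ * "swap:/dev/sda2"; each allocator decides whether the string is meant for
+ * it and whether the location it names is valid.
+ */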
11532 +
11533 +void attempt_to_parse_resume_device2(void)
11534 +{
11535 +       toi_prepare_usm();
11536 +       toi_attempt_to_parse_resume_device(0);
11537 +       toi_cleanup_usm();
11538 +}
11539 +EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2);
11540 +
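+/**
+ * save_restore_alt_param - swap resume_file with alt_resume_param
+ * @replace: SAVE to substitute alt_resume_param for resume_file,
+ *           RESTORE to put the saved resume_file and toi_state back.
+ * @quiet: Passed on when re-parsing the resume device.
+ **/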
11541 +void save_restore_alt_param(int replace, int quiet)
11542 +{
11543 +       static char resume_param_save[256];
11544 +       static unsigned long toi_state_save;
11545 +
11546 +       if (replace) {
11547 +               toi_state_save = toi_state;
11548 +               strcpy(resume_param_save, resume_file);
11549 +               strcpy(resume_file, alt_resume_param);
11550 +       } else {
11551 +               strcpy(resume_file, resume_param_save);
11552 +               toi_state = toi_state_save;
11553 +       }
11554 +       toi_attempt_to_parse_resume_device(quiet);
11555 +}
11556 +
11557 +void attempt_to_parse_alt_resume_param(void)
11558 +{
11559 +       int ok = 0;
11560 +
11561 +       /* Temporarily set resume_param to the poweroff value */
11562 +       if (!strlen(alt_resume_param))
11563 +               return;
11564 +
11565 +       printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n");
11566 +       save_restore_alt_param(SAVE, NOQUIET);
11567 +       if (test_toi_state(TOI_CAN_RESUME))
11568 +               ok = 1;
11569 +
11570 +       printk(KERN_INFO "=== Done ===\n");
11571 +       save_restore_alt_param(RESTORE, QUIET);
11572 +
11573 +       /* If not ok, clear the string */
11574 +       if (ok)
11575 +               return;
11576 +
11577 +       printk(KERN_INFO "Can't resume from that location; clearing "
11578 +                       "alt_resume_param.\n");
11579 +       alt_resume_param[0] = '\0';
11580 +}
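+
+/*
+ * Illustrative usage (hypothetical values): writing an alternate location to
+ * the sysfs file declared above, e.g.
+ *
+ *   echo "swap:/dev/sda3" > /sys/power/tuxonice/alt_resume_param
+ *
+ * invokes attempt_to_parse_alt_resume_param(), which validates the string
+ * and clears it if no image could be resumed from that location.
+ */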
11581 +
11582 +/**
11583 + * noresume_reset_modules - reset data structures when not resuming
11584 + *
11585 + * When we read the start of an image, modules (and especially the
11586 + * active allocator) might need to reset data structures if we
11587 + * decide to remove the image rather than resuming from it.
11588 + **/
11589 +static void noresume_reset_modules(void)
11590 +{
11591 +       struct toi_module_ops *this_filter;
11592 +
11593 +       list_for_each_entry(this_filter, &toi_filters, type_list)
11594 +               if (this_filter->noresume_reset)
11595 +                       this_filter->noresume_reset();
11596 +
11597 +       if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
11598 +               toiActiveAllocator->noresume_reset();
11599 +}
11600 +
11601 +/**
11602 + * fill_toi_header - fill the hibernate header structure
11603 + * @sh: Header data structure to be filled.
11604 + **/
11605 +static int fill_toi_header(struct toi_header *sh)
11606 +{
11607 +       int i, error;
11608 +
11609 +       error = init_header((struct swsusp_info *) sh);
11610 +       if (error)
11611 +               return error;
11612 +
11613 +       sh->pagedir = pagedir1;
11614 +       sh->pageset_2_size = pagedir2.size;
11615 +       sh->param0 = toi_result;
11616 +       sh->param1 = toi_bkd.toi_action;
11617 +       sh->param2 = toi_bkd.toi_debug_state;
11618 +       sh->param3 = toi_bkd.toi_default_console_level;
11619 +       sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
11620 +       for (i = 0; i < 4; i++)
11621 +               sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2];
11622 +       sh->bkd = boot_kernel_data_buffer;
11623 +       return 0;
11624 +}
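+
+/*
+ * Note: fill_toi_header() casts its argument to struct swsusp_info for
+ * init_header(), so struct toi_header is assumed to embed the swsusp
+ * fields at its start, with the TuxOnIce-specific fields following them.
+ */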
11625 +
11626 +/**
11627 + * rw_init_modules - initialize modules
11628 + * @rw:                Whether we are reading or writing an image.
11629 + * @which:     Section of the image being processed.
11630 + *
11631 + * Iterate over modules, preparing the ones that will be used to read or write
11632 + * data.
11633 + **/
11634 +static int rw_init_modules(int rw, int which)
11635 +{
11636 +       struct toi_module_ops *this_module;
11637 +       /* Initialise page transformers */
11638 +       list_for_each_entry(this_module, &toi_filters, type_list) {
11639 +               if (!this_module->enabled)
11640 +                       continue;
11641 +               if (this_module->rw_init && this_module->rw_init(rw, which)) {
11642 +                       abort_hibernate(TOI_FAILED_MODULE_INIT,
11643 +                               "Failed to initialize the %s filter.",
11644 +                               this_module->name);
11645 +                       return 1;
11646 +               }
11647 +       }
11648 +
11649 +       /* Initialise allocator */
11650 +       if (toiActiveAllocator->rw_init(rw, which)) {
11651 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
11652 +                               "Failed to initialise the allocator.");
11653 +               return 1;
11654 +       }
11655 +
11656 +       /* Initialise other modules */
11657 +       list_for_each_entry(this_module, &toi_modules, module_list) {
11658 +               if (!this_module->enabled ||
11659 +                   this_module->type == FILTER_MODULE ||
11660 +                   this_module->type == WRITER_MODULE)
11661 +                       continue;
11662 +               if (this_module->rw_init && this_module->rw_init(rw, which)) {
11663 +                       set_abort_result(TOI_FAILED_MODULE_INIT);
11664 +                       printk(KERN_INFO "Setting aborted flag due to module "
11665 +                                       "init failure.\n");
11666 +                       return 1;
11667 +               }
11668 +       }
11669 +
11670 +       return 0;
11671 +}
11672 +
11673 +/**
11674 + * rw_cleanup_modules - cleanup modules
11675 + * @rw:        Whether we are reading or writing an image.
11676 + *
11677 + * Cleanup components after reading or writing a set of pages.
11678 + * Only the allocator may fail.
11679 + **/
11680 +static int rw_cleanup_modules(int rw)
11681 +{
11682 +       struct toi_module_ops *this_module;
11683 +       int result = 0;
11684 +
11685 +       /* Cleanup other modules */
11686 +       list_for_each_entry(this_module, &toi_modules, module_list) {
11687 +               if (!this_module->enabled ||
11688 +                   this_module->type == FILTER_MODULE ||
11689 +                   this_module->type == WRITER_MODULE)
11690 +                       continue;
11691 +               if (this_module->rw_cleanup)
11692 +                       result |= this_module->rw_cleanup(rw);
11693 +       }
11694 +
11695 +       /* Flush data and cleanup */
11696 +       list_for_each_entry(this_module, &toi_filters, type_list) {
11697 +               if (!this_module->enabled)
11698 +                       continue;
11699 +               if (this_module->rw_cleanup)
11700 +                       result |= this_module->rw_cleanup(rw);
11701 +       }
11702 +
11703 +       result |= toiActiveAllocator->rw_cleanup(rw);
11704 +
11705 +       return result;
11706 +}
11707 +
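+/**
+ * copy_page_from_orig_page - find where to load the data for an original page
+ * @orig_page: Page whose restore destination we want.
+ *
+ * Walk the restore pblist (resuming from a per-cpu cache of the last
+ * position where possible), find the page of pbes covering @orig_page and
+ * binary search within it. This relies on the pbes being sorted by
+ * orig_address.
+ **/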
11708 +static struct page *copy_page_from_orig_page(struct page *orig_page)
11709 +{
11710 +       int is_high = PageHighMem(orig_page), index, min, max;
11711 +       struct page *high_page = NULL,
11712 +                   **my_last_high_page = &__get_cpu_var(last_high_page),
11713 +                   **my_last_sought = &__get_cpu_var(last_sought);
11714 +       struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page);
11715 +       void *compare;
11716 +
11717 +       if (is_high) {
11718 +               if (*my_last_sought && *my_last_high_page &&
11719 +                               *my_last_sought < orig_page)
11720 +                       high_page = *my_last_high_page;
11721 +               else
11722 +                       high_page = (struct page *) restore_highmem_pblist;
11723 +               this = (struct pbe *) kmap(high_page);
11724 +               compare = orig_page;
11725 +       } else {
11726 +               if (*my_last_sought && *my_last_low_page &&
11727 +                               *my_last_sought < orig_page)
11728 +                       this = *my_last_low_page;
11729 +               else
11730 +                       this = restore_pblist;
11731 +               compare = page_address(orig_page);
11732 +       }
11733 +
11734 +       *my_last_sought = orig_page;
11735 +
11736 +       /* Locate page containing pbe */
11737 +       while (this[PBES_PER_PAGE - 1].next &&
11738 +                       this[PBES_PER_PAGE - 1].orig_address < compare) {
11739 +               if (is_high) {
11740 +                       struct page *next_high_page = (struct page *)
11741 +                               this[PBES_PER_PAGE - 1].next;
11742 +                       kunmap(high_page);
11743 +                       this = kmap(next_high_page);
11744 +                       high_page = next_high_page;
11745 +               } else
11746 +                       this = this[PBES_PER_PAGE - 1].next;
11747 +       }
11748 +
11749 +       /* Do a binary search within the page */
11750 +       min = 0;
11751 +       max = PBES_PER_PAGE;
11752 +       index = PBES_PER_PAGE / 2;
11753 +       while (max - min) {
11754 +               if (!this[index].orig_address ||
11755 +                   this[index].orig_address > compare)
11756 +                       max = index;
11757 +               else if (this[index].orig_address == compare) {
11758 +                       if (is_high) {
11759 +                               struct page *page = this[index].address;
11760 +                               *my_last_high_page = high_page;
11761 +                               kunmap(high_page);
11762 +                               return page;
11763 +                       }
11764 +                       *my_last_low_page = this;
11765 +                       return virt_to_page(this[index].address);
11766 +               } else
11767 +                       min = index;
11768 +               index = ((max + min) / 2);
11769 +       }
11770 +
11771 +       if (is_high)
11772 +               kunmap(high_page);
11773 +
11774 +       abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for"
11775 +               " orig page %p. this[index].orig_address=%p.\n", orig_page,
11776 +               this[index].orig_address);
11777 +       return NULL;
11778 +}
11779 +
11780 +/**
11781 + * write_next_page - write the next page in a pageset
11782 + * @data_pfn: The pfn where the next data to write is located.
11783 + * @my_io_index: The index of the page in the pageset.
11784 + * @write_pfn: The pfn number to write in the image (where the data belongs).
11785 + * @first_filter: Where to send the page (optimisation).
11786 + *
11787 + * Get the pfn of the next page to write, map the page if necessary and do the
11788 + * write.
11789 + **/
11790 +static int write_next_page(unsigned long *data_pfn, int *my_io_index,
11791 +               unsigned long *write_pfn, struct toi_module_ops *first_filter)
11792 +{
11793 +       struct page *page;
11794 +       char **my_checksum_locn = &__get_cpu_var(checksum_locn);
11795 +       int result = 0, was_present;
11796 +
11797 +       *data_pfn = memory_bm_next_pfn(io_map);
11798 +
11799 +       /* Another thread could have beaten us to it. */
11800 +       if (*data_pfn == BM_END_OF_MAP) {
11801 +               if (atomic_read(&io_count)) {
11802 +                       printk(KERN_INFO "Ran out of pfns but io_count is "
11803 +                                       "still %d.\n", atomic_read(&io_count));
11804 +                       BUG();
11805 +               }
11806 +               return -ENODATA;
11807 +       }
11808 +
11809 +       *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
11810 +
11811 +       memory_bm_clear_bit(io_map, *data_pfn);
11812 +       page = pfn_to_page(*data_pfn);
11813 +
11814 +       was_present = kernel_page_present(page);
11815 +       if (!was_present)
11816 +               kernel_map_pages(page, 1, 1);
11817 +
11818 +       if (io_pageset == 1)
11819 +               *write_pfn = memory_bm_next_pfn(pageset1_map);
11820 +       else {
11821 +               *write_pfn = *data_pfn;
11822 +               *my_checksum_locn = tuxonice_get_next_checksum();
11823 +       }
11824 +
11825 +       mutex_unlock(&io_mutex);
11826 +
11827 +       if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn))
11828 +               return 1;
11829 +
11830 +       result = first_filter->write_page(*write_pfn, page, PAGE_SIZE);
11831 +
11832 +       if (!was_present)
11833 +               kernel_map_pages(page, 1, 0);
11834 +
11835 +       return result;
11836 +}
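+
+/*
+ * Locking note: write_next_page() and read_next_page() are entered with
+ * io_mutex held (taken in worker_rw_loop()) and drop it themselves, so the
+ * bitmap and counter updates stay serialised while the page I/O itself can
+ * proceed in parallel across workers.
+ */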
11837 +
11838 +/**
11839 + * read_next_page - read the next page in a pageset
11840 + * @my_io_index: The index of the page in the pageset.
11841 + * @write_pfn: The pfn in which the data belongs.
+ * @buffer: Page into which the image data is read.
+ * @first_filter: Module the read is sent to (optimisation).
11842 + *
11843 + * Read a page of the image into our buffer.
11844 + **/
11846 +static int read_next_page(int *my_io_index, unsigned long *write_pfn,
11847 +               struct page *buffer, struct toi_module_ops *first_filter)
11848 +{
11849 +       unsigned int buf_size;
11850 +       int result;
11851 +
11852 +       *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
11853 +       mutex_unlock(&io_mutex);
11854 +
11855 +       /*
11856 +        * Are we aborting? If so, don't submit any more I/O as
11857 +        * resetting the resume_attempted flag (from ui.c) will
11858 +        * clear the bdev flags, making this thread oops.
11859 +        */
11860 +       if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
11861 +               atomic_dec(&toi_io_workers);
11862 +               if (!atomic_read(&toi_io_workers))
11863 +                       set_toi_state(TOI_IO_STOPPED);
11864 +               while (1)
11865 +                       schedule();
11866 +       }
11867 +
11868 +       /* See toi_bio_read_page in tuxonice_block_io.c:
11869 +        * read the next page in the image.
11870 +        */
11871 +       result = first_filter->read_page(write_pfn, buffer, &buf_size);
11872 +       if (buf_size != PAGE_SIZE) {
11873 +               abort_hibernate(TOI_FAILED_IO,
11874 +                       "I/O pipeline returned %u bytes instead"
11875 +                       " of %lu.\n", buf_size, PAGE_SIZE);
11876 +               mutex_lock(&io_mutex);
11877 +               return -ENODATA;
11878 +       }
11879 +
11880 +       return result;
11881 +}
11882 +
11883 +/**
11884 + * use_read_page - copy data just read into its final destination page
11885 + **/
11886 +static void use_read_page(unsigned long write_pfn, struct page *buffer)
11887 +{
11888 +       struct page *final_page = pfn_to_page(write_pfn),
11889 +                   *copy_page = final_page;
11890 +       char *virt, *buffer_virt;
11891 +
11892 +       if (io_pageset == 1 && !load_direct(final_page)) {
11893 +               copy_page = copy_page_from_orig_page(final_page);
11894 +               BUG_ON(!copy_page);
11895 +       }
11896 +
11897 +       if (memory_bm_test_bit(io_map, write_pfn)) {
11898 +               int was_present;
11899 +
11900 +               virt = kmap(copy_page);
11901 +               buffer_virt = kmap(buffer);
11902 +               was_present = kernel_page_present(copy_page);
11903 +               if (!was_present)
11904 +                       kernel_map_pages(copy_page, 1, 1);
11905 +               memcpy(virt, buffer_virt, PAGE_SIZE);
11906 +               if (!was_present)
11907 +                       kernel_map_pages(copy_page, 1, 0);
11908 +               kunmap(copy_page);
11909 +               kunmap(buffer);
11910 +               memory_bm_clear_bit(io_map, write_pfn);
11911 +       } else {
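+               /*
+                * The pfn we read is not (or is no longer) set in io_map,
+                * so this read did not satisfy any outstanding work: undo
+                * the decrement made in read_next_page() so the loop reads
+                * another page.
+                */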
11912 +               mutex_lock(&io_mutex);
11913 +               atomic_inc(&io_count);
11914 +               mutex_unlock(&io_mutex);
11915 +       }
11916 +}
11917 +
11918 +/**
11919 + * worker_rw_loop - main loop to read/write pages
11920 + *
11921 + * The main I/O loop for reading or writing pages. The io_map bitmap is used to
11922 + * track the pages to read/write.
11923 + * If we are reading, the pages are loaded to their final (mapped) pfn.
11924 + **/
11925 +static int worker_rw_loop(void *data)
11926 +{
11927 +       unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 2,
11928 +                     jif_index = 1;
11929 +       int result = 0, my_io_index = 0, last_worker;
11930 +       struct toi_module_ops *first_filter = toi_get_next_filter(NULL);
11931 +       struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
11932 +
11933 +       current->flags |= PF_NOFREEZE;
11934 +
11935 +       atomic_inc(&toi_io_workers);
11936 +       mutex_lock(&io_mutex);
11937 +
11938 +       do {
11939 +               if (data && time_after(jiffies, next_jiffies)) {
11940 +                       next_jiffies += HZ / 2;
11941 +                       if (toiActiveAllocator->update_throughput_throttle)
11942 +                               toiActiveAllocator->update_throughput_throttle(
11943 +                                               jif_index);
11944 +                       jif_index++;
11945 +               }
11946 +
11947 +               /*
11948 +                * What page to use? If reading, don't know yet which page's
11949 +                * data will be read, so always use the buffer. If writing,
11950 +                * use the copy (Pageset1) or original page (Pageset2), but
11951 +                * always write the pfn of the original page.
11952 +                */
11953 +               if (io_write)
11954 +                       result = write_next_page(&data_pfn, &my_io_index,
11955 +                                       &write_pfn, first_filter);
11956 +               else /* Reading */
11957 +                       result = read_next_page(&my_io_index, &write_pfn,
11958 +                                       buffer, first_filter);
11959 +
11960 +               if (result == -ENODATA)
11961 +                       break;
11962 +
11963 +               if (result) {
11964 +                       io_result = result;
11965 +                       if (io_write) {
11966 +                               printk(KERN_INFO "Write chunk returned %d.\n",
11967 +                                               result);
11968 +                               abort_hibernate(TOI_FAILED_IO,
11969 +                                       "Failed to write a chunk of the "
11970 +                                       "image.");
11971 +                               mutex_lock(&io_mutex);
11972 +                               break;
11973 +                       }
11974 +                       panic("Read chunk returned (%d)", result);
11975 +               }
11976 +
11977 +               /*
11978 +                * Discard reads of resaved pages while reading ps2
11979 +                * and unwanted pages while rereading ps2 when aborting.
11980 +                */
11981 +               if (!io_write && !PageResave(pfn_to_page(write_pfn)))
11982 +                       use_read_page(write_pfn, buffer);
11983 +
11984 +               if (my_io_index + io_base == io_nextupdate)
11985 +                       io_nextupdate = toi_update_status(my_io_index +
11986 +                               io_base, io_barmax, " %d/%d MB ",
11987 +                               MB(io_base+my_io_index+1), MB(io_barmax));
11988 +
11989 +               if (my_io_index == io_pc) {
11990 +                       printk(KERN_INFO "...%d%%.\n", 20 * io_pc_step);
11991 +                       io_pc_step++;
11992 +                       io_pc = io_finish_at * io_pc_step / 5;
11993 +               }
11994 +
11995 +               toi_cond_pause(0, NULL);
11996 +
11997 +               /*
11998 +                * Subtle: If there's less I/O still to be done than threads
11999 +                * running, quit. This stops us doing I/O beyond the end of
12000 +                * the image when reading.
12001 +                *
12002 +                * Possible race condition. Two threads could do the test at
12003 +                * the same time; one should exit and one should continue.
12004 +                * Therefore we take the mutex before comparing and exiting.
12005 +                */
12006 +
12007 +               mutex_lock(&io_mutex);
12008 +
12009 +       } while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
12010 +               !(io_write && test_result_state(TOI_ABORTED)));
12011 +
12012 +       last_worker = atomic_dec_and_test(&toi_io_workers);
12013 +       mutex_unlock(&io_mutex);
12014 +
12015 +       if (last_worker) {
12016 +               toi_bio_queue_flusher_should_finish = 1;
12017 +               wake_up(&toi_io_queue_flusher);
12018 +               result = toiActiveAllocator->finish_all_io();
12019 +       }
12020 +
12021 +       toi__free_page(28, buffer);
12022 +
12023 +       return result;
12024 +}
12025 +
12026 +static int start_other_threads(void)
12027 +{
12028 +       int cpu, num_started = 0;
12029 +       struct task_struct *p;
12030 +
12031 +       for_each_online_cpu(cpu) {
12032 +               if (cpu == smp_processor_id())
12033 +                       continue;
12034 +
12035 +               p = kthread_create(worker_rw_loop, num_started ? NULL : MONITOR,
12036 +                               "ktoi_io/%d", cpu);
12037 +               if (IS_ERR(p)) {
12038 +                       printk(KERN_ERR "ktoi_io for %i failed\n", cpu);
12039 +                       continue;
12040 +               }
12041 +               kthread_bind(p, cpu);
12042 +               p->flags |= PF_MEMALLOC;
12043 +               wake_up_process(p);
12044 +               num_started++;
12045 +       }
12046 +
12047 +       return num_started;
12048 +}
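+
+/*
+ * Note: only the first thread started above receives MONITOR (as does the
+ * main thread when no helpers start, see do_rw_loop()), so exactly one
+ * worker drives update_throughput_throttle().
+ */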
12049 +
12050 +/**
12051 + * do_rw_loop - main highlevel function for reading or writing pages
12052 + *
12053 + * Create the io_map bitmap and call worker_rw_loop to perform I/O operations.
12054 + **/
12055 +static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags,
12056 +               int base, int barmax, int pageset)
12057 +{
12058 +       int index = 0, cpu, num_other_threads = 0, result = 0;
12059 +       unsigned long pfn;
12060 +
12061 +       if (!finish_at)
12062 +               return 0;
12063 +
12064 +       io_write = write;
12065 +       io_finish_at = finish_at;
12066 +       io_base = base;
12067 +       io_barmax = barmax;
12068 +       io_pageset = pageset;
12069 +       io_index = 0;
12070 +       io_pc = io_finish_at / 5;
12071 +       io_pc_step = 1;
12072 +       io_result = 0;
12073 +       io_nextupdate = base + 1;
12074 +       toi_bio_queue_flusher_should_finish = 0;
12075 +
12076 +       for_each_online_cpu(cpu) {
12077 +               per_cpu(last_sought, cpu) = NULL;
12078 +               per_cpu(last_low_page, cpu) = NULL;
12079 +               per_cpu(last_high_page, cpu) = NULL;
12080 +       }
12081 +
12082 +       /* Ensure all bits clear */
12083 +       memory_bm_clear(io_map);
12084 +
12085 +       /* Set the bits for the pages to write */
12086 +       memory_bm_position_reset(pageflags);
12087 +
12088 +       pfn = memory_bm_next_pfn(pageflags);
12089 +
12090 +       while (pfn != BM_END_OF_MAP && index < finish_at) {
12091 +               memory_bm_set_bit(io_map, pfn);
12092 +               pfn = memory_bm_next_pfn(pageflags);
12093 +               index++;
12094 +       }
12095 +
12096 +       BUG_ON(index < finish_at);
12097 +
12098 +       atomic_set(&io_count, finish_at);
12099 +
12100 +       memory_bm_position_reset(pageset1_map);
12101 +
12102 +       clear_toi_state(TOI_IO_STOPPED);
12103 +       memory_bm_position_reset(io_map);
12104 +
12105 +       if (!test_action_state(TOI_NO_MULTITHREADED_IO))
12106 +               num_other_threads = start_other_threads();
12107 +
12108 +       if (!num_other_threads || !toiActiveAllocator->io_flusher ||
12109 +               test_action_state(TOI_NO_FLUSHER_THREAD))
12110 +               worker_rw_loop(num_other_threads ? NULL : MONITOR);
12111 +       else
12112 +               result = toiActiveAllocator->io_flusher(write);
12113 +
12114 +       while (atomic_read(&toi_io_workers))
12115 +               schedule();
12116 +
12117 +       set_toi_state(TOI_IO_STOPPED);
12118 +       if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
12119 +               while (1)
12120 +                       schedule();
12121 +       }
12122 +
12123 +       if (!io_result && !result && !test_result_state(TOI_ABORTED)) {
12124 +               unsigned long next;
12125 +
12126 +               toi_update_status(io_base + io_finish_at, io_barmax,
12127 +                               " %d/%d MB ",
12128 +                               MB(io_base + io_finish_at), MB(io_barmax));
12129 +
12130 +               memory_bm_position_reset(io_map);
12131 +               next = memory_bm_next_pfn(io_map);
12132 +               if (next != BM_END_OF_MAP) {
12133 +                       printk(KERN_INFO "Finished I/O loop but still work to "
12134 +                                       "do?\nFinish at = %d. io_count = %d.\n",
12135 +                                       finish_at, atomic_read(&io_count));
12136 +                       printk(KERN_INFO "I/O bitmap still records work to do."
12137 +                                       "%ld.\n", next);
12138 +                       BUG();
12139 +               }
12140 +       }
12141 +
12142 +       return io_result ? io_result : result;
12143 +}
12144 +
12145 +/**
12146 + * write_pageset - write a pageset to disk.
12147 + * @pagedir:   Which pagedir to write.
12148 + *
12149 + * Returns:
12150 + *     Zero on success or -1 on failure.
12151 + **/
12152 +int write_pageset(struct pagedir *pagedir)
12153 +{
12154 +       int finish_at, base = 0, start_time, end_time;
12155 +       int barmax = pagedir1.size + pagedir2.size;
12156 +       long error = 0;
12157 +       struct memory_bitmap *pageflags;
12158 +
12159 +       /*
12160 +        * Even if there is nothing to read or write, the allocator
12161 +        * may need the init/cleanup for its housekeeping (e.g.
12162 +        * Pageset1 may start where pageset2 ends when writing).
12163 +        */
12164 +       finish_at = pagedir->size;
12165 +
12166 +       if (pagedir->id == 1) {
12167 +               toi_prepare_status(DONT_CLEAR_BAR,
12168 +                               "Writing kernel & process data...");
12169 +               base = pagedir2.size;
12170 +               if (test_action_state(TOI_TEST_FILTER_SPEED) ||
12171 +                   test_action_state(TOI_TEST_BIO))
12172 +                       pageflags = pageset1_map;
12173 +               else
12174 +                       pageflags = pageset1_copy_map;
12175 +       } else {
12176 +               toi_prepare_status(DONT_CLEAR_BAR, "Writing caches...");
12177 +               pageflags = pageset2_map;
12178 +       }
12179 +
12180 +       start_time = jiffies;
12181 +
12182 +       if (rw_init_modules(1, pagedir->id)) {
12183 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
12184 +                               "Failed to initialise modules for writing.");
12185 +               error = 1;
12186 +       }
12187 +
12188 +       if (!error)
12189 +               error = do_rw_loop(1, finish_at, pageflags, base, barmax,
12190 +                               pagedir->id);
12191 +
12192 +       if (rw_cleanup_modules(WRITE) && !error) {
12193 +               abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
12194 +                               "Failed to cleanup after writing.");
12195 +               error = 1;
12196 +       }
12197 +
12198 +       end_time = jiffies;
12199 +
12200 +       if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
12201 +               toi_bkd.toi_io_time[0][0] += finish_at;
12202 +               toi_bkd.toi_io_time[0][1] += (end_time - start_time);
12203 +       }
12204 +
12205 +       return error;
12206 +}
12207 +
12208 +/**
12209 + * read_pageset - highlevel function to read a pageset from disk
12210 + * @pagedir:                   pageset to read
12211 + * @overwrittenpagesonly:      Whether to read the whole pageset or
12212 + *                             only part of it.
12213 + *
12214 + * Returns:
12215 + *     Zero on success or -1 on failure.
12216 + **/
12217 +static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
12218 +{
12219 +       int result = 0, base = 0, start_time, end_time;
12220 +       int finish_at = pagedir->size;
12221 +       int barmax = pagedir1.size + pagedir2.size;
12222 +       struct memory_bitmap *pageflags;
12223 +
12224 +       if (pagedir->id == 1) {
12225 +               toi_prepare_status(DONT_CLEAR_BAR,
12226 +                               "Reading kernel & process data...");
12227 +               pageflags = pageset1_map;
12228 +       } else {
12229 +               toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
12230 +               if (overwrittenpagesonly) {
12231 +                       barmax = min(pagedir1.size, pagedir2.size);
12232 +                       finish_at = min(pagedir1.size, pagedir2.size);
12233 +               } else
12234 +                       base = pagedir1.size;
12235 +               pageflags = pageset2_map;
12236 +       }
12237 +
12238 +       start_time = jiffies;
12239 +
12240 +       if (rw_init_modules(0, pagedir->id)) {
12241 +               toiActiveAllocator->remove_image();
12242 +               result = 1;
12243 +       } else
12244 +               result = do_rw_loop(0, finish_at, pageflags, base, barmax,
12245 +                               pagedir->id);
12246 +
12247 +       if (rw_cleanup_modules(READ) && !result) {
12248 +               abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
12249 +                               "Failed to cleanup after reading.");
12250 +               result = 1;
12251 +       }
12252 +
12253 +       /* Statistics */
12254 +       end_time = jiffies;
12255 +
12256 +       if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
12257 +               toi_bkd.toi_io_time[1][0] += finish_at;
12258 +               toi_bkd.toi_io_time[1][1] += (end_time - start_time);
12259 +       }
12260 +
12261 +       return result;
12262 +}
12263 +
12264 +/**
12265 + * write_module_configs - store the modules configuration
12266 + *
12267 + * The configuration for each module is stored in the image header.
12268 + * Returns: Int
12269 + *     Zero on success, Error value otherwise.
12270 + **/
12271 +static int write_module_configs(void)
12272 +{
12273 +       struct toi_module_ops *this_module;
12274 +       char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
12275 +       int len, index = 1;
12276 +       struct toi_module_header toi_module_header;
12277 +
12278 +       if (!buffer) {
12279 +               printk(KERN_INFO "Failed to allocate a buffer for saving "
12280 +                               "module configuration info.\n");
12281 +               return -ENOMEM;
12282 +       }
12283 +
12284 +       /*
12285 +        * We have to know which data goes with which module, so we
12286 +        * write at least a length of zero for each module. Note that we
12287 +        * also assume every module's config data takes <= PAGE_SIZE.
12288 +        */
12289 +
12290 +       /* For each module (in registration order) */
12291 +       list_for_each_entry(this_module, &toi_modules, module_list) {
12292 +               if (!this_module->enabled || !this_module->storage_needed ||
12293 +                   (this_module->type == WRITER_MODULE &&
12294 +                    toiActiveAllocator != this_module))
12295 +                       continue;
12296 +
12297 +               /* Get the data from the module */
12298 +               len = 0;
12299 +               if (this_module->save_config_info)
12300 +                       len = this_module->save_config_info(buffer);
12301 +
12302 +               /* Save the details of the module */
12303 +               toi_module_header.enabled = this_module->enabled;
12304 +               toi_module_header.type = this_module->type;
12305 +               toi_module_header.index = index++;
12306 +               strncpy(toi_module_header.name, this_module->name,
12307 +                                       sizeof(toi_module_header.name));
12308 +               toiActiveAllocator->rw_header_chunk(WRITE,
12309 +                               this_module,
12310 +                               (char *) &toi_module_header,
12311 +                               sizeof(toi_module_header));
12312 +
12313 +               /* Save the size of the data and any data returned */
12314 +               toiActiveAllocator->rw_header_chunk(WRITE,
12315 +                               this_module,
12316 +                               (char *) &len, sizeof(int));
12317 +               if (len)
12318 +                       toiActiveAllocator->rw_header_chunk(
12319 +                               WRITE, this_module, buffer, len);
12320 +       }
12321 +
12322 +       /* Write a blank header to terminate the list */
12323 +       toi_module_header.name[0] = '\0';
12324 +       toiActiveAllocator->rw_header_chunk(WRITE, NULL,
12325 +                       (char *) &toi_module_header, sizeof(toi_module_header));
12326 +
12327 +       toi_free_page(22, (unsigned long) buffer);
12328 +       return 0;
12329 +}
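+
+/*
+ * On-disk layout produced above, per enabled module: a struct
+ * toi_module_header, an int length, then <length> bytes of module config
+ * data. The list ends with a toi_module_header whose name is empty;
+ * read_module_configs() below expects exactly this sequence.
+ */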
12330 +
12331 +/**
12332 + * read_one_module_config - read and configure one module
12333 + *
12334 + * Read the configuration for one module, and configure the module
12335 + * to match if it is loaded.
12336 + *
12337 + * Returns: Int
12338 + *     Zero on success, Error value otherwise.
12339 + **/
12340 +static int read_one_module_config(struct toi_module_header *header)
12341 +{
12342 +       struct toi_module_ops *this_module;
12343 +       int result, len;
12344 +       char *buffer;
12345 +
12346 +       /* Find the module */
12347 +       this_module = toi_find_module_given_name(header->name);
12348 +
12349 +       if (!this_module) {
12350 +               if (header->enabled) {
12351 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
12352 +                               "It looks like we need module %s for reading "
12353 +                               "the image but it hasn't been registered.\n",
12354 +                               header->name);
12355 +                       if (!(test_toi_state(TOI_CONTINUE_REQ)))
12356 +                               return -EINVAL;
12357 +               } else
12358 +                       printk(KERN_INFO "Module %s configuration data found, "
12359 +                               "but the module hasn't registered. Looks like "
12360 +                               "it was disabled, so we're ignoring its data.",
12361 +                               "it was disabled, so we're ignoring its data.\n",
12362 +       }
12363 +
12364 +       /* Get the length of the data (if any) */
12365 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len,
12366 +                       sizeof(int));
12367 +       if (result) {
12368 +               printk(KERN_ERR "Failed to read the length of the module %s's"
12369 +                               " configuration data.\n",
12370 +                               header->name);
12371 +               return -EINVAL;
12372 +       }
12373 +
12374 +       /* Read any data and pass to the module (if we found one) */
12375 +       if (!len)
12376 +               return 0;
12377 +
12378 +       buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
12379 +
12380 +       if (!buffer) {
12381 +               printk(KERN_ERR "Failed to allocate a buffer for reloading "
12382 +                               "module configuration info.\n");
12383 +               return -ENOMEM;
12384 +       }
12385 +
12386 +       toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len);
12387 +
12388 +       if (!this_module)
12389 +               goto out;
12390 +
12391 +       if (!this_module->load_config_info)
12392 +               printk(KERN_ERR "Huh? Module %s appears to have a "
12393 +                               "save_config_info, but not a load_config_info "
12394 +                               "function!\n", this_module->name);
12395 +       else
12396 +               this_module->load_config_info(buffer, len);
12397 +
12398 +       /*
12399 +        * Now move this module to the tail of its lists. This will put it in
12400 +        * order. Any new modules will end up at the top of the lists. They
12401 +        * should have been set to disabled when loaded (people will
12402 +        * normally not edit an initrd to load a new module and then hibernate
12403 +        * without using it!).
12404 +        */
12405 +
12406 +       toi_move_module_tail(this_module);
12407 +
12408 +       this_module->enabled = header->enabled;
12409 +
12410 +out:
12411 +       toi_free_page(23, (unsigned long) buffer);
12412 +       return 0;
12413 +}
12414 +
12415 +/**
12416 + * read_module_configs - reload module configurations from the image header.
12417 + *
12418 + * Returns: Int
12419 + *     Zero on success or an error code.
12420 + **/
12421 +static int read_module_configs(void)
12422 +{
12423 +       int result = 0;
12424 +       struct toi_module_header toi_module_header;
12425 +       struct toi_module_ops *this_module;
12426 +
12427 +       /* All modules are initially disabled. That way, if we have a module
12428 +        * loaded now that wasn't loaded when we hibernated, it won't be used
12429 +        * in trying to read the data.
12430 +        */
12431 +       list_for_each_entry(this_module, &toi_modules, module_list)
12432 +               this_module->enabled = 0;
12433 +
12434 +       /* Get the first module header */
12435 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL,
12436 +                       (char *) &toi_module_header,
12437 +                       sizeof(toi_module_header));
12438 +       if (result) {
12439 +               printk(KERN_ERR "Failed to read the next module header.\n");
12440 +               return -EINVAL;
12441 +       }
12442 +
12443 +       /* For each module (in registration order) */
12444 +       while (toi_module_header.name[0]) {
12445 +               result = read_one_module_config(&toi_module_header);
12446 +
12447 +               if (result)
12448 +                       return -EINVAL;
12449 +
12450 +               /* Get the next module header */
12451 +               result = toiActiveAllocator->rw_header_chunk(READ, NULL,
12452 +                               (char *) &toi_module_header,
12453 +                               sizeof(toi_module_header));
12454 +
12455 +               if (result) {
12456 +                       printk(KERN_ERR "Failed to read the next module "
12457 +                                       "header.\n");
12458 +                       return -EINVAL;
12459 +               }
12460 +       }
12461 +
12462 +       return 0;
12463 +}
12464 +
12465 +/**
12466 + * write_image_header - write the image header after writing the image proper
12467 + *
12468 + * Returns: Int
12469 + *     Zero on success, error value otherwise.
12470 + **/
12471 +int write_image_header(void)
12472 +{
12473 +       int ret;
12474 +       int total = pagedir1.size + pagedir2.size + 2;
12475 +       char *header_buffer = NULL;
12476 +
12477 +       /* Now prepare to write the header */
12478 +       ret = toiActiveAllocator->write_header_init();
12479 +       if (ret) {
12480 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
12481 +                               "Active allocator's write_header_init"
12482 +                               " function failed.");
12483 +               goto write_image_header_abort;
12484 +       }
12485 +
12486 +       /* Get a buffer */
12487 +       header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
12488 +       if (!header_buffer) {
12489 +               abort_hibernate(TOI_OUT_OF_MEMORY,
12490 +                       "Out of memory when trying to get page for header!");
12491 +               goto write_image_header_abort;
12492 +       }
12493 +
12494 +       /* Write hibernate header */
12495 +       if (fill_toi_header((struct toi_header *) header_buffer)) {
12496 +               abort_hibernate(TOI_OUT_OF_MEMORY,
12497 +                       "Failure to fill header information!");
12498 +               goto write_image_header_abort;
12499 +       }
12500 +       toiActiveAllocator->rw_header_chunk(WRITE, NULL,
12501 +                       header_buffer, sizeof(struct toi_header));
12502 +
12503 +       toi_free_page(24, (unsigned long) header_buffer);
12504 +
12505 +       /* Write module configurations */
12506 +       ret = write_module_configs();
12507 +       if (ret) {
12508 +               abort_hibernate(TOI_FAILED_IO,
12509 +                               "Failed to write module configs.");
12510 +               goto write_image_header_abort;
12511 +       }
12512 +
12513 +       memory_bm_write(pageset1_map, toiActiveAllocator->rw_header_chunk);
12514 +
12515 +       /* Flush data and let allocator cleanup */
12516 +       if (toiActiveAllocator->write_header_cleanup()) {
12517 +               abort_hibernate(TOI_FAILED_IO,
12518 +                               "Failed to cleanup writing header.");
12519 +               goto write_image_header_abort_no_cleanup;
12520 +       }
12521 +
12522 +       if (test_result_state(TOI_ABORTED))
12523 +               goto write_image_header_abort_no_cleanup;
12524 +
12525 +       toi_update_status(total, total, NULL);
12526 +
12527 +       return 0;
12528 +
12529 +write_image_header_abort:
12530 +       toiActiveAllocator->write_header_cleanup();
12531 +write_image_header_abort_no_cleanup:
12532 +       return -1;
12533 +}
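+
+/*
+ * Header contents, in write order: the struct toi_header, the module
+ * configuration records, then the pageset1_map bitmap. __read_pageset1()
+ * below consumes them in the same order.
+ */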
12534 +
12535 +/**
12536 + * sanity_check - check the header
12537 + * @sh:        the header which was saved at hibernate time.
12538 + *
12539 + * Perform a few checks, seeking to ensure that the kernel being
12540 + * booted matches the one hibernated. They need to match so we can
12541 + * be _sure_ things will work. It is not absolutely impossible for
12542 + * resuming from a different kernel to work, just not assured.
12543 + **/
12544 +static char *sanity_check(struct toi_header *sh)
12545 +{
12546 +       char *reason = check_image_kernel((struct swsusp_info *) sh);
12547 +
12548 +       if (reason)
12549 +               return reason;
12550 +
12551 +       if (!test_action_state(TOI_IGNORE_ROOTFS)) {
12552 +               const struct super_block *sb;
12553 +               list_for_each_entry(sb, &super_blocks, s_list) {
12554 +                       if ((!(sb->s_flags & MS_RDONLY)) &&
12555 +                           (sb->s_type->fs_flags & FS_REQUIRES_DEV))
12556 +                               return "Device backed fs has been mounted "
12557 +                                       "rw prior to resume or initrd/ramfs "
12558 +                                       "is mounted rw.";
12559 +               }
12560 +       }
12561 +
12562 +       return NULL;
12563 +}
12564 +
12565 +static DECLARE_WAIT_QUEUE_HEAD(freeze_wait);
12566 +
12567 +#define FREEZE_IN_PROGRESS (~0)
12568 +
12569 +static int freeze_result;
12570 +
12571 +static void do_freeze(struct work_struct *dummy)
12572 +{
12573 +       freeze_result = freeze_processes();
12574 +       wake_up(&freeze_wait);
12575 +}
12576 +
12577 +static DECLARE_WORK(freeze_work, do_freeze);
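+
+/*
+ * freeze_processes() runs from a workqueue rather than being called
+ * directly: __read_pageset1() marks itself PF_NOFREEZE and keeps reading
+ * the image while userspace is frozen in parallel, blocking on freeze_wait
+ * only once it needs the freeze to have completed.
+ */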
12578 +
12579 +/**
12580 + * __read_pageset1 - test for the existence of an image and attempt to load it
12581 + *
12582 + * Returns:    Int
12583 + *     Zero if image found and pageset1 successfully loaded.
12584 + *     Error if no image found or loaded.
12585 + **/
12586 +static int __read_pageset1(void)
12587 +{
12588 +       int i, result = 0;
12589 +       char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP),
12590 +            *sanity_error = NULL;
12591 +       struct toi_header *toi_header;
12592 +
12593 +       if (!header_buffer) {
12594 +               printk(KERN_INFO "Unable to allocate a page for reading the "
12595 +                               "signature.\n");
12596 +               return -ENOMEM;
12597 +       }
12598 +
12599 +       /* Check for an image */
12600 +       result = toiActiveAllocator->image_exists(1);
12601 +       if (!result) {
12602 +               result = -ENODATA;
12603 +               noresume_reset_modules();
12604 +               printk(KERN_INFO "TuxOnIce: No image found.\n");
12605 +               goto out;
12606 +       }
12607 +
12608 +       /*
12609 +        * Prepare the active allocator for reading the image header. The
12610 +        * active allocator might read its own configuration.
12611 +        *
12612 +        * NB: This call may never return: there might be a signature for
12613 +        * a different image, such that we warn the user and they choose to
12614 +        * reboot; the device ids may look erroneous (2.4 vs 2.6); or the
12615 +        * image's location may be unavailable (e.g. if it was stored over
12616 +        * a network connection).
12617 +        */
12618 +
12619 +       result = toiActiveAllocator->read_header_init();
12620 +       if (result) {
12621 +               printk(KERN_INFO "TuxOnIce: Failed to initialise reading the "
12622 +                               "image header.\n");
12623 +               goto out_remove_image;
12624 +       }
12625 +
12626 +       /* Check for noresume command line option */
12627 +       if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
12628 +               printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed "
12629 +                               "image.\n");
12630 +               goto out_remove_image;
12631 +       }
12632 +
12633 +       /* Check whether we've resumed before */
12634 +       if (test_toi_state(TOI_RESUMED_BEFORE)) {
12635 +               toi_early_boot_message(1, 0, NULL);
12636 +               if (!(test_toi_state(TOI_CONTINUE_REQ))) {
12637 +                       printk(KERN_INFO "TuxOnIce: Tried to resume before: "
12638 +                                       "Invalidated image.\n");
12639 +                       goto out_remove_image;
12640 +               }
12641 +       }
12642 +
12643 +       clear_toi_state(TOI_CONTINUE_REQ);
12644 +
12645 +       /* Read hibernate header */
12646 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL,
12647 +                       header_buffer, sizeof(struct toi_header));
12648 +       if (result < 0) {
12649 +               printk(KERN_ERR "TuxOnIce: Failed to read the image "
12650 +                               "signature.\n");
12651 +               goto out_remove_image;
12652 +       }
12653 +
12654 +       toi_header = (struct toi_header *) header_buffer;
12655 +
12656 +       /*
12657 +        * NB: This call may also result in a reboot rather than returning.
12658 +        */
12659 +
12660 +       sanity_error = sanity_check(toi_header);
12661 +       if (sanity_error) {
12662 +               toi_early_boot_message(1, TOI_CONTINUE_REQ,
12663 +                               sanity_error);
12664 +               printk(KERN_INFO "TuxOnIce: Sanity check failed.\n");
12665 +               goto out_remove_image;
12666 +       }
12667 +
12668 +       /*
12669 +        * We have an image and it looks like it will load okay.
12670 +        *
12671 +        * Get metadata from header. Don't override commandline parameters.
12672 +        *
12673 +        * We don't need to save the image size limit because it's not used
12674 +        * during resume and will be restored with the image anyway.
12675 +        */
12676 +
12677 +       memcpy((char *) &pagedir1,
12678 +               (char *) &toi_header->pagedir, sizeof(pagedir1));
12679 +       toi_result = toi_header->param0;
12680 +       toi_bkd.toi_action = toi_header->param1;
12681 +       toi_bkd.toi_debug_state = toi_header->param2;
12682 +       toi_bkd.toi_default_console_level = toi_header->param3;
12683 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
12684 +       pagedir2.size = toi_header->pageset_2_size;
12685 +       for (i = 0; i < 4; i++)
12686 +               toi_bkd.toi_io_time[i/2][i%2] =
12687 +                       toi_header->io_time[i/2][i%2];
12688 +
12689 +       set_toi_state(TOI_BOOT_KERNEL);
12690 +       boot_kernel_data_buffer = toi_header->bkd;
12691 +
12692 +       /* Read module configurations */
12693 +       result = read_module_configs();
12694 +       if (result) {
12695 +               pagedir1.size = 0;
12696 +               pagedir2.size = 0;
12697 +               printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module "
12698 +                               "configurations.\n");
12699 +               clear_action_state(TOI_KEEP_IMAGE);
12700 +               goto out_remove_image;
12701 +       }
12702 +
12703 +       toi_prepare_console();
12704 +
12705 +       set_toi_state(TOI_NOW_RESUMING);
12706 +
12707 +       if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) {
12708 +               toi_prepare_status(DONT_CLEAR_BAR, "Disable nonboot cpus.");
12709 +               if (disable_nonboot_cpus()) {
12710 +                       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
12711 +                       goto out_reset_console;
12712 +               }
12713 +       }
12714 +
12715 +       if (usermodehelper_disable())
12716 +               goto out_enable_nonboot_cpus;
12717 +
12718 +       current->flags |= PF_NOFREEZE;
12719 +       freeze_result = FREEZE_IN_PROGRESS;
12720 +
12721 +       schedule_work_on(first_cpu(cpu_online_map), &freeze_work);
12722 +
12723 +       toi_cond_pause(1, "About to read original pageset1 locations.");
12724 +
12725 +       /*
12726 +        * See _toi_rw_header_chunk in tuxonice_block_io.c:
12727 +        * Initialize pageset1_map by reading the map from the image.
12728 +        */
12729 +       if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk))
12730 +               goto out_thaw;
12731 +
12732 +       /*
12733 +        * See toi_rw_cleanup in tuxonice_block_io.c:
12734 +        * Clean up after reading the header.
12735 +        */
12736 +       result = toiActiveAllocator->read_header_cleanup();
12737 +       if (result) {
12738 +               printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the "
12739 +                               "image header.\n");
12740 +               goto out_thaw;
12741 +       }
12742 +
12743 +       toi_cond_pause(1, "About to read pagedir.");
12744 +
12745 +       /*
12746 +        * Get the addresses of pages into which we will load the kernel to
12747 +        * be copied back and check if they conflict with the ones we are using.
12748 +        */
12749 +       if (toi_get_pageset1_load_addresses()) {
12750 +               printk(KERN_INFO "TuxOnIce: Failed to get load addresses for "
12751 +                               "pageset1.\n");
12752 +               goto out_thaw;
12753 +       }
12754 +
12755 +       /* Read the original kernel back */
12756 +       toi_cond_pause(1, "About to read pageset 1.");
12757 +
12758 +       /* Given the pagemap, read back the data from disk */
12759 +       if (read_pageset(&pagedir1, 0)) {
12760 +               toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1.");
12761 +               result = -EIO;
12762 +               goto out_thaw;
12763 +       }
12764 +
12765 +       toi_cond_pause(1, "About to restore original kernel.");
12766 +       result = 0;
12767 +
12768 +       if (!test_action_state(TOI_KEEP_IMAGE) &&
12769 +           toiActiveAllocator->mark_resume_attempted)
12770 +               toiActiveAllocator->mark_resume_attempted(1);
12771 +
12772 +       wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
12773 +out:
12774 +       current->flags &= ~PF_NOFREEZE;
12775 +       toi_free_page(25, (unsigned long) header_buffer);
12776 +       return result;
12777 +
12778 +out_thaw:
12779 +       wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
12780 +       thaw_processes();
12781 +       usermodehelper_enable();
12782 +out_enable_nonboot_cpus:
12783 +       enable_nonboot_cpus();
12784 +out_reset_console:
12785 +       toi_cleanup_console();
12786 +out_remove_image:
12787 +       result = -EINVAL;
12788 +       if (!test_action_state(TOI_KEEP_IMAGE))
12789 +               toiActiveAllocator->remove_image();
12790 +       toiActiveAllocator->read_header_cleanup();
12791 +       noresume_reset_modules();
12792 +       goto out;
12793 +}
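
The error paths above follow the kernel's usual goto-based unwind ladder: each failure jumps to a label that undoes everything set up so far, and control falls through the labels in reverse order of setup. A minimal standalone sketch of the pattern (the acquire_*/release_* names are hypothetical stand-ins, not TuxOnIce calls):

    #include <stdio.h>

    /* Hypothetical resources standing in for console setup, CPU
     * hotplug, usermodehelper state and so on. */
    static int acquire_a(void) { return 0; }
    static int acquire_b(void) { return 0; }
    static int acquire_c(void) { return -1; }  /* pretend step 3 fails */
    static void release_a(void) { puts("undo a"); }
    static void release_b(void) { puts("undo b"); }

    static int do_setup(void)
    {
            int ret = acquire_a();
            if (ret)
                    goto out;
            ret = acquire_b();
            if (ret)
                    goto out_release_a;
            ret = acquire_c();
            if (ret)
                    goto out_release_b;
            return 0;               /* success: keep everything */

    out_release_b:
            release_b();            /* undo in reverse order of setup */
    out_release_a:
            release_a();
    out:
            return ret;
    }

    int main(void)
    {
            return do_setup() ? 1 : 0;
    }
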
12794 +
12795 +/**
12796 + * read_pageset1 - high-level function to read the saved pages
12797 + *
12798 + * Attempt to read the header and pageset1 of a hibernate image.
12799 + * Handle the outcome, complaining where appropriate.
12800 + **/
12801 +int read_pageset1(void)
12802 +{
12803 +       int error;
12804 +
12805 +       error = __read_pageset1();
12806 +
12807 +       if (error && error != -ENODATA && error != -EINVAL &&
12808 +                                       !test_result_state(TOI_ABORTED))
12809 +               abort_hibernate(TOI_IMAGE_ERROR,
12810 +                       "TuxOnIce: Error %d resuming\n", error);
12811 +
12812 +       return error;
12813 +}
12814 +
12815 +/**
12816 + * get_have_image_data - check the image header
12817 + **/
12818 +static char *get_have_image_data(void)
12819 +{
12820 +       char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
12821 +       struct toi_header *toi_header;
12822 +
12823 +       if (!output_buffer) {
12824 +               printk(KERN_INFO "Output buffer null.\n");
12825 +               return NULL;
12826 +       }
12827 +
12828 +       /* Check for an image */
12829 +       if (!toiActiveAllocator->image_exists(1) ||
12830 +           toiActiveAllocator->read_header_init() ||
12831 +           toiActiveAllocator->rw_header_chunk(READ, NULL,
12832 +                       output_buffer, sizeof(struct toi_header))) {
12833 +               sprintf(output_buffer, "0\n");
12834 +               /*
12835 +                * From an initrd/ramfs, catting have_image and
12836 +                * getting a result of 0 is sufficient.
12837 +                */
12838 +               clear_toi_state(TOI_BOOT_TIME);
12839 +               goto out;
12840 +       }
12841 +
12842 +       toi_header = (struct toi_header *) output_buffer;
12843 +
12844 +       sprintf(output_buffer, "1\n%s\n%s\n",
12845 +                       toi_header->uts.machine,
12846 +                       toi_header->uts.version);
12847 +
12848 +       /* Check whether we've resumed before */
12849 +       if (test_toi_state(TOI_RESUMED_BEFORE))
12850 +               strcat(output_buffer, "Resumed before.\n");
12851 +
12852 +out:
12853 +       noresume_reset_modules();
12854 +       return output_buffer;
12855 +}
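
A userspace helper in an initrd can use the buffer built above to decide whether to attempt resume. A minimal sketch, assuming the data is exposed as /sys/power/tuxonice/have_image (the exact sysfs path is an assumption here; only the "have_image" name is suggested by the comment above):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char buf[8];
            /* Assumed sysfs path; the kernel fills the buffer with "0\n"
             * (no image) or "1\n<machine>\n<version>\n" as built above. */
            FILE *f = fopen("/sys/power/tuxonice/have_image", "r");

            if (!f)
                    return 2;
            if (!fgets(buf, sizeof(buf), f)) {
                    fclose(f);
                    return 2;
            }
            fclose(f);
            return strcmp(buf, "1\n") ? 1 : 0;   /* 0 => image present */
    }
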
12856 +
12857 +/**
12858 + * read_pageset2 - read second part of the image
12859 + * @overwrittenpagesonly:      Read only pages which would have been
12860 + *                             verwritten by pageset1?
12861 + *
12862 + * Read in part or all of pageset2 of an image, depending upon
12863 + * whether we are hibernating and have only overwritten a portion
12864 + * with pageset1 pages, or are resuming and need to read them
12865 + * all.
12866 + *
12867 + * Returns: Int
12868 + *     Zero if no error, otherwise the error value.
12869 + **/
12870 +int read_pageset2(int overwrittenpagesonly)
12871 +{
12872 +       int result = 0;
12873 +
12874 +       if (!pagedir2.size)
12875 +               return 0;
12876 +
12877 +       result = read_pageset(&pagedir2, overwrittenpagesonly);
12878 +
12879 +       toi_cond_pause(1, "Pagedir 2 read.");
12880 +
12881 +       return result;
12882 +}
12883 +
12884 +/**
12885 + * image_exists_read - has an image been found?
12886 + * @page:      Output buffer
12887 + *
12888 + * Store -1, 0 or 1 in page, depending on whether an image is found.
12889 + * Incoming buffer is PAGE_SIZE and result is guaranteed
12890 + * to be far less than that, so we don't worry about
12891 + * overflow.
12892 + **/
12893 +int image_exists_read(const char *page, int count)
12894 +{
12895 +       int len = 0;
12896 +       char *result;
12897 +
12898 +       if (toi_activate_storage(0))
12899 +               return count;
12900 +
12901 +       if (!test_toi_state(TOI_RESUME_DEVICE_OK))
12902 +               toi_attempt_to_parse_resume_device(0);
12903 +
12904 +       if (!toiActiveAllocator) {
12905 +               len = sprintf((char *) page, "-1\n");
12906 +       } else {
12907 +               result = get_have_image_data();
12908 +               if (result) {
12909 +                       len = sprintf((char *) page, "%s",  result);
12910 +                       toi_free_page(26, (unsigned long) result);
12911 +               }
12912 +       }
12913 +
12914 +       toi_deactivate_storage(0);
12915 +
12916 +       return len;
12917 +}
12918 +
12919 +/**
12920 + * image_exists_write - invalidate an image if one exists
12921 + **/
12922 +int image_exists_write(const char *buffer, int count)
12923 +{
12924 +       if (toi_activate_storage(0))
12925 +               return count;
12926 +
12927 +       if (toiActiveAllocator && toiActiveAllocator->image_exists(1))
12928 +               toiActiveAllocator->remove_image();
12929 +
12930 +       toi_deactivate_storage(0);
12931 +
12932 +       clear_result_state(TOI_KEPT_IMAGE);
12933 +
12934 +       return count;
12935 +}
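
Correspondingly, any write to the matching sysfs entry discards a stored image. A hedged userspace sketch (the path is again an assumption; "image_exists" is the name suggested by the handlers above):

    #include <stdio.h>

    int main(void)
    {
            /* Assumed path; writing any value triggers remove_image(). */
            FILE *f = fopen("/sys/power/tuxonice/image_exists", "w");

            if (!f)
                    return 1;
            fputs("0\n", f);
            fclose(f);
            return 0;
    }
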
12936 diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h
12937 new file mode 100644
12938 index 0000000..01b3db6
12939 --- /dev/null
12940 +++ b/kernel/power/tuxonice_io.h
12941 @@ -0,0 +1,70 @@
12942 +/*
12943 + * kernel/power/tuxonice_io.h
12944 + *
12945 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
12946 + *
12947 + * This file is released under the GPLv2.
12948 + *
12949 + * It contains high level IO routines for hibernating.
12950 + *
12951 + */
12952 +
12953 +#include <linux/utsname.h>
12954 +#include "tuxonice_pagedir.h"
12955 +
12956 +/* Non-module data saved in our image header */
12957 +struct toi_header {
12958 +       /*
12959 +        * Mirror struct swsusp_info, but without
12960 +        * the page aligned attribute
12961 +        */
12962 +       struct new_utsname uts;
12963 +       u32 version_code;
12964 +       unsigned long num_physpages;
12965 +       int cpus;
12966 +       unsigned long image_pages;
12967 +       unsigned long pages;
12968 +       unsigned long size;
12969 +
12970 +       /* Our own data */
12971 +       unsigned long orig_mem_free;
12972 +       int page_size;
12973 +       int pageset_2_size;
12974 +       int param0;
12975 +       int param1;
12976 +       int param2;
12977 +       int param3;
12978 +       int progress0;
12979 +       int progress1;
12980 +       int progress2;
12981 +       int progress3;
12982 +       int io_time[2][2];
12983 +       struct pagedir pagedir;
12984 +       dev_t root_fs;
12985 +       unsigned long bkd; /* Boot kernel data locn */
12986 +};
12987 +
12988 +extern int write_pageset(struct pagedir *pagedir);
12989 +extern int write_image_header(void);
12990 +extern int read_pageset1(void);
12991 +extern int read_pageset2(int overwrittenpagesonly);
12992 +
12993 +extern int toi_attempt_to_parse_resume_device(int quiet);
12994 +extern void attempt_to_parse_resume_device2(void);
12995 +extern void attempt_to_parse_alt_resume_param(void);
12996 +int image_exists_read(const char *page, int count);
12997 +int image_exists_write(const char *buffer, int count);
12998 +extern void save_restore_alt_param(int replace, int quiet);
12999 +extern atomic_t toi_io_workers;
13000 +
13001 +/* Args to save_restore_alt_param */
13002 +#define RESTORE 0
13003 +#define SAVE 1
13004 +
13005 +#define NOQUIET 0
13006 +#define QUIET 1
13007 +
13008 +extern dev_t name_to_dev_t(char *line);
13009 +
13010 +extern wait_queue_head_t toi_io_queue_flusher;
13011 +extern int toi_bio_queue_flusher_should_finish;
13012 diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c
13013 new file mode 100644
13014 index 0000000..c650f5c
13015 --- /dev/null
13016 +++ b/kernel/power/tuxonice_modules.c
13017 @@ -0,0 +1,494 @@
13018 +/*
13019 + * kernel/power/tuxonice_modules.c
13020 + *
13021 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
13022 + *
13023 + */
13024 +
13025 +#include <linux/suspend.h>
13026 +#include "tuxonice.h"
13027 +#include "tuxonice_modules.h"
13028 +#include "tuxonice_sysfs.h"
13029 +#include "tuxonice_ui.h"
13030 +
13031 +LIST_HEAD(toi_filters);
13032 +LIST_HEAD(toiAllocators);
13033 +LIST_HEAD(toi_modules);
13034 +
13035 +struct toi_module_ops *toiActiveAllocator;
13036 +EXPORT_SYMBOL_GPL(toiActiveAllocator);
13037 +
13038 +static int toi_num_filters;
13039 +int toiNumAllocators, toi_num_modules;
13040 +
13041 +/*
13042 + * toi_header_storage_for_modules
13043 + *
13044 + * Returns the amount of space needed to store configuration
13045 + * data needed by the modules prior to copying back the original
13046 + * kernel. We can exclude data for pageset2 because it will be
13047 + * available anyway once the kernel is copied back.
13048 + */
13049 +long toi_header_storage_for_modules(void)
13050 +{
13051 +       struct toi_module_ops *this_module;
13052 +       int bytes = 0;
13053 +
13054 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13055 +               if (!this_module->enabled ||
13056 +                   (this_module->type == WRITER_MODULE &&
13057 +                    toiActiveAllocator != this_module))
13058 +                       continue;
13059 +               if (this_module->storage_needed) {
13060 +                       int this = this_module->storage_needed() +
13061 +                               sizeof(struct toi_module_header) +
13062 +                               sizeof(int);
13063 +                       this_module->header_requested = this;
13064 +                       bytes += this;
13065 +               }
13066 +       }
13067 +
13068 +       /* One more for the empty terminator */
13069 +       return bytes + sizeof(struct toi_module_header);
13070 +}
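
As a worked example (sizes are illustrative, not taken from any particular build): with two enabled modules whose storage_needed() returns 256 and 64 bytes, and assuming sizeof(struct toi_module_header) == 56 and sizeof(int) == 4 on the target, the function returns (256 + 56 + 4) + (64 + 56 + 4) + 56 = 496 bytes, the final 56 being the empty terminator.
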
13071 +
13072 +void print_toi_header_storage_for_modules(void)
13073 +{
13074 +       struct toi_module_ops *this_module;
13075 +       int bytes = 0;
13076 +
13077 +       printk(KERN_DEBUG "Header storage:\n");
13078 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13079 +               if (!this_module->enabled ||
13080 +                   (this_module->type == WRITER_MODULE &&
13081 +                    toiActiveAllocator != this_module))
13082 +                       continue;
13083 +               if (this_module->storage_needed) {
13084 +                       int this = this_module->storage_needed() +
13085 +                               sizeof(struct toi_module_header) +
13086 +                               sizeof(int);
13087 +                       this_module->header_requested = this;
13088 +                       bytes += this;
13089 +                       printk(KERN_DEBUG "+ %16s : %-4d/%d.\n",
13090 +                                       this_module->name,
13091 +                                       this_module->header_used, this);
13092 +               }
13093 +       }
13094 +
13095 +       printk(KERN_DEBUG "+ empty terminator : %zu.\n",
13096 +                       sizeof(struct toi_module_header));
13097 +       printk(KERN_DEBUG "                     ====\n");
13098 +       printk(KERN_DEBUG "                     %zu\n",
13099 +                       bytes + sizeof(struct toi_module_header));
13100 +}
13101 +EXPORT_SYMBOL_GPL(print_toi_header_storage_for_modules);
13102 +
13103 +/*
13104 + * toi_memory_for_modules
13105 + *
13106 + * Returns the amount of memory requested by modules for
13107 + * doing their work during the cycle.
13108 + */
13109 +
13110 +long toi_memory_for_modules(int print_parts)
13111 +{
13112 +       long bytes = 0, result;
13113 +       struct toi_module_ops *this_module;
13114 +
13115 +       if (print_parts)
13116 +               printk(KERN_INFO "Memory for modules:\n===================\n");
13117 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13118 +               int this;
13119 +               if (!this_module->enabled)
13120 +                       continue;
13121 +               if (this_module->memory_needed) {
13122 +                       this = this_module->memory_needed();
13123 +                       if (print_parts)
13124 +                               printk(KERN_INFO "%10d bytes (%5ld pages) for "
13125 +                                               "module '%s'.\n", this,
13126 +                                               DIV_ROUND_UP(this, PAGE_SIZE),
13127 +                                               this_module->name);
13128 +                       bytes += this;
13129 +               }
13130 +       }
13131 +
13132 +       result = DIV_ROUND_UP(bytes, PAGE_SIZE);
13133 +       if (print_parts)
13134 +               printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result);
13135 +
13136 +       return result;
13137 +}
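
For example, if two enabled modules report 5000 and 3000 bytes from memory_needed(), bytes totals 8000 and, assuming 4 KiB pages, the function returns DIV_ROUND_UP(8000, 4096) == 2 pages.
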
13138 +
13139 +/*
13140 + * toi_expected_compression_ratio
13141 + *
13142 + * Returns the compression ratio expected when saving the image.
13143 + */
13144 +
13145 +int toi_expected_compression_ratio(void)
13146 +{
13147 +       int ratio = 100;
13148 +       struct toi_module_ops *this_module;
13149 +
13150 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13151 +               if (!this_module->enabled)
13152 +                       continue;
13153 +               if (this_module->expected_compression)
13154 +                       ratio = ratio * this_module->expected_compression()
13155 +                               / 100;
13156 +       }
13157 +
13158 +       return ratio;
13159 +}
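
The per-module ratios compose multiplicatively: a compressor expecting 50% combined with a second filter expecting 80% yields 100 * 50/100 * 80/100 = 40, i.e. the image is expected to shrink to 40% of its raw size.
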
13160 +
13161 +/* toi_find_module_given_dir
13162 + * Functionality :     Return a module (if found), given a pointer
13163 + *                     to its directory name
13164 + */
13165 +
13166 +static struct toi_module_ops *toi_find_module_given_dir(char *name)
13167 +{
13168 +       struct toi_module_ops *this_module, *found_module = NULL;
13169 +
13170 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13171 +               if (!strcmp(name, this_module->directory)) {
13172 +                       found_module = this_module;
13173 +                       break;
13174 +               }
13175 +       }
13176 +
13177 +       return found_module;
13178 +}
13179 +
13180 +/* toi_find_module_given_name
13181 + * Functionality :     Return a module (if found), given a pointer
13182 + *                     to its name
13183 + */
13184 +
13185 +struct toi_module_ops *toi_find_module_given_name(char *name)
13186 +{
13187 +       struct toi_module_ops *this_module, *found_module = NULL;
13188 +
13189 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13190 +               if (!strcmp(name, this_module->name)) {
13191 +                       found_module = this_module;
13192 +                       break;
13193 +               }
13194 +       }
13195 +
13196 +       return found_module;
13197 +}
13198 +
13199 +/*
13200 + * toi_print_module_debug_info
13201 + * Functionality   : Get debugging info from modules into a buffer.
13202 + */
13203 +int toi_print_module_debug_info(char *buffer, int buffer_size)
13204 +{
13205 +       struct toi_module_ops *this_module;
13206 +       int len = 0;
13207 +
13208 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13209 +               if (!this_module->enabled)
13210 +                       continue;
13211 +               if (this_module->print_debug_info) {
13212 +                       int result;
13213 +                       result = this_module->print_debug_info(buffer + len,
13214 +                                       buffer_size - len);
13215 +                       len += result;
13216 +               }
13217 +       }
13218 +
13219 +       /* Ensure null terminated */
13220 +       buffer[buffer_size - 1] = '\0';
13221 +
13222 +       return len;
13223 +}
13224 +
13225 +/*
13226 + * toi_register_module
13227 + *
13228 + * Register a module.
13229 + */
13230 +int toi_register_module(struct toi_module_ops *module)
13231 +{
13232 +       int i;
13233 +       struct kobject *kobj;
13234 +
13235 +       module->enabled = 1;
13236 +
13237 +       if (toi_find_module_given_name(module->name)) {
13238 +               printk(KERN_INFO "TuxOnIce: Trying to load module %s,"
13239 +                               " which is already registered.\n",
13240 +                               module->name);
13241 +               return -EBUSY;
13242 +       }
13243 +
13244 +       switch (module->type) {
13245 +       case FILTER_MODULE:
13246 +               list_add_tail(&module->type_list, &toi_filters);
13247 +               toi_num_filters++;
13248 +               break;
13249 +       case WRITER_MODULE:
13250 +               list_add_tail(&module->type_list, &toiAllocators);
13251 +               toiNumAllocators++;
13252 +               break;
13253 +       case MISC_MODULE:
13254 +       case MISC_HIDDEN_MODULE:
13255 +               break;
13256 +       default:
13257 +               printk(KERN_ERR "Hmmm. Module '%s' has an invalid type."
13258 +                       " It has been ignored.\n", module->name);
13259 +               return -EINVAL;
13260 +       }
13261 +       list_add_tail(&module->module_list, &toi_modules);
13262 +       toi_num_modules++;
13263 +
13264 +       if ((!module->directory && !module->shared_directory) ||
13265 +                       !module->sysfs_data || !module->num_sysfs_entries)
13266 +               return 0;
13267 +
13268 +       /*
13269 +        * Modules may share a directory, but those with shared_directory
13270 +        * set must be loaded (via symbol dependencies) after parents
13271 +        * and unloaded beforehand.
13272 +        */
13273 +       if (module->shared_directory) {
13274 +               struct toi_module_ops *shared =
13275 +                       toi_find_module_given_dir(module->shared_directory);
13276 +               if (!shared) {
13277 +                       printk(KERN_ERR "TuxOnIce: Module %s wants to share "
13278 +                                       "%s's directory but %s isn't loaded.\n",
13279 +                                       module->name, module->shared_directory,
13280 +                                       module->shared_directory);
13281 +                       toi_unregister_module(module);
13282 +                       return -ENODEV;
13283 +               }
13284 +               kobj = shared->dir_kobj;
13285 +       } else {
13286 +               if (!strncmp(module->directory, "[ROOT]", 6))
13287 +                       kobj = tuxonice_kobj;
13288 +               else
13289 +                       kobj = make_toi_sysdir(module->directory);
13290 +       }
13291 +       module->dir_kobj = kobj;
13292 +       for (i = 0; i < module->num_sysfs_entries; i++) {
13293 +               int result = toi_register_sysfs_file(kobj,
13294 +                               &module->sysfs_data[i]);
13295 +               if (result)
13296 +                       return result;
13297 +       }
13298 +       return 0;
13299 +}
13300 +EXPORT_SYMBOL_GPL(toi_register_module);
13301 +
13302 +/*
13303 + * toi_unregister_module
13304 + *
13305 + * Remove a module.
13306 + */
13307 +void toi_unregister_module(struct toi_module_ops *module)
13308 +{
13309 +       int i;
13310 +
13311 +       if (module->dir_kobj)
13312 +               for (i = 0; i < module->num_sysfs_entries; i++)
13313 +                       toi_unregister_sysfs_file(module->dir_kobj,
13314 +                                       &module->sysfs_data[i]);
13315 +
13316 +       if (!module->shared_directory && module->directory &&
13317 +                       strncmp(module->directory, "[ROOT]", 6))
13318 +               remove_toi_sysdir(module->dir_kobj);
13319 +
13320 +       switch (module->type) {
13321 +       case FILTER_MODULE:
13322 +               list_del(&module->type_list);
13323 +               toi_num_filters--;
13324 +               break;
13325 +       case WRITER_MODULE:
13326 +               list_del(&module->type_list);
13327 +               toiNumAllocators--;
13328 +               if (toiActiveAllocator == module) {
13329 +                       toiActiveAllocator = NULL;
13330 +                       clear_toi_state(TOI_CAN_RESUME);
13331 +                       clear_toi_state(TOI_CAN_HIBERNATE);
13332 +               }
13333 +               break;
13334 +       case MISC_MODULE:
13335 +       case MISC_HIDDEN_MODULE:
13336 +               break;
13337 +       default:
13338 +               printk(KERN_ERR "Module '%s' has an invalid type."
13339 +                       " It has been ignored.\n", module->name);
13340 +               return;
13341 +       }
13342 +       list_del(&module->module_list);
13343 +       toi_num_modules--;
13344 +}
13345 +EXPORT_SYMBOL_GPL(toi_unregister_module);
13346 +
13347 +/*
13348 + * toi_move_module_tail
13349 + *
13350 + * Rearrange modules when reloading the config.
13351 + */
13352 +void toi_move_module_tail(struct toi_module_ops *module)
13353 +{
13354 +       switch (module->type) {
13355 +       case FILTER_MODULE:
13356 +               if (toi_num_filters > 1)
13357 +                       list_move_tail(&module->type_list, &toi_filters);
13358 +               break;
13359 +       case WRITER_MODULE:
13360 +               if (toiNumAllocators > 1)
13361 +                       list_move_tail(&module->type_list, &toiAllocators);
13362 +               break;
13363 +       case MISC_MODULE:
13364 +       case MISC_HIDDEN_MODULE:
13365 +               break;
13366 +       default:
13367 +               printk(KERN_ERR "Module '%s' has an invalid type."
13368 +                       " It has been ignored.\n", module->name);
13369 +               return;
13370 +       }
13371 +       if ((toi_num_filters + toiNumAllocators) > 1)
13372 +               list_move_tail(&module->module_list, &toi_modules);
13373 +}
13374 +
13375 +/*
13376 + * toi_initialise_modules
13377 + *
13378 + * Get ready to do some work!
13379 + */
13380 +int toi_initialise_modules(int starting_cycle, int early)
13381 +{
13382 +       struct toi_module_ops *this_module;
13383 +       int result;
13384 +
13385 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13386 +               this_module->header_requested = 0;
13387 +               this_module->header_used = 0;
13388 +               if (!this_module->enabled)
13389 +                       continue;
13390 +               if (this_module->early != early)
13391 +                       continue;
13392 +               if (this_module->initialise) {
13393 +                       toi_message(TOI_MEMORY, TOI_MEDIUM, 1,
13394 +                               "Initialising module %s.\n",
13395 +                               this_module->name);
13396 +                       result = this_module->initialise(starting_cycle);
13397 +                       if (result) {
13398 +                               toi_cleanup_modules(starting_cycle);
13399 +                               return result;
13400 +                       }
13401 +                       this_module->initialised = 1;
13402 +               }
13403 +       }
13404 +
13405 +       return 0;
13406 +}
13407 +
13408 +/*
13409 + * toi_cleanup_modules
13410 + *
13411 + * Tell modules the work is done.
13412 + */
13413 +void toi_cleanup_modules(int finishing_cycle)
13414 +{
13415 +       struct toi_module_ops *this_module;
13416 +
13417 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13418 +               if (!this_module->enabled || !this_module->initialised)
13419 +                       continue;
13420 +               if (this_module->cleanup) {
13421 +                       toi_message(TOI_MEMORY, TOI_MEDIUM, 1,
13422 +                               "Cleaning up module %s.\n",
13423 +                               this_module->name);
13424 +                       this_module->cleanup(finishing_cycle);
13425 +               }
13426 +               this_module->initialised = 0;
13427 +       }
13428 +}
13429 +
13430 +/*
13431 + * toi_get_next_filter
13432 + *
13433 + * Get the next filter in the pipeline.
13434 + */
13435 +struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought)
13436 +{
13437 +       struct toi_module_ops *last_filter = NULL, *this_filter = NULL;
13438 +
13439 +       list_for_each_entry(this_filter, &toi_filters, type_list) {
13440 +               if (!this_filter->enabled)
13441 +                       continue;
13442 +               if ((last_filter == filter_sought) || (!filter_sought))
13443 +                       return this_filter;
13444 +               last_filter = this_filter;
13445 +       }
13446 +
13447 +       return toiActiveAllocator;
13448 +}
13449 +EXPORT_SYMBOL_GPL(toi_get_next_filter);
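
Callers walk the pipeline by feeding each stage back in: passing NULL yields the first enabled filter, and the chain terminates at the active allocator. A hedged sketch of the traversal (kernel context assumed; not part of the patch):

    /* Walk the I/O pipeline from the first enabled filter to the
     * active allocator, which terminates the chain. */
    struct toi_module_ops *stage = toi_get_next_filter(NULL);

    while (stage) {
            /* ... hand the data to this stage's write_page() etc. ... */
            if (stage == toiActiveAllocator)
                    break;
            stage = toi_get_next_filter(stage);
    }
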
13450 +
13451 +/**
13452 + * toi_print_modules: Printk what support is loaded.
13453 + */
13454 +void toi_print_modules(void)
13455 +{
13456 +       struct toi_module_ops *this_module;
13457 +       int prev = 0;
13458 +
13459 +       printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for");
13460 +
13461 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13462 +               if (this_module->type == MISC_HIDDEN_MODULE)
13463 +                       continue;
13464 +               printk("%s %s%s%s", prev ? "," : "",
13465 +                               this_module->enabled ? "" : "[",
13466 +                               this_module->name,
13467 +                               this_module->enabled ? "" : "]");
13468 +               prev = 1;
13469 +       }
13470 +
13471 +       printk(".\n");
13472 +}
13473 +
13474 +/* toi_get_modules
13475 + *
13476 + * Take a reference to modules so they can't go away under us.
13477 + */
13478 +
13479 +int toi_get_modules(void)
13480 +{
13481 +       struct toi_module_ops *this_module;
13482 +
13483 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13484 +               struct toi_module_ops *this_module2;
13485 +
13486 +               if (try_module_get(this_module->module))
13487 +                       continue;
13488 +
13489 +               /* Failed! Reverse gets and return error */
13490 +               list_for_each_entry(this_module2, &toi_modules,
13491 +                               module_list) {
13492 +                       if (this_module == this_module2)
13493 +                               return -EINVAL;
13494 +                       module_put(this_module2->module);
13495 +               }
13496 +       }
13497 +       return 0;
13498 +}
13499 +
13500 +/* toi_put_modules
13501 + *
13502 + * Release our references to modules we used.
13503 + */
13504 +
13505 +void toi_put_modules(void)
13506 +{
13507 +       struct toi_module_ops *this_module;
13508 +
13509 +       list_for_each_entry(this_module, &toi_modules, module_list)
13510 +               module_put(this_module->module);
13511 +}
13512 diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h
13513 new file mode 100644
13514 index 0000000..79494e2
13515 --- /dev/null
13516 +++ b/kernel/power/tuxonice_modules.h
13517 @@ -0,0 +1,181 @@
13518 +/*
13519 + * kernel/power/tuxonice_modules.h
13520 + *
13521 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
13522 + *
13523 + * This file is released under the GPLv2.
13524 + *
13525 + * It contains declarations for modules. Modules are additions to
13526 + * TuxOnIce that provide facilities such as image compression or
13527 + * encryption, backends for storage of the image and user interfaces.
13528 + *
13529 + */
13530 +
13531 +#ifndef TOI_MODULES_H
13532 +#define TOI_MODULES_H
13533 +
13534 +/* This is the maximum size we store in the image header for a module name */
13535 +#define TOI_MAX_MODULE_NAME_LENGTH 30
13536 +
13537 +/* Per-module metadata */
13538 +struct toi_module_header {
13539 +       char name[TOI_MAX_MODULE_NAME_LENGTH];
13540 +       int enabled;
13541 +       int type;
13542 +       int index;
13543 +       int data_length;
13544 +       unsigned long signature;
13545 +};
13546 +
13547 +enum {
13548 +       FILTER_MODULE,
13549 +       WRITER_MODULE,
13550 +       MISC_MODULE, /* e.g. the block writer */
13551 +       MISC_HIDDEN_MODULE,
13552 +};
13553 +
13554 +enum {
13555 +       TOI_ASYNC,
13556 +       TOI_SYNC
13557 +};
13558 +
13559 +struct toi_module_ops {
13560 +       /* Functions common to all modules */
13561 +       int type;
13562 +       char *name;
13563 +       char *directory;
13564 +       char *shared_directory;
13565 +       struct kobject *dir_kobj;
13566 +       struct module *module;
13567 +       int enabled, early, initialised;
13568 +       struct list_head module_list;
13569 +
13570 +       /* List of filters or allocators */
13571 +       struct list_head list, type_list;
13572 +
13573 +       /*
13574 +        * Requirements for memory and storage in
13575 +        * the image header.
13576 +        */
13577 +       int (*memory_needed) (void);
13578 +       int (*storage_needed) (void);
13579 +
13580 +       int header_requested, header_used;
13581 +
13582 +       int (*expected_compression) (void);
13583 +
13584 +       /*
13585 +        * Debug info
13586 +        */
13587 +       int (*print_debug_info) (char *buffer, int size);
13588 +       int (*save_config_info) (char *buffer);
13589 +       void (*load_config_info) (char *buffer, int len);
13590 +
13591 +       /*
13592 +        * Initialise & cleanup - general routines called
13593 +        * at the start and end of a cycle.
13594 +        */
13595 +       int (*initialise) (int starting_cycle);
13596 +       void (*cleanup) (int finishing_cycle);
13597 +
13598 +       /*
13599 +        * Calls for allocating storage (allocators only).
13600 +        *
13601 +        * Header space is requested separately and cannot fail, but the
13602 +        * reservation is only applied when main storage is allocated.
13603 +        * The header space reservation is thus always set prior to
13604 +        * requesting the allocation of storage - and prior to querying
13605 +        * how much storage is available.
13606 +        */
13607 +
13608 +       int (*storage_available) (void);
13609 +       void (*reserve_header_space) (int space_requested);
13610 +       int (*allocate_storage) (int space_requested);
13611 +       int (*storage_allocated) (void);
13612 +
13613 +       /*
13614 +        * Routines used in image I/O.
13615 +        */
13616 +       int (*rw_init) (int rw, int stream_number);
13617 +       int (*rw_cleanup) (int rw);
13618 +       int (*write_page) (unsigned long index, struct page *buffer_page,
13619 +                       unsigned int buf_size);
13620 +       int (*read_page) (unsigned long *index, struct page *buffer_page,
13621 +                       unsigned int *buf_size);
13622 +       int (*io_flusher) (int rw);
13623 +
13624 +       /* Reset module if image exists but reading aborted */
13625 +       void (*noresume_reset) (void);
13626 +
13627 +       /* Read and write the metadata */
13628 +       int (*write_header_init) (void);
13629 +       int (*write_header_cleanup) (void);
13630 +
13631 +       int (*read_header_init) (void);
13632 +       int (*read_header_cleanup) (void);
13633 +
13634 +       int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
13635 +                       char *buffer_start, int buffer_size);
13636 +
13637 +       int (*rw_header_chunk_noreadahead) (int rw,
13638 +                       struct toi_module_ops *owner, char *buffer_start,
13639 +                       int buffer_size);
13640 +
13641 +       /* Attempt to parse an image location */
13642 +       int (*parse_sig_location) (char *buffer, int only_writer, int quiet);
13643 +
13644 +       /* Throttle I/O according to throughput */
13645 +       void (*update_throughput_throttle) (int jif_index);
13646 +
13647 +       /* Flush outstanding I/O */
13648 +       int (*finish_all_io) (void);
13649 +
13650 +       /* Determine whether image exists that we can restore */
13651 +       int (*image_exists) (int quiet);
13652 +
13653 +       /* Mark the image as having tried to resume */
13654 +       int (*mark_resume_attempted) (int);
13655 +
13656 +       /* Destroy image if one exists */
13657 +       int (*remove_image) (void);
13658 +
13659 +       /* Sysfs Data */
13660 +       struct toi_sysfs_data *sysfs_data;
13661 +       int num_sysfs_entries;
13662 +};
13663 +
13664 +extern int toi_num_modules, toiNumAllocators;
13665 +
13666 +extern struct toi_module_ops *toiActiveAllocator;
13667 +extern struct list_head toi_filters, toiAllocators, toi_modules;
13668 +
13669 +extern void toi_prepare_console_modules(void);
13670 +extern void toi_cleanup_console_modules(void);
13671 +
13672 +extern struct toi_module_ops *toi_find_module_given_name(char *name);
13673 +extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *);
13674 +
13675 +extern int toi_register_module(struct toi_module_ops *module);
13676 +extern void toi_move_module_tail(struct toi_module_ops *module);
13677 +
13678 +extern long toi_header_storage_for_modules(void);
13679 +extern long toi_memory_for_modules(int print_parts);
13680 +extern void print_toi_header_storage_for_modules(void);
13681 +extern int toi_expected_compression_ratio(void);
13682 +
13683 +extern int toi_print_module_debug_info(char *buffer, int buffer_size);
13684 +extern int toi_register_module(struct toi_module_ops *module);
13685 +extern void toi_unregister_module(struct toi_module_ops *module);
13686 +
13687 +extern int toi_initialise_modules(int starting_cycle, int early);
13688 +#define toi_initialise_modules_early(starting) \
13689 +       toi_initialise_modules(starting, 1)
13690 +#define toi_initialise_modules_late(starting) \
13691 +       toi_initialise_modules(starting, 0)
13692 +extern void toi_cleanup_modules(int finishing_cycle);
13693 +
13694 +extern void toi_print_modules(void);
13695 +
13696 +int toi_get_modules(void);
13697 +void toi_put_modules(void);
13698 +#endif
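
To illustrate the registration API declared above, here is a hedged skeleton of a minimal MISC_MODULE. The names are invented for the example, and a real module would normally also provide sysfs entries and, for filters or writers, the I/O hooks:

    /* Hypothetical example module - not part of the patch. */
    #include <linux/module.h>
    #include "tuxonice_modules.h"

    static int example_initialise(int starting_cycle)
    {
            return 0;       /* nothing to set up */
    }

    static struct toi_module_ops example_ops = {
            .type       = MISC_MODULE,
            .name       = "example",
            .directory  = "example",
            .module     = THIS_MODULE,
            .initialise = example_initialise,
    };

    static int __init example_load(void)
    {
            return toi_register_module(&example_ops);
    }

    static void __exit example_unload(void)
    {
            toi_unregister_module(&example_ops);
    }

    module_init(example_load);
    module_exit(example_unload);
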
13699 diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c
13700 new file mode 100644
13701 index 0000000..bb027a7
13702 --- /dev/null
13703 +++ b/kernel/power/tuxonice_netlink.c
13704 @@ -0,0 +1,343 @@
13705 +/*
13706 + * kernel/power/tuxonice_netlink.c
13707 + *
13708 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
13709 + *
13710 + * This file is released under the GPLv2.
13711 + *
13712 + * Functions for communicating with a userspace helper via netlink.
13713 + */
13714 +
13715 +
13716 +#include <linux/suspend.h>
13717 +#include "tuxonice_netlink.h"
13718 +#include "tuxonice.h"
13719 +#include "tuxonice_modules.h"
13720 +#include "tuxonice_alloc.h"
13721 +
13722 +static struct user_helper_data *uhd_list;
13723 +
13724 +/*
13725 + * Refill our pool of SKBs for use in emergencies (e.g., when eating memory and
13726 + * none can be allocated).
13727 + */
13728 +static void toi_fill_skb_pool(struct user_helper_data *uhd)
13729 +{
13730 +       while (uhd->pool_level < uhd->pool_limit) {
13731 +               struct sk_buff *new_skb =
13732 +                       alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
13733 +
13734 +               if (!new_skb)
13735 +                       break;
13736 +
13737 +               new_skb->next = uhd->emerg_skbs;
13738 +               uhd->emerg_skbs = new_skb;
13739 +               uhd->pool_level++;
13740 +       }
13741 +}
13742 +
13743 +/*
13744 + * Try to allocate a single skb. If we can't get one, try to use one from
13745 + * our pool.
13746 + */
13747 +static struct sk_buff *toi_get_skb(struct user_helper_data *uhd)
13748 +{
13749 +       struct sk_buff *skb =
13750 +               alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
13751 +
13752 +       if (skb)
13753 +               return skb;
13754 +
13755 +       skb = uhd->emerg_skbs;
13756 +       if (skb) {
13757 +               uhd->pool_level--;
13758 +               uhd->emerg_skbs = skb->next;
13759 +               skb->next = NULL;
13760 +       }
13761 +
13762 +       return skb;
13763 +}
13764 +
13765 +static void put_skb(struct user_helper_data *uhd, struct sk_buff *skb)
13766 +{
13767 +       if (uhd->pool_level < uhd->pool_limit) {
13768 +               skb->next = uhd->emerg_skbs;
13769 +               uhd->emerg_skbs = skb;
13770 +       } else
13771 +               kfree_skb(skb);
13772 +}
13773 +
13774 +void toi_send_netlink_message(struct user_helper_data *uhd,
13775 +               int type, void *params, size_t len)
13776 +{
13777 +       struct sk_buff *skb;
13778 +       struct nlmsghdr *nlh;
13779 +       void *dest;
13780 +       struct task_struct *t;
13781 +
13782 +       if (uhd->pid == -1)
13783 +               return;
13784 +
13785 +       if (uhd->debug)
13786 +               printk(KERN_ERR "toi_send_netlink_message: Send "
13787 +                               "message type %d.\n", type);
13788 +
13789 +       skb = toi_get_skb(uhd);
13790 +       if (!skb) {
13791 +               printk(KERN_INFO "toi_netlink: Can't allocate skb!\n");
13792 +               return;
13793 +       }
13794 +
13795 +       /* NLMSG_PUT contains a hidden goto nlmsg_failure */
13796 +       nlh = NLMSG_PUT(skb, 0, uhd->sock_seq, type, len);
13797 +       uhd->sock_seq++;
13798 +
13799 +       dest = NLMSG_DATA(nlh);
13800 +       if (params && len > 0)
13801 +               memcpy(dest, params, len);
13802 +
13803 +       netlink_unicast(uhd->nl, skb, uhd->pid, 0);
13804 +
13805 +       read_lock(&tasklist_lock);
13806 +       t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns);
13807 +       if (!t) {
13808 +               read_unlock(&tasklist_lock);
13809 +               if (uhd->pid > -1)
13810 +                       printk(KERN_INFO "Hmm. Can't find the userspace task"
13811 +                               " %d.\n", uhd->pid);
13812 +               return;
13813 +       }
13814 +       wake_up_process(t);
13815 +       read_unlock(&tasklist_lock);
13816 +
13817 +       yield();
13818 +
13819 +       return;
13820 +
13821 +nlmsg_failure:
13822 +       if (skb)
13823 +               put_skb(uhd, skb);
13824 +
13825 +       if (uhd->debug)
13826 +               printk(KERN_ERR "toi_send_netlink_message: Failed to send "
13827 +                               "message type %d.\n", type);
13828 +}
13829 +EXPORT_SYMBOL_GPL(toi_send_netlink_message);
13830 +
13831 +static void send_whether_debugging(struct user_helper_data *uhd)
13832 +{
13833 +       static u8 is_debugging = 1;
13834 +
13835 +       toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING,
13836 +                       &is_debugging, sizeof(u8));
13837 +}
13838 +
13839 +/*
13840 + * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we
13841 + * are hibernating.
13842 + */
13843 +static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid)
13844 +{
13845 +       struct task_struct *t;
13846 +
13847 +       if (uhd->debug)
13848 +               printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid);
13849 +
13850 +       read_lock(&tasklist_lock);
13851 +       t = find_task_by_pid_type_ns(PIDTYPE_PID, pid, &init_pid_ns);
13852 +       if (!t) {
13853 +               read_unlock(&tasklist_lock);
13854 +               printk(KERN_INFO "Strange. Can't find the userspace task %d.\n",
13855 +                               pid);
13856 +               return -EINVAL;
13857 +       }
13858 +
13859 +       t->flags |= PF_NOFREEZE;
13860 +
13861 +       read_unlock(&tasklist_lock);
13862 +       uhd->pid = pid;
13863 +
13864 +       toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0);
13865 +
13866 +       return 0;
13867 +}
13868 +
13869 +/*
13870 + * Called when the userspace process has informed us that it's ready to roll.
13871 + */
13872 +static int nl_ready(struct user_helper_data *uhd, u32 version)
13873 +{
13874 +       if (version != uhd->interface_version) {
13875 +               printk(KERN_INFO "%s userspace process using invalid interface"
13876 +                               " version (%d - kernel wants %d). Trying to "
13877 +                               "continue without it.\n",
13878 +                               uhd->name, version, uhd->interface_version);
13879 +               if (uhd->not_ready)
13880 +                       uhd->not_ready();
13881 +               return -EINVAL;
13882 +       }
13883 +
13884 +       complete(&uhd->wait_for_process);
13885 +
13886 +       return 0;
13887 +}
13888 +
13889 +void toi_netlink_close_complete(struct user_helper_data *uhd)
13890 +{
13891 +       if (uhd->nl) {
13892 +               netlink_kernel_release(uhd->nl);
13893 +               uhd->nl = NULL;
13894 +       }
13895 +
13896 +       while (uhd->emerg_skbs) {
13897 +               struct sk_buff *next = uhd->emerg_skbs->next;
13898 +               kfree_skb(uhd->emerg_skbs);
13899 +               uhd->emerg_skbs = next;
13900 +       }
13901 +
13902 +       uhd->pid = -1;
13903 +}
13904 +EXPORT_SYMBOL_GPL(toi_netlink_close_complete);
13905 +
13906 +static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd,
13907 +               struct sk_buff *skb, struct nlmsghdr *nlh)
13908 +{
13909 +       int type = nlh->nlmsg_type;
13910 +       int *data;
13911 +       int err;
13912 +
13913 +       if (uhd->debug)
13914 +               printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n",
13915 +                               type);
13916 +
13917 +       /* Let the more specific handler go first. It returns
13918 +        * 1 for valid messages that it doesn't handle. */
13919 +       err = uhd->rcv_msg(skb, nlh);
13920 +       if (err != 1)
13921 +               return err;
13922 +
13923 +       /* Only allow one task to receive NOFREEZE privileges */
13924 +       if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) {
13925 +               printk(KERN_INFO "Received an extra nofreeze-me request.\n");
13926 +               return -EBUSY;
13927 +       }
13928 +
13929 +       data = NLMSG_DATA(nlh);
13930 +
13931 +       switch (type) {
13932 +       case NETLINK_MSG_NOFREEZE_ME:
13933 +               return nl_set_nofreeze(uhd, nlh->nlmsg_pid);
13934 +       case NETLINK_MSG_GET_DEBUGGING:
13935 +               send_whether_debugging(uhd);
13936 +               return 0;
13937 +       case NETLINK_MSG_READY:
13938 +               if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) {
13939 +                       printk(KERN_INFO "Invalid ready message.\n");
13940 +                       if (uhd->not_ready)
13941 +                               uhd->not_ready();
13942 +                       return -EINVAL;
13943 +               }
13944 +               return nl_ready(uhd, (u32) *data);
13945 +       case NETLINK_MSG_CLEANUP:
13946 +               toi_netlink_close_complete(uhd);
13947 +               return 0;
13948 +       }
13949 +
13950 +       return -EINVAL;
13951 +}
13952 +
13953 +static void toi_user_rcv_skb(struct sk_buff *skb)
13954 +{
13955 +       int err;
13956 +       struct nlmsghdr *nlh;
13957 +       struct user_helper_data *uhd = uhd_list;
13958 +
13959 +       while (uhd && uhd->netlink_id != skb->sk->sk_protocol)
13960 +               uhd = uhd->next;
13961 +
13962 +       if (!uhd)
13963 +               return;
13964 +
13965 +       while (skb->len >= NLMSG_SPACE(0)) {
13966 +               u32 rlen;
13967 +
13968 +               nlh = (struct nlmsghdr *) skb->data;
13969 +               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
13970 +                       return;
13971 +
13972 +               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
13973 +               if (rlen > skb->len)
13974 +                       rlen = skb->len;
13975 +
13976 +               err = toi_nl_gen_rcv_msg(uhd, skb, nlh);
13977 +               if (err)
13978 +                       netlink_ack(skb, nlh, err);
13979 +               else if (nlh->nlmsg_flags & NLM_F_ACK)
13980 +                       netlink_ack(skb, nlh, 0);
13981 +               skb_pull(skb, rlen);
13982 +       }
13983 +}
13984 +
13985 +static int netlink_prepare(struct user_helper_data *uhd)
13986 +{
13987 +       uhd->next = uhd_list;
13988 +       uhd_list = uhd;
13989 +
13990 +       uhd->sock_seq = 0x42c0ffee;
13991 +       uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, 0,
13992 +                       toi_user_rcv_skb, NULL, THIS_MODULE);
13993 +       if (!uhd->nl) {
13994 +               printk(KERN_INFO "Failed to allocate netlink socket for %s.\n",
13995 +                               uhd->name);
13996 +               return -ENOMEM;
13997 +       }
13998 +
13999 +       toi_fill_skb_pool(uhd);
14000 +
14001 +       return 0;
14002 +}
14003 +
14004 +void toi_netlink_close(struct user_helper_data *uhd)
14005 +{
14006 +       struct task_struct *t;
14007 +
14008 +       read_lock(&tasklist_lock);
14009 +       t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns);
14010 +       if (t)
14011 +               t->flags &= ~PF_NOFREEZE;
14012 +       read_unlock(&tasklist_lock);
14013 +
14014 +       toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0);
14015 +}
14016 +EXPORT_SYMBOL_GPL(toi_netlink_close);
14017 +
14018 +int toi_netlink_setup(struct user_helper_data *uhd)
14019 +{
14020 +       /* In case userui didn't cleanup properly on us */
14021 +       toi_netlink_close_complete(uhd);
14022 +
14023 +       if (netlink_prepare(uhd) < 0) {
14024 +               printk(KERN_INFO "Netlink prepare failed.\n");
14025 +               return 1;
14026 +       }
14027 +
14028 +       if (toi_launch_userspace_program(uhd->program, uhd->netlink_id,
14029 +                               UMH_WAIT_EXEC, uhd->debug) < 0) {
14030 +               printk(KERN_INFO "Launch userspace program failed.\n");
14031 +               toi_netlink_close_complete(uhd);
14032 +               return 1;
14033 +       }
14034 +
14035 +       /* Wait 2 seconds for the userspace process to make contact */
14036 +       wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ);
14037 +
14038 +       if (uhd->pid == -1) {
14039 +               printk(KERN_INFO "%s: Failed to contact userspace process.\n",
14040 +                               uhd->name);
14041 +               toi_netlink_close_complete(uhd);
14042 +               return 1;
14043 +       }
14044 +
14045 +       return 0;
14046 +}
14047 +EXPORT_SYMBOL_GPL(toi_netlink_setup);
14048 diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h
14049 new file mode 100644
14050 index 0000000..37e174b
14051 --- /dev/null
14052 +++ b/kernel/power/tuxonice_netlink.h
14053 @@ -0,0 +1,62 @@
14054 +/*
14055 + * kernel/power/tuxonice_netlink.h
14056 + *
14057 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
14058 + *
14059 + * This file is released under the GPLv2.
14060 + *
14061 + * Declarations for functions for communicating with a userspace helper
14062 + * via netlink.
14063 + */
14064 +
14065 +#include <linux/netlink.h>
14066 +#include <net/sock.h>
14067 +
14068 +#define NETLINK_MSG_BASE 0x10
14069 +
14070 +#define NETLINK_MSG_READY 0x10
14071 +#define NETLINK_MSG_NOFREEZE_ME 0x16
14072 +#define NETLINK_MSG_GET_DEBUGGING 0x19
14073 +#define NETLINK_MSG_CLEANUP 0x24
14074 +#define NETLINK_MSG_NOFREEZE_ACK 0x27
14075 +#define NETLINK_MSG_IS_DEBUGGING 0x28
14076 +
14077 +struct user_helper_data {
14078 +       int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh);
14079 +       void (*not_ready) (void);
14080 +       struct sock *nl;
14081 +       u32 sock_seq;
14082 +       pid_t pid;
14083 +       char *comm;
14084 +       char program[256];
14085 +       int pool_level;
14086 +       int pool_limit;
14087 +       struct sk_buff *emerg_skbs;
14088 +       int skb_size;
14089 +       int netlink_id;
14090 +       char *name;
14091 +       struct user_helper_data *next;
14092 +       struct completion wait_for_process;
14093 +       u32 interface_version;
14094 +       int must_init;
14095 +       int debug;
14096 +};
14097 +
14098 +#ifdef CONFIG_NET
14099 +int toi_netlink_setup(struct user_helper_data *uhd);
14100 +void toi_netlink_close(struct user_helper_data *uhd);
14101 +void toi_send_netlink_message(struct user_helper_data *uhd,
14102 +               int type, void *params, size_t len);
14103 +void toi_netlink_close_complete(struct user_helper_data *uhd);
14104 +#else
14105 +static inline int toi_netlink_setup(struct user_helper_data *uhd)
14106 +{
14107 +       return 0;
14108 +}
14109 +
14110 +static inline void toi_netlink_close(struct user_helper_data *uhd) { }
14111 +static inline void toi_send_netlink_message(struct user_helper_data *uhd,
14112 +               int type, void *params, size_t len) { }
14113 +static inline void toi_netlink_close_complete(struct user_helper_data *uhd)
14114 +       { }
14115 +#endif
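
From the userspace side, a helper is expected to connect on the module's protocol number, claim NOFREEZE privileges, then report readiness with the matching interface version (see nl_ready() in tuxonice_netlink.c). A hedged sketch of that handshake; NETLINK_ID and INTERFACE_VERSION stand in for whatever values the particular helper uses:

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #define NETLINK_ID 29            /* assumption: module-specific id */
    #define INTERFACE_VERSION 1      /* assumption: must match kernel */
    #define NETLINK_MSG_READY 0x10
    #define NETLINK_MSG_NOFREEZE_ME 0x16

    static void send_msg(int sk, int type, void *data, int len)
    {
            char buf[64];
            struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
            struct sockaddr_nl dst = { .nl_family = AF_NETLINK };

            memset(buf, 0, sizeof(buf));
            nlh->nlmsg_len = NLMSG_LENGTH(len);
            nlh->nlmsg_type = type;
            nlh->nlmsg_pid = getpid();  /* used by nl_set_nofreeze() */
            if (data)
                    memcpy(NLMSG_DATA(nlh), data, len);
            sendto(sk, nlh, nlh->nlmsg_len, 0,
                   (struct sockaddr *)&dst, sizeof(dst));
    }

    int main(void)
    {
            unsigned int version = INTERFACE_VERSION;
            int sk = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ID);

            if (sk < 0)
                    return 1;
            send_msg(sk, NETLINK_MSG_NOFREEZE_ME, NULL, 0);
            send_msg(sk, NETLINK_MSG_READY, &version, sizeof(version));
            /* ... then service requests from the kernel ... */
            return 0;
    }
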
14116 diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c
14117 new file mode 100644
14118 index 0000000..92c1e5e
14119 --- /dev/null
14120 +++ b/kernel/power/tuxonice_pagedir.c
14121 @@ -0,0 +1,380 @@
14122 +/*
14123 + * kernel/power/tuxonice_pagedir.c
14124 + *
14125 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
14126 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
14127 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
14128 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
14129 + *
14130 + * This file is released under the GPLv2.
14131 + *
14132 + * Routines for handling pagesets.
14133 + * Note that pbes aren't actually stored as such. They're stored as
14134 + * bitmaps and extents.
14135 + */
14136 +
14137 +#include <linux/suspend.h>
14138 +#include <linux/highmem.h>
14139 +#include <linux/bootmem.h>
14140 +#include <linux/hardirq.h>
14141 +#include <linux/sched.h>
14142 +#include <linux/cpu.h>
14143 +#include <asm/tlbflush.h>
14144 +
14145 +#include "tuxonice_pageflags.h"
14146 +#include "tuxonice_ui.h"
14147 +#include "tuxonice_pagedir.h"
14148 +#include "tuxonice_prepare_image.h"
14149 +#include "tuxonice.h"
14150 +#include "tuxonice_builtin.h"
14151 +#include "tuxonice_alloc.h"
14152 +
14153 +static int ptoi_pfn;
14154 +static struct pbe *this_low_pbe;
14155 +static struct pbe **last_low_pbe_ptr;
14156 +static struct memory_bitmap dup_map1, dup_map2;
14157 +
14158 +void toi_reset_alt_image_pageset2_pfn(void)
14159 +{
14160 +       memory_bm_position_reset(pageset2_map);
14161 +}
14162 +
14163 +static struct page *first_conflicting_page;
14164 +
14165 +/*
14166 + * free_conflicting_pages
14167 + */
14168 +
14169 +static void free_conflicting_pages(void)
14170 +{
14171 +       while (first_conflicting_page) {
14172 +               struct page *next =
14173 +                       *((struct page **) kmap(first_conflicting_page));
14174 +               kunmap(first_conflicting_page);
14175 +               toi__free_page(29, first_conflicting_page);
14176 +               first_conflicting_page = next;
14177 +       }
14178 +}
14179 +
14180 +/* __toi_get_nonconflicting_page
14181 + *
14182 + * Description: Gets order zero pages that won't be overwritten
14183 + *             while copying the original pages.
14184 + */
14185 +
14186 +struct page *___toi_get_nonconflicting_page(int can_be_highmem)
14187 +{
14188 +       struct page *page;
14189 +       gfp_t flags = TOI_ATOMIC_GFP;
14190 +       if (can_be_highmem)
14191 +               flags |= __GFP_HIGHMEM;
14192 +
14193 +
14194 +       if (test_toi_state(TOI_LOADING_ALT_IMAGE) &&
14195 +                       pageset2_map &&
14196 +                       (ptoi_pfn != BM_END_OF_MAP)) {
14197 +               do {
14198 +                       ptoi_pfn = memory_bm_next_pfn(pageset2_map);
14199 +                       if (ptoi_pfn != BM_END_OF_MAP) {
14200 +                               page = pfn_to_page(ptoi_pfn);
14201 +                               if (!PagePageset1(page) &&
14202 +                                   (can_be_highmem || !PageHighMem(page)))
14203 +                                       return page;
14204 +                       }
14205 +               } while (ptoi_pfn != BM_END_OF_MAP);
14206 +       }
14207 +
14208 +       do {
14209 +               page = toi_alloc_page(29, flags);
14210 +               if (!page) {
14211 +                       printk(KERN_INFO "Failed to get nonconflicting "
14212 +                                       "page.\n");
14213 +                       return NULL;
14214 +               }
14215 +               if (PagePageset1(page)) {
14216 +                       struct page **next = (struct page **) kmap(page);
14217 +                       *next = first_conflicting_page;
14218 +                       first_conflicting_page = page;
14219 +                       kunmap(page);
14220 +               }
14221 +       } while (PagePageset1(page));
14222 +
14223 +       return page;
14224 +}
14225 +
14226 +unsigned long __toi_get_nonconflicting_page(void)
14227 +{
14228 +       struct page *page = ___toi_get_nonconflicting_page(0);
14229 +       return page ? (unsigned long) page_address(page) : 0;
14230 +}
14231 +
14232 +static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe,
14233 +               int highmem)
14234 +{
14235 +       if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1))
14236 +                    + 2 * sizeof(struct pbe)) > PAGE_SIZE) {
14237 +               struct page *new_page =
14238 +                       ___toi_get_nonconflicting_page(highmem);
14239 +               if (!new_page)
14240 +                       return ERR_PTR(-ENOMEM);
14241 +               this_pbe = (struct pbe *) kmap(new_page);
14242 +               memset(this_pbe, 0, PAGE_SIZE);
14243 +               *page_ptr = new_page;
14244 +       } else
14245 +               this_pbe++;
14246 +
14247 +       return this_pbe;
14248 +}
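
In other words, pbes are packed back to back within a page, and a fresh non-conflicting page is fetched only when the next pbe would no longer fit entirely within the current one. For example (assuming a 24-byte struct pbe on 64-bit and 4096-byte pages), a page holds 170 pbes: the last starts at offset 4056, and advancing past it fails the test since 4056 + 48 > 4096, triggering a new page.
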
14249 +
14250 +/**
14251 + * get_pageset1_load_addresses - generate pbes for conflicting pages
14252 + *
14253 + * We check here that pagedir & pages it points to won't collide
14254 + * with pages where we're going to restore from the loaded pages
14255 + * later.
14256 + *
14257 + * Returns:
14258 + *     Zero on success, nonzero if we couldn't find enough pages (shouldn't
14259 + *     happen).
14260 + **/
14261 +int toi_get_pageset1_load_addresses(void)
14262 +{
14263 +       int pfn, highallocd = 0, lowallocd = 0;
14264 +       int low_needed = pagedir1.size - get_highmem_size(pagedir1);
14265 +       int high_needed = get_highmem_size(pagedir1);
14266 +       int low_pages_for_highmem = 0;
14267 +       gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM;
14268 +       struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL,
14269 +                   *low_pbe_page;
14270 +       struct pbe **last_high_pbe_ptr = &restore_highmem_pblist,
14271 +                  *this_high_pbe = NULL;
14272 +       int orig_low_pfn, orig_high_pfn;
14273 +       int high_pbes_done = 0, low_pbes_done = 0;
14274 +       int low_direct = 0, high_direct = 0;
14275 +       int high_to_free, low_to_free, result = 0;
14276 +
14277 +       /*
14278 +        * We are about to allocate all available memory, and processes
14279 +        * might not have finished freezing yet. To avoid potential OOMs,
14280 +        * disable non-boot CPUs and do this with IRQs disabled.
14281 +        */
14282 +
14283 +       disable_nonboot_cpus();
14284 +       local_irq_disable();
14285 +
14286 +       /*
14287 +        * We need to duplicate pageset1's map because memory_bm_next_pfn's
14288 +        * state gets stomped on by the PagePageset1() test in setup_pbes.
14289 +        */
14290 +       memory_bm_create(&dup_map1, GFP_ATOMIC, 0);
14291 +       memory_bm_dup(pageset1_map, &dup_map1);
14292 +
14293 +       memory_bm_create(&dup_map2, GFP_ATOMIC, 0);
14294 +       memory_bm_dup(pageset1_map, &dup_map2);
14295 +
14296 +       memory_bm_position_reset(pageset1_map);
14297 +       memory_bm_position_reset(&dup_map1);
14298 +       memory_bm_position_reset(&dup_map2);
14299 +
14300 +       last_low_pbe_ptr = &restore_pblist;
14301 +
14302 +       /* First, allocate pages for the start of our pbe lists. */
14303 +       if (high_needed) {
14304 +               high_pbe_page = ___toi_get_nonconflicting_page(1);
14305 +               if (!high_pbe_page) {
14306 +                       result = -ENOMEM;
14307 +                       goto out;
14308 +               }
14309 +               this_high_pbe = (struct pbe *) kmap(high_pbe_page);
14310 +               memset(this_high_pbe, 0, PAGE_SIZE);
14311 +       }
14312 +
14313 +       low_pbe_page = ___toi_get_nonconflicting_page(0);
14314 +       if (!low_pbe_page) {
14315 +               result = -ENOMEM;
14316 +               goto out;
14317 +       }
14318 +       this_low_pbe = (struct pbe *) page_address(low_pbe_page);
14319 +
14320 +       /*
14321 +        * Next, allocate all possible memory to find where we can
14322 +        * load data directly into destination pages. I'd like to do
14323 +        * this in bigger chunks, but then we can't free pages
14324 +        * individually later.
14325 +        */
14326 +
14327 +       do {
14328 +               page = toi_alloc_page(30, flags);
14329 +               if (page)
14330 +                       SetPagePageset1Copy(page);
14331 +       } while (page);
14332 +
14333 +       /*
14334 +        * Find out how many high- and lowmem pages we allocated above,
14335 +        * and how many pages we can reload directly to their original
14336 +        * location.
14337 +        */
14338 +       memory_bm_position_reset(pageset1_copy_map);
14339 +       for (pfn = memory_bm_next_pfn(pageset1_copy_map); pfn != BM_END_OF_MAP;
14340 +                       pfn = memory_bm_next_pfn(pageset1_copy_map)) {
14341 +               int is_high;
14342 +               page = pfn_to_page(pfn);
14343 +               is_high = PageHighMem(page);
14344 +
14345 +               if (PagePageset1(page)) {
14346 +                       if (test_action_state(TOI_NO_DIRECT_LOAD)) {
14347 +                               ClearPagePageset1Copy(page);
14348 +                               toi__free_page(30, page);
14349 +                               continue;
14350 +                       } else {
14351 +                               if (is_high)
14352 +                                       high_direct++;
14353 +                               else
14354 +                                       low_direct++;
14355 +                       }
14356 +               } else {
14357 +                       if (is_high)
14358 +                               highallocd++;
14359 +                       else
14360 +                               lowallocd++;
14361 +               }
14362 +       }
14363 +
14364 +       high_needed -= high_direct;
14365 +       low_needed -= low_direct;
14366 +
14367 +       /*
14368 +        * Do we need to use some lowmem pages for the copies of highmem
14369 +        * pages?
14370 +        */
14371 +       if (high_needed > highallocd) {
14372 +               low_pages_for_highmem = high_needed - highallocd;
14373 +               high_needed -= low_pages_for_highmem;
14374 +               low_needed += low_pages_for_highmem;
14375 +       }
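+       /*
+        * Example: if high_needed were 100 with only 60 highmem pages
+        * allocated, 40 lowmem pages would be earmarked to hold copies
+        * of highmem data instead.
+        */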
14376 +
14377 +       high_to_free = highallocd - high_needed;
14378 +       low_to_free = lowallocd - low_needed;
14379 +
14380 +       /*
14381 +        * Now generate our pbes (which will be used for the atomic restore),
14382 +        * and free unneeded pages.
14383 +        */
14384 +       memory_bm_position_reset(pageset1_copy_map);
14385 +       for (pfn = memory_bm_next_pfn(pageset1_copy_map); pfn != BM_END_OF_MAP;
14386 +                       pfn = memory_bm_next_pfn(pageset1_copy_map)) {
14387 +               int is_high;
14388 +               page = pfn_to_page(pfn);
14389 +               is_high = PageHighMem(page);
14390 +
14391 +               if (PagePageset1(page))
14392 +                       continue;
14393 +
14394 +               /* Free the page? */
14395 +               if ((is_high && high_to_free) ||
14396 +                   (!is_high && low_to_free)) {
14397 +                       ClearPagePageset1Copy(page);
14398 +                       toi__free_page(30, page);
14399 +                       if (is_high)
14400 +                               high_to_free--;
14401 +                       else
14402 +                               low_to_free--;
14403 +                       continue;
14404 +               }
14405 +
14406 +               /* Nope. We're going to use this page. Add a pbe. */
14407 +               if (is_high || low_pages_for_highmem) {
14408 +                       struct page *orig_page;
14409 +                       high_pbes_done++;
14410 +                       if (!is_high)
14411 +                               low_pages_for_highmem--;
14412 +                       do {
14413 +                               orig_high_pfn = memory_bm_next_pfn(&dup_map1);
14414 +                               BUG_ON(orig_high_pfn == BM_END_OF_MAP);
14415 +                               orig_page = pfn_to_page(orig_high_pfn);
14416 +                       } while (!PageHighMem(orig_page) ||
14417 +                                       load_direct(orig_page));
14418 +
14419 +                       this_high_pbe->orig_address = orig_page;
14420 +                       this_high_pbe->address = page;
14421 +                       this_high_pbe->next = NULL;
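+                       /*
+                        * If we have just moved to a new page of pbes, link
+                        * it in by storing its struct page pointer in the
+                        * previous link (the restore code presumably kmaps
+                        * it to continue walking the chain); otherwise just
+                        * point the previous pbe's next at this one.
+                        */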
14422 +                       if (last_high_pbe_page != high_pbe_page) {
14423 +                               *last_high_pbe_ptr =
14424 +                                       (struct pbe *) high_pbe_page;
14425 +                               if (!last_high_pbe_page)
14426 +                                       last_high_pbe_page = high_pbe_page;
14427 +                       } else
14428 +                               *last_high_pbe_ptr = this_high_pbe;
14429 +                       last_high_pbe_ptr = &this_high_pbe->next;
14430 +                       if (last_high_pbe_page != high_pbe_page) {
14431 +                               kunmap(last_high_pbe_page);
14432 +                               last_high_pbe_page = high_pbe_page;
14433 +                       }
14434 +                       this_high_pbe = get_next_pbe(&high_pbe_page,
14435 +                                       this_high_pbe, 1);
14436 +                       if (IS_ERR(this_high_pbe)) {
14437 +                               printk(KERN_INFO "High pbe alloc failed.\n");
14438 +                               result = -ENOMEM;
14439 +                               goto out;
14440 +                       }
14441 +               } else {
14442 +                       struct page *orig_page;
14443 +                       low_pbes_done++;
14444 +                       do {
14445 +                               orig_low_pfn = memory_bm_next_pfn(&dup_map2);
14446 +                               BUG_ON(orig_low_pfn == BM_END_OF_MAP);
14447 +                               orig_page = pfn_to_page(orig_low_pfn);
14448 +                       } while (PageHighMem(orig_page) ||
14449 +                                       load_direct(orig_page));
14450 +
14451 +                       this_low_pbe->orig_address = page_address(orig_page);
14452 +                       this_low_pbe->address = page_address(page);
14453 +                       this_low_pbe->next = NULL;
14454 +                       *last_low_pbe_ptr = this_low_pbe;
14455 +                       last_low_pbe_ptr = &this_low_pbe->next;
14456 +                       this_low_pbe = get_next_pbe(&low_pbe_page,
14457 +                                       this_low_pbe, 0);
14458 +                       if (IS_ERR(this_low_pbe)) {
14459 +                               printk(KERN_INFO "Low pbe alloc failed.\n");
14460 +                               result = -ENOMEM;
14461 +                               goto out;
14462 +                       }
14462 +               }
14463 +       }
14464 +
14465 +       if (high_pbe_page)
14466 +               kunmap(high_pbe_page);
14467 +
14468 +       if (last_high_pbe_page != high_pbe_page) {
14469 +               if (last_high_pbe_page)
14470 +                       kunmap(last_high_pbe_page);
14471 +               toi__free_page(29, high_pbe_page);
14472 +       }
14473 +
14474 +       free_conflicting_pages();
14475 +
14476 +out:
14477 +       memory_bm_free(&dup_map1, 0);
14478 +       memory_bm_free(&dup_map2, 0);
14479 +
14480 +       local_irq_enable();
14481 +       enable_nonboot_cpus();
14482 +
14483 +       return result;
14484 +}
14485 +
14486 +int add_boot_kernel_data_pbe(void)
14487 +{
14488 +       this_low_pbe->address = (char *) __toi_get_nonconflicting_page();
14489 +       if (!this_low_pbe->address) {
14490 +               printk(KERN_INFO "Failed to get bkd atomic restore buffer.\n");
14491 +               return -ENOMEM;
14492 +       }
14493 +
14494 +       toi_bkd.size = sizeof(toi_bkd);
14495 +       memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd));
14496 +
14497 +       *last_low_pbe_ptr = this_low_pbe;
14498 +       this_low_pbe->orig_address = (char *) boot_kernel_data_buffer;
14499 +       this_low_pbe->next = NULL;
14500 +       return 0;
14501 +}
14502 diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h
14503 new file mode 100644
14504 index 0000000..9d0d929
14505 --- /dev/null
14506 +++ b/kernel/power/tuxonice_pagedir.h
14507 @@ -0,0 +1,50 @@
14508 +/*
14509 + * kernel/power/tuxonice_pagedir.h
14510 + *
14511 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
14512 + *
14513 + * This file is released under the GPLv2.
14514 + *
14515 + * Declarations for routines for handling pagesets.
14516 + */
14517 +
14518 +#ifndef KERNEL_POWER_PAGEDIR_H
14519 +#define KERNEL_POWER_PAGEDIR_H
14520 +
14521 +/* Pagedir
14522 + *
14523 + * Contains the metadata for a set of pages saved in the image.
14524 + */
14525 +
14526 +struct pagedir {
14527 +       int id;
14528 +       long size;
14529 +#ifdef CONFIG_HIGHMEM
14530 +       long size_high;
14531 +#endif
14532 +};
14533 +
14534 +#ifdef CONFIG_HIGHMEM
14535 +#define get_highmem_size(pagedir) (pagedir.size_high)
14536 +#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0)
14537 +#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0)
14538 +#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high)
14539 +#else
14540 +#define get_highmem_size(pagedir) (0)
14541 +#define set_highmem_size(pagedir, sz) do { } while (0)
14542 +#define inc_highmem_size(pagedir) do { } while (0)
14543 +#define get_lowmem_size(pagedir) (pagedir.size)
14544 +#endif
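+
+/*
+ * For example, with CONFIG_HIGHMEM set, get_lowmem_size(pagedir1)
+ * expands to (pagedir1.size - pagedir1.size_high); without it, the
+ * highmem accessors compile away and get_lowmem_size(pagedir1) is
+ * just (pagedir1.size).
+ */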
14545 +
14546 +extern struct pagedir pagedir1, pagedir2;
14547 +
14548 +extern void toi_copy_pageset1(void);
14549 +
14550 +extern int toi_get_pageset1_load_addresses(void);
14551 +
14552 +extern unsigned long __toi_get_nonconflicting_page(void);
14553 +struct page *___toi_get_nonconflicting_page(int can_be_highmem);
14554 +
14555 +extern void toi_reset_alt_image_pageset2_pfn(void);
14556 +extern int add_boot_kernel_data_pbe(void);
14557 +#endif
14558 diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c
14559 new file mode 100644
14560 index 0000000..626e5df
14561 --- /dev/null
14562 +++ b/kernel/power/tuxonice_pageflags.c
14563 @@ -0,0 +1,28 @@
14564 +/*
14565 + * kernel/power/tuxonice_pageflags.c
14566 + *
14567 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
14568 + *
14569 + * This file is released under the GPLv2.
14570 + *
14571 + * Routines for serialising and relocating pageflags in which we
14572 + * store our image metadata.
14573 + */
14574 +
14575 +#include <linux/list.h>
14576 +#include "tuxonice_pageflags.h"
14577 +#include "power.h"
14578 +
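+/*
+ * The serialised pageflags consist of a count (one unsigned int) plus,
+ * for each bm_block, two unsigned longs (presumably the block's start
+ * and end pfns) and PAGE_SIZE bytes of bitmap data; with 4K pages and
+ * 32-bit longs, that is 4104 bytes per block.
+ */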
14579 +int toi_pageflags_space_needed(void)
14580 +{
14581 +       int total = 0;
14582 +       struct bm_block *bb;
14583 +
14584 +       total = sizeof(unsigned int);
14585 +
14586 +       list_for_each_entry(bb, &pageset1_map->blocks, hook)
14587 +               total += 2 * sizeof(unsigned long) + PAGE_SIZE;
14588 +
14589 +       return total;
14590 +}
14591 +EXPORT_SYMBOL_GPL(toi_pageflags_space_needed);
14592 diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h
14593 new file mode 100644
14594 index 0000000..610625d
14595 --- /dev/null
14596 +++ b/kernel/power/tuxonice_pageflags.h
14597 @@ -0,0 +1,72 @@
14598 +/*
14599 + * kernel/power/tuxonice_pageflags.h
14600 + *
14601 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
14602 + *
14603 + * This file is released under the GPLv2.
14604 + */
14605 +
14606 +#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H
14607 +#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H
14608 +
14609 +extern struct memory_bitmap *pageset1_map;
14610 +extern struct memory_bitmap *pageset1_copy_map;
14611 +extern struct memory_bitmap *pageset2_map;
14612 +extern struct memory_bitmap *page_resave_map;
14613 +extern struct memory_bitmap *io_map;
14614 +extern struct memory_bitmap *nosave_map;
14615 +extern struct memory_bitmap *free_map;
14616 +
14617 +#define PagePageset1(page) \
14618 +       (memory_bm_test_bit(pageset1_map, page_to_pfn(page)))
14619 +#define SetPagePageset1(page) \
14620 +       (memory_bm_set_bit(pageset1_map, page_to_pfn(page)))
14621 +#define ClearPagePageset1(page) \
14622 +       (memory_bm_clear_bit(pageset1_map, page_to_pfn(page)))
14623 +
14624 +#define PagePageset1Copy(page) \
14625 +       (memory_bm_test_bit(pageset1_copy_map, page_to_pfn(page)))
14626 +#define SetPagePageset1Copy(page) \
14627 +       (memory_bm_set_bit(pageset1_copy_map, page_to_pfn(page)))
14628 +#define ClearPagePageset1Copy(page) \
14629 +       (memory_bm_clear_bit(pageset1_copy_map, page_to_pfn(page)))
14630 +
14631 +#define PagePageset2(page) \
14632 +       (memory_bm_test_bit(pageset2_map, page_to_pfn(page)))
14633 +#define SetPagePageset2(page) \
14634 +       (memory_bm_set_bit(pageset2_map, page_to_pfn(page)))
14635 +#define ClearPagePageset2(page) \
14636 +       (memory_bm_clear_bit(pageset2_map, page_to_pfn(page)))
14637 +
14638 +#define PageWasRW(page) \
14639 +       (memory_bm_test_bit(pageset2_map, page_to_pfn(page)))
14640 +#define SetPageWasRW(page) \
14641 +       (memory_bm_set_bit(pageset2_map, page_to_pfn(page)))
14642 +#define ClearPageWasRW(page) \
14643 +       (memory_bm_clear_bit(pageset2_map, page_to_pfn(page)))
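+/*
+ * Note that the WasRW flags are aliases for pageset2_map; the two uses
+ * apparently never overlap in time, so the same bitmap can be shared.
+ */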
14644 +
14645 +#define PageResave(page) (page_resave_map ? \
14646 +       memory_bm_test_bit(page_resave_map, page_to_pfn(page)) : 0)
14647 +#define SetPageResave(page) \
14648 +       (memory_bm_set_bit(page_resave_map, page_to_pfn(page)))
14649 +#define ClearPageResave(page) \
14650 +       (memory_bm_clear_bit(page_resave_map, page_to_pfn(page)))
14651 +
14652 +#define PageNosave(page) (nosave_map ? \
14653 +               memory_bm_test_bit(nosave_map, page_to_pfn(page)) : 0)
14654 +#define SetPageNosave(page) \
14655 +       (memory_bm_set_bit(nosave_map, page_to_pfn(page)))
14656 +#define ClearPageNosave(page) \
14657 +       (memory_bm_clear_bit(nosave_map, page_to_pfn(page)))
14658 +
14659 +#define PageNosaveFree(page) (free_map ? \
14660 +               memory_bm_test_bit(free_map, page_to_pfn(page)) : 0)
14661 +#define SetPageNosaveFree(page) \
14662 +       (memory_bm_set_bit(free_map, page_to_pfn(page)))
14663 +#define ClearPageNosaveFree(page) \
14664 +       (memory_bm_clear_bit(free_map, page_to_pfn(page)))
14665 +
14666 +extern void save_pageflags(struct memory_bitmap *pagemap);
14667 +extern int load_pageflags(struct memory_bitmap *pagemap);
14668 +extern int toi_pageflags_space_needed(void);
14669 +#endif
14670 diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c
14671 new file mode 100644
14672 index 0000000..9cdb489
14673 --- /dev/null
14674 +++ b/kernel/power/tuxonice_power_off.c
14675 @@ -0,0 +1,282 @@
14676 +/*
14677 + * kernel/power/tuxonice_power_off.c
14678 + *
14679 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
14680 + *
14681 + * This file is released under the GPLv2.
14682 + *
14683 + * Support for powering down.
14684 + */
14685 +
14686 +#include <linux/device.h>
14687 +#include <linux/suspend.h>
14688 +#include <linux/mm.h>
14689 +#include <linux/pm.h>
14690 +#include <linux/reboot.h>
14691 +#include <linux/cpu.h>
14692 +#include <linux/console.h>
14693 +#include <linux/fs.h>
14694 +#include "tuxonice.h"
14695 +#include "tuxonice_ui.h"
14696 +#include "tuxonice_power_off.h"
14697 +#include "tuxonice_sysfs.h"
14698 +#include "tuxonice_modules.h"
14699 +#include "tuxonice_io.h"
14700 +
14701 +unsigned long toi_poweroff_method; /* 0 - Kernel power off */
14702 +EXPORT_SYMBOL_GPL(toi_poweroff_method);
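+
+/*
+ * Methods as handled by __toi_power_down() below: 0 = normal kernel
+ * power off, 3 = suspend to RAM as well as disk ("suspend to both"),
+ * 4 = ACPI platform method (hibernation_platform_enter), 5 = historic
+ * entry that now just falls through to the normal power off.
+ */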
14703 +
14704 +static int wake_delay;
14705 +static char lid_state_file[256], wake_alarm_dir[256];
14706 +static struct file *lid_file, *alarm_file, *epoch_file;
14707 +static int post_wake_state = -1;
14708 +
14709 +static int did_suspend_to_both;
14710 +
14711 +/*
14712 + * __toi_power_down
14713 + * Functionality   : Powers down or reboots the computer once the image
14714 + *                   has been written to disk.
14715 + * Key Assumptions : Able to reboot/power down via the code called, or that
14716 + *                   the warning emitted if the calls fail will be visible
14717 + *                   to the user (i.e. printk resumes devices).
14718 + */
14719 +
14720 +static void __toi_power_down(int method)
14721 +{
14722 +       int error;
14723 +
14724 +       toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." :
14725 +                       "Powering down.");
14726 +
14727 +       if (test_result_state(TOI_ABORTED))
14728 +               goto out;
14729 +
14730 +       if (test_action_state(TOI_REBOOT))
14731 +               kernel_restart(NULL);
14732 +
14733 +       switch (method) {
14734 +       case 0:
14735 +               break;
14736 +       case 3:
14737 +               /*
14738 +                * Re-read the overwritten part of pageset2 to make post-resume
14739 +                * faster.
14740 +                */
14741 +               if (read_pageset2(1))
14742 +                       panic("Attempt to reload pagedir 2 failed. "
14743 +                                       "Try rebooting.");
14744 +
14745 +               error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
14746 +               if (!error) {
14747 +                       error = suspend_devices_and_enter(PM_SUSPEND_MEM);
14748 +                       if (!error)
14749 +                               did_suspend_to_both = 1;
14750 +               }
14751 +               pm_notifier_call_chain(PM_POST_SUSPEND);
14752 +
14753 +               /* Success - we're now post-resume-from-ram */
14754 +               if (did_suspend_to_both)
14755 +                       return;
14756 +
14757 +               /* Failed to suspend to ram - do normal power off */
14758 +               break;
14759 +       case 4:
14760 +               /*
14761 +                * If this succeeds, it doesn't return. If it fails, fall
14762 +                * back to a simple powerdown.
14763 +                */
14764 +               hibernation_platform_enter();
14765 +               break;
14766 +       case 5:
14767 +               /* Historic entry; now just falls through to power off. */
14768 +               break;
14769 +       }
14770 +
14771 +       if (method && method != 5)
14772 +               toi_cond_pause(1,
14773 +                       "Falling back to alternate power off method.");
14774 +
14775 +       if (test_result_state(TOI_ABORTED))
14776 +               goto out;
14777 +
14778 +       kernel_power_off();
14779 +       kernel_halt();
14780 +       toi_cond_pause(1, "Powerdown failed.");
14781 +       while (1)
14782 +               cpu_relax();
14783 +
14784 +out:
14785 +       if (read_pageset2(1))
14786 +               panic("Attempt to reload pagedir 2 failed. Try rebooting.");
14787 +       return;
14788 +}
14789 +
14790 +#define CLOSE_FILE(file) \
14791 +       do { if (file) { \
14792 +               filp_close(file, NULL); file = NULL; \
14793 +       } } while (0)
14794 +
14795 +static void powerdown_cleanup(int toi_or_resume)
14796 +{
14797 +       if (!toi_or_resume)
14798 +               return;
14799 +
14800 +       CLOSE_FILE(lid_file);
14801 +       CLOSE_FILE(alarm_file);
14802 +       CLOSE_FILE(epoch_file);
14803 +}
14804 +
14805 +static void open_file(char *format, char *arg, struct file **var, int mode,
14806 +               char *desc)
14807 +{
14808 +       char buf[256];
14809 +
14810 +       if (strlen(arg)) {
14811 +               snprintf(buf, sizeof(buf), format, arg);
14812 +               *var = filp_open(buf, mode, 0);
14813 +               if (IS_ERR(*var) || !*var) {
14814 +                       printk(KERN_INFO "Failed to open %s file '%s' (%p).\n",
14815 +                               desc, buf, *var);
14816 +                       *var = NULL;
14817 +               }
14818 +       }
14819 +}
14820 +
14821 +static int powerdown_init(int toi_or_resume)
14822 +{
14823 +       if (!toi_or_resume)
14824 +               return 0;
14825 +
14826 +       did_suspend_to_both = 0;
14827 +
14828 +       open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file,
14829 +                       O_RDONLY, "lid");
14830 +
14831 +       if (strlen(wake_alarm_dir)) {
14832 +               open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir,
14833 +                               &alarm_file, O_WRONLY, "alarm");
14834 +
14835 +               open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir,
14836 +                               &epoch_file, O_RDONLY, "epoch");
14837 +       }
14838 +
14839 +       return 0;
14840 +}
14841 +
14842 +static int lid_closed(void)
14843 +{
14844 +       char array[25];
14845 +       ssize_t size;
14846 +       loff_t pos = 0;
14847 +
14848 +       if (!lid_file)
14849 +               return 0;
14850 +
14851 +       size = vfs_read(lid_file, (char __user *) array, 24, &pos);
14852 +       if ((int) size < 1) {
14853 +               printk(KERN_INFO "Failed to read lid state file (%d).\n",
14854 +                       (int) size);
14855 +               return 0;
14856 +       }
14857 +       array[size] = 0;        /* Ensure NUL termination before strcmp. */
14857 +
14858 +       if (!strcmp(array, "state:      closed\n"))
14859 +               return 1;
14860 +
14861 +       return 0;
14862 +}
14863 +
14864 +static void write_alarm_file(int value)
14865 +{
14866 +       ssize_t size;
14867 +       char buf[40];
14868 +       loff_t pos = 0;
14869 +
14870 +       if (!alarm_file)
14871 +               return;
14872 +
14873 +       sprintf(buf, "%d\n", value);
14874 +
14875 +       size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos);
14876 +
14877 +       if (size < 0)
14878 +               printk(KERN_INFO "Error %d writing alarm value %s.\n",
14879 +                               (int) size, buf);
14880 +}
14881 +
14882 +/**
14883 + * toi_check_resleep: See whether to powerdown again after waking.
14884 + *
14885 + * After waking, check whether we should powerdown again in a (usually
14886 + * different) way. We only do this if the lid switch is still closed.
14887 + */
14888 +void toi_check_resleep(void)
14889 +{
14890 +       /* We only get here if we suspended to RAM and have now woken. */
14891 +       if (lid_closed() && post_wake_state >= 0)
14892 +               __toi_power_down(post_wake_state);
14893 +}
14894 +
14895 +void toi_power_down(void)
14896 +{
14897 +       if (alarm_file && wake_delay) {
14898 +               char array[25];
14899 +               loff_t pos = 0;
14900 +               ssize_t size = vfs_read(epoch_file, (char __user *) array,
14901 +                               24, &pos);
14902 +
14903 +               if (((int) size) < 1)
14904 +                       printk(KERN_INFO "Failed to read epoch file (%d).\n",
14905 +                                       (int) size);
14906 +               else {
14907 +                       unsigned long since_epoch;
14908 +                       array[size] = 0; /* NUL terminate for strtoul. */
14909 +                       if (!strict_strtoul(array, 0, &since_epoch)) {
14909 +                               /* Clear any wakeup time. */
14910 +                               write_alarm_file(0);
14911 +
14912 +                               /* Set new wakeup time. */
14913 +                               write_alarm_file(since_epoch + wake_delay);
14914 +                       }
14915 +               }
14916 +       }
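+       /*
+        * For example, with wake_delay = 300 the alarm is set to
+        * since_epoch + 300, i.e. the machine should wake five minutes
+        * after powering down (the sysfs wakealarm file takes absolute
+        * seconds since the epoch).
+        */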
14917 +
14918 +       __toi_power_down(toi_poweroff_method);
14919 +
14920 +       toi_check_resleep();
14921 +}
14922 +EXPORT_SYMBOL_GPL(toi_power_down);
14923 +
14924 +static struct toi_sysfs_data sysfs_params[] = {
14925 +#if defined(CONFIG_ACPI)
14926 +       SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL),
14927 +       SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL),
14928 +       SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL),
14929 +       SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0,
14930 +                       NULL),
14931 +       SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0),
14932 +       SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both,
14933 +               0, 0, 0, NULL)
14934 +#endif
14935 +};
14936 +
14937 +static struct toi_module_ops powerdown_ops = {
14938 +       .type                           = MISC_HIDDEN_MODULE,
14939 +       .name                           = "poweroff",
14940 +       .initialise                     = powerdown_init,
14941 +       .cleanup                        = powerdown_cleanup,
14942 +       .directory                      = "[ROOT]",
14943 +       .module                         = THIS_MODULE,
14944 +       .sysfs_data                     = sysfs_params,
14945 +       .num_sysfs_entries              = sizeof(sysfs_params) /
14946 +               sizeof(struct toi_sysfs_data),
14947 +};
14948 +
14949 +int toi_poweroff_init(void)
14950 +{
14951 +       return toi_register_module(&powerdown_ops);
14952 +}
14953 +
14954 +void toi_poweroff_exit(void)
14955 +{
14956 +       toi_unregister_module(&powerdown_ops);
14957 +}
14958 diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h
14959 new file mode 100644
14960 index 0000000..a85633a
14961 --- /dev/null
14962 +++ b/kernel/power/tuxonice_power_off.h
14963 @@ -0,0 +1,24 @@
14964 +/*
14965 + * kernel/power/tuxonice_power_off.h
14966 + *
14967 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
14968 + *
14969 + * This file is released under the GPLv2.
14970 + *
14971 + * Support for powering down.
14972 + */
14973 +
14974 +int toi_pm_state_finish(void);
14975 +void toi_power_down(void);
14976 +extern unsigned long toi_poweroff_method;
14977 +int toi_poweroff_init(void);
14978 +void toi_poweroff_exit(void);
14979 +void toi_check_resleep(void);
14980 +
14981 +extern int platform_begin(int platform_mode);
14982 +extern int platform_pre_snapshot(int platform_mode);
14983 +extern void platform_leave(int platform_mode);
14984 +extern void platform_end(int platform_mode);
14985 +extern void platform_finish(int platform_mode);
14986 +extern int platform_pre_restore(int platform_mode);
14987 +extern void platform_restore_cleanup(int platform_mode);
14988 diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c
14989 new file mode 100644
14990 index 0000000..a675de0
14991 --- /dev/null
14992 +++ b/kernel/power/tuxonice_prepare_image.c
14993 @@ -0,0 +1,1045 @@
14994 +/*
14995 + * kernel/power/tuxonice_prepare_image.c
14996 + *
14997 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
14998 + *
14999 + * This file is released under the GPLv2.
15000 + *
15001 + * We need to eat memory until we can:
15002 + * 1. Perform the save without changing anything (RAM_NEEDED < #pages)
15003 + * 2. Fit it all in available space (toiActiveAllocator->available_space() >=
15004 + *    main_storage_needed())
15005 + * 3. Reload the pagedir and pageset1 to places that don't collide with their
15006 + *    final destinations, not knowing to what extent the resumed kernel will
15007 + *    overlap with the one loaded at boot time. I think the resumed kernel
15008 + *    should overlap completely, but I don't want to rely on this as it is
15009 + *    an unproven assumption. We therefore assume there will be no overlap at
15010 + *    all (worst case).
15011 + * 4. Meet the user's requested limit (if any) on the size of the image.
15012 + *    The limit is in MB, so pages/256 (assuming 4K pages).
15013 + *
15014 + */
15015 +
15016 +#include <linux/highmem.h>
15017 +#include <linux/freezer.h>
15018 +#include <linux/hardirq.h>
15019 +#include <linux/mmzone.h>
15020 +#include <linux/console.h>
15021 +
15022 +#include "tuxonice_pageflags.h"
15023 +#include "tuxonice_modules.h"
15024 +#include "tuxonice_io.h"
15025 +#include "tuxonice_ui.h"
15026 +#include "tuxonice_extent.h"
15027 +#include "tuxonice_prepare_image.h"
15028 +#include "tuxonice.h"
15029 +#include "tuxonice_checksum.h"
15030 +#include "tuxonice_sysfs.h"
15031 +#include "tuxonice_alloc.h"
15032 +#include "tuxonice_atomic_copy.h"
15033 +
15034 +static long num_nosave, main_storage_allocated, storage_available,
15035 +           header_storage_needed;
15036 +long extra_pd1_pages_allowance = CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE;
15037 +int image_size_limit;
15038 +static int no_ps2_needed;
15039 +
15040 +struct attention_list {
15041 +       struct task_struct *task;
15042 +       struct attention_list *next;
15043 +};
15044 +
15045 +static struct attention_list *attention_list;
15046 +
15047 +#define PAGESET1 0
15048 +#define PAGESET2 1
15049 +
15050 +void free_attention_list(void)
15051 +{
15052 +       struct attention_list *last = NULL;
15053 +
15054 +       while (attention_list) {
15055 +               last = attention_list;
15056 +               attention_list = attention_list->next;
15057 +               toi_kfree(6, last, sizeof(*last));
15058 +       }
15059 +}
15060 +
15061 +static int build_attention_list(void)
15062 +{
15063 +       int i, task_count = 0;
15064 +       struct task_struct *p;
15065 +       struct attention_list *next;
15066 +
15067 +       /*
15068 +        * Count all processes marked PF_NOFREEZE, plus the current task.
15069 +        */
15070 +       read_lock(&tasklist_lock);
15071 +       for_each_process(p)
15072 +               if ((p->flags & PF_NOFREEZE) || p == current)
15073 +                       task_count++;
15074 +       read_unlock(&tasklist_lock);
15075 +
15076 +       /*
15077 +        * Allocate attention list structs.
15078 +        */
15079 +       for (i = 0; i < task_count; i++) {
15080 +               struct attention_list *this =
15081 +                       toi_kzalloc(6, sizeof(struct attention_list),
15082 +                                       TOI_WAIT_GFP);
15083 +               if (!this) {
15084 +                       printk(KERN_INFO "Failed to allocate slab for "
15085 +                                       "attention list.\n");
15086 +                       free_attention_list();
15087 +                       return 1;
15088 +               }
15089 +               this->next = NULL;
15090 +               if (attention_list)
15091 +                       this->next = attention_list;
15092 +               attention_list = this;
15093 +       }
15094 +
15095 +       next = attention_list;
15096 +       read_lock(&tasklist_lock);
15097 +       for_each_process(p)
15098 +               if ((p->flags & PF_NOFREEZE) || p == current) {
15099 +                       next->task = p;
15100 +                       next = next->next;
15101 +               }
15102 +       read_unlock(&tasklist_lock);
15103 +       return 0;
15104 +}
15105 +
15106 +static void pageset2_full(void)
15107 +{
15108 +       struct zone *zone;
15109 +       struct page *page;
15110 +       unsigned long flags;
15111 +       int i;
15112 +
15113 +       for_each_zone(zone) {
15114 +               spin_lock_irqsave(&zone->lru_lock, flags);
15115 +               for_each_lru(i) {
15116 +                       if (!zone_page_state(zone, NR_LRU_BASE + i))
15117 +                               continue;
15118 +
15119 +                       list_for_each_entry(page, &zone->lru[i].list, lru) {
15120 +                               struct address_space *mapping;
15121 +
15122 +                               mapping = page_mapping(page);
15123 +                               if (!mapping || !mapping->host ||
15124 +                                   !(mapping->host->i_flags & S_ATOMIC_COPY))
15125 +                                       SetPagePageset2(page);
15126 +                       }
15127 +               }
15128 +               spin_unlock_irqrestore(&zone->lru_lock, flags);
15129 +       }
15130 +}
15131 +
15132 +/*
15133 + * toi_mark_task_as_pageset
15134 + * Functionality   : Marks all the saveable pages belonging to a given process
15135 + *                  as belonging to a particular pageset.
15136 + */
15137 +
15138 +static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2)
15139 +{
15140 +       struct vm_area_struct *vma;
15141 +       struct mm_struct *mm;
15142 +
15143 +       mm = t->active_mm;
15144 +
15145 +       if (!mm || !mm->mmap)
15146 +               return;
15147 +
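+       /*
+        * When called with IRQs disabled (during the atomic copy), we
+        * cannot sleep on mmap_sem; everything else is quiesced by that
+        * point, so skipping the lock is presumably safe.
+        */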
15148 +       if (!irqs_disabled())
15149 +               down_read(&mm->mmap_sem);
15150 +
15151 +       for (vma = mm->mmap; vma; vma = vma->vm_next) {
15152 +               unsigned long posn;
15153 +
15154 +               if (!vma->vm_start || vma->vm_flags & VM_SPECIAL)
15155 +                       continue;
15156 +
15157 +               for (posn = vma->vm_start; posn < vma->vm_end;
15158 +                               posn += PAGE_SIZE) {
15159 +                       struct page *page = follow_page(vma, posn, 0);
15160 +                       struct address_space *mapping;
15161 +
15162 +                       if (!page || !pfn_valid(page_to_pfn(page)))
15163 +                               continue;
15164 +
15165 +                       mapping = page_mapping(page);
15166 +                       if (mapping && mapping->host &&
15167 +                           mapping->host->i_flags & S_ATOMIC_COPY)
15168 +                               continue;
15169 +
15170 +                       if (pageset2)
15171 +                               SetPagePageset2(page);
15172 +                       else {
15173 +                               ClearPagePageset2(page);
15174 +                               SetPagePageset1(page);
15175 +                       }
15176 +               }
15177 +       }
15178 +
15179 +       if (!irqs_disabled())
15180 +               up_read(&mm->mmap_sem);
15181 +}
15182 +
15183 +static void mark_tasks(int pageset)
15184 +{
15185 +       struct task_struct *p;
15186 +
15187 +       read_lock(&tasklist_lock);
15188 +       for_each_process(p) {
15189 +               if (!p->mm)
15190 +                       continue;
15191 +
15192 +               if (p->flags & PF_KTHREAD)
15193 +                       continue;
15194 +
15195 +               toi_mark_task_as_pageset(p, pageset);
15196 +       }
15197 +       read_unlock(&tasklist_lock);
15198 +}
15200 +
15201 +/* toi_mark_pages_for_pageset2
15202 + *
15203 + * Description: Mark unshared pages in processes not needed for hibernation
15204 + *             as able to be written out in a separate pagedir.
15205 + *             HighMem pages are simply marked as pageset2. They won't be
15206 + *             needed during hibernation.
15207 + */
15208 +
15209 +static void toi_mark_pages_for_pageset2(void)
15210 +{
15211 +       struct attention_list *this = attention_list;
15212 +
15213 +       memory_bm_clear(pageset2_map);
15214 +
15215 +       if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed)
15216 +               return;
15217 +
15218 +       if (test_action_state(TOI_PAGESET2_FULL))
15219 +               pageset2_full();
15220 +       else
15221 +               mark_tasks(PAGESET2);
15222 +
15223 +       /*
15224 +        * Because the tasks in attention_list are ones related to hibernating,
15225 +        * we know that they won't go away under us.
15226 +        */
15227 +
15228 +       while (this) {
15229 +               if (!test_result_state(TOI_ABORTED))
15230 +                       toi_mark_task_as_pageset(this->task, PAGESET1);
15231 +               this = this->next;
15232 +       }
15233 +}
15234 +
15235 +/*
15236 + * The atomic copy of pageset1 is stored in pageset2 pages.
15237 + * But if pageset1 is larger (normally only just after boot),
15238 + * we need to allocate extra pages to store the atomic copy.
15239 + * The following data struct and functions are used to handle
15240 + * the allocation and freeing of that memory.
15241 + */
15242 +
15243 +static long extra_pages_allocated;
15244 +
15245 +struct extras {
15246 +       struct page *page;
15247 +       int order;
15248 +       struct extras *next;
15249 +};
15250 +
15251 +static struct extras *extras_list;
15252 +
15253 +/* toi_free_extra_pagedir_memory
15254 + *
15255 + * Description:        Free previously allocated extra pagedir memory.
15256 + */
15257 +void toi_free_extra_pagedir_memory(void)
15258 +{
15259 +       /* Free allocated pages */
15260 +       while (extras_list) {
15261 +               struct extras *this = extras_list;
15262 +               int i;
15263 +
15264 +               extras_list = this->next;
15265 +
15266 +               for (i = 0; i < (1 << this->order); i++)
15267 +                       ClearPageNosave(this->page + i);
15268 +
15269 +               toi_free_pages(9, this->page, this->order);
15270 +               toi_kfree(7, this, sizeof(*this));
15271 +       }
15272 +
15273 +       extra_pages_allocated = 0;
15274 +}
15275 +
15276 +/* toi_allocate_extra_pagedir_memory
15277 + *
15278 + * Description:        Allocate memory for making the atomic copy of pagedir1 in the
15279 + *             case where it is bigger than pagedir2.
15280 + * Arguments:  int     extra_pages_needed: Total number of extra pages needed.
15281 + * Result:     int.    Number of extra pages we now have allocated.
15282 + */
15283 +static int toi_allocate_extra_pagedir_memory(int extra_pages_needed)
15284 +{
15285 +       int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated;
15286 +       gfp_t flags = TOI_ATOMIC_GFP;
15287 +
15288 +       if (num_to_alloc < 1)
15289 +               return 0;
15290 +
15291 +       order = fls(num_to_alloc);
15292 +       if (order >= MAX_ORDER)
15293 +               order = MAX_ORDER - 1;
15294 +
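+       /*
+        * Example: for num_to_alloc = 21, fls() gives order 5 (32 pages);
+        * the loop below trims that to order 4 (16 pages), then order 2
+        * (4 pages), then order 0, allocating 16 + 4 + 1 pages in turn
+        * (assuming each allocation succeeds).
+        */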
15295 +       while (num_to_alloc) {
15296 +               struct page *newpage;
15297 +               unsigned long virt;
15298 +               struct extras *extras_entry;
15299 +
15300 +               while ((1 << order) > num_to_alloc)
15301 +                       order--;
15302 +
15303 +               extras_entry = (struct extras *) toi_kzalloc(7,
15304 +                       sizeof(struct extras), TOI_ATOMIC_GFP);
15305 +
15306 +               if (!extras_entry)
15307 +                       return extra_pages_allocated;
15308 +
15309 +               virt = toi_get_free_pages(9, flags, order);
15310 +               while (!virt && order) {
15311 +                       order--;
15312 +                       virt = toi_get_free_pages(9, flags, order);
15313 +               }
15314 +
15315 +               if (!virt) {
15316 +                       toi_kfree(7, extras_entry, sizeof(*extras_entry));
15317 +                       return extra_pages_allocated;
15318 +               }
15319 +
15320 +               newpage = virt_to_page(virt);
15321 +
15322 +               extras_entry->page = newpage;
15323 +               extras_entry->order = order;
15324 +               extras_entry->next = NULL;
15325 +
15326 +               if (extras_list)
15327 +                       extras_entry->next = extras_list;
15328 +
15329 +               extras_list = extras_entry;
15330 +
15331 +               for (j = 0; j < (1 << order); j++) {
15332 +                       SetPageNosave(newpage + j);
15333 +                       SetPagePageset1Copy(newpage + j);
15334 +               }
15335 +
15336 +               extra_pages_allocated += (1 << order);
15337 +               num_to_alloc -= (1 << order);
15338 +       }
15339 +
15340 +       return extra_pages_allocated;
15341 +}
15342 +
15343 +/*
15344 + * real_nr_free_pages: Count free pages, including pcp pages, in the zones
15345 + * selected by zone_idx_mask (a mask of 1 << zone_idx() bits).
15346 + */
15347 +long real_nr_free_pages(unsigned long zone_idx_mask)
15348 +{
15349 +       struct zone *zone;
15350 +       int result = 0, cpu;
15351 +
15352 +       /* PCP lists */
15353 +       for_each_populated_zone(zone) {
15354 +               if (!(zone_idx_mask & (1 << zone_idx(zone))))
15355 +                       continue;
15356 +
15357 +               for_each_online_cpu(cpu) {
15358 +                       struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
15359 +                       struct per_cpu_pages *pcp = &pset->pcp;
15360 +                       result += pcp->count;
15361 +               }
15362 +
15363 +               result += zone_page_state(zone, NR_FREE_PAGES);
15364 +       }
15365 +       return result;
15366 +}
15367 +EXPORT_SYMBOL_GPL(real_nr_free_pages);
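+
+/*
+ * For example, real_nr_free_pages(1 << ZONE_NORMAL) counts only
+ * ZONE_NORMAL, while real_nr_free_pages(all_zones_mask) counts every
+ * populated zone.
+ */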
15368 +
15369 +/*
15370 + * Discover how much extra memory will be required by the drivers
15371 + * when they're asked to hibernate. We can then ensure that amount
15372 + * of memory is available when we really want it.
15373 + */
15374 +static void get_extra_pd1_allowance(void)
15375 +{
15376 +       long orig_num_free = real_nr_free_pages(all_zones_mask), final;
15377 +
15378 +       toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
15379 +
15380 +       if (!toi_go_atomic(PMSG_FREEZE, 1)) {
15381 +               final = real_nr_free_pages(all_zones_mask);
15382 +               toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0);
15383 +
15384 +               extra_pd1_pages_allowance = max(
15385 +                       orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE,
15386 +                       (long) MIN_EXTRA_PAGES_ALLOWANCE);
15387 +       }
15388 +}
15389 +
15390 +/*
15391 + * Amount of storage needed, possibly taking into account the
15392 + * expected compression ratio and possibly also ignoring our
15393 + * allowance for extra pages.
15394 + */
15395 +static long main_storage_needed(int use_ecr,
15396 +               int ignore_extra_pd1_allow)
15397 +{
15398 +       return (pagedir1.size + pagedir2.size +
15399 +         (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) *
15400 +        (use_ecr ? toi_expected_compression_ratio() : 100) / 100;
15401 +}
15402 +
15403 +/*
15404 + * Storage needed for the image header: computed in bytes, returned in pages.
15405 + */
15406 +long get_header_storage_needed(void)
15407 +{
15408 +       long bytes = (int) sizeof(struct toi_header) +
15409 +                       toi_header_storage_for_modules() +
15410 +                       toi_pageflags_space_needed();
15411 +
15412 +       return DIV_ROUND_UP(bytes, PAGE_SIZE);
15413 +}
15414 +EXPORT_SYMBOL_GPL(get_header_storage_needed);
15415 +
15416 +/*
15417 + * When freeing memory, pages from either pageset might be freed.
15418 + *
15419 + * When seeking to free memory to be able to hibernate, for every ps1 page
15420 + * freed, we need 2 less pages for the atomic copy because there is one less
15421 + * page to copy and one more page into which data can be copied.
15422 + *
15423 + * Freeing ps2 pages saves us nothing directly. No more memory is available
15424 + * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but
15425 + * that's too much work to figure out.
15426 + *
15427 + * => ps1_to_free functions
15428 + *
15429 + * Of course, if we just want to reduce the image size because of storage
15430 + * limitations or an image size limit, either ps will do.
15431 + *
15432 + * => any_to_free function
15433 + */
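+
+/*
+ * Worked example: freeing one ps1 page means one less page to copy and
+ * one more page available as a copy destination, a net gain of two;
+ * hence the DIV_ROUND_UP(..., 2) in the ps1_to_free calculations below.
+ */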
15434 +
15435 +static long highpages_ps1_to_free(void)
15436 +{
15437 +       return max_t(long, 0, DIV_ROUND_UP(get_highmem_size(pagedir1) -
15438 +               get_highmem_size(pagedir2), 2) - real_nr_free_high_pages());
15439 +}
15440 +
15441 +static long lowpages_ps1_to_free(void)
15442 +{
15443 +       return max_t(long, 0, DIV_ROUND_UP(get_lowmem_size(pagedir1) +
15444 +               extra_pd1_pages_allowance + MIN_FREE_RAM +
15445 +               toi_memory_for_modules(0) - get_lowmem_size(pagedir2) -
15446 +               real_nr_free_low_pages() - extra_pages_allocated, 2));
15447 +}
15448 +
15449 +static long current_image_size(void)
15450 +{
15451 +       return pagedir1.size + pagedir2.size + header_storage_needed;
15452 +}
15453 +
15454 +static long storage_still_required(void)
15455 +{
15456 +       return max_t(long, 0, main_storage_needed(1, 1) - storage_available);
15457 +}
15458 +
15459 +static long ram_still_required(void)
15460 +{
15461 +       return max_t(long, 0, MIN_FREE_RAM + toi_memory_for_modules(0) -
15462 +               real_nr_free_low_pages() + 2 * extra_pd1_pages_allowance);
15463 +}
15464 +
15465 +static long any_to_free(int use_image_size_limit)
15466 +{
15467 +       long user_limit = (use_image_size_limit && image_size_limit > 0) ?
15468 +                       max_t(long, 0, current_image_size() -
15469 +                                       (image_size_limit << 8)) : 0,
15470 +               storage_limit = storage_still_required(),
15471 +               ram_limit = ram_still_required(),
15472 +               first_max = max(user_limit, storage_limit);
15473 +
15474 +       return max(first_max, ram_limit);
15475 +}
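+
+/*
+ * image_size_limit is in MB; with 4K pages that is 256 pages per MB,
+ * hence the (image_size_limit << 8) conversion above. A 512 MB limit,
+ * for example, corresponds to 131072 pages.
+ */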
15476 +
15477 +static int need_pageset2(void)
15478 +{
15479 +       return (real_nr_free_low_pages() + extra_pages_allocated -
15480 +               2 * extra_pd1_pages_allowance - MIN_FREE_RAM -
15481 +                toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size;
15482 +}
15483 +
15484 +/* amount_needed
15485 + *
15486 + * Calculates the amount by which the image size needs to be reduced to meet
15487 + * our constraints.
15488 + */
15489 +static long amount_needed(int use_image_size_limit)
15490 +{
15491 +       return max(highpages_ps1_to_free() + lowpages_ps1_to_free(),
15492 +                       any_to_free(use_image_size_limit));
15493 +}
15494 +
15495 +static long image_not_ready(int use_image_size_limit)
15496 +{
15497 +       toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
15498 +               "Amount still needed (%ld) > 0:%d,"
15499 +               " Storage allocd: %ld < %ld: %d.\n",
15500 +                       amount_needed(use_image_size_limit),
15501 +                       (amount_needed(use_image_size_limit) > 0),
15502 +                       main_storage_allocated,
15503 +                       main_storage_needed(1, 1),
15504 +                       main_storage_allocated < main_storage_needed(1, 1));
15505 +
15506 +       toi_cond_pause(0, NULL);
15507 +
15508 +       return (amount_needed(use_image_size_limit) > 0) ||
15509 +                main_storage_allocated < main_storage_needed(1, 1);
15510 +}
15511 +
15512 +static void display_failure_reason(int tries_exceeded)
15513 +{
15514 +       long storage_required = storage_still_required(),
15515 +           ram_required = ram_still_required(),
15516 +           high_ps1 = highpages_ps1_to_free(),
15517 +           low_ps1 = lowpages_ps1_to_free();
15518 +
15519 +       printk(KERN_INFO "Failed to prepare the image because...\n");
15520 +
15521 +       if (!storage_available) {
15522 +               printk(KERN_INFO "- You need some storage available to be "
15523 +                               "able to hibernate.\n");
15524 +               return;
15525 +       }
15526 +
15527 +       if (tries_exceeded)
15528 +               printk(KERN_INFO "- The maximum number of iterations was "
15529 +                               "reached without successfully preparing the "
15530 +                               "image.\n");
15531 +
15532 +       if (storage_required) {
15533 +               printk(KERN_INFO " - We need at least %ld pages of storage "
15534 +                               "(ignoring the header), but only have %ld.\n",
15535 +                               main_storage_needed(1, 1),
15536 +                               main_storage_allocated);
15537 +               set_abort_result(TOI_INSUFFICIENT_STORAGE);
15538 +       }
15539 +
15540 +       if (ram_required) {
15541 +               printk(KERN_INFO " - We need %ld more free pages of low "
15542 +                               "memory.\n", ram_required);
15543 +               printk(KERN_INFO "     Minimum free     : %8d\n", MIN_FREE_RAM);
15544 +               printk(KERN_INFO "   + Reqd. by modules : %8ld\n",
15545 +                               toi_memory_for_modules(0));
15546 +               printk(KERN_INFO "   + 2 * extra allow  : %8ld\n",
15547 +                               2 * extra_pd1_pages_allowance);
15548 +               printk(KERN_INFO "   - Currently free   : %8ld\n",
15549 +                               real_nr_free_low_pages());
15550 +               printk(KERN_INFO "                      : ========\n");
15551 +               printk(KERN_INFO "     Still needed     : %8ld\n",
15552 +                               ram_required);
15553 +
15554 +               /* Print breakdown of memory needed for modules */
15555 +               toi_memory_for_modules(1);
15556 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
15557 +       }
15558 +
15559 +       if (high_ps1) {
15560 +               printk(KERN_INFO " - We need to free %ld highmem pageset 1 "
15561 +                               "pages.\n", high_ps1);
15562 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
15563 +       }
15564 +
15565 +       if (low_ps1) {
15566 +               printk(KERN_INFO " - We need to free %ld lowmem pageset 1 "
15567 +                               "pages.\n", low_ps1);
15568 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
15569 +       }
15570 +}
15571 +
15572 +static void display_stats(int always, int sub_extra_pd1_allow)
15573 +{
15574 +       char buffer[255];
15575 +       snprintf(buffer, 254,
15576 +               "Free:%ld(%ld). Sets:%ld(%ld),%ld(%ld). "
15577 +               "Nosave:%ld-%ld=%ld. Storage:%lu/%lu(%lu=>%lu). "
15578 +               "Needed:%ld,%ld,%ld(%d,%ld,%ld,%ld) (PS2:%s)\n",
15579 +
15580 +               /* Free */
15581 +               real_nr_free_pages(all_zones_mask),
15582 +               real_nr_free_low_pages(),
15583 +
15584 +               /* Sets */
15585 +               pagedir1.size, pagedir1.size - get_highmem_size(pagedir1),
15586 +               pagedir2.size, pagedir2.size - get_highmem_size(pagedir2),
15587 +
15588 +               /* Nosave */
15589 +               num_nosave, extra_pages_allocated,
15590 +               num_nosave - extra_pages_allocated,
15591 +
15592 +               /* Storage */
15593 +               main_storage_allocated,
15594 +               storage_available,
15595 +               main_storage_needed(1, sub_extra_pd1_allow),
15596 +               main_storage_needed(1, 1),
15597 +
15598 +               /* Needed */
15599 +               lowpages_ps1_to_free(), highpages_ps1_to_free(),
15600 +               any_to_free(1),
15601 +               MIN_FREE_RAM, toi_memory_for_modules(0),
15602 +               extra_pd1_pages_allowance, ((long) image_size_limit) << 8,
15603 +
15604 +               need_pageset2() ? "yes" : "no");
15605 +
15606 +       if (always)
15607 +               printk("%s", buffer);
15608 +       else
15609 +               toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer);
15610 +}
15611 +
15612 +/* generate_free_page_map
15613 + *
15614 + * Description:        This routine generates a bitmap of free pages from the
15615 + *             lists used by the memory manager. We then use the bitmap
15616 + *             to quickly calculate which pages to save and in which
15617 + *             pagesets.
15618 + */
15619 +static void generate_free_page_map(void)
15620 +{
15621 +       int order, pfn, cpu, t;
15622 +       unsigned long flags, i;
15623 +       struct zone *zone;
15624 +       struct list_head *curr;
15625 +
15626 +       for_each_populated_zone(zone) {
15627 +               spin_lock_irqsave(&zone->lock, flags);
15628 +
15629 +               for (i = 0; i < zone->spanned_pages; i++)
15630 +                       ClearPageNosaveFree(pfn_to_page(
15631 +                                               ZONE_START(zone) + i));
15632 +
15633 +               for_each_migratetype_order(order, t) {
15634 +                       list_for_each(curr,
15635 +                                       &zone->free_area[order].free_list[t]) {
15636 +                               unsigned long j;
15637 +
15638 +                               pfn = page_to_pfn(list_entry(curr, struct page,
15639 +                                                       lru));
15640 +                               for (j = 0; j < (1UL << order); j++)
15641 +                                       SetPageNosaveFree(pfn_to_page(pfn + j));
15642 +                       }
15643 +               }
15644 +
15645 +               for_each_online_cpu(cpu) {
15646 +                       struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
15647 +                       struct per_cpu_pages *pcp = &pset->pcp;
15648 +                       struct page *page;
15649 +
15650 +                       list_for_each_entry(page, &pcp->list, lru)
15651 +                               SetPageNosaveFree(page);
15652 +               }
15653 +
15654 +               spin_unlock_irqrestore(&zone->lock, flags);
15655 +       }
15656 +}
15657 +
15658 +/* size_of_free_region
15659 + *
15660 + * Description:        Return the number of pages that are free, beginning with and
15661 + *             including this one.
15662 + */
15663 +static int size_of_free_region(struct zone *zone, unsigned long start_pfn)
15664 +{
15665 +       unsigned long this_pfn = start_pfn,
15666 +                     end_pfn = ZONE_START(zone) + zone->spanned_pages - 1;
15667 +
15668 +       while (this_pfn <= end_pfn && PageNosaveFree(pfn_to_page(this_pfn)))
15669 +               this_pfn++;
15670 +
15671 +       return this_pfn - start_pfn;
15672 +}
15673 +
15674 +/* flag_image_pages
15675 + *
15676 + * This routine generates our lists of pages to be stored in each
15677 + * pageset. Since we store the data using extents, and adding new
15678 + * extents might allocate a new extent page, this routine may well
15679 + * be called more than once.
15680 + */
15681 +static void flag_image_pages(int atomic_copy)
15682 +{
15683 +       int num_free = 0;
15684 +       unsigned long loop;
15685 +       struct zone *zone;
15686 +
15687 +       pagedir1.size = 0;
15688 +       pagedir2.size = 0;
15689 +
15690 +       set_highmem_size(pagedir1, 0);
15691 +       set_highmem_size(pagedir2, 0);
15692 +
15693 +       num_nosave = 0;
15694 +
15695 +       memory_bm_clear(pageset1_map);
15696 +
15697 +       generate_free_page_map();
15698 +
15699 +       /*
15700 +        * Pages not to be saved are marked Nosave irrespective of being
15701 +        * reserved.
15702 +        */
15703 +       for_each_populated_zone(zone) {
15704 +               int highmem = is_highmem(zone);
15705 +
15706 +               for (loop = 0; loop < zone->spanned_pages; loop++) {
15707 +                       unsigned long pfn = ZONE_START(zone) + loop;
15708 +                       struct page *page;
15709 +                       int chunk_size;
15710 +
15711 +                       if (!pfn_valid(pfn))
15712 +                               continue;
15713 +
15714 +                       chunk_size = size_of_free_region(zone, pfn);
15715 +                       if (chunk_size) {
15716 +                               num_free += chunk_size;
15717 +                               loop += chunk_size - 1;
15718 +                               continue;
15719 +                       }
15720 +
15721 +                       page = pfn_to_page(pfn);
15722 +
15723 +                       if (PageNosave(page)) {
15724 +                               num_nosave++;
15725 +                               continue;
15726 +                       }
15727 +
15728 +                       page = highmem ? saveable_highmem_page(zone, pfn) :
15729 +                               saveable_page(zone, pfn);
15730 +
15731 +                       if (!page) {
15732 +                               num_nosave++;
15733 +                               continue;
15734 +                       }
15735 +
15736 +                       if (PagePageset2(page)) {
15737 +                               pagedir2.size++;
15738 +                               if (PageHighMem(page))
15739 +                                       inc_highmem_size(pagedir2);
15740 +                               else
15741 +                                       SetPagePageset1Copy(page);
15742 +                               if (PageResave(page)) {
15743 +                                       SetPagePageset1(page);
15744 +                                       ClearPagePageset1Copy(page);
15745 +                                       pagedir1.size++;
15746 +                                       if (PageHighMem(page))
15747 +                                               inc_highmem_size(pagedir1);
15748 +                               }
15749 +                       } else {
15750 +                               pagedir1.size++;
15751 +                               SetPagePageset1(page);
15752 +                               if (PageHighMem(page))
15753 +                                       inc_highmem_size(pagedir1);
15754 +                       }
15755 +               }
15756 +       }
15757 +
15758 +       if (!atomic_copy)
15759 +               toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0,
15760 +                       "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)"
15761 +                                               " + NumFree (%d) = %d.\n",
15762 +                       pagedir1.size, pagedir2.size, num_nosave, num_free,
15763 +                       pagedir1.size + pagedir2.size + num_nosave + num_free);
15764 +}
15765 +
15766 +void toi_recalculate_image_contents(int atomic_copy)
15767 +{
15768 +       memory_bm_clear(pageset1_map);
15769 +       if (!atomic_copy) {
15770 +               unsigned long pfn;
15771 +               memory_bm_position_reset(pageset2_map);
15772 +               for (pfn = memory_bm_next_pfn(pageset2_map);
15773 +                               pfn != BM_END_OF_MAP;
15774 +                               pfn = memory_bm_next_pfn(pageset2_map))
15775 +                       ClearPagePageset1Copy(pfn_to_page(pfn));
15776 +               /* Need to call this before getting pageset1_size! */
15777 +               toi_mark_pages_for_pageset2();
15778 +       }
15779 +       flag_image_pages(atomic_copy);
15780 +
15781 +       if (!atomic_copy) {
15782 +               storage_available = toiActiveAllocator->storage_available();
15783 +               display_stats(0, 0);
15784 +       }
15785 +}
15786 +
15787 +/* update_image
15788 + *
15789 + * Allocate [more] memory and storage for the image.
15790 + */
15791 +static void update_image(int ps2_recalc)
15792 +{
15793 +       int wanted, got, old_header_req;
15794 +       long seek;
15795 +
15796 +       /* Include allowance for growth in pagedir1 while writing pagedir2 */
15797 +       wanted = pagedir1.size + extra_pd1_pages_allowance -
15798 +               get_lowmem_size(pagedir2);
15799 +       if (wanted > extra_pages_allocated) {
15800 +               got = toi_allocate_extra_pagedir_memory(wanted);
15801 +               if (got < wanted) {
15802 +                       toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
15803 +                               "Want %d extra pages for pageset1, got %d.\n",
15804 +                               wanted, got);
15805 +                       return;
15806 +               }
15807 +       }
15808 +
15809 +       if (ps2_recalc)
15810 +               goto recalc;
15811 +
15812 +       thaw_kernel_threads();
15813 +
15814 +       /*
15815 +        * Allocate remaining storage space, if possible, up to the
15816 +        * maximum we know we'll need. It's okay to allocate the
15817 +        * maximum if the writer is the swapwriter, but
15818 +        * we don't want to grab all available space on an NFS share.
15819 +        * We therefore ignore the expected compression ratio here,
15820 +        * thereby trying to allocate the maximum image size we could
15821 +        * need (assuming compression doesn't expand the image), but
15822 +        * don't complain if we can't get the full amount we're after.
15823 +        */
15824 +
15825 +       do {
15826 +               old_header_req = header_storage_needed;
15827 +               toiActiveAllocator->reserve_header_space(header_storage_needed);
15828 +
15829 +               /* How much storage is free with the reservation applied? */
15830 +               storage_available = toiActiveAllocator->storage_available();
15831 +               seek = min(storage_available, main_storage_needed(0, 0));
15832 +
15833 +               toiActiveAllocator->allocate_storage(seek);
15834 +
15835 +               main_storage_allocated =
15836 +                       toiActiveAllocator->storage_allocated();
15837 +
15838 +               /* Need more header because more storage allocated? */
15839 +               header_storage_needed = get_header_storage_needed();
15840 +
15841 +       } while (header_storage_needed > old_header_req);
15842 +
15843 +       if (freeze_processes())
15844 +               set_abort_result(TOI_FREEZING_FAILED);
15845 +
15846 +recalc:
15847 +       toi_recalculate_image_contents(0);
15848 +}
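
The reserve/allocate loop above is a fixed-point iteration: allocating more main storage can increase the header space needed, which changes how much main storage remains, so we repeat until the header requirement stops growing. A minimal standalone sketch of the same pattern, with hypothetical stand-in cost functions rather than the real TuxOnIce ones:

    #include <stdio.h>

    static long storage_total = 10000;

    /* Hypothetical: header cost grows with the main storage allocated. */
    static long header_needed(long main_allocated)
    {
            return 10 + main_allocated / 100;
    }

    int main(void)
    {
            long header_req = 0, old_header_req, main_allocated;

            do {
                    old_header_req = header_req;
                    /* Reserve header space; the rest goes to the image. */
                    main_allocated = storage_total - header_req;
                    /* More main storage may require more header space. */
                    header_req = header_needed(main_allocated);
            } while (header_req > old_header_req);

            main_allocated = storage_total - header_req;
            printf("header %ld, main %ld\n", header_req, main_allocated);
            return 0;
    }
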
15849 +
15850 +/* attempt_to_freeze
15851 + *
15852 + * Try to freeze processes.
15853 + */
15854 +
15855 +static int attempt_to_freeze(void)
15856 +{
15857 +       int result;
15858 +
15859 +       /* Thaw anything frozen by an earlier attempt, then freeze again */
15860 +       thaw_processes();
15861 +       toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing "
15862 +                       "filesystems.");
15863 +       result = freeze_processes();
15864 +
15865 +       if (result)
15866 +               set_abort_result(TOI_FREEZING_FAILED);
15867 +
15868 +       return result;
15869 +}
15870 +
15871 +/* eat_memory
15872 + *
15873 + * Try to free some memory, to meet either hard or soft constraints on the
15874 + * image characteristics.
15875 + *
15876 + * Hard constraints:
15877 + * - Pageset1 must be < half of memory;
15878 + * - We must have enough memory free at resume time to be able to load
15879 + *   pageset1 into pages that don't conflict with where it has to
15880 + *   be restored.
15881 + * Soft constraints:
15882 + * - User specified image size limit.
15883 + */
15884 +static void eat_memory(void)
15885 +{
15886 +       long amount_wanted = 0;
15887 +       int did_eat_memory = 0;
15888 +
15889 +       /*
15890 +        * Note that if we have enough storage space and enough free memory, we
15891 +        * may exit without eating anything. We give up when the last 10
15892 +        * iterations ate no extra pages, because we're not going to get much
15893 +        * more anyway and the few pages we would get would take a lot of time.
15894 +        *
15895 +        * We freeze processes before beginning, and then unfreeze them if we
15896 +        * need to eat memory until we think we have enough. If our attempts
15897 +        * to freeze fail, we give up and abort.
15898 +        */
15899 +
15900 +       amount_wanted = amount_needed(1);
15901 +
15902 +       switch (image_size_limit) {
15903 +       case -1: /* Don't eat any memory */
15904 +               if (amount_wanted > 0) {
15905 +                       set_abort_result(TOI_WOULD_EAT_MEMORY);
15906 +                       return;
15907 +               }
15908 +               break;
15909 +       case -2:  /* Free caches only */
15910 +               drop_pagecache();
15911 +               toi_recalculate_image_contents(0);
15912 +               amount_wanted = amount_needed(1);
15913 +               break;
15914 +       default:
15915 +               break;
15916 +       }
15917 +
15918 +       if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) &&
15919 +                       image_size_limit != -1) {
15920 +               long request = amount_wanted + 50;
15921 +
15922 +               toi_prepare_status(CLEAR_BAR,
15923 +                               "Seeking to free %ldMB of memory.",
15924 +                               MB(amount_wanted));
15925 +
15926 +               thaw_kernel_threads();
15927 +
15928 +               /*
15929 +                * Ask for too many because shrink_all_memory doesn't
15930 +                * currently return enough most of the time.
15931 +                */
15932 +               shrink_all_memory(request);
15933 +
15934 +               did_eat_memory = 1;
15935 +
15936 +               toi_recalculate_image_contents(0);
15937 +
15938 +               amount_wanted = amount_needed(1);
15939 +
15940 +               printk("Asked shrink_all_memory for %ld pages, got %ld.\n",
15941 +                               request, request - amount_wanted);
15942 +
15943 +               toi_cond_pause(0, NULL);
15944 +
15945 +               if (freeze_processes())
15946 +                       set_abort_result(TOI_FREEZING_FAILED);
15947 +       }
15948 +
15949 +       if (did_eat_memory)
15950 +               toi_recalculate_image_contents(0);
15951 +}
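
image_size_limit is user-tunable: -1 aborts rather than eating memory and -2 only drops caches, as handled in the switch above; positive values act as a size cap. A hedged userspace sketch of selecting the cache-only mode, assuming the usual TuxOnIce sysfs location (the attribute is registered elsewhere, not in this hunk):

    #include <stdio.h>

    int main(void)
    {
            /* Path assumed from TuxOnIce's sysfs conventions. */
            FILE *f = fopen("/sys/power/tuxonice/image_size_limit", "w");

            if (!f) {
                    perror("image_size_limit");
                    return 1;
            }
            fprintf(f, "-2\n");     /* -2: free caches only, don't eat memory */
            fclose(f);
            return 0;
    }
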
15952 +
15953 +/* toi_prepare_image
15954 + *
15955 + * Entry point to the whole image preparation section.
15956 + *
15957 + * We do four things:
15958 + * - Freeze processes;
15959 + * - Ensure image size constraints are met;
15960 + * - Complete all the preparation for saving the image,
15961 + *   including allocation of storage. The only memory
15962 + *   that should be needed when we're finished is that
15963 + *   for actually storing the image (and we know how
15964 + *   much is needed for that because the modules tell
15965 + *   us).
15966 + * - Make sure that all dirty buffers are written out.
15967 + */
15968 +#define MAX_TRIES 2
15969 +int toi_prepare_image(void)
15970 +{
15971 +       int result = 1, tries = 1;
15972 +
15973 +       main_storage_allocated = 0;
15974 +       no_ps2_needed = 0;
15975 +
15976 +       if (attempt_to_freeze())
15977 +               return 1;
15978 +
15979 +       if (!extra_pd1_pages_allowance)
15980 +               get_extra_pd1_allowance();
15981 +
15982 +       storage_available = toiActiveAllocator->storage_available();
15983 +
15984 +       if (!storage_available) {
15985 +               printk(KERN_INFO "No storage available. Didn't try to prepare "
15986 +                               "an image.\n");
15987 +               display_failure_reason(0);
15988 +               set_abort_result(TOI_NOSTORAGE_AVAILABLE);
15989 +               return 1;
15990 +       }
15991 +
15992 +       if (build_attention_list()) {
15993 +               abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
15994 +                               "Unable to successfully prepare the image.\n");
15995 +               return 1;
15996 +       }
15997 +
15998 +       toi_recalculate_image_contents(0);
15999 +
16000 +       do {
16001 +               toi_prepare_status(CLEAR_BAR,
16002 +                               "Preparing Image. Try %d.", tries);
16003 +
16004 +               eat_memory();
16005 +
16006 +               if (test_result_state(TOI_ABORTED))
16007 +                       break;
16008 +
16009 +               update_image(0);
16010 +
16011 +               tries++;
16012 +
16013 +       } while (image_not_ready(1) && tries <= MAX_TRIES &&
16014 +                       !test_result_state(TOI_ABORTED));
16015 +
16016 +       result = image_not_ready(0);
16017 +
16018 +       if (!test_result_state(TOI_ABORTED)) {
16019 +               if (result) {
16020 +                       display_stats(1, 0);
16021 +                       display_failure_reason(tries > MAX_TRIES);
16022 +                       abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
16023 +                               "Unable to successfully prepare the image.\n");
16024 +               } else {
16025 +                       /* Pageset 2 needed? */
16026 +                       if (!need_pageset2() &&
16027 +                                 test_action_state(TOI_NO_PS2_IF_UNNEEDED)) {
16028 +                               no_ps2_needed = 1;
16029 +                               toi_recalculate_image_contents(0);
16030 +                               update_image(1);
16031 +                       }
16032 +
16033 +                       toi_cond_pause(1, "Image preparation complete.");
16034 +               }
16035 +       }
16036 +
16037 +       return result ? result : allocate_checksum_pages();
16038 +}
16039 diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h
16040 new file mode 100644
16041 index 0000000..9a1de79
16042 --- /dev/null
16043 +++ b/kernel/power/tuxonice_prepare_image.h
16044 @@ -0,0 +1,36 @@
16045 +/*
16046 + * kernel/power/tuxonice_prepare_image.h
16047 + *
16048 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
16049 + *
16050 + * This file is released under the GPLv2.
16051 + *
16052 + */
16053 +
16054 +#include <asm/sections.h>
16055 +
16056 +extern int toi_prepare_image(void);
16057 +extern void toi_recalculate_image_contents(int atomic_copy);
16058 +extern long real_nr_free_pages(unsigned long zone_idx_mask);
16059 +extern int image_size_limit;
16060 +extern void toi_free_extra_pagedir_memory(void);
16061 +extern long extra_pd1_pages_allowance;
16062 +extern void free_attention_list(void);
16063 +
16064 +#define MIN_FREE_RAM 100
16065 +#define MIN_EXTRA_PAGES_ALLOWANCE 500
16066 +
16067 +#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))
16068 +#ifdef CONFIG_HIGHMEM
16069 +#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM))
16070 +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \
16071 +                                               (1 << ZONE_HIGHMEM)))
16072 +#else
16073 +#define real_nr_free_high_pages() (0)
16074 +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask))
16075 +
16076 +/* For eat_memory: dummy out-of-range zone, so code can use ZONE_HIGHMEM
16076 + * unconditionally; (1 << ZONE_HIGHMEM) then selects no zones. */
16077 +#define ZONE_HIGHMEM (MAX_NR_ZONES + 1)
16078 +#endif
16079 +
16080 +long get_header_storage_needed(void);
16081 diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c
16082 new file mode 100644
16083 index 0000000..5dafc95
16084 --- /dev/null
16085 +++ b/kernel/power/tuxonice_storage.c
16086 @@ -0,0 +1,282 @@
16087 +/*
16088 + * kernel/power/tuxonice_storage.c
16089 + *
16090 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
16091 + *
16092 + * This file is released under the GPLv2.
16093 + *
16094 + * Routines for talking to a userspace program that manages storage.
16095 + *
16096 + * The kernel side:
16097 + * - starts the userspace program;
16098 + * - sends messages telling it when to open and close the connection;
16099 + * - tells it when to quit;
16100 + *
16101 + * The user space side:
16102 + * - passes messages regarding status;
16103 + *
16104 + */
16105 +
16106 +#include <linux/suspend.h>
16107 +#include <linux/freezer.h>
16108 +
16109 +#include "tuxonice_sysfs.h"
16110 +#include "tuxonice_modules.h"
16111 +#include "tuxonice_netlink.h"
16112 +#include "tuxonice_storage.h"
16113 +#include "tuxonice_ui.h"
16114 +
16115 +static struct user_helper_data usm_helper_data;
16116 +static struct toi_module_ops usm_ops;
16117 +static int message_received, usm_prepare_count;
16118 +static int storage_manager_last_action, storage_manager_action;
16119 +
16120 +static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
16121 +{
16122 +       int type;
16123 +       int *data;
16124 +
16125 +       type = nlh->nlmsg_type;
16126 +
16127 +       /* Control messages: ignore them */
16128 +       if (type < NETLINK_MSG_BASE)
16129 +               return 0;
16130 +
16131 +       /* Unknown message: reply with EINVAL */
16132 +       if (type >= USM_MSG_MAX)
16133 +               return -EINVAL;
16134 +
16135 +       /* All operations require privileges, even GET */
16136 +       if (security_netlink_recv(skb, CAP_NET_ADMIN))
16137 +               return -EPERM;
16138 +
16139 +       /* Only allow one task to receive NOFREEZE privileges */
16140 +       if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1)
16141 +               return -EBUSY;
16142 +
16143 +       data = (int *) NLMSG_DATA(nlh);
16144 +
16145 +       switch (type) {
16146 +       case USM_MSG_SUCCESS:
16147 +       case USM_MSG_FAILED:
16148 +               message_received = type;
16149 +               complete(&usm_helper_data.wait_for_process);
16150 +               break;
16151 +       default:
16152 +               printk(KERN_INFO "Storage manager doesn't recognise "
16153 +                               "message %d.\n", type);
16154 +       }
16155 +
16156 +       return 1;
16157 +}
16158 +
16159 +#ifdef CONFIG_NET
16160 +static int activations;
16161 +
16162 +int toi_activate_storage(int force)
16163 +{
16164 +       int tries = 1;
16165 +
16166 +       if (usm_helper_data.pid == -1 || !usm_ops.enabled)
16167 +               return 0;
16168 +
16169 +       message_received = 0;
16170 +       activations++;
16171 +
16172 +       if (activations > 1 && !force)
16173 +               return 0;
16174 +
16175 +       while ((!message_received || message_received == USM_MSG_FAILED) &&
16176 +                       tries < 2) {
16177 +               toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt "
16178 +                               "%d.\n", tries);
16179 +
16180 +               init_completion(&usm_helper_data.wait_for_process);
16181 +
16182 +               toi_send_netlink_message(&usm_helper_data,
16183 +                       USM_MSG_CONNECT,
16184 +                       NULL, 0);
16185 +
16186 +               /* Wait 2 seconds for the userspace process to make contact */
16187 +               wait_for_completion_timeout(&usm_helper_data.wait_for_process,
16188 +                               2*HZ);
16189 +
16190 +               tries++;
16191 +       }
16192 +
16193 +       return 0;
16194 +}
16195 +
16196 +int toi_deactivate_storage(int force)
16197 +{
16198 +       if (usm_helper_data.pid == -1 || !usm_ops.enabled)
16199 +               return 0;
16200 +
16201 +       message_received = 0;
16202 +       activations--;
16203 +
16204 +       if (activations && !force)
16205 +               return 0;
16206 +
16207 +       init_completion(&usm_helper_data.wait_for_process);
16208 +
16209 +       toi_send_netlink_message(&usm_helper_data,
16210 +                       USM_MSG_DISCONNECT,
16211 +                       NULL, 0);
16212 +
16213 +       wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ);
16214 +
16215 +       if (!message_received || message_received == USM_MSG_FAILED) {
16216 +               printk(KERN_INFO "Returning failure disconnecting storage.\n");
16217 +               return 1;
16218 +       }
16219 +
16220 +       return 0;
16221 +}
16222 +#endif
16223 +
16224 +static void storage_manager_simulate(void)
16225 +{
16226 +       printk(KERN_INFO "--- Storage manager simulate ---\n");
16227 +       toi_prepare_usm();
16228 +       schedule();
16229 +       printk(KERN_INFO "--- Activate storage 1 ---\n");
16230 +       toi_activate_storage(1);
16231 +       schedule();
16232 +       printk(KERN_INFO "--- Deactivate storage 1 ---\n");
16233 +       toi_deactivate_storage(1);
16234 +       schedule();
16235 +       printk(KERN_INFO "--- Cleanup usm ---\n");
16236 +       toi_cleanup_usm();
16237 +       schedule();
16238 +       printk(KERN_INFO "--- Storage manager simulate ends ---\n");
16239 +}
16240 +
16241 +static int usm_storage_needed(void)
16242 +{
16243 +       return strlen(usm_helper_data.program);
16244 +}
16245 +
16246 +static int usm_save_config_info(char *buf)
16247 +{
16248 +       int len = strlen(usm_helper_data.program);
16249 +       memcpy(buf, usm_helper_data.program, len);
16250 +       return len;
16251 +}
16252 +
16253 +static void usm_load_config_info(char *buf, int size)
16254 +{
16255 +       /* Don't load the saved path if one has already been set */
16256 +       if (usm_helper_data.program[0])
16257 +               return;
16258 +
16259 +       memcpy(usm_helper_data.program, buf, size);
16260 +}
16261 +
16262 +static int usm_memory_needed(void)
16263 +{
16264 +       /* ballpark figure of 32 pages */
16265 +       return 32 * PAGE_SIZE;
16266 +}
16267 +
16268 +/* toi_prepare_usm
16269 + */
16270 +int toi_prepare_usm(void)
16271 +{
16272 +       usm_prepare_count++;
16273 +
16274 +       if (usm_prepare_count > 1 || !usm_ops.enabled)
16275 +               return 0;
16276 +
16277 +       usm_helper_data.pid = -1;
16278 +
16279 +       if (!*usm_helper_data.program)
16280 +               return 0;
16281 +
16282 +       toi_netlink_setup(&usm_helper_data);
16283 +
16284 +       if (usm_helper_data.pid == -1)
16285 +               printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't"
16286 +                               " start it.\n");
16287 +
16288 +       toi_activate_storage(0);
16289 +
16290 +       return usm_helper_data.pid != -1;
16291 +}
16292 +
16293 +void toi_cleanup_usm(void)
16294 +{
16295 +       usm_prepare_count--;
16296 +
16297 +       if (usm_helper_data.pid > -1 && !usm_prepare_count) {
16298 +               toi_deactivate_storage(0);
16299 +               toi_netlink_close(&usm_helper_data);
16300 +       }
16301 +}
16302 +
16303 +static void storage_manager_activate(void)
16304 +{
16305 +       if (storage_manager_action == storage_manager_last_action)
16306 +               return;
16307 +
16308 +       if (storage_manager_action)
16309 +               toi_prepare_usm();
16310 +       else
16311 +               toi_cleanup_usm();
16312 +
16313 +       storage_manager_last_action = storage_manager_action;
16314 +}
16315 +
16316 +/*
16317 + * User interface specific /sys/power/tuxonice entries.
16318 + */
16319 +
16320 +static struct toi_sysfs_data sysfs_params[] = {
16321 +       SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate),
16322 +       SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL),
16323 +       SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0,
16324 +               NULL),
16325 +       SYSFS_INT("activate_storage", SYSFS_RW, &storage_manager_action, 0, 1,
16326 +                       0, storage_manager_activate)
16327 +};
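
Given the entries registered above and the .directory = "storage_manager" in the usm_ops struct below, these attributes should appear under /sys/power/tuxonice/storage_manager/. A hedged userspace sketch (paths derived from the names above; the helper program path is purely hypothetical):

    #include <stdio.h>

    #define USM_DIR "/sys/power/tuxonice/storage_manager/"

    /* Write a single value to one of the sysfs attributes above. */
    static int write_attr(const char *attr, const char *val)
    {
            char path[128];
            FILE *f;

            snprintf(path, sizeof(path), USM_DIR "%s", attr);
            f = fopen(path, "w");
            if (!f)
                    return -1;
            fprintf(f, "%s\n", val);
            return fclose(f);
    }

    int main(void)
    {
            write_attr("program", "/usr/sbin/toi-usm-helper"); /* hypothetical */
            write_attr("enabled", "1");
            return write_attr("activate_storage", "1") ? 1 : 0;
    }
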
16328 +
16329 +static struct toi_module_ops usm_ops = {
16330 +       .type                           = MISC_MODULE,
16331 +       .name                           = "usm",
16332 +       .directory                      = "storage_manager",
16333 +       .module                         = THIS_MODULE,
16334 +       .storage_needed                 = usm_storage_needed,
16335 +       .save_config_info               = usm_save_config_info,
16336 +       .load_config_info               = usm_load_config_info,
16337 +       .memory_needed                  = usm_memory_needed,
16338 +
16339 +       .sysfs_data                     = sysfs_params,
16340 +       .num_sysfs_entries              = sizeof(sysfs_params) /
16341 +               sizeof(struct toi_sysfs_data),
16342 +};
16343 +
16344 +/* toi_usm_init
16345 + * Description: Boot time initialisation for the userspace storage manager.
16346 + */
16347 +int toi_usm_init(void)
16348 +{
16349 +       usm_helper_data.nl = NULL;
16350 +       usm_helper_data.program[0] = '\0';
16351 +       usm_helper_data.pid = -1;
16352 +       usm_helper_data.skb_size = 0;
16353 +       usm_helper_data.pool_limit = 6;
16354 +       usm_helper_data.netlink_id = NETLINK_TOI_USM;
16355 +       usm_helper_data.name = "userspace storage manager";
16356 +       usm_helper_data.rcv_msg = usm_user_rcv_msg;
16357 +       usm_helper_data.interface_version = 2;
16358 +       usm_helper_data.must_init = 0;
16359 +       init_completion(&usm_helper_data.wait_for_process);
16360 +
16361 +       return toi_register_module(&usm_ops);
16362 +}
16363 +
16364 +void toi_usm_exit(void)
16365 +{
16366 +       toi_netlink_close_complete(&usm_helper_data);
16367 +       toi_unregister_module(&usm_ops);
16368 +}
16369 diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h
16370 new file mode 100644
16371 index 0000000..24f8e8a
16372 --- /dev/null
16373 +++ b/kernel/power/tuxonice_storage.h
16374 @@ -0,0 +1,45 @@
16375 +/*
16376 + * kernel/power/tuxonice_storage.h
16377 + *
16378 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
16379 + *
16380 + * This file is released under the GPLv2.
16381 + */
16382 +
16383 +#ifdef CONFIG_NET
16384 +int toi_prepare_usm(void);
16385 +void toi_cleanup_usm(void);
16386 +
16387 +int toi_activate_storage(int force);
16388 +int toi_deactivate_storage(int force);
16389 +extern int toi_usm_init(void);
16390 +extern void toi_usm_exit(void);
16391 +#else
16392 +static inline int toi_usm_init(void) { return 0; }
16393 +static inline void toi_usm_exit(void) { }
16394 +
16395 +static inline int toi_activate_storage(int force)
16396 +{
16397 +       return 0;
16398 +}
16399 +
16400 +static inline int toi_deactivate_storage(int force)
16401 +{
16402 +       return 0;
16403 +}
16404 +
16405 +static inline int toi_prepare_usm(void) { return 0; }
16406 +static inline void toi_cleanup_usm(void) { }
16407 +#endif
16408 +
16409 +enum {
16410 +       USM_MSG_BASE = 0x10,
16411 +
16412 +       /* Kernel -> Userspace */
16413 +       USM_MSG_CONNECT = 0x30,
16414 +       USM_MSG_DISCONNECT = 0x31,
16415 +       USM_MSG_SUCCESS = 0x40,
16416 +       USM_MSG_FAILED = 0x41,
16417 +
16418 +       USM_MSG_MAX,
16419 +};
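
For illustration, a hedged sketch of the userspace half of this protocol: bind a netlink socket, wait for USM_MSG_CONNECT/USM_MSG_DISCONNECT from the kernel, do the storage work, and answer with USM_MSG_SUCCESS. The kernel side demands CAP_NET_ADMIN, so this must run as root; the NETLINK_TOI_USM protocol number is defined in tuxonice_netlink.h, not in this hunk, so the value below is a placeholder assumption:

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #define NETLINK_TOI_USM    21   /* placeholder; see tuxonice_netlink.h */
    #define USM_MSG_CONNECT    0x30
    #define USM_MSG_DISCONNECT 0x31
    #define USM_MSG_SUCCESS    0x40

    static void reply(int sock, int type)
    {
            struct nlmsghdr nlh;
            struct sockaddr_nl dest = { .nl_family = AF_NETLINK }; /* pid 0 = kernel */

            memset(&nlh, 0, sizeof(nlh));
            nlh.nlmsg_len = NLMSG_LENGTH(0);
            nlh.nlmsg_type = type;
            sendto(sock, &nlh, nlh.nlmsg_len, 0,
                   (struct sockaddr *) &dest, sizeof(dest));
    }

    int main(void)
    {
            struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
            char buf[256];
            int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_TOI_USM);

            if (sock < 0 || bind(sock, (struct sockaddr *) &addr, sizeof(addr)))
                    return 1;

            for (;;) {
                    struct nlmsghdr *nlh = (struct nlmsghdr *) buf;

                    if (recv(sock, buf, sizeof(buf), 0) <= 0)
                            break;
                    switch (nlh->nlmsg_type) {
                    case USM_MSG_CONNECT:
                            /* Assemble/open storage here (lvm, mounts, ...) */
                            reply(sock, USM_MSG_SUCCESS);
                            break;
                    case USM_MSG_DISCONNECT:
                            reply(sock, USM_MSG_SUCCESS);
                            break;
                    }
            }
            close(sock);
            return 0;
    }
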
16420 diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c
16421 new file mode 100644
16422 index 0000000..3753f5b
16423 --- /dev/null
16424 +++ b/kernel/power/tuxonice_swap.c
16425 @@ -0,0 +1,1334 @@
16426 +/*
16427 + * kernel/power/tuxonice_swap.c
16428 + *
16429 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
16430 + *
16431 + * Distributed under GPLv2.
16432 + *
16433 + * This file encapsulates functions for usage of swap space as a
16434 + * backing store.
16435 + */
16436 +
16437 +#include <linux/suspend.h>
16438 +#include <linux/blkdev.h>
16439 +#include <linux/swapops.h>
16440 +#include <linux/swap.h>
16441 +#include <linux/syscalls.h>
16442 +
16443 +#include "tuxonice.h"
16444 +#include "tuxonice_sysfs.h"
16445 +#include "tuxonice_modules.h"
16446 +#include "tuxonice_io.h"
16447 +#include "tuxonice_ui.h"
16448 +#include "tuxonice_extent.h"
16449 +#include "tuxonice_block_io.h"
16450 +#include "tuxonice_alloc.h"
16451 +#include "tuxonice_builtin.h"
16452 +
16453 +static struct toi_module_ops toi_swapops;
16454 +
16455 +/* --- Struct of pages stored on disk */
16456 +
16457 +struct sig_data {
16458 +       dev_t device;
16459 +       unsigned long sector;
16460 +       int resume_attempted;
16461 +       int orig_sig_type;
16462 +};
16463 +
16464 +union diskpage {
16465 +       union swap_header swh;  /* swh.magic is the only member used */
16466 +       struct sig_data sig_data;
16467 +};
16468 +
16469 +union p_diskpage {
16470 +       union diskpage *pointer;
16471 +       char *ptr;
16472 +       unsigned long address;
16473 +};
16474 +
16475 +enum {
16476 +       IMAGE_SIGNATURE,
16477 +       NO_IMAGE_SIGNATURE,
16478 +       TRIED_RESUME,
16479 +       NO_TRIED_RESUME,
16480 +};
16481 +
16482 +/*
16483 + * Both of these hold versions of the swap header page. current_signature_page
16484 + * points to what we read from disk when starting to hibernate or checking
16485 + * whether to resume. no_image_signature_contents is the copy stored in the
16486 + * image header: the swap header as it looked when hibernation started.
16487 + */
16488 +static char *current_signature_page;
16489 +static char no_image_signature_contents[sizeof(struct sig_data)];
16490 +
16491 +/* Devices used for swap */
16492 +static struct toi_bdev_info devinfo[MAX_SWAPFILES];
16493 +
16494 +/* Extent chains for swap & blocks */
16495 +static struct hibernate_extent_chain swapextents;
16496 +static struct hibernate_extent_chain block_chain[MAX_SWAPFILES];
16497 +
16498 +static dev_t header_dev_t;
16499 +static struct block_device *header_block_device;
16500 +static unsigned long headerblock;
16501 +
16502 +/* For a swapfile automatically swapon'd/swapoff'd. */
16503 +static char swapfilename[32] = "";
16504 +static int toi_swapon_status;
16505 +
16506 +/* Header Page Information */
16507 +static long header_pages_reserved;
16508 +
16509 +/* Swap Pages */
16510 +static long swap_pages_allocated;
16511 +
16512 +/* User Specified Parameters. */
16513 +
16514 +static unsigned long resume_firstblock;
16515 +static dev_t resume_swap_dev_t;
16516 +static struct block_device *resume_block_device;
16517 +
16518 +static struct sysinfo swapinfo;
16519 +
16520 +/* Block devices open. */
16521 +struct bdev_opened {
16522 +       dev_t device;
16523 +       struct block_device *bdev;
16524 +};
16525 +
16526 +/*
16527 + * Entry MAX_SWAPFILES is the resume block device, which may
16528 + * be a swap device not enabled when we hibernate.
16529 + * Entry MAX_SWAPFILES + 1 is the header block device, which
16530 + * is needed before we find out which slot it occupies.
16531 + *
16532 + * We use a struct separate from devinfo so that we can track
16533 + * the bdevs we open, because if we need to abort resuming
16534 + * prior to the atomic restore, they need to be closed, but
16535 + * closing them after successfully resuming would be wrong.
16536 + */
16537 +static struct bdev_opened *bdevs_opened[MAX_SWAPFILES + 2];
16538 +
16539 +/**
16540 + * close_bdev: Close a swap bdev.
16541 + *
16542 + * i: The swap entry number to close.
16543 + */
16544 +static void close_bdev(int i)
16545 +{
16546 +       struct bdev_opened *this = bdevs_opened[i];
16547 +
16548 +       if (!this)
16549 +               return;
16550 +
16551 +       blkdev_put(this->bdev, FMODE_READ | FMODE_NDELAY);
16552 +       toi_kfree(8, this, sizeof(*this));
16553 +       bdevs_opened[i] = NULL;
16554 +}
16555 +
16556 +/**
16557 + * close_bdevs: Close all bdevs we opened.
16558 + *
16559 + * Close all bdevs that we opened and reset the related vars.
16560 + */
16561 +static void close_bdevs(void)
16562 +{
16563 +       int i;
16564 +
16565 +       for (i = 0; i < MAX_SWAPFILES + 2; i++)
16566 +               close_bdev(i);
16567 +
16568 +       resume_block_device = NULL;
16569 +       header_block_device = NULL;
16570 +}
16571 +
16572 +/**
16573 + * open_bdev: Open a bdev at resume time.
16574 + *
16575 + * index: The swap index. May be MAX_SWAPFILES for the resume_dev_t
16576 + * (the user can have resume= pointing at a swap partition/file that isn't
16577 + * swapon'd when they hibernate), or MAX_SWAPFILES+1 for the first page of
16578 + * the header. The latter will be from a swap partition that was enabled
16579 + * when we hibernated, but we don't know its real index until we read that
16580 + * first page.
16580 + * dev_t: The device major/minor.
16581 + * display_errs: Whether to report errors (rather than doing this quietly).
16582 + *
16583 + * We stored a dev_t in the image header. Open the matching device without
16584 + * requiring /dev/<whatever> in most cases and record the details needed
16585 + * to close it later and avoid duplicating work.
16586 + */
16587 +static struct block_device *open_bdev(int index, dev_t device, int display_errs)
16588 +{
16589 +       struct bdev_opened *this;
16590 +       struct block_device *bdev;
16591 +
16592 +       if (bdevs_opened[index]) {
16593 +               if (bdevs_opened[index]->device == device)
16594 +                       return bdevs_opened[index]->bdev;
16595 +
16596 +               close_bdev(index);
16597 +       }
16598 +
16599 +       bdev = toi_open_by_devnum(device, FMODE_READ | FMODE_NDELAY);
16600 +
16601 +       if (IS_ERR(bdev) || !bdev) {
16602 +               if (display_errs)
16603 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
16604 +                               "Failed to get access to block device "
16605 +                               "\"%x\" (error %ld).\n Maybe you need "
16606 +                               "to run mknod and/or lvmsetup in an "
16607 +                               "initrd/ramfs?", device, PTR_ERR(bdev));
16608 +               return ERR_PTR(-EINVAL);
16609 +       }
16610 +
16611 +       this = toi_kzalloc(8, sizeof(struct bdev_opened), GFP_KERNEL);
16612 +       if (!this) {
16613 +               printk(KERN_WARNING "TuxOnIce: Failed to allocate memory for "
16614 +                               "opening a bdev.");
16615 +               blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
16616 +               return ERR_PTR(-ENOMEM);
16617 +       }
16618 +
16619 +       bdevs_opened[index] = this;
16620 +       this->device = device;
16621 +       this->bdev = bdev;
16622 +
16623 +       return bdev;
16624 +}
16625 +
16626 +/**
16627 + * enable_swapfile: Swapon the user-specified swapfile prior to hibernating.
16628 + *
16629 + * Activate the given swapfile if it wasn't already enabled. Remember whether
16630 + * we really did swapon it for swapoffing later.
16631 + */
16632 +static void enable_swapfile(void)
16633 +{
16634 +       int activateswapresult = -EINVAL;
16635 +
16636 +       if (swapfilename[0]) {
16637 +               /* Attempt to swap on with maximum priority */
16638 +               activateswapresult = sys_swapon(swapfilename, 0xFFFF);
16639 +               if (activateswapresult && activateswapresult != -EBUSY)
16640 +                       printk("TuxOnIce: The swapfile/partition specified by "
16641 +                               "/sys/power/tuxonice/swap/swapfile "
16642 +                               "(%s) could not be turned on (error %d). "
16643 +                               "Attempting to continue.\n",
16644 +                               swapfilename, activateswapresult);
16645 +               if (!activateswapresult)
16646 +                       toi_swapon_status = 1;
16647 +       }
16648 +}
16649 +
16650 +/**
16651 + * disable_swapfile: Swapoff any file swapon'd at the start of the cycle.
16652 + *
16653 + * If we did successfully swapon a file at the start of the cycle, swapoff
16654 + * it now (finishing up).
16655 + */
16656 +static void disable_swapfile(void)
16657 +{
16658 +       if (!toi_swapon_status)
16659 +               return;
16660 +
16661 +       sys_swapoff(swapfilename);
16662 +       toi_swapon_status = 0;
16663 +}
16664 +
16665 +/**
16666 + * try_to_parse_resume_device: Try to parse resume=
16667 + *
16668 + * Any "swap:" prefix (e.g. in resume=swap:/dev/sda2) has been stripped; we
16669 + * just have the path. We attempt to do name_to_dev_t, open and stat the
16670 + * file. Having opened the file, get the struct block_device * to match.
16671 + */
16672 +static int try_to_parse_resume_device(char *commandline, int quiet)
16673 +{
16674 +       struct kstat stat;
16675 +       int error = 0;
16676 +
16677 +       wait_for_device_probe();
16678 +       resume_swap_dev_t = name_to_dev_t(commandline);
16679 +
16680 +       if (!resume_swap_dev_t) {
16681 +               struct file *file = filp_open(commandline,
16682 +                               O_RDONLY|O_LARGEFILE, 0);
16683 +
16684 +               if (!IS_ERR(file) && file) {
16685 +                       vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
16686 +                       filp_close(file, NULL);
16687 +               } else
16688 +                       error = vfs_stat(commandline, &stat);
16689 +               if (!error)
16690 +                       resume_swap_dev_t = stat.rdev;
16691 +       }
16692 +
16693 +       if (!resume_swap_dev_t) {
16694 +               if (quiet)
16695 +                       return 1;
16696 +
16697 +               if (test_toi_state(TOI_TRYING_TO_RESUME))
16698 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
16699 +                         "Failed to translate \"%s\" into a device id.\n",
16700 +                         commandline);
16701 +               else
16702 +                       printk("TuxOnIce: Can't translate \"%s\" into a device "
16703 +                                       "id yet.\n", commandline);
16704 +               return 1;
16705 +       }
16706 +
16707 +       resume_block_device = open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 0);
16708 +       if (IS_ERR(resume_block_device)) {
16709 +               if (!quiet)
16710 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
16711 +                               "Failed to get access to \"%s\", where"
16712 +                               " the swap header should be found.",
16713 +                               commandline);
16714 +               return 1;
16715 +       }
16716 +
16717 +       return 0;
16718 +}
16719 +
16720 +/*
16721 + * If we have read part of the image, we might have filled memory with
16722 + * data that should be zeroed out.
16723 + */
16724 +static void toi_swap_noresume_reset(void)
16725 +{
16726 +       toi_bio_ops.rw_cleanup(READ);
16727 +       memset((char *) &devinfo, 0, sizeof(devinfo));
16728 +}
16729 +
16730 +static int get_current_signature(void)
16731 +{
16732 +       if (!current_signature_page) {
16733 +               current_signature_page = (char *) toi_get_zeroed_page(38,
16734 +                       TOI_ATOMIC_GFP);
16735 +               if (!current_signature_page)
16736 +                       return -ENOMEM;
16737 +       }
16738 +
16739 +       return toi_bio_ops.bdev_page_io(READ, resume_block_device,
16740 +               resume_firstblock, virt_to_page(current_signature_page));
16741 +}
16742 +
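
/*
 * parse_signature
 *
 * Returns 0-4 when a known swap/suspend signature is found (the index into
 * sigs[] below), -1 for an unrecognised page, or 10 when our TuxOnIce
 * signature is found, in which case the header device, sector and
 * resume_attempted flag are also extracted from the sig_data.
 */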
16743 +static int parse_signature(void)
16744 +{
16745 +       union p_diskpage swap_header_page;
16746 +       struct sig_data *sig;
16747 +       int type;
16748 +       char *swap_header;
16749 +       const char *sigs[] = {
16750 +               "SWAP-SPACE", "SWAPSPACE2", "S1SUSP", "S2SUSP", "S1SUSPEND"
16751 +       };
16752 +
16753 +       int result = get_current_signature();
16754 +       if (result)
16755 +               return result;
16756 +
16757 +       swap_header_page = (union p_diskpage) current_signature_page;
16758 +       sig = (struct sig_data *) current_signature_page;
16759 +       swap_header = swap_header_page.pointer->swh.magic.magic;
16760 +
16761 +       for (type = 0; type < 5; type++)
16762 +               if (!memcmp(sigs[type], swap_header, strlen(sigs[type])))
16763 +                       return type;
16764 +
16765 +       if (memcmp(tuxonice_signature, swap_header, sizeof(tuxonice_signature)))
16766 +               return -1;
16767 +
16768 +       header_dev_t = sig->device;
16769 +       clear_toi_state(TOI_RESUMED_BEFORE);
16770 +       if (sig->resume_attempted)
16771 +               set_toi_state(TOI_RESUMED_BEFORE);
16772 +       headerblock = sig->sector;
16773 +
16774 +       return 10;
16775 +}
16776 +
16777 +static void forget_signatures(void)
16778 +{
16779 +       if (current_signature_page) {
16780 +               toi_free_page(38, (unsigned long) current_signature_page);
16781 +               current_signature_page = NULL;
16782 +       }
16783 +}
16784 +
16785 +/*
16786 + * write_modified_signature
16787 + *
16788 + * Write a (potentially) modified signature page without forgetting the
16789 + * original contents.
16790 + */
16791 +static int write_modified_signature(int modification)
16792 +{
16793 +       union p_diskpage swap_header_page;
16794 +       struct swap_info_struct *si;
16795 +       int result;
16796 +       char *orig_sig;
16797 +
16798 +       /* In case we haven't already */
16799 +       result = get_current_signature();
16800 +
16801 +       if (result)
16802 +               return result;
16803 +
16804 +       swap_header_page.address = toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
16805 +
16806 +       if (!swap_header_page.address)
16807 +               return -ENOMEM;
16808 +
16809 +       memcpy(swap_header_page.ptr, current_signature_page, PAGE_SIZE);
16810 +
16811 +       switch (modification) {
16812 +       case IMAGE_SIGNATURE:
16813 +
16814 +               memcpy(no_image_signature_contents, swap_header_page.ptr,
16815 +                               sizeof(no_image_signature_contents));
16816 +
16817 +               /* Get the details of the header first page. */
16818 +               toi_extent_state_goto_start(&toi_writer_posn);
16819 +               toi_bio_ops.forward_one_page(1, 1);
16820 +
16821 +               si = get_swap_info_struct(toi_writer_posn.current_chain);
16822 +
16823 +               /* Prepare the signature */
16824 +               swap_header_page.pointer->sig_data.device = si->bdev->bd_dev;
16825 +               swap_header_page.pointer->sig_data.sector =
16826 +                       toi_writer_posn.current_offset;
16827 +               swap_header_page.pointer->sig_data.resume_attempted = 0;
16828 +               swap_header_page.pointer->sig_data.orig_sig_type =
16829 +                       parse_signature();
16830 +
16831 +               memcpy(swap_header_page.pointer->swh.magic.magic,
16832 +                               tuxonice_signature, sizeof(tuxonice_signature));
16833 +
16834 +               break;
16835 +       case NO_IMAGE_SIGNATURE:
16836 +               if (!swap_header_page.pointer->sig_data.orig_sig_type)
16837 +                       orig_sig = "SWAP-SPACE";
16838 +               else
16839 +                       orig_sig = "SWAPSPACE2";
16840 +
16841 +               memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10);
16842 +               memcpy(swap_header_page.ptr, no_image_signature_contents,
16843 +                               sizeof(no_image_signature_contents));
16844 +               break;
16845 +       case TRIED_RESUME:
16846 +               swap_header_page.pointer->sig_data.resume_attempted = 1;
16847 +               break;
16848 +       case NO_TRIED_RESUME:
16849 +               swap_header_page.pointer->sig_data.resume_attempted = 0;
16850 +               break;
16851 +       }
16852 +
16853 +       result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
16854 +               resume_firstblock, virt_to_page(swap_header_page.address));
16855 +
16856 +       memcpy(current_signature_page, swap_header_page.ptr, PAGE_SIZE);
16857 +
16858 +       toi_free_page(38, swap_header_page.address);
16859 +
16860 +       return result;
16861 +}
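
The four modification codes give the swap header a small lifecycle; a hedged summary (the TRIED_RESUME/NO_TRIED_RESUME transitions are driven from resume-time code outside this hunk):

    /*
     *  swap signature --IMAGE_SIGNATURE-----> TuxOnIce signature written,
     *                                         original bytes kept in
     *                                         no_image_signature_contents
     *  TuxOnIce sig   --TRIED_RESUME--------> sig_data.resume_attempted = 1
     *  TuxOnIce sig   --NO_TRIED_RESUME-----> sig_data.resume_attempted = 0
     *  TuxOnIce sig   --NO_IMAGE_SIGNATURE--> original swap signature restored
     */
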
16862 +
16863 +/*
16864 + * apply_header_reservation
16865 + */
16866 +static int apply_header_reservation(void)
16867 +{
16868 +       int i;
16869 +
16870 +       toi_extent_state_goto_start(&toi_writer_posn);
16871 +
16872 +       for (i = 0; i < header_pages_reserved; i++)
16873 +               if (toi_bio_ops.forward_one_page(1, 0))
16874 +                       return -ENOSPC;
16875 +
16876 +       /* The end of header pages will be the start of pageset 2;
16877 +        * we are now sitting on the first pageset2 page. */
16878 +       toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]);
16879 +       return 0;
16880 +}
16881 +
16882 +static void toi_swap_reserve_header_space(int request)
16883 +{
16884 +       header_pages_reserved = (long) request;
16885 +}
16886 +
16887 +static void free_block_chains(void)
16888 +{
16889 +       int i;
16890 +
16891 +       for (i = 0; i < MAX_SWAPFILES; i++)
16892 +               if (block_chain[i].first)
16893 +                       toi_put_extent_chain(&block_chain[i]);
16894 +}
16895 +
16896 +static int add_blocks_to_extent_chain(int chain, int start, int end)
16897 +{
16898 +       if (test_action_state(TOI_TEST_BIO))
16899 +               printk(KERN_INFO "Adding extent chain %d %d-%d.\n", chain,
16900 +                               start << devinfo[chain].bmap_shift,
16901 +                               end << devinfo[chain].bmap_shift);
16902 +
16903 +       if (toi_add_to_extent_chain(&block_chain[chain], start, end)) {
16904 +               free_block_chains();
16905 +               return -ENOMEM;
16906 +       }
16907 +
16908 +       return 0;
16909 +}
16910 +
16911 +
16912 +static int get_main_pool_phys_params(void)
16913 +{
16914 +       struct hibernate_extent *extentpointer = NULL;
16915 +       unsigned long address;
16916 +       int extent_min = -1, extent_max = -1, last_chain = -1;
16917 +
16918 +       free_block_chains();
16919 +
16920 +       toi_extent_for_each(&swapextents, extentpointer, address) {
16921 +               swp_entry_t swap_address = (swp_entry_t) { address };
16922 +               pgoff_t offset = swp_offset(swap_address);
16923 +               unsigned swapfilenum = swp_type(swap_address);
16924 +               struct swap_info_struct *sis =
16925 +                       get_swap_info_struct(swapfilenum);
16926 +               sector_t new_sector = map_swap_page(sis, offset);
16927 +
16928 +               if (devinfo[swapfilenum].ignored)
16929 +                       continue;
16930 +
16931 +               if ((new_sector == extent_max + 1) &&
16932 +                   (last_chain == swapfilenum)) {
16933 +                       extent_max++;
16934 +                       continue;
16935 +               }
16936 +
16937 +               if (extent_min > -1 && add_blocks_to_extent_chain(last_chain,
16938 +                                       extent_min, extent_max)) {
16939 +                       printk(KERN_ERR "Out of memory while making block "
16940 +                                       "chains.\n");
16941 +                       return -ENOMEM;
16942 +               }
16943 +
16944 +               extent_min = new_sector;
16945 +               extent_max = new_sector;
16946 +               last_chain = swapfilenum;
16947 +       }
16948 +
16949 +       if (extent_min > -1 && add_blocks_to_extent_chain(last_chain,
16950 +                               extent_min, extent_max)) {
16951 +               printk(KERN_ERR "Out of memory while making block chains.\n");
16952 +               return -ENOMEM;
16953 +       }
16954 +
16955 +       return apply_header_reservation();
16956 +}
16957 +
16958 +static long raw_to_real(long raw)
16959 +{
16960 +       long result;
16961 +
16962 +       result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) +
16963 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
16964 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
16965 +
16966 +       return result < 0 ? 0 : result;
16967 +}
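
raw_to_real() subtracts the bookkeeping overhead: each stored page also costs sizeof(unsigned long) + sizeof(int) bytes of extent metadata. A quick worked check of the formula, assuming a 64-bit build (8-byte longs, 4-byte ints) and 4K pages:

    #include <stdio.h>

    int main(void)
    {
            long raw = 1000;
            /* Same arithmetic as raw_to_real(), with the sizes plugged in:
             * 1000 - (1000 * 12 + 4109) / 4108 = 1000 - 3 = 997. */
            long result = raw - (raw * (8 + 4) + (4096 + 8 + 4 + 1)) /
                    (4096 + 8 + 4);

            printf("raw_to_real(%ld) = %ld\n", raw, result);
            return 0;
    }
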
16968 +
16969 +static int toi_swap_storage_allocated(void)
16970 +{
16971 +       return (int) raw_to_real(swap_pages_allocated - header_pages_reserved);
16972 +}
16973 +
16974 +/*
16975 + * Like si_swapinfo, except that we don't include ram-backed swap (compcache!)
16976 + * and don't need to use the spinlocks (userspace is stopped when this
16977 + * function is called).
16978 + */
16979 +void si_swapinfo_no_compcache(struct sysinfo *val)
16980 +{
16981 +       unsigned int i;
16982 +
16983 +       si_swapinfo(&swapinfo);
16984 +       val->freeswap = 0;
16985 +       val->totalswap = 0;
16986 +
16987 +       for (i = 0; i < MAX_SWAPFILES; i++) {
16988 +               struct swap_info_struct *si = get_swap_info_struct(i);
16989 +               if ((si->flags & SWP_USED) && si->swap_map &&
16990 +                   (si->flags & SWP_WRITEOK) &&
16991 +                   (strncmp(si->bdev->bd_disk->disk_name, "ram", 3))) {
16992 +                       val->totalswap += si->pages;
16993 +                       val->freeswap += si->pages - si->inuse_pages;
16994 +               }
16995 +       }
16996 +}
16997 +/*
16998 + * We can't just remember the value from allocation time, because other
16999 + * processes might have allocated swap in the mean time.
17000 + */
17001 +static int toi_swap_storage_available(void)
17002 +{
17003 +       si_swapinfo_no_compcache(&swapinfo);
17004 +       return (int) raw_to_real((long) swapinfo.freeswap +
17005 +                       swap_pages_allocated - header_pages_reserved);
17006 +}
17007 +
17008 +static int toi_swap_initialise(int starting_cycle)
17009 +{
17010 +       int result = 0;
17011 +
17012 +       if (!starting_cycle)
17013 +               return 0;
17014 +
17015 +       enable_swapfile();
17016 +
17017 +       if (resume_swap_dev_t && !resume_block_device) {
17018 +               resume_block_device = open_bdev(MAX_SWAPFILES,
17019 +                               resume_swap_dev_t, 1);
17020 +               if (IS_ERR(resume_block_device))
17021 +                       result = 1;
17022 +       }
17023 +
17024 +       return result;
17025 +}
17026 +
17027 +static void toi_swap_cleanup(int ending_cycle)
17028 +{
17029 +       if (ending_cycle)
17030 +               disable_swapfile();
17031 +
17032 +       close_bdevs();
17033 +
17034 +       forget_signatures();
17035 +}
17036 +
17037 +static int toi_swap_release_storage(void)
17038 +{
17039 +       header_pages_reserved = 0;
17040 +       swap_pages_allocated = 0;
17041 +
17042 +       if (swapextents.first) {
17043 +               /* Free swap entries */
17044 +               struct hibernate_extent *extentpointer;
17045 +               unsigned long extentvalue;
17046 +               toi_extent_for_each(&swapextents, extentpointer,
17047 +                               extentvalue)
17048 +                       swap_free((swp_entry_t) { extentvalue });
17049 +
17050 +               toi_put_extent_chain(&swapextents);
17051 +
17052 +               free_block_chains();
17053 +       }
17054 +
17055 +       return 0;
17056 +}
17057 +
17058 +static void free_swap_range(unsigned long min, unsigned long max)
17059 +{
17060 +       int j;
17061 +
17062 +       for (j = min; j <= max; j++)
17063 +               swap_free((swp_entry_t) { j });
17064 +}
17065 +
17066 +/*
17067 + * Round-robin allocation (used where swap storage has the same priority)
17068 + * could make this very inefficient, so we track extents allocated on
17069 + * a per-swapfile basis.
17070 + */
17071 +static int toi_swap_allocate_storage(int request)
17072 +{
17073 +       int i, result = 0, to_add[MAX_SWAPFILES], pages_to_get, extra_pages,
17074 +           gotten = 0, result2;
17075 +       unsigned long extent_min[MAX_SWAPFILES], extent_max[MAX_SWAPFILES];
17076 +
17077 +       extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
17078 +                              + sizeof(int)), PAGE_SIZE);
17079 +       pages_to_get = request + extra_pages - swapextents.size +
17080 +               header_pages_reserved;
17081 +
17082 +       if (pages_to_get < 1)
17083 +               return apply_header_reservation();
17084 +
17085 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17086 +               struct swap_info_struct *si = get_swap_info_struct(i);
17087 +               to_add[i] = 0;
17088 +               if (!(si->flags & SWP_USED) || !si->swap_map ||
17089 +                   !(si->flags & SWP_WRITEOK))
17090 +                       continue;
17091 +               if (!strncmp(si->bdev->bd_disk->disk_name, "ram", 3)) {
17092 +                       devinfo[i].ignored = 1;
17093 +                       continue;
17094 +               }
17095 +               devinfo[i].ignored = 0;
17096 +               devinfo[i].bdev = si->bdev;
17097 +               devinfo[i].dev_t = si->bdev->bd_dev;
17098 +               devinfo[i].bmap_shift = 3;
17099 +               devinfo[i].blocks_per_page = 1;
17100 +       }
17101 +
17102 +       while (gotten < pages_to_get) {
17103 +               swp_entry_t entry;
17104 +               unsigned long new_value;
17105 +               unsigned swapfilenum;
17106 +
17107 +               entry = get_swap_page();
17108 +               if (!entry.val)
17109 +                       break;
17110 +
17111 +               swapfilenum = swp_type(entry);
17112 +               new_value = entry.val;
17113 +
17114 +               if (!to_add[swapfilenum]) {
17115 +                       to_add[swapfilenum] = 1;
17116 +                       extent_min[swapfilenum] = new_value;
17117 +                       extent_max[swapfilenum] = new_value;
17118 +                       if (!devinfo[swapfilenum].ignored)
17119 +                               gotten++;
17120 +                       continue;
17121 +               }
17122 +
17123 +               if (new_value == extent_max[swapfilenum] + 1) {
17124 +                       extent_max[swapfilenum]++;
17125 +                       if (!devinfo[swapfilenum].ignored)
17126 +                               gotten++;
17127 +                       continue;
17128 +               }
17129 +
17130 +               if (toi_add_to_extent_chain(&swapextents,
17131 +                                       extent_min[swapfilenum],
17132 +                                       extent_max[swapfilenum])) {
17133 +                       printk(KERN_INFO "Failed to allocate extent for "
17134 +                                       "%lu-%lu.\n", extent_min[swapfilenum],
17135 +                                       extent_max[swapfilenum]);
17136 +                       free_swap_range(extent_min[swapfilenum],
17137 +                                       extent_max[swapfilenum]);
17138 +                       swap_free(entry);
17139 +                       if (!devinfo[swapfilenum].ignored)
17140 +                               gotten -= (extent_max[swapfilenum] -
17141 +                                       extent_min[swapfilenum] + 1);
17142 +                       /* Don't try to add again below */
17143 +                       to_add[swapfilenum] = 0;
17144 +                       break;
17145 +               } else {
17146 +                       extent_min[swapfilenum] = new_value;
17147 +                       extent_max[swapfilenum] = new_value;
17148 +                       if (!devinfo[swapfilenum].ignored)
17149 +                               gotten++;
17150 +               }
17151 +       }
17152 +
17153 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17154 +               int this_result;
17155 +
17156 +               /* Anything to do for this swap entry? */
17157 +               if (!to_add[i])
17158 +                       continue;
17159 +
17160 +               this_result = toi_add_to_extent_chain(&swapextents,
17161 +                               extent_min[i], extent_max[i]);
17162 +
17163 +               /* Added okay? */
17164 +               if (!this_result)
17165 +                       continue;
17166 +
17167 +               /* 
17168 +                * Nope. Remember an error occurred, free the swap and subtract
17169 +                * from the amount of swap allocated.
17170 +                */
17171 +               result = this_result;
17172 +
17173 +               free_swap_range(extent_min[i], extent_max[i]);
17174 +               if (!devinfo[i].ignored)
17175 +                       gotten -= (extent_max[i] - extent_min[i] + 1);
17176 +       }
17177 +
17178 +       if (gotten < pages_to_get) {
17179 +               printk("Got fewer pages than required "
17180 +                               "(%d wanted, %d gotten).\n",
17181 +                               pages_to_get, gotten);
17182 +               result = -ENOSPC;
17183 +       }
17184 +
17185 +       swap_pages_allocated += (long) gotten;
17186 +
17187 +       result2 = get_main_pool_phys_params();
17188 +
17189 +       return result ? result : result2;
17190 +}
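
toi_swap_allocate_storage() coalesces consecutive swap offsets into per-swapfile (min, max) runs, only touching the extent chain when a run breaks. A minimal standalone sketch of that run-merging pattern, using plain numbers instead of swp_entry_t values:

    #include <stdio.h>

    int main(void)
    {
            unsigned long vals[] = { 10, 11, 12, 40, 41, 90 };
            unsigned long min = vals[0], max = vals[0];
            int i, n = sizeof(vals) / sizeof(vals[0]);

            for (i = 1; i < n; i++) {
                    if (vals[i] == max + 1) {
                            max++;           /* extend the current extent */
                            continue;
                    }
                    printf("extent %lu-%lu\n", min, max);
                    min = max = vals[i];     /* start a new extent */
            }
            printf("extent %lu-%lu\n", min, max);
            return 0;
    }
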
17191 +
17192 +static int toi_swap_write_header_init(void)
17193 +{
17194 +       int i, result;
17195 +       struct swap_info_struct *si;
17196 +
17197 +       toi_bio_ops.rw_init(WRITE, 0);
17198 +       toi_writer_buffer_posn = 0;
17199 +
17200 +       /* Info needed to bootstrap goes at the start of the header.
17201 +        * First we save the positions and devinfo, including the number
17202 +        * of header pages. Then we save the structs containing data needed
17203 +        * for reading the header pages back.
17204 +        * Note that even if the header takes more than one page, by the
17205 +        * time we go to use the next header page we will already have
17206 +        * restored its location.
17207 +        */
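+       /*
+        * Illustrative on-disk order of this bootstrap section, matching
+        * the writes below (sizes depend on the configuration):
+        *
+        *   struct sig_data                 no_image_signature_contents
+        *   toi_writer_posn_save            where the header pages live
+        *   devinfo[MAX_SWAPFILES]          per-device dev_t etc.
+        *   block_chain[0..MAX_SWAPFILES-1] serialised extent chains
+        */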
17208 +
17209 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
17210 +                       (char *) &no_image_signature_contents,
17211 +                       sizeof(struct sig_data));
17212 +
17213 +       if (result)
17214 +               return result;
17215 +
17216 +       /* Moving forward one page will be done prior to the read */
17217 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17218 +               si = get_swap_info_struct(i);
17219 +               if (si->flags & SWP_USED && si->swap_map &&
17220 +                   si->flags & SWP_WRITEOK)
17221 +                       devinfo[i].dev_t = si->bdev->bd_dev;
17222 +               else
17223 +                       devinfo[i].dev_t = (dev_t) 0;
17224 +       }
17225 +
17226 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
17227 +                       (char *) &toi_writer_posn_save,
17228 +                       sizeof(toi_writer_posn_save));
17229 +
17230 +       if (result)
17231 +               return result;
17232 +
17233 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
17234 +                       (char *) &devinfo, sizeof(devinfo));
17235 +
17236 +       if (result)
17237 +               return result;
17238 +
17239 +       for (i = 0; i < MAX_SWAPFILES; i++)
17240 +               toi_serialise_extent_chain(&toi_swapops, &block_chain[i]);
17241 +
17242 +       return 0;
17243 +}
17244 +
17245 +static int toi_swap_write_header_cleanup(void)
17246 +{
17247 +       int result = toi_bio_ops.write_header_chunk_finish();
17248 +
17249 +       /* Set the signature to say we have an image */
17250 +       if (!result)
17251 +               result = write_modified_signature(IMAGE_SIGNATURE);
17252 +
17253 +       return result;
17254 +}
17255 +
17256 +/* ------------------------- HEADER READING ------------------------- */
17257 +
17258 +/*
17259 + * read_header_init()
17260 + *
17261 + * Description:
17262 + * 1. Attempt to read the device specified with resume=.
17263 + * 2. Check the contents of the swap header for our signature.
17264 + * 3. Warn, ignore, reset and/or continue as appropriate.
17265 + * 4. If continuing, read the toi_swap configuration section
17266 + *    of the header and set up block device info so we can read
17267 + *    the rest of the header & image.
17268 + *
17269 + * Returns:
17270 + * May not return if the user chooses to reboot at a warning.
17271 + * -EINVAL if we cannot resume at this time. Booting should continue
17272 + * normally.
17273 + */
17274 +
17275 +static int toi_swap_read_header_init(void)
17276 +{
17277 +       int i, result = 0;
17278 +       toi_writer_buffer_posn = 0;
17279 +
17280 +       if (!header_dev_t) {
17281 +               printk(KERN_INFO "read_header_init called when we haven't "
17282 +                               "verified there is an image!\n");
17283 +               return -EINVAL;
17284 +       }
17285 +
17286 +       /*
17287 +        * If the header is not on the resume_swap_dev_t, get the resume device
17288 +        * first.
17289 +        */
17290 +       if (header_dev_t != resume_swap_dev_t) {
17291 +               header_block_device = open_bdev(MAX_SWAPFILES + 1,
17292 +                               header_dev_t, 1);
17293 +
17294 +               if (IS_ERR(header_block_device))
17295 +                       return PTR_ERR(header_block_device);
17296 +       } else
17297 +               header_block_device = resume_block_device;
17298 +
17299 +       toi_bio_ops.read_header_init();
17300 +
17301 +       /*
17302 +        * Read toi_swap configuration.
17303 +        * Headerblock size taken into account already.
17304 +        */
17305 +       result = toi_bio_ops.bdev_page_io(READ, header_block_device,
17306 +                       headerblock << 3,
17307 +                       virt_to_page((unsigned long) toi_writer_buffer));
17308 +       if (result)
17309 +               return result;
17310 +
17311 +       memcpy(&no_image_signature_contents, toi_writer_buffer,
17312 +                       sizeof(no_image_signature_contents));
17313 +
17314 +       toi_writer_buffer_posn = sizeof(no_image_signature_contents);
17315 +
17316 +       memcpy(&toi_writer_posn_save, toi_writer_buffer +
17317 +                       toi_writer_buffer_posn, sizeof(toi_writer_posn_save));
17318 +
17319 +       toi_writer_buffer_posn += sizeof(toi_writer_posn_save);
17320 +
17321 +       memcpy(&devinfo, toi_writer_buffer + toi_writer_buffer_posn,
17322 +                       sizeof(devinfo));
17323 +
17324 +       toi_writer_buffer_posn += sizeof(devinfo);
17325 +
17326 +       /* Restore device info */
17327 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17328 +               dev_t thisdevice = devinfo[i].dev_t;
17329 +               struct block_device *bdev_result;
17330 +
17331 +               devinfo[i].bdev = NULL;
17332 +
17333 +               if (!thisdevice || devinfo[i].ignored)
17334 +                       continue;
17335 +
17336 +               if (thisdevice == resume_swap_dev_t) {
17337 +                       devinfo[i].bdev = resume_block_device;
17338 +                       continue;
17339 +               }
17340 +
17341 +               if (thisdevice == header_dev_t) {
17342 +                       devinfo[i].bdev = header_block_device;
17343 +                       continue;
17344 +               }
17345 +
17346 +               bdev_result = open_bdev(i, thisdevice, 1);
17347 +               if (IS_ERR(bdev_result))
17348 +                       return PTR_ERR(bdev_result);
17349 +               devinfo[i].bdev = bdevs_opened[i]->bdev;
17350 +       }
17351 +
17352 +       toi_extent_state_goto_start(&toi_writer_posn);
17353 +       toi_bio_ops.set_extra_page_forward();
17354 +
17355 +       for (i = 0; i < MAX_SWAPFILES && !result; i++)
17356 +               result = toi_load_extent_chain(&block_chain[i]);
17357 +
17358 +       return result;
17359 +}
17360 +
17361 +static int toi_swap_read_header_cleanup(void)
17362 +{
17363 +       toi_bio_ops.rw_cleanup(READ);
17364 +       return 0;
17365 +}
17366 +
17367 +/*
17368 + * toi_swap_memory_needed
17369 + *
17370 + * Description:
17371 + * Returns the number of bytes of RAM needed for this
17372 + * code to do its work. (Used when calculating whether
17373 + * we have enough memory to be able to hibernate & resume.)
17374 + */
17376 +static int toi_swap_memory_needed(void)
17377 +{
17378 +       return 1;
17379 +}
17380 +
17381 +/*
17382 + * Print debug info
17383 + *
17384 + * Description:
17385 + * Describe the current state of the swap allocator for the debug log.
17386 + */
17386 +static int toi_swap_print_debug_stats(char *buffer, int size)
17387 +{
17388 +       int len = 0;
17389 +       struct sysinfo sysinfo;
17390 +
17391 +       if (toiActiveAllocator != &toi_swapops) {
17392 +               len = scnprintf(buffer, size,
17393 +                               "- SwapAllocator inactive.\n");
17394 +               return len;
17395 +       }
17396 +
17397 +       len = scnprintf(buffer, size, "- SwapAllocator active.\n");
17398 +       if (swapfilename[0])
17399 +               len += scnprintf(buffer+len, size-len,
17400 +                       "  Attempting to automatically swapon: %s.\n",
17401 +                       swapfilename);
17402 +
17403 +       si_swapinfo_no_compcache(&sysinfo);
17404 +
17405 +       len += scnprintf(buffer+len, size-len,
17406 +                       "  Swap available for image: %d pages.\n",
17407 +                       (int) sysinfo.freeswap + toi_swap_storage_allocated());
17408 +
17409 +       return len;
17410 +}
17411 +
17412 +/*
17413 + * Storage needed
17414 + *
17415 + * Returns the amount of space in the swap header required
17416 + * for toi_swap's data. This ignores the links between
17417 + * pages, which we factor in when allocating the space.
17418 + *
17419 + * We ensure the space is allocated, but actually save the
17420 + * data from write_header_init and therefore don't also define a
17421 + * save_config_info routine.
17422 + */
17423 +static int toi_swap_storage_needed(void)
17424 +{
17425 +       int i, result;
17426 +       result = sizeof(struct sig_data) + sizeof(toi_writer_posn_save) +
17427 +               sizeof(devinfo);
17428 +
17429 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17430 +               result += 2 * sizeof(int);
17431 +               result += (2 * sizeof(unsigned long) *
17432 +                       block_chain[i].num_extents);
17433 +       }
17434 +
17435 +       return result;
17436 +}
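+/*
+ * Illustrative sizing (exact values are architecture dependent): with a
+ * single swap device whose block chain holds three extents, the above
+ * works out to sizeof(struct sig_data) + sizeof(toi_writer_posn_save) +
+ * sizeof(devinfo) + MAX_SWAPFILES * 2 * sizeof(int) +
+ * 3 * 2 * sizeof(unsigned long) bytes -- comfortably under a page.
+ */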
17437 +
17438 +/*
17439 + * Image_exists
17440 + *
17441 + * Returns -1 if we don't know, otherwise 0 (no) or 1 (yes).
17442 + */
17443 +static int toi_swap_image_exists(int quiet)
17444 +{
17445 +       int signature_found;
17446 +
17447 +       if (!resume_swap_dev_t) {
17448 +               if (!quiet)
17449 +                       printk(KERN_INFO "Not even trying to read header "
17450 +                               "because resume_swap_dev_t is not set.\n");
17451 +               return -1;
17452 +       }
17453 +
17454 +       if (!resume_block_device) {
17455 +               resume_block_device = open_bdev(MAX_SWAPFILES,
17456 +                               resume_swap_dev_t, 1);
17457 +               if (IS_ERR(resume_block_device)) {
17458 +                       if (!quiet)
17459 +                               printk(KERN_INFO "Failed to open resume dev_t"
17460 +                                               " (%x).\n", resume_swap_dev_t);
17461 +                       return -1;
17462 +               }
17463 +       }
17464 +
17465 +       signature_found = parse_signature();
17466 +
17467 +       switch (signature_found) {
17468 +       case -ENOMEM:
17469 +               return -1;
17470 +       case -1:
17471 +               if (!quiet)
17472 +                       printk(KERN_ERR "TuxOnIce: Unable to find a signature."
17473 +                               " Could you have moved a swap file?\n");
17474 +               return -1;
17475 +       case 0:
17476 +       case 1:
17477 +               if (!quiet)
17478 +                       printk(KERN_INFO "TuxOnIce: Normal swapspace found.\n");
17479 +               return 0;
17480 +       case 2:
17481 +       case 3:
17482 +       case 4:
17483 +               if (!quiet)
17484 +                       printk(KERN_INFO "TuxOnIce: Detected another "
17485 +                               "implementation's signature.\n");
17486 +               return 0;
17487 +       case 10:
17488 +               if (!quiet)
17489 +                       printk(KERN_INFO "TuxOnIce: Detected TuxOnIce binary "
17490 +                               "signature.\n");
17491 +               return 1;
17492 +       }
17493 +
17494 +       printk(KERN_ERR "Unrecognised parse_signature result (%d).\n",
17495 +                       signature_found);
17495 +       return 0;
17496 +}
17497 +
17498 +/* toi_swap_remove_image
17499 + *
17500 + */
17501 +static int toi_swap_remove_image(void)
17502 +{
17503 +       /*
17504 +        * If nr_hibernates == 0, we must be booting, so no swap pages
17505 +        * will be recorded as used yet.
17506 +        */
17507 +
17508 +       if (nr_hibernates)
17509 +               toi_swap_release_storage();
17510 +
17511 +       /*
17512 +        * We don't do a sanity check here: we want to restore the swap
17513 +        * whatever version of the kernel made the hibernate image.
17514 +        *
17515 +        * We need to write swap, but swap may not be enabled, so
17516 +        * we write the device directly.
17517 +        *
17518 +        * If we don't have a current_signature_page, we didn't
17519 +        * read an image header, so don't change anything.
17520 +        */
17521 +
17522 +       return toi_swap_image_exists(1) ?
17523 +               write_modified_signature(NO_IMAGE_SIGNATURE) : 0;
17524 +}
17525 +
17526 +/*
17527 + * Mark resume attempted.
17528 + *
17529 + * Record that we tried to resume from this image. We have already read the
17530 + * signature in. We just need to write the modified version.
17531 + */
17532 +static int toi_swap_mark_resume_attempted(int mark)
17533 +{
17534 +       if (!resume_swap_dev_t) {
17535 +               printk(KERN_INFO "Not even trying to record attempt at resuming"
17536 +                               " because resume_swap_dev_t is not set.\n");
17537 +               return -ENODEV;
17538 +       }
17539 +
17540 +       return write_modified_signature(mark ? TRIED_RESUME : NO_TRIED_RESUME);
17541 +}
17542 +
17543 +/*
17544 + * Parse Image Location
17545 + *
17546 + * Attempt to parse a resume= parameter.
17547 + * Swap Writer accepts:
17548 + * resume=swap:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
17549 + *
17550 + * Where:
17551 + * DEVNAME is convertible to a dev_t by name_to_dev_t, and
17552 + * FIRSTBLOCK is the location of the first block in the swap file
17553 + * (specifying it for a swap partition is nonsensical but not prohibited).
17554 + * Data is validated by attempting to read a swap header from the
17555 + * location given. Failure will result in toi_swap refusing to
17556 + * save an image, and a reboot with correct parameters will be
17557 + * necessary.
17558 + */
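+/*
+ * Illustrative examples (device names hypothetical):
+ *
+ *     resume=swap:/dev/sda2           a swap partition
+ *     resume=swap:/dev/sda1:0x150     a swap file whose first block is 0x150
+ *     resume=/dev/sda2                also accepted via the fallback below
+ */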
17559 +static int toi_swap_parse_sig_location(char *commandline,
17560 +               int only_allocator, int quiet)
17561 +{
17562 +       char *thischar, *devstart, *colon = NULL;
17563 +       int signature_found, result = -EINVAL, temp_result = 0;
17564 +
17565 +       if (strncmp(commandline, "swap:", 5)) {
17566 +               /*
17567 +                * Failing swap:, we'll take a simple
17568 +                * resume=/dev/hda2, but fall through to
17569 +                * other allocators if /dev/ isn't matched.
17570 +                */
17571 +               if (strncmp(commandline, "/dev/", 5))
17572 +                       return 1;
17573 +       } else
17574 +               commandline += 5;
17575 +
17576 +       devstart = commandline;
17577 +       thischar = commandline;
17578 +       while ((*thischar != ':') && (*thischar != '@') &&
17579 +               ((thischar - commandline) < 250) && (*thischar))
17580 +               thischar++;
17581 +
17582 +       if (*thischar == ':') {
17583 +               colon = thischar;
17584 +               *colon = 0;
17585 +               thischar++;
17586 +       }
17587 +
17588 +       while ((thischar - commandline) < 250 && *thischar)
17589 +               thischar++;
17590 +
17591 +       if (colon) {
17592 +               unsigned long block;
17593 +               temp_result = strict_strtoul(colon + 1, 0, &block);
17594 +               if (!temp_result)
17595 +                       resume_firstblock = (int) block;
17596 +       } else
17597 +               resume_firstblock = 0;
17598 +
17599 +       clear_toi_state(TOI_CAN_HIBERNATE);
17600 +       clear_toi_state(TOI_CAN_RESUME);
17601 +
17602 +       if (!temp_result)
17603 +               temp_result = try_to_parse_resume_device(devstart, quiet);
17604 +
17605 +       if (colon)
17606 +               *colon = ':';
17607 +
17608 +       if (temp_result)
17609 +               return -EINVAL;
17610 +
17611 +       signature_found = toi_swap_image_exists(quiet);
17612 +
17613 +       if (signature_found != -1) {
17614 +               result = 0;
17615 +
17616 +               toi_bio_ops.set_devinfo(devinfo);
17617 +               toi_writer_posn.chains = &block_chain[0];
17618 +               toi_writer_posn.num_chains = MAX_SWAPFILES;
17619 +               set_toi_state(TOI_CAN_HIBERNATE);
17620 +               set_toi_state(TOI_CAN_RESUME);
17621 +       } else
17622 +               if (!quiet)
17623 +                       printk(KERN_ERR "TuxOnIce: SwapAllocator: No swap "
17624 +                               "signature found at %s.\n", devstart);
17625 +       return result;
17626 +}
17627 +
17628 +static int header_locations_read_sysfs(const char *page, int count)
17629 +{
17630 +       int i, printedpartitionsmessage = 0, len = 0, haveswap = 0;
17631 +       struct inode *swapf = NULL;
17632 +       int zone;
17633 +       char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL);
17634 +       char *path, *output = (char *) page;
17636 +
17637 +       if (!page)
17638 +               return 0;
17639 +
17640 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17641 +               struct swap_info_struct *si =  get_swap_info_struct(i);
17642 +
17643 +               if (!(si->flags & SWP_USED) || !si->swap_map ||
17644 +                   !(si->flags & SWP_WRITEOK))
17645 +                       continue;
17646 +
17647 +               if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) {
17648 +                       haveswap = 1;
17649 +                       if (!printedpartitionsmessage) {
17650 +                               len += sprintf(output + len,
17651 +                                       "For swap partitions, simply use the "
17652 +                                       "format: resume=swap:/dev/hda1.\n");
17653 +                               printedpartitionsmessage = 1;
17654 +                       }
17655 +               } else {
17656 +                       path = d_path(&si->swap_file->f_path, path_page,
17657 +                                       PAGE_SIZE);
17658 +                       if (IS_ERR(path))
17659 +                               continue;
17660 +                       snprintf(path_page, 31, "%s", path);
17661 +
17662 +                       haveswap = 1;
17663 +                       swapf = si->swap_file->f_mapping->host;
17664 +                       zone = bmap(swapf, 0);
17665 +                       if (!zone) {
17666 +                               len += sprintf(output + len,
17667 +                                       "Swapfile %s has been corrupted. "
17668 +                                       "Re-run mkswap on it and try again.\n",
17669 +                                       path_page);
17670 +                       } else {
17671 +                               char name_buffer[255];
17672 +                               len += sprintf(output + len,
17673 +                                       "For swapfile `%s`,"
17674 +                                       " use resume=swap:/dev/%s:0x%x.\n",
17675 +                                       path_page,
17676 +                                       bdevname(si->bdev, name_buffer),
17677 +                                       zone << (swapf->i_blkbits - 9));
17678 +                       }
17679 +               }
17680 +       }
17681 +
17682 +       if (!haveswap)
17683 +               len = sprintf(output, "You need to turn on swap before "
17684 +                               "examining this file.\n");
17685 +
17686 +       toi_free_page(10, (unsigned long) path_page);
17687 +       return len;
17688 +}
17689 +
17690 +static struct toi_sysfs_data sysfs_params[] = {
17691 +       SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL),
17692 +       SYSFS_CUSTOM("headerlocations", SYSFS_READONLY,
17693 +                       header_locations_read_sysfs, NULL, 0, NULL),
17694 +       SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0,
17695 +                       attempt_to_parse_resume_device2),
17696 +};
17697 +
17698 +static struct toi_module_ops toi_swapops = {
17699 +       .type                                   = WRITER_MODULE,
17700 +       .name                                   = "swap storage",
17701 +       .directory                              = "swap",
17702 +       .module                                 = THIS_MODULE,
17703 +       .memory_needed                          = toi_swap_memory_needed,
17704 +       .print_debug_info                       = toi_swap_print_debug_stats,
17705 +       .storage_needed                         = toi_swap_storage_needed,
17706 +       .initialise                             = toi_swap_initialise,
17707 +       .cleanup                                = toi_swap_cleanup,
17708 +
17709 +       .noresume_reset         = toi_swap_noresume_reset,
17710 +       .storage_available      = toi_swap_storage_available,
17711 +       .storage_allocated      = toi_swap_storage_allocated,
17712 +       .reserve_header_space   = toi_swap_reserve_header_space,
17713 +       .allocate_storage       = toi_swap_allocate_storage,
17714 +       .image_exists           = toi_swap_image_exists,
17715 +       .mark_resume_attempted  = toi_swap_mark_resume_attempted,
17716 +       .write_header_init      = toi_swap_write_header_init,
17717 +       .write_header_cleanup   = toi_swap_write_header_cleanup,
17718 +       .read_header_init       = toi_swap_read_header_init,
17719 +       .read_header_cleanup    = toi_swap_read_header_cleanup,
17720 +       .remove_image           = toi_swap_remove_image,
17721 +       .parse_sig_location     = toi_swap_parse_sig_location,
17722 +
17723 +       .sysfs_data             = sysfs_params,
17724 +       .num_sysfs_entries      = sizeof(sysfs_params) /
17725 +               sizeof(struct toi_sysfs_data),
17726 +};
17727 +
17728 +/* ---- Registration ---- */
17729 +static __init int toi_swap_load(void)
17730 +{
17731 +       toi_swapops.rw_init = toi_bio_ops.rw_init;
17732 +       toi_swapops.rw_cleanup = toi_bio_ops.rw_cleanup;
17733 +       toi_swapops.read_page = toi_bio_ops.read_page;
17734 +       toi_swapops.write_page = toi_bio_ops.write_page;
17735 +       toi_swapops.rw_header_chunk = toi_bio_ops.rw_header_chunk;
17736 +       toi_swapops.rw_header_chunk_noreadahead =
17737 +               toi_bio_ops.rw_header_chunk_noreadahead;
17738 +       toi_swapops.io_flusher = toi_bio_ops.io_flusher;
17739 +       toi_swapops.update_throughput_throttle =
17740 +               toi_bio_ops.update_throughput_throttle;
17741 +       toi_swapops.finish_all_io = toi_bio_ops.finish_all_io;
17742 +
17743 +       return toi_register_module(&toi_swapops);
17744 +}
17745 +
17746 +#ifdef MODULE
17747 +static __exit void toi_swap_unload(void)
17748 +{
17749 +       toi_unregister_module(&toi_swapops);
17750 +}
17751 +
17752 +module_init(toi_swap_load);
17753 +module_exit(toi_swap_unload);
17754 +MODULE_LICENSE("GPL");
17755 +MODULE_AUTHOR("Nigel Cunningham");
17756 +MODULE_DESCRIPTION("TuxOnIce SwapAllocator");
17757 +#else
17758 +late_initcall(toi_swap_load);
17759 +#endif
17760 diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c
17761 new file mode 100644
17762 index 0000000..3b815ab
17763 --- /dev/null
17764 +++ b/kernel/power/tuxonice_sysfs.c
17765 @@ -0,0 +1,335 @@
17766 +/*
17767 + * kernel/power/tuxonice_sysfs.c
17768 + *
17769 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
17770 + *
17771 + * This file is released under the GPLv2.
17772 + *
17773 + * This file contains support for sysfs entries for tuning TuxOnIce.
17774 + *
17775 + * We have a generic handler that deals with the most common cases, and
17776 + * hooks for special handlers to use.
17777 + */
17778 +
17779 +#include <linux/suspend.h>
17780 +
17781 +#include "tuxonice_sysfs.h"
17782 +#include "tuxonice.h"
17783 +#include "tuxonice_storage.h"
17784 +#include "tuxonice_alloc.h"
17785 +
17786 +static int toi_sysfs_initialised;
17787 +
17788 +static void toi_initialise_sysfs(void);
17789 +
17790 +static struct toi_sysfs_data sysfs_params[];
17791 +
17792 +#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr)
17793 +
17794 +static void toi_main_wrapper(void)
17795 +{
17796 +       toi_try_hibernate();
17797 +}
17798 +
17799 +static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr,
17800 +                             char *page)
17801 +{
17802 +       struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
17803 +       int len = 0;
17804 +       int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ;
17805 +
17806 +       if (full_prep && toi_start_anything(0))
17807 +               return -EBUSY;
17808 +
17809 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
17810 +               toi_prepare_usm();
17811 +
17812 +       switch (sysfs_data->type) {
17813 +       case TOI_SYSFS_DATA_CUSTOM:
17814 +               len = (sysfs_data->data.special.read_sysfs) ?
17815 +                       (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE)
17816 +                       : 0;
17817 +               break;
17818 +       case TOI_SYSFS_DATA_BIT:
17819 +               len = sprintf(page, "%d\n",
17820 +                       -test_bit(sysfs_data->data.bit.bit,
17821 +                               sysfs_data->data.bit.bit_vector));
17822 +               break;
17823 +       case TOI_SYSFS_DATA_INTEGER:
17824 +               len = sprintf(page, "%d\n",
17825 +                       *(sysfs_data->data.integer.variable));
17826 +               break;
17827 +       case TOI_SYSFS_DATA_LONG:
17828 +               len = sprintf(page, "%ld\n",
17829 +                       *(sysfs_data->data.a_long.variable));
17830 +               break;
17831 +       case TOI_SYSFS_DATA_UL:
17832 +               len = sprintf(page, "%lu\n",
17833 +                       *(sysfs_data->data.ul.variable));
17834 +               break;
17835 +       case TOI_SYSFS_DATA_STRING:
17836 +               len = sprintf(page, "%s\n",
17837 +                       sysfs_data->data.string.variable);
17838 +               break;
17839 +       }
17840 +
17841 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
17842 +               toi_cleanup_usm();
17843 +
17844 +       if (full_prep)
17845 +               toi_finish_anything(0);
17846 +
17847 +       return len;
17848 +}
17849 +
17850 +#define BOUND(_variable, _type) do { \
17851 +       if (*_variable < sysfs_data->data._type.minimum) \
17852 +               *_variable = sysfs_data->data._type.minimum; \
17853 +       else if (*_variable > sysfs_data->data._type.maximum) \
17854 +               *_variable = sysfs_data->data._type.maximum; \
17855 +} while (0)
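+/*
+ * BOUND clamps a freshly parsed value into the [minimum, maximum] range
+ * declared for the entry, e.g. BOUND(sysfs_data->data.integer.variable,
+ * integer) as used below for integer writes.
+ */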
17856 +
17857 +static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr,
17858 +               const char *my_buf, size_t count)
17859 +{
17860 +       int assigned_temp_buffer = 0, result = count;
17861 +       struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
17862 +
17863 +       if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME)))
17864 +               return -EBUSY;
17865 +
17866 +       ((char *) my_buf)[count] = 0;
17867 +
17868 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
17869 +               toi_prepare_usm();
17870 +
17871 +       switch (sysfs_data->type) {
17872 +       case TOI_SYSFS_DATA_CUSTOM:
17873 +               if (sysfs_data->data.special.write_sysfs)
17874 +                       result = (sysfs_data->data.special.write_sysfs)(my_buf,
17875 +                                       count);
17876 +               break;
17877 +       case TOI_SYSFS_DATA_BIT:
17878 +               {
17879 +                       unsigned long value;
17880 +                       result = strict_strtoul(my_buf, 0, &value);
17881 +                       if (result)
17882 +                               break;
17883 +                       if (value)
17884 +                               set_bit(sysfs_data->data.bit.bit,
17885 +                                       sysfs_data->data.bit.bit_vector);
17886 +                       else
17887 +                               clear_bit(sysfs_data->data.bit.bit,
17888 +                                       sysfs_data->data.bit.bit_vector);
17889 +                       break;
17890 +               }
17891 +       case TOI_SYSFS_DATA_INTEGER:
17892 +               {
17893 +                       long temp;
17894 +                       result = strict_strtol(my_buf, 0, &temp);
17895 +                       if (result)
17896 +                               break;
17897 +                       *(sysfs_data->data.integer.variable) = (int) temp;
17898 +                       BOUND(sysfs_data->data.integer.variable, integer);
17899 +                       break;
17900 +               }
17901 +       case TOI_SYSFS_DATA_LONG:
17902 +               {
17903 +                       long *variable =
17904 +                               sysfs_data->data.a_long.variable;
17905 +                       result = strict_strtol(my_buf, 0, variable);
17906 +                       if (result)
17907 +                               break;
17908 +                       BOUND(variable, a_long);
17909 +                       break;
17910 +               }
17911 +       case TOI_SYSFS_DATA_UL:
17912 +               {
17913 +                       unsigned long *variable =
17914 +                               sysfs_data->data.ul.variable;
17915 +                       result = strict_strtoul(my_buf, 0, variable);
17916 +                       if (result)
17917 +                               break;
17918 +                       BOUND(variable, ul);
17919 +                       break;
17920 +               }
17922 +       case TOI_SYSFS_DATA_STRING:
17923 +               {
17924 +                       int copy_len = count;
17925 +                       char *variable =
17926 +                               sysfs_data->data.string.variable;
17927 +
17928 +                       if (sysfs_data->data.string.max_length &&
17929 +                           (copy_len > sysfs_data->data.string.max_length))
17930 +                               copy_len = sysfs_data->data.string.max_length;
17931 +
17932 +                       if (!variable) {
17933 +                               variable = (char *) toi_get_zeroed_page(31,
17934 +                                               TOI_ATOMIC_GFP);
17935 +                               sysfs_data->data.string.variable = variable;
17936 +                               assigned_temp_buffer = 1;
17937 +                       }
17938 +                       strncpy(variable, my_buf, copy_len);
17939 +                       if (copy_len && my_buf[copy_len - 1] == '\n')
17940 +                               variable[copy_len - 1] = 0;
17941 +                       variable[copy_len] = 0;
17942 +               }
17943 +               break;
17944 +       }
17945 +
17946 +       if (!result)
17947 +               result = count;
17948 +
17949 +       /* Side effect routine? */
17950 +       if (result == count && sysfs_data->write_side_effect)
17951 +               sysfs_data->write_side_effect();
17952 +
17953 +       /* Free temporary buffers */
17954 +       if (assigned_temp_buffer) {
17955 +               toi_free_page(31,
17956 +                       (unsigned long) sysfs_data->data.string.variable);
17957 +               sysfs_data->data.string.variable = NULL;
17958 +       }
17959 +
17960 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
17961 +               toi_cleanup_usm();
17962 +
17963 +       toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME);
17964 +
17965 +       return result;
17966 +}
17967 +
17968 +static struct sysfs_ops toi_sysfs_ops = {
17969 +       .show   = &toi_attr_show,
17970 +       .store  = &toi_attr_store,
17971 +};
17972 +
17973 +static struct kobj_type toi_ktype = {
17974 +       .sysfs_ops      = &toi_sysfs_ops,
17975 +};
17976 +
17977 +struct kobject *tuxonice_kobj;
17978 +
17979 +/* Non-module sysfs entries.
17980 + *
17981 + * This array contains entries that are automatically registered at
17982 + * boot. Modules and the console code register their own entries separately.
17983 + */
17984 +
17985 +static struct toi_sysfs_data sysfs_params[] = {
17986 +       SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL,
17987 +               SYSFS_HIBERNATING, toi_main_wrapper),
17988 +       SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL,
17989 +               SYSFS_RESUMING, toi_try_resume)
17990 +};
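+/*
+ * Writing anything to one of these files triggers its side effect once
+ * the "tuxonice" directory below is registered, e.g. (illustrative)
+ * "echo > /sys/power/tuxonice/do_hibernate" from userspace starts a
+ * hibernation cycle.
+ */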
17991 +
17992 +void remove_toi_sysdir(struct kobject *kobj)
17993 +{
17994 +       if (!kobj)
17995 +               return;
17996 +
17997 +       kobject_put(kobj);
17998 +}
17999 +
18000 +struct kobject *make_toi_sysdir(char *name)
18001 +{
18002 +       struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj);
18003 +
18004 +       if (!kobj) {
18005 +               printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs "
18006 +                               "dir!\n");
18007 +               return NULL;
18008 +       }
18009 +
18010 +       kobj->ktype = &toi_ktype;
18011 +
18012 +       return kobj;
18013 +}
18014 +
18015 +/* toi_register_sysfs_file
18016 + *
18017 + * Helper for registering a new /sys/power/tuxonice entry.
18018 + */
18019 +
18020 +int toi_register_sysfs_file(
18021 +               struct kobject *kobj,
18022 +               struct toi_sysfs_data *toi_sysfs_data)
18023 +{
18024 +       int result;
18025 +
18026 +       if (!toi_sysfs_initialised)
18027 +               toi_initialise_sysfs();
18028 +
18029 +       result = sysfs_create_file(kobj, &toi_sysfs_data->attr);
18030 +       if (result)
18031 +               printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s "
18032 +                       "returned %d.\n",
18033 +                       toi_sysfs_data->attr.name, result);
18034 +       kobj->ktype = &toi_ktype;
18035 +
18036 +       return result;
18037 +}
18038 +EXPORT_SYMBOL_GPL(toi_register_sysfs_file);
18039 +
18040 +/* toi_unregister_sysfs_file
18041 + *
18042 + * Helper for removing unwanted /sys/power/tuxonice entries.
18043 + *
18044 + */
18045 +void toi_unregister_sysfs_file(struct kobject *kobj,
18046 +               struct toi_sysfs_data *toi_sysfs_data)
18047 +{
18048 +       sysfs_remove_file(kobj, &toi_sysfs_data->attr);
18049 +}
18050 +EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file);
18051 +
18052 +void toi_cleanup_sysfs(void)
18053 +{
18054 +       int i;
18055 +       int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
18056 +
18057 +       if (!toi_sysfs_initialised)
18058 +               return;
18059 +
18060 +       for (i = 0; i < numfiles; i++)
18061 +               toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
18062 +
18063 +       kobject_put(tuxonice_kobj);
18064 +       toi_sysfs_initialised = 0;
18065 +}
18066 +
18067 +/* toi_initialise_sysfs
18068 + *
18069 + * Initialise the /sys/power/tuxonice directory.
18070 + */
18071 +
18072 +static void toi_initialise_sysfs(void)
18073 +{
18074 +       int i;
18075 +       int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
18076 +
18077 +       if (toi_sysfs_initialised)
18078 +               return;
18079 +
18080 +       /* Make our TuxOnIce directory a child of /sys/power */
18081 +       tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj);
18082 +       if (!tuxonice_kobj)
18083 +               return;
18084 +
18085 +       toi_sysfs_initialised = 1;
18086 +
18087 +       for (i = 0; i < numfiles; i++)
18088 +               toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
18089 +}
18090 +
18091 +int toi_sysfs_init(void)
18092 +{
18093 +       toi_initialise_sysfs();
18094 +       return 0;
18095 +}
18096 +
18097 +void toi_sysfs_exit(void)
18098 +{
18099 +       toi_cleanup_sysfs();
18100 +}
18101 diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h
18102 new file mode 100644
18103 index 0000000..2020ac7
18104 --- /dev/null
18105 +++ b/kernel/power/tuxonice_sysfs.h
18106 @@ -0,0 +1,137 @@
18107 +/*
18108 + * kernel/power/tuxonice_sysfs.h
18109 + *
18110 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
18111 + *
18112 + * This file is released under the GPLv2.
18113 + */
18114 +
18115 +#include <linux/sysfs.h>
18116 +
18117 +struct toi_sysfs_data {
18118 +       struct attribute attr;
18119 +       int type;
18120 +       int flags;
18121 +       union {
18122 +               struct {
18123 +                       unsigned long *bit_vector;
18124 +                       int bit;
18125 +               } bit;
18126 +               struct {
18127 +                       int *variable;
18128 +                       int minimum;
18129 +                       int maximum;
18130 +               } integer;
18131 +               struct {
18132 +                       long *variable;
18133 +                       long minimum;
18134 +                       long maximum;
18135 +               } a_long;
18136 +               struct {
18137 +                       unsigned long *variable;
18138 +                       unsigned long minimum;
18139 +                       unsigned long maximum;
18140 +               } ul;
18141 +               struct {
18142 +                       char *variable;
18143 +                       int max_length;
18144 +               } string;
18145 +               struct {
18146 +                       int (*read_sysfs) (const char *buffer, int count);
18147 +                       int (*write_sysfs) (const char *buffer, int count);
18148 +                       void *data;
18149 +               } special;
18150 +       } data;
18151 +
18152 +       /* Side effect routine. Used, e.g., for reparsing the
18153 +        * resume= entry when it changes. */
18154 +       void (*write_side_effect) (void);
18155 +       struct list_head sysfs_data_list;
18156 +};
18157 +
18158 +enum {
18159 +       TOI_SYSFS_DATA_NONE = 1,
18160 +       TOI_SYSFS_DATA_CUSTOM,
18161 +       TOI_SYSFS_DATA_BIT,
18162 +       TOI_SYSFS_DATA_INTEGER,
18163 +       TOI_SYSFS_DATA_UL,
18164 +       TOI_SYSFS_DATA_LONG,
18165 +       TOI_SYSFS_DATA_STRING
18166 +};
18167 +
18168 +#define SYSFS_WRITEONLY 0200
18169 +#define SYSFS_READONLY 0444
18170 +#define SYSFS_RW 0644
18171 +
18172 +#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \
18173 +       .attr = {.name  = _name , .mode   = _mode }, \
18174 +       .type = TOI_SYSFS_DATA_BIT, \
18175 +       .flags = _flags, \
18176 +       .data = { .bit = { .bit_vector = _ul, .bit = _bit } } }
18177 +
18178 +#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \
18179 +       .attr = {.name  = _name , .mode   = _mode }, \
18180 +       .type = TOI_SYSFS_DATA_INTEGER, \
18181 +       .flags = _flags, \
18182 +       .data = { .integer = { .variable = _int, .minimum = _min, \
18183 +                       .maximum = _max } }, \
18184 +       .write_side_effect = _wse }
18185 +
18186 +#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \
18187 +       .attr = {.name  = _name , .mode   = _mode }, \
18188 +       .type = TOI_SYSFS_DATA_UL, \
18189 +       .flags = _flags, \
18190 +       .data = { .ul = { .variable = _ul, .minimum = _min, \
18191 +                       .maximum = _max } } }
18192 +
18193 +#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \
18194 +       .attr = {.name  = _name , .mode   = _mode }, \
18195 +       .type = TOI_SYSFS_DATA_LONG, \
18196 +       .flags = _flags, \
18197 +       .data = { .a_long = { .variable = _long, .minimum = _min, \
18198 +                       .maximum = _max } } }
18199 +
18200 +#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \
18201 +       .attr = {.name  = _name , .mode   = _mode }, \
18202 +       .type = TOI_SYSFS_DATA_STRING, \
18203 +       .flags = _flags, \
18204 +       .data = { .string = { .variable = _string, .max_length = _max_len } }, \
18205 +       .write_side_effect = _wse }
18206 +
18207 +#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \
18208 +       .attr = {.name  = _name , .mode   = _mode }, \
18209 +       .type = TOI_SYSFS_DATA_CUSTOM, \
18210 +       .flags = _flags, \
18211 +       .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \
18212 +       .write_side_effect = _wse }
18213 +
18214 +#define SYSFS_NONE(_name, _wse) { \
18215 +       .attr = {.name  = _name , .mode   = SYSFS_WRITEONLY }, \
18216 +       .type = TOI_SYSFS_DATA_NONE, \
18217 +       .write_side_effect = _wse, \
18218 +}
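+/*
+ * A minimal, purely illustrative use of these initialisers (the names
+ * example_level and example_reparse are hypothetical):
+ *
+ *     static int example_level;
+ *     static void example_reparse(void);
+ *
+ *     static struct toi_sysfs_data example_params[] = {
+ *             SYSFS_INT("level", SYSFS_RW, &example_level, 0, 7, 0,
+ *                     example_reparse),
+ *     };
+ */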
18219 +
18220 +/* Flags */
18221 +#define SYSFS_NEEDS_SM_FOR_READ 1
18222 +#define SYSFS_NEEDS_SM_FOR_WRITE 2
18223 +#define SYSFS_HIBERNATE 4
18224 +#define SYSFS_RESUME 8
18225 +#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME)
18226 +#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE)
18227 +#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE)
18228 +#define SYSFS_NEEDS_SM_FOR_BOTH \
18229 + (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE)
18230 +
18231 +int toi_register_sysfs_file(struct kobject *kobj,
18232 +               struct toi_sysfs_data *toi_sysfs_data);
18233 +void toi_unregister_sysfs_file(struct kobject *kobj,
18234 +               struct toi_sysfs_data *toi_sysfs_data);
18235 +
18236 +extern struct kobject *tuxonice_kobj;
18237 +
18238 +struct kobject *make_toi_sysdir(char *name);
18239 +void remove_toi_sysdir(struct kobject *obj);
18240 +extern void toi_cleanup_sysfs(void);
18241 +
18242 +extern int toi_sysfs_init(void);
18243 +extern void toi_sysfs_exit(void);
18244 diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c
18245 new file mode 100644
18246 index 0000000..4da4afd
18247 --- /dev/null
18248 +++ b/kernel/power/tuxonice_ui.c
18249 @@ -0,0 +1,250 @@
18250 +/*
18251 + * kernel/power/tuxonice_ui.c
18252 + *
18253 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
18254 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
18255 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
18256 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
18257 + *
18258 + * This file is released under the GPLv2.
18259 + *
18260 + * Routines for TuxOnIce's user interface.
18261 + *
18262 + * The user interface code talks to a userspace program via a
18263 + * netlink socket.
18264 + *
18265 + * The kernel side:
18266 + * - starts the userui program;
18267 + * - sends text messages and progress bar status;
18268 + *
18269 + * The user space side:
18270 + * - passes messages regarding user requests (abort, toggle reboot etc)
18271 + *
18272 + */
18273 +
18274 +#define __KERNEL_SYSCALLS__
18275 +
18276 +#include <linux/reboot.h>
18277 +
18278 +#include "tuxonice_sysfs.h"
18279 +#include "tuxonice_modules.h"
18280 +#include "tuxonice.h"
18281 +#include "tuxonice_ui.h"
18282 +#include "tuxonice_netlink.h"
18283 +#include "tuxonice_power_off.h"
18284 +#include "tuxonice_builtin.h"
18285 +
18286 +static char local_printf_buf[1024];    /* Same as printk - should be safe */
18287 +struct ui_ops *toi_current_ui;
18288 +EXPORT_SYMBOL_GPL(toi_current_ui);
18289 +
18290 +/**
18291 + * toi_wait_for_keypress - Wait for keypress via userui or /dev/console.
18292 + *
18293 + * @timeout: Maximum time to wait.
18294 + *
18295 + * Wait for a keypress, either from userui or /dev/console if userui
18296 + * isn't available. The non-userui path is particularly for boot time,
18297 + * prior to userui being started, when we have an important warning to
18298 + * give to the user.
18299 + */
18300 +static char toi_wait_for_keypress(int timeout)
18301 +{
18302 +       if (toi_current_ui && toi_current_ui->wait_for_key(timeout))
18303 +               return ' ';
18304 +
18305 +       return toi_wait_for_keypress_dev_console(timeout);
18306 +}
18307 +
18308 +/* toi_early_boot_message()
18309 + * Description:        Handle errors early in the process of booting.
18310 + *             The user may press C to continue booting, perhaps
18311 + *             invalidating the image, or space to reboot.
18312 + *             This works from either the serial console or a normally
18313 + *             attached keyboard.
18314 + *
18315 + *             Note that we come in here from init, while the kernel is
18316 + *             locked. If we want to get events from the serial console,
18317 + *             we need to temporarily unlock the kernel.
18318 + *
18319 + *             toi_early_boot_message may also be called post-boot.
18320 + *             In this case, it simply printks the message and returns.
18321 + *
18322 + * Arguments:  int     message_detail. Which explanation to print (in
18323 + *                     essence, whether we are able to erase the image).
18324 + *             int     default_answer. What to do when we time out. This
18325 + *                     will normally be continue, but the user might
18326 + *                     provide command line options (__setup) to override
18327 + *                     particular cases.
18328 + *             char *. Pointer to a string explaining why we're moaning.
18328 + */
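+/*
+ * An illustrative call (the message text is hypothetical):
+ *
+ *     toi_early_boot_message(1, TOI_CONTINUE_REQ,
+ *             "Image created by a different kernel version.");
+ */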
18329 +
18330 +#define say(message, a...) printk(KERN_EMERG message, ##a)
18331 +
18332 +void toi_early_boot_message(int message_detail, int default_answer,
18333 +       char *warning_reason, ...)
18334 +{
18335 +       unsigned long orig_state = get_toi_state(), continue_req = 0;
18336 +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
18337 +       unsigned long orig_loglevel = console_loglevel;
18338 +       int can_ask = 1;
18339 +#else
18340 +       int can_ask = 0;
18341 +#endif
18342 +
18343 +       va_list args;
18345 +
18346 +       if (!toi_wait) {
18347 +               set_toi_state(TOI_CONTINUE_REQ);
18348 +               can_ask = 0;
18349 +       }
18350 +
18351 +       if (warning_reason) {
18352 +               va_start(args, warning_reason);
18353 +               vsnprintf(local_printf_buf, sizeof(local_printf_buf),
18354 +                               warning_reason, args);
18357 +               va_end(args);
18358 +       }
18359 +
18360 +       if (!test_toi_state(TOI_BOOT_TIME)) {
18361 +               printk("TuxOnIce: %s\n", local_printf_buf);
18362 +               return;
18363 +       }
18364 +
18365 +       if (!can_ask) {
18366 +               continue_req = !!default_answer;
18367 +               goto post_ask;
18368 +       }
18369 +
18370 +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
18371 +       console_loglevel = 7;
18372 +
18373 +       say("=== TuxOnIce ===\n\n");
18374 +       if (warning_reason) {
18375 +               say("BIG FAT WARNING!! %s\n\n", local_printf_buf);
18376 +               switch (message_detail) {
18377 +               case 0:
18378 +                       say("If you continue booting, note that any image WILL "
18379 +                               "NOT BE REMOVED.\nTuxOnIce is unable to do so "
18380 +                               "because the appropriate modules aren't\n"
18381 +                               "loaded. You should manually remove the image "
18382 +                               "to avoid any\npossibility of corrupting your "
18383 +                               "filesystem(s) later.\n");
18384 +                       break;
18385 +               case 1:
18386 +                       say("If you want to use the current TuxOnIce image, "
18387 +                               "reboot and try\nagain with the same kernel "
18388 +                               "that you hibernated from. If you want\n"
18389 +                               "to forget that image, continue and the image "
18390 +                               "will be erased.\n");
18391 +                       break;
18392 +               }
18393 +               say("Press SPACE to reboot or C to continue booting with "
18394 +                       "this kernel\n\n");
18395 +               if (toi_wait > 0)
18396 +                       say("Default action if you don't select one in %d "
18397 +                               "seconds is: %s.\n",
18398 +                               toi_wait,
18399 +                               default_answer == TOI_CONTINUE_REQ ?
18400 +                               "continue booting" : "reboot");
18401 +       } else {
18402 +               say("BIG FAT WARNING!!\n\n"
18403 +                       "You have tried to resume from this image before.\n"
18404 +                       "If it failed once, it may well fail again.\n"
18405 +                       "Would you like to remove the image and boot "
18406 +                       "normally?\nThis will be equivalent to entering "
18407 +                       "noresume on the\nkernel command line.\n\n"
18408 +                       "Press SPACE to remove the image or C to continue "
18409 +                       "resuming.\n\n");
18410 +               if (toi_wait > 0)
18411 +                       say("Default action if you don't select one in %d "
18412 +                               "seconds is: %s.\n", toi_wait,
18413 +                               !!default_answer ?
18414 +                               "continue resuming" : "remove the image");
18415 +       }
18416 +       console_loglevel = orig_loglevel;
18417 +
18418 +       set_toi_state(TOI_SANITY_CHECK_PROMPT);
18419 +       clear_toi_state(TOI_CONTINUE_REQ);
18420 +
18421 +       if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */
18422 +               continue_req = !!default_answer;
18423 +       else
18424 +               continue_req = test_toi_state(TOI_CONTINUE_REQ);
18425 +
18426 +#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */
18427 +
18428 +post_ask:
18429 +       if ((warning_reason) && (!continue_req))
18430 +               machine_restart(NULL);
18431 +
18432 +       restore_toi_state(orig_state);
18433 +       if (continue_req)
18434 +               set_toi_state(TOI_CONTINUE_REQ);
18435 +}
18436 +EXPORT_SYMBOL_GPL(toi_early_boot_message);
18437 +#undef say
18438 +
18439 +/*
18440 + * User interface specific /sys/power/tuxonice entries.
18441 + */
18442 +
18443 +static struct toi_sysfs_data sysfs_params[] = {
18444 +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
18445 +       SYSFS_INT("default_console_level", SYSFS_RW,
18446 +                       &toi_bkd.toi_default_console_level, 0, 7, 0, NULL),
18447 +       SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0,
18448 +                       1 << 30, 0),
18449 +       SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL,
18450 +                       0)
18451 +#endif
18452 +};
18453 +
18454 +static struct toi_module_ops userui_ops = {
18455 +       .type                           = MISC_HIDDEN_MODULE,
18456 +       .name                           = "printk ui",
18457 +       .directory                      = "user_interface",
18458 +       .module                         = THIS_MODULE,
18459 +       .sysfs_data                     = sysfs_params,
18460 +       .num_sysfs_entries              = sizeof(sysfs_params) /
18461 +               sizeof(struct toi_sysfs_data),
18462 +};
18463 +
18464 +int toi_register_ui_ops(struct ui_ops *this_ui)
18465 +{
18466 +       if (toi_current_ui) {
18467 +               printk(KERN_INFO "Only one TuxOnIce user interface module "
18468 +                               "can be loaded at a time.\n");
18469 +               return -EBUSY;
18470 +       }
18471 +
18472 +       toi_current_ui = this_ui;
18473 +
18474 +       return 0;
18475 +}
18476 +EXPORT_SYMBOL_GPL(toi_register_ui_ops);
18477 +
18478 +void toi_remove_ui_ops(struct ui_ops *this_ui)
18479 +{
18480 +       if (toi_current_ui != this_ui)
18481 +               return;
18482 +
18483 +       toi_current_ui = NULL;
18484 +}
18485 +EXPORT_SYMBOL_GPL(toi_remove_ui_ops);
18486 +
18487 +/* toi_ui_init
18488 + * Description: Boot time initialisation for the user interface.
18489 + */
18490 +
18491 +int toi_ui_init(void)
18492 +{
18493 +       return toi_register_module(&userui_ops);
18494 +}
18495 +
18496 +void toi_ui_exit(void)
18497 +{
18498 +       toi_unregister_module(&userui_ops);
18499 +}
18500 diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h
18501 new file mode 100644
18502 index 0000000..dc45741
18503 --- /dev/null
18504 +++ b/kernel/power/tuxonice_ui.h
18505 @@ -0,0 +1,103 @@
18506 +/*
18507 + * kernel/power/tuxonice_ui.h
18508 + *
18509 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
18510 + */
18511 +
18512 +enum {
18513 +       DONT_CLEAR_BAR,
18514 +       CLEAR_BAR
18515 +};
18516 +
18517 +enum {
18518 +       /* Userspace -> Kernel */
18519 +       USERUI_MSG_ABORT = 0x11,
18520 +       USERUI_MSG_SET_STATE = 0x12,
18521 +       USERUI_MSG_GET_STATE = 0x13,
18522 +       USERUI_MSG_GET_DEBUG_STATE = 0x14,
18523 +       USERUI_MSG_SET_DEBUG_STATE = 0x15,
18524 +       USERUI_MSG_SPACE = 0x18,
18525 +       USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A,
18526 +       USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B,
18527 +       USERUI_MSG_GET_LOGLEVEL = 0x1C,
18528 +       USERUI_MSG_SET_LOGLEVEL = 0x1D,
18529 +       USERUI_MSG_PRINTK = 0x1E,
18530 +
18531 +       /* Kernel -> Userspace */
18532 +       USERUI_MSG_MESSAGE = 0x21,
18533 +       USERUI_MSG_PROGRESS = 0x22,
18534 +       USERUI_MSG_POST_ATOMIC_RESTORE = 0x25,
18535 +
18536 +       USERUI_MSG_MAX,
18537 +};
18538 +
18539 +struct userui_msg_params {
18540 +       u32 a, b, c, d;
18541 +       char text[255];
18542 +};
18543 +
18544 +struct ui_ops {
18545 +       char (*wait_for_key) (int timeout);
18546 +       u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...);
18547 +       void (*prepare_status) (int clearbar, const char *fmt, ...);
18548 +       void (*cond_pause) (int pause, char *message);
18549 +       void (*abort)(int result_code, const char *fmt, ...);
18550 +       void (*prepare)(void);
18551 +       void (*cleanup)(void);
18552 +       void (*post_atomic_restore)(void);
18553 +       void (*message)(u32 section, u32 level, u32 normally_logged,
18554 +                       const char *fmt, ...);
18555 +};
18556 +
18557 +extern struct ui_ops *toi_current_ui;
18558 +
18559 +#define toi_update_status(val, max, fmt, args...) \
18560 + (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \
18561 +       max)
18562 +
18563 +#define toi_ui_post_atomic_restore(void) \
18564 +       do { if (toi_current_ui) \
18565 +               (toi_current_ui->post_atomic_restore)(); \
18566 +       } while (0)
18567 +
18568 +#define toi_prepare_console(void) \
18569 +       do { if (toi_current_ui) \
18570 +               (toi_current_ui->prepare)(); \
18571 +       } while (0)
18572 +
18573 +#define toi_cleanup_console(void) \
18574 +       do { if (toi_current_ui) \
18575 +               (toi_current_ui->cleanup)(); \
18576 +       } while (0)
18577 +
18578 +#define abort_hibernate(result, fmt, args...) \
18579 +       do { if (toi_current_ui) \
18580 +               (toi_current_ui->abort)(result, fmt, ##args); \
18581 +            else { \
18582 +               set_abort_result(result); \
18583 +            } \
18584 +       } while (0)
18585 +
18586 +#define toi_cond_pause(pause, message) \
18587 +       do { if (toi_current_ui) \
18588 +               (toi_current_ui->cond_pause)(pause, message); \
18589 +       } while (0)
18590 +
18591 +#define toi_prepare_status(clear, fmt, args...) \
18592 +       do { if (toi_current_ui) \
18593 +               (toi_current_ui->prepare_status)(clear, fmt, ##args); \
18594 +            else \
18595 +               printk(KERN_ERR fmt "%s", ##args, "\n"); \
18596 +       } while (0)
18597 +
18598 +#define toi_message(sn, lev, log, fmt, a...) \
18599 +do { \
18600 +       if (toi_current_ui && (!sn || test_debug_state(sn))) \
18601 +               toi_current_ui->message(sn, lev, log, fmt, ##a); \
18602 +} while (0)
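+/*
+ * Illustrative use (the section and level values are hypothetical):
+ *
+ *     toi_message(0, 1, 1, "Wrote %d pages.", pages_written);
+ *
+ * A zero section always passes the test_debug_state() check above.
+ */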
18603 +
18604 +__exit void toi_ui_cleanup(void);
18605 +extern int toi_ui_init(void);
18606 +extern void toi_ui_exit(void);
18607 +extern int toi_register_ui_ops(struct ui_ops *this_ui);
18608 +extern void toi_remove_ui_ops(struct ui_ops *this_ui);
18609 diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c
18610 new file mode 100644
18611 index 0000000..c7b1053
18612 --- /dev/null
18613 +++ b/kernel/power/tuxonice_userui.c
18614 @@ -0,0 +1,662 @@
18615 +/*
18616 + * kernel/power/tuxonice_userui.c
18617 + *
18618 + * Copyright (C) 2005-2007 Bernard Blackham
18619 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
18620 + *
18621 + * This file is released under the GPLv2.
18622 + *
18623 + * Routines for TuxOnIce's user interface.
18624 + *
18625 + * The user interface code talks to a userspace program via a
18626 + * netlink socket.
18627 + *
18628 + * The kernel side:
18629 + * - starts the userui program;
18630 + * - sends text messages and progress bar status;
18631 + *
18632 + * The user space side:
18633 + * - passes messages regarding user requests (abort, toggle reboot, etc.)
18634 + *
18635 + */
18636 +
18637 +#define __KERNEL_SYSCALLS__
18638 +
18639 +#include <linux/suspend.h>
18640 +#include <linux/freezer.h>
18641 +#include <linux/console.h>
18642 +#include <linux/ctype.h>
18643 +#include <linux/tty.h>
18644 +#include <linux/vt_kern.h>
18645 +#include <linux/reboot.h>
18646 +#include <linux/kmod.h>
18647 +#include <linux/security.h>
18648 +#include <linux/syscalls.h>
18649 +
18650 +#include "tuxonice_sysfs.h"
18651 +#include "tuxonice_modules.h"
18652 +#include "tuxonice.h"
18653 +#include "tuxonice_ui.h"
18654 +#include "tuxonice_netlink.h"
18655 +#include "tuxonice_power_off.h"
18656 +
18657 +static char local_printf_buf[1024];    /* Same as printk - should be safe */
18658 +
18659 +static struct user_helper_data ui_helper_data;
18660 +static struct toi_module_ops userui_ops;
18661 +static int orig_kmsg;
18662 +
18663 +static char lastheader[512];
18664 +static int lastheader_message_len;
18665 +static int ui_helper_changed; /* Used at resume time so we don't overwrite
18666 +                               a value set from initrd/ramfs. */
18667 +
18668 +/* Number of distinct progress amounts that userspace can display */
18669 +static int progress_granularity = 30;
18670 +
18671 +static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key);
18672 +
18673 +/**
18674 + * ui_nl_set_state - Update toi_action based on a message from userui.
18675 + *
18676 + * @n: The requested new toi_action bitmap; only permitted bits are applied.
18677 + */
18678 +static void ui_nl_set_state(int n)
18679 +{
18680 +       /* Only let them change certain settings */
18681 +       static const u32 toi_action_mask =
18682 +               (1 << TOI_REBOOT) | (1 << TOI_PAUSE) |
18683 +               (1 << TOI_LOGALL) |
18684 +               (1 << TOI_SINGLESTEP) |
18685 +               (1 << TOI_PAUSE_NEAR_PAGESET_END);
18686 +
18687 +       toi_bkd.toi_action = (toi_bkd.toi_action & (~toi_action_mask)) |
18688 +               (n & toi_action_mask);
18689 +
18690 +       if (!test_action_state(TOI_PAUSE) &&
18691 +                       !test_action_state(TOI_SINGLESTEP))
18692 +               wake_up_interruptible(&userui_wait_for_key);
18693 +}
18694 +
18695 +/**
18696 + * userui_post_atomic_restore - Tell userui that atomic restore just happened.
18697 + *
18698 + * Tell userui that atomic restore just occured, so that it can do things like
18699 + * redrawing the screen, re-getting settings and so on.
18700 + */
18701 +static void userui_post_atomic_restore(void)
18702 +{
18703 +       toi_send_netlink_message(&ui_helper_data,
18704 +                       USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0);
18705 +}
18706 +
18707 +/**
18708 + * userui_storage_needed - Report how much space in the image header is needed.
18709 + */
18710 +static int userui_storage_needed(void)
18711 +{
18712 +       return sizeof(ui_helper_data.program) + 1 + sizeof(int);
18713 +}
18714 +
18715 +/**
18716 + * userui_save_config_info - Fill buffer with config info for image header.
18717 + *
18718 + * @buf: Buffer into which to put the config info we want to save.
18719 + */
18720 +static int userui_save_config_info(char *buf)
18721 +{
18722 +       *((int *) buf) = progress_granularity;
18723 +       memcpy(buf + sizeof(int), ui_helper_data.program,
18724 +                       sizeof(ui_helper_data.program));
18725 +       return sizeof(ui_helper_data.program) + sizeof(int) + 1;
18726 +}
18727 +
18728 +/**
18729 + * userui_load_config_info - Restore config info from buffer.
18730 + *
18731 + * @buf: Buffer containing header info loaded.
18732 + * @size: Size of data loaded for this module.
18733 + */
18734 +static void userui_load_config_info(char *buf, int size)
18735 +{
18736 +       progress_granularity = *((int *) buf);
18737 +       size -= sizeof(int);
18738 +
18739 +       /* Don't load the saved path if one has already been set */
18740 +       if (ui_helper_changed)
18741 +               return;
18742 +
18743 +       if (size > sizeof(ui_helper_data.program))
18744 +               size = sizeof(ui_helper_data.program);
18745 +
18746 +       memcpy(ui_helper_data.program, buf + sizeof(int), size);
18747 +       ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0';
18748 +}
18749 +
18750 +/**
18751 + * set_ui_program_set - Record that userui program was changed.
18752 + *
18753 + * Side effect routine for when the userui program is set. In an initrd or
18754 + * ramfs, the user may set a location for the userui program. If this happens,
18755 + * we don't want to reload the value that was saved in the image header. This
18756 + * routine allows us to flag that we shouldn't restore the program name from
18757 + * the image header.
18758 + */
18759 +static void set_ui_program_set(void)
18760 +{
18761 +       ui_helper_changed = 1;
18762 +}
18763 +
18764 +/**
18765 + * userui_memory_needed - Tell core how much memory to reserve for us.
18766 + */
18767 +static int userui_memory_needed(void)
18768 +{
18769 +       /* Ballpark figure of 128 pages */
18770 +       return 128 * PAGE_SIZE;
18771 +}
18772 +
18773 +/**
18774 + * userui_update_status - Update the progress bar and (if on) in-bar message.
18775 + *
18776 + * @value: Current progress percentage numerator.
18777 + * @maximum: Current progress percentage denominator.
18778 + * @fmt: Message to be displayed in the middle of the progress bar.
18779 + *
18780 + * Note that a NULL message does not mean that any previous message is erased!
18781 + * For that, you need toi_prepare_status with clearbar on.
18782 + *
18783 + * Returns a u32, being the next numerator (as determined by the
18784 + * maximum and progress granularity) where status needs to be updated.
18785 + * This is to reduce unnecessary calls to update_status.
18786 + */
18787 +static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...)
18788 +{
18789 +       static u32 last_step = 9999;
18790 +       struct userui_msg_params msg;
18791 +       u32 this_step, next_update;
18792 +       int bitshift;
18793 +
18794 +       if (ui_helper_data.pid == -1)
18795 +               return 0;
18796 +
18797 +       if ((!maximum) || (!progress_granularity))
18798 +               return maximum;
18799 +
18800 +       if (value < 0)
18801 +               value = 0;
18802 +
18803 +       if (value > maximum)
18804 +               value = maximum;
18805 +
18806 +       /* Try to avoid math problems - we can't do 64 bit math here
18807 +        * (and shouldn't need it - anyone got screen resolution
18808 +        * of 65536 pixels or more?) */
18809 +       bitshift = fls(maximum) - 16;
18810 +       if (bitshift > 0) {
18811 +               u32 temp_maximum = maximum >> bitshift;
18812 +               u32 temp_value = value >> bitshift;
18813 +               this_step = (u32)
18814 +                       (temp_value * progress_granularity / temp_maximum);
18815 +               next_update = (((this_step + 1) * temp_maximum /
18816 +                                       progress_granularity) + 1) << bitshift;
18817 +       } else {
18818 +               this_step = (u32) (value * progress_granularity / maximum);
18819 +               next_update = ((this_step + 1) * maximum /
18820 +                               progress_granularity) + 1;
18821 +       }
18822 +
18823 +       if (this_step == last_step)
18824 +               return next_update;
18825 +
18826 +       memset(&msg, 0, sizeof(msg));
18827 +
18828 +       msg.a = this_step;
18829 +       msg.b = progress_granularity;
18830 +
18831 +       if (fmt) {
18832 +               va_list args;
18833 +               va_start(args, fmt);
18834 +               vsnprintf(msg.text, sizeof(msg.text), fmt, args);
18835 +               va_end(args);
18836 +               msg.text[sizeof(msg.text)-1] = '\0';
18837 +       }
18838 +
18839 +       toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS,
18840 +                       &msg, sizeof(msg));
18841 +       last_step = this_step;
18842 +
18843 +       return next_update;
18844 +}
18845 +
18846 +/**
18847 + * userui_message - Display a message without necessarily logging it.
18848 + *
18849 + * @section: Type of message. Messages can be filtered by type.
18850 + * @level: Degree of importance of the message. Lower values = higher priority.
18851 + * @normally_logged: Whether logged even if log_everything is off.
18852 + * @fmt: Message (and parameters).
18853 + *
18854 + * This function is intended to do the same job as printk, but without normally
18855 + * logging what is printed. The point is to be able to get debugging info on
18856 + * screen without filling the logs with "1/534^M 2/534^M 3/534^M".
18857 + *
18858 + * It may be called from an interrupt context - can't sleep!
18859 + */
18860 +static void userui_message(u32 section, u32 level, u32 normally_logged,
18861 +               const char *fmt, ...)
18862 +{
18863 +       struct userui_msg_params msg;
18864 +
18865 +       if ((level) && (level > console_loglevel))
18866 +               return;
18867 +
18868 +       memset(&msg, 0, sizeof(msg));
18869 +
18870 +       msg.a = section;
18871 +       msg.b = level;
18872 +       msg.c = normally_logged;
18873 +
18874 +       if (fmt) {
18875 +               va_list args;
18876 +               va_start(args, fmt);
18877 +               vsnprintf(msg.text, sizeof(msg.text), fmt, args);
18878 +               va_end(args);
18879 +               msg.text[sizeof(msg.text)-1] = '\0';
18880 +       }
18881 +
18882 +       if (test_action_state(TOI_LOGALL))
18883 +               printk(KERN_INFO "%s\n", msg.text);
18884 +
18885 +       toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE,
18886 +                       &msg, sizeof(msg));
18887 +}
18888 +
18889 +/**
18890 + * wait_for_key_via_userui - Wait for userui to receive a keypress.
18891 + */
18892 +static void wait_for_key_via_userui(void)
18893 +{
18894 +       DECLARE_WAITQUEUE(wait, current);
18895 +
18896 +       add_wait_queue(&userui_wait_for_key, &wait);
18897 +       set_current_state(TASK_INTERRUPTIBLE);
18898 +
18899 +       interruptible_sleep_on(&userui_wait_for_key);
18900 +
18901 +       set_current_state(TASK_RUNNING);
18902 +       remove_wait_queue(&userui_wait_for_key, &wait);
18903 +}
18904 +
18905 +/**
18906 + * userui_prepare_status - Display high level messages.
18907 + *
18908 + * @clearbar: Whether to clear the progress bar.
18909 + * @fmt...: New message for the title.
18910 + *
18911 + * Prepare the 'nice display', drawing the header and version, along with the
18912 + * current action and perhaps also resetting the progress bar.
18913 + */
18914 +static void userui_prepare_status(int clearbar, const char *fmt, ...)
18915 +{
18916 +       va_list args;
18917 +
18918 +       if (fmt) {
18919 +               va_start(args, fmt);
18920 +               lastheader_message_len = vsnprintf(lastheader, 512, fmt, args);
18921 +               va_end(args);
18922 +       }
18923 +
18924 +       if (clearbar)
18925 +               toi_update_status(0, 1, NULL);
18926 +
18927 +       if (ui_helper_data.pid == -1)
18928 +               printk(KERN_EMERG "%s\n", lastheader);
18929 +       else
18930 +               toi_message(0, TOI_STATUS, 1, lastheader, NULL);
18931 +}
18932 +
18933 +/**
18934 + * toi_wait_for_keypress - Wait for keypress via userui.
18935 + *
18936 + * @timeout: Maximum time to wait.
18937 + *
18938 + * Wait for a keypress from userui.
18939 + *
18940 + * FIXME: Implement timeout?
18941 + */
18942 +static char userui_wait_for_keypress(int timeout)
18943 +{
18944 +       char key = '\0';
18945 +
18946 +       if (ui_helper_data.pid != -1) {
18947 +               wait_for_key_via_userui();
18948 +               key = ' ';
18949 +       }
18950 +
18951 +       return key;
18952 +}
18953 +
18954 +/**
18955 + * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it.
18956 + *
18957 + * @result_code: Reason why we're aborting (1 << bit).
18958 + * @fmt: Message to display if telling the user what's going on.
18959 + *
18960 + * Abort a cycle. If this wasn't at the user's request (and we're displaying
18961 + * output), tell the user why and wait for them to acknowledge the message.
18962 + */
18963 +static void userui_abort_hibernate(int result_code, const char *fmt, ...)
18964 +{
18965 +       va_list args;
18966 +       int printed_len = 0;
18967 +
18968 +       set_result_state(result_code);
18969 +
18970 +       if (test_result_state(TOI_ABORTED))
18971 +               return;
18972 +
18973 +       set_result_state(TOI_ABORTED);
18974 +
18975 +       if (test_result_state(TOI_ABORT_REQUESTED))
18976 +               return;
18977 +
18978 +       va_start(args, fmt);
18979 +       printed_len = vsnprintf(local_printf_buf, sizeof(local_printf_buf),
18980 +                       fmt, args);
18981 +       va_end(args);
18982 +       if (ui_helper_data.pid != -1)
18983 +               printed_len = sprintf(local_printf_buf + printed_len,
18984 +                                       " (Press SPACE to continue)");
18985 +
18986 +       toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf);
18987 +
18988 +       if (ui_helper_data.pid != -1)
18989 +               userui_wait_for_keypress(0);
18990 +}
18991 +
18992 +/**
18993 + * request_abort_hibernate - Abort hibernating or resuming at user request.
18994 + *
18995 + * Handle the user requesting the cancellation of a hibernation or resume by
18996 + * pressing escape.
18997 + */
18998 +static void request_abort_hibernate(void)
18999 +{
19000 +       if (test_result_state(TOI_ABORT_REQUESTED))
19001 +               return;
19002 +
19003 +       if (test_toi_state(TOI_NOW_RESUMING)) {
19004 +               toi_prepare_status(CLEAR_BAR, "Escape pressed. "
19005 +                                       "Powering down again.");
19006 +               set_toi_state(TOI_STOP_RESUME);
19007 +               while (!test_toi_state(TOI_IO_STOPPED))
19008 +                       schedule();
19009 +               if (toiActiveAllocator->mark_resume_attempted)
19010 +                       toiActiveAllocator->mark_resume_attempted(0);
19011 +               toi_power_down();
19012 +       }
19013 +
19014 +       toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED:"
19015 +                                       " ABORTING HIBERNATION ---");
19016 +       set_abort_result(TOI_ABORT_REQUESTED);
19017 +       wake_up_interruptible(&userui_wait_for_key);
19018 +}
19019 +
19020 +/**
19021 + * userui_user_rcv_msg - Receive a netlink message from userui.
19022 + *
19023 + * @skb: skb received.
19024 + * @nlh: Netlink header received.
19025 + */
19026 +static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
19027 +{
19028 +       int type;
19029 +       int *data;
19030 +
19031 +       type = nlh->nlmsg_type;
19032 +
19033 +       /* Control messages: ignore them */
19034 +       if (type < NETLINK_MSG_BASE)
19035 +               return 0;
19036 +
19037 +       /* Unknown message: reply with EINVAL */
19038 +       if (type >= USERUI_MSG_MAX)
19039 +               return -EINVAL;
19040 +
19041 +       /* All operations require privileges, even GET */
19042 +       if (security_netlink_recv(skb, CAP_NET_ADMIN))
19043 +               return -EPERM;
19044 +
19045 +       /* Only allow one task to receive NOFREEZE privileges */
19046 +       if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) {
19047 +               printk(KERN_INFO "Got NOFREEZE_ME request when "
19048 +                       "ui_helper_data.pid is %d.\n", ui_helper_data.pid);
19049 +               return -EBUSY;
19050 +       }
19051 +
19052 +       data = (int *) NLMSG_DATA(nlh);
19053 +
19054 +       switch (type) {
19055 +       case USERUI_MSG_ABORT:
19056 +               request_abort_hibernate();
19057 +               return 0;
19058 +       case USERUI_MSG_GET_STATE:
19059 +               toi_send_netlink_message(&ui_helper_data,
19060 +                               USERUI_MSG_GET_STATE, &toi_bkd.toi_action,
19061 +                               sizeof(toi_bkd.toi_action));
19062 +               return 0;
19063 +       case USERUI_MSG_GET_DEBUG_STATE:
19064 +               toi_send_netlink_message(&ui_helper_data,
19065 +                               USERUI_MSG_GET_DEBUG_STATE,
19066 +                               &toi_bkd.toi_debug_state,
19067 +                               sizeof(toi_bkd.toi_debug_state));
19068 +               return 0;
19069 +       case USERUI_MSG_SET_STATE:
19070 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
19071 +                       return -EINVAL;
19072 +               ui_nl_set_state(*data);
19073 +               return 0;
19074 +       case USERUI_MSG_SET_DEBUG_STATE:
19075 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
19076 +                       return -EINVAL;
19077 +               toi_bkd.toi_debug_state = (*data);
19078 +               return 0;
19079 +       case USERUI_MSG_SPACE:
19080 +               wake_up_interruptible(&userui_wait_for_key);
19081 +               return 0;
19082 +       case USERUI_MSG_GET_POWERDOWN_METHOD:
19083 +               toi_send_netlink_message(&ui_helper_data,
19084 +                               USERUI_MSG_GET_POWERDOWN_METHOD,
19085 +                               &toi_poweroff_method,
19086 +                               sizeof(toi_poweroff_method));
19087 +               return 0;
19088 +       case USERUI_MSG_SET_POWERDOWN_METHOD:
19089 +               if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char)))
19090 +                       return -EINVAL;
19091 +               toi_poweroff_method = (unsigned long)(*data);
19092 +               return 0;
19093 +       case USERUI_MSG_GET_LOGLEVEL:
19094 +               toi_send_netlink_message(&ui_helper_data,
19095 +                               USERUI_MSG_GET_LOGLEVEL,
19096 +                               &toi_bkd.toi_default_console_level,
19097 +                               sizeof(toi_bkd.toi_default_console_level));
19098 +               return 0;
19099 +       case USERUI_MSG_SET_LOGLEVEL:
19100 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
19101 +                       return -EINVAL;
19102 +               toi_bkd.toi_default_console_level = (*data);
19103 +               return 0;
19104 +       case USERUI_MSG_PRINTK:
19105 +               printk(KERN_INFO "%s", (char *) data);
19106 +               return 0;
19107 +       }
19108 +
19109 +       /* Unhandled here */
19110 +       return 1;
19111 +}
19112 +
19113 +/**
19114 + * userui_cond_pause - Possibly pause at user request.
19115 + *
19116 + * @pause: Whether to pause or just display the message.
19117 + * @message: Message to display at the start of pausing.
19118 + *
19119 + * Potentially pause and wait for the user to tell us to continue. We normally
19120 + * only pause when @pause is set. While paused, the user can do things like
19121 + * changing the loglevel, toggling the display of debugging sections and such
19122 + * like.
19123 + */
19124 +static void userui_cond_pause(int pause, char *message)
19125 +{
19126 +       int displayed_message = 0, last_key = 0;
19127 +
19128 +       while (last_key != 32 &&
19129 +               ui_helper_data.pid != -1 &&
19130 +               ((test_action_state(TOI_PAUSE) && pause) ||
19131 +                (test_action_state(TOI_SINGLESTEP)))) {
19132 +               if (!displayed_message) {
19133 +                       toi_prepare_status(DONT_CLEAR_BAR,
19134 +                          "%s Press SPACE to continue.%s",
19135 +                          message ? message : "",
19136 +                          (test_action_state(TOI_SINGLESTEP)) ?
19137 +                          " Single step on." : "");
19138 +                       displayed_message = 1;
19139 +               }
19140 +               last_key = userui_wait_for_keypress(0);
19141 +       }
19142 +       schedule();
19143 +}
19144 +
19145 +/**
19146 + * userui_prepare_console - Prepare the console for use.
19147 + *
19148 + * Prepare a console for use, saving current kmsg settings and attempting to
19149 + * start userui. Console loglevel changes are handled by userui.
19150 + */
19151 +static void userui_prepare_console(void)
19152 +{
19153 +       orig_kmsg = kmsg_redirect;
19154 +       kmsg_redirect = fg_console + 1;
19155 +
19156 +       ui_helper_data.pid = -1;
19157 +
19158 +       if (!userui_ops.enabled) {
19159 +               printk(KERN_INFO "TuxOnIce: Userui disabled.\n");
19160 +               return;
19161 +       }
19162 +
19163 +       if (*ui_helper_data.program)
19164 +               toi_netlink_setup(&ui_helper_data);
19165 +       else
19166 +               printk(KERN_INFO "TuxOnIce: Userui program not configured.\n");
19167 +}
19168 +
19169 +/**
19170 + * userui_cleanup_console - Cleanup after a cycle.
19171 + *
19172 + * Tell userui to cleanup, and restore kmsg_redirect to its original value.
19173 + */
19174 +
19175 +static void userui_cleanup_console(void)
19176 +{
19177 +       if (ui_helper_data.pid > -1)
19178 +               toi_netlink_close(&ui_helper_data);
19179 +
19180 +       kmsg_redirect = orig_kmsg;
19181 +}
19182 +
19183 +/*
19184 + * User interface specific /sys/power/tuxonice entries.
19185 + */
19186 +
19187 +static struct toi_sysfs_data sysfs_params[] = {
19188 +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
19189 +       SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action,
19190 +                       TOI_CAN_CANCEL, 0),
19191 +       SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action,
19192 +                       TOI_PAUSE, 0),
19193 +       SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL),
19194 +       SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
19195 +                       2048, 0, NULL),
19196 +       SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0,
19197 +                       set_ui_program_set),
19198 +       SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL)
19199 +#endif
19200 +};
19201 +
19202 +static struct toi_module_ops userui_ops = {
19203 +       .type                           = MISC_MODULE,
19204 +       .name                           = "userui",
19205 +       .shared_directory               = "user_interface",
19206 +       .module                         = THIS_MODULE,
19207 +       .storage_needed                 = userui_storage_needed,
19208 +       .save_config_info               = userui_save_config_info,
19209 +       .load_config_info               = userui_load_config_info,
19210 +       .memory_needed                  = userui_memory_needed,
19211 +       .sysfs_data                     = sysfs_params,
19212 +       .num_sysfs_entries              = sizeof(sysfs_params) /
19213 +               sizeof(struct toi_sysfs_data),
19214 +};
19215 +
19216 +static struct ui_ops my_ui_ops = {
19217 +       .post_atomic_restore            = userui_post_atomic_restore,
19218 +       .update_status                  = userui_update_status,
19219 +       .message                        = userui_message,
19220 +       .prepare_status                 = userui_prepare_status,
19221 +       .abort                          = userui_abort_hibernate,
19222 +       .cond_pause                     = userui_cond_pause,
19223 +       .prepare                        = userui_prepare_console,
19224 +       .cleanup                        = userui_cleanup_console,
19225 +       .wait_for_key                   = userui_wait_for_keypress,
19226 +};
19227 +
19228 +/**
19229 + * toi_user_ui_init - Boot time initialisation for user interface.
19230 + *
19231 + * Invoked from the core init routine.
19232 + */
19233 +static __init int toi_user_ui_init(void)
19234 +{
19235 +       int result;
19236 +
19237 +       ui_helper_data.nl = NULL;
19238 +       strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255);
19239 +       ui_helper_data.pid = -1;
19240 +       ui_helper_data.skb_size = sizeof(struct userui_msg_params);
19241 +       ui_helper_data.pool_limit = 6;
19242 +       ui_helper_data.netlink_id = NETLINK_TOI_USERUI;
19243 +       ui_helper_data.name = "userspace ui";
19244 +       ui_helper_data.rcv_msg = userui_user_rcv_msg;
19245 +       ui_helper_data.interface_version = 8;
19246 +       ui_helper_data.must_init = 0;
19247 +       ui_helper_data.not_ready = userui_cleanup_console;
19248 +       init_completion(&ui_helper_data.wait_for_process);
19249 +       result = toi_register_module(&userui_ops);
19250 +       if (!result)
19251 +               result = toi_register_ui_ops(&my_ui_ops);
19252 +       if (result)
19253 +               toi_unregister_module(&userui_ops);
19254 +
19255 +       return result;
19256 +}
19257 +
19258 +#ifdef MODULE
19259 +/**
19260 + * toi_user_ui_exit - Cleanup code for when the core is unloaded.
19261 + */
19262 +static __exit void toi_user_ui_exit(void)
19263 +{
19264 +       toi_netlink_close_complete(&ui_helper_data);
19265 +       toi_remove_ui_ops(&my_ui_ops);
19266 +       toi_unregister_module(&userui_ops);
19267 +}
19268 +
19269 +module_init(toi_user_ui_init);
19270 +module_exit(toi_user_ui_exit);
19271 +MODULE_AUTHOR("Nigel Cunningham");
19272 +MODULE_DESCRIPTION("TuxOnIce Userui Support");
19273 +MODULE_LICENSE("GPL");
19274 +#else
19275 +late_initcall(toi_user_ui_init);
19276 +#endif
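The step arithmetic in userui_update_status() above deserves spelling out: value is mapped onto one of progress_granularity discrete steps, and the function returns the next numerator at which the step will change, so callers can skip redundant redraws. Because value * progress_granularity is evaluated in 32 bits, both operands are shifted down whenever maximum needs more than 16 bits; with granularity capped at 2048 by the sysfs entry, the product then cannot overflow. A standalone user-space sketch of the same arithmetic, with illustrative names (the kernel uses its own fls()):

    #include <stdio.h>
    #include <stdint.h>

    /* User-space stand-in for the kernel's fls(). */
    static int fls32(uint32_t x)
    {
            int r = 0;
            while (x) { r++; x >>= 1; }
            return r;
    }

    /* Same step/next-update computation as userui_update_status(). */
    static uint32_t progress_step(uint32_t value, uint32_t maximum,
                                  uint32_t granularity, uint32_t *next)
    {
            int bitshift = fls32(maximum) - 16;
            uint32_t step;

            if (bitshift > 0) {
                    uint32_t max2 = maximum >> bitshift;
                    uint32_t val2 = value >> bitshift;

                    step = val2 * granularity / max2;
                    *next = (((step + 1) * max2 / granularity) + 1) << bitshift;
            } else {
                    step = value * granularity / maximum;
                    *next = ((step + 1) * maximum / granularity) + 1;
            }
            return step;
    }

    int main(void)
    {
            uint32_t next;
            /* 300000 of 1000000 pages at 30 steps: step 9 of 30. */
            uint32_t step = progress_step(300000, 1000000, 30, &next);

            printf("step %u/30, next update at %u\n", step, next);
            return 0;
    }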
19277 diff --git a/kernel/power/user.c b/kernel/power/user.c
19278 index ed97375..3519246 100644
19279 --- a/kernel/power/user.c
19280 +++ b/kernel/power/user.c
19281 @@ -65,6 +65,7 @@ static struct snapshot_data {
19282  } snapshot_state;
19283  
19284  atomic_t snapshot_device_available = ATOMIC_INIT(1);
19285 +EXPORT_SYMBOL_GPL(snapshot_device_available);
19286  
19287  static int snapshot_open(struct inode *inode, struct file *filp)
19288  {
19289 diff --git a/kernel/printk.c b/kernel/printk.c
19290 index 5052b54..0215fc3 100644
19291 --- a/kernel/printk.c
19292 +++ b/kernel/printk.c
19293 @@ -32,6 +32,7 @@
19294  #include <linux/security.h>
19295  #include <linux/bootmem.h>
19296  #include <linux/syscalls.h>
19297 +#include <linux/suspend.h>
19298  #include <linux/kexec.h>
19299  
19300  #include <asm/uaccess.h>
19301 @@ -60,6 +61,7 @@ int console_printk[4] = {
19302         MINIMUM_CONSOLE_LOGLEVEL,       /* minimum_console_loglevel */
19303         DEFAULT_CONSOLE_LOGLEVEL,       /* default_console_loglevel */
19304  };
19305 +EXPORT_SYMBOL_GPL(console_printk);
19306  
19307  /*
19308   * Low level drivers may need that to know if they can schedule in
19309 @@ -911,6 +913,7 @@ void suspend_console(void)
19310         console_suspended = 1;
19311         up(&console_sem);
19312  }
19313 +EXPORT_SYMBOL_GPL(suspend_console);
19314  
19315  void resume_console(void)
19316  {
19317 @@ -920,6 +923,7 @@ void resume_console(void)
19318         console_suspended = 0;
19319         release_console_sem();
19320  }
19321 +EXPORT_SYMBOL_GPL(resume_console);
19322  
19323  /**
19324   * acquire_console_sem - lock the console system for exclusive use.
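These printk.c exports exist presumably so that a modular TuxOnIce can quiesce and restore console output around the atomic copy. A hypothetical module fragment showing the intended use of the newly exported calls (illustrative only, not part of the patch):

    #include <linux/module.h>
    #include <linux/console.h>

    static int __init demo_init(void)
    {
            suspend_console();
            /* ... image I/O would run here while consoles are quiet ... */
            resume_console();
            return 0;
    }

    static void __exit demo_exit(void)
    {
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");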
19325 diff --git a/mm/bootmem.c b/mm/bootmem.c
19326 index daf9271..fb468a7 100644
19327 --- a/mm/bootmem.c
19328 +++ b/mm/bootmem.c
19329 @@ -22,6 +22,7 @@
19330  unsigned long max_low_pfn;
19331  unsigned long min_low_pfn;
19332  unsigned long max_pfn;
19333 +EXPORT_SYMBOL_GPL(max_pfn);
19334  
19335  #ifdef CONFIG_CRASH_DUMP
19336  /*
19337 diff --git a/mm/highmem.c b/mm/highmem.c
19338 index 68eb1d9..800c7a9 100644
19339 --- a/mm/highmem.c
19340 +++ b/mm/highmem.c
19341 @@ -58,6 +58,7 @@ unsigned int nr_free_highpages (void)
19342  
19343         return pages;
19344  }
19345 +EXPORT_SYMBOL_GPL(nr_free_highpages);
19346  
19347  static int pkmap_count[LAST_PKMAP];
19348  static unsigned int last_pkmap_nr;
19349 diff --git a/mm/memory.c b/mm/memory.c
19350 index 4126dd1..878eff1 100644
19351 --- a/mm/memory.c
19352 +++ b/mm/memory.c
19353 @@ -1185,6 +1185,7 @@ no_page_table:
19354         }
19355         return page;
19356  }
19357 +EXPORT_SYMBOL_GPL(follow_page);
19358  
19359  /* Can we do the FOLL_ANON optimization? */
19360  static inline int use_zero_page(struct vm_area_struct *vma)
19361 diff --git a/mm/mmzone.c b/mm/mmzone.c
19362 index f5b7d17..72a6770 100644
19363 --- a/mm/mmzone.c
19364 +++ b/mm/mmzone.c
19365 @@ -14,6 +14,7 @@ struct pglist_data *first_online_pgdat(void)
19366  {
19367         return NODE_DATA(first_online_node);
19368  }
19369 +EXPORT_SYMBOL_GPL(first_online_pgdat);
19370  
19371  struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
19372  {
19373 @@ -23,6 +24,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
19374                 return NULL;
19375         return NODE_DATA(nid);
19376  }
19377 +EXPORT_SYMBOL_GPL(next_online_pgdat);
19378  
19379  /*
19380   * next_zone - helper magic for for_each_zone()
19381 @@ -42,6 +44,7 @@ struct zone *next_zone(struct zone *zone)
19382         }
19383         return zone;
19384  }
19385 +EXPORT_SYMBOL_GPL(next_zone);
19386  
19387  static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes)
19388  {
19389 diff --git a/mm/page-writeback.c b/mm/page-writeback.c
19390 index bb553c3..fb606e5 100644
19391 --- a/mm/page-writeback.c
19392 +++ b/mm/page-writeback.c
19393 @@ -105,6 +105,7 @@ unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
19394   * Flag that makes the machine dump writes/reads and block dirtyings.
19395   */
19396  int block_dump;
19397 +EXPORT_SYMBOL_GPL(block_dump);
19398  
19399  /*
19400   * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
19401 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
19402 index fe753ec..c2f026b 100644
19403 --- a/mm/page_alloc.c
19404 +++ b/mm/page_alloc.c
19405 @@ -1810,6 +1810,26 @@ static unsigned int nr_free_zone_pages(int offset)
19406         return sum;
19407  }
19408  
19409 +static unsigned int nr_unallocated_zone_pages(int offset)
19410 +{
19411 +       struct zoneref *z;
19412 +       struct zone *zone;
19413 +
19414 +       /* Just pick one node, since fallback list is circular */
19415 +       unsigned int sum = 0;
19416 +
19417 +       struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
19418 +
19419 +       for_each_zone_zonelist(zone, z, zonelist, offset) {
19420 +               unsigned long high = zone->pages_high;
19421 +               unsigned long left = zone_page_state(zone, NR_FREE_PAGES);
19422 +               if (left > high)
19423 +                       sum += left - high;
19424 +       }
19425 +
19426 +       return sum;
19427 +}
19428 +
19429  /*
19430   * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
19431   */
19432 @@ -1820,6 +1840,15 @@ unsigned int nr_free_buffer_pages(void)
19433  EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
19434  
19435  /*
19436 + * Amount of free RAM above zone high watermarks within ZONE_DMA and ZONE_NORMAL
19437 + */
19438 +unsigned int nr_unallocated_buffer_pages(void)
19439 +{
19440 +       return nr_unallocated_zone_pages(gfp_zone(GFP_USER));
19441 +}
19442 +EXPORT_SYMBOL_GPL(nr_unallocated_buffer_pages);
19443 +
19444 +/*
19445   * Amount of free RAM allocatable within all zones
19446   */
19447  unsigned int nr_free_pagecache_pages(void)
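Where nr_free_zone_pages() estimates the total allocatable size of each zone, the new nr_unallocated_zone_pages() counts only pages that are currently free and above each zone's high watermark, i.e. memory that can be taken without pushing a zone into reclaim. A small standalone model of the calculation, with illustrative names:

    #include <stdio.h>

    struct zone_model {
            unsigned long pages_high;   /* zone high watermark      */
            unsigned long free_pages;   /* NR_FREE_PAGES equivalent */
    };

    /* Sum of free pages above each zone's high watermark. */
    static unsigned int nr_unallocated(const struct zone_model *z, int n)
    {
            unsigned int sum = 0;
            int i;

            for (i = 0; i < n; i++)
                    if (z[i].free_pages > z[i].pages_high)
                            sum += z[i].free_pages - z[i].pages_high;
            return sum;
    }

    int main(void)
    {
            struct zone_model zones[] = {
                    { .pages_high = 128, .free_pages = 1000 }, /* adds 872 */
                    { .pages_high = 512, .free_pages = 300  }, /* adds 0   */
            };

            printf("%u pages\n", nr_unallocated(zones, 2)); /* prints 872 */
            return 0;
    }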
19448 diff --git a/mm/shmem.c b/mm/shmem.c
19449 index b25f95c..4908d20 100644
19450 --- a/mm/shmem.c
19451 +++ b/mm/shmem.c
19452 @@ -1557,6 +1557,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
19453                 memset(info, 0, (char *)inode - (char *)info);
19454                 spin_lock_init(&info->lock);
19455                 info->flags = flags & VM_NORESERVE;
19456 +               if (flags & VM_ATOMIC_COPY)
19457 +                       inode->i_flags |= S_ATOMIC_COPY;
19458                 INIT_LIST_HEAD(&info->swaplist);
19459  
19460                 switch (mode & S_IFMT) {
19461 diff --git a/mm/swap_state.c b/mm/swap_state.c
19462 index 1416e7e..4f75ac3 100644
19463 --- a/mm/swap_state.c
19464 +++ b/mm/swap_state.c
19465 @@ -45,6 +45,7 @@ struct address_space swapper_space = {
19466         .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
19467         .backing_dev_info = &swap_backing_dev_info,
19468  };
19469 +EXPORT_SYMBOL_GPL(swapper_space);
19470  
19471  #define INC_CACHE_INFO(x)      do { swap_cache_info.x++; } while (0)
19472  
19473 diff --git a/mm/swapfile.c b/mm/swapfile.c
19474 index 312fafe..894fcb5 100644
19475 --- a/mm/swapfile.c
19476 +++ b/mm/swapfile.c
19477 @@ -414,6 +414,7 @@ noswap:
19478         spin_unlock(&swap_lock);
19479         return (swp_entry_t) {0};
19480  }
19481 +EXPORT_SYMBOL_GPL(get_swap_page);
19482  
19483  swp_entry_t get_swap_page_of_type(int type)
19484  {
19485 @@ -508,6 +509,7 @@ void swap_free(swp_entry_t entry)
19486                 spin_unlock(&swap_lock);
19487         }
19488  }
19489 +EXPORT_SYMBOL_GPL(swap_free);
19490  
19491  /*
19492   * How many references to page are currently swapped out?
19493 @@ -1178,6 +1180,7 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
19494                 BUG_ON(se == start_se);         /* It *must* be present */
19495         }
19496  }
19497 +EXPORT_SYMBOL_GPL(map_swap_page);
19498  
19499  #ifdef CONFIG_HIBERNATION
19500  /*
19501 @@ -1521,6 +1524,7 @@ out_dput:
19502  out:
19503         return err;
19504  }
19505 +EXPORT_SYMBOL_GPL(sys_swapoff);
19506  
19507  #ifdef CONFIG_PROC_FS
19508  /* iterator */
19509 @@ -1919,6 +1923,7 @@ out:
19510         }
19511         return error;
19512  }
19513 +EXPORT_SYMBOL_GPL(sys_swapon);
19514  
19515  void si_swapinfo(struct sysinfo *val)
19516  {
19517 @@ -1936,6 +1941,7 @@ void si_swapinfo(struct sysinfo *val)
19518         val->totalswap = total_swap_pages + nr_to_be_unused;
19519         spin_unlock(&swap_lock);
19520  }
19521 +EXPORT_SYMBOL_GPL(si_swapinfo);
19522  
19523  /*
19524   * Verify that a swap entry is valid and increment its swap map count.
19525 @@ -1984,6 +1990,7 @@ get_swap_info_struct(unsigned type)
19526  {
19527         return &swap_info[type];
19528  }
19529 +EXPORT_SYMBOL_GPL(get_swap_info_struct);
19530  
19531  /*
19532   * swap_lock prevents swap_map being freed. Don't grab an extra
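The swapfile.c exports above make the swap allocator callable from modules, presumably so that TuxOnIce's swap writer can reserve and release swap entries for image pages. A hypothetical module fragment built on the exported calls, assuming the get_swap_page(void) signature of this kernel era (illustrative only):

    #include <linux/module.h>
    #include <linux/swap.h>

    static int __init demo_swap_init(void)
    {
            swp_entry_t entry = get_swap_page();

            if (!entry.val)
                    return -ENOSPC;   /* no swap space available */

            /* ... a real writer would submit image I/O for this entry ... */

            swap_free(entry);         /* hand the entry back */
            return 0;
    }

    static void __exit demo_swap_exit(void)
    {
    }

    module_init(demo_swap_init);
    module_exit(demo_swap_exit);
    MODULE_LICENSE("GPL");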
19533 diff --git a/mm/vmscan.c b/mm/vmscan.c
19534 index d254306..e20daf5 100644
19535 --- a/mm/vmscan.c
19536 +++ b/mm/vmscan.c
19537 @@ -2036,6 +2036,9 @@ void wakeup_kswapd(struct zone *zone, int order)
19538         if (!populated_zone(zone))
19539                 return;
19540  
19541 +       if (freezer_is_on())
19542 +               return;
19543 +
19544         pgdat = zone->zone_pgdat;
19545         if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
19546                 return;
19547 @@ -2196,6 +2199,7 @@ out:
19548  
19549         return sc.nr_reclaimed;
19550  }
19551 +EXPORT_SYMBOL_GPL(shrink_all_memory);
19552  #endif
19553  
19554  /* It's optimal to keep kswapds on the same CPUs as their memory, but