1 diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt
2 new file mode 100644
3 index 0000000..68c0454
4 --- /dev/null
5 +++ b/Documentation/power/tuxonice-internals.txt
6 @@ -0,0 +1,474 @@
7 +                  TuxOnIce 3.0 Internal Documentation.
8 +                       Updated to 11 March 2008
9 +
10 +1.  Introduction.
11 +
12 +    TuxOnIce 3.0 is an addition to the Linux Kernel, designed to
13 +    allow the user to quickly shut down and quickly boot a computer, without
14 +    needing to close documents or programs. It is equivalent to the
15 +    hibernate facility in some laptops. This implementation, however,
16 +    requires no special BIOS or hardware support.
17 +
18 +    The code in these files is based upon the original implementation
19 +    prepared by Gabor Kuti and additional work by Pavel Machek and a
20 +    host of others. This code has been substantially reworked by Nigel
21 +    Cunningham, again with the help and testing of many others, not the
22 +    least of whom is Michael Frank. At its heart, however, the operation is
23 +    essentially the same as Gabor's version.
24 +
25 +2.  Overview of operation.
26 +
27 +    The basic sequence of operations is as follows:
28 +
29 +       a. Quiesce all other activity.
30 +       b. Ensure enough memory and storage space are available, and attempt
31 +          to free memory/storage if necessary.
32 +       c. Allocate the required memory and storage space.
33 +       d. Write the image.
34 +       e. Power down.
35 +
36 +    There are a number of complicating factors which mean that things are
37 +    not as simple as the above would imply, however...
38 +
39 +    o The activity of each process must be stopped at a point where it is
40 +    not holding locks necessary for saving the image and will not
41 +    unexpectedly restart operations due to something like a timeout, thereby
42 +    making our image inconsistent.
43 +
44 +    o It is desirable that we sync outstanding I/O to disk before calculating
45 +    image statistics. This reduces corruption if one should suspend but
46 +    then not resume, and also makes later parts of the operation safer (see
47 +    below).
48 +
49 +    o We need to get as close as we can to an atomic copy of the data.
50 +    Inconsistencies in the image will result in inconsistent memory contents at
51 +    resume time, and thus in instability of the system and/or file system
52 +    corruption. This would appear to imply a maximum image size of one half of
53 +    the amount of RAM, but we have a solution... (again, below).
54 +
55 +    o In 2.6, we choose to play nicely with the other suspend-to-disk
56 +    implementations.
57 +
58 +3.  Detailed description of internals.
59 +
60 +    a. Quiescing activity.
61 +
62 +    Safely quiescing the system is achieved using three separate but related
63 +    aspects.
64 +
65 +    First, we note that the vast majority of processes don't need to run during
66 +    suspend. They can be 'frozen'. We therefore implement a refrigerator
67 +    routine, which processes enter and in which they remain until the cycle is
68 +    complete. Processes enter the refrigerator via try_to_freeze() invocations
69 +    at appropriate places.  A process cannot be frozen in any old place. It
70 +    must not be holding locks that will be needed for writing the image or
71 +    freezing other processes. For this reason, userspace processes generally
72 +    enter the refrigerator via the signal handling code, and kernel threads at
73 +    the place in their event loops where they drop locks and yield to other
74 +    processes or sleep.
75 +
76 +    The task of freezing processes is complicated by the fact that there can be
77 +    interdependencies between processes. Freezing process A before process B may
78 +    mean that process B cannot be frozen, because it blocks waiting for
79 +    process A rather than in the refrigerator. This issue is seen where
80 +    userspace waits on freezable kernel threads or fuse filesystem threads. To
81 +    address this issue, we implement the following algorithm for quiescing
82 +    activity:
83 +
84 +       - Freeze filesystems (including fuse - userspace programs starting
85 +               new requests are immediately frozen; programs already running
86 +               requests complete their work before being frozen in the next
87 +               step)
88 +       - Freeze userspace
89 +       - Thaw filesystems (this is safe now that userspace is frozen and no
90 +               fuse requests are outstanding).
91 +       - Invoke sys_sync (noop on fuse).
92 +       - Freeze filesystems
93 +       - Freeze kernel threads
94 +
95 +    If we need to free memory, we thaw kernel threads and filesystems, but not
96 +    userspace. We can then free caches without worrying about deadlocks due to
97 +    swap files being on frozen filesystems or such like.
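+
+    As a sketch (the helper names here are illustrative, not the actual
+    TuxOnIce symbols), the quiescing sequence amounts to:
+
+    static int quiesce_system(void)
+    {
+        freeze_filesystems();     /* new fuse requests block immediately */
+        if (freeze_userspace())
+            return -EBUSY;        /* a task refused to enter the refrigerator */
+        thaw_filesystems();       /* safe: no fuse requests outstanding */
+        sys_sync();               /* flush dirty data; a no-op on fuse */
+        freeze_filesystems();
+        return freeze_kernel_threads();
+    }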
98 +
99 +    b. Ensure enough memory & storage are available.
100 +
101 +    We have a number of constraints to meet in order to be able to successfully
102 +    suspend and resume.
103 +
104 +    First, the image will be written in two parts, described below. One of these
105 +    parts needs to have an atomic copy made, which of course implies a maximum
106 +    size of one half of the amount of system memory. The other part ('pageset')
107 +    is not atomically copied, and can therefore be as large or small as desired.
108 +
109 +    Second, we have constraints on the amount of storage available. In these
110 +    calculations, we may also consider any compression that will be done. The
111 +    cryptoapi module allows the user to configure an expected compression ratio.
112 +   
113 +    Third, the user can specify an arbitrary limit on the image size, in
114 +    megabytes. This limit is treated as a soft limit, so that we don't fail the
115 +    attempt to suspend if we cannot meet this constraint.
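+
+    As a rough illustration of how these constraints combine (a hypothetical
+    helper, not the actual TuxOnIce code; expected_compression is a
+    percentage, e.g. 50 for an expected 2:1 ratio):
+
+    static int enough_storage(unsigned long pageset1, unsigned long pageset2,
+                              unsigned long header_pages,
+                              unsigned long storage_pages,
+                              int expected_compression)
+    {
+        unsigned long needed = header_pages +
+            ((pageset1 + pageset2) * expected_compression) / 100;
+
+        return needed <= storage_pages;
+    }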
116 +
117 +    c. Allocate the required memory and storage space.
118 +
119 +    Having done the initial freeze, we determine whether the above constraints
120 +    are met, and seek to allocate the metadata for the image. If the constraints
121 +    are not met, or we fail to allocate the required space for the metadata, we
122 +    seek to free the amount of memory that we calculate is needed and try again.
123 +    We allow up to four iterations of this loop before aborting the cycle. If we
124 +    do fail, it should only be because of a bug in TuxOnIce's calculations.
125 +    
126 +    These steps are merged together in the prepare_image function, found in
127 +    prepare_image.c. The functions are merged because of the cyclical nature
128 +    of the problem of calculating how much memory and storage is needed. Since
129 +    the data structures containing the information about the image must
130 +    themselves take memory and use storage, the amount of memory and storage
131 +    required changes as we prepare the image. Since the changes are not large,
132 +    only one or two iterations will be required to achieve a solution.
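+
+    In outline (a sketch with hypothetical helper names, not the actual
+    prepare_image code):
+
+    static int toi_prepare_image(void)
+    {
+        int tries;
+
+        for (tries = 0; tries < 4; tries++) {
+            if (constraints_met() && !allocate_metadata())
+                return 0;
+            thaw_kernel_threads_and_filesystems();  /* userspace stays frozen */
+            eat_memory(amount_still_needed());
+            refreeze_kernel_threads_and_filesystems();
+        }
+        return -ENOMEM;   /* should only happen due to a calculation bug */
+    }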
133 +
134 +    The recursive nature of the algorithm is minimised by keeping user space
135 +    frozen while preparing the image, and by the fact that our records of which
136 +    pages are to be saved and which pageset they are saved in use bitmaps (so
137 +    that changes in number or fragmentation of the pages to be saved don't
138 +    feed back via changes in the amount of memory needed for metadata). The
139 +    recursiveness is thus limited to any extra slab pages allocated to store the
140 +    extents that record storage used, and the effects of seeking to free memory.
141 +
142 +    d. Write the image.
143 +
144 +    We previously mentioned the need to create an atomic copy of the data, and
145 +    the half-of-memory limitation that is implied in this. This limitation is
146 +    circumvented by dividing the memory to be saved into two parts, called
147 +    pagesets.
148 +
149 +    Pageset2 contains the page cache - the pages on the active and inactive
150 +    lists. These pages aren't needed or modified while TuxOnIce is running, so
151 +    they can be safely written without an atomic copy. They are therefore
152 +    saved first and reloaded last. While saving these pages, TuxOnIce carefully
153 +    ensures that the work of writing the pages doesn't make the image
154 +    inconsistent.
155 +
156 +    Once pageset2 has been saved, we prepare to do the atomic copy of remaining
157 +    memory. As part of the preparation, we power down drivers, thereby providing
158 +    them with the opportunity to have their state recorded in the image. The
159 +    amount of memory allocated by drivers for this is usually negligible, but if
160 +    DRI is in use, video drivers may require significant amounts. Ideally we
161 +    would be able to query drivers while preparing the image as to the amount of
162 +    memory they will need. Unfortunately no such mechanism exists at the time of
163 +    writing. For this reason, TuxOnIce allows the user to set an
164 +    'extra_pages_allowance', which is used to seek to ensure sufficient memory
165 +    is available for drivers at this point. TuxOnIce also lets the user set this
166 +    value to 0. In this case, a test driver suspend is done while preparing the
167 +    image, and the difference (plus a margin) used instead.
168 +
169 +    Having suspended the drivers, we save the CPU context before making an
170 +    atomic copy of pageset1, resuming the drivers and saving the atomic copy.
171 +    After saving the two pagesets, we just need to save our metadata before
172 +    powering down.
173 +
174 +    As we mentioned earlier, the contents of pageset2 pages aren't needed once
175 +    they've been saved. We therefore use them as the destination of our atomic
176 +    copy. In the unlikely event that pageset1 is larger, extra pages are
177 +    allocated while the image is being prepared. This is normally only a real
178 +    possibility when the system has just been booted and the page cache is
179 +    small.
180 +
181 +    This is where we need to be careful about syncing, however. Pageset2 will
182 +    probably contain filesystem metadata. If this is overwritten with pageset1
183 +    and then a sync occurs, the filesystem will be corrupted - at least until
184 +    resume time and another sync of the restored data. Since there is a
185 +    possibility that the user might not resume or (may it never be!) that
186 +    suspend might oops, we do our utmost to avoid syncing filesystems after
187 +    copying pageset1.
188 +
189 +    e. Power down.
190 +
191 +    Powering down uses standard kernel routines. TuxOnIce supports powering down
192 +    using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off.
193 +    Supporting suspend to ram (S3) as a power off option might sound strange,
194 +    but it allows the user to quickly get their system up and running again if
195 +    the battery doesn't run out (we just need to re-read the overwritten pages)
196 +    and if the battery does run out (or the user removes power), they can still
197 +    resume.
198 +
199 +4.  Data Structures.
200 +
201 +    TuxOnIce uses three main structures to store its metadata and configuration
202 +    information:
203 +
204 +    a) Pageflags bitmaps.
205 +
206 +    TuxOnIce records which pages will be in pageset1, pageset2, the destination
207 +    of the atomic copy and the source of the atomically restored image using
208 +    bitmaps. These bitmaps are created from order zero allocations to maximise
209 +    reliability. The individual pages are combined together with pointers to
210 +    form per-zone bitmaps, which are in turn combined with another layer of
211 +    pointers to construct the overall bitmap.
212 +
213 +    The pageset1 bitmap is thus easily stored in the image header for use at
214 +    resume time.
215 +
216 +    As mentioned above, using bitmaps also means that the amount of memory and
217 +    storage required for recording the above information is constant. This
218 +    greatly simplifies the work of preparing the image. In earlier versions of
219 +    TuxOnIce, extents were used to record which pages would be stored. In that
220 +    case, however, eating memory could result in greater fragmentation of the
221 +    lists of pages, which in turn required more memory to store the extents and
222 +    more storage in the image header. These could in turn require further
223 +    freeing of memory, and another iteration. All of this complexity is removed
224 +    by having bitmaps.
225 +
226 +    Bitmaps also make a lot of sense because TuxOnIce only ever iterates
227 +    through the lists. There is therefore no cost to not being able to find the
228 +    nth page in constant time. We only need to worry about the cost of finding
229 +    the n+1th page, given the location of the nth page. Bitwise optimisations
230 +    help here.
231 +
232 +    The data structure is: unsigned long ***.
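+
+    For illustration, testing a bit in this three-level structure looks
+    something like the following sketch (simplified; the real helpers also
+    cope with zone boundaries and allocation failures):
+
+    #define BITS_PER_PAGE (PAGE_SIZE * 8)
+
+    static int test_pageflag(unsigned long ***bitmap, int zone,
+                             unsigned long zone_pfn)
+    {
+        unsigned long page = zone_pfn / BITS_PER_PAGE;
+        unsigned long bit  = zone_pfn % BITS_PER_PAGE;
+        unsigned long word = bitmap[zone][page][bit / BITS_PER_LONG];
+
+        return (word >> (bit % BITS_PER_LONG)) & 1;
+    }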
233 +
234 +    b) Extents for block data.
235 +
236 +    TuxOnIce supports writing the image to multiple block devices. In the case
237 +    of swap, multiple partitions and/or files may be in use, and we happily use
238 +    them all. This is accomplished as follows:
239 +
240 +    Whatever the actual source of the allocated storage, the destination of the
241 +    image can be viewed in terms of one or more block devices, and on each
242 +    device, a list of sectors. To simplify matters, we only use contiguous,
243 +    PAGE_SIZE aligned sectors, like the swap code does.
244 +
245 +    Since sector numbers on each bdev may well not start at 0, it makes much
246 +    more sense to use extents here. Contiguous ranges of pages can thus be
247 +    represented in the extents by contiguous values.
248 +
249 +    Variations in block size are taken account of in transforming this data
250 +    into the parameters for bio submission.
251 +
252 +    We can thus implement a layer of abstraction wherein the core of TuxOnIce
253 +    doesn't have to worry about which device we're currently writing to or
254 +    where in the device we are. It simply requests that the next page in the
255 +    pageset or header be written, leaving the details to this lower layer.
256 +    The lower layer remembers where in the sequence of devices and blocks each
257 +    pageset starts. The header always starts at the beginning of the allocated
258 +    storage.
259 +
260 +    So extents are:
261 +
262 +    struct extent {
263 +      unsigned long minimum, maximum;
264 +      struct extent *next;
265 +    }
266 +
267 +    These are combined into chains of extents for a device:
268 +
269 +    struct extent_chain {
270 +      int size; /* size of the extent ie sum (max-min+1) */
271 +      int allocs, frees;
272 +      char *name;
273 +      struct extent *first, *last_touched;
274 +    };
275 +
276 +    For each bdev, we need to store a little more info:
277 +
278 +    struct suspend_bdev_info {
279 +       struct block_device *bdev;
280 +       dev_t dev_t;
281 +       int bmap_shift;
282 +       int blocks_per_page;
283 +    };
284 +
285 +    The dev_t is used to identify the device in the stored image. As a result,
286 +    we expect devices at resume time to have the same major and minor numbers
287 +    as they had while suspending.  This is primarily a concern where the user
288 +    utilises LVM for storage, as they will need to dmsetup their partitions in
289 +    such a way as to maintain this consistency at resume time.
290 +
291 +    bmap_shift and blocks_per_page record and apply the effects of variations
292 +    in blocks-per-page settings for the filesystem and underlying bdev. For most
293 +    filesystems, these are the same, but for xfs, they can have independent
294 +    values.
295 +
296 +    Combining these two structures together, we have everything we need to
297 +    record what devices and what blocks on each device are being used to
298 +    store the image, and to submit I/O using submit_bio.
299 +
300 +    The last elements in the picture are a means of recording how the storage
301 +    is being used.
302 +
303 +    We do this first and foremost by implementing a layer of abstraction on
304 +    top of the devices and extent chains which allows us to view however many
305 +    devices there might be as one long storage tape, with a single 'head' that
306 +    tracks a 'current position' on the tape:
307 +
308 +    struct extent_iterate_state {
309 +      struct extent_chain *chains;
310 +      int num_chains;
311 +      int current_chain;
312 +      struct extent *current_extent;
313 +      unsigned long current_offset;
314 +    };
315 +
316 +    That is, *chains points to an array of size num_chains of extent chains.
317 +    For the filewriter, this is always a single chain. For the swapwriter, the
318 +    array is of size MAX_SWAPFILES.
319 +
320 +    current_chain, current_extent and current_offset thus point to the current
321 +    index in the chains array (and into a matching array of struct
322 +    suspend_bdev_info), the current extent in that chain (to optimise access),
323 +    and the current value in the offset.
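+
+    Advancing the 'head' one page at a time then amounts to something like
+    this sketch (a hypothetical helper; the real routine handles more cases):
+
+    static int extent_state_next(struct extent_iterate_state *state)
+    {
+        if (++state->current_offset <= state->current_extent->maximum)
+            return 0;
+
+        state->current_extent = state->current_extent->next;
+        if (!state->current_extent) {
+            if (++state->current_chain >= state->num_chains)
+                return -ENODATA;        /* ran off the end of the 'tape' */
+            state->current_extent = state->chains[state->current_chain].first;
+        }
+        state->current_offset = state->current_extent->minimum;
+        return 0;
+    }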
324 +
325 +    The image is divided into three parts:
326 +    - The header
327 +    - Pageset 1
328 +    - Pageset 2
329 +
330 +    The header always starts at the first device and first block. We know its
331 +    size before we begin to save the image because we carefully account for
332 +    everything that will be stored in it.
333 +
334 +    The second pageset (LRU) is stored first. It begins on the next page after
335 +    the end of the header.
336 +
337 +    The first pageset is stored second. Its start location is only known once
338 +    pageset2 has been saved, since pageset2 may be compressed as it is written.
339 +    This location is thus recorded at the end of saving pageset2. It is also
340 +    page aligned.
341 +
342 +    Since this information is needed at resume time, and the location of extents
343 +    in memory will differ at resume time, this needs to be stored in a portable
344 +    way:
345 +
346 +    struct extent_iterate_saved_state {
347 +        int chain_num;
348 +        int extent_num;
349 +        unsigned long offset;
350 +    };
351 +
352 +    We can thus implement a layer of abstraction wherein the core of TuxOnIce
353 +    doesn't have to worry about which device we're currently writing to or
354 +    where in the device we are. It simply requests that the next page in the
355 +    pageset or header be written, leaving the details to this layer, and
356 +    invokes the routines to remember and restore the position, without having
357 +    to worry about the details of how the data is arranged on disk or such like.
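+
+    Saving the position is then a simple translation between the two
+    structures, sketched here with hypothetical helper names:
+
+    static void extent_state_save(struct extent_iterate_state *state,
+                                  struct extent_iterate_saved_state *saved)
+    {
+        struct extent *this = state->chains[state->current_chain].first;
+
+        saved->chain_num  = state->current_chain;
+        saved->extent_num = 0;
+        while (this != state->current_extent) {
+            this = this->next;
+            saved->extent_num++;
+        }
+        saved->offset = state->current_offset;
+    }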
358 +
359 +    c) Modules
360 +
361 +    One aim in designing TuxOnIce was to make it flexible. We wanted to allow
362 +    for the implementation of different methods of transforming a page to be
363 +    written to disk and different methods of getting the pages stored.
364 +
365 +    In early versions (the betas and perhaps Suspend1), compression support was
366 +    inlined in the image writing code, and the data structures and code for
367 +    managing swap were intertwined with the rest of the code. A number of people
368 +    had expressed interest in implementing image encryption, and alternative
369 +    methods of storing the image.
370 +
371 +    In order to achieve this, TuxOnIce was given a modular design.
372 +
373 +    A module is a single file which encapsulates the functionality needed
374 +    to transform a pageset of data (encryption or compression, for example),
375 +    or to write the pageset to a device. The former type of module is called
376 +    a 'page-transformer', the latter a 'writer'.
377 +
378 +    Modules are linked together in pipeline fashion. There may be zero or more
379 +    page transformers in a pipeline, and there is always exactly one writer.
380 +    The pipeline follows this pattern:
381 +
382 +               ---------------------------------
383 +               |          TuxOnIce Core        |
384 +               ---------------------------------
385 +                               |
386 +                               |
387 +               ---------------------------------
388 +               |       Page transformer 1      |
389 +               ---------------------------------
390 +                               |
391 +                               |
392 +               ---------------------------------
393 +               |       Page transformer 2      |
394 +               ---------------------------------
395 +                               |
396 +                               |
397 +               ---------------------------------
398 +               |            Writer             |
399 +               ---------------------------------
400 +
401 +    During the writing of an image, the core code feeds pages one at a time
402 +    to the first module. This module performs whatever transformations it
403 +    implements on the incoming data, completely consuming the incoming data and
404 +    feeding output in a similar manner to the next module. A module may buffer
405 +    its output.
406 +
407 +    All routines are SMP safe, and the final result of the transformations is
408 +    written with an index (provided by the core) and size of the output by the
409 +    writer. As a result, we can have multithreaded I/O without needing to
410 +    worry about the sequence in which pages are written (or read).
411 +
412 +    During reading, the pipeline works in the reverse direction. The core code
413 +    calls the first module with the address of a buffer which should be filled.
414 +    (Note that the buffer size is always PAGE_SIZE at this time). This module
415 +    will in turn request data from the next module and so on down until the
416 +    writer is made to read from the stored image.
417 +
418 +    Part of the definition of the structure of a module thus looks like this:
419 +
420 +        int (*rw_init) (int rw, int stream_number);
421 +        int (*rw_cleanup) (int rw);
422 +        int (*write_chunk) (struct page *buffer_page);
423 +        int (*read_chunk) (struct page *buffer_page, int sync);
424 +
425 +    It should be noted that the _cleanup routine may be called before the
426 +    full stream of data has been read or written. While writing the image,
427 +    the user may (depending upon settings) choose to abort suspending, and
428 +    if we are in the midst of writing the last portion of the image, a portion
429 +    of the second pageset may be reread. This may also happen if an error
430 +    occurs and we seek to abort the process of writing the image.
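+
+    A trivial pass-through page transformer might fill in these hooks as in
+    the sketch below (next_module is a hypothetical pointer to the next
+    module in the pipeline; real modules also declare names, memory needs
+    and sysfs entries):
+
+    static int null_rw_init(int rw, int stream_number) { return 0; }
+    static int null_rw_cleanup(int rw) { return 0; }
+
+    static int null_write_chunk(struct page *buffer_page)
+    {
+        /* No transformation: pass the page straight to the next module. */
+        return next_module->write_chunk(buffer_page);
+    }
+
+    static int null_read_chunk(struct page *buffer_page, int sync)
+    {
+        return next_module->read_chunk(buffer_page, sync);
+    }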
431 +
432 +    The modular design is also useful in a number of other ways. It provides
433 +    a means whereby we can add support for:
434 +
435 +    - providing overall initialisation and cleanup routines;
436 +    - serialising configuration information in the image header;
437 +    - providing debugging information to the user;
438 +    - determining memory and image storage requirements;
439 +    - dis/enabling components at run-time;
440 +    - configuring the module (see below);
441 +
442 +    ...and routines for writers specific to their work:
443 +    - Parsing a resume= location;
444 +    - Determining whether an image exists;
445 +    - Marking a resume as having been attempted;
446 +    - Invalidating an image;
447 +
448 +    Since some parts of the core - the user interface and storage manager
449 +    support - have use for some of these functions, they are registered as
450 +    'miscellaneous' modules as well.
451 +
452 +    d) Sysfs data structures.
453 +
454 +    This brings us naturally to support for configuring TuxOnIce. We desired to
455 +    provide a way to make TuxOnIce as flexible and configurable as possible.
456 +    The user shouldn't have to reboot just because they now want to suspend to
457 +    a file instead of a partition, for example.
458 +
459 +    To accomplish this, TuxOnIce implements a very generic means whereby the
460 +    core and modules can register new sysfs entries. All TuxOnIce entries use
461 +    a single _store and _show routine, both of which are found in sysfs.c in
462 +    the kernel/power directory. These routines handle the most common operations
463 +    - getting and setting the values of bits, integers, longs, unsigned longs
464 +    and strings in one place, and allow overrides for customised get and set
465 +    options as well as side-effect routines for all reads and writes.
466 +
467 +    When combined with some simple macros, a new sysfs entry can then be defined
468 +    in just a couple of lines:
469 +
470 +    { TOI_ATTR("progress_granularity", SYSFS_RW),
471 +      SYSFS_INT(&progress_granularity, 1, 2048)
472 +    },
473 +
474 +    This defines a sysfs entry named "progress_granularity" which is rw and
475 +    allows the user to access an integer stored at &progress_granularity, giving
476 +    it a value between 1 and 2048 inclusive.
477 +
478 +    Sysfs entries are registered under /sys/power/tuxonice, and entries for
479 +    modules are located in a subdirectory named after the module.
480 +
481 diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt
482 new file mode 100644
483 index 0000000..d13ce85
484 --- /dev/null
485 +++ b/Documentation/power/tuxonice.txt
486 @@ -0,0 +1,750 @@
487 +       --- TuxOnIce, version 3.0 ---
488 +
489 +1.  What is it?
490 +2.  Why would you want it?
491 +3.  What do you need to use it?
492 +4.  Why not just use the version already in the kernel?
493 +5.  How do you use it?
494 +6.  What do all those entries in /sys/power/tuxonice do?
495 +7.  How do you get support?
496 +8.  I think I've found a bug. What should I do?
497 +9.  When will XXX be supported?
498 +10. How does it work?
499 +11. Who wrote TuxOnIce?
500 +
501 +1. What is it?
502 +
503 +   Imagine you're sitting at your computer, working away. For some reason, you
504 +   need to turn off your computer for a while - perhaps it's time to go home
505 +   for the day. When you come back to your computer next, you're going to want
506 +   to carry on where you left off. Now imagine that you could push a button and
507 +   have your computer store the contents of its memory to disk and power down.
508 +   Then, when you next start up your computer, it loads that image back into
509 +   memory and you can carry on from where you were, just as if you'd never
510 +   turned the computer off. Startup takes far less time, with no reopening of
511 +   applications or finding what directory you put that file in yesterday.
512 +   That's what TuxOnIce does.
513 +
514 +   TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who,
515 +   with some help from Pavel Machek, got an early version going in 1999. The
516 +   project was then taken over by Florent Chabaud while still in alpha version
517 +   numbers. Nigel Cunningham came on the scene when Florent was unable to
518 +   continue, moving the project into betas, then 1.0, 2.0 and so on up to
519 +   the present series. During the 2.0 series, the name was contracted to
520 +   Suspend2 and the website suspend2.net created. Beginning around July 2007,
521 +   a transition to calling the software TuxOnIce was made, to seek to help
522 +   make it clear that TuxOnIce is more concerned with hibernation than suspend
523 +   to ram.
524 +
525 +   Pavel Machek's swsusp code, which was merged around 2.5.17, retains the
526 +   original name, and was essentially a fork of the beta code until Rafael
527 +   Wysocki came on the scene in 2005 and began to improve it further.
528 +
529 +2. Why would you want it?
530 +
531 +   Why wouldn't you want it?
532 +   
533 +   Being able to save the state of your system and quickly restore it improves
534 +   your productivity - you get a useful system in far less time than through
535 +   the normal boot process. You also get to be completely 'green', using zero
536 +   power, or as close to that as possible (the computer may still provide
537 +   minimal power to some devices, so they can initiate a power on, but that
538 +   will be the same amount of power as would be used if you told the computer
539 +   to shut down).
540 +   
541 +3. What do you need to use it?
542 +
543 +   a. Kernel Support.
544 +
545 +   i) The TuxOnIce patch.
546 +   
547 +   TuxOnIce is part of the Linux Kernel. This version is not part of Linus's
548 +   2.6 tree at the moment, so you will need to download the kernel source and
549 +   apply the latest patch. Having done that, enable the appropriate options in
550 +   make [menu|x]config (under Power Management Options - look for "Enhanced
551 +   Hibernation"), compile and install your kernel. TuxOnIce works with SMP,
552 +   Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64.
553 +
554 +   TuxOnIce patches are available from http://tuxonice.net.
555 +
556 +   ii) Compression support.
557 +
558 +   Compression support is implemented via the cryptoapi. You will therefore want
559 +   to select any Cryptoapi transforms that you want to use on your image from
560 +   the Cryptoapi menu while configuring your kernel. Part of the TuxOnIce patch
561 +   adds a new cryptoapi compression algorithm called LZF. We recommend this
562 +   compression method - it is very fast and still achieves good compression.
563 +
564 +   You can also tell TuxOnIce to write its image to an encrypted and/or
565 +   compressed filesystem/swap partition. In that case, you don't need to do
566 +   anything special for TuxOnIce when it comes to kernel configuration.
567 +
568 +   iii) Configuring other options.
569 +
570 +   While you're configuring your kernel, try to configure as much as possible
571 +   to build as modules. We recommend this because there are a number of drivers
572 +   that are still in the process of implementing proper power management
573 +   support. In those cases, the best way to work around their current lack is
574 +   to build them as modules and remove the modules while hibernating. You might
575 +   also bug the driver authors to get their support up to speed, or even help!
576 +
577 +   b. Storage.
578 +
579 +   i) Swap.
580 +
581 +   TuxOnIce can store the hibernation image in your swap partition, a swap file or
582 +   a combination thereof. Whichever combination you choose, you will probably
583 +   want to create enough swap space to store the largest image you could have,
584 +   plus the space you'd normally use for swap. A good rule of thumb would be
585 +   to calculate the amount of swap you'd want without using TuxOnIce, and then
586 +   add the amount of memory you have. This swapspace can be arranged in any way
587 +   you'd like. It can be in one partition or file, or spread over a number. The
588 +   only requirement is that they be active when you start a hibernation cycle.
589 +   
590 +   There is one exception to this requirement. TuxOnIce has the ability to turn
591 +   on one swap file or partition at the start of hibernating and turn it back off
592 +   at the end. If you want to ensure you have enough memory to store an image
593 +   when your memory is fully used, you might want to make one swap partition or
594 +   file for 'normal' use, and another for TuxOnIce to activate & deactivate
595 +   automatically. (Further details below).
596 +
597 +   ii) Normal files.
598 +
599 +   TuxOnIce includes a 'file allocator'. The file allocator can store your
600 +   image in a simple file. Since Linux has the concept of everything being a
601 +   file, this is more powerful than it initially sounds. If, for example, you
602 +   were to set up a network block device file, you could hibernate to a network
603 +   server. This has been tested and works to a point, but nbd itself isn't
604 +   stateless enough for our purposes.
605 +
606 +   Take extra care when setting up the file allocator. If you just type
607 +   commands without thinking and then try to hibernate, you could cause
608 +   irreversible corruption on your filesystems! Make sure you have backups.
609 +
610 +   Most people will only want to hibernate to a local file. To achieve that, do
611 +   something along the lines of:
612 +
613 +   echo "TuxOnIce" > /hibernation-file
614 +   dd if=/dev/zero bs=1M count=512 >> /hibernation-file
615 +
616 +   This will create a 512MB file called /hibernation-file. To get TuxOnIce to use
617 +   it:
618 +
619 +   echo /hibernation-file > /sys/power/tuxonice/file/target
620 +
621 +   Then
622 +
623 +   cat /sys/power/tuxonice/resume
624 +
625 +   Put the results of this into your bootloader's configuration (see also step
626 +   C, below):
627 +
628 +   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
629 +   # cat /sys/power/tuxonice/resume
630 +   file:/dev/hda2:0x1e001
631 +   
632 +   In this example, we would edit the append= line of our lilo.conf|menu.lst
633 +   so that it included:
634 +
635 +   resume=file:/dev/hda2:0x1e001
636 +   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
637 +
638 +   For those who are thinking 'Could I make the file sparse?', the answer is
639 +   'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in
640 +   a sparse file while hibernating. In the longer term (post merge!), I'd like
641 +   to change things so that the file could be dynamically resized and have
642 +   holes filled as needed. Right now, however, that's not possible and not a
643 +   priority.
644 +
645 +   c. Bootloader configuration.
646 +   
647 +   Using TuxOnIce also requires that you add an extra parameter to 
648 +   your lilo.conf or equivalent. Here's an example for a swap partition:
649 +
650 +   append="resume=swap:/dev/hda1"
651 +
652 +   This would tell TuxOnIce that /dev/hda1 is a swap partition you 
653 +   have. TuxOnIce will use the swap signature of this partition as a
654 +   pointer to your data when you hibernate. This means that (in this example)
655 +   /dev/hda1 doesn't need to be _the_ swap partition where all of your data
656 +   is actually stored. It just needs to be a swap partition that has a
657 +   valid signature.
658 +
659 +   You don't need to have a swap partition for this purpose. TuxOnIce
660 +   can also use a swap file, but usage is a little more complex. Having made
661 +   your swap file, turn it on and do 
662 +
663 +   cat /sys/power/tuxonice/swap/headerlocations
664 +
665 +   (this assumes you've already compiled your kernel with TuxOnIce
666 +   support and booted it). The results of the cat command will tell you
667 +   what you need to put in lilo.conf:
668 +
669 +   For swap partitions like /dev/hda1, simply use resume=/dev/hda1.
670 +   For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d.
671 +
672 +   If the swapfile changes for any reason (it is moved to a different
673 +   location, it is deleted and recreated, or the filesystem is
674 +   defragmented) then you will have to check
675 +   /sys/power/tuxonice/swap/headerlocations for a new resume_block value.
676 +
677 +   Once you've compiled and installed the kernel and adjusted your bootloader
678 +   configuration, you should only need to reboot for the most basic part
679 +   of TuxOnIce to be ready.
680 +
681 +   If you only compile in the swap allocator, or only compile in the file
682 +   allocator, you don't need to add the "swap:" part of the resume=
683 +   parameters above. resume=/dev/hda2:0x242d will work just as well. If you
684 +   have compiled both and your storage is on swap, you can also use this
685 +   format (the swap allocator is the default allocator).
686 +
687 +   When compiling your kernel, one of the options in the 'Power Management
688 +   Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is
689 +   called 'Default resume partition'. This can be used to set a default value
690 +   for the resume= parameter.
691 +
692 +   d. The hibernate script.
693 +
694 +   Since the driver model in 2.6 kernels is still being developed, you may need
695 +   to do more than just configure TuxOnIce. Users of TuxOnIce usually start the
696 +   process via a script which prepares for the hibernation cycle, tells the
697 +   kernel to do its stuff and then restores things afterwards. This script might
698 +   involve:
699 +
700 +   - Switching to a text console and back if X doesn't like the video card
701 +     status on resume.
702 +   - Un/reloading drivers that don't play well with hibernation.
703 +  
704 +   Note that you might not be able to unload some drivers if there are 
705 +   processes using them. You might have to kill off processes that hold
706 +   devices open. Hint: if your X server accesses a USB mouse, doing a
707 +   'chvt' to a text console releases the device and you can unload the
708 +   module.
709 +
710 +   Check out the latest script (available on tuxonice.net).
711 +
712 +   e. The userspace user interface.
713 +
714 +   TuxOnIce has very limited support for displaying status if you only apply
715 +   the kernel patch - it can printk messages, but that is all. In addition,
716 +   some of the functions mentioned in this document (such as cancelling a cycle
717 +   or performing interactive debugging) are unavailable. To utilise these
718 +   functions, or simply get a nice display, you need the 'userui' component.
719 +   Userui comes in three flavours: usplash, fbsplash and text. Text should
720 +   work on any console. Usplash and fbsplash require the appropriate
721 +   (distro specific?) support.
722 +
723 +   To utilise a userui, TuxOnIce just needs to be told where to find the
724 +   userspace binary:
725 +
726 +   echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program
727 +
728 +   The hibernate script can do this for you, and a default value for this
729 +   setting can be configured when compiling the kernel. This path is also
730 +   stored in the image header, so if you have an initrd or initramfs, you can
731 +   use the userui during the first part of resuming (prior to the atomic
732 +   restore) by putting the binary in the same path in your initrd/ramfs.
733 +   Alternatively, you can put it in a different location and do an echo
734 +   similar to the above prior to the echo > do_resume. The value saved in the
735 +   image header will then be ignored.
736 +
737 +4. Why not just use the version already in the kernel?
738 +
739 +   The version in the vanilla kernel has a number of drawbacks. The most
740 +   serious of these are:
741 +       - it has a maximum image size of 1/2 total memory;
742 +       - it doesn't allocate storage until after it has snapshotted memory.
743 +         This means that you can't be sure hibernating will work until you
744 +         see it start to write the image;
745 +       - it does not allow you to press escape to cancel a cycle;
746 +       - it does not allow you to press escape to cancel resuming;
747 +       - it does not allow you to automatically swapon a file when
748 +         starting a cycle;
749 +       - it does not allow you to use multiple swap partitions or files;
750 +       - it does not allow you to use ordinary files;
751 +       - it just invalidates an image and continues to boot if you
752 +         accidentally boot the wrong kernel after hibernating;
753 +       - it doesn't support any sort of nice display while hibernating;
754 +       - it is moving toward requiring that you have an initrd/initramfs
755 +         to ever have a hope of resuming (uswsusp). While uswsusp will
756 +         address some of the concerns above, it won't address all of them,
757 +         and will be more complicated to get set up;
758 +       - it doesn't have support for suspend-to-both (write a hibernation
759 +         image, then suspend to ram; I think this is known as ReadySafe
760 +         under M$).
761 +
762 +5. How do you use it?
763 +
764 +   A hibernation cycle can be started directly by doing:
765 +
766 +       echo > /sys/power/tuxonice/do_hibernate
767 +
768 +   In practice, though, you'll probably want to use the hibernate script
769 +   to unload modules, configure the kernel the way you like it and so on.
770 +   In that case, you'd do (as root):
771 +
772 +       hibernate
773 +
774 +   See the hibernate script's man page for more details on the options it
775 +   takes.
776 +
777 +   If you're using the text or splash user interface modules, one feature of
778 +   TuxOnIce that you might find useful is that you can press Escape at any time
779 +   during hibernating, and the process will be aborted.
780 +
781 +   Due to the way hibernation works, this means you'll have your system back and
782 +   perfectly usable almost instantly. The only exception is when it's at the
783 +   very end of writing the image. Then it will need to reload a small (usually
784 +   4-50MB, depending upon the image characteristics) portion first.
785 +
786 +   Likewise, when resuming, you can press escape and resuming will be aborted.
787 +   The computer will then power down again, according to the settings at that
788 +   time for the powerdown method or rebooting.
789 +
790 +   You can change the settings for powering down while the image is being
791 +   written by pressing 'R' to toggle rebooting and 'O' to toggle between
792 +   suspending to ram and powering down completely.
793 +   
794 +   If you run into problems with resuming, adding the "noresume" option to
795 +   the kernel command line will let you skip the resume step and recover your
796 +   system. This option shouldn't normally be needed, because TuxOnIce modifies
797 +   the image header prior to the atomic restore, and will thus prompt you
798 +   if it detects that you've tried to resume an image before (this flag is
799 +   removed if you press Escape to cancel a resume, so you won't be prompted
800 +   then).
801 +
802 +   Recent kernels (2.6.24 onwards) add support for resuming from a different
803 +   kernel to the one that was hibernated (thanks to Rafael for his work on
804 +   this - I've just embraced and enhanced the support for TuxOnIce). This
805 +   should further reduce the need for you to use the noresume option.
806 +
807 +6. What do all those entries in /sys/power/tuxonice do?
808 +
809 +   /sys/power/tuxonice is the directory which contains files you can use to
810 +   tune and configure TuxOnIce to your liking. The exact contents of
811 +   the directory will depend upon the version of TuxOnIce you're
812 +   running and the options you selected at compile time. In the following
813 +   descriptions, names in brackets refer to compile time options.
814 +   (Note that they're all dependent upon you having selected CONFIG_TUXONICE
815 +   in the first place!).
816 +
817 +   Since the values of these settings can pose potential security risks, the
818 +   writeable ones are accessible only to the root user. You may want to
819 +   configure sudo to allow you to invoke your hibernate script as an ordinary
820 +   user.
821 +
822 +   - checksum/enabled
823 +
824 +   Use cryptoapi hashing routines to verify that Pageset2 pages don't change
825 +   while we're saving the first part of the image, and to get any pages that
826 +   do change resaved in the atomic copy. This should normally not be needed,
827 +   but if you're seeing issues, please enable this. If your issues stop you
828 +   being able to resume, enable this option, hibernate and cancel the cycle
829 +   after the atomic copy is done. If the debugging info shows a non-zero
830 +   number of pages resaved, please report this to Nigel.
831 +
832 +   - compression/algorithm
833 +
834 +   Set the cryptoapi algorithm used for compressing the image.
835 +
836 +   - compression/expected_compression
837 +
838 +   This value allows you to set an expected compression ratio, which TuxOnIce
839 +   will use in calculating whether it meets constraints on the image size. If
840 +   this expected compression ratio is not attained, the hibernation cycle will
841 +   abort, so it is wise to allow some spare. You can see what compression
842 +   ratio is achieved in the logs after hibernating.
843 +
844 +   - debug_info:
845 +  
846 +   This file returns information about your configuration that may be helpful
847 +   in diagnosing problems with hibernating.
848 +
849 +   - do_hibernate:
850 +
851 +   When anything is written to this file, the kernel side of TuxOnIce will
852 +   begin to attempt to write an image to disk and power down. You'll normally
853 +   want to run the hibernate script instead, to get modules unloaded first.
854 +
855 +   - do_resume:
856 +
857 +   When anything is written to this file TuxOnIce will attempt to read and
858 +   restore an image. If there is no image, it will return almost immediately.
859 +   If an image exists, the echo > will never return. Instead, the original
860 +   kernel context will be restored and the original echo > do_hibernate will
861 +   return.
862 +
863 +   - */enabled
864 +
865 +   These options can be used to temporarily disable various parts of TuxOnIce.
866 +
867 +   - extra_pages_allowance
868 +
869 +   When TuxOnIce does its atomic copy, it calls the driver model suspend
870 +   and resume methods. If you have DRI enabled with a driver such as fglrx,
871 +   this can result in the driver allocating a substantial amount of memory
872 +   for storing its state. Extra_pages_allowance tells TuxOnIce how much
873 +   extra memory it should ensure is available for those allocations. If
874 +   your attempts at hibernating end with a message in dmesg indicating that
875 +   insufficient extra pages were allowed, you need to increase this value.
876 +
877 +   - file/target:
878 +
879 +   Read this value to get the current setting. Write to it to point TuxOnIce
880 +   at a new storage location for the file allocator. See section 3.b.ii above
881 +   for details of how to set up the file allocator.
882 +
883 +   - freezer_test
884 +
885 +   This entry can be used to get TuxOnIce to just test the freezer and prepare
886 +   an image without actually doing a hibernation cycle. It is useful for
887 +   diagnosing freezing and image preparation issues.
888 +
889 +   - image_exists:
890 +
891 +   Can be used in a script to determine whether a valid image exists at the
892 +   location currently pointed to by resume=. Returns up to three lines.
893 +   The first is whether an image exists (-1 for unsure, otherwise 0 or 1).
894 +   If an image exists, additional lines will return the machine and version.
895 +   Echoing anything to this entry removes any current image.
896 +
897 +   - image_size_limit:
898 +
899 +   The maximum size of hibernation image written to disk, measured in megabytes
900 +   (1024*1024).
901 +
902 +   - last_result:
903 +
904 +   The result of the last hibernation cycle, as defined in
905 +   include/linux/suspend-debug.h with the values SUSPEND_ABORTED to
906 +   SUSPEND_KEPT_IMAGE. This is a bitmask.
907 +
908 +   - log_everything (CONFIG_PM_DEBUG):
909 +
910 +   Setting this option results in all messages printed being logged. Normally,
911 +   only a subset are logged, so as to not slow the process and not clutter the
912 +   logs. Useful for debugging. It can be toggled during a cycle by pressing
913 +   'L'.
914 +
915 +   - pause_between_steps (CONFIG_PM_DEBUG):
916 +
917 +   This option is used during debugging, to make TuxOnIce pause between
918 +   each step of the process. It is ignored when the nice display is on.
919 +
920 +   - powerdown_method:
921 +
922 +   Used to select a method by which TuxOnIce should powerdown after writing the
923 +   image. Currently:
924 +
925 +   0: Don't use ACPI to power off.
926 +   3: Attempt to enter Suspend-to-ram.
927 +   4: Attempt to enter ACPI S4 mode.
928 +   5: Attempt to power down via ACPI S5 mode.
929 +
930 +   Note that these options are highly dependent upon your hardware & software:
931 +
932 +   3: When successful, your machine suspends to ram instead of powering off.
933 +      The advantage of using this mode is that it doesn't matter whether your
934 +      battery has enough charge to make it through to your next resume. If it
935 +      lasts, you will simply resume from suspend to ram (and the image on disk
936 +      will be discarded). If the battery runs out, you will resume from disk
937 +      instead. The disadvantage is that it takes longer than a normal
938 +      suspend-to-ram to enter the state, since the suspend-to-disk image needs
939 +      to be written first.
940 +   4/5: When successful, your machine will be off and consume (almost) no power.
941 +      But it might still react to some external events like opening the lid or
942 +      traffic on a network or USB device. For the BIOS, resume is then the same
943 +      as warm boot, similar to a situation where you used the command `reboot'
944 +      to reboot your machine. If your machine has problems on warm boot or if
945 +      you want to protect your machine with the BIOS password, this is probably
946 +      not the right choice. Mode 4 may be necessary on some machines where ACPI
947 +      wake up methods need to be run to properly reinitialise hardware after a
948 +      hibernation cycle.  
949 +   0: Switch the machine completely off. The only possible wakeup is the power
950 +      button. For the BIOS, resume is then the same as a cold boot; in
951 +      particular, you would have to provide your BIOS boot password if your
952 +      machine uses that feature for booting.
953 +
954 +   - progressbar_granularity_limit:
955 +
956 +   This option can be used to limit the granularity of the progress bar
957 +   displayed with a bootsplash screen. The value is the maximum number of
958 +   steps. That is, 10 will make the progress bar jump in 10% increments.
959 +
960 +   - reboot:
961 +
962 +   This option causes TuxOnIce to reboot rather than powering down
963 +   at the end of saving an image. It can be toggled during a cycle by pressing
964 +   'R'.
965 +
966 +   - resume_commandline:
967 +
968 +   This entry can be read after resuming to see the commandline that was used
969 +   when resuming began. You might use this to set up two bootloader entries
970 +   that are the same apart from the fact that one includes an extra append=
971 +   argument "at_work=1". You could then grep resume_commandline in your
972 +   post-resume scripts and configure networking (for example) differently
973 +   depending upon whether you're at home or work. resume_commandline can be
974 +   set to arbitrary text if you wish to remove sensitive contents.
975 +
976 +   - swap/swapfilename:
977 +
978 +   This entry is used to specify the swapfile or partition that
979 +   TuxOnIce will attempt to swapon/swapoff automatically. Thus, if
980 +   I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically
981 +   for my hibernation image, I would
982 +  
983 +   echo /dev/hda2 > /sys/power/tuxonice/swap/swapfilename
984 +
985 +   /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the
986 +   swapon and swapoff occur while other processes are frozen (including kswapd)
987 +   so this swap file will not be used up when attempting to free memory. The
988 +   parition/file is also given the highest priority, so other swapfiles/partitions
989 +   will only be used to save the image when this one is filled.
990 +
991 +   The value of this file is used by headerlocations along with any currently
992 +   activated swapfiles/partitions.
993 +
994 +   - swap/headerlocations:
995 +
996 +   This option tells you the resume= options to use for swap devices you
997 +   currently have activated. It is particularly useful when you only want to
998 +   use a swap file to store your image. See above for further details.
999 +
1000 +   - userui_program
1001 +
1002 +   This entry is used to tell TuxOnIce what userspace program to use for
1003 +   providing a user interface while hibernating. The program uses a netlink
1004 +   socket to pass messages back and forward to the kernel, allowing all of the
1005 +   functions formerly implemented in the kernel user interface components.
1006 +
1007 +   - user_interface/debug_sections (CONFIG_PM_DEBUG):
1008 +
1009 +   This value, together with the console log level, controls what debugging
1010 +   information is displayed. The console log level determines the level of
1011 +   detail, and this value determines what detail is displayed. This value is
1012 +   a bit vector, and the meaning of the bits can be found in the kernel tree
1013 +   in include/linux/tuxonice.h. It can be overridden using the kernel's
1014 +   command line option suspend_dbg.
1015 +
1016 +   - user_interface/default_console_level (CONFIG_PM_DEBUG):
1017 +
1018 +   This determines the value of the console log level at the start of a
1019 +   hibernation cycle. If debugging is compiled in, the console log level can be
1020 +   changed during a cycle by pressing the digit keys. Meanings are:
1021 +
1022 +   0: Nice display.
1023 +   1: Nice display plus numerical progress.
1024 +   2: Errors only.
1025 +   3: Low level debugging info.
1026 +   4: Medium level debugging info.
1027 +   5: High level debugging info.
1028 +   6: Verbose debugging info.
1029 +
1030 +   - user_interface/enable_escape:
1031 +
1032 +   Setting this to "1" will enable you to abort a hibernation cycle or
1033 +   resume by pressing escape; "0" (default) disables this feature. Note that
1034 +   enabling this option means that you cannot initiate a hibernation cycle
1035 +   and then walk away from your computer, expecting it to be secure. With
1036 +   this feature disabled, you can validly have this expectation once
1037 +   TuxOnIce begins to write the image to disk. (Prior to this point, it is
1038 +   possible that TuxOnIce might abort because of a failure to freeze all
1039 +   processes or because constraints on its ability to save the image are
1040 +   not met.)
1041 +
1042 +   - version:
1043 +  
1044 +   The version of TuxOnIce you have compiled into the currently running kernel.
1045 +
1046 +7. How do you get support?
1047 +
1048 +   Glad you asked. TuxOnIce is being actively maintained and supported
1049 +   by Nigel (the guy doing most of the kernel coding at the moment), Bernard
1050 +   (who maintains the hibernate script and userspace user interface components)
1051 +   and its users.
1052 +
1053 +   Resources available include HowTos, FAQs and a Wiki, all accessible via
1054 +   tuxonice.net.  You can find the mailing lists there.
1055 +
1056 +8. I think I've found a bug. What should I do?
1057 +
1058 +   By far and away, the most common problems people have with TuxOnIce
1059 +   relate to drivers not having adequate power management support. In this
1060 +   case, it is not a bug with TuxOnIce, but we can still help you. As we
1061 +   mentioned above, such issues can usually be worked around by building the
1062 +   functionality as modules and unloading them while hibernating. Please visit
1063 +   the Wiki for up-to-date lists of known issues and work arounds.
1064 +
1065 +   If this information doesn't help, try running:
1066 +
1067 +   hibernate --bug-report
1068 +
1069 +   ..and sending the output to the users mailing list.
1070 +
1071 +   Good information on how to provide us with useful information from an
1072 +   oops is found in the file REPORTING-BUGS, in the top level directory
1073 +   of the kernel tree. If you get an oops, please especially note the
1074 +   information about running what is printed on the screen through ksymoops.
1075 +   The raw information is useless.
1076 +
1077 +9. When will XXX be supported?
1078 +
1079 +   If there's a feature missing from TuxOnIce that you'd like, feel free to
1080 +   ask. We try to be obliging, within reason.
1081 +
1082 +   Patches are welcome. Please send to the list.
1083 +
1084 +10. How does it work?
1085 +
1086 +   TuxOnIce does its work in a number of steps.
1087 +
1088 +   a. Freezing system activity.
1089 +
1090 +   The first main stage in hibernating is to stop all other activity. This is
1091 +   achieved in stages. Processes are considered in four groups, which we will
1092 +   describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE
1093 +   flag, kernel threads without this flag, userspace processes with the
1094 +   PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are
1095 +   untouched by the refrigerator code. They are allowed to run while hibernating
1096 +   and resuming, and are used to support user interaction, storage access or the
1097 +   like. Other kernel threads (those unneeded while hibernating) are frozen last.
1098 +   This leaves us with userspace processes that need to be frozen. When a
1099 +   process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on
1100 +   that process for the duration of that call. Processes that have this flag are
1101 +   frozen after processes without it, so that we can seek to ensure that dirty
1102 +   data is synced to disk as quickly as possible in a situation where other
1103 +   processes may be submitting writes at the same time. Freezing the processes
1104 +   that are submitting data stops new I/O from being submitted. Syncthreads can
1105 +   then cleanly finish their work. So the order is:
1106 +
1107 +   - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE;
1108 +   - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE);
1109 +   - Kernel processes without PF_NOFREEZE.
1110 +
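+   As an illustration, this PF_SYNCTHREAD marking amounts to bracketing the
+   body of a sync system call with flag updates, roughly as in the sketch
+   below (illustrative only; the real patch may differ in detail):
+
+       asmlinkage long sys_sync(void)
+       {
+               current->flags |= PF_SYNCTHREAD;
+               do_sync(1);
+               current->flags &= ~PF_SYNCTHREAD;
+               return 0;
+       }
+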
1111 +   b. Eating memory.
1112 +
1113 +   For a successful hibernation cycle, you need enough disk space to store the
1114 +   image and enough memory to satisfy the various constraints of TuxOnIce's
1115 +   algorithm. You can also specify a maximum image size. In order to meet
1116 +   those constraints, TuxOnIce may 'eat' memory. If, after freezing
1117 +   processes, the constraints aren't met, TuxOnIce will thaw all the
1118 +   other processes and begin to eat memory until its calculations indicate
1119 +   the constraints are met. It will then freeze processes again and recheck
1120 +   its calculations.
1121 +
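+   In outline, the logic looks like this sketch (the function names here are
+   illustrative, not the real symbols):
+
+       freeze_processes();
+       while (!toi_constraints_met()) {
+               thaw_processes();          /* let the system free memory  */
+               toi_eat_memory();          /* reclaim until estimates fit */
+               freeze_processes();        /* refreeze, then recheck      */
+       }
+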
1122 +   c. Allocation of storage.
1123 +
1124 +   Next, TuxOnIce allocates the storage that will be used to save
1125 +   the image.
1126 +
1127 +   The core of TuxOnIce knows nothing about how or where pages are stored. We
1128 +   therefore request the active allocator (remember you might have compiled in
1129 +   more than one!) to allocate enough storage for our expected image size. If
1130 +   this request cannot be fulfilled, we eat more memory and try again. If it
1131 +   is fulfilled, we seek to allocate additional storage, just in case our
1132 +   expected compression ratio (if any) isn't achieved. This time, however, we
1133 +   just continue if we can't allocate enough storage.
1134 +
1135 +   If these calls to our allocator change the characteristics of the image
1136 +   such that we haven't allocated enough memory, we also loop. (The allocator
1137 +   may well need to allocate space for its storage information).
1138 +
1139 +   d. Write the first part of the image.
1140 +
1141 +   TuxOnIce stores the image in two sets of pages called 'pagesets'.
1142 +   Pageset 2 contains pages on the active and inactive lists; essentially
1143 +   the page cache. Pageset 1 contains all other pages, including the kernel.
1144 +   We use two pagesets for one important reason: We need to make an atomic copy
1145 +   of the kernel to ensure consistency of the image. Without a second pageset,
1146 +   that would limit us to an image that was at most half the amount of memory
1147 +   available. Using two pagesets allows us to store a full image. Since pageset
1148 +   2 pages won't be needed in saving pageset 1, we first save pageset 2 pages.
1149 +   We can then make our atomic copy of the remaining pages using both pageset 2
1150 +   pages and any other pages that are free. While saving both pagesets, we are
1151 +   careful not to corrupt the image. Among other things, we use low-level block
1152 +   I/O routines that don't change the pagecache contents.
1153 +
1154 +   The next step, then, is writing pageset 2.
1155 +
1156 +   e. Suspending drivers and storing processor context.
1157 +
1158 +   Having written pageset 2, TuxOnIce calls the power management functions to
1159 +   notify drivers of the hibernation, and saves the processor state in preparation
1160 +   for the atomic copy of memory we are about to make.
1161 +
1162 +   f. Atomic copy.
1163 +
1164 +   At this stage, everything else but the TuxOnIce code is halted. Processes
1165 +   are frozen or idling, drivers are quiesced and have stored (ideally and where
1166 +   necessary) their configuration in memory we are about to atomically copy.
1167 +   In our lowlevel architecture specific code, we have saved the CPU state.
1168 +   We can therefore now do our atomic copy before resuming drivers etc.
1169 +
1170 +   g. Save the atomic copy (pageset 1).
1171 +
1172 +   TuxOnIce can then write the atomic copy of the remaining pages. Since we
1173 +   have copied the pages into other locations, we can continue to use the
1174 +   normal block I/O routines without fear of corrupting our image.
1175 +
1176 +   h. Save the image header.
1177 +
1178 +   Nearly there! We save our settings and other parameters needed for
1179 +   reloading pageset 1 in an 'image header'. We also tell our allocator to
1180 +   serialise its data at this stage, so that it can reread the image at resume
1181 +   time.
1182 +
1183 +   i. Set the image header.
1184 +
1185 +   Finally, we edit the header at our resume= location. The signature is
1186 +   changed by the allocator to reflect the fact that an image exists, and to
1187 +   point to the start of that data if necessary (swap allocator).
1188 +
1189 +   j. Power down.
1190 +
1191 +   Or reboot if we're debugging and the appropriate option is selected.
1192 +
1193 +   Whew!
1194 +
1195 +   Reloading the image.
1196 +   --------------------
1197 +
1198 +   Reloading the image is essentially the reverse of all the above. We load
1199 +   our copy of pageset 1, being careful to choose locations that aren't going
1200 +   to be overwritten as we copy it back (We start very early in the boot
1201 +   process, so there are no other processes to quiesce here). We then copy
1202 +   pageset 1 back to its original location in memory and restore the processor
1203 +   context. We are now running with the original kernel. Next, we reload the
1204 +   pageset 2 pages, free the memory and swap used by TuxOnIce, restore
1205 +   the pageset header and restart processes. Sounds easy in comparison to
1206 +   hibernating, doesn't it!
1207 +
1208 +   There is of course more to TuxOnIce than this, but this explanation
1209 +   should be a good start. If there's interest, I'll write further
1210 +   documentation on range pages and the low level I/O.
1211 +
1212 +11. Who wrote TuxOnIce?
1213 +
1214 +   (Answer based on the writings of Florent Chabaud, credits in files and
1215 +   Nigel's limited knowledge; apologies to anyone we've missed!)
1216 +
1217 +   The main developers of TuxOnIce have been...
1218 +
1219 +   Gabor Kuti
1220 +   Pavel Machek
1221 +   Florent Chabaud
1222 +   Bernard Blackham
1223 +   Nigel Cunningham
1224 +
1225 +   Significant portions of swsusp, the code in the vanilla kernel which
1226 +   TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should
1227 +   also be expressed to him.
1228 +
1229 +   The above mentioned developers have been aided in their efforts by a host
1230 +   of hundreds, if not thousands of testers and people who have submitted bug
1231 +   fixes & suggestions. Of special note are the efforts of Michael Frank, who
1232 +   had his computers repeatedly hibernate and resume for literally tens of
1233 +   thousands of cycles and developed scripts to stress the system and test
1234 +   TuxOnIce far beyond the point most of us (Nigel included!) would consider
1235 +   testing. His efforts have contributed as much to TuxOnIce as any of the
1236 +   names above.
1237 diff --git a/MAINTAINERS b/MAINTAINERS
1238 index ff24d01..251eb14 100644
1239 --- a/MAINTAINERS
1240 +++ b/MAINTAINERS
1241 @@ -4099,6 +4099,13 @@ P:       Maciej W. Rozycki
1242  M:     macro@linux-mips.org
1243  S:     Maintained
1244  
1245 +TUXONICE (ENHANCED HIBERNATION)
1246 +P:     Nigel Cunningham
1247 +M:     nigel@tuxonice.net
1248 +L:     suspend2-devel@tuxonice.net
1249 +W:     http://tuxonice.net
1250 +S:     Maintained
1251 +
1252  U14-34F SCSI DRIVER
1253  P:     Dario Ballabio
1254  M:     ballabio_dario@emc.com
1255 diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
1256 index 2001abd..78c6a4c 100644
1257 --- a/arch/powerpc/mm/pgtable_32.c
1258 +++ b/arch/powerpc/mm/pgtable_32.c
1259 @@ -397,6 +397,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1260  
1261         change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
1262  }
1263 +EXPORT_SYMBOL_GPL(kernel_map_pages);
1264  #endif /* CONFIG_DEBUG_PAGEALLOC */
1265  
1266  static int fixmaps;
1267 diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
1268 index 724adfc..a270c9e 100644
1269 --- a/arch/x86/kernel/reboot.c
1270 +++ b/arch/x86/kernel/reboot.c
1271 @@ -502,6 +502,7 @@ void machine_restart(char *cmd)
1272  {
1273         machine_ops.restart(cmd);
1274  }
1275 +EXPORT_SYMBOL_GPL(machine_restart);
1276  
1277  void machine_halt(void)
1278  {
1279 diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
1280 index 43e2f84..5783f37 100644
1281 --- a/arch/x86/mm/pageattr.c
1282 +++ b/arch/x86/mm/pageattr.c
1283 @@ -1031,6 +1031,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1284          */
1285         cpa_fill_pool(NULL);
1286  }
1287 +EXPORT_SYMBOL_GPL(kernel_map_pages);
1288  
1289  #ifdef CONFIG_DEBUG_FS
1290  static int dpa_show(struct seq_file *m, void *v)
1291 @@ -1084,7 +1085,7 @@ bool kernel_page_present(struct page *page)
1292         pte = lookup_address((unsigned long)page_address(page), &level);
1293         return (pte_val(*pte) & _PAGE_PRESENT);
1294  }
1295 -
1296 +EXPORT_SYMBOL_GPL(kernel_page_present);
1297  #endif /* CONFIG_HIBERNATION */
1298  
1299  #endif /* CONFIG_DEBUG_PAGEALLOC */
1300 diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
1301 index 66bdfb5..77aae90 100644
1302 --- a/arch/x86/power/cpu_64.c
1303 +++ b/arch/x86/power/cpu_64.c
1304 @@ -10,6 +10,7 @@
1305  
1306  #include <linux/smp.h>
1307  #include <linux/suspend.h>
1308 +#include <linux/module.h>
1309  #include <asm/proto.h>
1310  #include <asm/page.h>
1311  #include <asm/pgtable.h>
1312 @@ -75,6 +76,7 @@ void save_processor_state(void)
1313  {
1314         __save_processor_state(&saved_context);
1315  }
1316 +EXPORT_SYMBOL_GPL(save_processor_state);
1317  
1318  static void do_fpu_end(void)
1319  {
1320 diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
1321 index 81197c6..ff7e534 100644
1322 --- a/arch/x86/power/hibernate_32.c
1323 +++ b/arch/x86/power/hibernate_32.c
1324 @@ -8,6 +8,7 @@
1325  
1326  #include <linux/suspend.h>
1327  #include <linux/bootmem.h>
1328 +#include <linux/module.h>
1329  
1330  #include <asm/system.h>
1331  #include <asm/page.h>
1332 @@ -163,6 +164,7 @@ int swsusp_arch_resume(void)
1333         restore_image();
1334         return 0;
1335  }
1336 +EXPORT_SYMBOL_GPL(swsusp_arch_resume);
1337  
1338  /*
1339   *     pfn_is_nosave - check if given pfn is in the 'nosave' section
1340 diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
1341 index 6dd000d..b42e72a 100644
1342 --- a/arch/x86/power/hibernate_64.c
1343 +++ b/arch/x86/power/hibernate_64.c
1344 @@ -10,6 +10,7 @@
1345  
1346  #include <linux/smp.h>
1347  #include <linux/suspend.h>
1348 +#include <linux/module.h>
1349  #include <asm/proto.h>
1350  #include <asm/page.h>
1351  #include <asm/pgtable.h>
1352 @@ -117,6 +118,7 @@ int swsusp_arch_resume(void)
1353         restore_image();
1354         return 0;
1355  }
1356 +EXPORT_SYMBOL_GPL(swsusp_arch_resume);
1357  
1358  /*
1359   *     pfn_is_nosave - check if given pfn is in the 'nosave' section
1360 @@ -167,3 +169,4 @@ int arch_hibernation_header_restore(void *addr)
1361         restore_cr3 = rdr->cr3;
1362         return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
1363  }
1364 +EXPORT_SYMBOL_GPL(arch_hibernation_header_restore);
1365 diff --git a/crypto/Kconfig b/crypto/Kconfig
1366 index d831859..f71cb00 100644
1367 --- a/crypto/Kconfig
1368 +++ b/crypto/Kconfig
1369 @@ -233,6 +233,14 @@ config CRYPTO_MD5
1370         help
1371           MD5 message digest algorithm (RFC1321).
1372  
1373 +config CRYPTO_LZF
1374 +       tristate "LZF compression algorithm"
1375 +       default y
1376 +       select CRYPTO_ALGAPI
1377 +       help
1378 +         This is the LZF algorithm. It is especially useful for TuxOnIce,
1379 +         because it achieves good compression quickly.
1380 +
1381  config CRYPTO_MICHAEL_MIC
1382         tristate "Michael MIC keyed digest algorithm"
1383         select CRYPTO_ALGAPI
1384 diff --git a/crypto/Makefile b/crypto/Makefile
1385 index d4f3ed8..fe05a9e 100644
1386 --- a/crypto/Makefile
1387 +++ b/crypto/Makefile
1388 @@ -67,6 +67,7 @@ obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
1389  obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
1390  obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
1391  obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
1392 +obj-$(CONFIG_CRYPTO_LZF) += lzf.o
1393  obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o
1394  obj-$(CONFIG_CRYPTO_LZO) += lzo.o
1395  
1396 diff --git a/crypto/lzf.c b/crypto/lzf.c
1397 new file mode 100644
1398 index 0000000..ccaf83a
1399 --- /dev/null
1400 +++ b/crypto/lzf.c
1401 @@ -0,0 +1,326 @@
1402 +/*
1403 + * Cryptoapi LZF compression module.
1404 + *
1405 + * Copyright (c) 2004-2008 Nigel Cunningham <nigel at tuxonice net>
1406 + *
1407 + * based on the deflate.c file:
1408 + *
1409 + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
1410 + *
1411 + * and upon the LZF compression module donated to the TuxOnIce project with
1412 + * the following copyright:
1413 + *
1414 + * This program is free software; you can redistribute it and/or modify it
1415 + * under the terms of the GNU General Public License as published by the Free
1416 + * Software Foundation; either version 2 of the License, or (at your option)
1417 + * any later version.
1418 + * Copyright (c) 2000-2003 Marc Alexander Lehmann <pcg@goof.com>
1419 + *
1420 + * Redistribution and use in source and binary forms, with or without modifica-
1421 + * tion, are permitted provided that the following conditions are met:
1422 + *
1423 + *   1.  Redistributions of source code must retain the above copyright notice,
1424 + *       this list of conditions and the following disclaimer.
1425 + *
1426 + *   2.  Redistributions in binary form must reproduce the above copyright
1427 + *       notice, this list of conditions and the following disclaimer in the
1428 + *       documentation and/or other materials provided with the distribution.
1429 + *
1430 + *   3.  The name of the author may not be used to endorse or promote products
1431 + *       derived from this software without specific prior written permission.
1432 + *
1433 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
1434 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
1435 + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
1436 + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
1437 + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
1438 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
1439 + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1440 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
1441 + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
1442 + * OF THE POSSIBILITY OF SUCH DAMAGE.
1443 + *
1444 + * Alternatively, the contents of this file may be used under the terms of
1445 + * the GNU General Public License version 2 (the "GPL"), in which case the
1446 + * provisions of the GPL are applicable instead of the above. If you wish to
1447 + * allow the use of your version of this file only under the terms of the
1448 + * GPL and not to allow others to use your version of this file under the
1449 + * BSD license, indicate your decision by deleting the provisions above and
1450 + * replace them with the notice and other provisions required by the GPL. If
1451 + * you do not delete the provisions above, a recipient may use your version
1452 + * of this file under either the BSD or the GPL.
1453 + */
1454 +
1455 +#include <linux/kernel.h>
1456 +#include <linux/module.h>
1457 +#include <linux/init.h>
1459 +#include <linux/crypto.h>
1460 +#include <linux/err.h>
1461 +#include <linux/vmalloc.h>
1462 +#include <linux/string.h>
1463 +
1464 +struct lzf_ctx {
1465 +       void *hbuf;
1466 +       unsigned int bufofs;
1467 +};
1468 +
1469 +/*
1470 + * size of hashtable is (1 << hlog) * sizeof (char *)
1471 + * decompression is independent of the hash table size
1472 + * the difference between 15 and 14 is very small
1473 + * for small blocks (and 14 is also faster).
1474 + * For a low-memory configuration, use hlog == 13;
1475 + * For best compression, use 15 or 16.
1476 + */
1477 +static const int hlog = 13;
1478 +
1479 +/*
1480 + * don't play with this unless you benchmark!
1481 + * decompression is not dependent on the hash function
1482 + * the hashing function might seem strange, just believe me
1483 + * it works ;)
1484 + */
1485 +static inline u16 first(const u8 *p)
1486 +{
1487 +       return ((p[0]) << 8) + p[1];
1488 +}
1489 +
1490 +static inline u16 next(u8 v, const u8 *p)
1491 +{
1492 +       return ((v) << 8) + p[2];
1493 +}
1494 +
1495 +static inline u32 idx(unsigned int h)
1496 +{
1497 +       return (((h ^ (h << 5)) >> (3*8 - hlog)) + h*3) & ((1 << hlog) - 1);
1498 +}
1499 +
1500 +/*
1501 + * IDX works because it is very similar to a multiplicative hash, e.g.
1502 + * (h * 57321 >> (3*8 - hlog))
1503 + * the next one is also quite good, albeit slow ;)
1504 + * (int)(cos(h & 0xffffff) * 1e6)
1505 + */
1506 +
1507 +static const int max_lit = (1 <<  5);
1508 +static const int max_off = (1 << 13);
1509 +static const int max_ref = ((1 <<  8) + (1 << 3));
1510 +
1511 +/*
1512 + * compressed format
1513 + *
1514 + * 000LLLLL <L+1>    ; literal
1515 + * LLLOOOOO oooooooo ; backref L
1516 + * 111OOOOO LLLLLLLL oooooooo ; backref L+7
1517 + *
1518 + */
1519 +
1520 +static void lzf_compress_exit(struct crypto_tfm *tfm)
1521 +{
1522 +       struct lzf_ctx *ctx = crypto_tfm_ctx(tfm);
1523 +
1524 +       if (!ctx->hbuf)
1525 +               return;
1526 +
1527 +       vfree(ctx->hbuf);
1528 +       ctx->hbuf = NULL;
1529 +}
1530 +
1531 +static int lzf_compress_init(struct crypto_tfm *tfm)
1532 +{
1533 +       struct lzf_ctx *ctx = crypto_tfm_ctx(tfm);
1534 +
1535 +       /* Get LZF ready to go */
1536 +       ctx->hbuf = vmalloc_32((1 << hlog) * sizeof(char *));
1537 +       if (ctx->hbuf)
1538 +               return 0;
1539 +
1540 +       printk(KERN_WARNING "Failed to allocate %ld bytes for lzf workspace\n",
1541 +                       (long) ((1 << hlog) * sizeof(char *)));
1542 +       return -ENOMEM;
1543 +}
1544 +
1545 +static int lzf_compress(struct crypto_tfm *tfm, const u8 *in_data,
1546 +               unsigned int in_len, u8 *out_data, unsigned int *out_len)
1547 +{
1548 +       struct lzf_ctx *ctx = crypto_tfm_ctx(tfm);
1549 +       const u8 **htab = ctx->hbuf;
1550 +       const u8 **hslot;
1551 +       const u8 *ip = in_data;
1552 +       u8 *op = out_data;
1553 +       const u8 *in_end = ip + in_len;
1554 +       u8 *out_end = op + *out_len - 3;
1555 +       const u8 *ref;
1556 +
1557 +       unsigned int hval = first(ip);
1558 +       unsigned long off;
1559 +       int lit = 0;
1560 +
1561 +       memset(htab, 0, (1 << hlog) * sizeof(char *)); /* clear whole table */
1562 +
1563 +       for (;;) {
1564 +               if (ip < in_end - 2) {
1565 +                       hval = next(hval, ip);
1566 +                       hslot = htab + idx(hval);
1567 +                       ref = *hslot;
1568 +                       *hslot = ip;
1569 +
1570 +                       off = ip - ref - 1;
1571 +                       if (off < max_off
1572 +                           && ip + 4 < in_end && ref > in_data
1573 +                           && *(u16 *) ref == *(u16 *) ip && ref[2] == ip[2]
1574 +                           ) {
1575 +                               /* match found at *ref++ */
1576 +                               unsigned int len = 2;
1577 +                               unsigned int maxlen = in_end - ip - len;
1578 +                               maxlen = maxlen > max_ref ? max_ref : maxlen;
1579 +
1580 +                               do {
1581 +                                       len++;
1582 +                               } while (len < maxlen && ref[len] == ip[len]);
1583 +
1584 +                               if (op + lit + 1 + 3 >= out_end) {
1585 +                                       *out_len = PAGE_SIZE;
1586 +                                       return 0;
1587 +                               }
1588 +
1589 +                               if (lit) {
1590 +                                       *op++ = lit - 1;
1591 +                                       lit = -lit;
1592 +                                       do {
1593 +                                               *op++ = ip[lit];
1594 +                                       } while (++lit);
1595 +                               }
1596 +
1597 +                               len -= 2;
1598 +                               ip++;
1599 +
1600 +                               if (len < 7) {
1601 +                                       *op++ = (off >> 8) + (len << 5);
1602 +                               } else {
1603 +                                       *op++ = (off >> 8) + (7 << 5);
1604 +                                       *op++ = len - 7;
1605 +                               }
1606 +
1607 +                               *op++ = off;
1608 +
1609 +                               ip += len;
1610 +                               hval = first(ip);
1611 +                               hval = next(hval, ip);
1612 +                               htab[idx(hval)] = ip;
1613 +                               ip++;
1614 +                               continue;
1615 +                       }
1616 +               } else if (ip == in_end)
1617 +                       break;
1618 +
1619 +               /* one more literal byte we must copy */
1620 +               lit++;
1621 +               ip++;
1622 +
1623 +               if (lit == max_lit) {
1624 +                       if (op + 1 + max_lit >= out_end) {
1625 +                               *out_len = PAGE_SIZE;
1626 +                               return 0;
1627 +                       }
1628 +
1629 +                       *op++ = max_lit - 1;
1630 +                       memcpy(op, ip - max_lit, max_lit);
1631 +                       op += max_lit;
1632 +                       lit = 0;
1633 +               }
1634 +       }
1635 +
1636 +       if (lit) {
1637 +               if (op + lit + 1 >= out_end) {
1638 +                       *out_len = PAGE_SIZE;
1639 +                       return 0;
1640 +               }
1641 +
1642 +               *op++ = lit - 1;
1643 +               lit = -lit;
1644 +               do {
1645 +                       *op++ = ip[lit];
1646 +               } while (++lit);
1647 +       }
1648 +
1649 +       *out_len = op - out_data;
1650 +       return 0;
1651 +}
1652 +
1653 +static int lzf_decompress(struct crypto_tfm *tfm, const u8 *src,
1654 +               unsigned int slen, u8 *dst, unsigned int *dlen)
1655 +{
1656 +       u8 const *ip = src;
1657 +       u8 *op = dst;
1658 +       u8 const *const in_end = ip + slen;
1659 +       u8 *const out_end = op + *dlen;
1660 +
1661 +       *dlen = PAGE_SIZE;
1662 +       do {
1663 +               unsigned int ctrl = *ip++;
1664 +
1665 +               if (ctrl < (1 << 5)) {
1666 +                       /* literal run */
1667 +                       ctrl++;
1668 +
1669 +                       if (op + ctrl > out_end)
1670 +                               return 0;
1671 +                       memcpy(op, ip, ctrl);
1672 +                       op += ctrl;
1673 +                       ip += ctrl;
1674 +               } else {        /* back reference */
1675 +
1676 +                       unsigned int len = ctrl >> 5;
1677 +
1678 +                       u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
1679 +
1680 +                       if (len == 7)
1681 +                               len += *ip++;
1682 +
1683 +                       ref -= *ip++;
1684 +                       len += 2;
1685 +
1686 +                       if (op + len > out_end || ref < (u8 *) dst)
1687 +                               return 0;
1688 +
1689 +                       do {
1690 +                               *op++ = *ref++;
1691 +                       } while (--len);
1692 +               }
1693 +       } while (op < out_end && ip < in_end);
1694 +
1695 +       *dlen = op - (u8 *) dst;
1696 +       return 0;
1697 +}
1698 +
1699 +static struct crypto_alg alg = {
1700 +       .cra_name = "lzf",
1701 +       .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
1702 +       .cra_ctxsize = sizeof(struct lzf_ctx),
1703 +       .cra_module = THIS_MODULE,
1704 +       .cra_list = LIST_HEAD_INIT(alg.cra_list),
1705 +       .cra_init = lzf_compress_init,
1706 +       .cra_exit = lzf_compress_exit,
1707 +       .cra_u = { .compress = {
1708 +       .coa_compress = lzf_compress,
1709 +       .coa_decompress = lzf_decompress } }
1710 +};
1711 +
1712 +static int __init init(void)
1713 +{
1714 +       return crypto_register_alg(&alg);
1715 +}
1716 +
1717 +static void __exit fini(void)
1718 +{
1719 +       crypto_unregister_alg(&alg);
1720 +}
1721 +
1722 +module_init(init);
1723 +module_exit(fini);
1724 +
1725 +MODULE_LICENSE("GPL");
1726 +MODULE_DESCRIPTION("LZF Compression Algorithm");
1727 +MODULE_AUTHOR("Marc Alexander Lehmann & Nigel Cunningham");
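+
+/*
+ * Usage sketch (illustrative, not part of the module): callers reach this
+ * algorithm through the generic crypto compression interface, along the
+ * lines of:
+ *
+ *     struct crypto_comp *tfm = crypto_alloc_comp("lzf", 0, 0);
+ *     unsigned int dlen = PAGE_SIZE;
+ *
+ *     if (!IS_ERR(tfm)) {
+ *             crypto_comp_compress(tfm, src, slen, dst, &dlen);
+ *             crypto_free_comp(tfm);
+ *     }
+ */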
1728 diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
1729 index 273a944..aee84df 100644
1730 --- a/drivers/base/power/main.c
1731 +++ b/drivers/base/power/main.c
1732 @@ -54,6 +54,7 @@ void device_pm_lock(void)
1733  {
1734         mutex_lock(&dpm_list_mtx);
1735  }
1736 +EXPORT_SYMBOL_GPL(device_pm_lock);
1737  
1738  /**
1739   *     device_pm_unlock - unlock the list of active devices used by the PM core
1740 @@ -62,6 +63,7 @@ void device_pm_unlock(void)
1741  {
1742         mutex_unlock(&dpm_list_mtx);
1743  }
1744 +EXPORT_SYMBOL_GPL(device_pm_unlock);
1745  
1746  /**
1747   *     device_pm_add - add a device to the list of active devices
1748 diff --git a/drivers/char/vt.c b/drivers/char/vt.c
1749 index d429499..9603d96 100644
1750 --- a/drivers/char/vt.c
1751 +++ b/drivers/char/vt.c
1752 @@ -187,6 +187,7 @@ int fg_console;
1753  int last_console;
1754  int want_console = -1;
1755  int kmsg_redirect;
1756 +EXPORT_SYMBOL_GPL(kmsg_redirect);
1757  
1758  /*
1759   * For each existing display, we have a pointer to console currently visible
1760 diff --git a/drivers/md/md.c b/drivers/md/md.c
1761 index fe6eccd..3918848 100644
1762 --- a/drivers/md/md.c
1763 +++ b/drivers/md/md.c
1764 @@ -5593,7 +5593,6 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
1765         }
1766  }
1767  
1768 -
1769  /* md_write_start(mddev, bi)
1770   * If we need to update some array metadata (e.g. 'active' flag
1771   * in superblock) before writing, schedule a superblock update
1772 @@ -5738,6 +5737,9 @@ void md_do_sync(mddev_t *mddev)
1773                 mddev->curr_resync = 2;
1774  
1775         try_again:
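+               /* Don't begin a resync pass while TuxOnIce's freezer is active. */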
1776 +               while (freezer_is_on())
1777 +                       yield();
1778 +
1779                 if (kthread_should_stop()) {
1780                         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1781                         goto skip;
1782 @@ -5759,6 +5761,10 @@ void md_do_sync(mddev_t *mddev)
1783                                          * time 'round when curr_resync == 2
1784                                          */
1785                                         continue;
1786 +
1787 +                               while (freezer_is_on())
1788 +                                       yield();
1789 +
1790                                 /* We need to wait 'interruptible' so as not to
1791                                  * contribute to the load average, and not to
1792                                  * be caught by 'softlockup'
1793 @@ -5771,6 +5777,7 @@ void md_do_sync(mddev_t *mddev)
1794                                                " share one or more physical units)\n",
1795                                                desc, mdname(mddev), mdname(mddev2));
1796                                         mddev_put(mddev2);
1797 +                                       try_to_freeze();
1798                                         if (signal_pending(current))
1799                                                 flush_signals(current);
1800                                         schedule();
1801 @@ -5854,6 +5861,10 @@ void md_do_sync(mddev_t *mddev)
1802                                    mddev->resync_max > j
1803                                    || kthread_should_stop());
1804                 }
1805 +
1806 +               while (freezer_is_on())
1807 +                       yield();
1808 +
1809                 if (kthread_should_stop())
1810                         goto interrupted;
1811                 sectors = mddev->pers->sync_request(mddev, j, &skipped,
1812 @@ -5897,6 +5908,9 @@ void md_do_sync(mddev_t *mddev)
1813                         last_mark = next;
1814                 }
1815  
1816 +               while (freezer_is_on())
1817 +                       yield();
1818 +
1819  
1820                 if (kthread_should_stop())
1821                         goto interrupted;
1822 diff --git a/fs/buffer.c b/fs/buffer.c
1823 index ac78d4c..7ae191b 100644
1824 --- a/fs/buffer.c
1825 +++ b/fs/buffer.c
1826 @@ -247,6 +247,93 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
1827  }
1828  EXPORT_SYMBOL(thaw_bdev);
1829  
1830 +#ifdef CONFIG_FS_FREEZER_DEBUG
1831 +#define FS_PRINTK(fmt, args...) printk(fmt, ## args)
1832 +#else
1833 +#define FS_PRINTK(fmt, args...)
1834 +#endif
1835 +
1836 +/* #define DEBUG_FS_FREEZING */
1837 +
1838 +/**
1839 + * freeze_filesystems - lock all filesystems and force them into a consistent
1840 + * state
1841 + * @which:     What combination of fuse & non-fuse to freeze.
1842 + */
1843 +void freeze_filesystems(int which)
1844 +{
1845 +       struct super_block *sb;
1846 +
1847 +       lockdep_off();
1848 +
1849 +       /*
1850 +        * Freeze in reverse order so filesystems dependent upon others are
1851 +        * frozen in the right order (eg. loopback on ext3).
1852 +        */
1853 +       list_for_each_entry_reverse(sb, &super_blocks, s_list) {
1854 +               FS_PRINTK(KERN_INFO "Considering %s.%s: (root %p, bdev %x)",
1855 +                       sb->s_type->name ? sb->s_type->name : "?",
1856 +                       sb->s_subtype ? sb->s_subtype : "", sb->s_root,
1857 +                       sb->s_bdev ? sb->s_bdev->bd_dev : 0);
1858 +
1859 +               if (sb->s_type->fs_flags & FS_IS_FUSE &&
1860 +                   sb->s_frozen == SB_UNFROZEN &&
1861 +                   which & FS_FREEZER_FUSE) {
1862 +                       sb->s_frozen = SB_FREEZE_TRANS;
1863 +                       sb->s_flags |= MS_FROZEN;
1864 +                       FS_PRINTK("Fuse filesystem done.\n");
1865 +                       continue;
1866 +               }
1867 +
1868 +               if (!sb->s_root || !sb->s_bdev ||
1869 +                   (sb->s_frozen == SB_FREEZE_TRANS) ||
1870 +                   (sb->s_flags & MS_RDONLY) ||
1871 +                   (sb->s_flags & MS_FROZEN) ||
1872 +                   !(which & FS_FREEZER_NORMAL)) {
1873 +                       FS_PRINTK(KERN_INFO "Nope.\n");
1874 +                       continue;
1875 +               }
1876 +
1877 +               FS_PRINTK(KERN_INFO "Freezing %x... ", sb->s_bdev->bd_dev);
1878 +               freeze_bdev(sb->s_bdev);
1879 +               sb->s_flags |= MS_FROZEN;
1880 +               FS_PRINTK(KERN_INFO "Done.\n");
1881 +       }
1882 +
1883 +       lockdep_on();
1884 +}
1885 +
1886 +/**
1887 + * thaw_filesystems - unlock all filesystems
1888 + * @which:     What combination of fuse & non-fuse to thaw.
1889 + */
1890 +void thaw_filesystems(int which)
1891 +{
1892 +       struct super_block *sb;
1893 +
1894 +       lockdep_off();
1895 +
1896 +       list_for_each_entry(sb, &super_blocks, s_list) {
1897 +               if (!(sb->s_flags & MS_FROZEN))
1898 +                       continue;
1899 +
1900 +               if (sb->s_type->fs_flags & FS_IS_FUSE) {
1901 +                       if (!(which & FS_FREEZER_FUSE))
1902 +                               continue;
1903 +
1904 +                       sb->s_frozen = SB_UNFROZEN;
1905 +               } else {
1906 +                       if (!(which & FS_FREEZER_NORMAL))
1907 +                               continue;
1908 +
1909 +                       thaw_bdev(sb->s_bdev, sb);
1910 +               }
1911 +               sb->s_flags &= ~MS_FROZEN;
1912 +       }
1913 +
1914 +       lockdep_on();
1915 +}
1916 +
1917  /*
1918   * Various filesystems appear to want __find_get_block to be non-blocking.
1919   * But it's the page lock which protects the buffers.  To get around this,
1920 diff --git a/fs/drop_caches.c b/fs/drop_caches.c
1921 index 3e5637f..f3c5cd6 100644
1922 --- a/fs/drop_caches.c
1923 +++ b/fs/drop_caches.c
1924 @@ -8,6 +8,7 @@
1925  #include <linux/writeback.h>
1926  #include <linux/sysctl.h>
1927  #include <linux/gfp.h>
1928 +#include <linux/module.h>
1929  
1930  /* A global variable is a bit ugly, but it keeps the code simple */
1931  int sysctl_drop_caches;
1932 @@ -33,7 +34,7 @@ static void drop_pagecache_sb(struct super_block *sb)
1933         iput(toput_inode);
1934  }
1935  
1936 -static void drop_pagecache(void)
1937 +void drop_pagecache(void)
1938  {
1939         struct super_block *sb;
1940  
1941 @@ -61,6 +62,7 @@ static void drop_slab(void)
1942                 nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
1943         } while (nr_objects > 10);
1944  }
1945 +EXPORT_SYMBOL_GPL(drop_pagecache);
1946  
1947  int drop_caches_sysctl_handler(ctl_table *table, int write,
1948         struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
1949 diff --git a/fs/fuse/control.c b/fs/fuse/control.c
1950 index 4f3cab3..f15b0c5 100644
1951 --- a/fs/fuse/control.c
1952 +++ b/fs/fuse/control.c
1953 @@ -207,6 +207,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
1954  static struct file_system_type fuse_ctl_fs_type = {
1955         .owner          = THIS_MODULE,
1956         .name           = "fusectl",
1957 +       .fs_flags       = FS_IS_FUSE,
1958         .get_sb         = fuse_ctl_get_sb,
1959         .kill_sb        = fuse_ctl_kill_sb,
1960  };
1961 diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
1962 index 87250b6..7246e3d 100644
1963 --- a/fs/fuse/dev.c
1964 +++ b/fs/fuse/dev.c
1965 @@ -7,6 +7,7 @@
1966  */
1967  
1968  #include "fuse_i.h"
1969 +#include "fuse.h"
1970  
1971  #include <linux/init.h>
1972  #include <linux/module.h>
1973 @@ -16,6 +17,7 @@
1974  #include <linux/pagemap.h>
1975  #include <linux/file.h>
1976  #include <linux/slab.h>
1977 +#include <linux/freezer.h>
1978  
1979  MODULE_ALIAS_MISCDEV(FUSE_MINOR);
1980  
1981 @@ -743,6 +745,8 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1982         if (!fc)
1983                 return -EPERM;
1984  
1985 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_dev_read");
1986 +
1987   restart:
1988         spin_lock(&fc->lock);
1989         err = -EAGAIN;
1990 @@ -869,6 +873,9 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1991         if (!fc)
1992                 return -EPERM;
1993  
1994 +       FUSE_MIGHT_FREEZE(iocb->ki_filp->f_mapping->host->i_sb,
1995 +                       "fuse_dev_write");
1996 +
1997         fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
1998         if (nbytes < sizeof(struct fuse_out_header))
1999                 return -EINVAL;
2000 diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
2001 index fd03330..85fec3a 100644
2002 --- a/fs/fuse/dir.c
2003 +++ b/fs/fuse/dir.c
2004 @@ -7,12 +7,14 @@
2005  */
2006  
2007  #include "fuse_i.h"
2008 +#include "fuse.h"
2009  
2010  #include <linux/pagemap.h>
2011  #include <linux/file.h>
2012  #include <linux/gfp.h>
2013  #include <linux/sched.h>
2014  #include <linux/namei.h>
2015 +#include <linux/freezer.h>
2016  
2017  #if BITS_PER_LONG >= 64
2018  static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
2019 @@ -174,6 +176,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
2020                         return 0;
2021  
2022                 fc = get_fuse_conn(inode);
2023 +
2024 +               FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_dentry_revalidate");
2025 +
2026                 req = fuse_get_req(fc);
2027                 if (IS_ERR(req))
2028                         return 0;
2029 @@ -268,6 +273,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
2030         if (name->len > FUSE_NAME_MAX)
2031                 goto out;
2032  
2033 +       FUSE_MIGHT_FREEZE(sb, "fuse_lookup_name");
2034 +
2035         req = fuse_get_req(fc);
2036         err = PTR_ERR(req);
2037         if (IS_ERR(req))
2038 @@ -331,6 +338,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
2039         if (err)
2040                 goto out_err;
2041  
2042 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_lookup");
2043 +
2044         err = -EIO;
2045         if (inode && get_node_id(inode) == FUSE_ROOT_ID)
2046                 goto out_iput;
2047 @@ -402,6 +411,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
2048         if (IS_ERR(forget_req))
2049                 return PTR_ERR(forget_req);
2050  
2051 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_create_open");
2052 +
2053         req = fuse_get_req(fc);
2054         err = PTR_ERR(req);
2055         if (IS_ERR(req))
2056 @@ -488,6 +499,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
2057         int err;
2058         struct fuse_req *forget_req;
2059  
2060 +       FUSE_MIGHT_FREEZE(dir->i_sb, "create_new_entry");
2061 +
2062         forget_req = fuse_get_req(fc);
2063         if (IS_ERR(forget_req)) {
2064                 fuse_put_request(fc, req);
2065 @@ -585,7 +598,11 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
2066  {
2067         struct fuse_mkdir_in inarg;
2068         struct fuse_conn *fc = get_fuse_conn(dir);
2069 -       struct fuse_req *req = fuse_get_req(fc);
2070 +       struct fuse_req *req;
2071 +
2072 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_mkdir");
2073 +
2074 +       req = fuse_get_req(fc);
2075         if (IS_ERR(req))
2076                 return PTR_ERR(req);
2077  
2078 @@ -605,7 +622,11 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
2079  {
2080         struct fuse_conn *fc = get_fuse_conn(dir);
2081         unsigned len = strlen(link) + 1;
2082 -       struct fuse_req *req = fuse_get_req(fc);
2083 +       struct fuse_req *req;
2084 +
2085 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_symlink");
2086 +
2087 +       req = fuse_get_req(fc);
2088         if (IS_ERR(req))
2089                 return PTR_ERR(req);
2090  
2091 @@ -622,7 +643,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
2092  {
2093         int err;
2094         struct fuse_conn *fc = get_fuse_conn(dir);
2095 -       struct fuse_req *req = fuse_get_req(fc);
2096 +       struct fuse_req *req;
2097 +
2098 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_unlink");
2099 +
2100 +       req = fuse_get_req(fc);
2101         if (IS_ERR(req))
2102                 return PTR_ERR(req);
2103  
2104 @@ -653,7 +678,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
2105  {
2106         int err;
2107         struct fuse_conn *fc = get_fuse_conn(dir);
2108 -       struct fuse_req *req = fuse_get_req(fc);
2109 +       struct fuse_req *req;
2110 +
2111 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_rmdir");
2112 +
2113 +       req = fuse_get_req(fc);
2114         if (IS_ERR(req))
2115                 return PTR_ERR(req);
2116  
2117 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
2118 index 2bada6b..a2081c4 100644
2119 --- a/fs/fuse/file.c
2120 +++ b/fs/fuse/file.c
2121 @@ -7,11 +7,13 @@
2122  */
2123  
2124  #include "fuse_i.h"
2125 +#include "fuse.h"
2126  
2127  #include <linux/pagemap.h>
2128  #include <linux/slab.h>
2129  #include <linux/kernel.h>
2130  #include <linux/sched.h>
2131 +#include <linux/freezer.h>
2132  
2133  static const struct file_operations fuse_direct_io_file_operations;
2134  
2135 @@ -23,6 +25,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
2136         struct fuse_req *req;
2137         int err;
2138  
2139 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_send_open");
2140 +
2141         req = fuse_get_req(fc);
2142         if (IS_ERR(req))
2143                 return PTR_ERR(req);
2144 @@ -268,6 +272,8 @@ static int fuse_flush(struct file *file, fl_owner_t id)
2145         if (fc->no_flush)
2146                 return 0;
2147  
2148 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_flush");
2149 +
2150         req = fuse_get_req_nofail(fc, file);
2151         memset(&inarg, 0, sizeof(inarg));
2152         inarg.fh = ff->fh;
2153 @@ -319,6 +325,8 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
2154         if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
2155                 return 0;
2156  
2157 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_fsync_common");
2158 +
2159         /*
2160          * Start writeback against all dirty pages of the inode, then
2161          * wait for all outstanding writes, before sending the FSYNC
2162 @@ -427,6 +435,8 @@ static int fuse_readpage(struct file *file, struct page *page)
2163         if (is_bad_inode(inode))
2164                 goto out;
2165  
2166 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_readpage");
2167 +
2168         /*
2169          * Page writeback can extend beyond the liftime of the
2170          * page-cache page, so make sure we read a properly synced
2171 @@ -527,6 +537,9 @@ static int fuse_readpages_fill(void *_data, struct page *page)
2172         struct inode *inode = data->inode;
2173         struct fuse_conn *fc = get_fuse_conn(inode);
2174  
2175 +       FUSE_MIGHT_FREEZE(data->file->f_mapping->host->i_sb,
2176 +                       "fuse_readpages_fill");
2177 +
2178         fuse_wait_on_page_writeback(inode, page->index);
2179  
2180         if (req->num_pages &&
2181 @@ -557,6 +570,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
2182         if (is_bad_inode(inode))
2183                 goto out;
2184  
2185 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_readpages");
2186 +
2187         data.file = file;
2188         data.inode = inode;
2189         data.req = fuse_get_req(fc);
2190 @@ -674,6 +689,8 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
2191         if (is_bad_inode(inode))
2192                 return -EIO;
2193  
2194 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_buffered_write");
2195 +
2196         /*
2197          * Make sure writepages on the same page are not mixed up with
2198          * plain writes.
2199 @@ -828,6 +845,8 @@ static ssize_t fuse_perform_write(struct file *file,
2200                 struct fuse_req *req;
2201                 ssize_t count;
2202  
2203 +               FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_perform_write");
2204 +
2205                 req = fuse_get_req(fc);
2206                 if (IS_ERR(req)) {
2207                         err = PTR_ERR(req);
2208 @@ -962,6 +981,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
2209         if (is_bad_inode(inode))
2210                 return -EIO;
2211  
2212 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_direct_io");
2213 +
2214         req = fuse_get_req(fc);
2215         if (IS_ERR(req))
2216                 return PTR_ERR(req);
2217 @@ -1315,6 +1336,8 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
2218         struct fuse_lk_out outarg;
2219         int err;
2220  
2221 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_getlk");
2222 +
2223         req = fuse_get_req(fc);
2224         if (IS_ERR(req))
2225                 return PTR_ERR(req);
2226 @@ -1350,6 +1373,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
2227         if (fl->fl_flags & FL_CLOSE)
2228                 return 0;
2229  
2230 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_setlk");
2231 +
2232         req = fuse_get_req(fc);
2233         if (IS_ERR(req))
2234                 return PTR_ERR(req);
2235 @@ -1416,6 +1441,8 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
2236         if (!inode->i_sb->s_bdev || fc->no_bmap)
2237                 return 0;
2238  
2239 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_bmap");
2240 +
2241         req = fuse_get_req(fc);
2242         if (IS_ERR(req))
2243                 return 0;
2244 diff --git a/fs/fuse/fuse.h b/fs/fuse/fuse.h
2245 new file mode 100644
2246 index 0000000..170e49a
2247 --- /dev/null
2248 +++ b/fs/fuse/fuse.h
2249 @@ -0,0 +1,13 @@
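+/*
+ * Block the calling task while its fuse superblock is frozen for a
+ * hibernation cycle: log once, then freeze or yield until thawed, so that
+ * no new fuse requests are queued while the image is written.
+ */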
2250 +#define FUSE_MIGHT_FREEZE(superblock, desc) \
2251 +do { \
2252 +       int printed = 0; \
2253 +       while (superblock->s_frozen != SB_UNFROZEN) { \
2254 +               if (!printed) { \
2255 +                       printk(KERN_INFO "%d frozen in " desc ".\n", \
2256 +                                               current->pid); \
2257 +                       printed = 1; \
2258 +               } \
2259 +               try_to_freeze(); \
2260 +               yield(); \
2261 +       } \
2262 +} while (0)
2263 diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
2264 index d2249f1..04ae6cb 100644
2265 --- a/fs/fuse/inode.c
2266 +++ b/fs/fuse/inode.c
2267 @@ -914,7 +914,7 @@ static int fuse_get_sb(struct file_system_type *fs_type,
2268  static struct file_system_type fuse_fs_type = {
2269         .owner          = THIS_MODULE,
2270         .name           = "fuse",
2271 -       .fs_flags       = FS_HAS_SUBTYPE,
2272 +       .fs_flags       = FS_HAS_SUBTYPE | FS_IS_FUSE,
2273         .get_sb         = fuse_get_sb,
2274         .kill_sb        = kill_anon_super,
2275  };
2276 @@ -933,7 +933,7 @@ static struct file_system_type fuseblk_fs_type = {
2277         .name           = "fuseblk",
2278         .get_sb         = fuse_get_sb_blk,
2279         .kill_sb        = kill_block_super,
2280 -       .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
2281 +       .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_IS_FUSE,
2282  };
2283  
2284  static inline int register_fuseblk(void)
2285 diff --git a/fs/namei.c b/fs/namei.c
2286 index 4ea63ed..65be6a6 100644
2287 --- a/fs/namei.c
2288 +++ b/fs/namei.c
2289 @@ -2223,6 +2223,8 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2290         if (!dir->i_op || !dir->i_op->unlink)
2291                 return -EPERM;
2292  
2293 +       vfs_check_frozen(dir->i_sb, SB_FREEZE_WRITE);
2294 +
2295         DQUOT_INIT(dir);
2296  
2297         mutex_lock(&dentry->d_inode->i_mutex);
2298 diff --git a/fs/super.c b/fs/super.c
2299 index e931ae9..70145e2 100644
2300 --- a/fs/super.c
2301 +++ b/fs/super.c
2302 @@ -43,6 +43,8 @@
2303  
2304  
2305  LIST_HEAD(super_blocks);
2306 +EXPORT_SYMBOL_GPL(super_blocks);
2307 +
2308  DEFINE_SPINLOCK(sb_lock);
2309  
2310  /**
2311 diff --git a/include/linux/Kbuild b/include/linux/Kbuild
2312 index b68ec09..6eebd34 100644
2313 --- a/include/linux/Kbuild
2314 +++ b/include/linux/Kbuild
2315 @@ -208,6 +208,7 @@ unifdef-y += filter.h
2316  unifdef-y += flat.h
2317  unifdef-y += futex.h
2318  unifdef-y += fs.h
2319 +unifdef-y += freezer.h
2320  unifdef-y += gameport.h
2321  unifdef-y += generic_serial.h
2322  unifdef-y += hayesesp.h
2323 diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
2324 index eadaab4..7eb6655 100644
2325 --- a/include/linux/buffer_head.h
2326 +++ b/include/linux/buffer_head.h
2327 @@ -171,6 +171,11 @@ wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
2328  int fsync_bdev(struct block_device *);
2329  struct super_block *freeze_bdev(struct block_device *);
2330  void thaw_bdev(struct block_device *, struct super_block *);
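+/* Selector bits for freeze_filesystems()/thaw_filesystems() below. */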
2331 +#define FS_FREEZER_FUSE 1
2332 +#define FS_FREEZER_NORMAL 2
2333 +#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL)
2334 +void freeze_filesystems(int which);
2335 +void thaw_filesystems(int which);
2336  int fsync_super(struct super_block *);
2337  int fsync_no_super(struct block_device *);
2338  struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
2339 diff --git a/include/linux/freezer.h b/include/linux/freezer.h
2340 index deddeed..8c2dadf 100644
2341 --- a/include/linux/freezer.h
2342 +++ b/include/linux/freezer.h
2343 @@ -127,6 +127,23 @@ static inline void set_freezable(void)
2344         current->flags &= ~PF_NOFREEZE;
2345  }
2346  
2347 +#ifdef CONFIG_PM_SLEEP
2348 +extern int freezer_state;
2349 +#define FREEZER_OFF 0
2350 +#define FREEZER_FILESYSTEMS_FROZEN 1
2351 +#define FREEZER_USERSPACE_FROZEN 2
2352 +#define FREEZER_FULLY_ON 3
2353 +
2354 +static inline int freezer_is_on(void)
2355 +{
2356 +       return freezer_state == FREEZER_FULLY_ON;
2357 +}
2358 +#else
2359 +static inline int freezer_is_on(void) { return 0; }
2360 +#endif
2361 +
2362 +extern void thaw_kernel_threads(void);
2363 +
2364  /*
2365   * Tell the freezer that the current task should be frozen by it and that it
2366   * should send a fake signal to the task to freeze it.
2367 @@ -178,6 +195,8 @@ static inline int freeze_processes(void) { BUG(); return 0; }
2368  static inline void thaw_processes(void) {}
2369  
2370  static inline int try_to_freeze(void) { return 0; }
2371 +static inline int freezer_is_on(void) { return 0; }
2372 +static inline void thaw_kernel_threads(void) { }
2373  
2374  static inline void freezer_do_not_count(void) {}
2375  static inline void freezer_count(void) {}
2376 diff --git a/include/linux/fs.h b/include/linux/fs.h
2377 index 580b513..e7a3169 100644
2378 --- a/include/linux/fs.h
2379 +++ b/include/linux/fs.h
2380 @@ -8,6 +8,7 @@
2381  
2382  #include <linux/limits.h>
2383  #include <linux/ioctl.h>
2384 +#include <linux/freezer.h>
2385  
2386  /*
2387   * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
2388 @@ -96,6 +97,7 @@ extern int dir_notify_enable;
2389  #define FS_REQUIRES_DEV 1 
2390  #define FS_BINARY_MOUNTDATA 2
2391  #define FS_HAS_SUBTYPE 4
2392 +#define FS_IS_FUSE     8       /* Fuse filesystem - bdev freeze these too */
2393  #define FS_REVAL_DOT   16384   /* Check the paths ".", ".." for staleness */
2394  #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move()
2395                                          * during rename() internally.
2396 @@ -128,6 +130,7 @@ extern int dir_notify_enable;
2397  #define MS_RELATIME    (1<<21) /* Update atime relative to mtime/ctime. */
2398  #define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
2399  #define MS_I_VERSION   (1<<23) /* Update inode I_version field */
2400 +#define MS_FROZEN      (1<<24) /* Frozen by freeze_filesystems() */
2401  #define MS_ACTIVE      (1<<30)
2402  #define MS_NOUSER      (1<<31)
2403  
2404 @@ -1141,8 +1144,11 @@ enum {
2405         SB_FREEZE_TRANS = 2,
2406  };
2407  
2408 -#define vfs_check_frozen(sb, level) \
2409 -       wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
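+/* A task waiting for a thaw must not, itself, block the freezer. */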
2410 +#define vfs_check_frozen(sb, level) do { \
2411 +       freezer_do_not_count(); \
2412 +       wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))); \
2413 +       freezer_count(); \
2414 +} while (0)
2415  
2416  #define get_fs_excl() atomic_inc(&current->fs_excl)
2417  #define put_fs_excl() atomic_dec(&current->fs_excl)
2418 diff --git a/include/linux/mm.h b/include/linux/mm.h
2419 index 72a15dc..01a7657 100644
2420 --- a/include/linux/mm.h
2421 +++ b/include/linux/mm.h
2422 @@ -1264,6 +1264,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
2423                                         void __user *, size_t *, loff_t *);
2424  unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
2425                         unsigned long lru_pages);
2426 +void drop_pagecache(void);
2427  
2428  #ifndef CONFIG_MMU
2429  #define randomize_va_space 0
2430 diff --git a/include/linux/netlink.h b/include/linux/netlink.h
2431 index 9ff1b54..100dc2e 100644
2432 --- a/include/linux/netlink.h
2433 +++ b/include/linux/netlink.h
2434 @@ -24,6 +24,8 @@
2435  /* leave room for NETLINK_DM (DM Events) */
2436  #define NETLINK_SCSITRANSPORT  18      /* SCSI Transports */
2437  #define NETLINK_ECRYPTFS       19
2438 +#define NETLINK_TOI_USERUI     20      /* TuxOnIce's userui */
2439 +#define NETLINK_TOI_USM                21      /* Userspace storage manager */
2440  
2441  #define MAX_LINKS 32           
2442  
2443 diff --git a/include/linux/suspend.h b/include/linux/suspend.h
2444 index 2ce8207..f469faf 100644
2445 --- a/include/linux/suspend.h
2446 +++ b/include/linux/suspend.h
2447 @@ -280,4 +280,70 @@ static inline void register_nosave_region_late(unsigned long b, unsigned long e)
2448  
2449  extern struct mutex pm_mutex;
2450  
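+/* TuxOnIce state bits, manipulated via the toi_state bitmap below. */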
2451 +enum {
2452 +       TOI_CAN_HIBERNATE,
2453 +       TOI_CAN_RESUME,
2454 +       TOI_RESUME_DEVICE_OK,
2455 +       TOI_NORESUME_SPECIFIED,
2456 +       TOI_SANITY_CHECK_PROMPT,
2457 +       TOI_CONTINUE_REQ,
2458 +       TOI_RESUMED_BEFORE,
2459 +       TOI_BOOT_TIME,
2460 +       TOI_NOW_RESUMING,
2461 +       TOI_IGNORE_LOGLEVEL,
2462 +       TOI_TRYING_TO_RESUME,
2463 +       TOI_LOADING_ALT_IMAGE,
2464 +       TOI_STOP_RESUME,
2465 +       TOI_IO_STOPPED,
2466 +       TOI_NOTIFIERS_PREPARE,
2467 +       TOI_CLUSTER_MODE,
2468 +       TOI_BOOT_KERNEL,
2469 +};
2470 +
2471 +#ifdef CONFIG_TOI
2472 +
2473 +/* Used in init dir files */
2474 +extern unsigned long toi_state;
2475 +#define set_toi_state(bit) (set_bit(bit, &toi_state))
2476 +#define clear_toi_state(bit) (clear_bit(bit, &toi_state))
2477 +#define test_toi_state(bit) (test_bit(bit, &toi_state))
2478 +extern int toi_running;
2479 +
2480 +#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action))
2481 +extern int toi_try_hibernate(void);
2482 +
2483 +#else /* !CONFIG_TOI */
2484 +
2485 +#define toi_state              (0)
2486 +#define set_toi_state(bit) do { } while (0)
2487 +#define clear_toi_state(bit) do { } while (0)
2488 +#define test_toi_state(bit) (0)
2489 +#define toi_running (0)
2490 +
2491 +static inline int toi_try_hibernate(void) { return 0; }
2492 +#define test_action_state(bit) (0)
2493 +
2494 +#endif /* CONFIG_TOI */
2495 +
2496 +#ifdef CONFIG_HIBERNATION
2497 +#ifdef CONFIG_TOI
2498 +extern void toi_try_resume(void);
2499 +#else
2500 +#define toi_try_resume() do { } while (0)
2501 +#endif
2502 +
2503 +extern int resume_attempted;
2504 +extern int software_resume(void);
2505 +
2506 +static inline void check_resume_attempted(void)
2507 +{
2508 +       if (resume_attempted)
2509 +               return;
2510 +
2511 +       software_resume();
2512 +}
2513 +#else
2514 +#define check_resume_attempted() do { } while (0)
2515 +#define resume_attempted (0)
2516 +#endif
2517  #endif /* _LINUX_SUSPEND_H */
2518 diff --git a/include/linux/swap.h b/include/linux/swap.h
2519 index de40f16..661d8d5 100644
2520 --- a/include/linux/swap.h
2521 +++ b/include/linux/swap.h
2522 @@ -164,6 +164,7 @@ extern unsigned long totalram_pages;
2523  extern unsigned long totalreserve_pages;
2524  extern long nr_swap_pages;
2525  extern unsigned int nr_free_buffer_pages(void);
2526 +extern unsigned int nr_unallocated_buffer_pages(void);
2527  extern unsigned int nr_free_pagecache_pages(void);
2528  
2529  /* Definition of global_page_state not available yet */
2530 diff --git a/init/do_mounts.c b/init/do_mounts.c
2531 index 3715feb..facc15a 100644
2532 --- a/init/do_mounts.c
2533 +++ b/init/do_mounts.c
2534 @@ -141,6 +141,7 @@ fail:
2535  done:
2536         return res;
2537  }
2538 +EXPORT_SYMBOL_GPL(name_to_dev_t);
2539  
2540  static int __init root_dev_setup(char *line)
2541  {
2542 @@ -400,6 +401,8 @@ void __init prepare_namespace(void)
2543         if (is_floppy && rd_doload && rd_load_disk(0))
2544                 ROOT_DEV = Root_RAM0;
2545  
2546 +       check_resume_attempted();
2547 +
2548         mount_root();
2549  out:
2550         sys_mount(".", "/", NULL, MS_MOVE, NULL);
2551 diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
2552 index 614241b..f3ea292 100644
2553 --- a/init/do_mounts_initrd.c
2554 +++ b/init/do_mounts_initrd.c
2555 @@ -6,6 +6,7 @@
2556  #include <linux/romfs_fs.h>
2557  #include <linux/initrd.h>
2558  #include <linux/sched.h>
2559 +#include <linux/suspend.h>
2560  #include <linux/freezer.h>
2561  
2562  #include "do_mounts.h"
2563 @@ -68,6 +69,11 @@ static void __init handle_initrd(void)
2564  
2565         current->flags &= ~PF_FREEZER_SKIP;
2566  
2567 +       if (!resume_attempted)
2568 +               printk(KERN_ERR "TuxOnIce: No attempt was made to resume from "
2569 +                               "any image that might exist.\n");
2570 +       clear_toi_state(TOI_BOOT_TIME);
2571 +
2572         /* move initrd to rootfs' /old */
2573         sys_fchdir(old_fd);
2574         sys_mount("/", ".", NULL, MS_MOVE, NULL);
2575 diff --git a/init/main.c b/init/main.c
2576 index 3820323..5dcf9c3 100644
2577 --- a/init/main.c
2578 +++ b/init/main.c
2579 @@ -120,6 +120,7 @@ extern void softirq_init(void);
2580  char __initdata boot_command_line[COMMAND_LINE_SIZE];
2581  /* Untouched saved command line (eg. for /proc) */
2582  char *saved_command_line;
2583 +EXPORT_SYMBOL_GPL(saved_command_line);
2584  /* Command line for parameter parsing */
2585  static char *static_command_line;
2586  
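saved_command_line appears to be exported because the TuxOnIce core (which may be built modular, per CONFIG_TOI_EXPORTS below) records the boot command line in its boot-kernel data, so a mismatched kernel can be noticed at resume time. A hedged sketch of that use, assuming kernel/power/tuxonice.h (added later in this patch) is in scope:

	#include <linux/string.h>

	static void example_record_cmdline(void)
	{
		/* toi_bkd and COMMAND_LINE_SIZE come from tuxonice.h / asm/setup.h */
		strlcpy(toi_bkd.toi_nosave_commandline, saved_command_line,
			COMMAND_LINE_SIZE);
	}
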
2587 diff --git a/kernel/cpu.c b/kernel/cpu.c
2588 index f17e985..214686f 100644
2589 --- a/kernel/cpu.c
2590 +++ b/kernel/cpu.c
2591 @@ -427,6 +427,7 @@ int disable_nonboot_cpus(void)
2592         cpu_maps_update_done();
2593         return error;
2594  }
2595 +EXPORT_SYMBOL_GPL(disable_nonboot_cpus);
2596  
2597  void __ref enable_nonboot_cpus(void)
2598  {
2599 @@ -451,6 +452,7 @@ void __ref enable_nonboot_cpus(void)
2600  out:
2601         cpu_maps_update_done();
2602  }
2603 +EXPORT_SYMBOL_GPL(enable_nonboot_cpus);
2604  #endif /* CONFIG_PM_SLEEP_SMP */
2605  
2606  #endif /* CONFIG_SMP */
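disable_nonboot_cpus()/enable_nonboot_cpus() are exported so that a modular hibernation core can do its single-CPU work, such as the atomic copy (compare the TOI_LATE_CPU_HOTPLUG action flag later in this patch). A minimal sketch of the bracketing pattern (illustrative only):

	#include <linux/cpu.h>

	static int example_single_cpu_section(void)
	{
		int error = disable_nonboot_cpus();

		if (error)
			return error;
		/* ... work that must run on the boot CPU only ... */
		enable_nonboot_cpus();
		return 0;
	}
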
2607 diff --git a/kernel/fork.c b/kernel/fork.c
2608 index d8ad2c6..0aa6946 100644
2609 --- a/kernel/fork.c
2610 +++ b/kernel/fork.c
2611 @@ -77,6 +77,7 @@ int max_threads;              /* tunable limit on nr_threads */
2612  DEFINE_PER_CPU(unsigned long, process_counts) = 0;
2613  
2614  __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
2615 +EXPORT_SYMBOL_GPL(tasklist_lock);
2616  
2617  int nr_processes(void)
2618  {
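tasklist_lock is exported for a similar reason: module code that walks all tasks (as the freezer-failure report in kernel/power/process.c below does) must hold it. A minimal sketch (illustrative only):

	#include <linux/sched.h>

	static unsigned int example_count_threads(void)
	{
		struct task_struct *g, *p;
		unsigned int nr = 0;

		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			nr++;
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);

		return nr;
	}
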
2619 diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
2620 index dcd165f..5539ad0 100644
2621 --- a/kernel/power/Kconfig
2622 +++ b/kernel/power/Kconfig
2623 @@ -38,6 +38,13 @@ config CAN_PM_TRACE
2624         def_bool y
2625         depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
2626  
2627 +config FS_FREEZER_DEBUG
2628 +       bool "Filesystem freezer debugging"
2629 +       depends on PM_DEBUG
2630 +       default n
2631 +       ---help---
2632 +       This option enables debugging of the filesystem freezing code.
2633 +
2634  config PM_TRACE
2635         bool
2636         help
2637 @@ -179,6 +186,256 @@ config PM_STD_PARTITION
2638           suspended image to. It will simply pick the first available swap 
2639           device.
2640  
2641 +menuconfig TOI_CORE
2642 +       tristate "Enhanced Hibernation (TuxOnIce)"
2643 +       depends on HIBERNATION
2644 +       default y
2645 +       ---help---
2646 +         TuxOnIce is the 'new and improved' suspend support.
2647 +         
2648 +         See the TuxOnIce home page (tuxonice.net)
2649 +         for FAQs, HOWTOs and other documentation.
2650 +
2651 +       comment "Image Storage (you need at least one allocator)"
2652 +               depends on TOI_CORE
2653 +       
2654 +       config TOI_FILE
2655 +               tristate "File Allocator"
2656 +               depends on TOI_CORE
2657 +               default y
2658 +               ---help---
2659 +                 This option enables support for storing an image in a
2660 +                 simple file. This should work, but is still being
2661 +                 tested.
2662 +
2663 +       config TOI_SWAP
2664 +               tristate "Swap Allocator"
2665 +               depends on TOI_CORE && SWAP
2666 +               default y
2667 +               ---help---
2668 +                 This option enables support for storing an image in your
2669 +                 swap space.
2670 +
2671 +       comment "General Options"
2672 +               depends on TOI_CORE
2673 +
2674 +       config TOI_DEFAULT_PRE_HIBERNATE
2675 +               string "Default pre-hibernate command"
2676 +               depends on TOI_CORE
2677 +               ---help---
2678 +                 This entry allows you to specify a command to be run prior
2679 +                 to starting a hibernation cycle. If this command returns
2680 +                 a non-zero result code, hibernating will be aborted. If
2681 +                 you're starting hibernation via the hibernate script,
2682 +                 this value should probably be blank.
2683 +
2684 +       config TOI_DEFAULT_POST_HIBERNATE
2685 +               string "Default post-resume command"
2686 +               depends on TOI_CORE
2687 +               ---help---
2688 +                 This entry allows you to specify a command to be run after
2689 +                 completing a hibernation cycle. The return code of this
2690 +                 command is ignored. If you're starting hibernation via the
2691 +                 hibernate script, this value should probably be blank.
2692 +
2693 +       config TOI_CRYPTO
2694 +               tristate "Compression support"
2695 +               depends on TOI_CORE && CRYPTO
2696 +               default y
2697 +               ---help---
2698 +                 This option adds support for using cryptoapi compression
2699 +                 algorithms. Compression is particularly useful as
2700 +                 the LZF support that comes with the TuxOnIce patch can double
2701 +                 your suspend and resume speed.
2702 +
2703 +                 You probably want this, so say Y here.
2704 +
2705 +       comment "No compression support available without Cryptoapi support."
2706 +               depends on TOI_CORE && !CRYPTO
2707 +
2708 +       config TOI_USERUI
2709 +               tristate "Userspace User Interface support"
2710 +               depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE)
2711 +               default y
2712 +               ---help---
2713 +                 This option enables support for a userspace-based user interface
2714 +                 to TuxOnIce, which allows you to have a nice display while suspending
2715 +                 and resuming, and also enables features such as pressing escape to
2716 +                 cancel a cycle or interactive debugging.
2717 +
2718 +       config TOI_USERUI_DEFAULT_PATH
2719 +               string "Default userui program location"
2720 +               default "/usr/local/sbin/tuxonice_fbsplash"
2721 +               depends on TOI_USERUI
2722 +               ---help---
2723 +                 This entry allows you to specify a default path to the userui binary.
2724 +
2725 +       config TOI_KEEP_IMAGE
2726 +               bool "Allow Keep Image Mode"
2727 +               depends on TOI_CORE
2728 +               ---help---
2729 +                 This option allows you to keep an image and reuse it. It is intended
2730 +                 __ONLY__ for use with systems where all filesystems are mounted read-
2731 +                 only (kiosks, for example). To use it, compile this option in and boot
2732 +                 normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend.
2733 +                 When you resume, the image will not be removed. You will be unable to turn
2734 +                 off swap partitions (assuming you are using the swap allocator), but future
2735 +                 suspends simply do a power-down. The image can be updated using the
2736 +                 kernel command line parameter suspend_act= to turn off the keep image
2737 +                 bit. Keep image mode is a little less user friendly on purpose - it
2738 +                 should not be used without thought!
2739 +
2740 +       config TOI_REPLACE_SWSUSP
2741 +               bool "Replace swsusp by default"
2742 +               default y
2743 +               depends on TOI_CORE
2744 +               ---help---
2745 +                 TuxOnIce can replace swsusp. This option makes that the default state,
2746 +                 requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want
2747 +                 to use the vanilla kernel functionality. Note that your initrd/ramfs will
2748 +                 need to do this before trying to resume, too.
2749 +                 With overriding of swsusp enabled, echoing disk to /sys/power/state will
2750 +                 start a TuxOnIce cycle (see the usage sketch after this Kconfig section).
2751 +                 If resume= doesn't specify an allocator and both the swap and file
2752 +                 allocators are compiled in, the swap allocator will be used by default.
2753 +
2754 +       config TOI_IGNORE_LATE_INITCALL
2755 +               bool "Wait for initrd/ramfs to run, by default"
2756 +               default n
2757 +               depends on TOI_CORE
2758 +               ---help---
2759 +                 When booting, TuxOnIce can check for an image and start to resume prior
2760 +                 to any initrd/ramfs running (via a late initcall).
2761 +
2762 +                 If you don't have an initrd/ramfs, this early resume is what you
2763 +                 want to happen - without it you won't be able to safely resume - so
2764 +                 you should set this option to 'No'.
2765 +
2766 +                 If, however, you want your initrd/ramfs to run anyway before resuming,
2767 +                 you need to tell TuxOnIce to ignore that earlier opportunity to resume.
2768 +                 This can be done either by using this compile time option, or by
2769 +                 overriding this option with the boot-time parameter toi_initramfs_resume_only=1.
2770 +
2771 +                 Note that if TuxOnIce can't resume at the earlier opportunity, the
2772 +                 value of this option won't matter - the initramfs/initrd (if any) will
2773 +                 run anyway.
2774 +
2775 +       menuconfig TOI_CLUSTER
2776 +               tristate "Cluster support"
2777 +               default n
2778 +               depends on TOI_CORE && NET && BROKEN
2779 +               ---help---
2780 +                 Support for linking multiple machines in a cluster so that they suspend
2781 +                 and resume together.
2782 +
2783 +       config TOI_DEFAULT_CLUSTER_INTERFACE
2784 +               string "Default cluster interface"
2785 +               depends on TOI_CLUSTER
2786 +               ---help---
2787 +                 The default interface on which to communicate with other nodes in
2788 +                 the cluster.
2789 +                 
2790 +                 If no value is set here, cluster support will be disabled by default.
2791 +
2792 +       config TOI_DEFAULT_CLUSTER_KEY
2793 +               string "Default cluster key"
2794 +               default "Default"
2795 +               depends on TOI_CLUSTER
2796 +               ---help---
2797 +                 The default key used by this node. All nodes in the same cluster
2798 +                 have the same key. Multiple clusters may coexist on the same lan
2799 +                 by using different values for this key.
2800 +
2801 +       config TOI_CLUSTER_IMAGE_TIMEOUT
2802 +               int "Timeout when checking for image"
2803 +               default 15
2804 +               depends on TOI_CLUSTER
2805 +               ---help---
2806 +                 Timeout (seconds) before continuing to boot when waiting to see
2807 +                 whether other nodes might have an image. Set to -1 to wait
2808 +                 indefinitely. If WAIT_UNTIL_NODES is non-zero, we might continue
2809 +                 booting sooner than this timeout.
2810 +
2811 +       config TOI_CLUSTER_WAIT_UNTIL_NODES
2812 +               int "Nodes without image before continuing"
2813 +               default 0
2814 +               depends on TOI_CLUSTER
2815 +               ---help---
2816 +                 When booting and no image is found, we wait to see if other nodes
2817 +                 have an image before continuing to boot. This value lets us
2818 +                 continue after seeing a certain number of nodes without an image,
2819 +                 instead of continuing to wait for the timeout. Set to 0 to only
2820 +                 use the timeout.
2821 +
2822 +       config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE
2823 +               string "Default pre-hibernate script"
2824 +               depends on TOI_CLUSTER
2825 +               ---help---
2826 +                 The default script to be called when starting to hibernate.
2827 +
2828 +       config TOI_DEFAULT_CLUSTER_POST_HIBERNATE
2829 +               string "Default post-hibernate script"
2830 +               depends on TOI_CLUSTER
2831 +               ---help---
2832 +                 The default script to be called after resuming from hibernation.
2833 +
2834 +       config TOI_DEFAULT_WAIT
2835 +               int "Default waiting time for emergency boot messages"
2836 +               default "25"
2837 +               range -1 32768
2838 +               depends on TOI_CORE
2839 +               help
2840 +                 TuxOnIce can display warnings very early in the process of resuming,
2841 +                 if (for example) it appears that you have booted a kernel that doesn't
2842 +                 match an image on disk. It can then give you the opportunity to either
2843 +                 continue booting that kernel, or reboot the machine. This option can be
2844 +                 used to control how long to wait in such circumstances. -1 means wait
2845 +                 forever. 0 means don't wait at all (do the default action, which will
2846 +                 generally be to continue booting and remove the image). Values of 1 or
2847 +                 more indicate a number of seconds (up to 255) to wait before doing the
2848 +                 default.
2849 +
2850 +       config TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE
2851 +               int "Default extra pages allowance"
2852 +               default "500"
2853 +               range 500 32768
2854 +               depends on TOI_CORE
2855 +               help
2856 +                 This value controls the default for the allowance TuxOnIce makes for
2857 +                 drivers to allocate extra memory during the atomic copy. The default
2858 +                 value of 500 will be okay if you're not using DRI. If you are using
2859 +                 DRI, the easiest way to find what value to use is to try to hibernate
2860 +                 and look at how many pages were actually needed in the sysfs entry
2861 +                 /sys/power/tuxonice/debug_info (first number on the last line), adding
2862 +                 a little extra because the value is not always the same.
2863 +
2864 +       config TOI_CHECKSUM
2865 +               bool "Checksum pageset2"
2866 +               default n
2867 +               depends on TOI_CORE
2868 +               select CRYPTO
2869 +               select CRYPTO_ALGAPI
2870 +               select CRYPTO_MD4
2871 +               ---help---
2872 +                 Adds support for checksumming pageset2 pages, to ensure you really get an
2873 +                 atomic copy. Since some filesystems (XFS especially) change metadata even
2874 +                 when there's no other activity, we need this to check for pages that have
2875 +                 been changed while we were saving the page cache. If your debugging output
2876 +                 always says no pages were resaved, you may be able to safely disable this
2877 +                 option.
2878 +
2879 +config TOI
2880 +       bool
2881 +       depends on TOI_CORE!=n
2882 +       default y
2883 +
2884 +config TOI_EXPORTS
2885 +       bool
2886 +       depends on TOI_SWAP=m || TOI_FILE=m || \
2887 +               TOI_CRYPTO=m || TOI_CLUSTER=m || \
2888 +               TOI_USERUI=m || TOI_CORE=m
2889 +       default y
2890 +
2891  config APM_EMULATION
2892         tristate "Advanced Power Management Emulation"
2893         depends on PM && SYS_SUPPORTS_APM_EMULATION
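Taken together, the options above give the runtime switches quoted in the help texts. Both commands below are taken verbatim from the TOI_REPLACE_SWSUSP help; this is the usage sketch referenced there:

	# Revert to the vanilla swsusp behaviour at runtime:
	echo 0 > /sys/power/tuxonice/replace_swsusp

	# With replace_swsusp left enabled, start a TuxOnIce cycle:
	echo disk > /sys/power/state
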
2894 diff --git a/kernel/power/Makefile b/kernel/power/Makefile
2895 index 597823b..61d7360 100644
2896 --- a/kernel/power/Makefile
2897 +++ b/kernel/power/Makefile
2898 @@ -4,6 +4,35 @@ EXTRA_CFLAGS   +=      -DDEBUG
2899  endif
2900  
2901  obj-y                          := main.o
2902 +
2903 +tuxonice_core-objs := tuxonice_modules.o tuxonice_sysfs.o tuxonice_highlevel.o \
2904 +               tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \
2905 +               tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \
2906 +               tuxonice_power_off.o tuxonice_atomic_copy.o
2907 +
2908 +obj-$(CONFIG_TOI)              += tuxonice_builtin.o
2909 +
2910 +ifdef CONFIG_PM_DEBUG
2911 +tuxonice_core-objs             += tuxonice_alloc.o
2912 +endif
2913 +
2914 +ifdef CONFIG_TOI_CHECKSUM
2915 +tuxonice_core-objs             += tuxonice_checksum.o
2916 +endif
2917 +
2918 +ifdef CONFIG_NET
2919 +tuxonice_core-objs             += tuxonice_storage.o tuxonice_netlink.o
2920 +endif
2921 +
2922 +obj-$(CONFIG_TOI_CORE)         += tuxonice_core.o
2923 +obj-$(CONFIG_TOI_CRYPTO)       += tuxonice_compress.o
2924 +
2925 +obj-$(CONFIG_TOI_SWAP)         += tuxonice_block_io.o tuxonice_swap.o
2926 +obj-$(CONFIG_TOI_FILE)         += tuxonice_block_io.o tuxonice_file.o
2927 +obj-$(CONFIG_TOI_CLUSTER)      += tuxonice_cluster.o
2928 +
2929 +obj-$(CONFIG_TOI_USERUI)       += tuxonice_userui.o
2930 +
2931  obj-$(CONFIG_PM_SLEEP)         += process.o console.o
2932  obj-$(CONFIG_HIBERNATION)      += swsusp.o disk.o snapshot.o swap.o user.o
2933  
2934 diff --git a/kernel/power/console.c b/kernel/power/console.c
2935 index b8628be..0d11c15 100644
2936 --- a/kernel/power/console.c
2937 +++ b/kernel/power/console.c
2938 @@ -68,6 +68,7 @@ int pm_prepare_console(void)
2939         kmsg_redirect = SUSPEND_CONSOLE;
2940         return 0;
2941  }
2942 +EXPORT_SYMBOL_GPL(pm_prepare_console);
2943  
2944  void pm_restore_console(void)
2945  {
2946 @@ -80,4 +81,5 @@ void pm_restore_console(void)
2947         release_console_sem();
2948         kmsg_redirect = orig_kmsg;
2949  }
2950 +EXPORT_SYMBOL_GPL(pm_restore_console);
2951  #endif
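pm_prepare_console()/pm_restore_console() are exported so that the TuxOnIce core, even when modular, can switch to the suspend console for a cycle exactly as the built-in paths do (compare the TOI_PM_PREPARE_CONSOLE action flag later in this patch). A minimal sketch of the bracketing (illustrative only; do_cycle is a hypothetical callback):

	static int example_console_bracket(int (*do_cycle)(void))
	{
		int error = pm_prepare_console();

		if (error)
			return error;
		error = do_cycle();
		pm_restore_console();
		return error;
	}
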
2952 diff --git a/kernel/power/disk.c b/kernel/power/disk.c
2953 index bbd85c6..3953914 100644
2954 --- a/kernel/power/disk.c
2955 +++ b/kernel/power/disk.c
2956 @@ -24,10 +24,12 @@
2957  #include <linux/ftrace.h>
2958  
2959  #include "power.h"
2960 -
2961 +#include "tuxonice.h"
2962  
2963  static int noresume = 0;
2964 -static char resume_file[256] = CONFIG_PM_STD_PARTITION;
2965 +char resume_file[256] = CONFIG_PM_STD_PARTITION;
2966 +EXPORT_SYMBOL_GPL(resume_file);
2967 +
2968  dev_t swsusp_resume_device;
2969  sector_t swsusp_resume_block;
2970  
2971 @@ -105,55 +107,60 @@ static int hibernation_test(int level) { return 0; }
2972   *     hibernation
2973   */
2974  
2975 -static int platform_begin(int platform_mode)
2976 +int platform_begin(int platform_mode)
2977  {
2978         return (platform_mode && hibernation_ops) ?
2979                 hibernation_ops->begin() : 0;
2980  }
2981 +EXPORT_SYMBOL_GPL(platform_begin);
2982  
2983  /**
2984   *     platform_end - tell the platform driver that we've entered the
2985   *     working state
2986   */
2987  
2988 -static void platform_end(int platform_mode)
2989 +void platform_end(int platform_mode)
2990  {
2991         if (platform_mode && hibernation_ops)
2992                 hibernation_ops->end();
2993  }
2994 +EXPORT_SYMBOL_GPL(platform_end);
2995  
2996  /**
2997   *     platform_pre_snapshot - prepare the machine for hibernation using the
2998   *     platform driver if so configured and return an error code if it fails
2999   */
3000  
3001 -static int platform_pre_snapshot(int platform_mode)
3002 +int platform_pre_snapshot(int platform_mode)
3003  {
3004         return (platform_mode && hibernation_ops) ?
3005                 hibernation_ops->pre_snapshot() : 0;
3006  }
3007 +EXPORT_SYMBOL_GPL(platform_pre_snapshot);
3008  
3009  /**
3010   *     platform_leave - prepare the machine for switching to the normal mode
3011   *     of operation using the platform driver (called with interrupts disabled)
3012   */
3013  
3014 -static void platform_leave(int platform_mode)
3015 +void platform_leave(int platform_mode)
3016  {
3017         if (platform_mode && hibernation_ops)
3018                 hibernation_ops->leave();
3019  }
3020 +EXPORT_SYMBOL_GPL(platform_leave);
3021  
3022  /**
3023   *     platform_finish - switch the machine to the normal mode of operation
3024   *     using the platform driver (must be called after platform_prepare())
3025   */
3026  
3027 -static void platform_finish(int platform_mode)
3028 +void platform_finish(int platform_mode)
3029  {
3030         if (platform_mode && hibernation_ops)
3031                 hibernation_ops->finish();
3032  }
3033 +EXPORT_SYMBOL_GPL(platform_finish);
3034  
3035  /**
3036   *     platform_pre_restore - prepare the platform for the restoration from a
3037 @@ -161,11 +168,12 @@ static void platform_finish(int platform_mode)
3038   *     called, platform_restore_cleanup() must be called.
3039   */
3040  
3041 -static int platform_pre_restore(int platform_mode)
3042 +int platform_pre_restore(int platform_mode)
3043  {
3044         return (platform_mode && hibernation_ops) ?
3045                 hibernation_ops->pre_restore() : 0;
3046  }
3047 +EXPORT_SYMBOL_GPL(platform_pre_restore);
3048  
3049  /**
3050   *     platform_restore_cleanup - switch the platform to the normal mode of
3051 @@ -174,22 +182,24 @@ static int platform_pre_restore(int platform_mode)
3052   *     regardless of the result of platform_pre_restore().
3053   */
3054  
3055 -static void platform_restore_cleanup(int platform_mode)
3056 +void platform_restore_cleanup(int platform_mode)
3057  {
3058         if (platform_mode && hibernation_ops)
3059                 hibernation_ops->restore_cleanup();
3060  }
3061 +EXPORT_SYMBOL_GPL(platform_restore_cleanup);
3062  
3063  /**
3064   *     platform_recover - recover the platform from a failure to suspend
3065   *     devices.
3066   */
3067  
3068 -static void platform_recover(int platform_mode)
3069 +void platform_recover(int platform_mode)
3070  {
3071         if (platform_mode && hibernation_ops && hibernation_ops->recover)
3072                 hibernation_ops->recover();
3073  }
3074 +EXPORT_SYMBOL_GPL(platform_recover);
3075  
3076  /**
3077   *     create_image - freeze devices that need to be frozen with interrupts
3078 @@ -393,6 +403,7 @@ int hibernation_restore(int platform_mode)
3079         pm_restore_console();
3080         return error;
3081  }
3082 +EXPORT_SYMBOL_GPL(hibernation_platform_enter);
3083  
3084  /**
3085   *     hibernation_platform_enter - enter the hibernation state using the
3086 @@ -508,6 +519,9 @@ int hibernate(void)
3087  {
3088         int error;
3089  
3090 +       if (test_action_state(TOI_REPLACE_SWSUSP))
3091 +               return toi_try_hibernate();
3092 +
3093         mutex_lock(&pm_mutex);
3094         /* The snapshot device should not be opened while we're running */
3095         if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
3096 @@ -580,10 +594,19 @@ int hibernate(void)
3097   *
3098   */
3099  
3100 -static int software_resume(void)
3101 +int software_resume(void)
3102  {
3103         int error;
3104         unsigned int flags;
3105 +       resume_attempted = 1;
3106 +
3107 +       /*
3108 +        * We can't know (until an image header - if any - is loaded) whether
3109 +        * we did override swsusp. We therefore ensure that both are tried.
3110 +        */
3111 +       if (test_action_state(TOI_REPLACE_SWSUSP))
3112 +               printk(KERN_INFO "Replacing swsusp.\n");
3113 +       toi_try_resume();
3114  
3115         /*
3116          * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
3117 @@ -882,6 +905,7 @@ static int __init resume_offset_setup(char *str)
3118  static int __init noresume_setup(char *str)
3119  {
3120         noresume = 1;
3121 +       set_toi_state(TOI_NORESUME_SPECIFIED);
3122         return 1;
3123  }
3124  
3125 diff --git a/kernel/power/main.c b/kernel/power/main.c
3126 index 540b16b..f2ad48c 100644
3127 --- a/kernel/power/main.c
3128 +++ b/kernel/power/main.c
3129 @@ -26,6 +26,7 @@
3130  #include "power.h"
3131  
3132  DEFINE_MUTEX(pm_mutex);
3133 +EXPORT_SYMBOL_GPL(pm_mutex);
3134  
3135  unsigned int pm_flags;
3136  EXPORT_SYMBOL(pm_flags);
3137 @@ -34,7 +35,8 @@ EXPORT_SYMBOL(pm_flags);
3138  
3139  /* Routines for PM-transition notifications */
3140  
3141 -static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
3142 +BLOCKING_NOTIFIER_HEAD(pm_chain_head);
3143 +EXPORT_SYMBOL_GPL(pm_chain_head);
3144  
3145  int register_pm_notifier(struct notifier_block *nb)
3146  {
3147 @@ -204,6 +206,7 @@ void suspend_set_ops(struct platform_suspend_ops *ops)
3148         suspend_ops = ops;
3149         mutex_unlock(&pm_mutex);
3150  }
3151 +EXPORT_SYMBOL_GPL(pm_notifier_call_chain);
3152  
3153  /**
3154   * suspend_valid_only_mem - generic memory-only valid callback
3155 @@ -441,6 +444,7 @@ static int enter_state(suspend_state_t state)
3156         mutex_unlock(&pm_mutex);
3157         return error;
3158  }
3159 +EXPORT_SYMBOL_GPL(suspend_devices_and_enter);
3160  
3161  
3162  /**
3163 @@ -463,6 +467,7 @@ EXPORT_SYMBOL(pm_suspend);
3164  #endif /* CONFIG_SUSPEND */
3165  
3166  struct kobject *power_kobj;
3167 +EXPORT_SYMBOL_GPL(power_kobj);
3168  
3169  /**
3170   *     state - control system power state.
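Exporting pm_chain_head (and, per the hunk above, pm_notifier_call_chain) lets a modular hibernation core raise the standard PM notifications around its own cycle. A minimal sketch using the notifier constants from include/linux/suspend.h (illustrative only; do_cycle is a hypothetical callback):

	#include <linux/suspend.h>

	static int example_notify_cycle(int (*do_cycle)(void))
	{
		int error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);

		if (!error)
			error = do_cycle();
		pm_notifier_call_chain(PM_POST_HIBERNATION);
		return error;
	}
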
3171 diff --git a/kernel/power/power.h b/kernel/power/power.h
3172 index acc0c10..777fc05 100644
3173 --- a/kernel/power/power.h
3174 +++ b/kernel/power/power.h
3175 @@ -1,3 +1,10 @@
3176 +/*
3177 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
3178 + */
3179 +
3180 +#ifndef KERNEL_POWER_POWER_H
3181 +#define KERNEL_POWER_POWER_H
3182 +
3183  #include <linux/suspend.h>
3184  #include <linux/suspend_ioctls.h>
3185  #include <linux/utsname.h>
3186 @@ -21,18 +28,22 @@ struct swsusp_info {
3187  extern int arch_hibernation_header_save(void *addr, unsigned int max_size);
3188  extern int arch_hibernation_header_restore(void *addr);
3189  
3190 -static inline int init_header_complete(struct swsusp_info *info)
3191 +static inline int init_swsusp_header_complete(struct swsusp_info *info)
3192  {
3193         return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE);
3194  }
3195  
3196 -static inline char *check_image_kernel(struct swsusp_info *info)
3197 +static inline char *check_swsusp_image_kernel(struct swsusp_info *info)
3198  {
3199         return arch_hibernation_header_restore(info) ?
3200                         "architecture specific data" : NULL;
3201  }
3202 +#else
3203 +extern char *check_swsusp_image_kernel(struct swsusp_info *info);
3204  #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
3205 +extern int init_swsusp_header(struct swsusp_info *info);
3206  
3207 +extern char resume_file[256];
3208  /*
3209   * Keep some memory free so that I/O operations can succeed without paging
3210   * [Might this be more than 4 MB?]
3211 @@ -49,6 +60,7 @@ static inline char *check_image_kernel(struct swsusp_info *info)
3212  extern int hibernation_snapshot(int platform_mode);
3213  extern int hibernation_restore(int platform_mode);
3214  extern int hibernation_platform_enter(void);
3215 +extern void platform_recover(int platform_mode);
3216  #endif
3217  
3218  extern int pfn_is_nosave(unsigned long);
3219 @@ -63,6 +75,8 @@ static struct kobj_attribute _name##_attr = { \
3220         .store  = _name##_store,                \
3221  }
3222  
3223 +extern struct pbe *restore_pblist;
3224 +
3225  /* Preferred image size in bytes (default 500 MB) */
3226  extern unsigned long image_size;
3227  extern int in_suspend;
3228 @@ -223,3 +237,90 @@ static inline void suspend_thaw_processes(void)
3229  {
3230  }
3231  #endif
3232 +
3233 +extern struct page *saveable_page(unsigned long pfn);
3234 +#ifdef CONFIG_HIGHMEM
3235 +extern struct page *saveable_highmem_page(unsigned long pfn);
3236 +#else
3237 +static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
3238 +#endif
3239 +
3240 +#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe))
3241 +extern struct list_head nosave_regions;
3242 +
3243 +/**
3244 + *     This structure represents a range of page frames the contents of which
3245 + *     should not be saved during the suspend.
3246 + */
3247 +
3248 +struct nosave_region {
3249 +       struct list_head list;
3250 +       unsigned long start_pfn;
3251 +       unsigned long end_pfn;
3252 +};
3253 +
3254 +#ifndef PHYS_PFN_OFFSET
3255 +#define PHYS_PFN_OFFSET 0
3256 +#endif
3257 +
3258 +#define ZONE_START(thiszone) ((thiszone)->zone_start_pfn - PHYS_PFN_OFFSET)
3259 +
3260 +#define BM_END_OF_MAP  (~0UL)
3261 +
3262 +#define BM_BITS_PER_BLOCK      (PAGE_SIZE << 3)
3263 +
3264 +struct bm_block {
3265 +       struct bm_block *next;          /* next element of the list */
3266 +       unsigned long start_pfn;        /* pfn represented by the first bit */
3267 +       unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
3268 +       unsigned long *data;    /* bitmap representing pages */
3269 +};
3270 +
3271 +struct zone_bitmap {
3272 +       struct zone_bitmap *next;       /* next element of the list */
3273 +       unsigned long start_pfn;        /* minimal pfn in this zone */
3274 +       unsigned long end_pfn;          /* maximal pfn in this zone plus 1 */
3275 +       struct bm_block *bm_blocks;     /* list of bitmap blocks */
3276 +       struct bm_block *cur_block;     /* recently used bitmap block */
3277 +};
3278 +
3279 +/* struct bm_position is used for browsing memory bitmaps */
3280 +
3281 +struct bm_position {
3282 +       struct zone_bitmap *zone_bm;
3283 +       struct bm_block *block;
3284 +       int bit;
3285 +};
3286 +
3287 +struct memory_bitmap {
3288 +       struct zone_bitmap *zone_bm_list;       /* list of zone bitmaps */
3289 +       struct linked_page *p_list;     /* list of pages used to store zone
3290 +                                        * bitmap objects and bitmap block
3291 +                                        * objects
3292 +                                        */
3293 +       struct bm_position cur; /* most recently used bit position */
3294 +};
3295 +
3296 +extern int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
3297 +               int safe_needed);
3298 +extern void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
3299 +extern void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn);
3300 +extern void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn);
3301 +extern int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn);
3302 +extern unsigned long memory_bm_next_pfn(struct memory_bitmap *bm);
3303 +extern void memory_bm_position_reset(struct memory_bitmap *bm);
3304 +extern void memory_bm_clear(struct memory_bitmap *bm);
3305 +extern void memory_bm_copy(struct memory_bitmap *source,
3306 +               struct memory_bitmap *dest);
3307 +extern void memory_bm_dup(struct memory_bitmap *source,
3308 +               struct memory_bitmap *dest);
3309 +
3310 +#ifdef CONFIG_TOI
3311 +struct toi_module_ops;
3312 +extern int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
3313 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
3314 +extern int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
3315 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
3316 +#endif
3317 +
3318 +#endif
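memory_bm_read()/memory_bm_write() serialise a bitmap through a caller-supplied chunk-I/O callback rather than touching storage themselves. A hedged sketch of the callback's shape (illustrative only; a real implementation would hand the buffer to an allocator's I/O path):

	static int example_rw_chunk(int rw, struct toi_module_ops *owner,
				    char *buffer, int buffer_size)
	{
		/* rw is READ or WRITE; consume or fill buffer_size bytes at
		 * buffer. Return 0 on success or a negative error code. */
		return 0;
	}

It would then be passed as memory_bm_write(&bm, example_rw_chunk).
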
3319 diff --git a/kernel/power/process.c b/kernel/power/process.c
3320 index 278946a..6035bbf 100644
3321 --- a/kernel/power/process.c
3322 +++ b/kernel/power/process.c
3323 @@ -13,6 +13,10 @@
3324  #include <linux/module.h>
3325  #include <linux/syscalls.h>
3326  #include <linux/freezer.h>
3327 +#include <linux/buffer_head.h>
3328 +
3329 +int freezer_state;
3330 +EXPORT_SYMBOL_GPL(freezer_state);
3331  
3332  /* 
3333   * Timeout for stopping processes
3334 @@ -201,7 +205,8 @@ static int try_to_freeze_tasks(bool sig_only)
3335                 do_each_thread(g, p) {
3336                         task_lock(p);
3337                         if (freezing(p) && !freezer_should_skip(p))
3338 -                               printk(KERN_ERR " %s\n", p->comm);
3339 +                               printk(KERN_ERR " %s (%d) failed to freeze.\n",
3340 +                                               p->comm, p->pid);
3341                         cancel_freezing(p);
3342                         task_unlock(p);
3343                 } while_each_thread(g, p);
3344 @@ -221,22 +226,31 @@ int freeze_processes(void)
3345  {
3346         int error;
3347  
3348 -       printk("Freezing user space processes ... ");
3349 +       printk(KERN_INFO "Stopping fuse filesystems.\n");
3350 +       freeze_filesystems(FS_FREEZER_FUSE);
3351 +       freezer_state = FREEZER_FILESYSTEMS_FROZEN;
3352 +       printk(KERN_INFO "Freezing user space processes ... ");
3353         error = try_to_freeze_tasks(true);
3354         if (error)
3355                 goto Exit;
3356 -       printk("done.\n");
3357 +       printk(KERN_INFO "done.\n");
3358  
3359 -       printk("Freezing remaining freezable tasks ... ");
3360 +       sys_sync();
3361 +       printk(KERN_INFO "Stopping normal filesystems.\n");
3362 +       freeze_filesystems(FS_FREEZER_NORMAL);
3363 +       freezer_state = FREEZER_USERSPACE_FROZEN;
3364 +       printk(KERN_INFO "Freezing remaining freezable tasks ... ");
3365         error = try_to_freeze_tasks(false);
3366         if (error)
3367                 goto Exit;
3368         printk("done.");
3369 +       freezer_state = FREEZER_FULLY_ON;
3370   Exit:
3371         BUG_ON(in_atomic());
3372         printk("\n");
3373         return error;
3374  }
3375 +EXPORT_SYMBOL_GPL(freeze_processes);
3376  
3377  static void thaw_tasks(bool nosig_only)
3378  {
3379 @@ -257,11 +271,42 @@ static void thaw_tasks(bool nosig_only)
3380  
3381  void thaw_processes(void)
3382  {
3383 -       printk("Restarting tasks ... ");
3384 -       thaw_tasks(true);
3385 +       int old_state = freezer_state;
3386 +
3387 +       if (old_state == FREEZER_OFF)
3388 +               return;
3389 +
3390 +       /*
3391 +        * Change state beforehand because thawed tasks might submit I/O
3392 +        * immediately.
3393 +        */
3394 +       freezer_state = FREEZER_OFF;
3395 +
3396 +       printk(KERN_INFO "Restarting all filesystems ...\n");
3397 +       thaw_filesystems(FS_FREEZER_ALL);
3398 +
3399 +       printk(KERN_INFO "Restarting tasks ... ");
3400 +
3401 +       if (old_state == FREEZER_FULLY_ON)
3402 +               thaw_tasks(true);
3403         thaw_tasks(false);
3404         schedule();
3405         printk("done.\n");
3406  }
3407 +EXPORT_SYMBOL_GPL(thaw_processes);
3408  
3409  EXPORT_SYMBOL(refrigerator);
3410 +
3411 +void thaw_kernel_threads(void)
3412 +{
3413 +       freezer_state = FREEZER_USERSPACE_FROZEN;
3414 +       printk(KERN_INFO "Restarting normal filesystems.\n");
3415 +       thaw_filesystems(FS_FREEZER_NORMAL);
3416 +       thaw_tasks(true);
3417 +}
3418 +
3419 +/*
3420 + * It's ugly putting this EXPORT down here, but it's necessary so that it
3421 + * doesn't matter whether the fs-freezing patch is applied or not.
3422 + */
3423 +EXPORT_SYMBOL_GPL(thaw_kernel_threads);
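The constants used above (the freezer_state values and the FS_FREEZER_* flags passed to freeze_filesystems()/thaw_filesystems()) come from the filesystem-freezer portion of the patch, which is not in this hunk. A sketch of the state progression implied by freeze_processes()/thaw_processes(); the names are exactly those used above, while the declaration site and numeric values are assumptions for illustration:

	enum {
		FREEZER_OFF,			/* normal running system */
		FREEZER_FILESYSTEMS_FROZEN,	/* fuse filesystems stopped */
		FREEZER_USERSPACE_FROZEN,	/* + user space, normal filesystems */
		FREEZER_FULLY_ON,		/* + remaining freezable kernel threads */
	};
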
3424 diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
3425 index 5d2ab83..8fc59e9 100644
3426 --- a/kernel/power/snapshot.c
3427 +++ b/kernel/power/snapshot.c
3428 @@ -33,6 +33,8 @@
3429  #include <asm/io.h>
3430  
3431  #include "power.h"
3432 +#include "tuxonice_builtin.h"
3433 +#include "tuxonice_pagedir.h"
3434  
3435  static int swsusp_page_is_free(struct page *);
3436  static void swsusp_set_page_forbidden(struct page *);
3437 @@ -44,6 +46,10 @@ static void swsusp_unset_page_forbidden(struct page *);
3438   * directly to their "original" page frames.
3439   */
3440  struct pbe *restore_pblist;
3441 +EXPORT_SYMBOL_GPL(restore_pblist);
3442 +
3443 +int resume_attempted;
3444 +EXPORT_SYMBOL_GPL(resume_attempted);
3445  
3446  /* Pointer to an auxiliary buffer (1 page) */
3447  static void *buffer;
3448 @@ -86,6 +92,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed)
3449  
3450  unsigned long get_safe_page(gfp_t gfp_mask)
3451  {
3452 +       if (toi_running)
3453 +               return toi_get_nonconflicting_page();
3454 +
3455         return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
3456  }
3457  
3458 @@ -228,50 +237,14 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
3459   *     the represented memory area.
3460   */
3461  
3462 -#define BM_END_OF_MAP  (~0UL)
3463 -
3464 -#define BM_BITS_PER_BLOCK      (PAGE_SIZE << 3)
3465 -
3466 -struct bm_block {
3467 -       struct bm_block *next;          /* next element of the list */
3468 -       unsigned long start_pfn;        /* pfn represented by the first bit */
3469 -       unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
3470 -       unsigned long *data;    /* bitmap representing pages */
3471 -};
3472 -
3473  static inline unsigned long bm_block_bits(struct bm_block *bb)
3474  {
3475         return bb->end_pfn - bb->start_pfn;
3476  }
3477  
3478 -struct zone_bitmap {
3479 -       struct zone_bitmap *next;       /* next element of the list */
3480 -       unsigned long start_pfn;        /* minimal pfn in this zone */
3481 -       unsigned long end_pfn;          /* maximal pfn in this zone plus 1 */
3482 -       struct bm_block *bm_blocks;     /* list of bitmap blocks */
3483 -       struct bm_block *cur_block;     /* recently used bitmap block */
3484 -};
3485 -
3486 -/* strcut bm_position is used for browsing memory bitmaps */
3487 -
3488 -struct bm_position {
3489 -       struct zone_bitmap *zone_bm;
3490 -       struct bm_block *block;
3491 -       int bit;
3492 -};
3493 -
3494 -struct memory_bitmap {
3495 -       struct zone_bitmap *zone_bm_list;       /* list of zone bitmaps */
3496 -       struct linked_page *p_list;     /* list of pages used to store zone
3497 -                                        * bitmap objects and bitmap block
3498 -                                        * objects
3499 -                                        */
3500 -       struct bm_position cur; /* most recently used bit position */
3501 -};
3502 -
3503  /* Functions that operate on memory bitmaps */
3504  
3505 -static void memory_bm_position_reset(struct memory_bitmap *bm)
3506 +void memory_bm_position_reset(struct memory_bitmap *bm)
3507  {
3508         struct zone_bitmap *zone_bm;
3509  
3510 @@ -280,8 +253,9 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
3511         bm->cur.block = zone_bm->bm_blocks;
3512         bm->cur.bit = 0;
3513  }
3514 +EXPORT_SYMBOL_GPL(memory_bm_position_reset);
3515  
3516 -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
3517 +void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
3518  
3519  /**
3520   *     create_bm_block_list - create a list of block bitmap objects
3521 @@ -331,7 +305,7 @@ create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
3522    *    memory_bm_create - allocate memory for a memory bitmap
3523    */
3524  
3525 -static int
3526 +int
3527  memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
3528  {
3529         struct chain_allocator ca;
3530 @@ -406,15 +380,19 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
3531         memory_bm_free(bm, PG_UNSAFE_CLEAR);
3532         return -ENOMEM;
3533  }
3534 +EXPORT_SYMBOL_GPL(memory_bm_create);
3535  
3536  /**
3537    *    memory_bm_free - free memory occupied by the memory bitmap @bm
3538    */
3539  
3540 -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
3541 +void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
3542  {
3543         struct zone_bitmap *zone_bm;
3544  
3545 +       if (!bm->zone_bm_list)
3546 +               return;
3547 +
3548         /* Free the list of bit blocks for each zone_bitmap object */
3549         zone_bm = bm->zone_bm_list;
3550         while (zone_bm) {
3551 @@ -431,6 +409,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
3552         free_list_of_pages(bm->p_list, clear_nosave_free);
3553         bm->zone_bm_list = NULL;
3554  }
3555 +EXPORT_SYMBOL_GPL(memory_bm_free);
3556  
3557  /**
3558   *     memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
3559 @@ -474,7 +453,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
3560         return 0;
3561  }
3562  
3563 -static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
3564 +void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
3565  {
3566         void *addr;
3567         unsigned int bit;
3568 @@ -484,6 +463,7 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
3569         BUG_ON(error);
3570         set_bit(bit, addr);
3571  }
3572 +EXPORT_SYMBOL_GPL(memory_bm_set_bit);
3573  
3574  static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
3575  {
3576 @@ -497,7 +477,7 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
3577         return error;
3578  }
3579  
3580 -static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
3581 +void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
3582  {
3583         void *addr;
3584         unsigned int bit;
3585 @@ -507,8 +487,9 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
3586         BUG_ON(error);
3587         clear_bit(bit, addr);
3588  }
3589 +EXPORT_SYMBOL_GPL(memory_bm_clear_bit);
3590  
3591 -static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
3592 +int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
3593  {
3594         void *addr;
3595         unsigned int bit;
3596 @@ -518,6 +499,7 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
3597         BUG_ON(error);
3598         return test_bit(bit, addr);
3599  }
3600 +EXPORT_SYMBOL_GPL(memory_bm_test_bit);
3601  
3602  /**
3603   *     memory_bm_next_pfn - find the pfn that corresponds to the next set bit
3604 @@ -528,7 +510,7 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
3605   *     this function.
3606   */
3607  
3608 -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
3609 +unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
3610  {
3611         struct zone_bitmap *zone_bm;
3612         struct bm_block *bb;
3613 @@ -560,19 +542,167 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
3614         bm->cur.bit = bit + 1;
3615         return bb->start_pfn + bit;
3616  }
3617 +EXPORT_SYMBOL_GPL(memory_bm_next_pfn);
3618  
3619 -/**
3620 - *     This structure represents a range of page frames the contents of which
3621 - *     should not be saved during the suspend.
3622 - */
3623 +void memory_bm_clear(struct memory_bitmap *bm)
3624 +{
3625 +       unsigned long pfn;
3626  
3627 -struct nosave_region {
3628 -       struct list_head list;
3629 -       unsigned long start_pfn;
3630 -       unsigned long end_pfn;
3631 -};
3632 +       memory_bm_position_reset(bm);
3633 +       pfn = memory_bm_next_pfn(bm);
3634 +       while (pfn != BM_END_OF_MAP) {
3635 +               memory_bm_clear_bit(bm, pfn);
3636 +               pfn = memory_bm_next_pfn(bm);
3637 +       }
3638 +}
3639 +EXPORT_SYMBOL_GPL(memory_bm_clear);
3640 +
3641 +void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest)
3642 +{
3643 +       unsigned long pfn;
3644 +
3645 +       memory_bm_position_reset(source);
3646 +       pfn = memory_bm_next_pfn(source);
3647 +       while (pfn != BM_END_OF_MAP) {
3648 +               memory_bm_set_bit(dest, pfn);
3649 +               pfn = memory_bm_next_pfn(source);
3650 +       }
3651 +}
3652 +EXPORT_SYMBOL_GPL(memory_bm_copy);
3653 +
3654 +void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest)
3655 +{
3656 +       memory_bm_clear(dest);
3657 +       memory_bm_copy(source, dest);
3658 +}
3659 +EXPORT_SYMBOL_GPL(memory_bm_dup);
3660 +
3661 +#ifdef CONFIG_TOI
3662 +int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
3663 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
3664 +{
3665 +       int result = 0;
3666 +       unsigned int nr = 0;
3667 +       struct zone_bitmap *zone_bm;
3668 +       struct bm_block *bb;
3669 +
3670 +       if (!bm)
3671 +               return result;
3672 +
3673 +       for (zone_bm = bm->zone_bm_list; zone_bm; zone_bm = zone_bm->next)
3674 +               nr++;
3675 +
3676 +       result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int));
3677 +       if (result)
3678 +               return result;
3679 +
3680 +       for (zone_bm = bm->zone_bm_list; zone_bm; zone_bm = zone_bm->next) {
3681 +               result = (*rw_chunk)(WRITE, NULL, (char *) &zone_bm->start_pfn,
3682 +                               2 * sizeof(unsigned long));
3683 +               if (result)
3684 +                       return result;
3685 +
3686 +               nr = 0;
3687 +               for (bb = zone_bm->bm_blocks; bb; bb = bb->next)
3688 +                       nr++;
3689 +
3690 +               result = (*rw_chunk)(WRITE, NULL, (char *) &nr,
3691 +                               sizeof(unsigned int));
3692  
3693 -static LIST_HEAD(nosave_regions);
3694 +               if (result)
3695 +                       return result;
3696 +
3697 +               for (bb = zone_bm->bm_blocks; bb; bb = bb->next) {
3698 +                       result = (*rw_chunk)(WRITE, NULL, (char *) bb->data,
3699 +                                       PAGE_SIZE);
3700 +                       if (result)
3701 +                               return result;
3702 +               }
3703 +       }
3704 +
3705 +       return 0;
3706 +}
3707 +EXPORT_SYMBOL_GPL(memory_bm_write);
3708 +
3709 +int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
3710 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
3711 +{
3712 +       int result = 0;
3713 +       unsigned int nr;
3714 +       struct zone_bitmap *zone_bm;
3715 +       struct bm_block *bb;
3716 +       struct chain_allocator ca;
3717 +
3718 +       if (!bm)
3719 +               return result;
3720 +
3721 +       chain_init(&ca, GFP_ATOMIC, 0);
3722 +
3723 +       result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int));
3724 +       if (result)
3725 +               return result;
3726 +
3727 +       zone_bm = create_zone_bm_list(nr, &ca);
3728 +       bm->zone_bm_list = zone_bm;
3729 +       if (!zone_bm) {
3730 +               chain_free(&ca, PG_ANY);
3731 +               return -ENOMEM;
3732 +       }
3733 +
3734 +       for (zone_bm = bm->zone_bm_list; zone_bm; zone_bm = zone_bm->next) {
3735 +               unsigned long pfn;
3736 +
3737 +               result = (*rw_chunk)(READ, NULL, (char *) &zone_bm->start_pfn,
3738 +                               2 * sizeof(unsigned long));
3739 +               if (result)
3740 +                       return result;
3741 +
3742 +               result = (*rw_chunk)(READ, NULL, (char *) &nr,
3743 +                               sizeof(unsigned int));
3744 +
3745 +               if (result)
3746 +                       return result;
3747 +
3748 +               bb = create_bm_block_list(nr, &ca);
3749 +               zone_bm->bm_blocks = bb;
3750 +               zone_bm->cur_block = bb;
3751 +               if (!bb)
3752 +                       goto Free;
3753 +
3754 +               pfn = zone_bm->start_pfn;
3755 +
3756 +               for (bb = zone_bm->bm_blocks; bb; bb = bb->next) {
3757 +                       bb->data = get_image_page(GFP_ATOMIC, 0);
3758 +                       if (!bb->data)
3759 +                               goto Free;
3760 +
3761 +                       bb->start_pfn = pfn;
3762 +                       if (pfn + BM_BITS_PER_BLOCK > zone_bm->end_pfn)
3763 +                               bb->end_pfn = zone_bm->end_pfn;
3764 +                       else
3765 +                               bb->end_pfn = bb->start_pfn + BM_BITS_PER_BLOCK;
3766 +                       pfn = bb->end_pfn;
3767 +                       result = (*rw_chunk)(READ, NULL, (char *) bb->data,
3768 +                                       PAGE_SIZE);
3769 +                       if (result)
3770 +                               return result;
3771 +               }
3772 +       }
3773 +       bm->p_list = ca.chain;
3774 +       memory_bm_position_reset(bm);
3775 +
3776 +       return 0;
3777 +
3778 +Free:
3779 +       bm->p_list = ca.chain;
3780 +       memory_bm_free(bm, PG_ANY);
3781 +       return -ENOMEM;
3782 +}
3783 +EXPORT_SYMBOL_GPL(memory_bm_read);
3784 +#endif
3785 +
3786 +LIST_HEAD(nosave_regions);
3787 +EXPORT_SYMBOL_GPL(nosave_regions);
3788  
3789  /**
3790   *     register_nosave_region - register a range of page frames the contents
3791 @@ -809,7 +939,7 @@ static unsigned int count_free_highmem_pages(void)
3792   *     and it isn't a part of a free chunk of pages.
3793   */
3794  
3795 -static struct page *saveable_highmem_page(unsigned long pfn)
3796 +struct page *saveable_highmem_page(unsigned long pfn)
3797  {
3798         struct page *page;
3799  
3800 @@ -826,6 +956,7 @@ static struct page *saveable_highmem_page(unsigned long pfn)
3801  
3802         return page;
3803  }
3804 +EXPORT_SYMBOL_GPL(saveable_highmem_page);
3805  
3806  /**
3807   *     count_highmem_pages - compute the total number of saveable highmem
3808 @@ -851,8 +982,6 @@ unsigned int count_highmem_pages(void)
3809         }
3810         return n;
3811  }
3812 -#else
3813 -static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
3814  #endif /* CONFIG_HIGHMEM */
3815  
3816  /**
3817 @@ -864,7 +993,7 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
3818   *     a free chunk of pages.
3819   */
3820  
3821 -static struct page *saveable_page(unsigned long pfn)
3822 +struct page *saveable_page(unsigned long pfn)
3823  {
3824         struct page *page;
3825  
3826 @@ -884,6 +1013,7 @@ static struct page *saveable_page(unsigned long pfn)
3827  
3828         return page;
3829  }
3830 +EXPORT_SYMBOL_GPL(saveable_page);
3831  
3832  /**
3833   *     count_data_pages - compute the total number of saveable non-highmem
3834 @@ -1198,6 +1328,9 @@ asmlinkage int swsusp_save(void)
3835  {
3836         unsigned int nr_pages, nr_highmem;
3837  
3838 +       if (toi_running)
3839 +               return toi_post_context_save();
3840 +
3841         printk(KERN_INFO "PM: Creating hibernation image: \n");
3842  
3843         drain_local_pages(NULL);
3844 @@ -1238,14 +1371,14 @@ asmlinkage int swsusp_save(void)
3845  }
3846  
3847  #ifndef CONFIG_ARCH_HIBERNATION_HEADER
3848 -static int init_header_complete(struct swsusp_info *info)
3849 +int init_swsusp_header_complete(struct swsusp_info *info)
3850  {
3851         memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
3852         info->version_code = LINUX_VERSION_CODE;
3853         return 0;
3854  }
3855  
3856 -static char *check_image_kernel(struct swsusp_info *info)
3857 +char *check_swsusp_image_kernel(struct swsusp_info *info)
3858  {
3859         if (info->version_code != LINUX_VERSION_CODE)
3860                 return "kernel version";
3861 @@ -1259,6 +1392,7 @@ static char *check_image_kernel(struct swsusp_info *info)
3862                 return "machine";
3863         return NULL;
3864  }
3865 +EXPORT_SYMBOL_GPL(check_swsusp_image_kernel);
3866  #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
3867  
3868  unsigned long snapshot_get_image_size(void)
3869 @@ -1266,7 +1400,7 @@ unsigned long snapshot_get_image_size(void)
3870         return nr_copy_pages + nr_meta_pages + 1;
3871  }
3872  
3873 -static int init_header(struct swsusp_info *info)
3874 +int init_swsusp_header(struct swsusp_info *info)
3875  {
3876         memset(info, 0, sizeof(struct swsusp_info));
3877         info->num_physpages = num_physpages;
3878 @@ -1274,8 +1408,9 @@ static int init_header(struct swsusp_info *info)
3879         info->pages = snapshot_get_image_size();
3880         info->size = info->pages;
3881         info->size <<= PAGE_SHIFT;
3882 -       return init_header_complete(info);
3883 +       return init_swsusp_header_complete(info);
3884  }
3885 +EXPORT_SYMBOL_GPL(init_swsusp_header);
3886  
3887  /**
3888   *     pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
3889 @@ -1330,7 +1465,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
3890         if (!handle->offset) {
3891                 int error;
3892  
3893 -               error = init_header((struct swsusp_info *)buffer);
3894 +               error = init_swsusp_header((struct swsusp_info *)buffer);
3895                 if (error)
3896                         return error;
3897                 handle->buffer = buffer;
3898 @@ -1427,7 +1562,7 @@ static int check_header(struct swsusp_info *info)
3899  {
3900         char *reason;
3901  
3902 -       reason = check_image_kernel(info);
3903 +       reason = check_swsusp_image_kernel(info);
3904         if (!reason && info->num_physpages != num_physpages)
3905                 reason = "memory size";
3906         if (reason) {
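With the bitmap primitives above exported, a module can build and walk a bitmap of page frames. A minimal usage sketch (illustrative only; the pfn value is arbitrary and error handling is abbreviated):

	static int example_walk_bitmap(void)
	{
		struct memory_bitmap bm;
		unsigned long pfn;

		if (memory_bm_create(&bm, GFP_KERNEL, 0))
			return -ENOMEM;

		memory_bm_set_bit(&bm, 1234);	/* mark one (arbitrary) page frame */

		memory_bm_position_reset(&bm);
		for (pfn = memory_bm_next_pfn(&bm); pfn != BM_END_OF_MAP;
		     pfn = memory_bm_next_pfn(&bm))
			printk(KERN_INFO "pfn %lu is set\n", pfn);

		memory_bm_free(&bm, PG_UNSAFE_CLEAR);
		return 0;
	}
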
3907 diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h
3908 new file mode 100644
3909 index 0000000..34ddaa9
3910 --- /dev/null
3911 +++ b/kernel/power/tuxonice.h
3912 @@ -0,0 +1,210 @@
3913 +/*
3914 + * kernel/power/tuxonice.h
3915 + *
3916 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
3917 + *
3918 + * This file is released under the GPLv2.
3919 + *
3920 + * It contains declarations used throughout swsusp.
3921 + *
3922 + */
3923 +
3924 +#ifndef KERNEL_POWER_TOI_H
3925 +#define KERNEL_POWER_TOI_H
3926 +
3927 +#include <linux/delay.h>
3928 +#include <linux/bootmem.h>
3929 +#include <linux/suspend.h>
3930 +#include <linux/fs.h>
3931 +#include <linux/kmod.h>
3932 +#include <asm/setup.h>
3933 +#include "tuxonice_pageflags.h"
3934 +
3935 +#define TOI_CORE_VERSION "3.0-rc8"
3936 +
3937 +#define MY_BOOT_KERNEL_DATA_VERSION 1
3938 +
3939 +struct toi_boot_kernel_data {
3940 +       int version;
3941 +       int size;
3942 +       unsigned long toi_action;
3943 +       unsigned long toi_debug_state;
3944 +       u32 toi_default_console_level;
3945 +       int toi_io_time[2][2];
3946 +       char toi_nosave_commandline[COMMAND_LINE_SIZE];
3947 +};
3948 +
3949 +extern struct toi_boot_kernel_data toi_bkd;
3950 +
3951 +/* Location of boot kernel data struct in kernel being resumed */
3952 +extern unsigned long boot_kernel_data_buffer;
3953 +
3954 +/*              == Action states ==            */
3955 +
3956 +enum {
3957 +       TOI_REBOOT,
3958 +       TOI_PAUSE,
3959 +       TOI_LOGALL,
3960 +       TOI_CAN_CANCEL,
3961 +       TOI_KEEP_IMAGE,
3962 +       TOI_FREEZER_TEST,
3963 +       TOI_SINGLESTEP,
3964 +       TOI_PAUSE_NEAR_PAGESET_END,
3965 +       TOI_TEST_FILTER_SPEED,
3966 +       TOI_TEST_BIO,
3967 +       TOI_NO_PAGESET2,
3968 +       TOI_PM_PREPARE_CONSOLE,
3969 +       TOI_IGNORE_ROOTFS,
3970 +       TOI_REPLACE_SWSUSP,
3971 +       TOI_PAGESET2_FULL,
3972 +       TOI_ABORT_ON_RESAVE_NEEDED,
3973 +       TOI_NO_MULTITHREADED_IO,
3974 +       TOI_NO_DIRECT_LOAD,
3975 +       TOI_LATE_CPU_HOTPLUG,
3976 +       TOI_GET_MAX_MEM_ALLOCD,
3977 +       TOI_NO_FLUSHER_THREAD,
3978 +       TOI_NO_PS2_IF_UNNEEDED
3979 +};
3980 +
3981 +#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action))
3982 +
3983 +/*              == Result states ==            */
3984 +
3985 +enum {
3986 +       TOI_ABORTED,
3987 +       TOI_ABORT_REQUESTED,
3988 +       TOI_NOSTORAGE_AVAILABLE,
3989 +       TOI_INSUFFICIENT_STORAGE,
3990 +       TOI_FREEZING_FAILED,
3991 +       TOI_KEPT_IMAGE,
3992 +       TOI_WOULD_EAT_MEMORY,
3993 +       TOI_UNABLE_TO_FREE_ENOUGH_MEMORY,
3994 +       TOI_PM_SEM,
3995 +       TOI_DEVICE_REFUSED,
3996 +       TOI_EXTRA_PAGES_ALLOW_TOO_SMALL,
3997 +       TOI_UNABLE_TO_PREPARE_IMAGE,
3998 +       TOI_FAILED_MODULE_INIT,
3999 +       TOI_FAILED_MODULE_CLEANUP,
4000 +       TOI_FAILED_IO,
4001 +       TOI_OUT_OF_MEMORY,
4002 +       TOI_IMAGE_ERROR,
4003 +       TOI_PLATFORM_PREP_FAILED,
4004 +       TOI_CPU_HOTPLUG_FAILED,
4005 +       TOI_ARCH_PREPARE_FAILED,
4006 +       TOI_RESAVE_NEEDED,
4007 +       TOI_CANT_SUSPEND,
4008 +       TOI_NOTIFIERS_PREPARE_FAILED,
4009 +       TOI_PRE_SNAPSHOT_FAILED,
4010 +       TOI_PRE_RESTORE_FAILED,
4011 +       TOI_CANT_USE_ALT_RESUME,
4012 +       TOI_NUM_RESULT_STATES   /* Used in printing debug info only */
4013 +};
4014 +
4015 +extern unsigned long toi_result;
4016 +
4017 +#define set_result_state(bit) (test_and_set_bit(bit, &toi_result))
4018 +#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \
4019 +                               test_and_set_bit(bit, &toi_result))
4020 +#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result))
4021 +#define test_result_state(bit) (test_bit(bit, &toi_result))
4022 +
4023 +/*      == Debug sections and levels ==        */
4024 +
4025 +/* debugging levels. */
4026 +enum {
4027 +       TOI_STATUS = 0,
4028 +       TOI_ERROR = 2,
4029 +       TOI_LOW,
4030 +       TOI_MEDIUM,
4031 +       TOI_HIGH,
4032 +       TOI_VERBOSE,
4033 +};
4034 +
4035 +enum {
4036 +       TOI_ANY_SECTION,
4037 +       TOI_EAT_MEMORY,
4038 +       TOI_IO,
4039 +       TOI_HEADER,
4040 +       TOI_WRITER,
4041 +       TOI_MEMORY,
4042 +};
4043 +
4044 +#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state))
4045 +#define clear_debug_state(bit) \
4046 +       (test_and_clear_bit(bit, &toi_bkd.toi_debug_state))
4047 +#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state))
4048 +
4049 +/*             == Steps in hibernating ==      */
4050 +
4051 +enum {
4052 +       STEP_HIBERNATE_PREPARE_IMAGE,
4053 +       STEP_HIBERNATE_SAVE_IMAGE,
4054 +       STEP_HIBERNATE_POWERDOWN,
4055 +       STEP_RESUME_CAN_RESUME,
4056 +       STEP_RESUME_LOAD_PS1,
4057 +       STEP_RESUME_DO_RESTORE,
4058 +       STEP_RESUME_READ_PS2,
4059 +       STEP_RESUME_GO,
4060 +       STEP_RESUME_ALT_IMAGE,
4061 +       STEP_CLEANUP,
4062 +       STEP_QUIET_CLEANUP
4063 +};
4064 +
4065 +/*             == TuxOnIce states ==
4066 +       (see also include/linux/suspend.h)      */
4067 +
4068 +#define get_toi_state()  (toi_state)
4069 +#define restore_toi_state(saved_state) \
4070 +       do { toi_state = saved_state; } while (0)
4071 +
4072 +/*             == Module support ==            */
4073 +
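+/*
+ * Hooks registered by the TuxOnIce core. Going through a pointer lets
+ * built-in code call into the core even if it is built as a module.
+ */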
4074 +struct toi_core_fns {
4075 +       int (*post_context_save)(void);
4076 +       unsigned long (*get_nonconflicting_page)(void);
4077 +       int (*try_hibernate)(void);
4078 +       void (*try_resume)(void);
4079 +};
4080 +
4081 +extern struct toi_core_fns *toi_core_fns;
4082 +
4083 +/*             == All else ==                  */
4084 +#define KB(x) ((x) << (PAGE_SHIFT - 10))
4085 +#define MB(x) ((x) >> (20 - PAGE_SHIFT))
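+/*
+ * For example, with 4KB pages (PAGE_SHIFT == 12), KB(x) is x << 2
+ * (pages to KiB) and MB(x) is x >> 8 (pages to MiB).
+ */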
4086 +
4087 +extern int toi_start_anything(int toi_or_resume);
4088 +extern void toi_finish_anything(int toi_or_resume);
4089 +
4090 +extern int save_image_part1(void);
4091 +extern int toi_atomic_restore(void);
4092 +
4093 +extern int _toi_try_hibernate(void);
4094 +extern void __toi_try_resume(void);
4095 +
4096 +extern int __toi_post_context_save(void);
4097 +
4098 +extern unsigned int nr_hibernates;
4099 +extern char alt_resume_param[256];
4100 +
4101 +extern void copyback_post(void);
4102 +extern int toi_hibernate(void);
4103 +extern long extra_pd1_pages_used;
4104 +
4105 +#define SECTOR_SIZE 512
4106 +
4107 +extern void toi_early_boot_message(int can_erase_image, int default_answer,
4108 +       char *warning_reason, ...);
4109 +
4110 +static inline int load_direct(struct page *page)
4111 +{
4112 +       return test_action_state(TOI_NO_DIRECT_LOAD) ? 0 :
4113 +               PagePageset1Copy(page);
4114 +}
4115 +
4116 +extern int do_check_can_resume(void);
4117 +extern int do_toi_step(int step);
4118 +extern int toi_launch_userspace_program(char *command, int channel_no,
4119 +               enum umh_wait wait, int debug);
4120 +
4121 +extern char *tuxonice_signature;
4122 +#endif /* KERNEL_POWER_TOI_H */
4123 diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c
4124 new file mode 100644
4125 index 0000000..5e3aae8
4126 --- /dev/null
4127 +++ b/kernel/power/tuxonice_alloc.c
4128 @@ -0,0 +1,287 @@
4129 +/*
4130 + * kernel/power/tuxonice_alloc.c
4131 + *
4132 + * Copyright (C) 2008 Nigel Cunningham (nigel at tuxonice net)
4133 + *
4134 + * This file is released under the GPLv2.
4135 + *
4136 + */
4137 +
4138 +#ifdef CONFIG_PM_DEBUG
4139 +#include <linux/module.h>
4140 +#include <linux/slab.h>
4141 +#include "tuxonice_modules.h"
4142 +#include "tuxonice_alloc.h"
4143 +#include "tuxonice_sysfs.h"
4144 +#include "tuxonice.h"
4145 +
4146 +#define TOI_ALLOC_PATHS 39
4147 +
4148 +static DEFINE_MUTEX(toi_alloc_mutex);
4149 +
4150 +static struct toi_module_ops toi_alloc_ops;
4151 +
4152 +static int toi_fail_num;
4153 +static atomic_t toi_alloc_count[TOI_ALLOC_PATHS],
4154 +               toi_free_count[TOI_ALLOC_PATHS],
4155 +               toi_test_count[TOI_ALLOC_PATHS],
4156 +               toi_fail_count[TOI_ALLOC_PATHS];
4157 +static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS];
4158 +static int cur_allocd, max_allocd;
4159 +
4160 +static char *toi_alloc_desc[TOI_ALLOC_PATHS] = {
4161 +       "", /* 0 */
4162 +       "get_io_info_struct",
4163 +       "extent",
4164 +       "extent (loading chain)",
4165 +       "userui channel",
4166 +       "userui arg", /* 5 */
4167 +       "attention list metadata",
4168 +       "extra pagedir memory metadata",
4169 +       "bdev metadata",
4170 +       "extra pagedir memory",
4171 +       "header_locations_read", /* 10 */
4172 +       "bio queue",
4173 +       "prepare_readahead",
4174 +       "i/o buffer",
4175 +       "writer buffer in bio_init",
4176 +       "checksum buffer", /* 15 */
4177 +       "compression buffer",
4178 +       "filewriter signature op",
4179 +       "set resume param alloc1",
4180 +       "set resume param alloc2",
4181 +       "debugging info buffer", /* 20 */
4182 +       "check can resume buffer",
4183 +       "write module config buffer",
4184 +       "read module config buffer",
4185 +       "write image header buffer",
4186 +       "read pageset1 buffer", /* 25 */
4187 +       "get_have_image_data buffer",
4188 +       "checksum page",
4189 +       "worker rw loop",
4190 +       "get nonconflicting page",
4191 +       "ps1 load addresses", /* 30 */
4192 +       "remove swap image",
4193 +       "swap image exists",
4194 +       "swap parse sig location",
4195 +       "sysfs kobj",
4196 +       "swap mark resume attempted buffer", /* 35 */
4197 +       "cluster member",
4198 +       "boot kernel data buffer",
4199 +       "setting swap signature"
4200 +};
4201 +
4202 +#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \
4203 +       do { \
4204 +               BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \
4205 +               \
4206 +               if (FAIL_NUM == toi_fail_num) { \
4207 +                       atomic_inc(&toi_test_count[FAIL_NUM]); \
4208 +                       toi_fail_num = 0; \
4209 +                       return FAIL_VAL; \
4210 +               } \
4211 +       } while (0)
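+
+/*
+ * MIGHT_FAIL injects a single allocation failure when fail_num matches
+ * toi_fail_num (set via the "failure_test" sysfs entry below). The
+ * trigger is cleared after firing, so only one failure is simulated.
+ */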
4212 +
4213 +static void alloc_update_stats(int fail_num, void *result)
4214 +{
4215 +       if (!result) {
4216 +               atomic_inc(&toi_fail_count[fail_num]);
4217 +               return;
4218 +       }
4219 +
4220 +       atomic_inc(&toi_alloc_count[fail_num]);
4221 +       if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
4222 +               mutex_lock(&toi_alloc_mutex);
4223 +               toi_cur_allocd[fail_num]++;
4224 +               cur_allocd++;
4225 +               if (unlikely(cur_allocd > max_allocd)) {
4226 +                       int i;
4227 +
4228 +                       for (i = 0; i < TOI_ALLOC_PATHS; i++)
4229 +                               toi_max_allocd[i] = toi_cur_allocd[i];
4230 +                       max_allocd = cur_allocd;
4231 +               }
4232 +               mutex_unlock(&toi_alloc_mutex);
4233 +       }
4234 +}
4235 +
4236 +static void free_update_stats(int fail_num)
4237 +{
4238 +       BUG_ON(fail_num >= TOI_ALLOC_PATHS);
4239 +       atomic_inc(&toi_free_count[fail_num]);
4240 +       if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
4241 +               mutex_lock(&toi_alloc_mutex);
4242 +               cur_allocd--;
4243 +               toi_cur_allocd[fail_num]--;
4244 +               mutex_unlock(&toi_alloc_mutex);
4245 +       }
4246 +}
4247 +
4248 +void *toi_kzalloc(int fail_num, size_t size, gfp_t flags)
4249 +{
4250 +       void *result;
4251 +
4252 +       if (toi_alloc_ops.enabled)
4253 +               MIGHT_FAIL(fail_num, NULL);
4254 +       result = kzalloc(size, flags);
4255 +       if (toi_alloc_ops.enabled)
4256 +               alloc_update_stats(fail_num, result);
4257 +       return result;
4258 +}
4259 +EXPORT_SYMBOL_GPL(toi_kzalloc);
4260 +
4261 +unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
4262 +               unsigned int order)
4263 +{
4264 +       unsigned long result;
4265 +
4266 +       if (toi_alloc_ops.enabled)
4267 +               MIGHT_FAIL(fail_num, 0);
4268 +       result = __get_free_pages(mask, order);
4269 +       if (toi_alloc_ops.enabled)
4270 +               alloc_update_stats(fail_num, (void *) result);
4271 +       return result;
4272 +}
4273 +EXPORT_SYMBOL_GPL(toi_get_free_pages);
4274 +
4275 +struct page *toi_alloc_page(int fail_num, gfp_t mask)
4276 +{
4277 +       struct page *result;
4278 +
4279 +       if (toi_alloc_ops.enabled)
4280 +               MIGHT_FAIL(fail_num, NULL);
4281 +       result = alloc_page(mask);
4282 +       if (toi_alloc_ops.enabled)
4283 +               alloc_update_stats(fail_num, (void *) result);
4284 +       return result;
4285 +}
4286 +EXPORT_SYMBOL_GPL(toi_alloc_page);
4287 +
4288 +unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask)
4289 +{
4290 +       unsigned long result;
4291 +
4292 +       if (toi_alloc_ops.enabled)
4293 +               MIGHT_FAIL(fail_num, 0);
4294 +       result = get_zeroed_page(mask);
4295 +       if (toi_alloc_ops.enabled)
4296 +               alloc_update_stats(fail_num, (void *) result);
4297 +       return result;
4298 +}
4299 +EXPORT_SYMBOL_GPL(toi_get_zeroed_page);
4300 +
4301 +void toi_kfree(int fail_num, const void *arg)
4302 +{
4303 +       if (arg && toi_alloc_ops.enabled)
4304 +               free_update_stats(fail_num);
4305 +
4306 +       kfree(arg);
4307 +}
4308 +EXPORT_SYMBOL_GPL(toi_kfree);
4309 +
4310 +void toi_free_page(int fail_num, unsigned long virt)
4311 +{
4312 +       if (virt && toi_alloc_ops.enabled)
4313 +               free_update_stats(fail_num);
4314 +
4315 +       free_page(virt);
4316 +}
4317 +EXPORT_SYMBOL_GPL(toi_free_page);
4318 +
4319 +void toi__free_page(int fail_num, struct page *page)
4320 +{
4321 +       if (page && toi_alloc_ops.enabled)
4322 +               free_update_stats(fail_num);
4323 +
4324 +       __free_page(page);
4325 +}
4326 +EXPORT_SYMBOL_GPL(toi__free_page);
4327 +
4328 +void toi_free_pages(int fail_num, struct page *page, int order)
4329 +{
4330 +       if (page && toi_alloc_ops.enabled)
4331 +               free_update_stats(fail_num);
4332 +
4333 +       __free_pages(page, order);
4334 +}
4335 +
4336 +void toi_alloc_print_debug_stats(void)
4337 +{
4338 +       int i, header_done = 0;
4339 +
4340 +       if (!toi_alloc_ops.enabled)
4341 +               return;
4342 +
4343 +       for (i = 0; i < TOI_ALLOC_PATHS; i++)
4344 +               if (atomic_read(&toi_alloc_count[i]) !=
4345 +                   atomic_read(&toi_free_count[i])) {
4346 +                       if (!header_done) {
4347 +                               printk(KERN_INFO "Idx  Allocs   Frees   Tests "
4348 +                                       "  Fails Max     Description\n");
4349 +                               header_done = 1;
4350 +                       }
4351 +
4352 +                       printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i,
4353 +                               atomic_read(&toi_alloc_count[i]),
4354 +                               atomic_read(&toi_free_count[i]),
4355 +                               atomic_read(&toi_test_count[i]),
4356 +                               atomic_read(&toi_fail_count[i]),
4357 +                               toi_max_allocd[i],
4358 +                               toi_alloc_desc[i]);
4359 +               }
4360 +}
4361 +EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats);
4362 +
4363 +static int toi_alloc_initialise(int starting_cycle)
4364 +{
4365 +       int i;
4366 +
4367 +       if (starting_cycle && toi_alloc_ops.enabled) {
4368 +               for (i = 0; i < TOI_ALLOC_PATHS; i++) {
4369 +                       atomic_set(&toi_alloc_count[i], 0);
4370 +                       atomic_set(&toi_free_count[i], 0);
4371 +                       atomic_set(&toi_test_count[i], 0);
4372 +                       atomic_set(&toi_fail_count[i], 0);
4373 +                       toi_cur_allocd[i] = 0;
4374 +                       toi_max_allocd[i] = 0;
4375 +               }
4376 +               max_allocd = 0;
4377 +               cur_allocd = 0;
4378 +       }
4379 +
4380 +       return 0;
4381 +}
4382 +
4383 +static struct toi_sysfs_data sysfs_params[] = {
4384 +       SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL),
4385 +       SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action,
4386 +                       TOI_GET_MAX_MEM_ALLOCD, 0),
4387 +       SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0,
4388 +                       NULL)
4389 +};
4390 +
4391 +static struct toi_module_ops toi_alloc_ops = {
4392 +       .type                                   = MISC_HIDDEN_MODULE,
4393 +       .name                                   = "allocation debugging",
4394 +       .directory                              = "alloc",
4395 +       .module                                 = THIS_MODULE,
4396 +       .early                                  = 1,
4397 +       .initialise                             = toi_alloc_initialise,
4398 +
4399 +       .sysfs_data             = sysfs_params,
4400 +       .num_sysfs_entries      = sizeof(sysfs_params) /
4401 +               sizeof(struct toi_sysfs_data),
4402 +};
4403 +
4404 +int toi_alloc_init(void)
4405 +{
4406 +       int result = toi_register_module(&toi_alloc_ops);
4407 +       toi_alloc_ops.enabled = 0;
4408 +       return result;
4409 +}
4410 +
4411 +void toi_alloc_exit(void)
4412 +{
4413 +       toi_unregister_module(&toi_alloc_ops);
4414 +}
4415 +#endif /* CONFIG_PM_DEBUG */
4416 diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h
4417 new file mode 100644
4418 index 0000000..a1dd8ff
4419 --- /dev/null
4420 +++ b/kernel/power/tuxonice_alloc.h
4421 @@ -0,0 +1,51 @@
4422 +/*
4423 + * kernel/power/tuxonice_alloc.h
4424 + *
4425 + * Copyright (C) 2008 Nigel Cunningham (nigel at tuxonice net)
4426 + *
4427 + * This file is released under the GPLv2.
4428 + *
4429 + */
4430 +
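+/*
+ * Standard GFP masks for TuxOnIce allocations. Callers handle failure
+ * themselves (by retrying or throttling i/o), so __GFP_NOWARN is used
+ * to suppress the usual allocation-failure warnings.
+ */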
4431 +#define TOI_WAIT_GFP (GFP_KERNEL | __GFP_NOWARN)
4432 +#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN)
4433 +
4434 +#ifdef CONFIG_PM_DEBUG
4435 +extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags);
4436 +extern void toi_kfree(int fail_num, const void *arg);
4437 +
4438 +extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
4439 +               unsigned int order);
4440 +#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0)
4441 +extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask);
4442 +extern void toi_free_page(int fail_num, unsigned long buf);
4443 +extern void toi__free_page(int fail_num, struct page *page);
4444 +extern void toi_free_pages(int fail_num, struct page *page, int order);
4445 +extern struct page *toi_alloc_page(int fail_num, gfp_t mask);
4446 +extern int toi_alloc_init(void);
4447 +extern void toi_alloc_exit(void);
4448 +
4449 +extern void toi_alloc_print_debug_stats(void);
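+
+/*
+ * Minimal usage sketch (hypothetical caller, not part of this patch):
+ * pair each allocation and free on the same fail_num so the accounting
+ * above balances, e.g.
+ *
+ *     char *buf = toi_kzalloc(20, PAGE_SIZE, TOI_WAIT_GFP);
+ *     if (!buf)
+ *             return -ENOMEM;
+ *     ...
+ *     toi_kfree(20, buf);
+ */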
4450 +
4451 +#else /* CONFIG_PM_DEBUG */
4452 +
4453 +#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS))
4454 +#define toi_kfree(FAIL, ALLOCN) (kfree(ALLOCN))
4455 +
4456 +#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER)
4457 +#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS)
4458 +#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS)
4459 +#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0)
4460 +#define toi__free_page(FAIL, PAGE) __free_page(PAGE)
4461 +#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER)
4462 +#define toi_alloc_page(FAIL, MASK) alloc_page(MASK)
4463 +static inline int toi_alloc_init(void)
4464 +{
4465 +       return 0;
4466 +}
4467 +
4468 +static inline void toi_alloc_exit(void) { }
4469 +
4470 +static inline void toi_alloc_print_debug_stats(void) { }
4471 +
4472 +#endif /* CONFIG_PM_DEBUG */
4473 diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c
4474 new file mode 100644
4475 index 0000000..64e5749
4476 --- /dev/null
4477 +++ b/kernel/power/tuxonice_atomic_copy.c
4478 @@ -0,0 +1,398 @@
4479 +/*
4480 + * kernel/power/tuxonice_atomic_copy.c
4481 + *
4482 + * Copyright 2004-2008 Nigel Cunningham (nigel at tuxonice net)
4483 + * Copyright (C) 2006 Red Hat, inc.
4484 + *
4485 + * Distributed under GPLv2.
4486 + *
4487 + * Routines for doing the atomic save/restore.
4488 + */
4489 +
4490 +#include <linux/suspend.h>
4491 +#include <linux/highmem.h>
4492 +#include <linux/cpu.h>
4493 +#include <linux/freezer.h>
4494 +#include <linux/console.h>
4495 +#include <linux/ftrace.h>
4496 +#include "tuxonice.h"
4497 +#include "tuxonice_storage.h"
4498 +#include "tuxonice_power_off.h"
4499 +#include "tuxonice_ui.h"
4500 +#include "power.h"
4501 +#include "tuxonice_io.h"
4502 +#include "tuxonice_prepare_image.h"
4503 +#include "tuxonice_pageflags.h"
4504 +#include "tuxonice_checksum.h"
4505 +#include "tuxonice_builtin.h"
4506 +#include "tuxonice_atomic_copy.h"
4507 +#include "tuxonice_alloc.h"
4508 +
4509 +long extra_pd1_pages_used;
4510 +static int ftrace_save;
4511 +
4512 +/**
4513 + * free_pbe_list: Free page backup entries used by the atomic copy code.
4514 + *
4515 + * Normally, this function isn't used. If, however, we need to abort before
4516 + * doing the atomic copy, we use this to free the pbes previously allocated.
4517 + **/
4518 +static void free_pbe_list(struct pbe **list, int highmem)
4519 +{
4520 +       while (*list) {
4521 +               int i;
4522 +               struct pbe *free_pbe, *next_page = NULL;
4523 +               struct page *page;
4524 +
4525 +               if (highmem) {
4526 +                       page = (struct page *) *list;
4527 +                       free_pbe = (struct pbe *) kmap(page);
4528 +               } else {
4529 +                       page = virt_to_page(*list);
4530 +                       free_pbe = *list;
4531 +               }
4532 +
4533 +               for (i = 0; i < PBES_PER_PAGE; i++) {
4534 +                       if (!free_pbe)
4535 +                               break;
4536 +                       if (highmem)
4537 +                               toi__free_page(29, free_pbe->address);
4538 +                       else
4539 +                               toi_free_page(29,
4540 +                                       (unsigned long) free_pbe->address);
4541 +                       free_pbe = free_pbe->next;
4542 +               }
4543 +
4544 +               if (highmem) {
4545 +                       if (free_pbe)
4546 +                               next_page = free_pbe;
4547 +                       kunmap(page);
4548 +               } else {
4549 +                       if (free_pbe)
4550 +                               next_page = free_pbe;
4551 +               }
4552 +
4553 +               toi__free_page(29, page);
4554 +               *list = (struct pbe *) next_page;
4555 +       }
4556 +}
4557 +
4558 +/**
4559 + * copyback_post: Post atomic-restore actions.
4560 + *
4561 + * After doing the atomic restore, we have a few more things to do:
4562 + * 1) We want to retain some values across the restore, so we now copy
4563 + * these from the nosave variables to the normal ones.
4564 + * 2) Set the status flags.
4565 + * 3) Resume devices.
4566 + * 4) Tell userui so it can redraw & restore settings.
4567 + * 5) Reread the page cache.
4568 + **/
4569 +
4570 +void copyback_post(void)
4571 +{
4572 +       struct toi_boot_kernel_data *bkd =
4573 +               (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
4574 +
4575 +       /*
4576 +        * The boot kernel's data may be larger (newer version) or
4577 +        * smaller (older version) than ours. Copy the minimum
4578 +        * of the two sizes, so that we don't overwrite valid values
4579 +        * from pre-atomic copy.
4580 +        */
4581 +
4582 +       memcpy(&toi_bkd, (char *) boot_kernel_data_buffer,
4583 +                       min_t(int, sizeof(struct toi_boot_kernel_data),
4584 +                               bkd->size));
4585 +
4586 +       if (toi_activate_storage(1))
4587 +               panic("Failed to reactivate our storage.");
4588 +
4589 +       toi_ui_post_atomic_restore();
4590 +
4591 +       toi_cond_pause(1, "About to reload secondary pagedir.");
4592 +
4593 +       if (read_pageset2(0))
4594 +               panic("Unable to successfully reread the page cache.");
4595 +
4596 +       /*
4597 +        * If the user wants to sleep again after resuming from full-off,
4598 +        * it's most likely to be in order to suspend to ram, so we'll
4599 +        * do this check after loading pageset2, to give them the fastest
4600 +        * wakeup when they are ready to use the computer again.
4601 +        */
4602 +       toi_check_resleep();
4603 +}
4604 +
4605 +/**
4606 + * toi_copy_pageset1: Do the atomic copy of pageset1.
4607 + *
4608 + * Make the atomic copy of pageset1. We can't use copy_page (as we once did)
4609 + * because we can't be sure what side effects it has. On my old Duron, with
4610 + * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt
4611 + * count at resume time 4 instead of 3.
4612 + *
4613 + * We don't want to call kmap_atomic unconditionally because it has the side
4614 + * effect of incrementing the preempt count, which will leave it one too high
4615 + * post resume (the page containing the preempt count will be copied after
4616 + * it's incremented). This is essentially the same problem.
4617 + **/
4618 +
4619 +void toi_copy_pageset1(void)
4620 +{
4621 +       int i;
4622 +       unsigned long source_index, dest_index;
4623 +
4624 +       memory_bm_position_reset(&pageset1_map);
4625 +       memory_bm_position_reset(&pageset1_copy_map);
4626 +
4627 +       source_index = memory_bm_next_pfn(&pageset1_map);
4628 +       dest_index = memory_bm_next_pfn(&pageset1_copy_map);
4629 +
4630 +       for (i = 0; i < pagedir1.size; i++) {
4631 +               unsigned long *origvirt, *copyvirt;
4632 +               struct page *origpage, *copypage;
4633 +               int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1,
4634 +                   was_present;
4635 +
4636 +               origpage = pfn_to_page(source_index);
4637 +               copypage = pfn_to_page(dest_index);
4638 +
4639 +               origvirt = PageHighMem(origpage) ?
4640 +                       kmap_atomic(origpage, KM_USER0) :
4641 +                       page_address(origpage);
4642 +
4643 +               copyvirt = PageHighMem(copypage) ?
4644 +                       kmap_atomic(copypage, KM_USER1) :
4645 +                       page_address(copypage);
4646 +
4647 +               was_present = kernel_page_present(origpage);
4648 +               if (!was_present)
4649 +                       kernel_map_pages(origpage, 1, 1);
4650 +
4651 +               while (loop >= 0) {
4652 +                       *(copyvirt + loop) = *(origvirt + loop);
4653 +                       loop--;
4654 +               }
4655 +
4656 +               if (!was_present)
4657 +                       kernel_map_pages(origpage, 1, 0);
4658 +
4659 +               if (PageHighMem(origpage))
4660 +                       kunmap_atomic(origvirt, KM_USER0);
4661 +
4662 +               if (PageHighMem(copypage))
4663 +                       kunmap_atomic(copyvirt, KM_USER1);
4664 +
4665 +               source_index = memory_bm_next_pfn(&pageset1_map);
4666 +               dest_index = memory_bm_next_pfn(&pageset1_copy_map);
4667 +       }
4668 +}
4669 +
4670 +/**
4671 + * __toi_post_context_save: Steps after saving the cpu context.
4672 + *
4673 + * Steps taken after saving the CPU state to make the actual
4674 + * atomic copy.
4675 + *
4676 + * Called from swsusp_save in snapshot.c via toi_post_context_save.
4677 + **/
4678 +
4679 +int __toi_post_context_save(void)
4680 +{
4681 +       long old_ps1_size = pagedir1.size;
4682 +
4683 +       check_checksums();
4684 +
4685 +       free_checksum_pages();
4686 +
4687 +       toi_recalculate_image_contents(1);
4688 +
4689 +       extra_pd1_pages_used = pagedir1.size - old_ps1_size;
4690 +
4691 +       if (extra_pd1_pages_used > extra_pd1_pages_allowance) {
4692 +               printk(KERN_INFO "Pageset1 has grown by %ld pages. "
4693 +                       "extra_pages_allowance is currently only %lu.\n",
4694 +                       pagedir1.size - old_ps1_size,
4695 +                       extra_pd1_pages_allowance);
4696 +               set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
4697 +               return 1;
4698 +       }
4699 +
4700 +       if (!test_action_state(TOI_TEST_FILTER_SPEED) &&
4701 +           !test_action_state(TOI_TEST_BIO))
4702 +               toi_copy_pageset1();
4703 +
4704 +       return 0;
4705 +}
4706 +
4707 +/**
4708 + * toi_hibernate: High level code for doing the atomic copy.
4709 + *
4710 + * High-level code which prepares to do the atomic copy. Loosely based
4711 + * on the swsusp version, but with the following twists:
4712 + * - We set toi_running so the swsusp code uses our code paths.
4713 + * - We give better feedback regarding what goes wrong if there is a problem.
4714 + * - We use an extra function to call the assembly, just in case this code
4715 + *   is in a module (return address).
4716 + **/
4717 +
4718 +int toi_hibernate(void)
4719 +{
4720 +       int error;
4721 +
4722 +       toi_running = 1; /* For the swsusp code we use :< */
4723 +
4724 +       error = toi_lowlevel_builtin();
4725 +
4726 +       toi_running = 0;
4727 +       return error;
4728 +}
4729 +
4730 +/**
4731 + * toi_atomic_restore: Prepare to do the atomic restore.
4732 + *
4733 + * Get ready to do the atomic restore. This part gets us into the same
4734 + * state we are in prior to calling do_toi_lowlevel while
4735 + * hibernating: hot-unplugging secondary cpus and freezing processes,
4736 + * before starting the thread that will do the restore.
4737 + **/
4738 +
4739 +int toi_atomic_restore(void)
4740 +{
4741 +       int error;
4742 +
4743 +       toi_running = 1;
4744 +
4745 +       toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore.");
4746 +
4747 +       memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line,
4748 +               COMMAND_LINE_SIZE);
4749 +
4750 +       if (add_boot_kernel_data_pbe())
4751 +               goto Failed;
4752 +
4753 +       if (toi_go_atomic(PMSG_QUIESCE, 0))
4754 +               goto Failed;
4755 +
4756 +       /* We'll ignore saved state, but this gets preempt count (etc) right */
4757 +       save_processor_state();
4758 +
4759 +       error = swsusp_arch_resume();
4760 +       /*
4761 +        * Code below is only ever reached in case of failure. Otherwise
4762 +        * execution continues at place where swsusp_arch_suspend was called.
4763 +        *
4764 +        * We don't know whether it's safe to continue (this shouldn't happen),
4765 +        * so let's err on the side of caution.
4766 +        */
4767 +       BUG();
4768 +
4769 +Failed:
4770 +       free_pbe_list(&restore_pblist, 0);
4771 +#ifdef CONFIG_HIGHMEM
4772 +       free_pbe_list(&restore_highmem_pblist, 1);
4773 +#endif
4774 +       if (test_action_state(TOI_PM_PREPARE_CONSOLE))
4775 +               pm_restore_console();
4776 +       toi_running = 0;
4777 +       return 1;
4778 +}
4779 +
4780 +int toi_go_atomic(pm_message_t state, int suspend_time)
4781 +{
4782 +       toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
4783 +
4784 +       if (suspend_time && platform_begin(1)) {
4785 +               set_abort_result(TOI_PLATFORM_PREP_FAILED);
4786 +               toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 0);
4787 +               return 1;
4788 +       }
4789 +
4790 +       suspend_console();
4791 +       ftrace_save = __ftrace_enabled_save();
4792 +
4793 +       if (device_suspend(state)) {
4794 +               set_abort_result(TOI_DEVICE_REFUSED);
4795 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
4796 +               return 1;
4797 +       }
4798 +
4799 +       if (suspend_time && platform_pre_snapshot(1)) {
4800 +               set_abort_result(TOI_PRE_SNAPSHOT_FAILED);
4801 +               toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 0);
4802 +               return 1;
4803 +       }
4804 +
4805 +       if (!suspend_time && platform_pre_restore(1)) {
4806 +               set_abort_result(TOI_PRE_RESTORE_FAILED);
4807 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 0);
4808 +               return 1;
4809 +       }
4810 +
4811 +       if (test_action_state(TOI_LATE_CPU_HOTPLUG)) {
4812 +               if (disable_nonboot_cpus()) {
4813 +                       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
4814 +                       toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG,
4815 +                                       suspend_time, 0);
4816 +                       return 1;
4817 +               }
4818 +       }
4819 +
4820 +       if (suspend_time && arch_prepare_suspend()) {
4821 +               set_abort_result(TOI_ARCH_PREPARE_FAILED);
4822 +               toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG, suspend_time, 0);
4823 +               return 1;
4824 +       }
4825 +
4826 +       device_pm_lock();
4827 +       local_irq_disable();
4828 +
4829 +       /* At this point, device_suspend() has been called, but *not*
4830 +        * device_power_down(). We *must* device_power_down() now.
4831 +        * Otherwise, drivers for some devices (e.g. interrupt controllers)
4832 +        * become desynchronized with the actual state of the hardware
4833 +        * at resume time, and evil weirdness ensues.
4834 +        */
4835 +
4836 +       if (device_power_down(state)) {
4837 +               set_abort_result(TOI_DEVICE_REFUSED);
4838 +               toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 0);
4839 +               return 1;
4840 +       }
4841 +
4842 +       return 0;
4843 +}
4844 +
4845 +void toi_end_atomic(int stage, int suspend_time, int error)
4846 +{
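+       /*
+        * Each case deliberately falls through to the ones below it:
+        * the stage passed in selects how much undoing is still needed.
+        */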
4847 +       switch (stage) {
4848 +       case ATOMIC_ALL_STEPS:
4849 +               if (!suspend_time)
4850 +                       platform_leave(1);
4851 +               device_power_up(suspend_time ?
4852 +                       (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
4853 +       case ATOMIC_STEP_IRQS:
4854 +               local_irq_enable();
4855 +               device_pm_unlock();
4856 +       case ATOMIC_STEP_CPU_HOTPLUG:
4857 +               if (test_action_state(TOI_LATE_CPU_HOTPLUG))
4858 +                       enable_nonboot_cpus();
4859 +       case ATOMIC_STEP_PLATFORM_FINISH:
4860 +               platform_finish(1);
4861 +       case ATOMIC_STEP_DEVICE_RESUME:
4862 +               if (suspend_time && (error & 2)) {
4863 +                       platform_recover(1);
4864 +               }
4865 +               device_resume(suspend_time ?
4866 +                       ((error & 1) ? PMSG_RECOVER : PMSG_THAW) :
4867 +                       PMSG_RESTORE);
4868 +       case ATOMIC_STEP_RESUME_CONSOLE:
4869 +               __ftrace_enabled_restore(ftrace_save);
4870 +               resume_console();
4871 +       case ATOMIC_STEP_PLATFORM_END:
4872 +               platform_end(1);
4873 +
4874 +               toi_prepare_status(DONT_CLEAR_BAR, "Post atomic.");
4875 +       }
4876 +}
4877 diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h
4878 new file mode 100644
4879 index 0000000..8df3552
4880 --- /dev/null
4881 +++ b/kernel/power/tuxonice_atomic_copy.h
4882 @@ -0,0 +1,22 @@
4883 +/*
4884 + * kernel/power/tuxonice_atomic_copy.h
4885 + *
4886 + * Copyright 2008 Nigel Cunningham (nigel at tuxonice net)
4887 + *
4888 + * Distributed under GPLv2.
4889 + *
4890 + * Routines for doing the atomic save/restore.
4891 + */
4892 +
4893 +enum {
4894 +       ATOMIC_ALL_STEPS,
4895 +       ATOMIC_STEP_IRQS,
4896 +       ATOMIC_STEP_CPU_HOTPLUG,
4897 +       ATOMIC_STEP_PLATFORM_FINISH,
4898 +       ATOMIC_STEP_DEVICE_RESUME,
4899 +       ATOMIC_STEP_RESUME_CONSOLE,
4900 +       ATOMIC_STEP_PLATFORM_END,
4901 +};
4902 +
4903 +int toi_go_atomic(pm_message_t state, int toi_time);
4904 +void toi_end_atomic(int stage, int toi_time, int error);
4905 diff --git a/kernel/power/tuxonice_block_io.c b/kernel/power/tuxonice_block_io.c
4906 new file mode 100644
4907 index 0000000..0af7fc5
4908 --- /dev/null
4909 +++ b/kernel/power/tuxonice_block_io.c
4910 @@ -0,0 +1,1211 @@
4911 +/*
4912 + * kernel/power/tuxonice_block_io.c
4913 + *
4914 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
4915 + *
4916 + * Distributed under GPLv2.
4917 + *
4918 + * This file contains block io functions for TuxOnIce. These are
4919 + * used by the swapwriter and it is planned that they will also
4920 + * be used by the NFSwriter.
4921 + *
4922 + */
4923 +
4924 +#include <linux/blkdev.h>
4925 +#include <linux/syscalls.h>
4926 +#include <linux/suspend.h>
4927 +
4928 +#include "tuxonice.h"
4929 +#include "tuxonice_sysfs.h"
4930 +#include "tuxonice_modules.h"
4931 +#include "tuxonice_prepare_image.h"
4932 +#include "tuxonice_block_io.h"
4933 +#include "tuxonice_ui.h"
4934 +#include "tuxonice_alloc.h"
4935 +#include "tuxonice_io.h"
4936 +
4937 +#define ANY_REASON 0
4938 +#define MEMORY_ONLY 1
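+
+/*
+ * Modes for throttle_if_needed(): MEMORY_ONLY applies only the low
+ * memory throttle, while ANY_REASON also applies the throughput
+ * throttle.
+ */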
4939 +
4940 +/* #define MEASURE_MUTEX_CONTENTION */
4941 +#ifndef MEASURE_MUTEX_CONTENTION
4942 +#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
4943 +#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
4944 +#else
4945 +unsigned long mutex_times[2][2][NR_CPUS];
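+
+/*
+ * Note that my_mutex_lock opens a do { block that is only closed by
+ * the } while (0) in my_mutex_unlock, so the two must always be
+ * paired within a single scope.
+ */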
4946 +#define my_mutex_lock(index, the_lock) do { \
4947 +       int have_mutex; \
4948 +       have_mutex = mutex_trylock(the_lock); \
4949 +       if (!have_mutex) { \
4950 +               mutex_lock(the_lock); \
4951 +               mutex_times[index][0][smp_processor_id()]++; \
4952 +       } else { \
4953 +               mutex_times[index][1][smp_processor_id()]++; \
4954 +       }
4955 +
4956 +#define my_mutex_unlock(index, the_lock) \
4957 +       mutex_unlock(the_lock); \
4958 +} while (0)
4959 +#endif
4960 +
4961 +static int target_outstanding_io = 1024;
4962 +static int max_outstanding_writes, max_outstanding_reads;
4963 +
4964 +static struct page *bio_queue_head, *bio_queue_tail;
4965 +static atomic_t toi_bio_queue_size;
4966 +static DEFINE_SPINLOCK(bio_queue_lock);
4967 +
4968 +static int free_mem_throttle, throughput_throttle;
4969 +static int more_readahead = 1;
4970 +static struct page *readahead_list_head, *readahead_list_tail;
4971 +
4972 +static struct page *waiting_on;
4973 +
4974 +static atomic_t toi_io_in_progress, toi_io_done;
4975 +static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
4976 +
4977 +static int extra_page_forward;
4978 +
4979 +static int current_stream;
4980 +/* 0 = Header, 1 = Pageset1, 2 = Pageset2, 3 = End of PS1 */
4981 +struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4];
4982 +EXPORT_SYMBOL_GPL(toi_writer_posn_save);
4983 +
4984 +/* Pointer to current entry being loaded/saved. */
4985 +struct toi_extent_iterate_state toi_writer_posn;
4986 +EXPORT_SYMBOL_GPL(toi_writer_posn);
4987 +
4988 +/* Not static, so that the allocators can setup and complete
4989 + * writing the header */
4990 +char *toi_writer_buffer;
4991 +EXPORT_SYMBOL_GPL(toi_writer_buffer);
4992 +
4993 +int toi_writer_buffer_posn;
4994 +EXPORT_SYMBOL_GPL(toi_writer_buffer_posn);
4995 +
4996 +static struct toi_bdev_info *toi_devinfo;
4997 +
4998 +static DEFINE_MUTEX(toi_bio_mutex);
4999 +
5000 +static struct task_struct *toi_queue_flusher;
5001 +static int toi_bio_queue_flush_pages(int dedicated_thread);
5002 +
5003 +#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \
5004 +              atomic_read(&toi_bio_queue_size))
5005 +
5006 +/**
5007 + * set_free_mem_throttle: Set the point where we pause to avoid oom.
5008 + *
5009 + * Initially, this value is zero, but when we first fail to allocate memory,
5010 + * we set it (plus a buffer) and thereafter throttle i/o once that limit is
5011 + * reached.
5012 + */
5013 +
5014 +static void set_free_mem_throttle(void)
5015 +{
5016 +       int new_throttle = nr_unallocated_buffer_pages() + 256;
5017 +
5018 +       if (new_throttle > free_mem_throttle)
5019 +               free_mem_throttle = new_throttle;
5020 +}
5021 +
5022 +#define NUM_REASONS 7
5023 +static atomic_t reasons[NUM_REASONS];
5024 +static char *reason_name[NUM_REASONS] = {
5025 +       "readahead not ready",
5026 +       "bio allocation",
5027 +       "synchronous I/O",
5028 +       "toi_bio_get_new_page",
5029 +       "memory low",
5030 +       "readahead buffer allocation",
5031 +       "throughput_throttle",
5032 +};
5033 +
5034 +/**
5035 + * do_bio_wait: Wait for some TuxOnIce i/o to complete.
5036 + *
5037 + * Submit any I/O that's batched up (if we're not already doing
5038 + * that), then schedule and clean up whatever we can.
5039 + */
5040 +static void do_bio_wait(int reason)
5041 +{
5042 +       struct page *was_waiting_on = waiting_on;
5043 +
5044 +       /* On SMP, waiting_on can be reset, so we make a copy */
5045 +       if (was_waiting_on) {
5046 +               if (PageLocked(was_waiting_on)) {
5047 +                       wait_on_page_bit(was_waiting_on, PG_locked);
5048 +                       atomic_inc(&reasons[reason]);
5049 +               }
5050 +       } else {
5051 +               atomic_inc(&reasons[reason]);
5052 +
5053 +               wait_event(num_in_progress_wait,
5054 +                       !atomic_read(&toi_io_in_progress) ||
5055 +                       nr_unallocated_buffer_pages() > free_mem_throttle);
5056 +       }
5057 +}
5058 +
5059 +static void throttle_if_needed(int reason)
5060 +{
5061 +       int free_pages = nr_unallocated_buffer_pages();
5062 +
5063 +       /* Getting low on memory and I/O is in progress? */
5064 +       while (unlikely(free_pages < free_mem_throttle) &&
5065 +                       atomic_read(&toi_io_in_progress)) {
5066 +               do_bio_wait(4);
5067 +               free_pages = nr_unallocated_buffer_pages();
5068 +       }
5069 +
5070 +       while (reason == ANY_REASON && throughput_throttle &&
5071 +               TOTAL_OUTSTANDING_IO >= throughput_throttle) {
5072 +               atomic_inc(&reasons[6]);
5073 +               wait_event(num_in_progress_wait,
5074 +                       !atomic_read(&toi_io_in_progress) ||
5075 +                       TOTAL_OUTSTANDING_IO < throughput_throttle);
5076 +       }
5077 +}
5078 +
5079 +/**
5080 + * toi_monitor_outstanding_io: Show the user how much I/O we're waiting for.
5081 + */
5082 +static void toi_monitor_outstanding_io(void)
5083 +{
5084 +       int orig = TOTAL_OUTSTANDING_IO, step = orig / 5;
5085 +
5086 +       while (orig) {
5087 +               int new_min = orig > step ? orig - step : 0,
5088 +                   new_max = orig + step,
5089 +                   mb = MB(orig);
5090 +               if (mb)
5091 +                       toi_prepare_status(DONT_CLEAR_BAR,
5092 +                               "Waiting on I/O completion (%d MB)", mb);
5093 +               wait_event(num_in_progress_wait,
5094 +                       TOTAL_OUTSTANDING_IO <= new_min ||
5095 +                       TOTAL_OUTSTANDING_IO >= new_max);
5096 +               orig = TOTAL_OUTSTANDING_IO;
5097 +       }
5098 +}
5099 +
5100 +/**
5101 + * update_throughput_throttle: Update the raw throughput throttle.
5102 + *
5103 + * Called once per second by the core, used to limit the amount of I/O
5104 + * we submit at once, spreading out our waiting through the whole job
5105 + * and letting userui get an opportunity to do its work.
5106 + *
5107 + * We throttle to 1/10s worth of I/O.
5108 + */
5109 +static void update_throughput_throttle(int jif_index)
5110 +{
5111 +       int done = atomic_read(&toi_io_done);
5112 +       throughput_throttle = done / jif_index;
5113 +}
5114 +
5115 +/**
5116 + * toi_finish_all_io: Wait for all outstanding i/o to complete.
5117 + */
5118 +static void toi_finish_all_io(void)
5119 +{
5120 +       toi_bio_queue_flush_pages(0);
5121 +       wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO);
5122 +}
5123 +
5124 +/**
5125 + * toi_end_bio: bio completion function.
5126 + *
5127 + * @bio: bio that has completed.
5128 + * @err: Error value. Yes, like end_swap_bio_read, we ignore it.
5129 + *
5130 + * Function called by the block driver from interrupt context when I/O completes.
5131 + * Nearly the fs/buffer.c version, but we want to do our cleanup too. We only
5132 + * free pages if they were buffers used when writing the image.
5133 + */
5134 +static void toi_end_bio(struct bio *bio, int err)
5135 +{
5136 +       struct page *page = bio->bi_io_vec[0].bv_page;
5137 +
5138 +       BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
5139 +
5140 +       unlock_page(page);
5141 +       bio_put(bio);
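+       /* submit() holds an extra reference, so the bio is still valid here. */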
5142 +
5143 +       if (waiting_on == page)
5144 +               waiting_on = NULL;
5145 +
5146 +       put_page(page);
5147 +
5148 +       if (bio->bi_private)
5149 +               toi__free_page((int) ((unsigned long) bio->bi_private), page);
5150 +
5151 +       bio_put(bio);
5152 +
5153 +       atomic_dec(&toi_io_in_progress);
5154 +       atomic_inc(&toi_io_done);
5155 +
5156 +       wake_up(&num_in_progress_wait);
5157 +}
5158 +
5159 +/**
5160 + *     submit - submit BIO request.
5161 + *     @writing: READ or WRITE.
5162 + *
5163 + *     Based on Patrick's pmdisk code from long ago:
5164 + *     "Straight from the textbook - allocate and initialize the bio.
5165 + *     If we're writing, make sure the page is marked as dirty.
5166 + *     Then submit it and carry on."
5167 + *
5168 + *     With a twist, though - we handle block_size != PAGE_SIZE.
5169 + *     Caller has already checked that our page is not fragmented.
5170 + */
5171 +static int submit(int writing, struct block_device *dev, sector_t first_block,
5172 +               struct page *page, int free_group)
5173 +{
5174 +       struct bio *bio = NULL;
5175 +       int cur_outstanding_io;
5176 +
5177 +       throttle_if_needed(MEMORY_ONLY);
5178 +
5179 +       while (!bio) {
5180 +               bio = bio_alloc(TOI_ATOMIC_GFP, 1);
5181 +               if (!bio) {
5182 +                       set_free_mem_throttle();
5183 +                       do_bio_wait(1);
5184 +               }
5185 +       }
5186 +
5187 +       bio->bi_bdev = dev;
5188 +       bio->bi_sector = first_block;
5189 +       bio->bi_private = (void *) ((unsigned long) free_group);
5190 +       bio->bi_end_io = toi_end_bio;
5191 +
5192 +       if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
5193 +               printk(KERN_ERR "ERROR: adding page to bio at %lld\n",
5194 +                               (unsigned long long) first_block);
5195 +               bio_put(bio);
5196 +               return -EFAULT;
5197 +       }
5198 +
5199 +       bio_get(bio);
5200 +
5201 +       cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
5202 +       if (writing) {
5203 +               if (cur_outstanding_io > max_outstanding_writes)
5204 +                       max_outstanding_writes = cur_outstanding_io;
5205 +       } else {
5206 +               if (cur_outstanding_io > max_outstanding_reads)
5207 +                       max_outstanding_reads = cur_outstanding_io;
5208 +       }
5209 +
5210 +
5212 +               /* Fake having done the hard work */
5213 +               set_bit(BIO_UPTODATE, &bio->bi_flags);
5214 +               toi_end_bio(bio, 0);
5215 +       } else
5216 +               submit_bio(writing | (1 << BIO_RW_SYNC), bio);
5217 +
5218 +       return 0;
5219 +}
5220 +
5221 +/**
5222 + * toi_do_io: Prepare to do some i/o on a page and submit or batch it.
5223 + *
5224 + * @writing: Whether reading or writing.
5225 + * @bdev: The block device which we're using.
5226 + * @block0: The first sector we're reading or writing.
5227 + * @page: The page on which I/O is being done.
5228 + * @is_readahead: Whether the page is being used for readahead.
5229 + * @syncio: Whether the i/o is being done synchronously.
+ * @free_group: Group passed to toi__free_page() when the page is freed on completion.
5230 + *
5231 + * Prepare and start a read or write operation.
5232 + *
5233 + * Note that we always work with our own page. If writing, we might be given a
5234 + * compression buffer that will immediately be used to start compressing the
5235 + * next page. For reading, we do readahead and therefore don't know the final
5236 + * address where the data needs to go.
5237 + */
5238 +static int toi_do_io(int writing, struct block_device *bdev, long block0,
5239 +       struct page *page, int is_readahead, int syncio, int free_group)
5240 +{
5241 +       page->private = 0;
5242 +
5243 +       /* Do here so we don't race against toi_bio_get_next_page_read */
5244 +       lock_page(page);
5245 +
5246 +       if (is_readahead) {
5247 +               if (readahead_list_head)
5248 +                       readahead_list_tail->private = (unsigned long) page;
5249 +               else
5250 +                       readahead_list_head = page;
5251 +
5252 +               readahead_list_tail = page;
5253 +       }
5254 +
5255 +       /* Done before submitting to avoid races. */
5256 +       if (syncio)
5257 +               waiting_on = page;
5258 +
5259 +       /* Submit the page */
5260 +       get_page(page);
5261 +
5262 +       if (submit(writing, bdev, block0, page, free_group))
5263 +               return -EFAULT;
5264 +
5265 +       if (syncio)
5266 +               do_bio_wait(2);
5267 +
5268 +       return 0;
5269 +}
5270 +
5271 +/**
5272 + * toi_bdev_page_io: Simpler interface to do directly i/o on a single page.
5273 + *
5274 + * @writing: Whether reading or writing.
5275 + * @bdev: Block device on which we're operating.
5276 + * @pos: Sector at which page to read starts.
5277 + * @page: Page to be read/written.
5278 + *
5279 + * We used to use bread here, but it doesn't correctly handle
5280 + * blocksize != PAGE_SIZE. Now we simply submit the page through our
5281 + * normal i/o routines (synchronously).
5282 + */
5283 +static int toi_bdev_page_io(int writing, struct block_device *bdev,
5284 +               long pos, struct page *page)
5285 +{
5286 +       return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
5287 +}
5288 +
5289 +/**
5290 + * toi_bio_memory_needed: Report amount of memory needed for block i/o.
5291 + *
5292 + * We want to have at least enough memory so as to have target_outstanding_io
5293 + * or more transactions on the fly at once. If we can do more, fine.
5294 + */
5295 +static int toi_bio_memory_needed(void)
5296 +{
5297 +       return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) +
5298 +                               sizeof(struct bio));
5299 +}
5300 +
5301 +/*
5302 + * toi_bio_print_debug_stats
5303 + *
5304 + * Description: Print summary statistics about TuxOnIce block i/o into @buffer.
5305 + */
5306 +static int toi_bio_print_debug_stats(char *buffer, int size)
5307 +{
5308 +       int len = scnprintf(buffer, size, "- Max outstanding reads %d. Max "
5309 +                       "writes %d.\n", max_outstanding_reads,
5310 +                       max_outstanding_writes);
5311 +
5312 +       len += scnprintf(buffer + len, size - len,
5313 +               "  Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
5314 +               target_outstanding_io,
5315 +               PAGE_SIZE, (unsigned int) sizeof(struct request),
5316 +               (unsigned int) sizeof(struct bio), toi_bio_memory_needed());
5317 +
5318 +#ifdef MEASURE_MUTEX_CONTENTION
5319 +       {
5320 +       int i;
5321 +
5322 +       len += scnprintf(buffer + len, size - len,
5323 +               "  Mutex contention while reading:\n  Contended      Free\n");
5324 +
5325 +       for_each_online_cpu(i)
5326 +               len += scnprintf(buffer + len, size - len,
5327 +               "  %9lu %9lu\n",
5328 +               mutex_times[0][0][i], mutex_times[0][1][i]);
5329 +
5330 +       len += scnprintf(buffer + len, size - len,
5331 +               "  Mutex contention while writing:\n  Contended      Free\n");
5332 +
5333 +       for_each_online_cpu(i)
5334 +               len += scnprintf(buffer + len, size - len,
5335 +               "  %9lu %9lu\n",
5336 +               mutex_times[1][0][i], mutex_times[1][1][i]);
5337 +
5338 +       }
5339 +#endif
5340 +
5341 +       return len + scnprintf(buffer + len, size - len,
5342 +               "  Free mem throttle point reached %d.\n", free_mem_throttle);
5343 +}
5344 +
5345 +/**
5346 + * toi_set_devinfo: Set the bdev info used for i/o.
5347 + *
5348 + * @info: Pointer to array of struct toi_bdev_info - the list of
5349 + * bdevs and blocks on them in which the image is stored.
5350 + *
5351 + * Set the list of bdevs and blocks in which the image will be stored.
5352 + * Sort of like putting a tape in the cassette player.
5353 + */
5354 +static void toi_set_devinfo(struct toi_bdev_info *info)
5355 +{
5356 +       toi_devinfo = info;
5357 +}
5358 +
5359 +/**
5360 + * dump_block_chains: Print the contents of the bdev info array.
5361 + */
5362 +static void dump_block_chains(void)
5363 +{
5364 +       int i;
5365 +
5366 +       for (i = 0; i < toi_writer_posn.num_chains; i++) {
5367 +               struct hibernate_extent *this;
5368 +
5369 +               this = (toi_writer_posn.chains + i)->first;
5370 +
5371 +               if (!this)
5372 +                       continue;
5373 +
5374 +               printk(KERN_INFO "Chain %d:", i);
5375 +
5376 +               while (this) {
5377 +                       printk(" [%lu-%lu]%s", this->start,
5378 +                                       this->end, this->next ? "," : "");
5379 +                       this = this->next;
5380 +               }
5381 +
5382 +               printk("\n");
5383 +       }
5384 +
5385 +       for (i = 0; i < 4; i++)
5386 +               printk(KERN_INFO "Posn %d: Chain %d, extent %d, offset %lu.\n",
5387 +                               i, toi_writer_posn_save[i].chain_num,
5388 +                               toi_writer_posn_save[i].extent_num,
5389 +                               toi_writer_posn_save[i].offset);
5390 +}
5391 +
5392 +/**
5393 + * go_next_page: Skip blocks to the start of the next page.
5394 + *
5395 + * Go forward one page, or two if extra_page_forward is set. It only gets
5396 + * set at the start of reading the image header, to skip the first page
5397 + * of the header, which is read without using the extent chains.
5398 + */
5399 +static int go_next_page(int writing)
5400 +{
5401 +       int i, max = (toi_writer_posn.current_chain == -1) ? 1 :
5402 +         toi_devinfo[toi_writer_posn.current_chain].blocks_per_page;
5403 +
5404 +       for (i = 0; i < max; i++)
5405 +               toi_extent_state_next(&toi_writer_posn);
5406 +
5407 +       if (toi_extent_state_eof(&toi_writer_posn)) {
5408 +               /* Don't complain if readahead falls off the end */
5409 +               if (writing) {
5410 +                       printk(KERN_INFO "Extent state eof. "
5411 +                               "Expected compression ratio too optimistic?\n");
5412 +                       dump_block_chains();
5413 +               }
5414 +               return -ENODATA;
5415 +       }
5416 +
5417 +       if (extra_page_forward) {
5418 +               extra_page_forward = 0;
5419 +               return go_next_page(writing);
5420 +       }
5421 +
5422 +       return 0;
5423 +}
5424 +
5425 +/**
5426 + * set_extra_page_forward: Make us skip an extra page on next go_next_page.
5427 + *
5428 + * Used in reading header, to jump to 2nd page after getting 1st page
5429 + * direct from image header.
5430 + */
5431 +static void set_extra_page_forward(void)
5432 +{
5433 +       extra_page_forward = 1;
5434 +}
5435 +
5436 +/**
5437 + * toi_bio_rw_page: Do i/o on the next disk page in the image.
5438 + *
5439 + * @writing: Whether reading or writing.
5440 + * @page: Page to do i/o on.
5441 + * @is_readahead: Whether the page is a readahead buffer.
+ * @free_group: Group passed to toi__free_page() when the page is freed.
5442 + *
5443 + * Submit a page for reading or writing, possibly readahead.
5444 + */
5445 +static int toi_bio_rw_page(int writing, struct page *page,
5446 +               int is_readahead, int free_group)
5447 +{
5448 +       struct toi_bdev_info *dev_info;
5449 +       int result;
5450 +
5451 +       if (go_next_page(writing)) {
5452 +               printk(KERN_INFO "Failed to advance a page in the extent "
5453 +                               "data.\n");
5454 +               return -ENODATA;
5455 +       }
5456 +
5457 +       if (current_stream == 0 && writing &&
5458 +               toi_writer_posn.current_chain ==
5459 +                       toi_writer_posn_save[2].chain_num &&
5460 +               toi_writer_posn.current_offset ==
5461 +                       toi_writer_posn_save[2].offset) {
5462 +               dump_block_chains();
5463 +               BUG();
5464 +       }
5465 +
5466 +       dev_info = &toi_devinfo[toi_writer_posn.current_chain];
5467 +
5468 +       result = toi_do_io(writing, dev_info->bdev,
5469 +               toi_writer_posn.current_offset <<
5470 +                       dev_info->bmap_shift,
5471 +               page, is_readahead, 0, free_group);
5472 +
5473 +       if (result) {
5474 +               more_readahead = 0;
5475 +               return result;
5476 +       }
5477 +
5478 +       if (!writing) {
5479 +               int compare_to = 0;
5480 +
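+               /*
+                * Choose the saved position that marks the end of the
+                * stream being read; reaching it stops readahead below.
+                */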
5481 +               switch (current_stream) {
5482 +               case 0:
5483 +                       compare_to = 2;
5484 +                       break;
5485 +               case 1:
5486 +                       compare_to = 3;
5487 +                       break;
5488 +               case 2:
5489 +                       compare_to = 1;
5490 +                       break;
5491 +               }
5492 +
5493 +               if (toi_writer_posn.current_chain ==
5494 +                               toi_writer_posn_save[compare_to].chain_num &&
5495 +                   toi_writer_posn.current_offset ==
5496 +                               toi_writer_posn_save[compare_to].offset)
5497 +                       more_readahead = 0;
5498 +       }
5499 +       return 0;
5500 +}
5501 +
5502 +/**
5503 + * toi_rw_init: Prepare to read or write a stream in the image.
5504 + *
5505 + * @writing: Whether reading or writing.
5506 + * @stream_number: Section of the image being processed.
5507 + */
5508 +static int toi_rw_init(int writing, int stream_number)
5509 +{
5510 +       if (stream_number)
5511 +               toi_extent_state_restore(&toi_writer_posn,
5512 +                               &toi_writer_posn_save[stream_number]);
5513 +       else
5514 +               toi_extent_state_goto_start(&toi_writer_posn);
5515 +
5516 +       atomic_set(&toi_io_done, 0);
5517 +       toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
5518 +       toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;
5519 +
5520 +       current_stream = stream_number;
5521 +
5522 +       more_readahead = 1;
5523 +
5524 +       return toi_writer_buffer ? 0 : -ENOMEM;
5525 +}
5526 +
5527 +/**
5528 + * toi_read_header_init: Prepare to read the image header.
5529 + *
5530 + * Allocate the buffer used for reading the header and re-enable readahead.
5531 + */
5532 +static void toi_read_header_init(void)
5533 +{
5534 +       toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
5535 +       more_readahead = 1;
5536 +}
5537 +
5538 +/**
5539 + * toi_bio_queue_write: Add a page of output to the queue of pages to write.
5540 + *
5540 + * @full_buffer: The page to queue; set to NULL once queued, so the
5540 + * caller knows to allocate a fresh page.
5540 + */
5541 +static void toi_bio_queue_write(char **full_buffer)
5542 +{
5543 +       struct page *page = virt_to_page(*full_buffer);
5544 +       unsigned long flags;
5545 +
5546 +       page->private = 0;
5547 +
5548 +       spin_lock_irqsave(&bio_queue_lock, flags);
5549 +       if (!bio_queue_head)
5550 +               bio_queue_head = page;
5551 +       else
5552 +               bio_queue_tail->private = (unsigned long) page;
5553 +
5554 +       bio_queue_tail = page;
5555 +       atomic_inc(&toi_bio_queue_size);
5556 +
5557 +       spin_unlock_irqrestore(&bio_queue_lock, flags);
5558 +       wake_up(&toi_io_queue_flusher);
5559 +
5560 +       *full_buffer = NULL;
5561 +}
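/*
 * For clarity, the queue built above is an intrusive singly-linked
 * list threaded through page->private (0 terminates it):
 *
 *   bio_queue_head -> page A -> page B -> page C (== bio_queue_tail)
 *                     private    private    private == 0
 *
 * toi_bio_queue_flush_pages() pops from the head while this function
 * appends at the tail, so pages are written in the order queued.
 */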
5562 +
5563 +/**
5564 + * toi_rw_cleanup: Cleanup after i/o.
5565 + *
5566 + * @writing: Whether we were reading or writing.
5567 + */
5568 +static int toi_rw_cleanup(int writing)
5569 +{
5570 +       int i;
5571 +
5572 +       if (writing) {
5573 +               int result;
5574 +
5575 +               if (toi_writer_buffer_posn)
5576 +                       toi_bio_queue_write(&toi_writer_buffer);
5577 +
5578 +               result = toi_bio_queue_flush_pages(0);
5579 +
5580 +               if (result)
5581 +                       return result;
5582 +
5583 +               if (current_stream == 2)
5584 +                       toi_extent_state_save(&toi_writer_posn,
5585 +                                       &toi_writer_posn_save[1]);
5586 +               else if (current_stream == 1)
5587 +                       toi_extent_state_save(&toi_writer_posn,
5588 +                                       &toi_writer_posn_save[3]);
5589 +       }
5590 +
5591 +       toi_finish_all_io();
5592 +
5593 +       while (readahead_list_head) {
5594 +               void *next = (void *) readahead_list_head->private;
5595 +               toi__free_page(12, readahead_list_head);
5596 +               readahead_list_head = next;
5597 +       }
5598 +
5599 +       readahead_list_tail = NULL;
5600 +
5601 +       if (!current_stream)
5602 +               return 0;
5603 +
5604 +       for (i = 0; i < NUM_REASONS; i++) {
5605 +               if (!atomic_read(&reasons[i]))
5606 +                       continue;
5607 +               printk(KERN_INFO "Waited for i/o due to %s %d times.\n",
5608 +                               reason_name[i], atomic_read(&reasons[i]));
5609 +               atomic_set(&reasons[i], 0);
5610 +       }
5611 +
5612 +       current_stream = 0;
5613 +       return 0;
5614 +}
5615 +
5616 +static int toi_start_one_readahead(int dedicated_thread)
5617 +{
5618 +       char *buffer = NULL;
5619 +       int oom = 0;
5620 +
5621 +       throttle_if_needed(ANY_REASON);
5622 +
5623 +       while (!buffer) {
5624 +               buffer = (char *) toi_get_zeroed_page(12,
5625 +                               TOI_ATOMIC_GFP);
5626 +               if (!buffer) {
5627 +                       if (oom && !dedicated_thread)
5628 +                               return -EIO;
5629 +
5630 +                       oom = 1;
5631 +                       set_free_mem_throttle();
5632 +                       do_bio_wait(5);
5633 +               }
5634 +       }
5635 +
5636 +       return toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
5637 +}
5638 +
5639 +/*
5640 + * toi_start_new_readahead
5641 + *
5642 + * Start readahead of image pages.
5643 + *
5644 + * No mutex needed because this is only ever called by one cpu.
5645 + */
5646 +static int toi_start_new_readahead(int dedicated_thread)
5647 +{
5648 +       int last_result, num_submitted = 0;
5649 +
5650 +       /* Start a new readahead? */
5651 +       if (!more_readahead)
5652 +               return 0;
5653 +
5654 +       do {
5655 +               int result = toi_start_one_readahead(dedicated_thread);
5656 +
5657 +               if (result == -EIO)
5658 +                       return 0;
5659 +               else
5660 +                       last_result = result;
5661 +
5662 +               if (last_result == -ENODATA)
5663 +                       more_readahead = 0;
5664 +
5665 +               if (!more_readahead && last_result) {
5666 +                       /*
5667 +                        * Don't complain about failing to do readahead past
5668 +                        * the end of storage.
5669 +                        */
5670 +                       if (last_result != -ENODATA)
5671 +                               printk(KERN_INFO
5672 +                                       "Begin read chunk returned %d.\n",
5673 +                                       last_result);
5674 +               } else
5675 +                       num_submitted++;
5676 +
5677 +       } while (more_readahead &&
5678 +                (dedicated_thread ||
5679 +                 (num_submitted < target_outstanding_io &&
5680 +                  atomic_read(&toi_io_in_progress) < target_outstanding_io)));
5681 +       return 0;
5682 +}
5683 +
5684 +static void bio_io_flusher(int writing)
5685 +{
5687 +       if (writing)
5688 +               toi_bio_queue_flush_pages(1);
5689 +       else
5690 +               toi_start_new_readahead(1);
5691 +}
5692 +
5693 +/**
5694 + * toi_bio_get_next_page_read: Read a disk page with readahead.
5695 + *
5696 + * Read a page from disk, submitting readahead and cleaning up finished i/o
5697 + * while we wait for the page we're after.
5698 + */
5699 +static int toi_bio_get_next_page_read(int no_readahead)
5700 +{
5701 +       unsigned long *virt;
5702 +       struct page *next;
5703 +
5704 +       /*
5705 +        * When reading the second page of the header, we have to
5706 +        * delay submitting the read until after we've gotten the
5707 +        * extents out of the first page.
5708 +        */
5709 +       if (unlikely(no_readahead && toi_start_one_readahead(0))) {
5710 +               printk(KERN_INFO "No readahead and toi_start_one_readahead "
5711 +                               "returned non-zero.\n");
5712 +               return -EIO;
5713 +       }
5714 +
5715 +       /*
5716 +        * On SMP, we may need to wait for the first readahead
5717 +        * to be submitted.
5718 +        */
5719 +       if (unlikely(!readahead_list_head)) {
5720 +               BUG_ON(!more_readahead);
5721 +               do {
5722 +                       cpu_relax();
5723 +               } while (!readahead_list_head);
5724 +       }
5725 +
5726 +       if (PageLocked(readahead_list_head)) {
5727 +               waiting_on = readahead_list_head;
5728 +               do_bio_wait(0);
5729 +       }
5730 +
5731 +       virt = page_address(readahead_list_head);
5732 +       memcpy(toi_writer_buffer, virt, PAGE_SIZE);
5733 +
5734 +       next = (struct page *) readahead_list_head->private;
5735 +       toi__free_page(12, readahead_list_head);
5736 +       readahead_list_head = next;
5737 +       return 0;
5738 +}
5739 +
5740 +/**
5741 + * toi_bio_queue_flush_pages: Submit any queued pages for i/o.
5742 + *
5742 + * @dedicated_thread: If set, run as the dedicated i/o flusher,
5742 + * sleeping until more pages arrive or we're told to finish.
5742 + */
5743 +
5744 +static int toi_bio_queue_flush_pages(int dedicated_thread)
5745 +{
5746 +       unsigned long flags;
5747 +       int result = 0;
5748 +
5749 +top:
5750 +       spin_lock_irqsave(&bio_queue_lock, flags);
5751 +       while (bio_queue_head) {
5752 +               struct page *page = bio_queue_head;
5753 +               bio_queue_head = (struct page *) page->private;
5754 +               if (bio_queue_tail == page)
5755 +                       bio_queue_tail = NULL;
5756 +               atomic_dec(&toi_bio_queue_size);
5757 +               spin_unlock_irqrestore(&bio_queue_lock, flags);
5758 +               result = toi_bio_rw_page(WRITE, page, 0, 11);
5759 +               if (result)
5760 +                       return result;
5761 +               spin_lock_irqsave(&bio_queue_lock, flags);
5762 +       }
5763 +       spin_unlock_irqrestore(&bio_queue_lock, flags);
5764 +
5765 +       if (dedicated_thread) {
5766 +               wait_event(toi_io_queue_flusher, bio_queue_head ||
5767 +                               toi_bio_queue_flusher_should_finish);
5768 +               if (likely(!toi_bio_queue_flusher_should_finish))
5769 +                       goto top;
5770 +               toi_bio_queue_flusher_should_finish = 0;
5771 +       }
5772 +       return 0;
5773 +}
5774 +
5775 +/**
5776 + * toi_bio_get_new_page: Allocate a fresh output page.
5777 + *
5777 + * @full_buffer: Where to store the page; we retry (with throttling)
5777 + * until the allocation succeeds.
5777 + */
5778 +static void toi_bio_get_new_page(char **full_buffer)
5779 +{
5780 +       throttle_if_needed(ANY_REASON);
5781 +
5782 +       while (!*full_buffer) {
5783 +               *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
5784 +               if (!*full_buffer) {
5785 +                       set_free_mem_throttle();
5786 +                       do_bio_wait(3);
5787 +               }
5788 +       }
5789 +}
5790 +
5791 +/*
5792 + * toi_rw_buffer: Combine smaller buffers into PAGE_SIZE I/O.
5793 + *
5794 + * @writing: Whether writing (or reading).
5795 + * @buffer: The start of the buffer to write or fill.
5796 + * @buffer_size: The size of the buffer to write or fill.
5796 + * @no_readahead: Don't submit readahead when reading (used while the
5796 + * extent information is still being read).
5797 + */
5798 +static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
5799 +               int no_readahead)
5800 +{
5801 +       int bytes_left = buffer_size;
5802 +
5803 +       while (bytes_left) {
5804 +               char *source_start = buffer + buffer_size - bytes_left;
5805 +               char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
5806 +               int capacity = PAGE_SIZE - toi_writer_buffer_posn;
5807 +               char *to = writing ? dest_start : source_start;
5808 +               char *from = writing ? source_start : dest_start;
5809 +
5810 +               if (bytes_left <= capacity) {
5811 +                       memcpy(to, from, bytes_left);
5812 +                       toi_writer_buffer_posn += bytes_left;
5813 +                       return 0;
5814 +               }
5815 +
5816 +               /* Complete this page and start a new one */
5817 +               memcpy(to, from, capacity);
5818 +               bytes_left -= capacity;
5819 +
5820 +               if (!writing) {
5821 +                       int result = toi_bio_get_next_page_read(no_readahead);
5822 +                       if (result)
5823 +                               return result;
5824 +               } else {
5825 +                       toi_bio_queue_write(&toi_writer_buffer);
5826 +                       toi_bio_get_new_page(&toi_writer_buffer);
5827 +               }
5828 +
5829 +               toi_writer_buffer_posn = 0;
5830 +               toi_cond_pause(0, NULL);
5831 +       }
5832 +
5833 +       return 0;
5834 +}
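/*
 * Worked example, assuming PAGE_SIZE == 4096: writing a 200-byte
 * buffer when toi_writer_buffer_posn == 4000.
 *
 *   capacity = 4096 - 4000 = 96  -> copy bytes 0..95; the page is full
 *   queue the page, allocate a fresh one, posn = 0
 *   bytes_left = 104 <= 4096     -> copy bytes 96..199, posn = 104
 *
 * Reading follows the same arithmetic with source and destination
 * swapped, refilling from toi_bio_get_next_page_read() instead.
 */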
5835 +
5836 +/**
5837 + * toi_bio_read_page - read a page of the image.
5838 + *
5839 + * @pfn: The pfn where the data belongs.
5840 + * @buffer_page: The page containing the (possibly compressed) data.
5841 + * @buf_size: The number of bytes on @buffer_page used.
5842 + *
5843 + * Read a (possibly compressed) page from the image, into buffer_page,
5844 + * returning its pfn and the buffer size.
5845 + */
5846 +static int toi_bio_read_page(unsigned long *pfn, struct page *buffer_page,
5847 +               unsigned int *buf_size)
5848 +{
5849 +       int result = 0;
5850 +       char *buffer_virt = kmap(buffer_page);
5851 +
5852 +       /* Only call start_new_readahead if we don't have a dedicated thread */
5853 +       if (current == toi_queue_flusher && toi_start_new_readahead(0)) {
5854 +               printk(KERN_INFO "Queue flusher: toi_start_new_readahead "
5855 +                               "returned non-zero.\n");
5856 +               return -EIO;
5857 +       }
5858 +
5859 +       my_mutex_lock(0, &toi_bio_mutex);
5860 +
5861 +       if (toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) ||
5862 +           toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) ||
5863 +           toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
5864 +               abort_hibernate(TOI_FAILED_IO, "Read of data failed.");
5865 +               result = 1;
5866 +       }
5867 +
5868 +       my_mutex_unlock(0, &toi_bio_mutex);
5869 +       kunmap(buffer_page);
5870 +       return result;
5871 +}
5872 +
5873 +/**
5874 + * toi_bio_write_page - Write a page of the image.
5875 + *
5876 + * @pfn: The pfn where the data belongs.
5877 + * @buffer_page: The page containing the (possibly compressed) data.
5878 + * @buf_size: The number of bytes on @buffer_page used.
5879 + *
5880 + * Write a (possibly compressed) page to the image from the buffer, together
5881 + * with its index and buffer size.
5882 + */
5883 +static int toi_bio_write_page(unsigned long pfn, struct page *buffer_page,
5884 +               unsigned int buf_size)
5885 +{
5886 +       char *buffer_virt;
5887 +       int result = 0, result2 = 0;
5888 +
5889 +       if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
5890 +               return 0;
5891 +
5892 +       my_mutex_lock(1, &toi_bio_mutex);
5893 +       buffer_virt = kmap(buffer_page);
5894 +
5895 +       if (toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) ||
5896 +           toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) ||
5897 +           toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
5898 +               printk(KERN_INFO "toi_rw_buffer returned non-zero to "
5899 +                               "toi_bio_write_page.\n");
5900 +               result = -EIO;
5901 +       }
5902 +
5903 +       kunmap(buffer_page);
5904 +       my_mutex_unlock(1, &toi_bio_mutex);
5905 +
5906 +       if (current == toi_queue_flusher)
5907 +               result2 = toi_bio_queue_flush_pages(0);
5908 +
5909 +       return result ? result : result2;
5910 +}
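/*
 * Conceptually, each call above appends one record to the stream. A
 * sketch of the layout (records are packed back-to-back by
 * toi_rw_buffer() and may straddle page boundaries); illustrative only:
 */
struct toi_page_record_sketch {
	unsigned long pfn;	/* where the data belongs at resume */
	unsigned int size;	/* bytes of (possibly compressed) data */
	char data[];		/* 'size' bytes follow immediately */
};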
5911 +
5912 +/**
5913 + * toi_rw_header_chunk: Read or write a portion of the image header.
5914 + *
5915 + * @writing: Whether reading or writing.
5916 + * @owner: The module for which we're writing. Used for confirming that modules
5917 + * don't use more header space than they asked for.
5918 + * @buffer: Address of the data to write.
5919 + * @buffer_size: Size of the data buffer.
5920 + * @no_readahead: Don't try to start readahead (when still getting extents)
5921 + */
5922 +static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
5923 +               char *buffer, int buffer_size, int no_readahead)
5924 +{
5925 +       int result = 0;
5926 +
5927 +       if (owner) {
5928 +               owner->header_used += buffer_size;
5929 +               toi_message(TOI_HEADER, TOI_LOW, 1,
5930 +                       "Header: %s : %d bytes (%d/%d).\n",
5931 +                       owner->name, buffer_size, owner->header_used,
5932 +                       owner->header_requested);
5933 +               if (owner->header_used > owner->header_requested) {
5934 +                       printk(KERN_EMERG "TuxOnIce module %s is using more "
5935 +                               "header space (%u) than it requested (%u).\n",
5936 +                               owner->name,
5937 +                               owner->header_used,
5938 +                               owner->header_requested);
5939 +                       return buffer_size;
5940 +               }
5941 +       } else
5942 +               toi_message(TOI_HEADER, TOI_LOW, 1,
5943 +                       "Header: (No owner): %d bytes.\n", buffer_size);
5944 +
5945 +       if (!writing && !no_readahead)
5946 +               result = toi_start_new_readahead(0);
5947 +
5948 +       if (!result)
5949 +               result = toi_rw_buffer(writing, buffer, buffer_size,
5950 +                               no_readahead);
5951 +
5952 +       return result;
5953 +}
5954 +
5955 +static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
5956 +               char *buffer, int size)
5957 +{
5958 +       return _toi_rw_header_chunk(writing, owner, buffer, size, 0);
5959 +}
5960 +
5961 +static int toi_rw_header_chunk_noreadahead(int writing,
5962 +               struct toi_module_ops *owner, char *buffer, int size)
5963 +{
5964 +       return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
5965 +}
5966 +
5967 +/**
5968 + * write_header_chunk_finish: Flush any buffered header data.
5969 + */
5970 +static int write_header_chunk_finish(void)
5971 +{
5972 +       int result = 0;
5973 +
5974 +       if (toi_writer_buffer_posn)
5975 +               toi_bio_queue_write(&toi_writer_buffer);
5976 +
5977 +       toi_bio_queue_flush_pages(0);
5978 +       toi_finish_all_io();
5979 +
5980 +       return result;
5981 +}
5982 +
5983 +/**
5984 + * toi_bio_storage_needed: Get the amount of storage needed for my fns.
5985 + */
5986 +static int toi_bio_storage_needed(void)
5987 +{
5988 +       return 2 * sizeof(int);
5989 +}
5990 +
5991 +/**
5992 + * toi_bio_save_config_info: Save block i/o config to image header.
5993 + *
5994 + * @buf: PAGE_SIZE'd buffer into which data should be saved.
5995 + */
5996 +static int toi_bio_save_config_info(char *buf)
5997 +{
5998 +       int *ints = (int *) buf;
5999 +       ints[0] = target_outstanding_io;
6000 +       return sizeof(int);
6001 +}
6002 +
6003 +/**
6004 + * toi_bio_load_config_info: Restore block i/o config.
6005 + *
6006 + * @buf: Data to be reloaded.
6007 + * @size: Size of the buffer saved.
6008 + */
6009 +static void toi_bio_load_config_info(char *buf, int size)
6010 +{
6011 +       int *ints = (int *) buf;
6012 +       target_outstanding_io  = ints[0];
6013 +}
6014 +
6015 +/**
6016 + * toi_bio_initialise: Initialise bio code at start of some action.
6017 + *
6018 + * @starting_cycle: Whether starting a hibernation cycle, or just reading or
6019 + * writing a sysfs value.
6020 + */
6021 +static int toi_bio_initialise(int starting_cycle)
6022 +{
6023 +       if (starting_cycle) {
6024 +               max_outstanding_writes = 0;
6025 +               max_outstanding_reads = 0;
6026 +               toi_queue_flusher = current;
6027 +#ifdef MEASURE_MUTEX_CONTENTION
6028 +               {
6029 +               int i, j, k;
6030 +
6031 +               for (i = 0; i < 2; i++)
6032 +                       for (j = 0; j < 2; j++)
6033 +                               for_each_online_cpu(k)
6034 +                                       mutex_times[i][j][k] = 0;
6035 +               }
6036 +#endif
6037 +       }
6038 +
6039 +       return 0;
6040 +}
6041 +
6042 +/**
6043 + * toi_bio_cleanup: Cleanup after some action.
6044 + *
6045 + * @finishing_cycle: Whether completing a cycle.
6046 + */
6047 +static void toi_bio_cleanup(int finishing_cycle)
6048 +{
6049 +       if (toi_writer_buffer) {
6050 +               toi_free_page(11, (unsigned long) toi_writer_buffer);
6051 +               toi_writer_buffer = NULL;
6052 +       }
6053 +}
6054 +
6055 +struct toi_bio_ops toi_bio_ops = {
6056 +       .bdev_page_io = toi_bdev_page_io,
6057 +       .finish_all_io = toi_finish_all_io,
6058 +       .monitor_outstanding_io = toi_monitor_outstanding_io,
6059 +       .update_throughput_throttle = update_throughput_throttle,
6060 +       .forward_one_page = go_next_page,
6061 +       .set_extra_page_forward = set_extra_page_forward,
6062 +       .set_devinfo = toi_set_devinfo,
6063 +       .read_page = toi_bio_read_page,
6064 +       .write_page = toi_bio_write_page,
6065 +       .rw_init = toi_rw_init,
6066 +       .rw_cleanup = toi_rw_cleanup,
6067 +       .read_header_init = toi_read_header_init,
6068 +       .rw_header_chunk = toi_rw_header_chunk,
6069 +       .rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead,
6070 +       .write_header_chunk_finish = write_header_chunk_finish,
6071 +       .io_flusher = bio_io_flusher,
6072 +};
6073 +EXPORT_SYMBOL_GPL(toi_bio_ops);
6074 +
6075 +static struct toi_sysfs_data sysfs_params[] = {
6076 +       SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io,
6077 +                       0, 16384, 0, NULL),
6078 +};
6079 +
6080 +static struct toi_module_ops toi_blockwriter_ops = {
6081 +       .name                                   = "lowlevel i/o",
6082 +       .type                                   = MISC_HIDDEN_MODULE,
6083 +       .directory                              = "block_io",
6084 +       .module                                 = THIS_MODULE,
6085 +       .print_debug_info                       = toi_bio_print_debug_stats,
6086 +       .memory_needed                          = toi_bio_memory_needed,
6087 +       .storage_needed                         = toi_bio_storage_needed,
6088 +       .save_config_info                       = toi_bio_save_config_info,
6089 +       .load_config_info                       = toi_bio_load_config_info,
6090 +       .initialise                             = toi_bio_initialise,
6091 +       .cleanup                                = toi_bio_cleanup,
6092 +
6093 +       .sysfs_data             = sysfs_params,
6094 +       .num_sysfs_entries      = sizeof(sysfs_params) /
6095 +               sizeof(struct toi_sysfs_data),
6096 +};
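/*
 * The sizeof division above is the open-coded form of the kernel's
 * ARRAY_SIZE() macro from <linux/kernel.h>; the equivalent, less
 * fragile spelling would be:
 *
 *	.num_sysfs_entries	= ARRAY_SIZE(sysfs_params),
 */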
6097 +
6098 +/**
6099 + * toi_block_io_load: Load time routine for block i/o module.
6100 + *
6101 + * Register block i/o ops and sysfs entries.
6102 + */
6103 +static __init int toi_block_io_load(void)
6104 +{
6105 +       return toi_register_module(&toi_blockwriter_ops);
6106 +}
6107 +
6108 +#ifdef MODULE
6109 +static __exit void toi_block_io_unload(void)
6110 +{
6111 +       toi_unregister_module(&toi_blockwriter_ops);
6112 +}
6113 +
6114 +module_init(toi_block_io_load);
6115 +module_exit(toi_block_io_unload);
6116 +MODULE_LICENSE("GPL");
6117 +MODULE_AUTHOR("Nigel Cunningham");
6118 +MODULE_DESCRIPTION("TuxOnIce block io functions");
6119 +#else
6120 +late_initcall(toi_block_io_load);
6121 +#endif
6122 diff --git a/kernel/power/tuxonice_block_io.h b/kernel/power/tuxonice_block_io.h
6123 new file mode 100644
6124 index 0000000..364e6e2
6125 --- /dev/null
6126 +++ b/kernel/power/tuxonice_block_io.h
6127 @@ -0,0 +1,59 @@
6128 +/*
6129 + * kernel/power/tuxonice_block_io.h
6130 + *
6131 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
6132 + * Copyright (C) 2006 Red Hat, inc.
6133 + *
6134 + * Distributed under GPLv2.
6135 + *
6136 + * This file contains declarations for functions exported from
6137 + * tuxonice_block_io.c, which contains low level io functions.
6138 + */
6139 +
6140 +#include <linux/buffer_head.h>
6141 +#include "tuxonice_extent.h"
6142 +
6143 +struct toi_bdev_info {
6144 +       struct block_device *bdev;
6145 +       dev_t dev_t;
6146 +       int bmap_shift;
6147 +       int blocks_per_page;
6148 +};
6149 +
6150 +/*
6151 + * Our exported interface so the swapwriter and filewriter don't
6152 + * need these functions duplicated.
6153 + */
6154 +struct toi_bio_ops {
6155 +       int (*bdev_page_io) (int rw, struct block_device *bdev, long pos,
6156 +                       struct page *page);
6157 +       void (*check_io_stats) (void);
6158 +       void (*reset_io_stats) (void);
6159 +       void (*monitor_outstanding_io) (void);
6160 +       void (*update_throughput_throttle) (int jif_index);
6161 +       void (*finish_all_io) (void);
6162 +       int (*forward_one_page) (int writing);
6163 +       void (*set_extra_page_forward) (void);
6164 +       void (*set_devinfo) (struct toi_bdev_info *info);
6165 +       int (*read_page) (unsigned long *index, struct page *buffer_page,
6166 +                       unsigned int *buf_size);
6167 +       int (*write_page) (unsigned long index, struct page *buffer_page,
6168 +                       unsigned int buf_size);
6169 +       void (*read_header_init) (void);
6170 +       int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
6171 +                       char *buffer, int buffer_size);
6172 +       int (*rw_header_chunk_noreadahead) (int rw,
6173 +                       struct toi_module_ops *owner,
6174 +                       char *buffer, int buffer_size);
6175 +       int (*write_header_chunk_finish) (void);
6176 +       int (*rw_init) (int rw, int stream_number);
6177 +       int (*rw_cleanup) (int rw);
6178 +       void (*io_flusher) (int rw);
6179 +};
6180 +
6181 +extern struct toi_bio_ops toi_bio_ops;
6182 +
6183 +extern char *toi_writer_buffer;
6184 +extern int toi_writer_buffer_posn;
6185 +extern struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4];
6186 +extern struct toi_extent_iterate_state toi_writer_posn;
6187 diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c
6188 new file mode 100644
6189 index 0000000..512a6e7
6190 --- /dev/null
6191 +++ b/kernel/power/tuxonice_builtin.c
6192 @@ -0,0 +1,321 @@
6193 +/*
6194 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
6195 + *
6196 + * This file is released under the GPLv2.
6197 + */
6198 +#include <linux/module.h>
6199 +#include <linux/resume-trace.h>
6200 +#include <linux/kernel.h>
6201 +#include <linux/swap.h>
6202 +#include <linux/syscalls.h>
6203 +#include <linux/bio.h>
6204 +#include <linux/root_dev.h>
6205 +#include <linux/freezer.h>
6206 +#include <linux/reboot.h>
6207 +#include <linux/writeback.h>
6208 +#include <linux/tty.h>
6209 +#include <linux/crypto.h>
6210 +#include <linux/cpu.h>
6211 +#include <linux/ctype.h>
6212 +#include "tuxonice_io.h"
6213 +#include "tuxonice.h"
6214 +#include "tuxonice_extent.h"
6215 +#include "tuxonice_block_io.h"
6216 +#include "tuxonice_netlink.h"
6217 +#include "tuxonice_prepare_image.h"
6218 +#include "tuxonice_ui.h"
6219 +#include "tuxonice_sysfs.h"
6220 +#include "tuxonice_pagedir.h"
6221 +#include "tuxonice_modules.h"
6222 +#include "tuxonice_builtin.h"
6223 +#include "tuxonice_power_off.h"
6224 +
6225 +/*
6226 + * Highmem related functions (x86 only).
6227 + */
6228 +
6229 +#ifdef CONFIG_HIGHMEM
6230 +
6231 +/**
6232 + * copyback_high: Restore highmem pages.
6233 + *
6234 + * Highmem data and pbe lists are/can be stored in highmem.
6235 + * The format is slightly different to the lowmem pbe lists
6236 + * used for the assembly code: the last pbe in each page is
6237 + * a struct page * instead of struct pbe *, pointing to the
6238 + * next page where pbes are stored (or NULL if it happens to be
6239 + * the end of the list). Since we don't want to generate
6240 + * unnecessary deltas against swsusp code, we use a cast
6241 + * instead of a union.
6242 + **/
6243 +
6244 +static void copyback_high(void)
6245 +{
6246 +       struct page *pbe_page = (struct page *) restore_highmem_pblist;
6247 +       struct pbe *this_pbe, *first_pbe;
6248 +       unsigned long *origpage, *copypage;
6249 +       int pbe_index = 1;
6250 +
6251 +       if (!pbe_page)
6252 +               return;
6253 +
6254 +       this_pbe = (struct pbe *) kmap_atomic(pbe_page, KM_BOUNCE_READ);
6255 +       first_pbe = this_pbe;
6256 +
6257 +       while (this_pbe) {
6258 +               int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1;
6259 +
6260 +               origpage = kmap_atomic((struct page *) this_pbe->orig_address,
6261 +                       KM_BIO_DST_IRQ);
6262 +               copypage = kmap_atomic((struct page *) this_pbe->address,
6263 +                       KM_BIO_SRC_IRQ);
6264 +
6265 +               while (loop >= 0) {
6266 +                       *(origpage + loop) = *(copypage + loop);
6267 +                       loop--;
6268 +               }
6269 +
6270 +               kunmap_atomic(origpage, KM_BIO_DST_IRQ);
6271 +               kunmap_atomic(copypage, KM_BIO_SRC_IRQ);
6272 +
6273 +               if (!this_pbe->next)
6274 +                       break;
6275 +
6276 +               if (pbe_index < PBES_PER_PAGE) {
6277 +                       this_pbe++;
6278 +                       pbe_index++;
6279 +               } else {
6280 +                       pbe_page = (struct page *) this_pbe->next;
6281 +                       kunmap_atomic(first_pbe, KM_BOUNCE_READ);
6282 +                       if (!pbe_page)
6283 +                               return;
6284 +                       this_pbe = (struct pbe *) kmap_atomic(pbe_page,
6285 +                                       KM_BOUNCE_READ);
6286 +                       first_pbe = this_pbe;
6287 +                       pbe_index = 1;
6288 +               }
6289 +       }
6290 +       kunmap_atomic(first_pbe, KM_BOUNCE_READ);
6291 +}
6292 +
6293 +#else /* CONFIG_HIGHMEM */
6294 +static void copyback_high(void) { }
6295 +#endif
6296 +
6297 +char toi_wait_for_keypress_dev_console(int timeout)
6298 +{
6299 +       int fd, this_timeout = 255;
6300 +       char key = '\0';
6301 +       struct termios t, t_backup;
6302 +
6303 +       /* We should be guaranteed /dev/console exists after populate_rootfs()
6304 +        * in init/main.c.
6305 +        */
6306 +       fd = sys_open("/dev/console", O_RDONLY, 0);
6307 +       if (fd < 0) {
6308 +               printk(KERN_INFO "Couldn't open /dev/console.\n");
6309 +               return key;
6310 +       }
6311 +
6312 +       if (sys_ioctl(fd, TCGETS, (long)&t) < 0)
6313 +               goto out_close;
6314 +
6315 +       memcpy(&t_backup, &t, sizeof(t));
6316 +
6317 +       t.c_lflag &= ~(ISIG|ICANON|ECHO);
6318 +       t.c_cc[VMIN] = 0;
6319 +
6320 +new_timeout:
6321 +       if (timeout > 0) {
6322 +               this_timeout = timeout < 26 ? timeout : 25;
6323 +               timeout -= this_timeout;
6324 +               this_timeout *= 10;
6325 +       }
6326 +
6327 +       t.c_cc[VTIME] = this_timeout;
6328 +
6329 +       if (sys_ioctl(fd, TCSETS, (long)&t) < 0)
6330 +               goto out_restore;
6331 +
6332 +       while (1) {
6333 +               if (sys_read(fd, &key, 1) <= 0) {
6334 +                       if (timeout)
6335 +                               goto new_timeout;
6336 +                       key = '\0';
6337 +                       break;
6338 +               }
6339 +               key = tolower(key);
6340 +               if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) {
6341 +                       if (key == 'c') {
6342 +                               set_toi_state(TOI_CONTINUE_REQ);
6343 +                               break;
6344 +                       } else if (key == ' ')
6345 +                               break;
6346 +               } else
6347 +                       break;
6348 +       }
6349 +
6350 +out_restore:
6351 +       sys_ioctl(fd, TCSETS, (long)&t_backup);
6352 +out_close:
6353 +       sys_close(fd);
6354 +
6355 +       return key;
6356 +}
6357 +EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console);
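/*
 * Worked example of the timeout chunking above: VTIME counts tenths of
 * a second and is a single byte, so waits longer than 25.5s are split
 * into chunks of at most 25s. For timeout = 60:
 *
 *   pass 1: this_timeout = 25, timeout = 35, VTIME = 250
 *   pass 2: this_timeout = 25, timeout = 10, VTIME = 250
 *   pass 3: this_timeout = 10, timeout = 0,  VTIME = 100
 *
 * A read that returns nothing while timeout is still non-zero loops
 * via new_timeout; once timeout hits zero, the wait ends with '\0'.
 */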
6358 +
6359 +struct toi_boot_kernel_data toi_bkd __nosavedata
6360 +               __attribute__((aligned(PAGE_SIZE))) = {
6361 +       MY_BOOT_KERNEL_DATA_VERSION,
6362 +       0,
6363 +#ifdef CONFIG_TOI_REPLACE_SWSUSP
6364 +       (1 << TOI_REPLACE_SWSUSP) |
6365 +#endif
6366 +       (1 << TOI_NO_FLUSHER_THREAD) |
6367 +       (1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG),
6368 +};
6369 +EXPORT_SYMBOL_GPL(toi_bkd);
6370 +
6371 +struct block_device *toi_open_by_devnum(dev_t dev, unsigned mode)
6372 +{
6373 +       struct block_device *bdev = bdget(dev);
6374 +       int err = -ENOMEM;
6375 +       int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
6376 +       flags |= O_NONBLOCK;
6377 +       if (bdev)
6378 +               err = blkdev_get(bdev, mode, flags);
6379 +       return err ? ERR_PTR(err) : bdev;
6380 +}
6381 +EXPORT_SYMBOL_GPL(toi_open_by_devnum);
6382 +
6383 +int toi_wait = CONFIG_TOI_DEFAULT_WAIT;
6384 +EXPORT_SYMBOL_GPL(toi_wait);
6385 +
6386 +struct toi_core_fns *toi_core_fns;
6387 +EXPORT_SYMBOL_GPL(toi_core_fns);
6388 +
6389 +struct memory_bitmap pageset1_map;
6390 +EXPORT_SYMBOL_GPL(pageset1_map);
6391 +
6392 +struct memory_bitmap pageset1_copy_map;
6393 +EXPORT_SYMBOL_GPL(pageset1_copy_map);
6394 +
6395 +unsigned long toi_result;
6396 +EXPORT_SYMBOL_GPL(toi_result);
6397 +
6398 +struct pagedir pagedir1 = {1};
6399 +EXPORT_SYMBOL_GPL(pagedir1);
6400 +
6401 +unsigned long toi_get_nonconflicting_page(void)
6402 +{
6403 +       return toi_core_fns->get_nonconflicting_page();
6404 +}
6405 +
6406 +int toi_post_context_save(void)
6407 +{
6408 +       return toi_core_fns->post_context_save();
6409 +}
6410 +
6411 +int toi_try_hibernate(void)
6412 +{
6413 +       if (!toi_core_fns)
6414 +               return -ENODEV;
6415 +
6416 +       return toi_core_fns->try_hibernate();
6417 +}
6418 +
6419 +static int num_resume_calls;
6420 +#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL
6421 +static int ignore_late_initcall = 1;
6422 +#else
6423 +static int ignore_late_initcall;
6424 +#endif
6425 +
6426 +void toi_try_resume(void)
6427 +{
6428 +       /* Don't let the call count eventually wrap around */
6429 +       if (num_resume_calls < 2)
6430 +               num_resume_calls++;
6431 +
6432 +       if (num_resume_calls == 1 && ignore_late_initcall) {
6433 +               printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n");
6434 +               return;
6435 +       }
6436 +
6437 +       if (toi_core_fns)
6438 +               toi_core_fns->try_resume();
6439 +       else
6440 +               printk(KERN_INFO "TuxOnIce core not loaded yet.\n");
6441 +}
6442 +
6443 +int toi_lowlevel_builtin(void)
6444 +{
6445 +       int error = 0;
6446 +
6447 +       save_processor_state();
6448 +       error = swsusp_arch_suspend();
6449 +       if (error)
6450 +               printk(KERN_ERR "Error %d hibernating\n", error);
6451 +
6452 +       /* Restore control flow appears here */
6453 +       if (!toi_in_hibernate) {
6454 +               copyback_high();
6455 +               set_toi_state(TOI_NOW_RESUMING);
6456 +       }
6457 +
6458 +       restore_processor_state();
6459 +
6460 +       return error;
6461 +}
6462 +EXPORT_SYMBOL_GPL(toi_lowlevel_builtin);
6463 +
6464 +unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
6465 +EXPORT_SYMBOL_GPL(toi_compress_bytes_in);
6466 +EXPORT_SYMBOL_GPL(toi_compress_bytes_out);
6467 +
6468 +unsigned long toi_state = ((1 << TOI_BOOT_TIME) |
6469 +               (1 << TOI_IGNORE_LOGLEVEL) |
6470 +               (1 << TOI_IO_STOPPED));
6471 +EXPORT_SYMBOL_GPL(toi_state);
6472 +
6473 +/* The number of hibernates we have started (some may have been cancelled) */
6474 +unsigned int nr_hibernates;
6475 +EXPORT_SYMBOL_GPL(nr_hibernates);
6476 +
6477 +int toi_running;
6478 +EXPORT_SYMBOL_GPL(toi_running);
6479 +
6480 +int toi_in_hibernate __nosavedata;
6481 +EXPORT_SYMBOL_GPL(toi_in_hibernate);
6482 +
6483 +__nosavedata struct pbe *restore_highmem_pblist;
6484 +EXPORT_SYMBOL_GPL(restore_highmem_pblist);
6485 +
6486 +static int __init toi_wait_setup(char *str)
6487 +{
6488 +       int value;
6489 +
6490 +       if (sscanf(str, "=%d", &value)) {
6491 +               if (value < -1 || value > 255)
6492 +                       printk(KERN_INFO "TuxOnIce_wait outside range -1 to "
6493 +                                       "255.\n");
6494 +               else
6495 +                       toi_wait = value;
6496 +       }
6497 +
6498 +       return 1;
6499 +}
6500 +
6501 +__setup("toi_wait", toi_wait_setup);
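/*
 * Example usage on the kernel command line (values outside -1..255 are
 * rejected with a message; presumably -1 means wait indefinitely):
 *
 *   toi_wait=10
 */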
6502 +
6503 +static int __init toi_ignore_late_initcall_setup(char *str)
6504 +{
6505 +       int value;
6506 +
6507 +       if (sscanf(str, "=%d", &value))
6508 +               ignore_late_initcall = value;
6509 +
6510 +       return 1;
6511 +}
6512 +
6513 +__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup);
6514 diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h
6515 new file mode 100644
6516 index 0000000..20a35c6
6517 --- /dev/null
6518 +++ b/kernel/power/tuxonice_builtin.h
6519 @@ -0,0 +1,27 @@
6520 +/*
6521 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
6522 + *
6523 + * This file is released under the GPLv2.
6524 + */
6525 +#include <asm/setup.h>
6526 +
6527 +extern struct toi_core_fns *toi_core_fns;
6528 +extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
6529 +extern unsigned int nr_hibernates;
6530 +extern int toi_in_hibernate;
6531 +
6532 +extern __nosavedata struct pbe *restore_highmem_pblist;
6533 +
6534 +int toi_lowlevel_builtin(void);
6535 +
6536 +#ifdef CONFIG_HIGHMEM
6537 +extern __nosavedata struct zone_data *toi_nosave_zone_list;
6538 +extern __nosavedata unsigned long toi_nosave_max_pfn;
6539 +#endif
6540 +
6541 +extern unsigned long toi_get_nonconflicting_page(void);
6542 +extern int toi_post_context_save(void);
6543 +
6544 +extern char toi_wait_for_keypress_dev_console(int timeout);
6545 +extern struct block_device *toi_open_by_devnum(dev_t dev, unsigned mode);
6546 +extern int toi_wait;
6547 diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c
6548 new file mode 100644
6549 index 0000000..1e5d62e
6550 --- /dev/null
6551 +++ b/kernel/power/tuxonice_checksum.c
6552 @@ -0,0 +1,373 @@
6553 +/*
6554 + * kernel/power/tuxonice_checksum.c
6555 + *
6556 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
6557 + * Copyright (C) 2006 Red Hat, inc.
6558 + *
6559 + * This file is released under the GPLv2.
6560 + *
6561 + * This file contains data checksum routines for TuxOnIce,
6562 + * using cryptoapi. They are used to locate any modifications
6563 + * made to pageset 2 while we're saving it.
6564 + */
6565 +
6566 +#include <linux/suspend.h>
6567 +#include <linux/highmem.h>
6568 +#include <linux/vmalloc.h>
6569 +#include <linux/crypto.h>
6570 +#include <linux/scatterlist.h>
6571 +
6572 +#include "tuxonice.h"
6573 +#include "tuxonice_modules.h"
6574 +#include "tuxonice_sysfs.h"
6575 +#include "tuxonice_io.h"
6576 +#include "tuxonice_pageflags.h"
6577 +#include "tuxonice_checksum.h"
6578 +#include "tuxonice_pagedir.h"
6579 +#include "tuxonice_alloc.h"
6580 +
6581 +static struct toi_module_ops toi_checksum_ops;
6582 +
6583 +/* Constant at the moment, but I might allow tuning later */
6584 +static char toi_checksum_name[32] = "md4";
6585 +/* Bytes per checksum */
6586 +#define CHECKSUM_SIZE (16)
6587 +
6588 +#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE)
6589 +
6590 +struct cpu_context {
6591 +       struct crypto_hash *transform;
6592 +       struct hash_desc desc;
6593 +       struct scatterlist sg[2];
6594 +       char *buf;
6595 +};
6596 +
6597 +static DEFINE_PER_CPU(struct cpu_context, contexts);
6598 +static int pages_allocated;
6599 +static unsigned long page_list;
6600 +
6601 +static int toi_num_resaved;
6602 +
6603 +static unsigned long this_checksum, next_page;
6604 +static int checksum_index;
6605 +
6606 +static inline int checksum_pages_needed(void)
6607 +{
6608 +       return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE);
6609 +}
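/*
 * Worked example, assuming PAGE_SIZE == 4096 and an 8-byte pointer:
 *
 *   CHECKSUMS_PER_PAGE = (4096 - 8) / 16 = 255
 *
 * Each checksum page stores a next-page pointer first, then packed
 * checksums (see tuxonice_get_next_checksum() below):
 *
 *   [ next page ptr ][ checksum 0 ][ checksum 1 ] ... [ checksum 254 ]
 *        8 bytes         16 bytes      16 bytes
 *
 * So a pageset2 of 100000 pages needs DIV_ROUND_UP(100000, 255) = 393
 * checksum pages, roughly 1.5 MiB.
 */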
6610 +
6611 +/* ---- Local buffer management ---- */
6612 +
6613 +/*
6614 + * toi_checksum_cleanup
6615 + *
6616 + * Frees memory allocated for our labours.
6617 + */
6618 +static void toi_checksum_cleanup(int ending_cycle)
6619 +{
6620 +       int cpu;
6621 +
6622 +       if (ending_cycle) {
6623 +               for_each_online_cpu(cpu) {
6624 +                       struct cpu_context *this = &per_cpu(contexts, cpu);
6625 +                       if (this->transform) {
6626 +                               crypto_free_hash(this->transform);
6627 +                               this->transform = NULL;
6628 +                               this->desc.tfm = NULL;
6629 +                       }
6630 +
6631 +                       if (this->buf) {
6632 +                               toi_free_page(27, (unsigned long) this->buf);
6633 +                               this->buf = NULL;
6634 +                       }
6635 +               }
6636 +       }
6637 +}
6638 +
6639 +/*
6640 + * toi_checksum_initialise
6641 + *
6642 + * Prepare to do some work by allocating buffers and transforms.
6643 + * Returns: Int: Zero on success (or when checksumming is disabled);
6644 + * non-zero if a transform or buffer could not be allocated.
6645 + */
6646 +static int toi_checksum_initialise(int starting_cycle)
6647 +{
6648 +       int cpu;
6649 +
6650 +       if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled)
6651 +               return 0;
6652 +
6653 +       if (!*toi_checksum_name) {
6654 +               printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n");
6655 +               return 1;
6656 +       }
6657 +
6658 +       for_each_online_cpu(cpu) {
6659 +               struct cpu_context *this = &per_cpu(contexts, cpu);
6660 +               struct page *page;
6661 +
6662 +               this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0);
6663 +               if (IS_ERR(this->transform)) {
6664 +                       printk(KERN_INFO "TuxOnIce: Failed to initialise the "
6665 +                               "%s checksum algorithm: %ld.\n",
6666 +                               toi_checksum_name, (long) this->transform);
6667 +                       this->transform = NULL;
6668 +                       return 1;
6669 +               }
6670 +
6671 +               this->desc.tfm = this->transform;
6672 +               this->desc.flags = 0;
6673 +
6674 +               page = toi_alloc_page(27, GFP_KERNEL);
6675 +               if (!page)
6676 +                       return 1;
6677 +               this->buf = page_address(page);
6678 +               sg_init_one(&this->sg[0], this->buf, PAGE_SIZE);
6679 +       }
6680 +       return 0;
6681 +}
6682 +
6683 +/*
6684 + * toi_checksum_print_debug_stats
6685 + * @buffer: Pointer to a buffer into which the debug info will be printed.
6686 + * @size: Size of the buffer.
6687 + *
6688 + * Print information to be recorded for debugging purposes into a buffer.
6689 + * Returns: Number of characters written to the buffer.
6690 + */
6691 +
6692 +static int toi_checksum_print_debug_stats(char *buffer, int size)
6693 +{
6694 +       int len;
6695 +
6696 +       if (!toi_checksum_ops.enabled)
6697 +               return scnprintf(buffer, size,
6698 +                       "- Checksumming disabled.\n");
6699 +
6700 +       len = scnprintf(buffer, size, "- Checksum method is '%s'.\n",
6701 +                       toi_checksum_name);
6702 +       len += scnprintf(buffer + len, size - len,
6703 +               "  %d pages resaved in atomic copy.\n", toi_num_resaved);
6704 +       return len;
6705 +}
6706 +
6707 +static int toi_checksum_memory_needed(void)
6708 +{
6709 +       return toi_checksum_ops.enabled ?
6710 +               checksum_pages_needed() << PAGE_SHIFT : 0;
6711 +}
6712 +
6713 +static int toi_checksum_storage_needed(void)
6714 +{
6715 +       if (toi_checksum_ops.enabled)
6716 +               return strlen(toi_checksum_name) + sizeof(int) + 1;
6717 +       else
6718 +               return 0;
6719 +}
6720 +
6721 +/*
6722 + * toi_checksum_save_config_info
6723 + * @buffer: Pointer to a buffer of size PAGE_SIZE.
6724 + *
6725 + * Save information needed when reloading the image at resume time.
6726 + * Returns: Number of bytes used for saving our data.
6727 + */
6728 +static int toi_checksum_save_config_info(char *buffer)
6729 +{
6730 +       int namelen = strlen(toi_checksum_name) + 1;
6731 +       int total_len;
6732 +
6733 +       *((unsigned int *) buffer) = namelen;
6734 +       strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen);
6735 +       total_len = sizeof(unsigned int) + namelen;
6736 +       return total_len;
6737 +}
6738 +
6739 +/* toi_checksum_load_config_info
6740 + * @buffer: Pointer to the start of the data.
6741 + * @size: Number of bytes that were saved.
6742 + *
6743 + * Description: Reload the information needed to verify the image's
6744 + * checksums at resume time.
6745 + */
6746 +static void toi_checksum_load_config_info(char *buffer, int size)
6747 +{
6748 +       int namelen;
6749 +
6750 +       namelen = *((unsigned int *) (buffer));
6750 +       /* Guard against a corrupt header overrunning the name buffer. */
6750 +       if (namelen > sizeof(toi_checksum_name))
6750 +               namelen = sizeof(toi_checksum_name);
6751 +       strncpy(toi_checksum_name, buffer + sizeof(unsigned int),
6752 +                       namelen);
6753 +       return;
6754 +}
6755 +
6756 +/*
6757 + * Free Checksum Memory
6758 + */
6759 +
6760 +void free_checksum_pages(void)
6761 +{
6762 +       while (pages_allocated) {
6763 +               unsigned long next = *((unsigned long *) page_list);
6764 +               ClearPageNosave(virt_to_page(page_list));
6765 +               toi_free_page(15, (unsigned long) page_list);
6766 +               page_list = next;
6767 +               pages_allocated--;
6768 +       }
6769 +}
6770 +
6771 +/*
6772 + * Allocate Checksum Memory
6773 + */
6774 +
6775 +int allocate_checksum_pages(void)
6776 +{
6777 +       int pages_needed = checksum_pages_needed();
6778 +
6779 +       if (!toi_checksum_ops.enabled)
6780 +               return 0;
6781 +
6782 +       while (pages_allocated < pages_needed) {
6783 +               unsigned long *new_page =
6784 +                 (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP);
6785 +               if (!new_page) {
6786 +                       printk(KERN_ERR "Unable to allocate checksum pages.\n");
6787 +                       return -ENOMEM;
6788 +               }
6789 +               SetPageNosave(virt_to_page(new_page));
6790 +               (*new_page) = page_list;
6791 +               page_list = (unsigned long) new_page;
6792 +               pages_allocated++;
6793 +       }
6794 +
6795 +       next_page = (unsigned long) page_list;
6796 +       checksum_index = 0;
6797 +
6798 +       return 0;
6799 +}
6800 +
6801 +char *tuxonice_get_next_checksum(void)
6802 +{
6803 +       if (!toi_checksum_ops.enabled)
6804 +               return NULL;
6805 +
6806 +       if (checksum_index % CHECKSUMS_PER_PAGE)
6807 +               this_checksum += CHECKSUM_SIZE;
6808 +       else {
6809 +               this_checksum = next_page + sizeof(void *);
6810 +               next_page = *((unsigned long *) next_page);
6811 +       }
6812 +
6813 +       checksum_index++;
6814 +       return (char *) this_checksum;
6815 +}
6816 +
6817 +int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
6818 +{
6819 +       char *pa;
6820 +       int result, cpu = smp_processor_id();
6821 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
6822 +
6823 +       if (!toi_checksum_ops.enabled)
6824 +               return 0;
6825 +
6826 +       pa = kmap(page);
6827 +       memcpy(ctx->buf, pa, PAGE_SIZE);
6828 +       kunmap(page);
6829 +       result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
6830 +                                               checksum_locn);
6831 +       return result;
6832 +}
6833 +/*
6834 + * Verify stored checksums, flagging changed pages for resaving
6835 + */
6836 +
6837 +void check_checksums(void)
6838 +{
6839 +       int pfn, index = 0, cpu = smp_processor_id();
6840 +       char current_checksum[CHECKSUM_SIZE];
6841 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
6842 +
6843 +       if (!toi_checksum_ops.enabled)
6844 +               return;
6845 +
6846 +       next_page = (unsigned long) page_list;
6847 +
6848 +       toi_num_resaved = 0;
6849 +       this_checksum = 0;
6850 +
6851 +       BITMAP_FOR_EACH_SET(pageset2_map, pfn) {
6852 +               int ret;
6853 +               char *pa;
6854 +               struct page *page = pfn_to_page(pfn);
6855 +
6856 +               if (index % CHECKSUMS_PER_PAGE) {
6857 +                       this_checksum += CHECKSUM_SIZE;
6858 +               } else {
6859 +                       this_checksum = next_page + sizeof(void *);
6860 +                       next_page = *((unsigned long *) next_page);
6861 +               }
6862 +
6863 +               /* Done when IRQs disabled so must be atomic */
6864 +               pa = kmap_atomic(page, KM_USER1);
6865 +               memcpy(ctx->buf, pa, PAGE_SIZE);
6866 +               kunmap_atomic(pa, KM_USER1);
6867 +               ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
6868 +                                                       current_checksum);
6869 +
6870 +               if (ret) {
6871 +                       printk(KERN_INFO "Digest failed. Returned %d.\n", ret);
6872 +                       return;
6873 +               }
6874 +
6875 +               if (memcmp(current_checksum, (char *) this_checksum,
6876 +                                                       CHECKSUM_SIZE)) {
6877 +                       SetPageResave(pfn_to_page(pfn));
6878 +                       toi_num_resaved++;
6879 +                       if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED))
6880 +                               set_abort_result(TOI_RESAVE_NEEDED);
6881 +               }
6882 +
6883 +               index++;
6884 +       }
6885 +}
6886 +
6887 +static struct toi_sysfs_data sysfs_params[] = {
6888 +       SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0,
6889 +                       NULL),
6890 +       SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action,
6891 +                       TOI_ABORT_ON_RESAVE_NEEDED, 0)
6892 +};
6893 +
6894 +/*
6895 + * Ops structure.
6896 + */
6897 +static struct toi_module_ops toi_checksum_ops = {
6898 +       .type                   = MISC_MODULE,
6899 +       .name                   = "checksumming",
6900 +       .directory              = "checksum",
6901 +       .module                 = THIS_MODULE,
6902 +       .initialise             = toi_checksum_initialise,
6903 +       .cleanup                = toi_checksum_cleanup,
6904 +       .print_debug_info       = toi_checksum_print_debug_stats,
6905 +       .save_config_info       = toi_checksum_save_config_info,
6906 +       .load_config_info       = toi_checksum_load_config_info,
6907 +       .memory_needed          = toi_checksum_memory_needed,
6908 +       .storage_needed         = toi_checksum_storage_needed,
6909 +
6910 +       .sysfs_data             = sysfs_params,
6911 +       .num_sysfs_entries      = sizeof(sysfs_params) /
6912 +               sizeof(struct toi_sysfs_data),
6913 +};
6914 +
6915 +/* ---- Registration ---- */
6916 +int toi_checksum_init(void)
6917 +{
6918 +       int result = toi_register_module(&toi_checksum_ops);
6919 +       return result;
6920 +}
6921 +
6922 +void toi_checksum_exit(void)
6923 +{
6924 +       toi_unregister_module(&toi_checksum_ops);
6925 +}
6926 diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h
6927 new file mode 100644
6928 index 0000000..84a9174
6929 --- /dev/null
6930 +++ b/kernel/power/tuxonice_checksum.h
6931 @@ -0,0 +1,32 @@
6932 +/*
6933 + * kernel/power/tuxonice_checksum.h
6934 + *
6935 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
6936 + * Copyright (C) 2006 Red Hat, inc.
6937 + *
6938 + * This file is released under the GPLv2.
6939 + *
6940 + * This file contains data checksum routines for TuxOnIce,
6941 + * using cryptoapi. They are used to locate any modifications
6942 + * made to pageset 2 while we're saving it.
6943 + */
6944 +
6945 +#if defined(CONFIG_TOI_CHECKSUM)
6946 +extern int toi_checksum_init(void);
6947 +extern void toi_checksum_exit(void);
6948 +void check_checksums(void);
6949 +int allocate_checksum_pages(void);
6950 +void free_checksum_pages(void);
6951 +char *tuxonice_get_next_checksum(void);
6952 +int tuxonice_calc_checksum(struct page *page, char *checksum_locn);
6953 +#else
6954 +static inline int toi_checksum_init(void) { return 0; }
6955 +static inline void toi_checksum_exit(void) { }
6956 +static inline void check_checksums(void) { }
6957 +static inline int allocate_checksum_pages(void) { return 0; }
6958 +static inline void free_checksum_pages(void) { }
6959 +static inline char *tuxonice_get_next_checksum(void) { return NULL; }
6960 +static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
6961 +       { return 0; }
6962 +#endif
6963 +
6964 diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c
6965 new file mode 100644
6966 index 0000000..405a1a1
6967 --- /dev/null
6968 +++ b/kernel/power/tuxonice_cluster.c
6969 @@ -0,0 +1,1069 @@
6970 +/*
6971 + * kernel/power/tuxonice_cluster.c
6972 + *
6973 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
6974 + *
6975 + * This file is released under the GPLv2.
6976 + *
6977 + * This file contains routines for cluster hibernation support.
6978 + *
6979 + * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
6980 + *
6981 + * How does it work?
6982 + *
6983 + * There is no 'master' node that tells everyone else what to do. All nodes
6984 + * send messages to the broadcast address/port, maintain a list of peers
6985 + * and figure out when to progress to the next step in hibernating or resuming.
6986 + * This makes us more fault tolerant when it comes to nodes coming and going
6987 + * (which may be more of an issue if we're hibernating when power supplies
6988 + * are unreliable).
6989 + *
6990 + * At boot time, we start a ktuxonice thread that handles communication with
6991 + * other nodes. This thread maintains a state machine that controls our progress
6992 + * through hibernating and resuming, keeping us in step with other nodes. Nodes
6993 + * are identified by their hw address.
6994 + *
6995 + * On startup, the node sends CLUSTER_PING on the configured interface's
6996 + * broadcast address, port $toi_cluster_port (see below) and begins to listen
6997 + * for other broadcast messages. CLUSTER_PING messages are repeated at
6998 + * intervals of 5 minutes, with a random offset to spread traffic out.
6999 + *
7000 + * A hibernation cycle is initiated from any node via
7001 + *
7002 + * echo > /sys/power/tuxonice/do_hibernate
7003 + *
7004 + * and (possibly) the hibernate script. At each step of the process, the node
7005 + * completes its work, and waits for all other nodes to signal completion of
7006 + * their work (or timeout) before progressing to the next step.
7007 + *
7008 + * Request/state  Action before reply  Possible reply  Next state
7009 + * HIBERNATE     capable, pre-script   HIBERNATE|ACK   NODE_PREP
7010 + *                                     HIBERNATE|NACK  INIT_0
7011 + *
7012 + * PREP                  prepare_image         PREP|ACK        IMAGE_WRITE
7013 + *                                     PREP|NACK       INIT_0
7014 + *                                     ABORT           RUNNING
7015 + *
7016 + * IO            write image           IO|ACK          power off
7017 + *                                     ABORT           POST_RESUME
7018 + *
7019 + * (Boot time)   check for image       IMAGE|ACK       RESUME_PREP
7020 + *                                     (Note 1)
7021 + *                                     IMAGE|NACK      (Note 2)
7022 + *
7023 + * PREP                  prepare read image    PREP|ACK        IMAGE_READ
7024 + *                                     PREP|NACK       (As NACK_IMAGE)
7025 + *
7026 + * IO            read image            IO|ACK          POST_RESUME
7027 + *
7028 + * POST_RESUME   thaw, post-script                     RUNNING
7029 + *
7030 + * INIT_0        init 0
7031 + *
7032 + * Other messages:
7033 + *
7034 + * - PING: Request for all other live nodes to send a PONG. Used at startup to
7035 + *   announce presence, when a node is suspected dead and periodically, in case
7036 + *   segments of the network are [un]plugged.
7037 + *
7038 + * - PONG: Response to a PING.
7039 + *
7040 + * - ABORT: Request to cancel writing an image.
7041 + *
7042 + * - BYE: Notification that this node is shutting down.
7043 + *
7044 + * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
7045 + * nodes which are slower to start up can get state synchronised. If a node
7046 + * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
7047 + * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
7048 + * must invalidate its image (if any) and boot normally.
7049 + *
7050 + * Note 2: May occur when one node lost power or powered off while others
7051 + * hibernated. This node waits for others to complete resuming (ACK_READ)
7052 + * before completing its boot, so that it appears as a failed node restarting.
7053 + *
7054 + * If any node has an image, then it also has a list of nodes that hibernated
7055 + * in synchronisation with it. The node will wait for other nodes to appear
7056 + * or timeout before beginning its restoration.
7057 + *
7058 + * If a node has no image, it needs to wait, in case other nodes which do have
7059 + * an image are going to resume, but are taking longer to announce their
7060 + * presence. For this reason, the user can specify a timeout value and a number
7061 + * of nodes detected before we just continue. (We might want to assume in a
7062 + * cluster of, say, 15 nodes, if 8 others have booted without finding an image,
7063 + * the remaining nodes will too. This might help in situations where some nodes
7064 + * are much slower to boot, or more subject to hardware failures or such like).
7065 + */
7066 +
7067 +#include <linux/suspend.h>
7068 +#include <linux/module.h>
7069 +#include <linux/moduleparam.h>
7070 +#include <linux/if.h>
7071 +#include <linux/rtnetlink.h>
7072 +#include <linux/ip.h>
7073 +#include <linux/udp.h>
7074 +#include <linux/in.h>
7075 +#include <linux/if_arp.h>
7076 +#include <linux/kthread.h>
7077 +#include <linux/wait.h>
7078 +#include <linux/netdevice.h>
7079 +#include <net/ip.h>
7080 +
7081 +#include "tuxonice.h"
7082 +#include "tuxonice_modules.h"
7083 +#include "tuxonice_sysfs.h"
7084 +#include "tuxonice_alloc.h"
7085 +#include "tuxonice_io.h"
7086 +
7087 +#if 1
7088 +#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
7089 +#else
7090 +#define PRINTK(a, b...) do { } while (0)
7091 +#endif
7092 +
7093 +static int loopback_mode;
7094 +static int num_local_nodes = 1;
7095 +#define MAX_LOCAL_NODES 8
7096 +#define SADDR (loopback_mode ? b->sid : h->saddr)
7097 +
7098 +#define MYNAME "TuxOnIce Clustering"
7099 +
7100 +enum cluster_message {
7101 +       MSG_ACK = 1,
7102 +       MSG_NACK = 2,
7103 +       MSG_PING = 4,
7104 +       MSG_ABORT = 8,
7105 +       MSG_BYE = 16,
7106 +       MSG_HIBERNATE = 32,
7107 +       MSG_IMAGE = 64,
7108 +       MSG_IO = 128,
7109 +       MSG_RUNNING = 256
7110 +};
7111 +
7112 +static char *str_message(int message)
7113 +{
7114 +       switch (message) {
7115 +       case MSG_PING:
7116 +               return "Ping";
7117 +       case MSG_ABORT:
7118 +               return "Abort";
7119 +       case MSG_ABORT | MSG_ACK:
7120 +               return "Abort acked";
7121 +       case MSG_ABORT | MSG_NACK:
7122 +               return "Abort nacked";
7123 +       case MSG_BYE:
7124 +               return "Bye";
7125 +       case MSG_BYE | MSG_ACK:
7126 +               return "Bye acked";
7127 +       case MSG_BYE | MSG_NACK:
7128 +               return "Bye nacked";
7129 +       case MSG_HIBERNATE:
7130 +               return "Hibernate request";
7131 +       case MSG_HIBERNATE | MSG_ACK:
7132 +               return "Hibernate ack";
7133 +       case MSG_HIBERNATE | MSG_NACK:
7134 +               return "Hibernate nack";
7135 +       case MSG_IMAGE:
7136 +               return "Image exists?";
7137 +       case MSG_IMAGE | MSG_ACK:
7138 +               return "Image does exist";
7139 +       case MSG_IMAGE | MSG_NACK:
7140 +               return "No image here";
7141 +       case MSG_IO:
7142 +               return "I/O";
7143 +       case MSG_IO | MSG_ACK:
7144 +               return "I/O okay";
7145 +       case MSG_IO | MSG_NACK:
7146 +               return "I/O failed";
7147 +       case MSG_RUNNING:
7148 +               return "Running";
7149 +       default:
7150 +               printk("Unrecognised message %d.\n", message);
7151 +               return "Unrecognised message (see dmesg)";
7152 +       }
7153 +}
7154 +
7155 +#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
7156 +#define MSG_STATE_MASK (~MSG_ACK_MASK)
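+
+/*
+ * A message word is one state bit, optionally OR-ed with an ack bit, so a
+ * received word splits cleanly in two. For "Hibernate ack"
+ * (MSG_HIBERNATE | MSG_ACK, i.e. 32 | 1 == 33):
+ *
+ *     message & MSG_STATE_MASK gives MSG_HIBERNATE (32);
+ *     message & MSG_ACK_MASK gives MSG_ACK (1).
+ *
+ * toi_recv() uses exactly this decomposition.
+ */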
7157 +
7158 +struct node_info {
7159 +       struct list_head member_list;
7160 +       wait_queue_head_t member_events;
7161 +       spinlock_t member_list_lock;
7162 +       spinlock_t receive_lock;
7163 +       int peer_count, ignored_peer_count;
7164 +       struct toi_sysfs_data sysfs_data;
7165 +       enum cluster_message current_message;
7166 +};
7167 +
7168 +struct node_info node_array[MAX_LOCAL_NODES];
7169 +
7170 +struct cluster_member {
7171 +       __be32 addr;
7172 +       enum cluster_message message;
7173 +       struct list_head list;
7174 +       int ignore;
7175 +};
7176 +
7177 +#define toi_cluster_port_send 3501
7178 +#define toi_cluster_port_recv 3502
7179 +
7180 +static struct net_device *net_dev;
7181 +static struct toi_module_ops toi_cluster_ops;
7182 +
7183 +static int toi_recv(struct sk_buff *skb, struct net_device *dev,
7184 +               struct packet_type *pt, struct net_device *orig_dev);
7185 +
7186 +static struct packet_type toi_cluster_packet_type = {
7187 +       .type = __constant_htons(ETH_P_IP),
7188 +       .func = toi_recv,
7189 +};
7190 +
7191 +struct toi_pkt {               /* BOOTP packet format */
7192 +       struct iphdr iph;       /* IP header */
7193 +       struct udphdr udph;     /* UDP header */
7194 +       u8 htype;               /* HW address type */
7195 +       u8 hlen;                /* HW address length */
7196 +       __be32 xid;             /* Transaction ID */
7197 +       __be16 secs;            /* Seconds since we started */
7198 +       __be16 flags;           /* Just what it says */
7199 +       u8 hw_addr[16];         /* Sender's HW address */
7200 +       u16 message;            /* Message */
7201 +       unsigned long sid;      /* Source ID for loopback testing */
7202 +};
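+
+/*
+ * The layout follows the BOOTP packet format; sid is a TuxOnIce addition
+ * so that, in loopback testing, SADDR resolves to the sending local
+ * node's id rather than the IP source address shared by all local nodes.
+ */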
7203 +
7204 +static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;
7205 +
7206 +static int added_pack;
7207 +
7208 +static int others_have_image;
7209 +
7210 +/* Key used to allow multiple clusters on the same lan */
7211 +static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
7212 +static char pre_hibernate_script[255] =
7213 +       CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
7214 +static char post_hibernate_script[255] =
7215 +       CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;
7216 +
7217 +/*                     List of cluster members                 */
7218 +static unsigned long continue_delay = 5 * HZ;
7219 +static unsigned long cluster_message_timeout = 3 * HZ;
7220 +
7221 +/*             === Membership list ===         */
7222 +
7223 +static void print_member_info(int index)
7224 +{
7225 +       struct cluster_member *this;
7226 +
7227 +       printk(KERN_INFO "==> Dumping node %d.\n", index);
7228 +
7229 +       list_for_each_entry(this, &node_array[index].member_list, list)
7230 +               printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
7231 +                               NIPQUAD(this->addr),
7232 +                               str_message(this->message),
7233 +                               this->ignore ? "(Ignored)" : "");
7234 +       printk(KERN_INFO "== Done ==\n");
7235 +}
7236 +
7237 +static struct cluster_member *__find_member(int index, __be32 addr)
7238 +{
7239 +       struct cluster_member *this;
7240 +
7241 +       list_for_each_entry(this, &node_array[index].member_list, list) {
7242 +               if (this->addr != addr)
7243 +                       continue;
7244 +
7245 +               return this;
7246 +       }
7247 +
7248 +       return NULL;
7249 +}
7250 +
7251 +static void set_ignore(int index, __be32 addr, struct cluster_member *this)
7252 +{
7253 +       if (this->ignore) {
7254 +               PRINTK("Node %d already ignoring %d.%d.%d.%d.\n",
7255 +                               index, NIPQUAD(addr));
7256 +               return;
7257 +       }
7258 +
7259 +       PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n",
7260 +                               index, NIPQUAD(addr));
7261 +       this->ignore = 1;
7262 +       node_array[index].ignored_peer_count++;
7263 +}
7264 +
7265 +static int __add_update_member(int index, __be32 addr, int message)
7266 +{
7267 +       struct cluster_member *this;
7268 +
7269 +       this = __find_member(index, addr);
7270 +       if (this) {
7271 +               if (this->message != message) {
7272 +                       this->message = message;
7273 +                       if ((message & MSG_NACK) &&
7274 +                           (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
7275 +                               set_ignore(index, addr, this);
7276 +                       PRINTK("Node %d sees node %d.%d.%d.%d now sending "
7277 +                                       "%s.\n", index, NIPQUAD(addr),
7278 +                                       str_message(message));
7279 +                       wake_up(&node_array[index].member_events);
7280 +               }
7281 +               return 0;
7282 +       }
7283 +
7284 +       this = (struct cluster_member *) toi_kzalloc(36,
7285 +                       sizeof(struct cluster_member), GFP_KERNEL);
7286 +
7287 +       if (!this)
7288 +               return -1;
7289 +
7290 +       this->addr = addr;
7291 +       this->message = message;
7292 +       this->ignore = 0;
7293 +       INIT_LIST_HEAD(&this->list);
7294 +
7295 +       node_array[index].peer_count++;
7296 +
7297 +       PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
7298 +                       NIPQUAD(addr), str_message(message));
7299 +
7300 +       if ((message & MSG_NACK) &&
7301 +           (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
7302 +               set_ignore(index, addr, this);
7303 +       list_add_tail(&this->list, &node_array[index].member_list);
7304 +       return 1;
7305 +}
7306 +
7307 +static int add_update_member(int index, __be32 addr, int message)
7308 +{
7309 +       int result;
7310 +       unsigned long flags;
7311 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7312 +       result = __add_update_member(index, addr, message);
7313 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7314 +
7315 +       print_member_info(index);
7316 +
7317 +       wake_up(&node_array[index].member_events);
7318 +
7319 +       return result;
7320 +}
7321 +
7322 +static void del_member(int index, __be32 addr)
7323 +{
7324 +       struct cluster_member *this;
7325 +       unsigned long flags;
7326 +
7327 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7328 +       this = __find_member(index, addr);
7329 +
7330 +       if (this) {
7331 +               list_del_init(&this->list);
7332 +               toi_kfree(36, this);
7333 +               node_array[index].peer_count--;
7334 +       }
7335 +
7336 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7337 +}
7338 +
7339 +/*             === Message transmission ===    */
7340 +
7341 +static void toi_send_if(int message, unsigned long my_id);
7342 +
7343 +/*
7344 + *  Process received TOI packet.
7345 + */
7346 +static int toi_recv(struct sk_buff *skb, struct net_device *dev,
7347 +               struct packet_type *pt, struct net_device *orig_dev)
7348 +{
7349 +       struct toi_pkt *b;
7350 +       struct iphdr *h;
7351 +       int len, result, index;
7352 +       unsigned long addr, message, ack;
7353 +
7354 +       /* Perform verifications before taking the lock.  */
7355 +       if (skb->pkt_type == PACKET_OTHERHOST)
7356 +               goto drop;
7357 +
7358 +       if (dev != net_dev)
7359 +               goto drop;
7360 +
7361 +       skb = skb_share_check(skb, GFP_ATOMIC);
7362 +       if (!skb)
7363 +               return NET_RX_DROP;
7364 +
7365 +       if (!pskb_may_pull(skb,
7366 +                          sizeof(struct iphdr) +
7367 +                          sizeof(struct udphdr)))
7368 +               goto drop;
7369 +
7370 +       b = (struct toi_pkt *)skb_network_header(skb);
7371 +       h = &b->iph;
7372 +
7373 +       if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
7374 +               goto drop;
7375 +
7376 +       /* Fragments are not supported */
7377 +       if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
7378 +               if (net_ratelimit())
7379 +                       printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
7380 +                              "cluster message.\n");
7381 +               goto drop;
7382 +       }
7383 +
7384 +       if (skb->len < ntohs(h->tot_len))
7385 +               goto drop;
7386 +
7387 +       if (ip_fast_csum((char *) h, h->ihl))
7388 +               goto drop;
7389 +
7390 +       if (b->udph.source != htons(toi_cluster_port_send) ||
7391 +           b->udph.dest != htons(toi_cluster_port_recv))
7392 +               goto drop;
7393 +
7394 +       if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
7395 +               goto drop;
7396 +
7397 +       len = ntohs(b->udph.len) - sizeof(struct udphdr);
7398 +
7399 +       /* Ok the front looks good, make sure we can get at the rest.  */
7400 +       if (!pskb_may_pull(skb, skb->len))
7401 +               goto drop;
7402 +
7403 +       b = (struct toi_pkt *)skb_network_header(skb);
7404 +       h = &b->iph;
7405 +
7406 +       addr = SADDR;
7407 +       PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
7408 +                       str_message(b->message), NIPQUAD(addr));
7409 +
7410 +       message = b->message & MSG_STATE_MASK;
7411 +       ack = b->message & MSG_ACK_MASK;
7412 +
7413 +       for (index = 0; index < num_local_nodes; index++) {
7414 +               int new_message = node_array[index].current_message,
7415 +                   old_message = new_message;
7416 +
7417 +               if (index == SADDR || !old_message) {
7418 +                       PRINTK("Ignoring node %d (offline or self).\n", index);
7419 +                       continue;
7420 +               }
7421 +
7422 +               /* One message at a time, please. */
7423 +               spin_lock(&node_array[index].receive_lock);
7424 +
7425 +               result = add_update_member(index, SADDR, b->message);
7426 +               if (result == -1) {
7427 +                       printk(KERN_INFO "Failed to add new cluster member "
7428 +                                       NIPQUAD_FMT ".\n",
7429 +                                       NIPQUAD(addr));
7430 +                       goto drop_unlock;
7431 +               }
7432 +
7433 +               switch (b->message & MSG_STATE_MASK) {
7434 +               case MSG_PING:
7435 +                       break;
7436 +               case MSG_ABORT:
7437 +                       break;
7438 +               case MSG_BYE:
7439 +                       break;
7440 +               case MSG_HIBERNATE:
7441 +                       /* Can I hibernate? */
7442 +                       new_message = MSG_HIBERNATE |
7443 +                               ((index & 1) ? MSG_NACK : MSG_ACK);
7444 +                       break;
7445 +               case MSG_IMAGE:
7446 +                       /* Can I resume? */
7447 +                       new_message = MSG_IMAGE |
7448 +                               ((index & 1) ? MSG_NACK : MSG_ACK);
7449 +                       if (new_message != old_message)
7450 +                               printk("Setting whether I can resume to %d.\n",
7451 +                                               new_message);
7452 +                       break;
7453 +               case MSG_IO:
7454 +                       new_message = MSG_IO | MSG_ACK;
7455 +                       break;
7456 +               case MSG_RUNNING:
7457 +                       break;
7458 +               default:
7459 +                       if (net_ratelimit())
7460 +                               printk(KERN_ERR "Unrecognised TuxOnIce cluster"
7461 +                                       " message %d from " NIPQUAD_FMT ".\n",
7462 +                                       b->message, NIPQUAD(addr));
7463 +               }
7464 +
7465 +               if (old_message != new_message) {
7466 +                       node_array[index].current_message = new_message;
7467 +                       printk(KERN_INFO ">>> Sending new message for node "
7468 +                                       "%d.\n", index);
7469 +                       toi_send_if(new_message, index);
7470 +               } else if (!ack) {
7471 +                       printk(KERN_INFO ">>> Resending message for node %d.\n",
7472 +                                       index);
7473 +                       toi_send_if(new_message, index);
7474 +               }
7475 +drop_unlock:
7476 +               spin_unlock(&node_array[index].receive_lock);
7477 +       }
7478 +
7479 +drop:
7480 +       /* Throw the packet out. */
7481 +       kfree_skb(skb);
7482 +
7483 +       return 0;
7484 +}
7485 +
7486 +/*
7487 + *  Send cluster message to single interface.
7488 + */
7489 +static void toi_send_if(int message, unsigned long my_id)
7490 +{
7491 +       struct sk_buff *skb;
7492 +       struct toi_pkt *b;
7493 +       int hh_len = LL_RESERVED_SPACE(net_dev);
7494 +       struct iphdr *h;
7495 +
7496 +       /* Allocate packet */
7497 +       skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL);
7498 +       if (!skb)
7499 +               return;
7500 +       skb_reserve(skb, hh_len);
7501 +       b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt));
7502 +       memset(b, 0, sizeof(struct toi_pkt));
7503 +
7504 +       /* Construct IP header */
7505 +       skb_reset_network_header(skb);
7506 +       h = ip_hdr(skb);
7507 +       h->version = 4;
7508 +       h->ihl = 5;
7509 +       h->tot_len = htons(sizeof(struct toi_pkt));
7510 +       h->frag_off = htons(IP_DF);
7511 +       h->ttl = 64;
7512 +       h->protocol = IPPROTO_UDP;
7513 +       h->daddr = htonl(INADDR_BROADCAST);
7514 +       h->check = ip_fast_csum((unsigned char *) h, h->ihl);
7515 +
7516 +       /* Construct UDP header */
7517 +       b->udph.source = htons(toi_cluster_port_send);
7518 +       b->udph.dest = htons(toi_cluster_port_recv);
7519 +       b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
7520 +       /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
7521 +
7522 +       /* Construct message */
7523 +       b->message = message;
7524 +       b->sid = my_id;
7525 +       b->htype = net_dev->type; /* dev->type is u16; values over 255 truncate */
7526 +       b->hlen = net_dev->addr_len;
7527 +       memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len);
7528 +       b->secs = htons(3); /* 3 seconds */
7529 +
7530 +       /* Chain packet down the line... */
7531 +       skb->dev = net_dev;
7532 +       skb->protocol = htons(ETH_P_IP);
7533 +       if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol),
7534 +                    net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) ||
7535 +                       dev_queue_xmit(skb) < 0)
7536 +               printk(KERN_INFO "E");
7537 +}
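+
+/*
+ * Every message is broadcast (daddr is INADDR_BROADCAST and the
+ * link-layer header is built with net_dev->broadcast), so each node on
+ * the segment sees all cluster traffic; filtering is left to toi_recv().
+ */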
7538 +
7539 +/*     =========================================               */
7540 +
7541 +/*                     kTOICluster                     */
7542 +
7543 +static atomic_t num_cluster_threads;
7544 +static DECLARE_WAIT_QUEUE_HEAD(clusterd_events);
7545 +
7546 +static int kTOICluster(void *data)
7547 +{
7548 +       unsigned long my_id;
7549 +
7550 +       my_id = atomic_add_return(1, &num_cluster_threads) - 1;
7551 +       node_array[my_id].current_message = (unsigned long) data;
7552 +
7553 +       PRINTK("kTOICluster daemon %lu starting.\n", my_id);
7554 +
7555 +       current->flags |= PF_NOFREEZE;
7556 +
7557 +       while (node_array[my_id].current_message) {
7558 +               toi_send_if(node_array[my_id].current_message, my_id);
7559 +               sleep_on_timeout(&clusterd_events,
7560 +                               cluster_message_timeout);
7561 +               PRINTK("Link state %lu is %d.\n", my_id,
7562 +                               node_array[my_id].current_message);
7563 +       }
7564 +
7565 +       toi_send_if(MSG_BYE, my_id);
7566 +       atomic_dec(&num_cluster_threads);
7567 +       wake_up(&clusterd_events);
7568 +
7569 +       PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
7570 +       __set_current_state(TASK_RUNNING);
7571 +       return 0;
7572 +}
7573 +
7574 +static void kill_clusterd(void)
7575 +{
7576 +       int i;
7577 +
7578 +       for (i = 0; i < num_local_nodes; i++) {
7579 +               if (node_array[i].current_message) {
7580 +                       PRINTK("Seeking to kill clusterd %d.\n", i);
7581 +                       node_array[i].current_message = 0;
7582 +               }
7583 +       }
7584 +       wait_event(clusterd_events,
7585 +                       !atomic_read(&num_cluster_threads));
7586 +       PRINTK("All cluster daemons have exited.\n");
7587 +}
7588 +
7589 +static int peers_not_in_message(int index, int message, int precise)
7590 +{
7591 +       struct cluster_member *this;
7592 +       unsigned long flags;
7593 +       int result = 0;
7594 +
7595 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7596 +       list_for_each_entry(this, &node_array[index].member_list, list) {
7597 +               if (this->ignore)
7598 +                       continue;
7599 +
7600 +               PRINTK("Peer %d.%d.%d.%d sending %s. "
7601 +                       "Seeking %s.\n",
7602 +                       NIPQUAD(this->addr),
7603 +                       str_message(this->message), str_message(message));
7604 +               if ((precise ? this->message :
7605 +                                       this->message & MSG_STATE_MASK) !=
7606 +                                       message)
7607 +                       result++;
7608 +       }
7609 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7610 +       PRINTK("%d peers in sought message.\n", result);
7611 +       return result;
7612 +}
7613 +
7614 +static void reset_ignored(int index)
7615 +{
7616 +       struct cluster_member *this;
7617 +       unsigned long flags;
7618 +
7619 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
7620 +       list_for_each_entry(this, &node_array[index].member_list, list)
7621 +               this->ignore = 0;
7622 +       node_array[index].ignored_peer_count = 0;
7623 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
7624 +}
7625 +
7626 +static int peers_in_message(int index, int message, int precise)
7627 +{
7628 +       return node_array[index].peer_count -
7629 +               node_array[index].ignored_peer_count -
7630 +               peers_not_in_message(index, message, precise);
7631 +}
7632 +
7633 +static int time_to_continue(int index, unsigned long start, int message)
7634 +{
7635 +       int first = peers_not_in_message(index, message, 0);
7636 +       int second = peers_in_message(index, message, 1);
7637 +
7638 +       PRINTK("First part returns %d, second returns %d.\n", first, second);
7639 +
7640 +       if (!first && !second) {
7641 +               PRINTK("All peers answered message %d.\n",
7642 +                       message);
7643 +               return 1;
7644 +       }
7645 +
7646 +       if (time_after(jiffies, start + continue_delay)) {
7647 +               PRINTK("Timeout reached.\n");
7648 +               return 1;
7649 +       }
7650 +
7651 +       PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies,
7652 +                       start + continue_delay);
7653 +       return 0;
7654 +}
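+
+/*
+ * Both counts being zero means every non-ignored peer's last message
+ * carries the sought state bits plus an ACK or NACK bit, i.e. everyone
+ * has answered; failing that, we wait until continue_delay jiffies have
+ * passed since 'start'.
+ */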
7655 +
7656 +void toi_initiate_cluster_hibernate(void)
7657 +{
7658 +       int result;
7659 +       unsigned long start;
7660 +
7661 +       result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
7662 +       if (result)
7663 +               return;
7664 +
7665 +       toi_send_if(MSG_HIBERNATE, 0);
7666 +
7667 +       start = jiffies;
7668 +       wait_event(node_array[0].member_events,
7669 +                       time_to_continue(0, start, MSG_HIBERNATE));
7670 +
7671 +       if (test_action_state(TOI_FREEZER_TEST)) {
7672 +               toi_send_if(MSG_ABORT, 0);
7673 +
7674 +               start = jiffies;
7675 +               wait_event(node_array[0].member_events,
7676 +                       time_to_continue(0, start, MSG_RUNNING));
7677 +
7678 +               do_toi_step(STEP_QUIET_CLEANUP);
7679 +               return;
7680 +       }
7681 +
7682 +       toi_send_if(MSG_IO, 0);
7683 +
7684 +       result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
7685 +       if (result)
7686 +               return;
7687 +
7688 +       /* This code runs at resume time too! */
7689 +       if (toi_in_hibernate)
7690 +               result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
7691 +}
7692 +EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate);
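+
+/*
+ * The initiating node drives the table at the top of this file: prepare
+ * the image, broadcast HIBERNATE and wait for the replies, broadcast IO,
+ * save the image and power down. Under a freezer test it broadcasts
+ * ABORT instead and waits for the peers to report RUNNING again.
+ */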
7693 +
7694 +/* toi_cluster_print_debug_stats
7695 + *
7696 + * Description:        Print information to be recorded for debugging purposes into a
7697 + *             buffer.
7698 + * Arguments:  buffer: Pointer to a buffer into which the debug info will be
7699 + *                     printed.
7700 + *             size:   Size of the buffer.
7701 + * Returns:    Number of characters written to the buffer.
7702 + */
7703 +static int toi_cluster_print_debug_stats(char *buffer, int size)
7704 +{
7705 +       int len;
7706 +
7707 +       if (strlen(toi_cluster_iface))
7708 +               len = scnprintf(buffer, size,
7709 +                               "- Cluster interface is '%s'.\n",
7710 +                               toi_cluster_iface);
7711 +       else
7712 +               len = scnprintf(buffer, size,
7713 +                               "- Cluster support is disabled.\n");
7714 +       return len;
7715 +}
7716 +
7717 +/* cluster_memory_needed
7718 + *
7719 + * Description:        Tell the caller how much memory we need to operate during
7720 + *             hibernate/resume.
7721 + * Returns:    Unsigned long. Maximum number of bytes of memory required for
7722 + *             operation.
7723 + */
7724 +static int toi_cluster_memory_needed(void)
7725 +{
7726 +       return 0;
7727 +}
7728 +
7729 +static int toi_cluster_storage_needed(void)
7730 +{
7731 +       return 1 + strlen(toi_cluster_iface);
7732 +}
7733 +
7734 +/* toi_cluster_save_config_info
7735 + *
7736 + * Description:        Save information needed when reloading the image at resume time.
7737 + * Arguments:  Buffer:         Pointer to a buffer of size PAGE_SIZE.
7738 + * Returns:    Number of bytes used for saving our data.
7739 + */
7740 +static int toi_cluster_save_config_info(char *buffer)
7741 +{
7742 +       strcpy(buffer, toi_cluster_iface);
7743 +       return strlen(toi_cluster_iface) + 1;
7744 +}
7745 +
7746 +/* toi_cluster_load_config_info
7747 + *
7748 + * Description:        Reload information needed for declustering the image at
7749 + *             resume time.
7750 + * Arguments:  Buffer:         Pointer to the start of the data.
7751 + *             Size:           Number of bytes that were saved.
7752 + */
7753 +static void toi_cluster_load_config_info(char *buffer, int size)
7754 +{
7755 +       strncpy(toi_cluster_iface, buffer, size);
7757 +}
7758 +
7759 +static void cluster_startup(void)
7760 +{
7761 +       int have_image = do_check_can_resume(), i;
7762 +       unsigned long start = jiffies, initial_message;
7763 +       struct task_struct *p;
7764 +
7765 +       initial_message = MSG_IMAGE;
7766 +
7767 +       have_image = 1;
7768 +
7769 +       for (i = 0; i < num_local_nodes; i++) {
7770 +               PRINTK("Starting ktoiclusterd %d.\n", i);
7771 +               p = kthread_create(kTOICluster, (void *) initial_message,
7772 +                               "ktoiclusterd/%d", i);
7773 +               if (IS_ERR(p)) {
7774 +                       printk("Failed to start ktoiclusterd.\n");
7775 +                       return;
7776 +               }
7777 +
7778 +               wake_up_process(p);
7779 +       }
7780 +
7781 +       /* Wait for delay or someone else sending first message */
7782 +       wait_event(node_array[0].member_events, time_to_continue(0, start,
7783 +                               MSG_IMAGE));
7784 +
7785 +       others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);
7786 +
7787 +       printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
7788 +               " %d.\n", have_image ? "" : "don't ", others_have_image);
7789 +
7790 +       if (have_image) {
7791 +               int result;
7792 +
7793 +               /* Start to resume */
7794 +               printk(KERN_INFO "  === Starting to resume ===  \n");
7795 +               node_array[0].current_message = MSG_IO;
7796 +               toi_send_if(MSG_IO, 0);
7797 +
7798 +               /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
7799 +               result = 0;
7800 +
7801 +               if (!result) {
7802 +                       /*
7803 +                        * Atomic restore - we'll come back in the hibernation
7804 +                        * path.
7805 +                        */
7806 +
7807 +                       /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
7808 +                       result = 0;
7809 +
7810 +                       /* do_toi_step(STEP_QUIET_CLEANUP); */
7811 +               }
7812 +
7813 +               node_array[0].current_message |= MSG_NACK;
7814 +
7815 +               /* For debugging - disable for real life? */
7816 +               wait_event(node_array[0].member_events,
7817 +                               time_to_continue(0, start, MSG_IO));
7818 +       }
7819 +
7820 +       if (others_have_image) {
7821 +               /* Wait for them to resume */
7822 +               printk(KERN_INFO "Waiting for other nodes to resume.\n");
7823 +               start = jiffies;
7824 +               wait_event(node_array[0].member_events,
7825 +                               time_to_continue(0, start, MSG_RUNNING));
7826 +               if (peers_not_in_message(0, MSG_RUNNING, 0))
7827 +                       printk(KERN_INFO "Timed out while waiting for other "
7828 +                                       "nodes to resume.\n");
7829 +       }
7830 +
7831 +       /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
7832 +        * as appropriate.
7833 +        *
7834 +        * If we don't have an image:
7835 +        * - Wait until someone else says they have one, or conditions are met
7836 +        *   for continuing to boot (n machines or t seconds).
7837 +        * - If anyone has an image, wait for them to resume before continuing
7838 +        *   to boot.
7839 +        *
7840 +        * If we have an image:
7841 +        * - Wait until conditions are met before continuing to resume (n
7842 +        *   machines or t seconds). Send RESUME_PREP and freeze processes.
7843 +        *   NACK_PREP if freezing fails (shouldn't) and follow logic for
7844 +        *   us having no image above. On success, wait for [N]ACK_PREP from
7845 +        *   other machines. Read image (including atomic restore) until done.
7846 +        *   Wait for ACK_READ from others (should never fail). Thaw processes
7847 +        *   and do post-resume. (The section after the atomic restore is done
7848 +        *   via the code for hibernating).
7849 +        */
7850 +
7851 +       node_array[0].current_message = MSG_RUNNING;
7852 +}
7853 +
7854 +/* toi_cluster_open_iface
7855 + *
7856 + * Description:        Prepare to use an interface.
7857 + */
7858 +
7859 +static int toi_cluster_open_iface(void)
7860 +{
7861 +       struct net_device *dev;
7862 +
7863 +       rtnl_lock();
7864 +
7865 +       for_each_netdev(&init_net, dev) {
7866 +               if (/* dev == &init_net.loopback_dev || */
7867 +                   strcmp(dev->name, toi_cluster_iface))
7868 +                       continue;
7869 +
7870 +               net_dev = dev;
7871 +               break;
7872 +       }
7873 +
7874 +       rtnl_unlock();
7875 +
7876 +       if (!net_dev) {
7877 +               printk(KERN_ERR MYNAME ": Device %s not found.\n",
7878 +                               toi_cluster_iface);
7879 +               return -ENODEV;
7880 +       }
7881 +
7882 +       dev_add_pack(&toi_cluster_packet_type);
7883 +       added_pack = 1;
7884 +
7885 +       loopback_mode = (net_dev == init_net.loopback_dev);
7886 +       num_local_nodes = loopback_mode ? 8 : 1;
7887 +
7888 +       PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
7889 +                       loopback_mode ? "on" : "off", num_local_nodes);
7890 +
7891 +       cluster_startup();
7892 +       return 0;
7893 +}
7894 +
7895 +/* toi_cluster_close_iface
7896 + *
7897 + * Description: Stop using an interface.
7898 + */
7899 +
7900 +static int toi_cluster_close_iface(void)
7901 +{
7902 +       kill_clusterd();
7903 +       if (added_pack) {
7904 +               dev_remove_pack(&toi_cluster_packet_type);
7905 +               added_pack = 0;
7906 +       }
7907 +       return 0;
7908 +}
7909 +
7910 +static void write_side_effect(void)
7911 +{
7912 +       if (toi_cluster_ops.enabled) {
7913 +               toi_cluster_open_iface();
7914 +               set_toi_state(TOI_CLUSTER_MODE);
7915 +       } else {
7916 +               toi_cluster_close_iface();
7917 +               clear_toi_state(TOI_CLUSTER_MODE);
7918 +       }
7919 +}
7920 +
7921 +static void node_write_side_effect(void)
7922 +{
7923 +}
7924 +
7925 +/*
7926 + * data for our sysfs entries.
7927 + */
7928 +static struct toi_sysfs_data sysfs_params[] = {
7929 +       SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0,
7930 +                       NULL),
7931 +       SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0,
7932 +                       write_side_effect),
7933 +       SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL),
7934 +       SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script,
7935 +                       256, 0, NULL),
7936 +       SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script,
7937 +                       256, 0, STRING),
7938 +       SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ,
7939 +                       0)
7940 +};
7941 +
7942 +/*
7943 + * Ops structure.
7944 + */
7945 +
7946 +static struct toi_module_ops toi_cluster_ops = {
7947 +       .type                   = FILTER_MODULE,
7948 +       .name                   = "Cluster",
7949 +       .directory              = "cluster",
7950 +       .module                 = THIS_MODULE,
7951 +       .memory_needed          = toi_cluster_memory_needed,
7952 +       .print_debug_info       = toi_cluster_print_debug_stats,
7953 +       .save_config_info       = toi_cluster_save_config_info,
7954 +       .load_config_info       = toi_cluster_load_config_info,
7955 +       .storage_needed         = toi_cluster_storage_needed,
7956 +
7957 +       .sysfs_data             = sysfs_params,
7958 +       .num_sysfs_entries      = sizeof(sysfs_params) /
7959 +               sizeof(struct toi_sysfs_data),
7960 +};
7961 +
7962 +/* ---- Registration ---- */
7963 +
7964 +#ifdef MODULE
7965 +#define INIT static __init
7966 +#define EXIT static __exit
7967 +#else
7968 +#define INIT
7969 +#define EXIT
7970 +#endif
7971 +
7972 +INIT int toi_cluster_init(void)
7973 +{
7974 +       int temp = toi_register_module(&toi_cluster_ops), i;
7975 +       struct kobject *kobj = toi_cluster_ops.dir_kobj;
7976 +
7977 +       for (i = 0; i < MAX_LOCAL_NODES; i++) {
7978 +               node_array[i].current_message = 0;
7979 +               INIT_LIST_HEAD(&node_array[i].member_list);
7980 +               init_waitqueue_head(&node_array[i].member_events);
7981 +               spin_lock_init(&node_array[i].member_list_lock);
7982 +               spin_lock_init(&node_array[i].receive_lock);
7983 +
7984 +               /* Set up sysfs entry */
7985 +               /* Name is "node_" plus one digit (MAX_LOCAL_NODES is 8). */
7986 +               node_array[i].sysfs_data.attr.name = toi_kzalloc(8,
7987 +                               sizeof("node_") + 1, GFP_KERNEL);
7988 +               sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d",
7989 +                               i);
7990 +               node_array[i].sysfs_data.attr.mode = SYSFS_RW;
7991 +               node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
7992 +               node_array[i].sysfs_data.flags = 0;
7993 +               node_array[i].sysfs_data.data.integer.variable =
7994 +                       (int *) &node_array[i].current_message;
7995 +               node_array[i].sysfs_data.data.integer.minimum = 0;
7996 +               node_array[i].sysfs_data.data.integer.maximum = INT_MAX;
7997 +               node_array[i].sysfs_data.write_side_effect =
7998 +                       node_write_side_effect;
7999 +               toi_register_sysfs_file(kobj, &node_array[i].sysfs_data);
8000 +       }
8001 +
8002 +       toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
8003 +
8004 +       if (toi_cluster_ops.enabled)
8005 +               toi_cluster_open_iface();
8006 +
8007 +       return temp;
8008 +}
8009 +
8010 +EXIT void toi_cluster_exit(void)
8011 +{
8012 +       int i;
8013 +       toi_cluster_close_iface();
8014 +
8015 +       for (i = 0; i < MAX_LOCAL_NODES; i++)
8016 +               toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj,
8017 +                               &node_array[i].sysfs_data);
8018 +       toi_unregister_module(&toi_cluster_ops);
8019 +}
8020 +
8021 +static int __init toi_cluster_iface_setup(char *iface)
8022 +{
8023 +       toi_cluster_ops.enabled = (*iface &&
8024 +                       strcmp(iface, "off"));
8025 +
8026 +       if (toi_cluster_ops.enabled)
8027 +               strlcpy(toi_cluster_iface, iface, IFNAMSIZ);
8028 +
+       return 1;
+}
8029 +
8030 +__setup("toi_cluster=", toi_cluster_iface_setup);
8031 +
8032 +#ifdef MODULE
8033 +MODULE_LICENSE("GPL");
8034 +module_init(toi_cluster_init);
8035 +module_exit(toi_cluster_exit);
8036 +MODULE_AUTHOR("Nigel Cunningham");
8037 +MODULE_DESCRIPTION("Cluster Support for TuxOnIce");
8038 +#endif
8039 diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h
8040 new file mode 100644
8041 index 0000000..b0f8918
8042 --- /dev/null
8043 +++ b/kernel/power/tuxonice_cluster.h
8044 @@ -0,0 +1,19 @@
8045 +/*
8046 + * kernel/power/tuxonice_cluster.h
8047 + *
8048 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
8049 + * Copyright (C) 2006 Red Hat, inc.
8050 + *
8051 + * This file is released under the GPLv2.
8052 + */
8053 +
8054 +#ifdef CONFIG_TOI_CLUSTER
8055 +extern int toi_cluster_init(void);
8056 +extern void toi_cluster_exit(void);
8057 +extern void toi_initiate_cluster_hibernate(void);
8058 +#else
8059 +static inline int toi_cluster_init(void) { return 0; }
8060 +static inline void toi_cluster_exit(void) { }
8061 +static inline void toi_initiate_cluster_hibernate(void) { }
8062 +#endif
8063 +
8064 diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c
8065 new file mode 100644
8066 index 0000000..2c934d3
8067 --- /dev/null
8068 +++ b/kernel/power/tuxonice_compress.c
8069 @@ -0,0 +1,432 @@
8070 +/*
8071 + * kernel/power/tuxonice_compress.c
8072 + *
8073 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
8074 + *
8075 + * This file is released under the GPLv2.
8076 + *
8077 + * This file contains data compression routines for TuxOnIce,
8078 + * using cryptoapi.
8079 + */
8080 +
8081 +#include <linux/module.h>
8082 +#include <linux/suspend.h>
8083 +#include <linux/highmem.h>
8084 +#include <linux/vmalloc.h>
8085 +#include <linux/crypto.h>
8086 +
8087 +#include "tuxonice_builtin.h"
8088 +#include "tuxonice.h"
8089 +#include "tuxonice_modules.h"
8090 +#include "tuxonice_sysfs.h"
8091 +#include "tuxonice_io.h"
8092 +#include "tuxonice_ui.h"
8093 +#include "tuxonice_alloc.h"
8094 +
8095 +static int toi_expected_compression;
8096 +
8097 +static struct toi_module_ops toi_compression_ops;
8098 +static struct toi_module_ops *next_driver;
8099 +
8100 +static char toi_compressor_name[32] = "lzf";
8101 +
8102 +static DEFINE_MUTEX(stats_lock);
8103 +
8104 +struct cpu_context {
8105 +       u8 *page_buffer;
8106 +       struct crypto_comp *transform;
8107 +       unsigned int len;
8108 +       char *buffer_start;
8109 +};
8110 +
8111 +static DEFINE_PER_CPU(struct cpu_context, contexts);
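+
+/*
+ * Each CPU gets its own transform and page-sized scratch buffer, so pages
+ * can be compressed on several CPUs concurrently without sharing cryptoapi
+ * state; only the byte counters are shared, under stats_lock.
+ */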
8112 +
8113 +static int toi_compress_prepare_result;
8114 +
8115 +/*
8116 + * toi_compress_cleanup
8117 + *
8118 + * Frees memory allocated for our labours.
8119 + */
8120 +static void toi_compress_cleanup(int toi_or_resume)
8121 +{
8122 +       int cpu;
8123 +
8124 +       if (!toi_or_resume)
8125 +               return;
8126 +
8127 +       for_each_online_cpu(cpu) {
8128 +               struct cpu_context *this = &per_cpu(contexts, cpu);
8129 +               if (this->transform) {
8130 +                       crypto_free_comp(this->transform);
8131 +                       this->transform = NULL;
8132 +               }
8133 +
8134 +               if (this->page_buffer)
8135 +                       toi_free_page(16, (unsigned long) this->page_buffer);
8136 +
8137 +               this->page_buffer = NULL;
8138 +       }
8139 +}
8140 +
8141 +/*
8142 + * toi_crypto_prepare
8143 + *
8144 + * Prepare to do some work by allocating buffers and transforms.
8145 + */
8146 +static int toi_compress_crypto_prepare(void)
8147 +{
8148 +       int cpu;
8149 +
8150 +       if (!*toi_compressor_name) {
8151 +               printk(KERN_INFO "TuxOnIce: Compression enabled but no "
8152 +                               "compressor name set.\n");
8153 +               return 1;
8154 +       }
8155 +
8156 +       for_each_online_cpu(cpu) {
8157 +               struct cpu_context *this = &per_cpu(contexts, cpu);
8158 +               this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0);
8159 +               if (IS_ERR(this->transform)) {
8160 +                       printk(KERN_INFO "TuxOnIce: Failed to initialise the "
8161 +                                       "%s compression transform.\n",
8162 +                                       toi_compressor_name);
8163 +                       this->transform = NULL;
8164 +                       return 1;
8165 +               }
8166 +
8167 +               this->page_buffer =
8168 +                       (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
8169 +
8170 +               if (!this->page_buffer) {
8171 +                       printk(KERN_ERR
8172 +                         "Failed to allocate a page buffer for TuxOnIce "
8173 +                         "encryption driver.\n");
8174 +                       return -ENOMEM;
8175 +               }
8176 +       }
8177 +
8178 +       return 0;
8179 +}
8180 +
8181 +/*
8182 + * toi_compress_init
8183 + */
8184 +
8185 +static int toi_compress_init(int toi_or_resume)
8186 +{
8187 +       if (!toi_or_resume)
8188 +               return 0;
8189 +
8190 +       toi_compress_bytes_in = toi_compress_bytes_out = 0;
8191 +
8192 +       next_driver = toi_get_next_filter(&toi_compression_ops);
8193 +
8194 +       if (!next_driver)
8195 +               return -ECHILD;
8196 +
8197 +       toi_compress_prepare_result = toi_compress_crypto_prepare();
8198 +
8199 +       return 0;
8200 +}
8201 +
8202 +/*
8203 + * toi_compress_rw_init()
8204 + */
8205 +
8206 +static int toi_compress_rw_init(int rw, int stream_number)
8207 +{
8208 +       if (toi_compress_prepare_result) {
8209 +               printk("Failed to initialise compression algorithm.\n");
8210 +               if (rw == READ)
8211 +                       return -ENODEV;
8212 +               else
8213 +                       toi_compression_ops.enabled = 0;
8214 +       }
8215 +
8216 +       return 0;
8217 +}
8218 +
8219 +/*
8220 + * toi_compress_write_page()
8221 + *
8222 + * Compress a page of data, buffering output and passing on filled
8223 + * pages to the next module in the pipeline.
8224 + *
8225 + * Buffer_page:        Pointer to a buffer of size PAGE_SIZE, containing
8226 + * data to be compressed.
8227 + *
8228 + * Returns:    0 on success. Otherwise the error is that returned by later
8229 + *             modules, -ECHILD if we have a broken pipeline or -EIO if
8230 + *             the compressor errs.
8231 + */
8232 +static int toi_compress_write_page(unsigned long index,
8233 +               struct page *buffer_page, unsigned int buf_size)
8234 +{
8235 +       int ret, cpu = smp_processor_id();
8236 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
8237 +
8238 +       if (!ctx->transform)
8239 +               return next_driver->write_page(index, buffer_page, buf_size);
8240 +
8241 +       ctx->buffer_start = kmap(buffer_page);
8242 +
8243 +       ctx->len = buf_size;
8244 +
8245 +       ret = crypto_comp_compress(ctx->transform,
8246 +                       ctx->buffer_start, buf_size,
8247 +                       ctx->page_buffer, &ctx->len);
8248 +
8249 +       kunmap(buffer_page);
8250 +
8251 +       if (ret) {
8252 +               printk(KERN_INFO "Compression failed.\n");
8253 +               goto failure;
8254 +       }
8255 +
8256 +       mutex_lock(&stats_lock);
8257 +       toi_compress_bytes_in += buf_size;
8258 +       toi_compress_bytes_out += ctx->len;
8259 +       mutex_unlock(&stats_lock);
8260 +
8261 +       if (ctx->len < buf_size) /* some compression */
8262 +               ret = next_driver->write_page(index,
8263 +                               virt_to_page(ctx->page_buffer),
8264 +                               ctx->len);
8265 +       else
8266 +               ret = next_driver->write_page(index, buffer_page, buf_size);
8267 +
8268 +failure:
8269 +       return ret;
8270 +}
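+
+/*
+ * Note the contract with toi_compress_read_page() below: a page is stored
+ * compressed only if that actually saved space, so on the read side a
+ * full PAGE_SIZE chunk is passed through as-is and anything shorter is
+ * decompressed.
+ */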
8271 +
8272 +/*
8273 + * toi_compress_read_page()
8274 + * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
8275 + *
8276 + * Retrieve data from later modules and decompress it until the input buffer
8277 + * is filled.
8278 + * Zero if successful. Error condition from me or from downstream on failure.
8279 + */
8280 +static int toi_compress_read_page(unsigned long *index,
8281 +               struct page *buffer_page, unsigned int *buf_size)
8282 +{
8283 +       int ret, cpu = smp_processor_id();
8284 +       unsigned int len;
8285 +       unsigned int outlen = PAGE_SIZE;
8286 +       char *buffer_start;
8287 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
8288 +
8289 +       if (!ctx->transform)
8290 +               return next_driver->read_page(index, buffer_page, buf_size);
8291 +
8292 +       /*
8293 +        * All our reads must be synchronous - we can't decompress
8294 +        * data that hasn't been read yet.
8295 +        */
8296 +
8297 +       *buf_size = PAGE_SIZE;
8298 +
8299 +       ret = next_driver->read_page(index, buffer_page, &len);
8300 +
8301 +       /* Error or uncompressed data */
8302 +       if (ret || len == PAGE_SIZE)
8303 +               return ret;
8304 +
8305 +       buffer_start = kmap(buffer_page);
8306 +       memcpy(ctx->page_buffer, buffer_start, len);
8307 +       ret = crypto_comp_decompress(
8308 +                       ctx->transform,
8309 +                       ctx->page_buffer,
8310 +                       len, buffer_start, &outlen);
8311 +       if (ret)
8312 +               abort_hibernate(TOI_FAILED_IO,
8313 +                       "Compress_read returned %d.\n", ret);
8314 +       else if (outlen != PAGE_SIZE) {
8315 +               abort_hibernate(TOI_FAILED_IO,
8316 +                       "Decompression yielded %d bytes instead of %ld.\n",
8317 +                       outlen, PAGE_SIZE);
8318 +               printk(KERN_ERR "Decompression yielded %d bytes instead of "
8319 +                               "%ld.\n", outlen, PAGE_SIZE);
8320 +               ret = -EIO;
8321 +               *buf_size = outlen;
8322 +       }
8323 +       kunmap(buffer_page);
8324 +       return ret;
8325 +}
8326 +
8327 +/*
8328 + * toi_compress_print_debug_stats
8329 + * @buffer: Pointer to a buffer into which the debug info will be printed.
8330 + * @size: Size of the buffer.
8331 + *
8332 + * Print information to be recorded for debugging purposes into a buffer.
8333 + * Returns: Number of characters written to the buffer.
8334 + */
8335 +
8336 +static int toi_compress_print_debug_stats(char *buffer, int size)
8337 +{
8338 +       unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT,
8339 +                     pages_out = toi_compress_bytes_out >> PAGE_SHIFT;
8340 +       int len;
8341 +
8342 +       /* Output the compression ratio achieved. */
8343 +       if (*toi_compressor_name)
8344 +               len = scnprintf(buffer, size, "- Compressor is '%s'.\n",
8345 +                               toi_compressor_name);
8346 +       else
8347 +               len = scnprintf(buffer, size, "- Compressor is not set.\n");
8348 +
8349 +       if (pages_in)
8350 +               len += scnprintf(buffer+len, size - len,
8351 +                 "  Compressed %lu bytes into %lu (%ld percent compression).\n",
8352 +                 toi_compress_bytes_in,
8353 +                 toi_compress_bytes_out,
8354 +                 (pages_in - pages_out) * 100 / pages_in);
8355 +       return len;
8356 +}
8357 +
8358 +/*
8359 + * toi_compress_compression_memory_needed
8360 + *
8361 + * Tell the caller how much memory we need to operate during hibernate/resume.
8362 + * Returns: Unsigned long. Maximum number of bytes of memory required for
8363 + * operation.
8364 + */
8365 +static int toi_compress_memory_needed(void)
8366 +{
8367 +       return 2 * PAGE_SIZE;
8368 +}
8369 +
8370 +static int toi_compress_storage_needed(void)
8371 +{
8372 +       return 4 * sizeof(unsigned long) + strlen(toi_compressor_name) + 1;
8373 +}
8374 +
8375 +/*
8376 + * toi_compress_save_config_info
8377 + * @buffer: Pointer to a buffer of size PAGE_SIZE.
8378 + *
8379 + * Save information needed when reloading the image at resume time.
8380 + * Returns: Number of bytes used for saving our data.
8381 + */
8382 +static int toi_compress_save_config_info(char *buffer)
8383 +{
8384 +       int namelen = strlen(toi_compressor_name) + 1;
8385 +       int total_len;
8386 +
8387 +       *((unsigned long *) buffer) = toi_compress_bytes_in;
8388 +       *((unsigned long *) (buffer + 1 * sizeof(unsigned long))) =
8389 +               toi_compress_bytes_out;
8390 +       *((unsigned long *) (buffer + 2 * sizeof(unsigned long))) =
8391 +               toi_expected_compression;
8392 +       *((unsigned long *) (buffer + 3 * sizeof(unsigned long))) = namelen;
8393 +       strncpy(buffer + 4 * sizeof(unsigned long), toi_compressor_name,
8394 +                                                               namelen);
8395 +       total_len = 4 * sizeof(unsigned long) + namelen;
8396 +       return total_len;
8397 +}
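+
+/*
+ * The saved layout is therefore four unsigned longs followed by the
+ * NUL-terminated compressor name, e.g. with the default "lzf":
+ *
+ *     [bytes_in][bytes_out][expected_compression][namelen]["lzf\0"]
+ *
+ * toi_compress_load_config_info() below reads the fields back in the
+ * same order.
+ */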
8398 +
8399 +/* toi_compress_load_config_info
8400 + * @buffer: Pointer to the start of the data.
8401 + * @size: Number of bytes that were saved.
8402 + *
8403 + * Description:        Reload information needed for decompressing the image at
8404 + * resume time.
8405 + */
8406 +static void toi_compress_load_config_info(char *buffer, int size)
8407 +{
8408 +       int namelen;
8409 +
8410 +       toi_compress_bytes_in = *((unsigned long *) buffer);
8411 +       toi_compress_bytes_out = *((unsigned long *) (buffer + 1 *
8412 +                               sizeof(unsigned long)));
8413 +       toi_expected_compression = *((unsigned long *) (buffer + 2 *
8414 +                               sizeof(unsigned long)));
8415 +       namelen = *((unsigned long *) (buffer + 3 * sizeof(unsigned long)));
8416 +       if (strncmp(toi_compressor_name, buffer + 4 * sizeof(unsigned long),
8417 +                               namelen)) {
8418 +               toi_compress_cleanup(1);
8419 +               strncpy(toi_compressor_name, buffer + 4 * sizeof(unsigned long),
8420 +                       namelen);
8421 +               toi_compress_crypto_prepare();
8422 +       }
8424 +}
8425 +
8426 +/*
8427 + * toi_expected_compression_ratio
8428 + *
8429 + * Description:        Returns the expected ratio between data passed into this module
8430 + *             and the amount of data output when writing.
8431 + * Returns:    100 if the module is disabled. Otherwise the value set by the
8432 + *             user via our sysfs entry.
8433 + */
8434 +
8435 +static int toi_compress_expected_ratio(void)
8436 +{
8437 +       if (!toi_compression_ops.enabled)
8438 +               return 100;
8439 +       else
8440 +               return 100 - toi_expected_compression;
8441 +}
8442 +
8443 +/*
8444 + * data for our sysfs entries.
8445 + */
8446 +static struct toi_sysfs_data sysfs_params[] = {
8447 +       SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression,
8448 +                       0, 99, 0, NULL),
8449 +       SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0,
8450 +                       NULL),
8451 +       SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL),
8452 +};
8453 +
8454 +/*
8455 + * Ops structure.
8456 + */
8457 +static struct toi_module_ops toi_compression_ops = {
8458 +       .type                   = FILTER_MODULE,
8459 +       .name                   = "compression",
8460 +       .directory              = "compression",
8461 +       .module                 = THIS_MODULE,
8462 +       .initialise             = toi_compress_init,
8463 +       .cleanup                = toi_compress_cleanup,
8464 +       .memory_needed          = toi_compress_memory_needed,
8465 +       .print_debug_info       = toi_compress_print_debug_stats,
8466 +       .save_config_info       = toi_compress_save_config_info,
8467 +       .load_config_info       = toi_compress_load_config_info,
8468 +       .storage_needed         = toi_compress_storage_needed,
8469 +       .expected_compression   = toi_compress_expected_ratio,
8470 +
8471 +       .rw_init                = toi_compress_rw_init,
8472 +
8473 +       .write_page             = toi_compress_write_page,
8474 +       .read_page              = toi_compress_read_page,
8475 +
8476 +       .sysfs_data             = sysfs_params,
8477 +       .num_sysfs_entries      = sizeof(sysfs_params) /
8478 +               sizeof(struct toi_sysfs_data),
8479 +};
8480 +
8481 +/* ---- Registration ---- */
8482 +
8483 +static __init int toi_compress_load(void)
8484 +{
8485 +       return toi_register_module(&toi_compression_ops);
8486 +}
8487 +
8488 +#ifdef MODULE
8489 +static __exit void toi_compress_unload(void)
8490 +{
8491 +       toi_unregister_module(&toi_compression_ops);
8492 +}
8493 +
8494 +module_init(toi_compress_load);
8495 +module_exit(toi_compress_unload);
8496 +MODULE_LICENSE("GPL");
8497 +MODULE_AUTHOR("Nigel Cunningham");
8498 +MODULE_DESCRIPTION("Compression Support for TuxOnIce");
8499 +#else
8500 +late_initcall(toi_compress_load);
8501 +#endif
8502 diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c
8503 new file mode 100644
8504 index 0000000..3abf881
8505 --- /dev/null
8506 +++ b/kernel/power/tuxonice_extent.c
8507 @@ -0,0 +1,299 @@
8508 +/*
8509 + * kernel/power/tuxonice_extent.c
8510 + *
8511 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
8512 + *
8513 + * Distributed under GPLv2.
8514 + *
8515 + * These functions encapsulate the manipulation of storage metadata.
8516 + */
8517 +
8518 +#include <linux/module.h>
8519 +#include <linux/suspend.h>
8520 +#include "tuxonice_modules.h"
8521 +#include "tuxonice_extent.h"
8522 +#include "tuxonice_alloc.h"
8523 +#include "tuxonice_ui.h"
8524 +#include "tuxonice.h"
8525 +
8526 +/* toi_get_extent
8527 + *
8528 + * Returns a free extent. May fail, returning NULL instead.
8529 + */
8530 +static struct hibernate_extent *toi_get_extent(void)
8531 +{
8532 +       return (struct hibernate_extent *) toi_kzalloc(2,
8533 +                       sizeof(struct hibernate_extent), TOI_ATOMIC_GFP);
8534 +}
8535 +
8536 +/* toi_put_extent_chain.
8537 + *
8538 + * Frees a whole chain of extents.
8539 + */
8540 +void toi_put_extent_chain(struct hibernate_extent_chain *chain)
8541 +{
8542 +       struct hibernate_extent *this;
8543 +
8544 +       this = chain->first;
8545 +
8546 +       while (this) {
8547 +               struct hibernate_extent *next = this->next;
8548 +               toi_kfree(2, this);
8549 +               chain->num_extents--;
8550 +               this = next;
8551 +       }
8552 +
8553 +       chain->first = NULL;
8554 +       chain->last_touched = NULL;
8555 +       chain->size = 0;
8556 +}
8557 +EXPORT_SYMBOL_GPL(toi_put_extent_chain);
8558 +
8559 +/*
8560 + * toi_add_to_extent_chain
8561 + *
8562 + * Add an extent to an existing chain.
8563 + */
8564 +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
8565 +               unsigned long start, unsigned long end)
8566 +{
8567 +       struct hibernate_extent *new_ext = NULL, *cur_ext = NULL;
8568 +
8569 +       /* Find the right place in the chain */
8570 +       if (chain->last_touched && chain->last_touched->start < start)
8571 +               cur_ext = chain->last_touched;
8572 +       else if (chain->first && chain->first->start < start)
8573 +               cur_ext = chain->first;
8574 +
8575 +       if (cur_ext) {
8576 +               while (cur_ext->next && cur_ext->next->start < start)
8577 +                       cur_ext = cur_ext->next;
8578 +
8579 +               if (cur_ext->end == (start - 1)) {
8580 +                       struct hibernate_extent *next_ext = cur_ext->next;
8581 +                       cur_ext->end = end;
8582 +
8583 +                       /* Merge with the following one? */
8584 +                       if (next_ext && cur_ext->end + 1 == next_ext->start) {
8585 +                               cur_ext->end = next_ext->end;
8586 +                               cur_ext->next = next_ext->next;
8587 +                               toi_kfree(2, next_ext);
8588 +                               chain->num_extents--;
8589 +                       }
8590 +
8591 +                       chain->last_touched = cur_ext;
8592 +                       chain->size += (end - start + 1);
8593 +
8594 +                       return 0;
8595 +               }
8596 +       }
8597 +
8598 +       new_ext = toi_get_extent();
8599 +       if (!new_ext) {
8600 +               printk(KERN_INFO "Error unable to append a new extent to the "
8601 +                               "chain.\n");
8602 +               return -ENOMEM;
8603 +       }
8604 +
8605 +       chain->num_extents++;
8606 +       chain->size += (end - start + 1);
8607 +       new_ext->start = start;
8608 +       new_ext->end = end;
8609 +
8610 +       chain->last_touched = new_ext;
8611 +
8612 +       if (cur_ext) {
8613 +               new_ext->next = cur_ext->next;
8614 +               cur_ext->next = new_ext;
8615 +       } else {
8616 +               if (chain->first)
8617 +                       new_ext->next = chain->first;
8618 +               chain->first = new_ext;
8619 +       }
8620 +
8621 +       return 0;
8622 +}
8623 +EXPORT_SYMBOL_GPL(toi_add_to_extent_chain);
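/*
 * A minimal userspace sketch of the merge behaviour above (the names and
 * the simplified add_range() are illustrative, not part of the patch):
 * adding a range that begins immediately after an existing extent's end
 * grows that extent in place rather than allocating a new node.
 */
#include <stdio.h>
#include <stdlib.h>

struct extent { unsigned long start, end; struct extent *next; };
struct chain { int num_extents, size; struct extent *first; };

static void add_range(struct chain *c, unsigned long start, unsigned long end)
{
        struct extent *cur = c->first;

        while (cur && cur->next && cur->next->start < start)
                cur = cur->next;

        if (cur && cur->end == start - 1) {
                cur->end = end;                 /* merge with predecessor */
        } else {
                struct extent *e = calloc(1, sizeof(*e));
                e->start = start;
                e->end = end;
                e->next = cur ? cur->next : c->first;
                if (cur)
                        cur->next = e;
                else
                        c->first = e;
                c->num_extents++;
        }
        c->size += end - start + 1;
}

int main(void)
{
        struct chain c = { 0, 0, NULL };

        add_range(&c, 0, 7);
        add_range(&c, 8, 15);   /* contiguous: merged into the first extent */
        add_range(&c, 32, 39);  /* gap: a second extent is created */

        printf("%d extents covering %d blocks\n", c.num_extents, c.size);
        return 0;               /* prints: 2 extents covering 24 blocks */
}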
8624 +
8625 +/* toi_serialise_extent_chain
8626 + *
8627 + * Write a chain to the image header.
8628 + */
8629 +int toi_serialise_extent_chain(struct toi_module_ops *owner,
8630 +               struct hibernate_extent_chain *chain)
8631 +{
8632 +       struct hibernate_extent *this;
8633 +       int ret, i = 0;
8634 +
8635 +       ret = toiActiveAllocator->rw_header_chunk(WRITE, owner, (char *) chain,
8636 +                       2 * sizeof(int));
8637 +       if (ret)
8638 +               return ret;
8639 +
8640 +       this = chain->first;
8641 +       while (this) {
8642 +               ret = toiActiveAllocator->rw_header_chunk(WRITE, owner,
8643 +                               (char *) this, 2 * sizeof(unsigned long));
8644 +               if (ret)
8645 +                       return ret;
8646 +               this = this->next;
8647 +               i++;
8648 +       }
8649 +
8650 +       if (i != chain->num_extents) {
8651 +               printk(KERN_EMERG "Saved %d extents but chain metadata says "
8652 +                       "there should be %d.\n", i, chain->num_extents);
8653 +               return 1;
8654 +       }
8655 +
8656 +       return ret;
8657 +}
8658 +EXPORT_SYMBOL_GPL(toi_serialise_extent_chain);
8659 +
8660 +/* toi_load_extent_chain
8661 + *
8662 + * Read back a chain saved in the image.
8663 + */
8664 +int toi_load_extent_chain(struct hibernate_extent_chain *chain)
8665 +{
8666 +       struct hibernate_extent *this, *last = NULL;
8667 +       int i, ret;
8668 +
8669 +       ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
8670 +                       (char *) chain, 2 * sizeof(int));
8671 +       if (ret) {
8672 +               printk("Failed to read the size of the extent chain.\n");
8673 +               return 1;
8674 +       }
8675 +
8676 +       for (i = 0; i < chain->num_extents; i++) {
8677 +               this = toi_kzalloc(3, sizeof(struct hibernate_extent),
8678 +                               TOI_ATOMIC_GFP);
8679 +               if (!this) {
8680 +                       printk(KERN_INFO "Failed to allocate a new extent.\n");
8681 +                       return -ENOMEM;
8682 +               }
8683 +               this->next = NULL;
8684 +               ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
8685 +                               NULL, (char *) this, 2 * sizeof(unsigned long));
8686 +               if (ret) {
8687 +                       printk(KERN_INFO "Failed to read an extent.\n");
8688 +                       return 1;
8689 +               }
8690 +               if (last)
8691 +                       last->next = this;
8692 +               else
8693 +                       chain->first = this;
8694 +               last = this;
8695 +       }
8696 +       return 0;
8697 +}
8698 +EXPORT_SYMBOL_GPL(toi_load_extent_chain);
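/*
 * The on-image layout implied by the two functions above is, as a sketch
 * (it relies on 'size' and 'num_extents' being the first members of
 * struct hibernate_extent_chain, and 'start'/'end' the first members of
 * struct hibernate_extent):
 *
 *   int           size;           \  chain header: 2 * sizeof(int)
 *   int           num_extents;    /
 *   unsigned long start, end;     }  repeated num_extents times
 *
 * The 'next' pointers are deliberately not saved; toi_load_extent_chain
 * rebuilds them as it allocates fresh nodes at resume time.
 */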
8699 +
8700 +/* toi_extent_state_next
8701 + *
8702 + * Given a state, progress to the next valid entry. We may begin in an
8703 + * invalid state, as we do when invoked after extent_state_goto_start below.
8704 + *
8705 + * When using compression and expected_compression > 0, we let the image size
8706 + * be larger than storage, so we can validly run out of data to return.
8707 + */
8708 +unsigned long toi_extent_state_next(struct toi_extent_iterate_state *state)
8709 +{
8710 +       if (state->current_chain == state->num_chains)
8711 +               return 0;
8712 +
8713 +       if (state->current_extent) {
8714 +               if (state->current_offset == state->current_extent->end) {
8715 +                       if (state->current_extent->next) {
8716 +                               state->current_extent =
8717 +                                       state->current_extent->next;
8718 +                               state->current_offset =
8719 +                                       state->current_extent->start;
8720 +                       } else {
8721 +                               state->current_extent = NULL;
8722 +                               state->current_offset = 0;
8723 +                       }
8724 +               } else
8725 +                       state->current_offset++;
8726 +       }
8727 +
8728 +       while (!state->current_extent) {
8729 +               int chain_num = ++(state->current_chain);
8730 +
8731 +               if (chain_num == state->num_chains)
8732 +                       return 0;
8733 +
8734 +               state->current_extent = (state->chains + chain_num)->first;
8735 +
8736 +               if (!state->current_extent)
8737 +                       continue;
8738 +
8739 +               state->current_offset = state->current_extent->start;
8740 +       }
8741 +
8742 +       return state->current_offset;
8743 +}
8744 +EXPORT_SYMBOL_GPL(toi_extent_state_next);
8745 +
8746 +/* toi_extent_state_goto_start
8747 + *
8748 + * Find the first valid value in a group of chains.
8749 + */
8750 +void toi_extent_state_goto_start(struct toi_extent_iterate_state *state)
8751 +{
8752 +       state->current_chain = -1;
8753 +       state->current_extent = NULL;
8754 +       state->current_offset = 0;
8755 +}
8756 +EXPORT_SYMBOL_GPL(toi_extent_state_goto_start);
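/*
 * A userspace model of the goto_start/next pairing above (illustrative
 * only): goto_start() leaves the iterator in an invalid state (chain -1,
 * no extent), and the first call to next() then walks forward to the
 * first populated chain, mirroring the kernel code.
 */
#include <stdio.h>

struct extent { unsigned long start, end; struct extent *next; };
struct chain { struct extent *first; };
struct iter {
        struct chain *chains;
        int num_chains, cur_chain;
        struct extent *cur_ext;
        unsigned long off;
};

static unsigned long iter_next(struct iter *s)
{
        if (s->cur_chain == s->num_chains)
                return 0;

        if (s->cur_ext) {
                if (s->off == s->cur_ext->end) {
                        s->cur_ext = s->cur_ext->next;
                        s->off = s->cur_ext ? s->cur_ext->start : 0;
                } else
                        s->off++;
        }

        while (!s->cur_ext) {
                if (++s->cur_chain == s->num_chains)
                        return 0;
                s->cur_ext = s->chains[s->cur_chain].first;
                if (s->cur_ext)
                        s->off = s->cur_ext->start;
        }

        return s->off;
}

int main(void)
{
        struct extent e2 = { 8, 9, NULL }, e1 = { 3, 4, &e2 };
        struct chain chains[1] = { { &e1 } };
        struct iter s = { chains, 1, -1, NULL, 0 };
        unsigned long v;

        while ((v = iter_next(&s)))
                printf("%lu ", v);      /* prints: 3 4 8 9 */
        printf("\n");
        return 0;
}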
8757 +
8758 +/* toi_extent_state_save
8759 + *
8760 + * Given a state and a struct hibernate_extent_iterate_saved_state, save
8761 + * the current position in a format that can be used with relocated chains
8762 + * (at resume time).
8763 + */
8764 +void toi_extent_state_save(struct toi_extent_iterate_state *state,
8765 +               struct hibernate_extent_iterate_saved_state *saved_state)
8766 +{
8767 +       struct hibernate_extent *extent;
8768 +
8769 +       saved_state->chain_num = state->current_chain;
8770 +       saved_state->extent_num = 0;
8771 +       saved_state->offset = state->current_offset;
8772 +
8773 +       if (saved_state->chain_num == -1)
8774 +               return;
8775 +
8776 +       extent = (state->chains + state->current_chain)->first;
8777 +
8778 +       while (extent != state->current_extent) {
8779 +               saved_state->extent_num++;
8780 +               extent = extent->next;
8781 +       }
8782 +}
8783 +EXPORT_SYMBOL_GPL(toi_extent_state_save);
8784 +
8785 +/* toi_extent_state_restore
8786 + *
8787 + * Restore the position saved by toi_extent_state_save.
8788 + */
8789 +void toi_extent_state_restore(struct toi_extent_iterate_state *state,
8790 +               struct hibernate_extent_iterate_saved_state *saved_state)
8791 +{
8792 +       int posn = saved_state->extent_num;
8793 +
8794 +       if (saved_state->chain_num == -1) {
8795 +               toi_extent_state_goto_start(state);
8796 +               return;
8797 +       }
8798 +
8799 +       state->current_chain = saved_state->chain_num;
8800 +       state->current_extent = (state->chains + state->current_chain)->first;
8801 +       state->current_offset = saved_state->offset;
8802 +
8803 +       while (posn--)
8804 +               state->current_extent = state->current_extent->next;
8805 +}
8806 +EXPORT_SYMBOL_GPL(toi_extent_state_restore);
8807 diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h
8808 new file mode 100644
8809 index 0000000..22ffb9b
8810 --- /dev/null
8811 +++ b/kernel/power/tuxonice_extent.h
8812 @@ -0,0 +1,72 @@
8813 +/*
8814 + * kernel/power/tuxonice_extent.h
8815 + *
8816 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
8817 + *
8818 + * This file is released under the GPLv2.
8819 + *
8820 + * It contains declarations related to extents. Extents are
8821 + * TuxOnIce's method of storing some of the metadata for the image.
8822 + * See tuxonice_extent.c for more info.
8823 + *
8824 + */
8825 +
8826 +#include "tuxonice_modules.h"
8827 +
8828 +#ifndef EXTENT_H
8829 +#define EXTENT_H
8830 +
8831 +struct hibernate_extent {
8832 +       unsigned long start, end;
8833 +       struct hibernate_extent *next;
8834 +};
8835 +
8836 +struct hibernate_extent_chain {
8837 +       int size; /* size of the chain, i.e. sum of (end - start + 1) */
8838 +       int num_extents;
8839 +       struct hibernate_extent *first, *last_touched;
8840 +};
8841 +
8842 +struct toi_extent_iterate_state {
8843 +       struct hibernate_extent_chain *chains;
8844 +       int num_chains;
8845 +       int current_chain;
8846 +       struct hibernate_extent *current_extent;
8847 +       unsigned long current_offset;
8848 +};
8849 +
8850 +struct hibernate_extent_iterate_saved_state {
8851 +       int chain_num;
8852 +       int extent_num;
8853 +       unsigned long offset;
8854 +};
8855 +
8856 +#define toi_extent_state_eof(state) \
8857 +       ((state)->num_chains == (state)->current_chain)
8858 +
8859 +/* Simplify iterating through all the values in an extent chain */
8860 +#define toi_extent_for_each(extent_chain, extentpointer, value) \
8861 +if ((extent_chain)->first) \
8862 +       for ((extentpointer) = (extent_chain)->first, (value) = \
8863 +                       (extentpointer)->start; \
8864 +            ((extentpointer) && ((extentpointer)->next || (value) <= \
8865 +                                (extentpointer)->end)); \
8866 +            (((value) == (extentpointer)->end) ? \
8867 +               ((extentpointer) = (extentpointer)->next, (value) = \
8868 +                ((extentpointer) ? (extentpointer)->start : 0)) : \
8869 +                       (value)++))
8870 +
8871 +void toi_put_extent_chain(struct hibernate_extent_chain *chain);
8872 +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
8873 +               unsigned long start, unsigned long end);
8874 +int toi_serialise_extent_chain(struct toi_module_ops *owner,
8875 +               struct hibernate_extent_chain *chain);
8876 +int toi_load_extent_chain(struct hibernate_extent_chain *chain);
8877 +
8878 +void toi_extent_state_save(struct toi_extent_iterate_state *state,
8879 +               struct hibernate_extent_iterate_saved_state *saved_state);
8880 +void toi_extent_state_restore(struct toi_extent_iterate_state *state,
8881 +               struct hibernate_extent_iterate_saved_state *saved_state);
8882 +void toi_extent_state_goto_start(struct toi_extent_iterate_state *state);
8883 +unsigned long toi_extent_state_next(struct toi_extent_iterate_state *state);
8884 +#endif
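/*
 * A standalone sketch of traversing a chain with toi_extent_for_each
 * (local stand-ins for the kernel types; the values are illustrative):
 */
#include <stdio.h>

struct hibernate_extent {
        unsigned long start, end;
        struct hibernate_extent *next;
};
struct hibernate_extent_chain {
        int size, num_extents;
        struct hibernate_extent *first, *last_touched;
};

int main(void)
{
        struct hibernate_extent b = { 10, 11, NULL }, a = { 1, 2, &b };
        struct hibernate_extent_chain chain = { 4, 2, &a, NULL };
        struct hibernate_extent *ep;
        unsigned long value;

        /* Expands the same way as the macro: visits 1 2 10 11. */
        if (chain.first)
                for (ep = chain.first, value = ep->start;
                     ep && (ep->next || value <= ep->end);
                     (value == ep->end) ?
                        (ep = ep->next, value = ep ? ep->start : 0) :
                        value++)
                        printf("%lu ", value);
        printf("\n");
        return 0;
}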
8885 diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c
8886 new file mode 100644
8887 index 0000000..0bb3e2e
8888 --- /dev/null
8889 +++ b/kernel/power/tuxonice_file.c
8890 @@ -0,0 +1,1122 @@
8891 +/*
8892 + * kernel/power/tuxonice_file.c
8893 + *
8894 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
8895 + *
8896 + * Distributed under GPLv2.
8897 + *
8898 + * This file encapsulates functions for using a simple file as a
8899 + * backing store. It is based upon the swap allocator and shares the
8900 + * same basic operation. Here, though, we have nothing to do with
8901 + * swap space, and only one device to worry about.
8902 + *
8903 + * The user can just
8904 + *
8905 + * echo TuxOnIce > /path/to/my_file
8906 + *
8907 + * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file
8908 + *
8909 + * and
8910 + *
8911 + * echo /path/to/my_file > /sys/power/tuxonice/file/target
8912 + *
8913 + * then put what they find in /sys/power/tuxonice/resume
8914 + * as their resume= parameter in lilo.conf (and rerun lilo if using it).
8915 + *
8916 + * Having done this, they're ready to hibernate and resume.
8917 + *
8918 + * TODO:
8919 + * - File resizing.
8920 + */
8921 +
8922 +#include <linux/suspend.h>
8923 +#include <linux/module.h>
8924 +#include <linux/blkdev.h>
8925 +#include <linux/file.h>
8926 +#include <linux/stat.h>
8927 +#include <linux/mount.h>
8928 +#include <linux/statfs.h>
8929 +#include <linux/syscalls.h>
8930 +#include <linux/namei.h>
8931 +#include <linux/fs.h>
8932 +#include <linux/root_dev.h>
8933 +
8934 +#include "tuxonice.h"
8935 +#include "tuxonice_sysfs.h"
8936 +#include "tuxonice_modules.h"
8937 +#include "tuxonice_ui.h"
8938 +#include "tuxonice_extent.h"
8939 +#include "tuxonice_io.h"
8940 +#include "tuxonice_storage.h"
8941 +#include "tuxonice_block_io.h"
8942 +#include "tuxonice_alloc.h"
8943 +#include "tuxonice_builtin.h"
8944 +
8945 +static struct toi_module_ops toi_fileops;
8946 +
8947 +/* Details of our target.  */
8948 +
8949 +static char toi_file_target[256];
8950 +static struct inode *target_inode;
8951 +static struct file *target_file;
8952 +static struct block_device *toi_file_target_bdev;
8953 +static dev_t resume_file_dev_t;
8954 +static int used_devt;
8955 +static int setting_toi_file_target;
8956 +static sector_t target_firstblock, target_header_start;
8957 +static int target_storage_available;
8958 +static int target_claim;
8959 +
8960 +/* Old signatures */
8961 +static char HaveImage[] = "HaveImage\n";
8962 +static char NoImage[] =   "TuxOnIce\n";
8963 +#define sig_size (sizeof(HaveImage) + 1)
8964 +
8965 +struct toi_file_header {
8966 +       char sig[sig_size];
8967 +       int resumed_before;
8968 +       unsigned long first_header_block;
8969 +       int have_image;
8970 +};
8971 +
8972 +/* Header Page Information */
8973 +static int header_pages_reserved;
8974 +
8975 +/* Main Storage Pages */
8976 +static int main_pages_allocated, main_pages_requested;
8977 +
8978 +#define target_is_normal_file() (S_ISREG(target_inode->i_mode))
8979 +
8980 +static struct toi_bdev_info devinfo;
8981 +
8982 +/* Extent chain for blocks */
8983 +static struct hibernate_extent_chain block_chain;
8984 +
8985 +/* Signature operations */
8986 +enum {
8987 +       GET_IMAGE_EXISTS,
8988 +       INVALIDATE,
8989 +       MARK_RESUME_ATTEMPTED,
8990 +       UNMARK_RESUME_ATTEMPTED,
8991 +};
8992 +
8993 +static void set_devinfo(struct block_device *bdev, int target_blkbits)
8994 +{
8995 +       devinfo.bdev = bdev;
8996 +       if (!target_blkbits) {
8997 +               devinfo.bmap_shift = devinfo.blocks_per_page = 0;
8998 +       } else {
8999 +               devinfo.bmap_shift = target_blkbits - 9;
9000 +               devinfo.blocks_per_page = (1 << (PAGE_SHIFT - target_blkbits));
9001 +       }
9002 +}
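/*
 * Worked example (assuming PAGE_SHIFT == 12, i.e. 4 KiB pages): a target
 * filesystem with 1 KiB blocks has target_blkbits == 10, giving
 *
 *   bmap_shift      = 10 - 9         = 1  (block number -> 512-byte sector)
 *   blocks_per_page = 1 << (12 - 10) = 4  (fs blocks backing each page)
 */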
9003 +
9004 +static long raw_to_real(long raw)
9005 +{
9006 +       long result;
9007 +
9008 +       result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) +
9009 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
9010 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
9011 +
9012 +       return result < 0 ? 0 : result;
9013 +}
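/*
 * Worked example of the overhead calculation (assuming 64-bit longs and
 * 4 KiB pages, so each stored page costs an 8 + 4 = 12 byte index entry):
 *
 *   raw  = 1000 pages
 *   lost = (1000 * 12 + 4096 + 12 + 1) / (4096 + 12)
 *        = 16109 / 4108 = 3 pages (integer division)
 *   real = 1000 - 3 = 997 pages usable for image data
 */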
9014 +
9015 +static int toi_file_storage_available(void)
9016 +{
9017 +       int result = 0;
9018 +       struct block_device *bdev = toi_file_target_bdev;
9019 +
9020 +       if (!target_inode)
9021 +               return 0;
9022 +
9023 +       switch (target_inode->i_mode & S_IFMT) {
9024 +       case S_IFSOCK:
9025 +       case S_IFCHR:
9026 +       case S_IFIFO: /* Socket, Char, Fifo */
9027 +               return -1;
9028 +       case S_IFREG: /* Regular file: current size - holes + free
9029 +                        space on part */
9030 +               result = target_storage_available;
9031 +               break;
9032 +       case S_IFBLK: /* Block device */
9033 +               if (!bdev->bd_disk) {
9034 +                       printk(KERN_INFO "bdev->bd_disk null.\n");
9035 +                       return 0;
9036 +               }
9037 +
9038 +               result = (bdev->bd_part ?
9039 +                       bdev->bd_part->nr_sects :
9040 +                       bdev->bd_disk->capacity) >> (PAGE_SHIFT - 9);
9041 +       }
9042 +
9043 +       return raw_to_real(result);
9044 +}
9045 +
9046 +static int has_contiguous_blocks(int page_num)
9047 +{
9048 +       int j;
9049 +       sector_t last = 0;
9050 +
9051 +       for (j = 0; j < devinfo.blocks_per_page; j++) {
9052 +               sector_t this = bmap(target_inode,
9053 +                               page_num * devinfo.blocks_per_page + j);
9054 +
9055 +               if (!this || (last && (last + 1) != this))
9056 +                       break;
9057 +
9058 +               last = this;
9059 +       }
9060 +
9061 +       return j == devinfo.blocks_per_page;
9062 +}
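/*
 * Example (blocks_per_page == 4): page 5 of the file covers file blocks
 * 20..23. If bmap() maps them to device blocks 907, 908, 909 and 910 the
 * page can be used for direct block I/O; a mapping like 907, 908, 2001,
 * 2002 (or any hole, where bmap() returns 0) makes the page unusable and
 * it is simply skipped. (The device block numbers here are made up.)
 */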
9063 +
9064 +static int size_ignoring_ignored_pages(void)
9065 +{
9066 +       int mappable = 0, i;
9067 +
9068 +       if (!target_is_normal_file())
9069 +               return toi_file_storage_available();
9070 +
9071 +       for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++)
9072 +               if (has_contiguous_blocks(i))
9073 +                       mappable++;
9074 +
9075 +       return mappable;
9076 +}
9077 +
9078 +static int __populate_block_list(int min, int max)
9079 +{
9080 +       if (test_action_state(TOI_TEST_BIO))
9081 +               printk(KERN_INFO "Adding extent %d-%d.\n",
9082 +                       min << devinfo.bmap_shift,
9083 +                       ((max + 1) << devinfo.bmap_shift) - 1);
9084 +
9085 +       return toi_add_to_extent_chain(&block_chain, min, max);
9086 +}
9087 +
9088 +static int apply_header_reservation(void)
9089 +{
9090 +       int i;
9091 +
9092 +       /* Apply header space reservation */
9093 +       toi_extent_state_goto_start(&toi_writer_posn);
9094 +       toi_bio_ops.forward_one_page(1); /* To first page */
9095 +
9096 +       for (i = 0; i < header_pages_reserved; i++)
9097 +               if (toi_bio_ops.forward_one_page(1))
9098 +                       return -ENOSPC;
9099 +
9100 +       /* The end of header pages will be the start of pageset 2 */
9101 +       toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]);
9102 +
9103 +       return 0;
9104 +}
9105 +
9106 +static int populate_block_list(void)
9107 +{
9108 +       int i, extent_min = -1, extent_max = -1, got_header = 0, result = 0;
9109 +
9110 +       if (block_chain.first)
9111 +               toi_put_extent_chain(&block_chain);
9112 +
9113 +       if (!target_is_normal_file()) {
9114 +               return (target_storage_available > 0) ?
9115 +                       __populate_block_list(devinfo.blocks_per_page,
9116 +                               (target_storage_available + 1) *
9117 +                               devinfo.blocks_per_page - 1) : 0;
9118 +       }
9119 +
9120 +       for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) {
9121 +               sector_t new_sector;
9122 +
9123 +               if (!has_contiguous_blocks(i))
9124 +                       continue;
9125 +
9126 +               new_sector = bmap(target_inode,
9127 +               (i * devinfo.blocks_per_page));
9128 +
9129 +               /*
9130 +                * Ignore the first block in the file.
9131 +                * It gets the header.
9132 +                */
9133 +               if (new_sector == target_firstblock >> devinfo.bmap_shift) {
9134 +                       got_header = 1;
9135 +                       continue;
9136 +               }
9137 +
9138 +               /*
9139 +                * I'd love to be able to fill in holes and resize
9140 +                * files, but not yet...
9141 +                */
9142 +
9143 +               if (new_sector == extent_max + 1)
9144 +                       extent_max += devinfo.blocks_per_page;
9145 +               else {
9146 +                       if (extent_min > -1) {
9147 +                               result = __populate_block_list(extent_min,
9148 +                                               extent_max);
9149 +                               if (result)
9150 +                                       return result;
9151 +                       }
9152 +
9153 +                       extent_min = new_sector;
9154 +                       extent_max = extent_min +
9155 +                               devinfo.blocks_per_page - 1;
9156 +               }
9157 +       }
9158 +
9159 +       if (extent_min > -1) {
9160 +               result = __populate_block_list(extent_min, extent_max);
9161 +               if (result)
9162 +                       return result;
9163 +       }
9164 +
9165 +       return apply_header_reservation();
9166 +}
9167 +
9168 +static void toi_file_cleanup(int finishing_cycle)
9169 +{
9170 +       if (toi_file_target_bdev) {
9171 +               if (target_claim) {
9172 +                       bd_release(toi_file_target_bdev);
9173 +                       target_claim = 0;
9174 +               }
9175 +
9176 +               if (used_devt) {
9177 +                       blkdev_put(toi_file_target_bdev);
9178 +                       used_devt = 0;
9179 +               }
9180 +               toi_file_target_bdev = NULL;
9181 +               target_inode = NULL;
9182 +               set_devinfo(NULL, 0);
9183 +               target_storage_available = 0;
9184 +       }
9185 +
9186 +       if (target_file && !IS_ERR(target_file))
9187 +               filp_close(target_file, NULL);
9188 +
9189 +       target_file = NULL;
9190 +}
9191 +
9192 +/*
9193 + * reopen_resume_devt
9194 + *
9195 + * Having opened resume= once, we remember the major and
9196 + * minor nodes and use them to reopen the bdev for checking
9197 + * whether an image exists (possibly when starting a resume).
9198 + */
9199 +static void reopen_resume_devt(void)
9200 +{
9201 +       toi_file_target_bdev = toi_open_by_devnum(resume_file_dev_t,
9202 +                       FMODE_READ);
9203 +       if (IS_ERR(toi_file_target_bdev)) {
9204 +               printk(KERN_INFO "Got a dev_num (%lx) but failed to open it.\n",
9205 +                               (unsigned long) resume_file_dev_t);
9206 +               return;
9207 +       }
9208 +       target_inode = toi_file_target_bdev->bd_inode;
9209 +       set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
9210 +}
9211 +
9212 +static void toi_file_get_target_info(char *target, int get_size,
9213 +               int resume_param)
9214 +{
9215 +       if (target_file)
9216 +               toi_file_cleanup(0);
9217 +
9218 +       if (!target || !strlen(target))
9219 +               return;
9220 +
9221 +       target_file = filp_open(target, O_RDWR|O_LARGEFILE, 0);
9222 +
9223 +       if (IS_ERR(target_file) || !target_file) {
9224 +
9225 +               if (!resume_param) {
9226 +                       printk(KERN_INFO "Open file %s returned %p.\n",
9227 +                                       target, target_file);
9228 +                       target_file = NULL;
9229 +                       return;
9230 +               }
9231 +
9232 +               target_file = NULL;
9233 +               resume_file_dev_t = name_to_dev_t(target);
9234 +               if (!resume_file_dev_t) {
9235 +                       struct kstat stat;
9236 +                       int error = vfs_stat(target, &stat);
9237 +                       printk(KERN_INFO "Open file %s returned %p and "
9238 +                                       "name_to_dev_t failed.\n", target,
9239 +                                       target_file);
9240 +                       if (error)
9241 +                               printk(KERN_INFO "Stating the file also failed."
9242 +                                       " Nothing more we can do.\n");
9243 +                       else
9244 +                               resume_file_dev_t = stat.rdev;
9245 +                       return;
9246 +               }
9247 +
9248 +               toi_file_target_bdev = toi_open_by_devnum(resume_file_dev_t,
9249 +                               FMODE_READ);
9250 +               if (IS_ERR(toi_file_target_bdev)) {
9251 +                       printk(KERN_INFO "Got a dev_num (%lx) but failed to "
9252 +                                       "open it.\n",
9253 +                                       (unsigned long) resume_file_dev_t);
9254 +                       return;
9255 +               }
9256 +               used_devt = 1;
9257 +               target_inode = toi_file_target_bdev->bd_inode;
9258 +       } else
9259 +               target_inode = target_file->f_mapping->host;
9260 +
9261 +       if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) ||
9262 +           S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) {
9263 +               printk(KERN_INFO "File support works with regular files,"
9264 +                               " character files and block devices.\n");
9265 +               goto cleanup;
9266 +       }
9267 +
9268 +       if (!used_devt) {
9269 +               if (S_ISBLK(target_inode->i_mode)) {
9270 +                       toi_file_target_bdev = I_BDEV(target_inode);
9271 +                       if (!bd_claim(toi_file_target_bdev, &toi_fileops))
9272 +                               target_claim = 1;
9273 +               } else
9274 +                       toi_file_target_bdev = target_inode->i_sb->s_bdev;
9275 +               resume_file_dev_t = toi_file_target_bdev->bd_dev;
9276 +       }
9277 +
9278 +       set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
9279 +
9280 +       if (get_size)
9281 +               target_storage_available = size_ignoring_ignored_pages();
9282 +
9283 +       if (!resume_param)
9284 +               target_firstblock = bmap(target_inode, 0) << devinfo.bmap_shift;
9285 +
9286 +       return;
9287 +cleanup:
9288 +       target_inode = NULL;
9289 +       if (target_file) {
9290 +               filp_close(target_file, NULL);
9291 +               target_file = NULL;
9292 +       }
9293 +       set_devinfo(NULL, 0);
9294 +       target_storage_available = 0;
9295 +}
9296 +
9297 +static void toi_file_noresume_reset(void)
9298 +{
9299 +       toi_bio_ops.rw_cleanup(READ);
9300 +}
9301 +
9302 +static int parse_signature(struct toi_file_header *header)
9303 +{
9304 +       int have_image = !memcmp(HaveImage, header->sig, sizeof(HaveImage) - 1);
9305 +       int no_image_header = !memcmp(NoImage, header->sig,
9306 +                       sizeof(NoImage) - 1);
9307 +       int binary_sig = !memcmp(tuxonice_signature, header->sig,
9308 +                       sizeof(tuxonice_signature));
9309 +
9310 +       if (no_image_header || (binary_sig && !header->have_image))
9311 +               return 0;
9312 +
9313 +       if (!have_image && !binary_sig)
9314 +               return -1;
9315 +
9316 +       if (header->resumed_before)
9317 +               set_toi_state(TOI_RESUMED_BEFORE);
9318 +       else
9319 +               clear_toi_state(TOI_RESUMED_BEFORE);
9320 +
9321 +       target_header_start = header->first_header_block;
9322 +       return 1;
9323 +}
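/*
 * Return contract of parse_signature, as relied upon by callers below:
 *
 *    1  -> a usable image is present; target_header_start is now valid
 *    0  -> no image (the text "TuxOnIce\n" signature, or the binary
 *          signature with have_image == 0)
 *   -1  -> unrecognised contents; not a TuxOnIce target
 */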
9324 +
9325 +/* prepare_signature */
9326 +
9327 +static int prepare_signature(struct toi_file_header *current_header,
9328 +               unsigned long first_header_block)
9329 +{
9330 +       strncpy(current_header->sig, tuxonice_signature,
9331 +                       sizeof(tuxonice_signature));
9332 +       current_header->resumed_before = 0;
9333 +       current_header->first_header_block = first_header_block;
9334 +       current_header->have_image = 1;
9335 +       return 0;
9336 +}
9337 +
9338 +static int toi_file_storage_allocated(void)
9339 +{
9340 +       if (!target_inode)
9341 +               return 0;
9342 +
9343 +       if (target_is_normal_file())
9344 +               return (int) raw_to_real(target_storage_available);
9345 +       else
9346 +               return (int) raw_to_real(main_pages_requested);
9347 +}
9348 +
9349 +static int toi_file_release_storage(void)
9350 +{
9351 +       if (test_action_state(TOI_KEEP_IMAGE) &&
9352 +           test_toi_state(TOI_NOW_RESUMING))
9353 +               return 0;
9354 +
9355 +       toi_put_extent_chain(&block_chain);
9356 +
9357 +       header_pages_reserved = 0;
9358 +       main_pages_allocated = 0;
9359 +       main_pages_requested = 0;
9360 +       return 0;
9361 +}
9362 +
9363 +static void toi_file_reserve_header_space(int request)
9364 +{
9365 +       header_pages_reserved = request;
9366 +       apply_header_reservation();
9367 +}
9368 +
9369 +static int toi_file_allocate_storage(int main_space_requested)
9370 +{
9371 +       int result = 0;
9372 +
9373 +       int extra_pages = DIV_ROUND_UP(main_space_requested *
9374 +                       (sizeof(unsigned long) + sizeof(int)), PAGE_SIZE);
9375 +       int pages_to_get = main_space_requested + extra_pages +
9376 +               header_pages_reserved;
9377 +       int blocks_to_get = pages_to_get - block_chain.size;
9378 +
9379 +       /* Only release_storage reduces the size */
9380 +       if (blocks_to_get < 1)
9381 +               return 0;
9382 +
9383 +       result = populate_block_list();
9384 +
9385 +       if (result)
9386 +               return result;
9387 +
9388 +       toi_message(TOI_WRITER, TOI_MEDIUM, 0,
9389 +               "Finished with block_chain.size == %d.\n",
9390 +               block_chain.size);
9391 +
9392 +       if (block_chain.size < pages_to_get) {
9393 +               printk("Block chain size (%d) < header pages (%d) + extra "
9394 +                       "pages (%d) + main pages (%d) (=%d pages).\n",
9395 +                       block_chain.size, header_pages_reserved, extra_pages,
9396 +                       main_space_requested, pages_to_get);
9397 +               result = -ENOSPC;
9398 +       }
9399 +
9400 +       main_pages_requested = main_space_requested;
9401 +       main_pages_allocated = main_space_requested + extra_pages;
9402 +       return result;
9403 +}
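/*
 * Worked example (64-bit longs, 4 KiB pages): saving 10000 pages of data
 * needs a 12-byte index entry per page, so
 *
 *   extra_pages  = DIV_ROUND_UP(10000 * 12, 4096) = 30
 *   pages_to_get = 10000 + 30 + header_pages_reserved
 *
 * With, say, 100 reserved header pages (a made-up figure), the block
 * chain must map 10130 pages before allocation succeeds.
 */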
9404 +
9405 +static int toi_file_write_header_init(void)
9406 +{
9407 +       int result;
9408 +
9409 +       toi_bio_ops.rw_init(WRITE, 0);
9410 +       toi_writer_buffer_posn = 0;
9411 +
9412 +       /* Info needed to bootstrap goes at the start of the header.
9413 +        * First we save the basic info needed for reading, including the number
9414 +        * of header pages. Then we save the structs containing data needed
9415 +        * for reading the header pages back.
9416 +        * Note that even if header pages take more than one page, when we
9417 +        * read back the info, we will have restored the location of the
9418 +        * next header page by the time we go to use it.
9419 +        */
9420 +
9421 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops,
9422 +                       (char *) &toi_writer_posn_save,
9423 +                       sizeof(toi_writer_posn_save));
9424 +
9425 +       if (result)
9426 +               return result;
9427 +
9428 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops,
9429 +                       (char *) &devinfo, sizeof(devinfo));
9430 +
9431 +       if (result)
9432 +               return result;
9433 +
9434 +       toi_serialise_extent_chain(&toi_fileops, &block_chain);
9435 +
9436 +       return 0;
9437 +}
9438 +
9439 +static int toi_file_write_header_cleanup(void)
9440 +{
9441 +       struct toi_file_header *header;
9442 +       int result;
9443 +       unsigned long sig_page = toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
9444 +
9445 +       /* Write any unsaved data */
9446 +       if (toi_writer_buffer_posn)
9447 +               toi_bio_ops.write_header_chunk_finish();
9448 +
9449 +       toi_bio_ops.finish_all_io();
9450 +
9451 +       toi_extent_state_goto_start(&toi_writer_posn);
9452 +       toi_bio_ops.forward_one_page(1);
9453 +
9454 +       /* Adjust image header */
9455 +       result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
9456 +                       target_firstblock,
9457 +                       virt_to_page(sig_page));
9458 +       if (result)
9459 +               goto out;
9460 +
9461 +       header = (struct toi_file_header *) sig_page;
9462 +
9463 +       prepare_signature(header,
9464 +                       toi_writer_posn.current_offset <<
9465 +                       devinfo.bmap_shift);
9466 +
9467 +       result = toi_bio_ops.bdev_page_io(WRITE, toi_file_target_bdev,
9468 +                       target_firstblock,
9469 +                       virt_to_page(sig_page));
9470 +
9471 +out:
9472 +       toi_bio_ops.finish_all_io();
9473 +       toi_free_page(38, sig_page);
9474 +
9475 +       return result;
9476 +}
9477 +
9478 +/* HEADER READING */
9479 +
9480 +/*
9481 + * read_header_init()
9482 + *
9483 + * Description:
9484 + * 1. Attempt to read the device specified with resume=.
9485 + * 2. Check the contents of the header for our signature.
9486 + * 3. Warn, ignore, reset and/or continue as appropriate.
9487 + * 4. If continuing, read the toi_file configuration section
9488 + *    of the header and set up block device info so we can read
9489 + *    the rest of the header & image.
9490 + *
9491 + * Returns:
9492 + * May not return if the user chooses to reboot at a warning.
9493 + * -EINVAL if we cannot resume at this time. Booting should continue
9494 + * normally.
9495 + */
9496 +
9497 +static int toi_file_read_header_init(void)
9498 +{
9499 +       int result;
9500 +       struct block_device *tmp;
9501 +
9502 +       toi_bio_ops.read_header_init();
9503 +
9504 +       /* Read toi_file configuration */
9505 +       result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
9506 +                       target_header_start,
9507 +                       virt_to_page((unsigned long) toi_writer_buffer));
9508 +
9509 +       if (result) {
9510 +               printk("FileAllocator read header init: Failed to initialise "
9511 +                               "reading the first page of data.\n");
9512 +               toi_bio_ops.rw_cleanup(READ);
9513 +               return result;
9514 +       }
9515 +
9516 +       memcpy(&toi_writer_posn_save, toi_writer_buffer,
9517 +              sizeof(toi_writer_posn_save));
9518 +
9519 +       toi_writer_buffer_posn = sizeof(toi_writer_posn_save);
9520 +
9521 +       tmp = devinfo.bdev;
9522 +
9523 +       memcpy(&devinfo,
9524 +              toi_writer_buffer + toi_writer_buffer_posn,
9525 +              sizeof(devinfo));
9526 +
9527 +       devinfo.bdev = tmp;
9528 +       toi_writer_buffer_posn += sizeof(devinfo);
9529 +
9530 +       toi_extent_state_goto_start(&toi_writer_posn);
9531 +       toi_bio_ops.set_extra_page_forward();
9532 +
9533 +       return toi_load_extent_chain(&block_chain);
9534 +}
9535 +
9536 +static int toi_file_read_header_cleanup(void)
9537 +{
9538 +       toi_bio_ops.rw_cleanup(READ);
9539 +       return 0;
9540 +}
9541 +
9542 +static int toi_file_signature_op(int op)
9543 +{
9544 +       char *cur;
9545 +       int result = 0, changed = 0;
9546 +       struct toi_file_header *header;
9547 +
9548 +       if (IS_ERR(toi_file_target_bdev))
9549 +               return -1;
9550 +
9551 +       cur = (char *) toi_get_zeroed_page(17, TOI_ATOMIC_GFP);
9552 +       if (!cur) {
9553 +               printk("Unable to allocate a page for reading the image "
9554 +                               "signature.\n");
9555 +               return -ENOMEM;
9556 +       }
9557 +
9558 +       result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
9559 +                       target_firstblock,
9560 +                       virt_to_page(cur));
9561 +
9562 +       if (result)
9563 +               goto out;
9564 +
9565 +       header = (struct toi_file_header *) cur;
9566 +       result = parse_signature(header);
9567 +
9568 +       switch (op) {
9569 +       case INVALIDATE:
9570 +               if (result == -1)
9571 +                       goto out;
9572 +
9573 +               memcpy(header->sig, tuxonice_signature,
9574 +                               sizeof(tuxonice_signature));
9575 +               header->resumed_before = 0;
9576 +               header->have_image = 0;
9577 +               result = changed = 1;
9578 +               break;
9579 +       case MARK_RESUME_ATTEMPTED:
9580 +               if (result == 1) {
9581 +                       header->resumed_before = 1;
9582 +                       changed = 1;
9583 +               }
9584 +               break;
9585 +       case UNMARK_RESUME_ATTEMPTED:
9586 +               if (result == 1) {
9587 +                       header->resumed_before = 0;
9588 +                       changed = 1;
9589 +               }
9590 +               break;
9591 +       }
9592 +
9593 +       if (changed) {
9594 +               int io_result = toi_bio_ops.bdev_page_io(WRITE,
9595 +                               toi_file_target_bdev, target_firstblock,
9596 +                               virt_to_page(cur));
9597 +               if (io_result)
9598 +                       result = io_result;
9599 +       }
9600 +
9601 +out:
9602 +       toi_bio_ops.finish_all_io();
9603 +       toi_free_page(17, (unsigned long) cur);
9604 +       return result;
9605 +}
9606 +
9607 +/* Print debug info
9608 + *
9609 + * Description: Report the allocator's state and available storage.
9610 + */
9611 +
9612 +static int toi_file_print_debug_stats(char *buffer, int size)
9613 +{
9614 +       int len = 0;
9615 +
9616 +       if (toiActiveAllocator != &toi_fileops) {
9617 +               len = scnprintf(buffer, size,
9618 +                               "- FileAllocator inactive.\n");
9619 +               return len;
9620 +       }
9621 +
9622 +       len = scnprintf(buffer, size, "- FileAllocator active.\n");
9623 +
9624 +       len += scnprintf(buffer+len, size-len, "  Storage available for "
9625 +                       "image: %d pages.\n",
9626 +                       toi_file_storage_allocated());
9627 +
9628 +       return len;
9629 +}
9630 +
9631 +/*
9632 + * Storage needed
9633 + *
9634 + * Returns amount of space in the image header required
9635 + * for the toi_file's data.
9636 + *
9637 + * We ensure the space is allocated, but actually save the
9638 + * data from write_header_init and therefore don't also define a
9639 + * save_config_info routine.
9640 + */
9641 +static int toi_file_storage_needed(void)
9642 +{
9643 +       return sig_size + strlen(toi_file_target) + 1 +
9644 +               sizeof(toi_writer_posn_save) +
9645 +               sizeof(devinfo) +
9646 +               sizeof(struct hibernate_extent_chain) - 2 * sizeof(void *) +
9647 +               (2 * sizeof(unsigned long) * block_chain.num_extents);
9648 +}
9649 +
9650 +/*
9651 + * toi_file_remove_image
9652 + *
9653 + */
9654 +static int toi_file_remove_image(void)
9655 +{
9656 +       toi_file_release_storage();
9657 +       return toi_file_signature_op(INVALIDATE);
9658 +}
9659 +
9660 +/*
9661 + * Image_exists
9662 + *
9663 + */
9664 +
9665 +static int toi_file_image_exists(int quiet)
9666 +{
9667 +       if (!toi_file_target_bdev)
9668 +               reopen_resume_devt();
9669 +
9670 +       return toi_file_signature_op(GET_IMAGE_EXISTS);
9671 +}
9672 +
9673 +/*
9674 + * Mark resume attempted.
9675 + *
9676 + * Record that we tried to resume from this image.
9677 + */
9678 +
9679 +static int toi_file_mark_resume_attempted(int mark)
9680 +{
9681 +       return toi_file_signature_op(mark ? MARK_RESUME_ATTEMPTED :
9682 +               UNMARK_RESUME_ATTEMPTED);
9683 +}
9684 +
9685 +static void toi_file_set_resume_param(void)
9686 +{
9687 +       char *buffer = (char *) toi_get_zeroed_page(18, TOI_ATOMIC_GFP);
9688 +       char *buffer2 = (char *) toi_get_zeroed_page(19, TOI_ATOMIC_GFP);
9689 +       unsigned long sector = bmap(target_inode, 0);
9690 +       int offset = 0;
9691 +
9692 +       if (!buffer || !buffer2) {
9693 +               if (buffer)
9694 +                       toi_free_page(18, (unsigned long) buffer);
9695 +               if (buffer2)
9696 +                       toi_free_page(19, (unsigned long) buffer2);
9697 +               printk("TuxOnIce: Failed to allocate memory while setting "
9698 +                               "resume= parameter.\n");
9699 +               return;
9700 +       }
9701 +
9702 +       if (toi_file_target_bdev) {
9703 +               set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
9704 +
9705 +               bdevname(toi_file_target_bdev, buffer2);
9706 +               offset += snprintf(buffer + offset, PAGE_SIZE - offset,
9707 +                               "/dev/%s", buffer2);
9708 +
9709 +               if (sector)
9710 +                       offset += snprintf(buffer + offset, PAGE_SIZE - offset,
9711 +                               ":0x%lx", sector << devinfo.bmap_shift);
9712 +       } else
9713 +               offset += snprintf(buffer + offset, PAGE_SIZE - offset,
9714 +                               "%s is not a valid target.", toi_file_target);
9715 +
9716 +       sprintf(resume_file, "file:%s", buffer);
9717 +
9718 +       toi_free_page(18, (unsigned long) buffer);
9719 +       toi_free_page(19, (unsigned long) buffer2);
9720 +
9721 +       toi_attempt_to_parse_resume_device(1);
9722 +}
9723 +
9724 +static int __test_toi_file_target(char *target, int resume_time, int quiet)
9725 +{
9726 +       toi_file_get_target_info(target, 0, resume_time);
9727 +       if (toi_file_signature_op(GET_IMAGE_EXISTS) > -1) {
9728 +               if (!quiet)
9729 +                       printk(KERN_INFO "TuxOnIce: FileAllocator: File "
9730 +                                       "signature found.\n");
9731 +               if (!resume_time)
9732 +                       toi_file_set_resume_param();
9733 +
9734 +               toi_bio_ops.set_devinfo(&devinfo);
9735 +               toi_writer_posn.chains = &block_chain;
9736 +               toi_writer_posn.num_chains = 1;
9737 +
9738 +               if (!resume_time)
9739 +                       set_toi_state(TOI_CAN_HIBERNATE);
9740 +               return 0;
9741 +       }
9742 +
9743 +       clear_toi_state(TOI_CAN_HIBERNATE);
9744 +
9745 +       if (quiet)
9746 +               return 1;
9747 +
9748 +       if (*target)
9749 +               printk(KERN_INFO "TuxOnIce: FileAllocator: Sorry. No signature "
9750 +                               "found at %s.\n", target);
9751 +       else
9752 +               if (!resume_time)
9753 +                       printk(KERN_INFO "TuxOnIce: FileAllocator: Sorry. "
9754 +                                       "Target is not set for hibernating.\n");
9755 +
9756 +       return 1;
9757 +}
9758 +
9759 +static void test_toi_file_target(void)
9760 +{
9761 +       setting_toi_file_target = 1;
9762 +
9763 +       printk(KERN_INFO "TuxOnIce: Hibernating %sabled.\n",
9764 +                       __test_toi_file_target(toi_file_target, 0, 1) ?
9765 +                       "dis" : "en");
9766 +
9767 +       setting_toi_file_target = 0;
9768 +}
9769 +
9770 +/*
9771 + * Parse Image Location
9772 + *
9773 + * Attempt to parse a resume= parameter.
9774 + * File Allocator accepts:
9775 + * resume=file:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
9776 + *
9777 + * Where:
9778 + * DEVNAME is convertible to a dev_t by name_to_dev_t,
9779 + * FIRSTBLOCK is the location of the first block in the file, and
9780 + * BLOCKSIZE is the logical block size of the device (>= SECTOR_SIZE,
9781 + * <= PAGE_SIZE and a multiple of SECTOR_SIZE).
9782 + * Data is validated by attempting to read a header from the
9783 + * location given. Failure will result in toi_file refusing to
9784 + * save an image, and a reboot with correct parameters will be
9785 + * necessary.
9786 + */
9787 +
9788 +static int toi_file_parse_sig_location(char *commandline,
9789 +               int only_writer, int quiet)
9790 +{
9791 +       char *thischar, *devstart = NULL, *colon = NULL, *at_symbol = NULL;
9792 +       int result = -EINVAL, target_blocksize = 0;
9793 +
9794 +       if (strncmp(commandline, "file:", 5)) {
9795 +               if (!only_writer)
9796 +                       return 1;
9797 +       } else
9798 +               commandline += 5;
9799 +
9800 +       /*
9801 +        * Don't check signature again if we're beginning a cycle. If we already
9802 +        * did the initialisation successfully, assume we'll be okay when it
9803 +        * comes to resuming.
9804 +        */
9805 +       if (toi_file_target_bdev)
9806 +               return 0;
9807 +
9808 +       devstart = thischar = commandline;
9809 +       while ((*thischar != ':') && (*thischar != '@') &&
9810 +               ((thischar - commandline) < 250) && (*thischar))
9811 +               thischar++;
9812 +
9813 +       if (*thischar == ':') {
9814 +               colon = thischar;
9815 +               *colon = 0;
9816 +               thischar++;
9817 +       }
9818 +
9819 +       while ((*thischar != '@') && ((thischar - commandline) < 250)
9820 +                       && (*thischar))
9821 +               thischar++;
9822 +
9823 +       if (*thischar == '@') {
9824 +               at_symbol = thischar;
9825 +               *at_symbol = 0;
9826 +       }
9827 +
9828 +       /*
9829 +        * With the toi_file, it is possible to be able to resume but not
9830 +        * hibernate, because resume= is set correctly while the
9831 +        * toi_file_target isn't.
9832 +        *
9833 +        * We may have come here as a result of setting resume or
9834 +        * toi_file_target. We only test the toi_file target in the
9835 +        * former case (it's already done in the latter), and we do it before
9836 +        * setting the block number ourselves. It will overwrite the values
9837 +        * given on the command line if we don't.
9838 +        */
9839 +
9840 +       if (!setting_toi_file_target)
9841 +               __test_toi_file_target(toi_file_target, 1, 0);
9842 +
9843 +       if (colon)
9844 +               target_firstblock = (int) simple_strtoul(colon + 1, NULL, 0);
9845 +       else
9846 +               target_firstblock = 0;
9847 +
9848 +       if (at_symbol) {
9849 +               target_blocksize = (int) simple_strtoul(at_symbol + 1, NULL, 0);
9850 +               if (target_blocksize & (SECTOR_SIZE - 1)) {
9851 +                       printk(KERN_INFO "FileAllocator: Blocksizes are "
9852 +                                       "multiples of %d.\n", SECTOR_SIZE);
9853 +                       result = -EINVAL;
9854 +                       goto out;
9855 +               }
9856 +       }
9857 +
9858 +       if (!quiet)
9859 +               printk(KERN_INFO "TuxOnIce FileAllocator: Testing whether you"
9860 +                               " can resume:\n");
9861 +
9862 +       toi_file_get_target_info(commandline, 0, 1);
9863 +
9864 +       if (!toi_file_target_bdev || IS_ERR(toi_file_target_bdev)) {
9865 +               toi_file_target_bdev = NULL;
9866 +               result = -1;
9867 +               goto out;
9868 +       }
9869 +
9870 +       if (target_blocksize)
9871 +               set_devinfo(toi_file_target_bdev, ffs(target_blocksize));
9872 +
9873 +       result = __test_toi_file_target(commandline, 1, 0);
9874 +
9875 +out:
9876 +       if (result)
9877 +               clear_toi_state(TOI_CAN_HIBERNATE);
9878 +
9879 +       if (!quiet)
9880 +               printk(KERN_INFO "Resuming %sabled.\n",  result ? "dis" : "en");
9881 +
9882 +       if (colon)
9883 +               *colon = ':';
9884 +       if (at_symbol)
9885 +               *at_symbol = '@';
9886 +
9887 +       return result;
9888 +}
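/*
 * Illustrative command-line forms this parser accepts (the device name
 * and numbers are hypothetical):
 *
 *   resume=file:/dev/hda2             header at the device's first block
 *   resume=file:/dev/hda2:0x1d8       header at sector offset 0x1d8
 *   resume=file:/dev/hda2:0x1d8@1024  ...with a 1 KiB logical block size
 *
 * The ':' and '@' separators are temporarily NUL-ed while parsing and
 * restored on the way out, so the command line itself is left intact.
 */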
9889 +
9890 +/* toi_file_save_config_info
9891 + *
9892 + * Description:        Save the target's name, not for resume time, but for
9893 + *             all_settings.
9894 + * Arguments:  Buffer:         Pointer to a buffer of size PAGE_SIZE.
9895 + * Returns:    Number of bytes used for saving our data.
9896 + */
9897 +
9898 +static int toi_file_save_config_info(char *buffer)
9899 +{
9900 +       strcpy(buffer, toi_file_target);
9901 +       return strlen(toi_file_target) + 1;
9902 +}
9903 +
9904 +/* toi_file_load_config_info
9905 + *
9906 + * Description:        Reload target's name.
9907 + * Arguments:  Buffer:         Pointer to the start of the data.
9908 + *             Size:           Number of bytes that were saved.
9909 + */
9910 +
9911 +static void toi_file_load_config_info(char *buffer, int size)
9912 +{
9913 +       strcpy(toi_file_target, buffer);
9914 +}
9915 +
9916 +static int toi_file_initialise(int starting_cycle)
9917 +{
9918 +       if (starting_cycle) {
9919 +               if (toiActiveAllocator != &toi_fileops)
9920 +                       return 0;
9921 +
9922 +               if (starting_cycle & SYSFS_HIBERNATE && !*toi_file_target) {
9923 +                       printk(KERN_INFO "FileAllocator is the active writer, "
9924 +                                       "but no filename has been set.\n");
9925 +                       return 1;
9926 +               }
9927 +       }
9928 +
9929 +       if (*toi_file_target)
9930 +               toi_file_get_target_info(toi_file_target, starting_cycle, 0);
9931 +
9932 +       if (starting_cycle && (toi_file_image_exists(1) == -1)) {
9933 +               printk("%s does not have a valid signature for "
9934 +                               "hibernating.\n", toi_file_target);
9935 +               return 1;
9936 +       }
9937 +
9938 +       return 0;
9939 +}
9940 +
9941 +static struct toi_sysfs_data sysfs_params[] = {
9942 +
9943 +       SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256,
9944 +               SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target),
9945 +       SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0,
9946 +               attempt_to_parse_resume_device2)
9947 +};
9948 +
9949 +static struct toi_module_ops toi_fileops = {
9950 +       .type                                   = WRITER_MODULE,
9951 +       .name                                   = "file storage",
9952 +       .directory                              = "file",
9953 +       .module                                 = THIS_MODULE,
9954 +       .print_debug_info                       = toi_file_print_debug_stats,
9955 +       .save_config_info                       = toi_file_save_config_info,
9956 +       .load_config_info                       = toi_file_load_config_info,
9957 +       .storage_needed                         = toi_file_storage_needed,
9958 +       .initialise                             = toi_file_initialise,
9959 +       .cleanup                                = toi_file_cleanup,
9960 +
9961 +       .noresume_reset         = toi_file_noresume_reset,
9962 +       .storage_available      = toi_file_storage_available,
9963 +       .storage_allocated      = toi_file_storage_allocated,
9964 +       .release_storage        = toi_file_release_storage,
9965 +       .reserve_header_space   = toi_file_reserve_header_space,
9966 +       .allocate_storage       = toi_file_allocate_storage,
9967 +       .image_exists           = toi_file_image_exists,
9968 +       .mark_resume_attempted  = toi_file_mark_resume_attempted,
9969 +       .write_header_init      = toi_file_write_header_init,
9970 +       .write_header_cleanup   = toi_file_write_header_cleanup,
9971 +       .read_header_init       = toi_file_read_header_init,
9972 +       .read_header_cleanup    = toi_file_read_header_cleanup,
9973 +       .remove_image           = toi_file_remove_image,
9974 +       .parse_sig_location     = toi_file_parse_sig_location,
9975 +
9976 +       .sysfs_data             = sysfs_params,
9977 +       .num_sysfs_entries      = sizeof(sysfs_params) /
9978 +               sizeof(struct toi_sysfs_data),
9979 +};
9980 +
9981 +/* ---- Registration ---- */
9982 +static __init int toi_file_load(void)
9983 +{
9984 +       toi_fileops.rw_init = toi_bio_ops.rw_init;
9985 +       toi_fileops.rw_cleanup = toi_bio_ops.rw_cleanup;
9986 +       toi_fileops.read_page = toi_bio_ops.read_page;
9987 +       toi_fileops.write_page = toi_bio_ops.write_page;
9988 +       toi_fileops.rw_header_chunk = toi_bio_ops.rw_header_chunk;
9989 +       toi_fileops.rw_header_chunk_noreadahead =
9990 +               toi_bio_ops.rw_header_chunk_noreadahead;
9991 +       toi_fileops.io_flusher = toi_bio_ops.io_flusher;
9992 +       toi_fileops.update_throughput_throttle = toi_bio_ops.update_throughput_throttle;
9993 +       toi_fileops.monitor_outstanding_io = toi_bio_ops.monitor_outstanding_io;
9994 +       toi_fileops.finish_all_io = toi_bio_ops.finish_all_io;
9995 +
9996 +       return toi_register_module(&toi_fileops);
9997 +}
9998 +
9999 +#ifdef MODULE
10000 +static __exit void toi_file_unload(void)
10001 +{
10002 +       toi_unregister_module(&toi_fileops);
10003 +}
10004 +
10005 +module_init(toi_file_load);
10006 +module_exit(toi_file_unload);
10007 +MODULE_LICENSE("GPL");
10008 +MODULE_AUTHOR("Nigel Cunningham");
10009 +MODULE_DESCRIPTION("TuxOnIce FileAllocator");
10010 +#else
10011 +late_initcall(toi_file_load);
10012 +#endif
10013 diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c
10014 new file mode 100644
10015 index 0000000..5a6f96d
10016 --- /dev/null
10017 +++ b/kernel/power/tuxonice_highlevel.c
10018 @@ -0,0 +1,1269 @@
10019 +/*
10020 + * kernel/power/tuxonice_highlevel.c
10021 + */
10022 +/** \mainpage TuxOnIce.
10023 + *
10024 + * TuxOnIce provides support for saving and restoring an image of
10025 + * system memory to an arbitrary storage device, either on the local computer,
10026 + * or across some network. The support is entirely OS based, so TuxOnIce
10027 + * works without requiring BIOS, APM or ACPI support. The vast majority of the
10028 + * code is also architecture independent, so it should be very easy to port
10029 + * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem
10030 + * and preemption. Initramfses and initrds are also supported.
10031 + *
10032 + * TuxOnIce uses a modular design, in which the method of storing the image is
10033 + * completely abstracted from the core code, as are transformations on the data
10034 + * such as compression and/or encryption (multiple 'modules' can be used to
10035 + * provide arbitrary combinations of functionality). The user interface is also
10036 + * modular, so that arbitrarily simple or complex interfaces can be used to
10037 + * provide anything from debugging information through to eye candy.
10038 + *
10039 + * \section Copyright
10040 + *
10041 + * TuxOnIce is released under the GPLv2.
10042 + *
10043 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR>
10044 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR>
10045 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR>
10046 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)<BR>
10047 + *
10048 + * \section Credits
10049 + *
10050 + * Nigel would like to thank the following people for their work:
10051 + *
10052 + * Bernard Blackham <bernard@blackham.com.au><BR>
10053 + * Web page & Wiki administration, some coding. A person without whom
10054 + * TuxOnIce would not be where it is.
10055 + *
10056 + * Michael Frank <mhf@linuxmail.org><BR>
10057 + * Extensive testing and help with improving stability. I was constantly
10058 + * amazed by the quality and quantity of Michael's help.
10059 + *
10060 + * Pavel Machek <pavel@ucw.cz><BR>
10061 + * Modifications, pointing out defects, being with Gabor at the very
10062 + * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and
10063 + * 2.5.17. Even though Pavel and I disagree on the direction suspend to
10064 + * disk should take, I appreciate the valuable work he did in helping Gabor
10065 + * get the concept working.
10066 + *
10067 + * ..and of course the myriads of TuxOnIce users who have helped diagnose
10068 + * and fix bugs, made suggestions on how to improve the code, proofread
10069 + * documentation, and donated time and money.
10070 + *
10071 + * Thanks also to corporate sponsors:
10072 + *
10073 + * <B>Redhat.</B> Sometime employer from May 2006 (my fault, not Redhat's!).
10074 + *
10075 + * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who
10076 + * allowed him to work on TuxOnIce and PM related issues on company time.
10077 + *
10078 + * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct
10079 + * 2003 to Jan 2004.
10080 + *
10081 + * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing
10082 + * maintenance of SMP and Highmem support.
10083 + *
10084 + * <B>OSDL.</B> Provided access to various hardware configurations and made
10085 + * occasional small donations to the project.
10086 + */
10087 +
10088 +#include <linux/suspend.h>
10089 +#include <linux/module.h>
10090 +#include <linux/freezer.h>
10091 +#include <linux/utsrelease.h>
10092 +#include <linux/cpu.h>
10093 +#include <linux/console.h>
10094 +#include <linux/writeback.h>
10095 +#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */
10096 +
10097 +#include "tuxonice.h"
10098 +#include "tuxonice_modules.h"
10099 +#include "tuxonice_sysfs.h"
10100 +#include "tuxonice_prepare_image.h"
10101 +#include "tuxonice_io.h"
10102 +#include "tuxonice_ui.h"
10103 +#include "tuxonice_power_off.h"
10104 +#include "tuxonice_storage.h"
10105 +#include "tuxonice_checksum.h"
10106 +#include "tuxonice_builtin.h"
10107 +#include "tuxonice_atomic_copy.h"
10108 +#include "tuxonice_alloc.h"
10109 +#include "tuxonice_cluster.h"
10110 +
10111 +/*! Pageset metadata. */
10112 +struct pagedir pagedir2 = {2};
10113 +EXPORT_SYMBOL_GPL(pagedir2);
10114 +
10115 +static mm_segment_t oldfs;
10116 +static DEFINE_MUTEX(tuxonice_in_use);
10117 +static int block_dump_save;
10118 +static char pre_hibernate_command[256];
10119 +static char post_hibernate_command[256];
10120 +
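+/* Magic bytes used to mark/recognise a TuxOnIce image on storage. */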
10121 +char *tuxonice_signature = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c";
10122 +EXPORT_SYMBOL_GPL(tuxonice_signature);
10123 +
10124 +int do_toi_step(int step);
10125 +
10126 +unsigned long boot_kernel_data_buffer;
10127 +
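+/* Messages indexed by result-state bit; reported via get_toi_debug_info(). */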
10128 +static char *result_strings[] = {
10129 +       "Hibernation was aborted",
10130 +       "The user requested that we cancel the hibernation",
10131 +       "No storage was available",
10132 +       "Insufficient storage was available",
10133 +       "Freezing filesystems and/or tasks failed",
10134 +       "A pre-existing image was used",
10135 +       "We would free memory, but image size limit doesn't allow this",
10136 +       "Unable to free enough memory to hibernate",
10137 +       "Unable to obtain the Power Management Semaphore",
10138 +       "A device suspend/resume returned an error",
10139 +       "The extra pages allowance is too small",
10140 +       "We were unable to successfully prepare an image",
10141 +       "TuxOnIce module initialisation failed",
10142 +       "TuxOnIce module cleanup failed",
10143 +       "I/O errors were encountered",
10144 +       "Ran out of memory",
10145 +       "An error was encountered while reading the image",
10146 +       "Platform preparation failed",
10147 +       "CPU Hotplugging failed",
10148 +       "Architecture specific preparation failed",
10149 +       "Pages needed resaving, but we were told to abort if this happens",
10150 +       "We can't hibernate at the moment (invalid resume= or filewriter "
10151 +               "target?)",
10152 +       "A hibernation preparation notifier chain member cancelled the "
10153 +               "hibernation",
10154 +       "Pre-snapshot preparation failed",
10155 +       "Post-snapshot cleanup failed",
10156 +       "Can't resume from alternate image",
10157 +};
10158 +
10159 +/**
10160 + * toi_finish_anything - Cleanup after doing anything.
10161 + *
10162 + * @hibernate_or_resume: Whether finishing a cycle or an attempt at resuming.
10163 + *
10164 + * This is our basic clean-up routine, matching start_anything below. We
10165 + * call cleanup routines, drop module references and restore process fs and
10166 + * cpus allowed masks, together with the global block_dump variable's value.
10167 + */
10168 +void toi_finish_anything(int hibernate_or_resume)
10169 +{
10170 +       toi_cleanup_modules(hibernate_or_resume);
10171 +       toi_put_modules();
10172 +       if (hibernate_or_resume) {
10173 +               block_dump = block_dump_save;
10174 +               set_cpus_allowed(current, CPU_MASK_ALL);
10175 +               toi_alloc_print_debug_stats();
10176 +
10177 +               if (hibernate_or_resume == SYSFS_HIBERNATE &&
10178 +                               strlen(post_hibernate_command))
10179 +                       toi_launch_userspace_program(post_hibernate_command,
10180 +                                       0, UMH_WAIT_PROC, 0);
10181 +               atomic_inc(&snapshot_device_available);
10182 +               mutex_unlock(&pm_mutex);
10183 +       }
10184 +
10185 +       set_fs(oldfs);
10186 +       mutex_unlock(&tuxonice_in_use);
10187 +}
10188 +
10189 +/**
10190 + * toi_start_anything - Basic initialisation for TuxOnIce.
10191 + *
10192 + * @hibernate_or_resume: Whether starting a cycle or an attempt at resuming.
10193 + *
10194 + * Our basic initialisation routine. Take references on modules, use the
10195 + * kernel segment, recheck resume= if no active allocator is set, initialise
10196 + * modules, save and reset block_dump and ensure we're running on CPU0.
10197 + */
10198 +int toi_start_anything(int hibernate_or_resume)
10199 +{
10200 +       int starting_cycle = (hibernate_or_resume == SYSFS_HIBERNATE);
10201 +
10202 +       mutex_lock(&tuxonice_in_use);
10203 +
10204 +       oldfs = get_fs();
10205 +       set_fs(KERNEL_DS);
10206 +
10207 +       if (hibernate_or_resume) {
10208 +               mutex_lock(&pm_mutex);
10209 +
10210 +               if (!atomic_add_unless(&snapshot_device_available, -1, 0))
10211 +                       goto snapshotdevice_unavailable;
10212 +       }
10213 +
10214 +       if (starting_cycle && strlen(pre_hibernate_command)) {
10215 +               int result = toi_launch_userspace_program(pre_hibernate_command,
10216 +                               0, UMH_WAIT_PROC, 0);
10217 +               if (result) {
10218 +                       printk(KERN_INFO "Pre-hibernate command '%s' returned"
10219 +                                       " %d. Aborting.\n",
10220 +                                       pre_hibernate_command, result);
10221 +                       goto prehibernate_err;
10222 +               }
10223 +       }
10224 +
10225 +       if (hibernate_or_resume == SYSFS_HIBERNATE)
10226 +               toi_print_modules();
10227 +
10228 +       if (toi_get_modules()) {
10229 +               printk(KERN_ERR "TuxOnIce: Get modules failed!\n");
10230 +               goto prehibernate_err;
10231 +       }
10232 +
10233 +       if (hibernate_or_resume) {
10234 +               block_dump_save = block_dump;
10235 +               block_dump = 0;
10236 +               set_cpus_allowed(current,
10237 +                               cpumask_of_cpu(first_cpu(cpu_online_map)));
10238 +       }
10239 +
10240 +       if (toi_initialise_modules_early(hibernate_or_resume))
10241 +               goto early_init_err;
10242 +
10243 +       if (!toiActiveAllocator)
10244 +               toi_attempt_to_parse_resume_device(!hibernate_or_resume);
10245 +
10246 +       if (!toi_initialise_modules_late(hibernate_or_resume))
10247 +               return 0;
10248 +
10249 +       toi_cleanup_modules(hibernate_or_resume);
10250 +early_init_err:
10251 +       if (hibernate_or_resume) {
10252 +               block_dump = block_dump_save;
10253 +               set_cpus_allowed(current, CPU_MASK_ALL);
10254 +       }
10255 +prehibernate_err:
10256 +       if (hibernate_or_resume)
10257 +               atomic_inc(&snapshot_device_available);
10258 +snapshotdevice_unavailable:
10259 +       if (hibernate_or_resume)
10260 +               mutex_unlock(&pm_mutex);
10261 +       set_fs(oldfs);
10262 +       mutex_unlock(&tuxonice_in_use);
10263 +       return -EBUSY;
10264 +}
10265 +
10266 +/*
10267 + * Nosave page tracking.
10268 + *
10269 + * Here rather than in prepare_image because we want to do it once only at the
10270 + * start of a cycle.
10271 + */
10272 +
10273 +/**
10274 + * mark_nosave_pages - Set up our Nosave bitmap.
10275 + *
10276 + * Build a bitmap of Nosave pages from the list. The bitmap allows faster
10277 + * use when preparing the image.
10278 + */
10279 +static void mark_nosave_pages(void)
10280 +{
10281 +       struct nosave_region *region;
10282 +
10283 +       list_for_each_entry(region, &nosave_regions, list) {
10284 +               unsigned long pfn;
10285 +
10286 +               for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
10287 +                       if (pfn_valid(pfn))
10288 +                               SetPageNosave(pfn_to_page(pfn));
10289 +       }
10290 +}
10291 +
10292 +/**
10293 + * allocate_bitmaps: Allocate bitmaps used to record page states.
10294 + *
10295 + * Allocate the bitmaps we use to record the various TuxOnIce related
10296 + * page states.
10297 + */
10298 +static int allocate_bitmaps(void)
10299 +{
10300 +       if (memory_bm_create(&pageset1_map, GFP_KERNEL, 0) ||
10301 +           memory_bm_create(&pageset1_copy_map, GFP_KERNEL, 0) ||
10302 +           memory_bm_create(&pageset2_map, GFP_KERNEL, 0) ||
10303 +           memory_bm_create(&io_map, GFP_KERNEL, 0) ||
10304 +           memory_bm_create(&nosave_map, GFP_KERNEL, 0) ||
10305 +           memory_bm_create(&free_map, GFP_KERNEL, 0) ||
10306 +           memory_bm_create(&page_resave_map, GFP_KERNEL, 0))
10307 +               return 1;
10308 +
10309 +       return 0;
10310 +}
10311 +
10312 +/**
10313 + * free_bitmaps: Free the bitmaps used to record page states.
10314 + *
10315 + * Free the bitmaps allocated above. It is not an error to call
10316 + * memory_bm_free on a bitmap that isn't currently allocated.
10317 + */
10318 +static void free_bitmaps(void)
10319 +{
10320 +       memory_bm_free(&pageset1_map, 0);
10321 +       memory_bm_free(&pageset1_copy_map, 0);
10322 +       memory_bm_free(&pageset2_map, 0);
10323 +       memory_bm_free(&io_map, 0);
10324 +       memory_bm_free(&nosave_map, 0);
10325 +       memory_bm_free(&free_map, 0);
10326 +       memory_bm_free(&page_resave_map, 0);
10327 +}
10328 +
10329 +/**
10330 + * io_MB_per_second: Return the number of MB/s read or written.
10331 + *
10332 + * @write: Whether to return the speed at which we wrote.
10333 + *
10334 + * Calculate the number of megabytes per second that were read or written.
10335 + */
10336 +static int io_MB_per_second(int write)
10337 +{
10338 +       return (toi_bkd.toi_io_time[write][1]) ?
10339 +               MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ /
10340 +               toi_bkd.toi_io_time[write][1] : 0;
10341 +}
10342 +
10343 +/**
10344 + * get_debug_info: Fill a buffer with debugging information.
10345 + *
10346 + * @buffer: The buffer to be filled.
10347 + * @count: The size of the buffer, in bytes.
10348 + *
10349 + * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
10350 + * either printk or return via sysfs.
10351 + */
10352 +#define SNPRINTF(a...)         do { len += scnprintf(((char *) buffer) + len, \
10353 +               count - len - 1, ## a); } while (0)
10354 +
10355 +static int get_toi_debug_info(const char *buffer, int count)
10356 +{
10357 +       int len = 0, i, first_result = 1;
10358 +
10359 +       SNPRINTF("TuxOnIce debugging info:\n");
10360 +       SNPRINTF("- TuxOnIce core  : " TOI_CORE_VERSION "\n");
10361 +       SNPRINTF("- Kernel Version : " UTS_RELEASE "\n");
10362 +       SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__);
10363 +       SNPRINTF("- Attempt number : %d\n", nr_hibernates);
10364 +       SNPRINTF("- Parameters     : %ld %ld %ld %d %d %ld\n",
10365 +                       toi_result,
10366 +                       toi_bkd.toi_action,
10367 +                       toi_bkd.toi_debug_state,
10368 +                       toi_bkd.toi_default_console_level,
10369 +                       image_size_limit,
10370 +                       toi_poweroff_method);
10371 +       SNPRINTF("- Overall expected compression percentage: %d.\n",
10372 +                       100 - toi_expected_compression_ratio());
10373 +       len += toi_print_module_debug_info(((char *) buffer) + len,
10374 +                       count - len - 1);
10375 +       if (toi_bkd.toi_io_time[0][1]) {
10376 +               if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) {
10377 +                       SNPRINTF("- I/O speed: Write %ld KB/s",
10378 +                         (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
10379 +                         toi_bkd.toi_io_time[0][1]));
10380 +                       if (toi_bkd.toi_io_time[1][1])
10381 +                               SNPRINTF(", Read %ld KB/s",
10382 +                                 (KB((unsigned long)
10383 +                                     toi_bkd.toi_io_time[1][0]) * HZ /
10384 +                                 toi_bkd.toi_io_time[1][1]));
10385 +               } else {
10386 +                       SNPRINTF("- I/O speed: Write %ld MB/s",
10387 +                        (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
10388 +                         toi_bkd.toi_io_time[0][1]));
10389 +                       if (toi_bkd.toi_io_time[1][1])
10390 +                               SNPRINTF(", Read %ld MB/s",
10391 +                                (MB((unsigned long)
10392 +                                    toi_bkd.toi_io_time[1][0]) * HZ /
10393 +                                 toi_bkd.toi_io_time[1][1]));
10394 +               }
10395 +               SNPRINTF(".\n");
10396 +       } else
10397 +               SNPRINTF("- No I/O speed stats available.\n");
10398 +       SNPRINTF("- Extra pages    : %ld used/%ld.\n",
10399 +                       extra_pd1_pages_used, extra_pd1_pages_allowance);
10400 +
10401 +       for (i = 0; i < TOI_NUM_RESULT_STATES; i++)
10402 +               if (test_result_state(i)) {
10403 +                       SNPRINTF("%s: %s.\n", first_result ?
10404 +                                       "- Result         " :
10405 +                                       "                 ",
10406 +                                       result_strings[i]);
10407 +                       first_result = 0;
10408 +               }
10409 +       if (first_result)
10410 +               SNPRINTF("- Result         : %s.\n", nr_hibernates ?
10411 +                       "Succeeded" :
10412 +                       "No hibernation attempts so far");
10413 +       return len;
10414 +}
10415 +
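+/*
+ * For illustration only (every value below is invented), a successful
+ * cycle might produce debugging info along these lines:
+ *
+ *   TuxOnIce debugging info:
+ *   - TuxOnIce core  : 3.0
+ *   - Kernel Version : 2.6.25
+ *   - Compiler vers. : 4.2
+ *   - Attempt number : 1
+ *   - Parameters     : 0 0 0 1 0 5
+ *   - Overall expected compression percentage: 50.
+ *   - I/O speed: Write 80 MB/s, Read 90 MB/s.
+ *   - Extra pages    : 0 used/2000.
+ *   - Result         : Succeeded.
+ */
+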
10416 +/**
10417 + * do_cleanup: Cleanup after attempting to hibernate or resume.
10418 + *
10419 + * @get_debug_info: Whether to allocate and print debugging info.
10420 + *
10421 + * Cleanup after attempting to hibernate or resume, possibly getting
10422 + * debugging info as we do so.
10423 + */
10424 +static void do_cleanup(int get_debug_info)
10425 +{
10426 +       int i = 0;
10427 +       char *buffer = NULL;
10428 +
10429 +       if (get_debug_info)
10430 +               toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up...");
10431 +
10432 +       free_checksum_pages();
10433 +
10434 +       if (get_debug_info)
10435 +               buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP);
10436 +
10437 +       if (buffer)
10438 +               i = get_toi_debug_info(buffer, PAGE_SIZE);
10439 +
10440 +       toi_free_extra_pagedir_memory();
10441 +
10442 +       pagedir1.size = pagedir2.size = 0;
10443 +       set_highmem_size(pagedir1, 0);
10444 +       set_highmem_size(pagedir2, 0);
10445 +
10446 +       if (boot_kernel_data_buffer) {
10447 +               if (!test_toi_state(TOI_BOOT_KERNEL))
10448 +                       toi_free_page(37, boot_kernel_data_buffer);
10449 +               boot_kernel_data_buffer = 0;
10450 +       }
10451 +
10452 +       clear_toi_state(TOI_BOOT_KERNEL);
10453 +       thaw_processes();
10454 +
10455 +#ifdef CONFIG_TOI_KEEP_IMAGE
10456 +       if (test_action_state(TOI_KEEP_IMAGE) &&
10457 +           !test_result_state(TOI_ABORTED)) {
10458 +               toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
10459 +                       "TuxOnIce: Not invalidating the image due "
10460 +                       "to Keep Image being enabled.\n");
10461 +               set_result_state(TOI_KEPT_IMAGE);
10462 +       } else
10463 +#endif
10464 +               if (toiActiveAllocator)
10465 +                       toiActiveAllocator->remove_image();
10466 +
10467 +       free_bitmaps();
10468 +
10469 +       if (test_toi_state(TOI_NOTIFIERS_PREPARE)) {
10470 +               pm_notifier_call_chain(PM_POST_HIBERNATION);
10471 +               clear_toi_state(TOI_NOTIFIERS_PREPARE);
10472 +       }
10473 +
10474 +       if (buffer && i) {
10475 +               /* Printk can only handle 1023 bytes, including
10476 +                * its level mangling. */
10477 +               for (i = 0; i < 3; i++)
10478 +                       printk("%s", buffer + (1023 * i));
10479 +               toi_free_page(20, (unsigned long) buffer);
10480 +       }
10481 +
10482 +       if (!test_action_state(TOI_LATE_CPU_HOTPLUG))
10483 +               enable_nonboot_cpus();
10484 +       toi_cleanup_console();
10485 +
10486 +       free_attention_list();
10487 +
10488 +       toi_deactivate_storage(0);
10489 +
10490 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
10491 +       clear_toi_state(TOI_TRYING_TO_RESUME);
10492 +       clear_toi_state(TOI_NOW_RESUMING);
10493 +}
10494 +
10495 +/**
10496 + * check_still_keeping_image: We kept an image; check whether to reuse it.
10497 + *
10498 + * We enter this routine when we have kept an image. If the user has said they
10499 + * want to still keep it, all we need to do is powerdown. If powering down
10500 + * means hibernating to ram and the power doesn't run out, we'll return 1.
10501 + * If we do power off properly or the battery runs out, we'll resume via the
10502 + * normal paths.
10503 + *
10504 + * If the user has said they want to remove the previously kept image, we
10505 + * remove it, and return 0. We'll then store a new image.
10506 + */
10507 +static int check_still_keeping_image(void)
10508 +{
10509 +       if (test_action_state(TOI_KEEP_IMAGE)) {
10510 +               printk(KERN_INFO "Image already stored: powering down immediately.\n");
10511 +               do_toi_step(STEP_HIBERNATE_POWERDOWN);
10512 +               return 1;       /* Just in case we're using S3 */
10513 +       }
10514 +
10515 +       printk(KERN_INFO "Invalidating previous image.\n");
10516 +       toiActiveAllocator->remove_image();
10517 +
10518 +       return 0;
10519 +}
10520 +
10521 +/**
10522 + * toi_init: Prepare to hibernate to disk.
10523 + *
10524 + * Initialise variables & data structures, in preparation for
10525 + * hibernating to disk.
10526 + */
10527 +static int toi_init(void)
10528 +{
10529 +       int result, i, j;
10530 +
10531 +       toi_result = 0;
10532 +
10533 +       printk(KERN_INFO "Initiating a hibernation cycle.\n");
10534 +
10535 +       nr_hibernates++;
10536 +
10537 +       for (i = 0; i < 2; i++)
10538 +               for (j = 0; j < 2; j++)
10539 +                       toi_bkd.toi_io_time[i][j] = 0;
10540 +
10541 +       if (!test_toi_state(TOI_CAN_HIBERNATE) ||
10542 +           allocate_bitmaps())
10543 +               return 1;
10544 +
10545 +       mark_nosave_pages();
10546 +
10547 +       toi_prepare_console();
10548 +
10549 +       result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
10550 +       if (result) {
10551 +               set_result_state(TOI_NOTIFIERS_PREPARE_FAILED);
10552 +               return 1;
10553 +       }
10554 +       set_toi_state(TOI_NOTIFIERS_PREPARE);
10555 +
10556 +       boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP);
10557 +       if (!boot_kernel_data_buffer) {
10558 +               printk(KERN_ERR "TuxOnIce: Failed to allocate "
10559 +                               "boot_kernel_data_buffer.\n");
10560 +               set_result_state(TOI_OUT_OF_MEMORY);
10561 +               return 1;
10562 +       }
10563 +
10564 +       if (test_action_state(TOI_LATE_CPU_HOTPLUG) ||
10565 +                       !disable_nonboot_cpus())
10566 +               return 1;
10567 +
10568 +       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
10569 +       return 0;
10570 +}
10571 +
10572 +/**
10573 + * can_hibernate: Perform basic 'Can we hibernate?' tests.
10574 + *
10575 + * Perform basic tests that must pass if we're going to be able to hibernate:
10576 + * Can we get the pm_mutex? Is resume= valid? (We need to know where to
10577 + * write the image header.)
10578 + */
10579 +static int can_hibernate(void)
10580 +{
10581 +       if (!test_toi_state(TOI_CAN_HIBERNATE))
10582 +               toi_attempt_to_parse_resume_device(0);
10583 +
10584 +       if (!test_toi_state(TOI_CAN_HIBERNATE)) {
10585 +               printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n"
10586 +                       "This may be because you haven't put something along "
10587 +                       "the lines of\n\nresume=swap:/dev/hda1\n\n"
10588 +                       "in lilo.conf or equivalent. (Where /dev/hda1 is your "
10589 +                       "swap partition).\n");
10590 +               set_abort_result(TOI_CANT_SUSPEND);
10591 +               return 0;
10592 +       }
10593 +
10594 +       if (strlen(alt_resume_param)) {
10595 +               attempt_to_parse_alt_resume_param();
10596 +
10597 +               if (!strlen(alt_resume_param)) {
10598 +                       printk(KERN_INFO "Alternate resume parameter now "
10599 +                                       "invalid. Aborting.\n");
10600 +                       set_abort_result(TOI_CANT_USE_ALT_RESUME);
10601 +                       return 0;
10602 +               }
10603 +       }
10604 +
10605 +       return 1;
10606 +}
10607 +
10608 +/**
10609 + * do_post_image_write: Having written an image, figure out what to do next.
10610 + *
10611 + * After writing an image, we might load an alternate image or power down.
10612 + * Powering down might involve hibernating to ram, in which case we also
10613 + * need to handle reloading pageset2.
10614 + */
10615 +static int do_post_image_write(void)
10616 +{
10617 +       /* If switching images fails, do normal powerdown */
10618 +       if (alt_resume_param[0])
10619 +               do_toi_step(STEP_RESUME_ALT_IMAGE);
10620 +
10621 +       toi_power_down();
10622 +
10623 +       barrier();
10624 +       mb();
10625 +       do_cleanup(1);
10626 +       return 0;
10627 +}
10628 +
10629 +/**
10630 + * __save_image: Do the hard work of saving the image.
10631 + *
10632 + * High level routine for getting the image saved. The key assumptions made
10633 + * are that processes have been frozen and sufficient memory is available.
10634 + *
10635 + * We also exit through here at resume time, coming back from toi_hibernate
10636 + * after the atomic restore. This is the reason for the toi_in_hibernate
10637 + * test.
10638 + */
10639 +static int __save_image(void)
10640 +{
10641 +       int temp_result, did_copy = 0;
10642 +
10643 +       toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image...");
10644 +
10645 +       toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
10646 +               " - Final values: %d and %d.\n",
10647 +               pagedir1.size, pagedir2.size);
10648 +
10649 +       toi_cond_pause(1, "About to write pagedir2.");
10650 +
10651 +       temp_result = write_pageset(&pagedir2);
10652 +
10653 +       if (temp_result == -1 || test_result_state(TOI_ABORTED))
10654 +               return 1;
10655 +
10656 +       toi_cond_pause(1, "About to copy pageset 1.");
10657 +
10658 +       if (test_result_state(TOI_ABORTED))
10659 +               return 1;
10660 +
10661 +       toi_deactivate_storage(1);
10662 +
10663 +       toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy.");
10664 +
10665 +       toi_in_hibernate = 1;
10666 +
10667 +       if (toi_go_atomic(PMSG_FREEZE, 1))
10668 +               goto Failed;
10669 +
10670 +       temp_result = toi_hibernate();
10671 +       if (!temp_result)
10672 +               did_copy = 1;
10673 +
10674 +       /* We return here at resume time too! */
10675 +       toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result);
10676 +
10677 +Failed:
10678 +       if (toi_activate_storage(1))
10679 +               panic("Failed to reactivate our storage.");
10680 +
10681 +       /* Resume time? */
10682 +       if (!toi_in_hibernate) {
10683 +               copyback_post();
10684 +               return 0;
10685 +       }
10686 +
10687 +       /* Nope. Hibernating. So, see if we can save the image... */
10688 +
10689 +       if (temp_result || test_result_state(TOI_ABORTED)) {
10690 +               if (did_copy)
10691 +                       goto abort_reloading_pagedir_two;
10692 +               else
10693 +                       return 1;
10694 +       }
10695 +
10696 +       toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size,
10697 +                       NULL);
10698 +
10699 +       if (test_result_state(TOI_ABORTED))
10700 +               goto abort_reloading_pagedir_two;
10701 +
10702 +       toi_cond_pause(1, "About to write pageset1.");
10703 +
10704 +       toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1\n");
10705 +
10706 +       temp_result = write_pageset(&pagedir1);
10707 +
10708 +       /* We didn't overwrite any memory, so no reread needs to be done. */
10709 +       if (test_action_state(TOI_TEST_FILTER_SPEED))
10710 +               return 1;
10711 +
10712 +       if (temp_result == 1 || test_result_state(TOI_ABORTED))
10713 +               goto abort_reloading_pagedir_two;
10714 +
10715 +       toi_cond_pause(1, "About to write header.");
10716 +
10717 +       if (test_result_state(TOI_ABORTED))
10718 +               goto abort_reloading_pagedir_two;
10719 +
10720 +       temp_result = write_image_header();
10721 +
10722 +       if (test_action_state(TOI_TEST_BIO))
10723 +               return 1;
10724 +
10725 +       if (!temp_result && !test_result_state(TOI_ABORTED))
10726 +               return 0;
10727 +
10728 +abort_reloading_pagedir_two:
10729 +       temp_result = read_pageset2(1);
10730 +
10731 +       /* If that failed, we're sunk. Panic! */
10732 +       if (temp_result)
10733 +               panic("Attempt to reload pagedir 2 while aborting "
10734 +                               "a hibernate failed.");
10735 +
10736 +       return 1;
10737 +}
10738 +
10739 +/**
10740 + * do_save_image: Save the image and handle the result.
10741 + *
10742 + * Save the prepared image. If we fail or we're in the path returning
10743 + * from the atomic restore, cleanup.
10744 + */
10745 +
10746 +static int do_save_image(void)
10747 +{
10748 +       int result = __save_image();
10749 +       if (!toi_in_hibernate || result)
10750 +               do_cleanup(1);
10751 +       return result;
10752 +}
10753 +
10754 +/**
10755 + * do_prepare_image: Try to prepare an image.
10756 + *
10757 + * Seek to initialise and prepare an image to be saved. On failure,
10758 + * cleanup.
10759 + */
10760 +
10761 +static int do_prepare_image(void)
10762 +{
10763 +       if (toi_activate_storage(0))
10764 +               return 1;
10765 +
10766 +       /*
10767 +        * If we kept an image, are still keeping it and hibernate to RAM, we
10768 +        * will return 1 after hibernating and resuming (provided the power
10769 +        * doesn't run out). In that case, we skip directly to cleanup and exit.
10770 +        */
10771 +
10772 +       if (!can_hibernate() ||
10773 +           (test_result_state(TOI_KEPT_IMAGE) &&
10774 +            check_still_keeping_image()))
10775 +               goto cleanup;
10776 +
10777 +       if (toi_init() && !toi_prepare_image() &&
10778 +                       !test_result_state(TOI_ABORTED))
10779 +               return 0;
10780 +
10781 +cleanup:
10782 +       do_cleanup(0);
10783 +       return 1;
10784 +}
10785 +
10786 +/**
10787 + * do_check_can_resume: Find out whether an image has been stored.
10788 + *
10789 + * Read whether an image exists. We use the same routine as the
10790 + * image_exists sysfs entry, and just look to see whether the
10791 + * first character in the resulting buffer is a '1'.
10792 + */
10793 +int do_check_can_resume(void)
10794 +{
10795 +       char *buf = (char *) toi_get_zeroed_page(21, TOI_ATOMIC_GFP);
10796 +       int result = 0;
10797 +
10798 +       if (!buf)
10799 +               return 0;
10800 +
10801 +       /* Only interested in first byte, so throw away return code. */
10802 +       image_exists_read(buf, PAGE_SIZE);
10803 +
10804 +       if (buf[0] == '1')
10805 +               result = 1;
10806 +
10807 +       toi_free_page(21, (unsigned long) buf);
10808 +       return result;
10809 +}
10810 +EXPORT_SYMBOL_GPL(do_check_can_resume);
10811 +
10812 +/**
10813 + * do_load_atomic_copy: Load the first part of an image, if it exists.
10814 + *
10815 + * Check whether we have an image. If one exists, do sanity checking
10816 + * (possibly invalidating the image or even rebooting if the user
10817 + * requests that) before loading it into memory in preparation for the
10818 + * atomic restore.
10819 + *
10820 + * If and only if we have an image loaded and ready to restore, we return 1.
10821 + */
10822 +static int do_load_atomic_copy(void)
10823 +{
10824 +       int read_image_result = 0;
10825 +
10826 +       if (sizeof(swp_entry_t) != sizeof(long)) {
10827 +               printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size"
10828 +                       " of long. Please report this!\n");
10829 +               return 1;
10830 +       }
10831 +
10832 +       if (!resume_file[0])
10833 +               printk(KERN_WARNING "TuxOnIce: "
10834 +                       "You need to use a resume= command line parameter to "
10835 +                       "tell TuxOnIce where to look for an image.\n");
10836 +
10837 +       toi_activate_storage(0);
10838 +
10839 +       if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) &&
10840 +               !toi_attempt_to_parse_resume_device(0)) {
10841 +               /*
10842 +                * Without a usable storage device we can do nothing -
10843 +                * even if noresume is given
10844 +                */
10845 +
10846 +               if (!toiNumAllocators)
10847 +                       printk(KERN_ALERT "TuxOnIce: "
10848 +                         "No storage allocators have been registered.\n");
10849 +               else
10850 +                       printk(KERN_ALERT "TuxOnIce: "
10851 +                               "Missing or invalid storage location "
10852 +                               "(resume= parameter). Please correct and "
10853 +                               "rerun lilo (or equivalent) before "
10854 +                               "hibernating.\n");
10855 +               toi_deactivate_storage(0);
10856 +               return 1;
10857 +       }
10858 +
10859 +       read_image_result = read_pageset1(); /* non fatal error ignored */
10860 +
10861 +       if (test_toi_state(TOI_NORESUME_SPECIFIED))
10862 +               clear_toi_state(TOI_NORESUME_SPECIFIED);
10863 +
10864 +       toi_deactivate_storage(0);
10865 +
10866 +       if (read_image_result)
10867 +               return 1;
10868 +
10869 +       return 0;
10870 +}
10871 +
10872 +/**
10873 + * prepare_restore_load_alt_image: Save & restore alt image variables.
10874 + *
10875 + * Save and restore the pageset1 maps, when loading an alternate image.
10876 + */
10877 +static void prepare_restore_load_alt_image(int prepare)
10878 +{
10879 +       static struct memory_bitmap pageset1_map_save, pageset1_copy_map_save;
10880 +
10881 +       if (prepare) {
10882 +               memcpy(&pageset1_map_save, &pageset1_map,
10883 +                               sizeof(struct memory_bitmap));
10884 +               memset(&pageset1_map, 0, sizeof(struct memory_bitmap));
10885 +               memcpy(&pageset1_copy_map_save, &pageset1_copy_map,
10886 +                               sizeof(struct memory_bitmap));
10887 +               memset(&pageset1_copy_map, 0, sizeof(struct memory_bitmap));
10888 +               set_toi_state(TOI_LOADING_ALT_IMAGE);
10889 +               toi_reset_alt_image_pageset2_pfn();
10890 +       } else {
10891 +               memory_bm_free(&pageset1_map, 0);
10892 +               memcpy(&pageset1_map, &pageset1_map_save,
10893 +                               sizeof(struct memory_bitmap));
10894 +               memory_bm_free(&pageset1_copy_map, 0);
10895 +               memcpy(&pageset1_copy_map, &pageset1_copy_map_save,
10896 +                               sizeof(struct memory_bitmap));
10897 +               clear_toi_state(TOI_NOW_RESUMING);
10898 +               clear_toi_state(TOI_LOADING_ALT_IMAGE);
10899 +       }
10900 +}
10901 +
10902 +/**
10903 + * do_toi_step: Perform a step in hibernating or resuming.
10904 + *
10905 + * Perform a step in hibernating or resuming an image. This abstraction
10906 + * is in preparation for implementing cluster support, and perhaps replacing
10907 + * uswsusp too (we haven't yet checked whether that's possible).
10908 + */
10909 +int do_toi_step(int step)
10910 +{
10911 +       switch (step) {
10912 +       case STEP_HIBERNATE_PREPARE_IMAGE:
10913 +               return do_prepare_image();
10914 +       case STEP_HIBERNATE_SAVE_IMAGE:
10915 +               return do_save_image();
10916 +       case STEP_HIBERNATE_POWERDOWN:
10917 +               return do_post_image_write();
10918 +       case STEP_RESUME_CAN_RESUME:
10919 +               return do_check_can_resume();
10920 +       case STEP_RESUME_LOAD_PS1:
10921 +               return do_load_atomic_copy();
10922 +       case STEP_RESUME_DO_RESTORE:
10923 +               /*
10924 +                * If we succeed, this doesn't return.
10925 +                * Instead, we return from do_save_image() in the
10926 +                * hibernated kernel.
10927 +                */
10928 +               return toi_atomic_restore();
10929 +       case STEP_RESUME_ALT_IMAGE:
10930 +               printk(KERN_INFO "Trying to resume alternate image.\n");
10931 +               toi_in_hibernate = 0;
10932 +               save_restore_alt_param(SAVE, NOQUIET);
10933 +               prepare_restore_load_alt_image(1);
10934 +               if (!do_check_can_resume()) {
10935 +                       printk(KERN_INFO "Nothing to resume from.\n");
10936 +                       goto out;
10937 +               }
10938 +               if (!do_load_atomic_copy())
10939 +                       toi_atomic_restore();
10940 +
10941 +               printk(KERN_INFO "Failed to load image.\n");
10942 +out:
10943 +               prepare_restore_load_alt_image(0);
10944 +               save_restore_alt_param(RESTORE, NOQUIET);
10945 +               break;
10946 +       case STEP_CLEANUP:
10947 +               do_cleanup(1);
10948 +               break;
10949 +       case STEP_QUIET_CLEANUP:
10950 +               do_cleanup(0);
10951 +               break;
10952 +       }
10953 +
10954 +       return 0;
10955 +}
10956 +EXPORT_SYMBOL_GPL(do_toi_step);
10957 +
10958 +/* -- Functions for kickstarting a hibernate or resume --- */
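+
+/*
+ * Typical step orderings, as driven by the functions below:
+ * hibernate: PREPARE_IMAGE -> SAVE_IMAGE -> POWERDOWN (then CLEANUP);
+ * resume:    CAN_RESUME -> LOAD_PS1 -> DO_RESTORE.
+ */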
10959 +
10960 +/**
10961 + * __toi_try_resume: Try to do the steps in resuming.
10962 + *
10963 + * Check if we have an image and if so try to resume. Clear the status
10964 + * flags too.
10965 + */
10966 +void __toi_try_resume(void)
10967 +{
10968 +       set_toi_state(TOI_TRYING_TO_RESUME);
10969 +       resume_attempted = 1;
10970 +
10971 +       current->flags |= PF_MEMALLOC;
10972 +
10973 +       if (do_toi_step(STEP_RESUME_CAN_RESUME) &&
10974 +                       !do_toi_step(STEP_RESUME_LOAD_PS1))
10975 +               do_toi_step(STEP_RESUME_DO_RESTORE);
10976 +
10977 +       do_cleanup(0);
10978 +
10979 +       current->flags &= ~PF_MEMALLOC;
10980 +
10981 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
10982 +       clear_toi_state(TOI_TRYING_TO_RESUME);
10983 +       clear_toi_state(TOI_NOW_RESUMING);
10984 +}
10985 +
10986 +/**
10987 + * _toi_try_resume: Wrapper calling __toi_try_resume from do_mounts.
10988 + *
10989 + * Wrapper for when __toi_try_resume is called from init/do_mounts.c,
10990 + * rather than from echo > /sys/power/tuxonice/do_resume.
10991 + */
10992 +static void _toi_try_resume(void)
10993 +{
10994 +       resume_attempted = 1;
10995 +
10996 +       /*
10997 +        * There's a comment in kernel/power/disk.c that indicates
10998 +        * we should be able to use mutex_lock_nested below. That
10999 +        * doesn't seem to cut it, though, so let's just turn lockdep
11000 +        * off for now.
11001 +        */
11002 +       lockdep_off();
11003 +
11004 +       if (toi_start_anything(SYSFS_RESUMING))
11005 +               goto out;
11006 +
11007 +       __toi_try_resume();
11008 +
11009 +       /*
11010 +        * For initramfs, we have to clear the boot time
11011 +        * flag after trying to resume
11012 +        */
11013 +       clear_toi_state(TOI_BOOT_TIME);
11014 +
11015 +       toi_finish_anything(SYSFS_RESUMING);
11016 +out:
11017 +       lockdep_on();
11018 +}
11019 +
11020 +/**
11021 + * _toi_try_hibernate: Try to start a hibernation cycle.
11022 + *
11025 + * Start a hibernation cycle, coming in from either
11026 + * echo > /sys/power/tuxonice/do_suspend
11027 + *
11028 + * or
11029 + *
11030 + * echo disk > /sys/power/state
11031 + *
11032 + * In the latter case, we come in without tuxonice_in_use held; in the
11033 + * former, it has already been taken.
11034 + */
11035 +int _toi_try_hibernate(void)
11036 +{
11037 +       int result = 0, sys_power_disk = 0;
11038 +
11039 +       if (!mutex_is_locked(&tuxonice_in_use)) {
11040 +               /* Came in via /sys/power/disk */
11041 +               if (toi_start_anything(SYSFS_HIBERNATING))
11042 +                       return -EBUSY;
11043 +               sys_power_disk = 1;
11044 +       }
11045 +
11046 +       current->flags |= PF_MEMALLOC;
11047 +
11048 +       if (test_toi_state(TOI_CLUSTER_MODE)) {
11049 +               toi_initiate_cluster_hibernate();
11050 +               goto out;
11051 +       }
11052 +
11053 +       result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
11054 +       if (result)
11055 +               goto out;
11056 +
11057 +       if (test_action_state(TOI_FREEZER_TEST)) {
11058 +               do_cleanup(0);
11059 +               goto out;
11060 +       }
11061 +
11062 +       result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
11063 +
11064 +       /* This code runs at resume time too! */
11065 +       if (!result && toi_in_hibernate)
11066 +               result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
11067 +out:
11068 +       current->flags &= ~PF_MEMALLOC;
11069 +
11070 +       if (sys_power_disk)
11071 +               toi_finish_anything(SYSFS_HIBERNATING);
11072 +
11073 +       return result;
11074 +}
11075 +
11076 +/*
11077 + * channel_no: If !0, -c <channel_no> is added to args (userui).
11078 + */
11079 +int toi_launch_userspace_program(char *command, int channel_no,
11080 +               enum umh_wait wait, int debug)
11081 +{
11082 +       int retval;
11083 +       static char *envp[] = {
11084 +                       "HOME=/",
11085 +                       "TERM=linux",
11086 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
11087 +                       NULL };
11088 +       static char *argv[] =
11089 +               { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
11090 +       char *channel = NULL;
11091 +       int arg = 0, size;
11092 +       char test_read[255];
11093 +       char *orig_posn = command;
11094 +
11095 +       if (!strlen(orig_posn))
11096 +               return 1;
11097 +
11098 +       if (channel_no) {
11099 +               channel = toi_kzalloc(4, 6, GFP_KERNEL);
11100 +               if (!channel) {
11101 +                       printk(KERN_INFO "Failed to allocate memory in "
11102 +                               "preparing to launch userspace program.\n");
11103 +                       return 1;
11104 +               }
11105 +       }
11106 +
11107 +       /* Up to 6 args supported */
11108 +       while (arg < 6) {
11109 +               sscanf(orig_posn, "%254s", test_read); /* bounded: 255-byte buffer */
11110 +               size = strlen(test_read);
11111 +               if (!(size))
11112 +                       break;
11113 +               argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP);
+               if (!argv[arg])
+                       break;  /* allocation failed; use the args we have */
11114 +               strcpy(argv[arg], test_read);
11115 +               orig_posn += size + 1;
11116 +               *test_read = 0;
11117 +               arg++;
11118 +       }
11119 +
11120 +       if (channel_no) {
11121 +               sprintf(channel, "-c%d", channel_no);
11122 +               argv[arg] = channel;
11123 +       } else
11124 +               arg--;
11125 +
11126 +       if (debug) {
11127 +               argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP);
11128 +               strcpy(argv[arg], "--debug");
11129 +       }
11130 +
11131 +       retval = call_usermodehelper(argv[0], argv, envp, wait);
11132 +
11133 +       /*
11134 +        * If the program reports an error, retval = 256 (exit status 1,
11135 +        * wait()-style encoded). Don't complain about that here.
11136 +        */
11137 +       if (retval && retval != 256)
11138 +               printk(KERN_ERR "Failed to launch userspace program '%s': Error %d\n",
11139 +                               command, retval);
11140 +
11141 +       {
11142 +               int i;
11143 +               for (i = 0; i < arg; i++)
11144 +                       if (argv[i] && argv[i] != channel)
11145 +                               toi_kfree(5, argv[i]);
11146 +       }
11147 +
11148 +       toi_kfree(4, channel);
11149 +
11150 +       return retval;
11151 +}
11152 +
11153 +/*
11154 + * This array contains entries that are automatically registered at
11155 + * boot. Modules and the console code register their own entries separately.
11156 + */
11157 +static struct toi_sysfs_data sysfs_params[] = {
11158 +       SYSFS_LONG("extra_pages_allowance", SYSFS_RW,
11159 +                       &extra_pd1_pages_allowance, 0, LONG_MAX, 0),
11160 +       SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read,
11161 +                       image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL),
11162 +       SYSFS_STRING("resume", SYSFS_RW, resume_file, 255,
11163 +                       SYSFS_NEEDS_SM_FOR_WRITE,
11164 +                       attempt_to_parse_resume_device2),
11165 +       SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255,
11166 +                       SYSFS_NEEDS_SM_FOR_WRITE,
11167 +                       attempt_to_parse_alt_resume_param),
11168 +       SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0,
11169 +                       NULL),
11170 +       SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action,
11171 +                       TOI_IGNORE_ROOTFS, 0),
11172 +       SYSFS_INT("image_size_limit", SYSFS_RW, &image_size_limit, -2,
11173 +                       INT_MAX, 0, NULL),
11174 +       SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0),
11175 +       SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action,
11176 +                       TOI_NO_MULTITHREADED_IO, 0),
11177 +       SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action,
11178 +                       TOI_NO_FLUSHER_THREAD, 0),
11179 +       SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action,
11180 +                       TOI_PAGESET2_FULL, 0),
11181 +       SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0),
11182 +       SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action,
11183 +                       TOI_REPLACE_SWSUSP, 0),
11184 +       SYSFS_STRING("resume_commandline", SYSFS_RW,
11185 +                       toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0,
11186 +                       NULL),
11187 +       SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL),
11188 +       SYSFS_BIT("no_load_direct", SYSFS_RW, &toi_bkd.toi_action,
11189 +                       TOI_NO_DIRECT_LOAD, 0),
11190 +       SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action,
11191 +                       TOI_FREEZER_TEST, 0),
11192 +       SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0),
11193 +       SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action,
11194 +                       TOI_TEST_FILTER_SPEED, 0),
11195 +       SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action,
11196 +                       TOI_NO_PAGESET2, 0),
11197 +       SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action,
11198 +                       TOI_NO_PS2_IF_UNNEEDED, 0),
11199 +       SYSFS_BIT("late_cpu_hotplug", SYSFS_RW, &toi_bkd.toi_action,
11200 +                       TOI_LATE_CPU_HOTPLUG, 0),
11201 +       SYSFS_STRING("pre_hibernate_command", SYSFS_RW, pre_hibernate_command,
11202 +                       255, 0, NULL),
11203 +       SYSFS_STRING("post_hibernate_command", SYSFS_RW, post_hibernate_command,
11204 +                       255, 0, NULL),
11205 +#ifdef CONFIG_TOI_KEEP_IMAGE
11206 +       SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE,
11207 +                       0),
11208 +#endif
11209 +};
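+
+/*
+ * These entries appear under /sys/power/tuxonice/ (cf. the do_resume and
+ * do_suspend paths mentioned above). Illustrative usage, assuming a swap
+ * allocator on /dev/hda1:
+ *
+ *   echo swap:/dev/hda1 > /sys/power/tuxonice/resume
+ *   cat /sys/power/tuxonice/debug_info
+ */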
11210 +
11211 +static struct toi_core_fns my_fns = {
11212 +       .get_nonconflicting_page = __toi_get_nonconflicting_page,
11213 +       .post_context_save = __toi_post_context_save,
11214 +       .try_hibernate = _toi_try_hibernate,
11215 +       .try_resume = _toi_try_resume,
11216 +};
11217 +
11218 +/**
11219 + * core_load: Initialisation of TuxOnIce core.
11220 + *
11221 + * Initialise the core, beginning with sysfs. Checksum and so on are part of
11222 + * the core, but have their own initialisation routines because they either
11223 + * aren't compiled in all the time or have their own subdirectories.
11224 + */
11225 +static __init int core_load(void)
11226 +{
11227 +       int i,
11228 +           numfiles = ARRAY_SIZE(sysfs_params);
11229 +
11230 +       printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION
11231 +                       " (http://tuxonice.net)\n");
11232 +       strncpy(pre_hibernate_command, CONFIG_TOI_DEFAULT_PRE_HIBERNATE, 255);
11233 +       strncpy(post_hibernate_command, CONFIG_TOI_DEFAULT_POST_HIBERNATE, 255);
11234 +
11235 +       if (toi_sysfs_init())
11236 +               return 1;
11237 +
11238 +       for (i = 0; i < numfiles; i++)
11239 +               toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
11240 +
11241 +       toi_core_fns = &my_fns;
11242 +
11243 +       if (toi_alloc_init())
11244 +               return 1;
11245 +       if (toi_checksum_init())
11246 +               return 1;
11247 +       if (toi_usm_init())
11248 +               return 1;
11249 +       if (toi_ui_init())
11250 +               return 1;
11251 +       if (toi_poweroff_init())
11252 +               return 1;
11253 +       if (toi_cluster_init())
11254 +               return 1;
11255 +
11256 +       return 0;
11257 +}
11258 +
11259 +#ifdef MODULE
11260 +/**
11261 + * core_unload: Prepare to unload the core code.
11262 + */
11263 +static __exit void core_unload(void)
11264 +{
11265 +       int i,
11266 +           numfiles = ARRAY_SIZE(sysfs_params);
11267 +
11268 +       toi_alloc_exit();
11269 +       toi_checksum_exit();
11270 +       toi_poweroff_exit();
11271 +       toi_ui_exit();
11272 +       toi_usm_exit();
11273 +       toi_cluster_exit();
11274 +
11275 +       for (i = 0; i < numfiles; i++)
11276 +               toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
11277 +
11278 +       toi_core_fns = NULL;
11279 +
11280 +       toi_sysfs_exit();
11281 +}
11282 +MODULE_LICENSE("GPL");
11283 +module_init(core_load);
11284 +module_exit(core_unload);
11285 +#else
11286 +late_initcall(core_load);
11287 +#endif
11288 diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c
11289 new file mode 100644
11290 index 0000000..2ea20bf
11291 --- /dev/null
11292 +++ b/kernel/power/tuxonice_io.c
11293 @@ -0,0 +1,1470 @@
11294 +/*
11295 + * kernel/power/tuxonice_io.c
11296 + *
11297 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
11298 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
11299 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
11300 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
11301 + *
11302 + * This file is released under the GPLv2.
11303 + *
11304 + * It contains high level IO routines for hibernating.
11305 + *
11306 + */
11307 +
11308 +#include <linux/suspend.h>
11309 +#include <linux/version.h>
11310 +#include <linux/utsname.h>
11311 +#include <linux/mount.h>
11312 +#include <linux/highmem.h>
11313 +#include <linux/module.h>
11314 +#include <linux/kthread.h>
11315 +#include <linux/cpu.h>
11316 +#include <asm/tlbflush.h>
11317 +
11318 +#include "tuxonice.h"
11319 +#include "tuxonice_modules.h"
11320 +#include "tuxonice_pageflags.h"
11321 +#include "tuxonice_io.h"
11322 +#include "tuxonice_ui.h"
11323 +#include "tuxonice_storage.h"
11324 +#include "tuxonice_prepare_image.h"
11325 +#include "tuxonice_extent.h"
11326 +#include "tuxonice_sysfs.h"
11327 +#include "tuxonice_builtin.h"
11328 +#include "tuxonice_checksum.h"
11329 +#include "tuxonice_alloc.h"
11330 +char alt_resume_param[256];
11331 +
11332 +/* Variables shared between threads and updated under the mutex */
11333 +static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
11334 +static int io_index, io_nextupdate, io_pc, io_pc_step, first_to_finish;
11335 +static unsigned long pfn, other_pfn;
11336 +static DEFINE_MUTEX(io_mutex);
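+/* Per-CPU caches of where copy_page_from_orig_page() last searched. */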
11337 +static DEFINE_PER_CPU(struct page *, last_sought);
11338 +static DEFINE_PER_CPU(struct page *, last_high_page);
11339 +static DEFINE_PER_CPU(char *, checksum_locn);
11340 +static DEFINE_PER_CPU(struct pbe *, last_low_page);
11341 +static atomic_t io_count;
11342 +atomic_t toi_io_workers;
11343 +EXPORT_SYMBOL_GPL(toi_io_workers);
11344 +
11345 +DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
11346 +EXPORT_SYMBOL_GPL(toi_io_queue_flusher);
11347 +
11348 +int toi_bio_queue_flusher_should_finish;
11349 +EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish);
11350 +
11351 +/* Indicates that this thread should be used for checking throughput */
11352 +#define MONITOR ((void *) 1)
11353 +
11354 +/* toi_attempt_to_parse_resume_device
11355 + *
11356 + * Can we hibernate, using the current resume= parameter?
11357 + */
11358 +int toi_attempt_to_parse_resume_device(int quiet)
11359 +{
11360 +       struct list_head *Allocator;
11361 +       struct toi_module_ops *thisAllocator;
11362 +       int result, returning = 0;
11363 +
11364 +       if (toi_activate_storage(0))
11365 +               return 0;
11366 +
11367 +       toiActiveAllocator = NULL;
11368 +       clear_toi_state(TOI_RESUME_DEVICE_OK);
11369 +       clear_toi_state(TOI_CAN_RESUME);
11370 +       clear_result_state(TOI_ABORTED);
11371 +
11372 +       if (!toiNumAllocators) {
11373 +               if (!quiet)
11374 +                       printk(KERN_INFO "TuxOnIce: No storage allocators have "
11375 +                               "been registered. Hibernating will be "
11376 +                               "disabled.\n");
11377 +               goto cleanup;
11378 +       }
11379 +
11380 +       if (!resume_file[0]) {
11381 +               if (!quiet)
11382 +                       printk(KERN_INFO "TuxOnIce: Resume= parameter is empty."
11383 +                               " Hibernating will be disabled.\n");
11384 +               goto cleanup;
11385 +       }
11386 +
11387 +       list_for_each(Allocator, &toiAllocators) {
11388 +               thisAllocator = list_entry(Allocator, struct toi_module_ops,
11389 +                                                               type_list);
11390 +
11391 +               /*
11392 +                * Not sure why you'd want to disable an allocator, but
11393 +                * we should honour the flag if we're providing it
11394 +                */
11395 +               if (!thisAllocator->enabled)
11396 +                       continue;
11397 +
11398 +               result = thisAllocator->parse_sig_location(
11399 +                               resume_file, (toiNumAllocators == 1),
11400 +                               quiet);
11401 +
11402 +               switch (result) {
11403 +               case -EINVAL:
11404 +                       /* For this allocator, but not a valid
11405 +                        * configuration. Error already printed. */
11406 +                       goto cleanup;
11407 +
11408 +               case 0:
11409 +                       /* For this allocator and valid. */
11410 +                       toiActiveAllocator = thisAllocator;
11411 +
11412 +                       set_toi_state(TOI_RESUME_DEVICE_OK);
11413 +                       set_toi_state(TOI_CAN_RESUME);
11414 +                       returning = 1;
11415 +                       goto cleanup;
11416 +               }
11417 +       }
11418 +       if (!quiet)
11419 +               printk(KERN_INFO "TuxOnIce: No matching enabled allocator found. "
11420 +                               "Resuming disabled.\n");
11421 +cleanup:
11422 +       toi_deactivate_storage(0);
11423 +       return returning;
11424 +}
11425 +EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device);
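+
+/*
+ * The resume= syntax is defined by each allocator's parse_sig_location();
+ * e.g. "resume=swap:/dev/hda1", as suggested by the hint can_hibernate()
+ * prints when hibernation is disabled.
+ */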
11426 +
11427 +void attempt_to_parse_resume_device2(void)
11428 +{
11429 +       toi_prepare_usm();
11430 +       toi_attempt_to_parse_resume_device(0);
11431 +       toi_cleanup_usm();
11432 +}
11433 +EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2);
11434 +
11435 +void save_restore_alt_param(int replace, int quiet)
11436 +{
11437 +       static char resume_param_save[255];
11438 +       static unsigned long toi_state_save;
11439 +
11440 +       if (replace) {
11441 +               toi_state_save = toi_state;
11442 +               strcpy(resume_param_save, resume_file);
11443 +               strcpy(resume_file, alt_resume_param);
11444 +       } else {
11445 +               strcpy(resume_file, resume_param_save);
11446 +               toi_state = toi_state_save;
11447 +       }
11448 +       toi_attempt_to_parse_resume_device(quiet);
11449 +}
11450 +
11451 +void attempt_to_parse_alt_resume_param(void)
11452 +{
11453 +       int ok = 0;
11454 +
11455 +       /* Temporarily set resume_param to the poweroff value */
11456 +       if (!strlen(alt_resume_param))
11457 +               return;
11458 +
11459 +       printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n");
11460 +       save_restore_alt_param(SAVE, NOQUIET);
11461 +       if (test_toi_state(TOI_CAN_RESUME))
11462 +               ok = 1;
11463 +
11464 +       printk(KERN_INFO "=== Done ===\n");
11465 +       save_restore_alt_param(RESTORE, QUIET);
11466 +
11467 +       /* If not ok, clear the string */
11468 +       if (ok)
11469 +               return;
11470 +
11471 +       printk(KERN_INFO "Can't resume from that location; clearing "
11472 +                       "alt_resume_param.\n");
11473 +       alt_resume_param[0] = '\0';
11474 +}
11475 +
11476 +/* noresume_reset_modules
11477 + *
11478 + * Description:        When we read the start of an image, modules (and especially the
11479 + *             active allocator) might need to reset data structures if we
11480 + *             decide to remove the image rather than resuming from it.
11481 + */
11482 +
11483 +static void noresume_reset_modules(void)
11484 +{
11485 +       struct toi_module_ops *this_filter;
11486 +
11487 +       list_for_each_entry(this_filter, &toi_filters, type_list)
11488 +               if (this_filter->noresume_reset)
11489 +                       this_filter->noresume_reset();
11490 +
11491 +       if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
11492 +               toiActiveAllocator->noresume_reset();
11493 +}
11494 +
11495 +/* fill_toi_header()
11496 + *
11497 + * Description:        Fill the hibernate header structure.
11498 + * Arguments:  struct toi_header: Header data structure to be filled.
11499 + */
11500 +
11501 +static int fill_toi_header(struct toi_header *sh)
11502 +{
11503 +       int i, error;
11504 +
11505 +       error = init_swsusp_header((struct swsusp_info *) sh);
11506 +       if (error)
11507 +               return error;
11508 +
11509 +       sh->pagedir = pagedir1;
11510 +       sh->pageset_2_size = pagedir2.size;
11511 +       sh->param0 = toi_result;
11512 +       sh->param1 = toi_bkd.toi_action;
11513 +       sh->param2 = toi_bkd.toi_debug_state;
11514 +       sh->param3 = toi_bkd.toi_default_console_level;
11515 +       sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
11516 +       for (i = 0; i < 4; i++)
11517 +               sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2];
11518 +       sh->bkd = boot_kernel_data_buffer;
11519 +       return 0;
11520 +}
11521 +
11522 +/*
11523 + * rw_init_modules
11524 + *
11525 + * Iterate over modules, preparing the ones that will be used to read or write
11526 + * data.
11527 + */
11528 +static int rw_init_modules(int rw, int which)
11529 +{
11530 +       struct toi_module_ops *this_module;
+
11531 +       /* Initialise page transformers */
11532 +       list_for_each_entry(this_module, &toi_filters, type_list) {
11533 +               if (!this_module->enabled)
11534 +                       continue;
11535 +               if (this_module->rw_init && this_module->rw_init(rw, which)) {
11536 +                       abort_hibernate(TOI_FAILED_MODULE_INIT,
11537 +                               "Failed to initialise the %s filter.",
11538 +                               this_module->name);
11539 +                       return 1;
11540 +               }
11541 +       }
11542 +
11543 +       /* Initialise allocator */
11544 +       if (toiActiveAllocator->rw_init(rw, which)) {
11545 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
11546 +                               "Failed to initialise the allocator.");
11547 +               return 1;
11548 +       }
11549 +
11550 +       /* Initialise other modules */
11551 +       list_for_each_entry(this_module, &toi_modules, module_list) {
11552 +               if (!this_module->enabled ||
11553 +                   this_module->type == FILTER_MODULE ||
11554 +                   this_module->type == WRITER_MODULE)
11555 +                       continue;
11556 +               if (this_module->rw_init && this_module->rw_init(rw, which)) {
11557 +                       set_abort_result(TOI_FAILED_MODULE_INIT);
11558 +                       printk(KERN_INFO "Setting aborted flag due to module "
11559 +                                       "init failure.\n");
11560 +                       return 1;
11561 +               }
11562 +       }
11563 +
11564 +       return 0;
11565 +}
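+
+/*
+ * Initialisation order above: page transformers (filters) first, then the
+ * active allocator, then the remaining modules. rw_cleanup_modules() below
+ * cleans up the other modules and flushes the filters before finally
+ * asking the allocator to clean up.
+ */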
11566 +
11567 +/*
11568 + * rw_cleanup_modules
11569 + *
11570 + * Cleanup components after reading or writing a set of pages.
11571 + * Only the allocator may fail.
11572 + */
11573 +static int rw_cleanup_modules(int rw)
11574 +{
11575 +       struct toi_module_ops *this_module;
11576 +       int result = 0;
11577 +
11578 +       /* Cleanup other modules */
11579 +       list_for_each_entry(this_module, &toi_modules, module_list) {
11580 +               if (!this_module->enabled ||
11581 +                   this_module->type == FILTER_MODULE ||
11582 +                   this_module->type == WRITER_MODULE)
11583 +                       continue;
11584 +               if (this_module->rw_cleanup)
11585 +                       result |= this_module->rw_cleanup(rw);
11586 +       }
11587 +
11588 +       /* Flush data and cleanup */
11589 +       list_for_each_entry(this_module, &toi_filters, type_list) {
11590 +               if (!this_module->enabled)
11591 +                       continue;
11592 +               if (this_module->rw_cleanup)
11593 +                       result |= this_module->rw_cleanup(rw);
11594 +       }
11595 +
11596 +       result |= toiActiveAllocator->rw_cleanup(rw);
11597 +
11598 +       return result;
11599 +}
11600 +
11601 +static struct page *copy_page_from_orig_page(struct page *orig_page)
11602 +{
11603 +       int is_high = PageHighMem(orig_page), index, min, max;
11604 +       struct page *high_page = NULL,
11605 +                   **my_last_high_page = &__get_cpu_var(last_high_page),
11606 +                   **my_last_sought = &__get_cpu_var(last_sought);
11607 +       struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page);
11608 +       void *compare;
11609 +
11610 +       if (is_high) {
11611 +               if (*my_last_sought && *my_last_high_page &&
11612 +                               *my_last_sought < orig_page)
11613 +                       high_page = *my_last_high_page;
11614 +               else
11615 +                       high_page = (struct page *) restore_highmem_pblist;
11616 +               this = (struct pbe *) kmap(high_page);
11617 +               compare = orig_page;
11618 +       } else {
11619 +               if (*my_last_sought && *my_last_low_page &&
11620 +                               *my_last_sought < orig_page)
11621 +                       this = *my_last_low_page;
11622 +               else
11623 +                       this = restore_pblist;
11624 +               compare = page_address(orig_page);
11625 +       }
11626 +
11627 +       *my_last_sought = orig_page;
11628 +
11629 +       /* Locate page containing pbe */
11630 +       while (this[PBES_PER_PAGE - 1].next &&
11631 +                       this[PBES_PER_PAGE - 1].orig_address < compare) {
11632 +               if (is_high) {
11633 +                       struct page *next_high_page = (struct page *)
11634 +                               this[PBES_PER_PAGE - 1].next;
11635 +                       kunmap(high_page);
11636 +                       this = kmap(next_high_page);
11637 +                       high_page = next_high_page;
11638 +               } else
11639 +                       this = this[PBES_PER_PAGE - 1].next;
11640 +       }
11641 +
11642 +       /* Do a binary search within the page */
11643 +       min = 0;
11644 +       max = PBES_PER_PAGE;
11645 +       index = PBES_PER_PAGE / 2;
11646 +       while (max - min) {
11647 +               if (!this[index].orig_address ||
11648 +                   this[index].orig_address > compare)
11649 +                       max = index;
11650 +               else if (this[index].orig_address == compare) {
11651 +                       if (is_high) {
11652 +                               struct page *page = this[index].address;
11653 +                               *my_last_high_page = high_page;
11654 +                               kunmap(high_page);
11655 +                               return page;
11656 +                       }
11657 +                       *my_last_low_page = this;
11658 +                       return virt_to_page(this[index].address);
11659 +               } else
11660 +                       min = index;
11661 +               index = ((max + min) / 2);
11662 +       }
11663 +
11664 +       if (is_high)
11665 +               kunmap(high_page);
11666 +
11667 +       abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for"
11668 +               " orig page %p. this[index].orig_address=%p.\n", orig_page,
11669 +               this[index].orig_address);
11670 +       return NULL;
11671 +}
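+
+/*
+ * The lookup above relies on the pbe pages being sorted by orig_address:
+ * each page holds PBES_PER_PAGE entries, and the last entry's ->next
+ * points at the next pbe page, e.g.
+ *
+ *   page A: pbe[0] ... pbe[PBES_PER_PAGE - 2] | pbe[last].next --> page B
+ *
+ * so we walk pages until the sought address could be inside, then binary
+ * search within that page. The per-cpu last_* variables cache the
+ * previous result to shorten the walk when lookups arrive in ascending
+ * order.
+ */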
11672 +
11673 +/*
11674 + * worker_rw_loop
11675 + *
11676 + * The per-thread loop that reads or writes pages until the work runs out.
11677 + */
11678 +static int worker_rw_loop(void *data)
11679 +{
11680 +       unsigned long orig_pfn, write_pfn, next_jiffies = jiffies + HZ / 10, jif_index = 1;
11681 +       int result, my_io_index = 0, last_worker, i_finished_first = 0;
11682 +       struct toi_module_ops *first_filter = toi_get_next_filter(NULL);
11683 +       struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
11684 +
11685 +       atomic_inc(&toi_io_workers);
11686 +       mutex_lock(&io_mutex);
11687 +
11688 +       do {
11689 +               unsigned int buf_size;
11690 +
11691 +               if (data && time_after(jiffies, next_jiffies)) {
11692 +                       next_jiffies += HZ / 10;
11693 +                       if (toiActiveAllocator->update_throughput_throttle)
11694 +                               toiActiveAllocator->update_throughput_throttle(jif_index);
11695 +                       jif_index++;
11696 +               }
11697 +
11698 +               /*
11699 +                * What page to use? If reading, don't know yet which page's
11700 +                * data will be read, so always use the buffer. If writing,
11701 +                * use the copy (Pageset1) or original page (Pageset2), but
11702 +                * always write the pfn of the original page.
11703 +                */
11704 +               if (io_write) {
11705 +                       struct page *page;
11706 +                       char **my_checksum_locn = &__get_cpu_var(checksum_locn);
11707 +
11708 +                       pfn = memory_bm_next_pfn(&io_map);
11709 +
11710 +                       /* Another thread could have beaten us to it. */
11711 +                       if (pfn == max_pfn + 1) {
11712 +                               if (atomic_read(&io_count)) {
11713 +                               printk(KERN_ERR "Ran out of pfns but io_count "
11714 +                                               "is still %d.\n",
11715 +                                               atomic_read(&io_count));
11716 +                                       BUG();
11717 +                               }
11718 +                               break;
11719 +                       }
11720 +
11721 +                       my_io_index = io_finish_at -
11722 +                               atomic_sub_return(1, &io_count);
11723 +
11724 +                       orig_pfn = pfn;
11725 +                       write_pfn = pfn;
11726 +
11727 +                       /*
11728 +                        * other_pfn is advanced by every thread, so no two
11729 +                        * threads write the same page.
11730 +                        */
11731 +                       memory_bm_clear_bit(&io_map, pfn);
11732 +                       if (io_pageset == 1) {
11733 +                               other_pfn = memory_bm_next_pfn(&pageset1_map);
11734 +                               write_pfn = other_pfn;
11735 +                       }
11736 +                       page = pfn_to_page(pfn);
11737 +
11738 +                       if (io_pageset == 2)
11739 +                               *my_checksum_locn =
11740 +                                       tuxonice_get_next_checksum();
11741 +
11742 +                       mutex_unlock(&io_mutex);
11743 +
11744 +                       if (io_pageset == 2 &&
11745 +                           tuxonice_calc_checksum(page, *my_checksum_locn))
11746 +                               return 1;
11747 +
11748 +                       result = first_filter->write_page(write_pfn, page,
11749 +                                       PAGE_SIZE);
11750 +               } else {
11751 +                       my_io_index = io_finish_at -
11752 +                               atomic_sub_return(1, &io_count);
11753 +                       mutex_unlock(&io_mutex);
11754 +
11755 +                       /*
11756 +                        * Are we aborting? If so, don't submit any more I/O as
11757 +                        * resetting the resume_attempted flag (from ui.c) will
11758 +                        * clear the bdev flags, making this thread oops.
11759 +                        */
11760 +                       if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
11761 +                               atomic_dec(&toi_io_workers);
11762 +                               if (!atomic_read(&toi_io_workers))
11763 +                                       set_toi_state(TOI_IO_STOPPED);
11764 +                               while (1)
11765 +                                       schedule();
11766 +                       }
11767 +
11768 +                       result = first_filter->read_page(&write_pfn, buffer,
11769 +                                       &buf_size);
11770 +                       if (buf_size != PAGE_SIZE) {
11771 +                               abort_hibernate(TOI_FAILED_IO,
11772 +                                       "I/O pipeline returned %u bytes instead"
11773 +                                       " of %lu.\n", buf_size, (unsigned long) PAGE_SIZE);
11774 +                               mutex_lock(&io_mutex);
11775 +                               break;
11776 +                       }
11777 +               }
11778 +
11779 +               if (result) {
11780 +                       io_result = result;
11781 +                       if (io_write) {
11782 +                               printk(KERN_INFO "Write chunk returned %d.\n",
11783 +                                               result);
11784 +                               abort_hibernate(TOI_FAILED_IO,
11785 +                                       "Failed to write a chunk of the "
11786 +                                       "image.");
11787 +                               mutex_lock(&io_mutex);
11788 +                               break;
11789 +                       }
11790 +                       panic("Read chunk returned (%d)", result);
11791 +               }
11792 +
11793 +               /*
11794 +                * Discard reads of resaved pages while reading ps2
11795 +                * and unwanted pages while rereading ps2 when aborting.
11796 +                */
11797 +               if (!io_write && !PageResave(pfn_to_page(write_pfn))) {
11798 +                       struct page *final_page = pfn_to_page(write_pfn),
11799 +                                   *copy_page = final_page;
11800 +                       char *virt, *buffer_virt;
11801 +
11802 +                       if (io_pageset == 1 && !load_direct(final_page)) {
11803 +                               copy_page =
11804 +                                       copy_page_from_orig_page(final_page);
11805 +                               BUG_ON(!copy_page);
11806 +                       }
11807 +
11808 +                       if (memory_bm_test_bit(&io_map, write_pfn)) {
11809 +                               virt = kmap(copy_page);
11810 +                               buffer_virt = kmap(buffer);
11811 +                               memcpy(virt, buffer_virt, PAGE_SIZE);
11812 +                               kunmap(copy_page);
11813 +                               kunmap(buffer);
11814 +                               memory_bm_clear_bit(&io_map, write_pfn);
11815 +                       } else {
11816 +                               mutex_lock(&io_mutex);
11817 +                               atomic_inc(&io_count);
11818 +                               mutex_unlock(&io_mutex);
11819 +                       }
11820 +               }
11821 +
11823 +
11824 +               if (my_io_index + io_base == io_nextupdate)
11825 +                       io_nextupdate = toi_update_status(my_io_index +
11826 +                               io_base, io_barmax, " %d/%d MB ",
11827 +                               MB(io_base+my_io_index+1), MB(io_barmax));
11828 +
11829 +               if (my_io_index == io_pc) {
11830 +                       printk("%s%d%%...", io_pc_step == 1 ? KERN_ERR : "",
11831 +                                       20 * io_pc_step);
11832 +                       io_pc_step++;
11833 +                       io_pc = io_finish_at * io_pc_step / 5;
11834 +               }
11835 +
11836 +               toi_cond_pause(0, NULL);
11837 +
11838 +               /*
11839 +                * Subtle: If there's less I/O still to be done than threads
11840 +                * running, quit. This stops us doing I/O beyond the end of
11841 +                * the image when reading.
11842 +                *
11843 +                * Possible race condition. Two threads could do the test at
11844 +                * the same time; one should exit and one should continue.
11845 +                * Therefore we take the mutex before comparing and exiting.
11846 +                */
11847 +
11848 +               mutex_lock(&io_mutex);
11849 +
11850 +       } while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
11851 +               !(io_write && test_result_state(TOI_ABORTED)));
11852 +
11853 +       last_worker = atomic_dec_and_test(&toi_io_workers);
11854 +       if (!first_to_finish) {
11855 +               first_to_finish = 1;
11856 +               i_finished_first = 1;
11857 +       }
11858 +       mutex_unlock(&io_mutex);
11859 +
11860 +       if (last_worker) {
11861 +               toi_bio_queue_flusher_should_finish = 1;
11862 +               wake_up(&toi_io_queue_flusher);
11863 +               toiActiveAllocator->finish_all_io();
11864 +       } else {
11865 +               /* There may still be I/O in flight, but the last
11866 +                * pages are being submitted, so switch to displaying
11867 +                * how much I/O we're waiting on.
11868 +                */
11869 +               if (i_finished_first &&
11870 +                   toiActiveAllocator->monitor_outstanding_io)
11871 +                       toiActiveAllocator->monitor_outstanding_io();
11872 +       }
11873 +
11874 +       toi__free_page(28, buffer);
11875 +
11876 +       return 0;
11877 +}
11878 +
11879 +static int start_other_threads(void)
11880 +{
11881 +       int cpu, num_started = 0;
11882 +       struct task_struct *p;
11883 +
11884 +       for_each_online_cpu(cpu) {
11885 +               if (cpu == smp_processor_id())
11886 +                       continue;
11887 +
11888 +               p = kthread_create(worker_rw_loop, num_started ? NULL : MONITOR,
11889 +                               "ks2io/%d", cpu);
11890 +               if (IS_ERR(p)) {
11891 +                       printk(KERN_ERR "ks2io thread for cpu %i failed to start.\n", cpu);
11892 +                       continue;
11893 +               }
11894 +               kthread_bind(p, cpu);
11895 +               p->flags |= PF_MEMALLOC;
11896 +               wake_up_process(p);
11897 +               num_started++;
11898 +       }
11899 +
11900 +       return num_started;
11901 +}
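+
+/*
+ * Note: one worker is started per online cpu other than the current one,
+ * and only the first thread created is passed MONITOR (a non-NULL data
+ * pointer), making it the thread that updates the throughput throttle in
+ * worker_rw_loop().
+ */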
11902 +
11903 +/*
11904 + * do_rw_loop
11905 + *
11906 + * The main I/O loop for reading or writing pages.
11907 + */
11908 +static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags,
11909 +               int base, int barmax, int pageset)
11910 +{
11911 +       int index = 0, cpu, num_other_threads = 0;
11912 +
11913 +       if (!finish_at)
11914 +               return 0;
11915 +
11916 +       io_write = write;
11917 +       io_finish_at = finish_at;
11918 +       io_base = base;
11919 +       io_barmax = barmax;
11920 +       io_pageset = pageset;
11921 +       io_index = 0;
11922 +       io_pc = io_finish_at / 5;
11923 +       io_pc_step = 1;
11924 +       io_result = 0;
11925 +       io_nextupdate = base + 1;
11926 +       toi_bio_queue_flusher_should_finish = 0;
11927 +       first_to_finish = 0;
11928 +
11929 +       for_each_online_cpu(cpu) {
11930 +               per_cpu(last_sought, cpu) = NULL;
11931 +               per_cpu(last_low_page, cpu) = NULL;
11932 +               per_cpu(last_high_page, cpu) = NULL;
11933 +       }
11934 +
11935 +       /* Ensure all bits clear */
11936 +       memory_bm_clear(&io_map);
11937 +
11938 +       /* Set the bits for the pages to write */
11939 +       memory_bm_position_reset(pageflags);
11940 +
11941 +       pfn = memory_bm_next_pfn(pageflags);
11942 +
11943 +       while (pfn < max_pfn + 1 && index < finish_at) {
11944 +               memory_bm_set_bit(&io_map, pfn);
11945 +               pfn = memory_bm_next_pfn(pageflags);
11946 +               index++;
11947 +       }
11948 +
11949 +       BUG_ON(index < finish_at);
11950 +
11951 +       atomic_set(&io_count, finish_at);
11952 +
11953 +       pfn = max_pfn + 1;
11954 +       other_pfn = pfn;
11955 +
11956 +       memory_bm_position_reset(&pageset1_map);
11957 +
11958 +       clear_toi_state(TOI_IO_STOPPED);
11959 +       memory_bm_position_reset(&io_map);
11960 +
11961 +       if (!test_action_state(TOI_NO_MULTITHREADED_IO))
11962 +               num_other_threads = start_other_threads();
11963 +
11964 +       if (!num_other_threads || !toiActiveAllocator->io_flusher ||
11965 +               test_action_state(TOI_NO_FLUSHER_THREAD))
11966 +               worker_rw_loop(num_other_threads ? NULL : MONITOR);
11967 +       else
11968 +               toiActiveAllocator->io_flusher(write);
11969 +
11970 +       while (atomic_read(&toi_io_workers))
11971 +               schedule();
11972 +
11973 +       set_toi_state(TOI_IO_STOPPED);
11974 +       if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
11975 +               while (1)
11976 +                       schedule();
11977 +       }
11978 +
11979 +       if (!io_result) {
11980 +               printk("done.\n");
11981 +
11982 +               toi_update_status(io_base + io_finish_at, io_barmax,
11983 +                               " %d/%d MB ",
11984 +                               MB(io_base + io_finish_at), MB(io_barmax));
11985 +       }
11986 +
11987 +       if (io_write && test_result_state(TOI_ABORTED))
11988 +               io_result = 1;
11989 +       else { /* All I/O done? */
11990 +               if (memory_bm_next_pfn(&io_map) != BM_END_OF_MAP) {
11991 +                       printk(KERN_INFO "Finished I/O loop but still work to "
11992 +                                       "do?\nFinish at = %d. io_count = %d.\n",
11993 +                                       finish_at, atomic_read(&io_count));
11994 +                       BUG();
11995 +               }
11996 +       }
11997 +
11998 +       return io_result;
11999 +}
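+
+/*
+ * In summary, do_rw_loop() publishes the I/O parameters in the io_*
+ * globals, marks the first finish_at pfns of the pageset in io_map,
+ * starts worker threads (unless multithreaded I/O is disabled), runs
+ * worker_rw_loop() or the allocator's io_flusher on this cpu, and then
+ * schedules until the last worker has exited.
+ */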
12000 +
12001 +/* write_pageset()
12002 + *
12003 + * Description:        Write a pageset to disk.
12004 + * Arguments:  pagedir:        Which pagedir to write.
12005 + * Returns:    Zero on success, non-zero on failure.
12006 + */
12007 +
12008 +int write_pageset(struct pagedir *pagedir)
12009 +{
12010 +       int finish_at, base = 0, start_time, end_time;
12011 +       int barmax = pagedir1.size + pagedir2.size;
12012 +       long error = 0;
12013 +       struct memory_bitmap *pageflags;
12014 +
12015 +       /*
12016 +        * Even if there is nothing to read or write, the allocator
12017 +        * may need the init/cleanup for its housekeeping (e.g.
12018 +        * pageset1 may start where pageset2 ends when writing).
12019 +        */
12020 +       finish_at = pagedir->size;
12021 +
12022 +       if (pagedir->id == 1) {
12023 +               toi_prepare_status(DONT_CLEAR_BAR,
12024 +                               "Writing kernel & process data...");
12025 +               base = pagedir2.size;
12026 +               if (test_action_state(TOI_TEST_FILTER_SPEED) ||
12027 +                   test_action_state(TOI_TEST_BIO))
12028 +                       pageflags = &pageset1_map;
12029 +               else
12030 +                       pageflags = &pageset1_copy_map;
12031 +       } else {
12032 +               toi_prepare_status(DONT_CLEAR_BAR, "Writing caches...");
12033 +               pageflags = &pageset2_map;
12034 +       }
12035 +
12036 +       start_time = jiffies;
12037 +
12038 +       if (rw_init_modules(1, pagedir->id)) {
12039 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
12040 +                               "Failed to initialise modules for writing.");
12041 +               error = 1;
12042 +       }
12043 +
12044 +       if (!error)
12045 +               error = do_rw_loop(1, finish_at, pageflags, base, barmax,
12046 +                               pagedir->id);
12047 +
12048 +       if (rw_cleanup_modules(WRITE) && !error) {
12049 +               abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
12050 +                               "Failed to cleanup after writing.");
12051 +               error = 1;
12052 +       }
12053 +
12054 +       end_time = jiffies;
12055 +
12056 +       if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
12057 +               toi_bkd.toi_io_time[0][0] += finish_at;
12058 +               toi_bkd.toi_io_time[0][1] += (end_time - start_time);
12059 +       }
12060 +
12061 +       return error;
12062 +}
12063 +
12064 +/* read_pageset()
12065 + *
12066 + * Description:        Read a pageset from disk.
12067 + * Arguments:  pagedir:        Which pagedir to read.
12068 + *             overwrittenpagesonly: Whether to read the whole pageset or
12069 + *             only the part that pageset1 overwrote.
12070 + * Returns:    Zero on success, non-zero on failure.
12071 + */
12072 +
12073 +static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
12074 +{
12075 +       int result = 0, base = 0, start_time, end_time;
12076 +       int finish_at = pagedir->size;
12077 +       int barmax = pagedir1.size + pagedir2.size;
12078 +       struct memory_bitmap *pageflags;
12079 +
12080 +       if (pagedir->id == 1) {
12081 +               toi_prepare_status(DONT_CLEAR_BAR,
12082 +                               "Reading kernel & process data...");
12083 +               pageflags = &pageset1_map;
12084 +       } else {
12085 +               toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
12086 +               if (overwrittenpagesonly)
12087 +                       barmax = finish_at = min(pagedir1.size,
12088 +                                                pagedir2.size);
12089 +               else
12090 +                       base = pagedir1.size;
12091 +               pageflags = &pageset2_map;
12092 +       }
12093 +
12094 +       start_time = jiffies;
12095 +
12096 +       if (rw_init_modules(0, pagedir->id)) {
12097 +               toiActiveAllocator->remove_image();
12098 +               result = 1;
12099 +       } else
12100 +               result = do_rw_loop(0, finish_at, pageflags, base, barmax,
12101 +                               pagedir->id);
12102 +
12103 +       if (rw_cleanup_modules(READ) && !result) {
12104 +               abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
12105 +                               "Failed to cleanup after reading.");
12106 +               result = 1;
12107 +       }
12108 +
12109 +       /* Statistics */
12110 +       end_time = jiffies;
12111 +
12112 +       if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
12113 +               toi_bkd.toi_io_time[1][0] += finish_at;
12114 +               toi_bkd.toi_io_time[1][1] += (end_time - start_time);
12115 +       }
12116 +
12117 +       return result;
12118 +}
12119 +
12120 +/* write_module_configs()
12121 + *
12122 + * Description:        Store the configuration for each module in the image header.
12123 + * Returns:    Int: Zero on success, Error value otherwise.
12124 + */
12125 +static int write_module_configs(void)
12126 +{
12127 +       struct toi_module_ops *this_module;
12128 +       char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
12129 +       int len, index = 1;
12130 +       struct toi_module_header toi_module_header;
12131 +
12132 +       if (!buffer) {
12133 +               printk(KERN_INFO "Failed to allocate a buffer for saving "
12134 +                               "module configuration info.\n");
12135 +               return -ENOMEM;
12136 +       }
12137 +
12138 +       /*
12139 +        * We have to know which data goes with which module, so we write
12140 +        * at least a length of zero for every module. Note that we are
12141 +        * also assuming every module's config data takes <= PAGE_SIZE.
12142 +        */
12143 +
12144 +       /* For each module (in registration order) */
12145 +       list_for_each_entry(this_module, &toi_modules, module_list) {
12146 +               if (!this_module->enabled || !this_module->storage_needed ||
12147 +                   (this_module->type == WRITER_MODULE &&
12148 +                    toiActiveAllocator != this_module))
12149 +                       continue;
12150 +
12151 +               /* Get the data from the module */
12152 +               len = 0;
12153 +               if (this_module->save_config_info)
12154 +                       len = this_module->save_config_info(buffer);
12155 +
12156 +               /* Save the details of the module */
12157 +               toi_module_header.enabled = this_module->enabled;
12158 +               toi_module_header.type = this_module->type;
12159 +               toi_module_header.index = index++;
12160 +               strncpy(toi_module_header.name, this_module->name,
12161 +                                       sizeof(toi_module_header.name));
12162 +               toiActiveAllocator->rw_header_chunk(WRITE,
12163 +                               this_module,
12164 +                               (char *) &toi_module_header,
12165 +                               sizeof(toi_module_header));
12166 +
12167 +               /* Save the size of the data and any data returned */
12168 +               toiActiveAllocator->rw_header_chunk(WRITE,
12169 +                               this_module,
12170 +                               (char *) &len, sizeof(int));
12171 +               if (len)
12172 +                       toiActiveAllocator->rw_header_chunk(
12173 +                               WRITE, this_module, buffer, len);
12174 +       }
12175 +
12176 +       /* Write a blank header to terminate the list */
12177 +       toi_module_header.name[0] = '\0';
12178 +       toiActiveAllocator->rw_header_chunk(WRITE, NULL,
12179 +                       (char *) &toi_module_header, sizeof(toi_module_header));
12180 +
12181 +       toi_free_page(22, (unsigned long) buffer);
12182 +       return 0;
12183 +}
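+
+/*
+ * The module configuration section written above is a simple sequence:
+ *
+ *   struct toi_module_header | int len | len bytes of config data
+ *
+ * repeated for each module and terminated by a toi_module_header whose
+ * name[0] is '\0'. read_module_configs() below walks the same layout.
+ */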
12184 +
12185 +/* read_one_module_config()
12186 + *
12187 + * Description: Read the configuration for one module, and configure the module
12188 + *             to match if it is loaded.
12189 + * Returns:    Int. Zero on success or an error code.
12190 + */
12191 +
12192 +static int read_one_module_config(struct toi_module_header *header)
12193 +{
12194 +       struct toi_module_ops *this_module;
12195 +       int result, len;
12196 +       char *buffer;
12197 +
12198 +       /* Find the module */
12199 +       this_module = toi_find_module_given_name(header->name);
12200 +
12201 +       if (!this_module) {
12202 +               if (header->enabled) {
12203 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
12204 +                               "It looks like we need module %s for reading "
12205 +                               "the image but it hasn't been registered.\n",
12206 +                               header->name);
12207 +                       if (!(test_toi_state(TOI_CONTINUE_REQ)))
12208 +                               return -EINVAL;
12209 +               } else
12210 +                       printk(KERN_INFO "Module %s configuration data found, "
12211 +                               "but the module hasn't registered. Looks like "
12212 +                               "it was disabled, so we're ignoring its data.\n",
12213 +                               header->name);
12214 +       }
12215 +
12216 +       /* Get the length of the data (if any) */
12217 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len,
12218 +                       sizeof(int));
12219 +       if (result) {
12220 +               printk(KERN_ERR "Failed to read the length of module %s's"
12221 +                               " configuration data.\n",
12222 +                               header->name);
12223 +               return -EINVAL;
12224 +       }
12225 +
12226 +       /* Read any data and pass to the module (if we found one) */
12227 +       if (!len)
12228 +               return 0;
12229 +
12230 +       buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
12231 +
12232 +       if (!buffer) {
12233 +               printk(KERN_INFO "Failed to allocate a buffer for reloading "
12234 +                               "module configuration info.\n");
12235 +               return -ENOMEM;
12236 +       }
12237 +
12238 +       toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len);
12239 +
12240 +       if (!this_module)
12241 +               goto out;
12242 +
12243 +       if (!this_module->load_config_info)
12244 +               printk(KERN_WARNING "Huh? Module %s has config data, but no"
12245 +                               " load_config_info function!\n",
12246 +                               this_module->name);
12247 +       else
12248 +               this_module->load_config_info(buffer, len);
12249 +
12250 +       /*
12251 +        * Now move this module to the tail of its lists, restoring the
12252 +        * order the modules had when the image was written. Modules that
12253 +        * were not in the image stay at the head of the lists, disabled
12254 +        * (people will normally not load a new module via an initrd and
12255 +        * then hibernate without using it!).
12256 +        */
12257 +
12258 +       toi_move_module_tail(this_module);
12259 +
12260 +       this_module->enabled = header->enabled;
12261 +
12262 +out:
12263 +       toi_free_page(23, (unsigned long) buffer);
12264 +       return 0;
12265 +}
12266 +
12267 +/* read_module_configs()
12268 + *
12269 + * Description:        Reload module configurations from the image header.
12270 + * Returns:    Int. Zero on success, error value otherwise.
12271 + */
12272 +
12273 +static int read_module_configs(void)
12274 +{
12275 +       int result = 0;
12276 +       struct toi_module_header toi_module_header;
12277 +       struct toi_module_ops *this_module;
12278 +
12279 +       /* All modules are initially disabled. That way, if we have a module
12280 +        * loaded now that wasn't loaded when we hibernated, it won't be used
12281 +        * in trying to read the data.
12282 +        */
12283 +       list_for_each_entry(this_module, &toi_modules, module_list)
12284 +               this_module->enabled = 0;
12285 +
12286 +       /* Get the first module header */
12287 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL,
12288 +                       (char *) &toi_module_header,
12289 +                       sizeof(toi_module_header));
12290 +       if (result) {
12291 +               printk(KERN_ERR "Failed to read the first module header.\n");
12292 +               return -EINVAL;
12293 +       }
12294 +
12295 +       /* For each module (in registration order) */
12296 +       while (toi_module_header.name[0]) {
12297 +               result = read_one_module_config(&toi_module_header);
12298 +
12299 +               if (result)
12300 +                       return -EINVAL;
12301 +
12302 +               /* Get the next module header */
12303 +               result = toiActiveAllocator->rw_header_chunk(READ, NULL,
12304 +                               (char *) &toi_module_header,
12305 +                               sizeof(toi_module_header));
12306 +
12307 +               if (result) {
12308 +                       printk(KERN_ERR "Failed to read the next module header.\n");
12309 +                       return -EINVAL;
12310 +               }
12311 +       }
12312 +
12313 +       return 0;
12314 +}
12315 +
12316 +/* write_image_header()
12317 + *
12318 + * Description:        Write the image header after writing the image proper.
12319 + * Returns:    Int. Zero on success or -1 on failure.
12320 + */
12321 +
12322 +int write_image_header(void)
12323 +{
12324 +       int ret;
12325 +       int total = pagedir1.size + pagedir2.size + 2;
12326 +       char *header_buffer = NULL;
12327 +
12328 +       /* Now prepare to write the header */
12329 +       ret = toiActiveAllocator->write_header_init();
12330 +       if (ret) {
12331 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
12332 +                               "Active allocator's write_header_init"
12333 +                               " function failed.");
12334 +               goto write_image_header_abort;
12335 +       }
12336 +
12337 +       /* Get a buffer */
12338 +       header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
12339 +       if (!header_buffer) {
12340 +               abort_hibernate(TOI_OUT_OF_MEMORY,
12341 +                       "Out of memory when trying to get page for header!");
12342 +               goto write_image_header_abort;
12343 +       }
12344 +
12345 +       /* Write hibernate header */
12346 +       if (fill_toi_header((struct toi_header *) header_buffer)) {
12347 +               abort_hibernate(TOI_OUT_OF_MEMORY,
12348 +                       "Failure to fill header information!");
12349 +               goto write_image_header_abort;
12350 +       }
12351 +       toiActiveAllocator->rw_header_chunk(WRITE, NULL,
12352 +                       header_buffer, sizeof(struct toi_header));
12353 +
12354 +       toi_free_page(24, (unsigned long) header_buffer);
12355 +
12356 +       /* Write module configurations */
12357 +       ret = write_module_configs();
12358 +       if (ret) {
12359 +               abort_hibernate(TOI_FAILED_IO,
12360 +                               "Failed to write module configs.");
12361 +               goto write_image_header_abort;
12362 +       }
12363 +
12364 +       memory_bm_write(&pageset1_map, toiActiveAllocator->rw_header_chunk);
12365 +
12366 +       /* Flush data and let allocator cleanup */
12367 +       if (toiActiveAllocator->write_header_cleanup()) {
12368 +               abort_hibernate(TOI_FAILED_IO,
12369 +                               "Failed to cleanup writing header.");
12370 +               goto write_image_header_abort_no_cleanup;
12371 +       }
12372 +
12373 +       if (test_result_state(TOI_ABORTED))
12374 +               goto write_image_header_abort_no_cleanup;
12375 +
12376 +       toi_update_status(total, total, NULL);
12377 +
12378 +       return 0;
12379 +
12380 +write_image_header_abort:
12381 +       toiActiveAllocator->write_header_cleanup();
12382 +write_image_header_abort_no_cleanup:
12383 +       return -1;
12384 +}
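+
+/*
+ * Header contents, in write order: the struct toi_header filled by
+ * fill_toi_header(), the per-module configuration records, then the
+ * pageset1_map bitmap. __read_pageset1() consumes them in the same
+ * order.
+ */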
12385 +
12386 +/* sanity_check()
12387 + *
12388 + * Description:        Perform a few checks, seeking to ensure that the kernel being
12389 + *             booted matches the one hibernated. They need to match so we can
12390 + *             be _sure_ things will work. Resuming from a different kernel
12391 + *             might work, but it is not assured.
12392 + * Arguments:  Struct toi_header. The header which was saved at hibernate
12393 + *             time.
12394 + */
12395 +static char *sanity_check(struct toi_header *sh)
12396 +{
12397 +       char *reason = check_swsusp_image_kernel((struct swsusp_info *) sh);
12398 +
12399 +       if (reason)
12400 +               return reason;
12401 +
12402 +       if (!test_action_state(TOI_IGNORE_ROOTFS)) {
12403 +               const struct super_block *sb;
12404 +               list_for_each_entry(sb, &super_blocks, s_list) {
12405 +                       if ((!(sb->s_flags & MS_RDONLY)) &&
12406 +                           (sb->s_type->fs_flags & FS_REQUIRES_DEV))
12407 +                               return "Device backed fs has been mounted "
12408 +                                       "rw prior to resume or initrd/ramfs "
12409 +                                       "is mounted rw.";
12410 +               }
12411 +       }
12412 +
12413 +       return NULL;
12414 +}
12415 +
12416 +/* __read_pageset1
12417 + *
12418 + * Description:        Test for the existence of an image and attempt to load it.
12419 + * Returns:    Int. Zero if image found and pageset1 successfully loaded.
12420 + *             An error code if no image was found or it could not be loaded.
12421 + */
12422 +static int __read_pageset1(void)
12423 +{
12424 +       int i, result = 0;
12425 +       char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP),
12426 +            *sanity_error = NULL;
12427 +       struct toi_header *toi_header;
12428 +
12429 +       if (!header_buffer) {
12430 +               printk(KERN_INFO "Unable to allocate a page for reading the "
12431 +                               "image header.\n");
12432 +               return -ENOMEM;
12433 +       }
12434 +
12435 +       /* Check for an image */
12436 +       result = toiActiveAllocator->image_exists(1);
12437 +       if (!result) {
12438 +               result = -ENODATA;
12439 +               noresume_reset_modules();
12440 +               printk(KERN_INFO "TuxOnIce: No image found.\n");
12441 +               goto out;
12442 +       }
12443 +
12444 +       /*
12445 +        * Prepare the active allocator for reading the image header. The
12446 +        * active allocator might read its own configuration.
12447 +        *
12448 +        * NB: This call may never return: if there is a signature for a
12449 +        * different image, we warn the user and they may choose to
12450 +        * reboot. (The device ids may look erroneous (2.4 vs 2.6), or
12451 +        * the image's location may be unavailable if it was stored over
12452 +        * a network connection.)
12453 +        */
12454 +
12455 +       result = toiActiveAllocator->read_header_init();
12456 +       if (result) {
12457 +               printk(KERN_INFO "TuxOnIce: Failed to initialise reading the "
12458 +                               "image header.\n");
12459 +               goto out_remove_image;
12460 +       }
12461 +
12462 +       /* Check for noresume command line option */
12463 +       if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
12464 +               printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed "
12465 +                               "image.\n");
12466 +               goto out_remove_image;
12467 +       }
12468 +
12469 +       /* Check whether we've resumed before */
12470 +       if (test_toi_state(TOI_RESUMED_BEFORE)) {
12471 +               toi_early_boot_message(1, 0, NULL);
12472 +               if (!(test_toi_state(TOI_CONTINUE_REQ))) {
12473 +                       printk(KERN_INFO "TuxOnIce: Tried to resume before: "
12474 +                                       "Invalidated image.\n");
12475 +                       goto out_remove_image;
12476 +               }
12477 +       }
12478 +
12479 +       clear_toi_state(TOI_CONTINUE_REQ);
12480 +
12481 +       /* Read hibernate header */
12482 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL,
12483 +                       header_buffer, sizeof(struct toi_header));
12484 +       if (result < 0) {
12485 +               printk(KERN_ERR "TuxOnIce: Failed to read the image header.\n");
12486 +               goto out_remove_image;
12487 +       }
12488 +
12489 +       toi_header = (struct toi_header *) header_buffer;
12490 +
12491 +       /*
12492 +        * NB: This call may also result in a reboot rather than returning.
12493 +        */
12494 +
12495 +       sanity_error = sanity_check(toi_header);
12496 +       if (sanity_error) {
12497 +               toi_early_boot_message(1, TOI_CONTINUE_REQ,
12498 +                               sanity_error);
12499 +               printk(KERN_INFO "TuxOnIce: Sanity check failed.\n");
12500 +               goto out_remove_image;
12501 +       }
12502 +
12503 +       /*
12504 +        * We have an image and it looks like it will load okay.
12505 +        *
12506 +        * Get metadata from header. Don't override commandline parameters.
12507 +        *
12508 +        * We don't need to save the image size limit because it's not used
12509 +        * during resume and will be restored with the image anyway.
12510 +        */
12511 +
12512 +       memcpy((char *) &pagedir1,
12513 +               (char *) &toi_header->pagedir, sizeof(pagedir1));
12514 +       toi_result = toi_header->param0;
12515 +       toi_bkd.toi_action = toi_header->param1;
12516 +       toi_bkd.toi_debug_state = toi_header->param2;
12517 +       toi_bkd.toi_default_console_level = toi_header->param3;
12518 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
12519 +       pagedir2.size = toi_header->pageset_2_size;
12520 +       for (i = 0; i < 4; i++)
12521 +               toi_bkd.toi_io_time[i/2][i%2] =
12522 +                       toi_header->io_time[i/2][i%2];
12523 +
12524 +       set_toi_state(TOI_BOOT_KERNEL);
12525 +       boot_kernel_data_buffer = toi_header->bkd;
12526 +
12527 +       /* Read module configurations */
12528 +       result = read_module_configs();
12529 +       if (result) {
12530 +               pagedir1.size = pagedir2.size = 0;
12531 +               printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module "
12532 +                               "configurations.\n");
12533 +               clear_action_state(TOI_KEEP_IMAGE);
12534 +               goto out_remove_image;
12535 +       }
12536 +
12537 +       toi_prepare_console();
12538 +
12539 +       set_toi_state(TOI_NOW_RESUMING);
12540 +
12541 +       if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) {
12542 +               toi_prepare_status(DONT_CLEAR_BAR, "Disabling nonboot cpus.");
12543 +               if (disable_nonboot_cpus()) {
12544 +                       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
12545 +                       goto out_reset_console;
12546 +               }
12547 +       }
12548 +
12549 +       toi_prepare_status(DONT_CLEAR_BAR, "Freezing processes.");
12550 +
12551 +       if (freeze_processes()) {
12552 +               printk(KERN_ERR "Some processes failed to stop.\n");
12553 +               goto out_thaw;
12554 +       }
12555 +       toi_cond_pause(1, "About to read original pageset1 locations.");
12556 +
12557 +       /*
12558 +        * Read original pageset1 locations. These are the addresses we can't
12559 +        * use for the data to be restored.
12560 +        */
12561 +
12562 +       if (memory_bm_create(&pageset1_copy_map, GFP_KERNEL, 0) ||
12563 +           memory_bm_create(&io_map, GFP_KERNEL, 0))
12564 +               goto out_thaw;
12565 +
12566 +       if (memory_bm_read(&pageset1_map, toiActiveAllocator->rw_header_chunk))
12567 +               goto out_thaw;
12568 +
12569 +       /* Clean up after reading the header */
12570 +       result = toiActiveAllocator->read_header_cleanup();
12571 +       if (result) {
12572 +               printk("TuxOnIce: Failed to cleanup after reading the image "
12573 +                               "header.\n");
12574 +               goto out_thaw;
12575 +       }
12576 +
12577 +       toi_cond_pause(1, "About to read pagedir.");
12578 +
12579 +       /*
12580 +        * Get the addresses of the pages into which we will load the
12581 +        * kernel that is to be copied back.
12582 +        */
12583 +       if (toi_get_pageset1_load_addresses()) {
12584 +               printk(KERN_INFO "TuxOnIce: Failed to get load addresses for "
12585 +                               "pageset1.\n");
12586 +               goto out_thaw;
12587 +       }
12588 +
12589 +       /* Read the original kernel back */
12590 +       toi_cond_pause(1, "About to read pageset 1.");
12591 +
12592 +       if (read_pageset(&pagedir1, 0)) {
12593 +               toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1.");
12594 +               result = -EIO;
12595 +               printk(KERN_INFO "TuxOnIce: Failed to load pageset1.\n");
12596 +               goto out_thaw;
12597 +       }
12598 +
12599 +       toi_cond_pause(1, "About to restore original kernel.");
12600 +       result = 0;
12601 +
12602 +       if (!test_action_state(TOI_KEEP_IMAGE) &&
12603 +           toiActiveAllocator->mark_resume_attempted)
12604 +               toiActiveAllocator->mark_resume_attempted(1);
12605 +
12606 +out:
12607 +       toi_free_page(25, (unsigned long) header_buffer);
12608 +       return result;
12609 +
12610 +out_thaw:
12611 +       thaw_processes();
12612 +       enable_nonboot_cpus();
12613 +out_reset_console:
12614 +       toi_cleanup_console();
12615 +out_remove_image:
12616 +       memory_bm_free(&pageset1_map, 0);
12617 +       memory_bm_free(&pageset1_copy_map, 0);
12618 +       memory_bm_free(&io_map, 0);
12619 +       result = -EINVAL;
12620 +       if (!test_action_state(TOI_KEEP_IMAGE))
12621 +               toiActiveAllocator->remove_image();
12622 +       toiActiveAllocator->read_header_cleanup();
12623 +       noresume_reset_modules();
12624 +       goto out;
12625 +}
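+
+/*
+ * The error labels above unwind progressively: out_thaw restarts
+ * processes and nonboot cpus, out_reset_console restores the console,
+ * and out_remove_image frees the bitmaps, removes the image (unless
+ * TOI_KEEP_IMAGE is set) and resets the modules before returning
+ * -EINVAL.
+ */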
12626 +
12627 +/* read_pageset1()
12628 + *
12629 + * Description:        Attempt to read the header and pageset1 of a hibernate image.
12630 + *             Handle the outcome, complaining where appropriate.
12631 + */
12632 +
12633 +int read_pageset1(void)
12634 +{
12635 +       int error;
12636 +
12637 +       error = __read_pageset1();
12638 +
12639 +       if (error && error != -ENODATA && error != -EINVAL &&
12640 +                                       !test_result_state(TOI_ABORTED))
12641 +               abort_hibernate(TOI_IMAGE_ERROR,
12642 +                       "TuxOnIce: Error %d resuming\n", error);
12643 +
12644 +       return error;
12645 +}
12646 +
12647 +/*
12648 + * get_have_image_data()
12649 + */
12650 +static char *get_have_image_data(void)
12651 +{
12652 +       char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
12653 +       struct toi_header *toi_header;
12654 +
12655 +       if (!output_buffer) {
12656 +               printk(KERN_INFO "Failed to allocate a buffer for have_image data.\n");
12657 +               return NULL;
12658 +       }
12659 +
12660 +       /* Check for an image */
12661 +       if (!toiActiveAllocator->image_exists(1) ||
12662 +           toiActiveAllocator->read_header_init() ||
12663 +           toiActiveAllocator->rw_header_chunk(READ, NULL,
12664 +                       output_buffer, sizeof(struct toi_header))) {
12665 +               sprintf(output_buffer, "0\n");
12666 +               /*
12667 +                * From an initrd/ramfs, catting have_image and
12668 +                * getting a result of 0 is sufficient.
12669 +                */
12670 +               clear_toi_state(TOI_BOOT_TIME);
12671 +               goto out;
12672 +       }
12673 +
12674 +       toi_header = (struct toi_header *) output_buffer;
12675 +
12676 +       sprintf(output_buffer, "1\n%s\n%s\n",
12677 +                       toi_header->uts.machine,
12678 +                       toi_header->uts.version);
12679 +
12680 +       /* Check whether we've resumed before */
12681 +       if (test_toi_state(TOI_RESUMED_BEFORE))
12682 +               strcat(output_buffer, "Resumed before.\n");
12683 +
12684 +out:
12685 +       noresume_reset_modules();
12686 +       return output_buffer;
12687 +}
12688 +
12689 +/* read_pageset2()
12690 + *
12691 + * Description:        Read in part or all of pageset2 of an image, depending upon
12692 + *             whether we are hibernating and have only overwritten a portion
12693 + *             with pageset1 pages, or are resuming and need to read them
12694 + *             all.
12695 + * Arguments:  Int. Boolean. Read only pages which would have been
12696 + *             overwritten by pageset1?
12697 + * Returns:    Int. Zero if no error, otherwise the error value.
12698 + */
12699 +int read_pageset2(int overwrittenpagesonly)
12700 +{
12701 +       int result = 0;
12702 +
12703 +       if (!pagedir2.size)
12704 +               return 0;
12705 +
12706 +       result = read_pageset(&pagedir2, overwrittenpagesonly);
12707 +
12708 +       toi_cond_pause(1, "Pagedir 2 read.");
12709 +
12710 +       return result;
12711 +}
12712 +
12713 +/* image_exists_read
12714 + *
12715 + * Return 0 or 1, depending on whether an image is found.
12716 + * Incoming buffer is PAGE_SIZE and result is guaranteed
12717 + * to be far less than that, so we don't worry about
12718 + * overflow.
12719 + */
12720 +int image_exists_read(const char *page, int count)
12721 +{
12722 +       int len = 0;
12723 +       char *result;
12724 +
12725 +       if (toi_activate_storage(0))
12726 +               return count;
12727 +
12728 +       if (!test_toi_state(TOI_RESUME_DEVICE_OK))
12729 +               toi_attempt_to_parse_resume_device(0);
12730 +
12731 +       if (!toiActiveAllocator) {
12732 +               len = sprintf((char *) page, "-1\n");
12733 +       } else {
12734 +               result = get_have_image_data();
12735 +               if (result) {
12736 +                       len = sprintf((char *) page, "%s", result);
12737 +                       toi_free_page(26, (unsigned long) result);
12738 +               }
12739 +       }
12740 +
12741 +       toi_deactivate_storage(0);
12742 +
12743 +       return len;
12744 +}
12745 +
12746 +/* image_exists_write
12747 + *
12748 + * Invalidate an image if one exists.
12749 + */
12750 +int image_exists_write(const char *buffer, int count)
12751 +{
12752 +       if (toi_activate_storage(0))
12753 +               return count;
12754 +
12755 +       if (toiActiveAllocator && toiActiveAllocator->image_exists(1))
12756 +               toiActiveAllocator->remove_image();
12757 +
12758 +       toi_deactivate_storage(0);
12759 +
12760 +       clear_result_state(TOI_KEPT_IMAGE);
12761 +
12762 +       return count;
12763 +}
12764 diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h
12765 new file mode 100644
12766 index 0000000..86e8996
12767 --- /dev/null
12768 +++ b/kernel/power/tuxonice_io.h
12769 @@ -0,0 +1,71 @@
12770 +/*
12771 + * kernel/power/tuxonice_io.h
12772 + *
12773 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
12774 + *
12775 + * This file is released under the GPLv2.
12776 + *
12777 + * It contains high level IO routines for hibernating.
12778 + *
12779 + */
12780 +
12781 +#include <linux/utsname.h>
12782 +#include "tuxonice_pagedir.h"
12783 +#include "power.h"
12784 +
12785 +/* Non-module data saved in our image header */
12786 +struct toi_header {
12787 +       /*
12788 +        * Mirror struct swsusp_info, but without
12789 +        * the page aligned attribute
12790 +        */
12791 +       struct new_utsname uts;
12792 +       u32 version_code;
12793 +       unsigned long num_physpages;
12794 +       int cpus;
12795 +       unsigned long image_pages;
12796 +       unsigned long pages;
12797 +       unsigned long size;
12798 +
12799 +       /* Our own data */
12800 +       unsigned long orig_mem_free;
12801 +       int page_size;
12802 +       int pageset_2_size;
12803 +       int param0;
12804 +       int param1;
12805 +       int param2;
12806 +       int param3;
12807 +       int progress0;
12808 +       int progress1;
12809 +       int progress2;
12810 +       int progress3;
12811 +       int io_time[2][2];
12812 +       struct pagedir pagedir;
12813 +       dev_t root_fs;
12814 +       unsigned long bkd; /* Boot kernel data location */
12815 +};
12816 +
12817 +extern int write_pageset(struct pagedir *pagedir);
12818 +extern int write_image_header(void);
12819 +extern int read_pageset1(void);
12820 +extern int read_pageset2(int overwrittenpagesonly);
12821 +
12822 +extern int toi_attempt_to_parse_resume_device(int quiet);
12823 +extern void attempt_to_parse_resume_device2(void);
12824 +extern void attempt_to_parse_alt_resume_param(void);
12825 +int image_exists_read(const char *page, int count);
12826 +int image_exists_write(const char *buffer, int count);
12827 +extern void save_restore_alt_param(int replace, int quiet);
12828 +extern atomic_t toi_io_workers;
12829 +
12830 +/* Args to save_restore_alt_param */
12831 +#define RESTORE 0
12832 +#define SAVE 1
12833 +
12834 +#define NOQUIET 0
12835 +#define QUIET 1
12836 +
12837 +extern dev_t name_to_dev_t(char *line);
12838 +
12839 +extern wait_queue_head_t toi_io_queue_flusher;
12840 +extern int toi_bio_queue_flusher_should_finish;
12841 diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c
12842 new file mode 100644
12843 index 0000000..29d9100
12844 --- /dev/null
12845 +++ b/kernel/power/tuxonice_modules.c
12846 @@ -0,0 +1,464 @@
12847 +/*
12848 + * kernel/power/tuxonice_modules.c
12849 + *
12850 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
12851 + *
12852 + */
12853 +
12854 +#include <linux/suspend.h>
12855 +#include <linux/module.h>
12856 +#include "tuxonice.h"
12857 +#include "tuxonice_modules.h"
12858 +#include "tuxonice_sysfs.h"
12859 +#include "tuxonice_ui.h"
12860 +
12861 +LIST_HEAD(toi_filters);
12862 +LIST_HEAD(toiAllocators);
12863 +LIST_HEAD(toi_modules);
12864 +
12865 +struct toi_module_ops *toiActiveAllocator;
12866 +EXPORT_SYMBOL_GPL(toiActiveAllocator);
12867 +
12868 +static int toi_num_filters;
12869 +int toiNumAllocators, toi_num_modules;
12870 +
12871 +/*
12872 + * toi_header_storage_for_modules
12873 + *
12874 + * Returns the amount of space needed to store configuration
12875 + * data needed by the modules prior to copying back the original
12876 + * kernel. We can exclude data for pageset2 because it will be
12877 + * available anyway once the kernel is copied back.
12878 + */
12879 +long toi_header_storage_for_modules(void)
12880 +{
12881 +       struct toi_module_ops *this_module;
12882 +       int bytes = 0;
12883 +
12884 +       list_for_each_entry(this_module, &toi_modules, module_list) {
12885 +               if (!this_module->enabled ||
12886 +                   (this_module->type == WRITER_MODULE &&
12887 +                    toiActiveAllocator != this_module))
12888 +                       continue;
12889 +               if (this_module->storage_needed) {
12890 +                       int this = this_module->storage_needed() +
12891 +                               sizeof(struct toi_module_header) +
12892 +                               sizeof(int);
12893 +                       this_module->header_requested = this;
12894 +                       bytes += this;
12895 +               }
12896 +       }
12897 +
12898 +       /* One more for the empty terminator */
12899 +       return bytes + sizeof(struct toi_module_header);
12900 +}
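+
+/*
+ * For example, a module whose storage_needed() returns 16 accounts for
+ * 16 + sizeof(struct toi_module_header) + sizeof(int) bytes here,
+ * matching the <header><len><data> records the image header code in
+ * tuxonice_io.c writes for it.
+ */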
12901 +
12902 +/*
12903 + * toi_memory_for_modules
12904 + *
12905 + * Returns the amount of memory requested by modules for
12906 + * doing their work during the cycle.
12907 + */
12908 +
12909 +long toi_memory_for_modules(int print_parts)
12910 +{
12911 +       long bytes = 0, result;
12912 +       struct toi_module_ops *this_module;
12913 +
12914 +       if (print_parts)
12915 +               printk(KERN_INFO "Memory for modules:\n===================\n");
12916 +       list_for_each_entry(this_module, &toi_modules, module_list) {
12917 +               int this;
12918 +               if (!this_module->enabled)
12919 +                       continue;
12920 +               if (this_module->memory_needed) {
12921 +                       this = this_module->memory_needed();
12922 +                       if (print_parts)
12923 +                               printk(KERN_INFO "%10d bytes (%5ld pages) for "
12924 +                                               "module '%s'.\n", this,
12925 +                                               DIV_ROUND_UP(this, PAGE_SIZE),
12926 +                                               this_module->name);
12927 +                       bytes += this;
12928 +               }
12929 +       }
12930 +
12931 +       result = DIV_ROUND_UP(bytes, PAGE_SIZE);
12932 +       if (print_parts)
12933 +               printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result);
12934 +
12935 +       return result;
12936 +}
12937 +
12938 +/*
12939 + * toi_expected_compression_ratio
12940 + *
12941 + * Returns the compression ratio expected when saving the image.
12942 + */
12943 +
12944 +int toi_expected_compression_ratio(void)
12945 +{
12946 +       int ratio = 100;
12947 +       struct toi_module_ops *this_module;
12948 +
12949 +       list_for_each_entry(this_module, &toi_modules, module_list) {
12950 +               if (!this_module->enabled)
12951 +                       continue;
12952 +               if (this_module->expected_compression)
12953 +                       ratio = ratio * this_module->expected_compression()
12954 +                               / 100;
12955 +       }
12956 +
12957 +       return ratio;
12958 +}
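
Because each enabled module scales the running ratio, expectations
compose multiplicatively. As a worked example with hypothetical
figures, two filters whose expected_compression() return 60 and 50
give:

    ratio = 100 * 60/100 * 50/100 = 30

i.e. the image is expected to shrink to 30% of its raw size.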
12959 +
12960 +/* toi_find_module_given_dir
12961 + * Functionality :     Return a module (if found), given a pointer
12962 + *                     to its directory name
12963 + */
12964 +
12965 +static struct toi_module_ops *toi_find_module_given_dir(char *name)
12966 +{
12967 +       struct toi_module_ops *this_module, *found_module = NULL;
12968 +
12969 +       list_for_each_entry(this_module, &toi_modules, module_list) {
12970 +               if (!strcmp(name, this_module->directory)) {
12971 +                       found_module = this_module;
12972 +                       break;
12973 +               }
12974 +       }
12975 +
12976 +       return found_module;
12977 +}
12978 +
12979 +/* toi_find_module_given_name
12980 + * Functionality :     Return a module (if found), given a pointer
12981 + *                     to its name
12982 + */
12983 +
12984 +struct toi_module_ops *toi_find_module_given_name(char *name)
12985 +{
12986 +       struct toi_module_ops *this_module, *found_module = NULL;
12987 +
12988 +       list_for_each_entry(this_module, &toi_modules, module_list) {
12989 +               if (!strcmp(name, this_module->name)) {
12990 +                       found_module = this_module;
12991 +                       break;
12992 +               }
12993 +       }
12994 +
12995 +       return found_module;
12996 +}
12997 +
12998 +/*
12999 + * toi_print_module_debug_info
13000 + * Functionality   : Get debugging info from modules into a buffer.
13001 + */
13002 +int toi_print_module_debug_info(char *buffer, int buffer_size)
13003 +{
13004 +       struct toi_module_ops *this_module;
13005 +       int len = 0;
13006 +
13007 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13008 +               if (!this_module->enabled)
13009 +                       continue;
13010 +               if (this_module->print_debug_info) {
13011 +                       int result;
13012 +                       result = this_module->print_debug_info(buffer + len,
13013 +                                       buffer_size - len);
13014 +                       len += result;
13015 +               }
13016 +       }
13017 +
13018 +       /* Ensure null terminated without writing past the buffer */
13019 +       buffer[buffer_size - 1] = '\0';
13020 +
13021 +       return len;
13022 +}
13023 +
13024 +/*
13025 + * toi_register_module
13026 + *
13027 + * Register a module.
13028 + */
13029 +int toi_register_module(struct toi_module_ops *module)
13030 +{
13031 +       int i;
13032 +       struct kobject *kobj;
13033 +
13034 +       module->enabled = 1;
13035 +
13036 +       if (toi_find_module_given_name(module->name)) {
13037 +               printk(KERN_INFO "TuxOnIce: Trying to load module %s,"
13038 +                               " which is already registered.\n",
13039 +                               module->name);
13040 +               return -EBUSY;
13041 +       }
13042 +
13043 +       switch (module->type) {
13044 +       case FILTER_MODULE:
13045 +               list_add_tail(&module->type_list, &toi_filters);
13046 +               toi_num_filters++;
13047 +               break;
13048 +       case WRITER_MODULE:
13049 +               list_add_tail(&module->type_list, &toiAllocators);
13050 +               toiNumAllocators++;
13051 +               break;
13052 +       case MISC_MODULE:
13053 +       case MISC_HIDDEN_MODULE:
13054 +               break;
13055 +       default:
13056 +               printk(KERN_WARNING "TuxOnIce: Module '%s' has an invalid "
13057 +                       "type. It has been ignored.\n", module->name);
13058 +               return -EINVAL;
13059 +       }
13060 +       list_add_tail(&module->module_list, &toi_modules);
13061 +       toi_num_modules++;
13062 +
13063 +       if ((!module->directory && !module->shared_directory) ||
13064 +                       !module->sysfs_data || !module->num_sysfs_entries)
13065 +               return 0;
13066 +
13067 +       /*
13068 +        * Modules may share a directory, but those with shared_dir
13069 +        * set must be loaded (via symbol dependencies) after parents
13070 +        * and unloaded beforehand.
13071 +        */
13072 +       if (module->shared_directory) {
13073 +               struct toi_module_ops *shared =
13074 +                       toi_find_module_given_dir(module->shared_directory);
13075 +               if (!shared) {
13076 +                       printk(KERN_INFO "TuxOnIce: Module %s wants to share %s's "
13077 +                                       "directory but %s isn't loaded.\n",
13078 +                                       module->name, module->shared_directory,
13079 +                                       module->shared_directory);
13080 +                       toi_unregister_module(module);
13081 +                       return -ENODEV;
13082 +               }
13083 +               kobj = shared->dir_kobj;
13084 +       } else {
13085 +               if (!strncmp(module->directory, "[ROOT]", 6))
13086 +                       kobj = tuxonice_kobj;
13087 +               else
13088 +                       kobj = make_toi_sysdir(module->directory);
13089 +       }
13090 +       module->dir_kobj = kobj;
13091 +       for (i = 0; i < module->num_sysfs_entries; i++) {
13092 +               int result = toi_register_sysfs_file(kobj,
13093 +                               &module->sysfs_data[i]);
13094 +               if (result)
13095 +                       return result;
13096 +       }
13097 +       return 0;
13098 +}
13099 +EXPORT_SYMBOL_GPL(toi_register_module);
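
A registration sketch (all names hypothetical) for a module that
shares a parent's sysfs directory. As the comment above notes, the
parent owning "compression" must be registered first and unregistered
last; sysfs entries are omitted here for brevity.

    static struct toi_module_ops example_child_ops = {
            .type             = MISC_MODULE,
            .name             = "example child",
            .shared_directory = "compression",      /* parent's dir */
            .module           = THIS_MODULE,
    };

    static int __init example_child_init(void)
    {
            return toi_register_module(&example_child_ops);
    }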
13100 +
13101 +/*
13102 + * toi_unregister_module
13103 + *
13104 + * Remove a module.
13105 + */
13106 +void toi_unregister_module(struct toi_module_ops *module)
13107 +{
13108 +       int i;
13109 +
13110 +       if (module->dir_kobj)
13111 +               for (i = 0; i < module->num_sysfs_entries; i++)
13112 +                       toi_unregister_sysfs_file(module->dir_kobj,
13113 +                                       &module->sysfs_data[i]);
13114 +
13115 +       if (!module->shared_directory && module->directory &&
13116 +                       strncmp(module->directory, "[ROOT]", 6))
13117 +               remove_toi_sysdir(module->dir_kobj);
13118 +
13119 +       switch (module->type) {
13120 +       case FILTER_MODULE:
13121 +               list_del(&module->type_list);
13122 +               toi_num_filters--;
13123 +               break;
13124 +       case WRITER_MODULE:
13125 +               list_del(&module->type_list);
13126 +               toiNumAllocators--;
13127 +               if (toiActiveAllocator == module) {
13128 +                       toiActiveAllocator = NULL;
13129 +                       clear_toi_state(TOI_CAN_RESUME);
13130 +                       clear_toi_state(TOI_CAN_HIBERNATE);
13131 +               }
13132 +               break;
13133 +       case MISC_MODULE:
13134 +       case MISC_HIDDEN_MODULE:
13135 +               break;
13136 +       default:
13137 +               printk(KERN_WARNING "TuxOnIce: Module '%s' has an invalid "
13138 +                       "type. It has been ignored.\n", module->name);
13139 +               return;
13140 +       }
13141 +       list_del(&module->module_list);
13142 +       toi_num_modules--;
13143 +}
13144 +EXPORT_SYMBOL_GPL(toi_unregister_module);
13145 +
13146 +/*
13147 + * toi_move_module_tail
13148 + *
13149 + * Rearrange modules when reloading the config.
13150 + */
13151 +void toi_move_module_tail(struct toi_module_ops *module)
13152 +{
13153 +       switch (module->type) {
13154 +       case FILTER_MODULE:
13155 +               if (toi_num_filters > 1)
13156 +                       list_move_tail(&module->type_list, &toi_filters);
13157 +               break;
13158 +       case WRITER_MODULE:
13159 +               if (toiNumAllocators > 1)
13160 +                       list_move_tail(&module->type_list, &toiAllocators);
13161 +               break;
13162 +       case MISC_MODULE:
13163 +       case MISC_HIDDEN_MODULE:
13164 +               break;
13165 +       default:
13166 +               printk(KERN_WARNING "TuxOnIce: Module '%s' has an invalid "
13167 +                       "type. It has been ignored.\n", module->name);
13168 +               return;
13169 +       }
13170 +       if ((toi_num_filters + toiNumAllocators) > 1)
13171 +               list_move_tail(&module->module_list, &toi_modules);
13172 +}
13173 +
13174 +/*
13175 + * toi_initialise_modules
13176 + *
13177 + * Get ready to do some work!
13178 + */
13179 +int toi_initialise_modules(int starting_cycle, int early)
13180 +{
13181 +       struct toi_module_ops *this_module;
13182 +       int result;
13183 +
13184 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13185 +               this_module->header_requested = 0;
13186 +               this_module->header_used = 0;
13187 +               if (!this_module->enabled)
13188 +                       continue;
13189 +               if (this_module->early != early)
13190 +                       continue;
13191 +               if (this_module->initialise) {
13192 +                       toi_message(TOI_MEMORY, TOI_MEDIUM, 1,
13193 +                               "Initialising module %s.\n",
13194 +                               this_module->name);
13195 +                       result = this_module->initialise(starting_cycle);
13196 +                       if (result) {
13197 +                               toi_cleanup_modules(starting_cycle);
13198 +                               return result;
13199 +                       }
13200 +                       this_module->initialised = 1;
13201 +               }
13202 +       }
13203 +
13204 +       return 0;
13205 +}
13206 +
13207 +/*
13208 + * toi_cleanup_modules
13209 + *
13210 + * Tell modules the work is done.
13211 + */
13212 +void toi_cleanup_modules(int finishing_cycle)
13213 +{
13214 +       struct toi_module_ops *this_module;
13215 +
13216 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13217 +               if (!this_module->enabled || !this_module->initialised)
13218 +                       continue;
13219 +               if (this_module->cleanup) {
13220 +                       toi_message(TOI_MEMORY, TOI_MEDIUM, 1,
13221 +                               "Cleaning up module %s.\n",
13222 +                               this_module->name);
13223 +                       this_module->cleanup(finishing_cycle);
13224 +               }
13225 +               this_module->initialised = 0;
13226 +       }
13227 +}
13228 +
13229 +/*
13230 + * toi_get_next_filter
13231 + *
13232 + * Get the next filter in the pipeline.
13233 + */
13234 +struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought)
13235 +{
13236 +       struct toi_module_ops *last_filter = NULL, *this_filter = NULL;
13237 +
13238 +       list_for_each_entry(this_filter, &toi_filters, type_list) {
13239 +               if (!this_filter->enabled)
13240 +                       continue;
13241 +               if ((last_filter == filter_sought) || (!filter_sought))
13242 +                       return this_filter;
13243 +               last_filter = this_filter;
13244 +       }
13245 +
13246 +       return toiActiveAllocator;
13247 +}
13248 +EXPORT_SYMBOL_GPL(toi_get_next_filter);
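
A sketch of how a caller walks the pipeline: NULL yields the first
enabled filter, each filter yields its successor, and the chain
terminates at the active allocator.

    static void example_walk_pipeline(void)
    {
            struct toi_module_ops *this = toi_get_next_filter(NULL);

            while (this && this != toiActiveAllocator) {
                    /* ... pass data through this filter ... */
                    this = toi_get_next_filter(this);
            }
    }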
13249 +
13250 +/**
13251 + * toi_print_modules: Printk what support is loaded.
13252 + */
13253 +void toi_print_modules(void)
13254 +{
13255 +       struct toi_module_ops *this_module;
13256 +       int prev = 0;
13257 +
13258 +       printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for");
13259 +
13260 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13261 +               if (this_module->type == MISC_HIDDEN_MODULE)
13262 +                       continue;
13263 +               printk("%s %s%s%s", prev ? "," : "",
13264 +                               this_module->enabled ? "" : "[",
13265 +                               this_module->name,
13266 +                               this_module->enabled ? "" : "]");
13267 +               prev = 1;
13268 +       }
13269 +
13270 +       printk(".\n");
13271 +}
13272 +
13273 +/* toi_get_modules
13274 + *
13275 + * Take a reference to modules so they can't go away under us.
13276 + */
13277 +
13278 +int toi_get_modules(void)
13279 +{
13280 +       struct toi_module_ops *this_module;
13281 +
13282 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13283 +               struct toi_module_ops *this_module2;
13284 +
13285 +               if (try_module_get(this_module->module))
13286 +                       continue;
13287 +
13288 +               /* Failed! Reverse gets and return error */
13289 +               list_for_each_entry(this_module2, &toi_modules,
13290 +                               module_list) {
13291 +                       if (this_module == this_module2)
13292 +                               return -EINVAL;
13293 +                       module_put(this_module2->module);
13294 +               }
13295 +       }
13296 +       return 0;
13297 +}
13298 +
13299 +/* toi_put_modules
13300 + *
13301 + * Release our references to modules we used.
13302 + */
13303 +
13304 +void toi_put_modules(void)
13305 +{
13306 +       struct toi_module_ops *this_module;
13307 +
13308 +       list_for_each_entry(this_module, &toi_modules, module_list)
13309 +               module_put(this_module->module);
13310 +}
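
The expected calling pattern, as a sketch: pin every module's owner
for the duration of a cycle and drop the references when it ends.

    static int example_run_cycle(void)
    {
            int result = toi_get_modules();

            if (result)
                    return result;  /* a module was being unloaded */

            /* ... hibernation cycle runs with modules pinned ... */

            toi_put_modules();
            return 0;
    }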
13311 diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h
13312 new file mode 100644
13313 index 0000000..1c13561
13314 --- /dev/null
13315 +++ b/kernel/power/tuxonice_modules.h
13316 @@ -0,0 +1,185 @@
13317 +/*
13318 + * kernel/power/tuxonice_modules.h
13319 + *
13320 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
13321 + *
13322 + * This file is released under the GPLv2.
13323 + *
13324 + * It contains declarations for modules. Modules are additions to
13325 + * TuxOnIce that provide facilities such as image compression or
13326 + * encryption, backends for storage of the image and user interfaces.
13327 + *
13328 + */
13329 +
13330 +#ifndef TOI_MODULES_H
13331 +#define TOI_MODULES_H
13332 +
13333 +/* This is the maximum size we store in the image header for a module name */
13334 +#define TOI_MAX_MODULE_NAME_LENGTH 30
13335 +
13336 +/* Per-module metadata */
13337 +struct toi_module_header {
13338 +       char name[TOI_MAX_MODULE_NAME_LENGTH];
13339 +       int enabled;
13340 +       int type;
13341 +       int index;
13342 +       int data_length;
13343 +       unsigned long signature;
13344 +};
13345 +
13346 +enum {
13347 +       FILTER_MODULE,
13348 +       WRITER_MODULE, /* e.g. the block writer */
13349 +       MISC_MODULE,
13350 +       MISC_HIDDEN_MODULE,
13351 +};
13352 +
13353 +enum {
13354 +       TOI_ASYNC,
13355 +       TOI_SYNC
13356 +};
13357 +
13358 +struct toi_module_ops {
13359 +       /* Functions common to all modules */
13360 +       int type;
13361 +       char *name;
13362 +       char *directory;
13363 +       char *shared_directory;
13364 +       struct kobject *dir_kobj;
13365 +       struct module *module;
13366 +       int enabled, early, initialised;
13367 +       struct list_head module_list;
13368 +
13369 +       /* List of filters or allocators */
13370 +       struct list_head list, type_list;
13371 +
13372 +       /*
13373 +        * Requirements for memory during the cycle and for
13374 +        * storage in the image header.
13375 +        */
13376 +       int (*memory_needed) (void);
13377 +       int (*storage_needed) (void);
13378 +
13379 +       int header_requested, header_used;
13380 +
13381 +       int (*expected_compression) (void);
13382 +
13383 +       /*
13384 +        * Debug info
13385 +        */
13386 +       int (*print_debug_info) (char *buffer, int size);
13387 +       int (*save_config_info) (char *buffer);
13388 +       void (*load_config_info) (char *buffer, int len);
13389 +
13390 +       /*
13391 +        * Initialise & cleanup - general routines called
13392 +        * at the start and end of a cycle.
13393 +        */
13394 +       int (*initialise) (int starting_cycle);
13395 +       void (*cleanup) (int finishing_cycle);
13396 +
13397 +       /*
13398 +        * Calls for allocating storage (allocators only).
13399 +        *
13400 +        * Header space is allocated separately. Note that allocation
13401 +        * of space for the header might result in allocated space
13402 +        * being stolen from the main pool if there is no unallocated
13403 +        * space. We have to be able to allocate enough space for
13404 +        * the header. We can eat memory to ensure there is enough
13405 +        * for the main pool.
13406 +        */
13407 +
13408 +       int (*storage_available) (void);
13409 +       void (*reserve_header_space) (int space_requested);
13410 +       int (*allocate_storage) (int space_requested);
13411 +       int (*storage_allocated) (void);
13412 +       int (*release_storage) (void);
13413 +
13414 +       /*
13415 +        * Routines used in image I/O.
13416 +        */
13417 +       int (*rw_init) (int rw, int stream_number);
13418 +       int (*rw_cleanup) (int rw);
13419 +       int (*write_page) (unsigned long index, struct page *buffer_page,
13420 +                       unsigned int buf_size);
13421 +       int (*read_page) (unsigned long *index, struct page *buffer_page,
13422 +                       unsigned int *buf_size);
13423 +       void (*io_flusher) (int rw);
13424 +
13425 +       /* Reset module if image exists but reading aborted */
13426 +       void (*noresume_reset) (void);
13427 +
13428 +       /* Read and write the metadata */
13429 +       int (*write_header_init) (void);
13430 +       int (*write_header_cleanup) (void);
13431 +
13432 +       int (*read_header_init) (void);
13433 +       int (*read_header_cleanup) (void);
13434 +
13435 +       int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
13436 +                       char *buffer_start, int buffer_size);
13437 +
13438 +       int (*rw_header_chunk_noreadahead) (int rw,
13439 +                       struct toi_module_ops *owner, char *buffer_start,
13440 +                       int buffer_size);
13441 +
13442 +       /* Attempt to parse an image location */
13443 +       int (*parse_sig_location) (char *buffer, int only_writer, int quiet);
13444 +
13445 +       /* Throttle I/O according to throughput */
13446 +       void (*update_throughput_throttle) (int jif_index);
13447 +
13448 +       /* Monitor outstanding I/O */
13449 +       void (*monitor_outstanding_io) (void);
13450 +
13451 +       /* Flush outstanding I/O */
13452 +       void (*finish_all_io) (void);
13453 +
13454 +       /* Determine whether image exists that we can restore */
13455 +       int (*image_exists) (int quiet);
13456 +
13457 +       /* Mark the image as having tried to resume */
13458 +       int (*mark_resume_attempted) (int);
13459 +
13460 +       /* Destroy image if one exists */
13461 +       int (*remove_image) (void);
13462 +
13463 +       /* Sysfs Data */
13464 +       struct toi_sysfs_data *sysfs_data;
13465 +       int num_sysfs_entries;
13466 +};
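
A minimal sketch (hypothetical filter) of the fields a module fills in
before calling toi_register_module(); a real filter would also supply
rw_init/rw_cleanup and the write_page/read_page hooks.

    static int example_memory_needed(void)
    {
            return PAGE_SIZE;       /* say, one scratch page */
    }

    static struct toi_module_ops example_filter_ops = {
            .type          = FILTER_MODULE,
            .name          = "example filter",
            .directory     = "example",
            .module        = THIS_MODULE,
            .memory_needed = example_memory_needed,
    };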
13467 +
13468 +extern int toi_num_modules, toiNumAllocators;
13469 +
13470 +extern struct toi_module_ops *toiActiveAllocator;
13471 +extern struct list_head toi_filters, toiAllocators, toi_modules;
13472 +
13473 +extern void toi_prepare_console_modules(void);
13474 +extern void toi_cleanup_console_modules(void);
13475 +
13476 +extern struct toi_module_ops *toi_find_module_given_name(char *name);
13477 +extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *);
13478 +
13479 +extern int toi_register_module(struct toi_module_ops *module);
13480 +extern void toi_move_module_tail(struct toi_module_ops *module);
13481 +
13482 +extern long toi_header_storage_for_modules(void);
13483 +extern long toi_memory_for_modules(int print_parts);
13484 +extern int toi_expected_compression_ratio(void);
13485 +
13486 +extern int toi_print_module_debug_info(char *buffer, int buffer_size);
13488 +extern void toi_unregister_module(struct toi_module_ops *module);
13489 +
13490 +extern int toi_initialise_modules(int starting_cycle, int early);
13491 +#define toi_initialise_modules_early(starting) \
13492 +       toi_initialise_modules(starting, 1)
13493 +#define toi_initialise_modules_late(starting) \
13494 +       toi_initialise_modules(starting, 0)
13495 +extern void toi_cleanup_modules(int finishing_cycle);
13496 +
13497 +extern void toi_print_modules(void);
13498 +
13499 +int toi_get_modules(void);
13500 +void toi_put_modules(void);
13501 +#endif
13502 diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c
13503 new file mode 100644
13504 index 0000000..bb027a7
13505 --- /dev/null
13506 +++ b/kernel/power/tuxonice_netlink.c
13507 @@ -0,0 +1,343 @@
13508 +/*
13509 + * kernel/power/tuxonice_netlink.c
13510 + *
13511 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
13512 + *
13513 + * This file is released under the GPLv2.
13514 + *
13515 + * Functions for communicating with a userspace helper via netlink.
13516 + */
13517 +
13518 +
13519 +#include <linux/suspend.h>
13520 +#include "tuxonice_netlink.h"
13521 +#include "tuxonice.h"
13522 +#include "tuxonice_modules.h"
13523 +#include "tuxonice_alloc.h"
13524 +
13525 +static struct user_helper_data *uhd_list;
13526 +
13527 +/*
13528 + * Refill our pool of SKBs for use in emergencies (eg, when eating memory and
13529 + * none can be allocated).
13530 + */
13531 +static void toi_fill_skb_pool(struct user_helper_data *uhd)
13532 +{
13533 +       while (uhd->pool_level < uhd->pool_limit) {
13534 +               struct sk_buff *new_skb =
13535 +                       alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
13536 +
13537 +               if (!new_skb)
13538 +                       break;
13539 +
13540 +               new_skb->next = uhd->emerg_skbs;
13541 +               uhd->emerg_skbs = new_skb;
13542 +               uhd->pool_level++;
13543 +       }
13544 +}
13545 +
13546 +/*
13547 + * Try to allocate a single skb. If we can't get one, try to use one from
13548 + * our pool.
13549 + */
13550 +static struct sk_buff *toi_get_skb(struct user_helper_data *uhd)
13551 +{
13552 +       struct sk_buff *skb =
13553 +               alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
13554 +
13555 +       if (skb)
13556 +               return skb;
13557 +
13558 +       skb = uhd->emerg_skbs;
13559 +       if (skb) {
13560 +               uhd->pool_level--;
13561 +               uhd->emerg_skbs = skb->next;
13562 +               skb->next = NULL;
13563 +       }
13564 +
13565 +       return skb;
13566 +}
13567 +
13568 +static void put_skb(struct user_helper_data *uhd, struct sk_buff *skb)
13569 +{
13570 +       if (uhd->pool_level < uhd->pool_limit) {
13571 +               skb->next = uhd->emerg_skbs;
13572 +               uhd->emerg_skbs = skb;
13573 +       } else
13574 +               kfree_skb(skb);
13575 +}
13576 +
13577 +void toi_send_netlink_message(struct user_helper_data *uhd,
13578 +               int type, void *params, size_t len)
13579 +{
13580 +       struct sk_buff *skb;
13581 +       struct nlmsghdr *nlh;
13582 +       void *dest;
13583 +       struct task_struct *t;
13584 +
13585 +       if (uhd->pid == -1)
13586 +               return;
13587 +
13588 +       if (uhd->debug)
13589 +               printk(KERN_ERR "toi_send_netlink_message: Send "
13590 +                               "message type %d.\n", type);
13591 +
13592 +       skb = toi_get_skb(uhd);
13593 +       if (!skb) {
13594 +               printk(KERN_INFO "toi_netlink: Can't allocate skb!\n");
13595 +               return;
13596 +       }
13597 +
13598 +       /* NLMSG_PUT contains a hidden goto nlmsg_failure */
13599 +       nlh = NLMSG_PUT(skb, 0, uhd->sock_seq, type, len);
13600 +       uhd->sock_seq++;
13601 +
13602 +       dest = NLMSG_DATA(nlh);
13603 +       if (params && len > 0)
13604 +               memcpy(dest, params, len);
13605 +
13606 +       netlink_unicast(uhd->nl, skb, uhd->pid, 0);
13607 +
13608 +       read_lock(&tasklist_lock);
13609 +       t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns);
13610 +       if (!t) {
13611 +               read_unlock(&tasklist_lock);
13612 +               if (uhd->pid > -1)
13613 +                       printk(KERN_INFO "Hmm. Can't find the userspace task"
13614 +                               " %d.\n", uhd->pid);
13615 +               return;
13616 +       }
13617 +       wake_up_process(t);
13618 +       read_unlock(&tasklist_lock);
13619 +
13620 +       yield();
13621 +
13622 +       return;
13623 +
13624 +nlmsg_failure:
13625 +       if (skb)
13626 +               put_skb(uhd, skb);
13627 +
13628 +       if (uhd->debug)
13629 +               printk(KERN_ERR "toi_send_netlink_message: Failed to send "
13630 +                               "message type %d.\n", type);
13631 +}
13632 +EXPORT_SYMBOL_GPL(toi_send_netlink_message);
13633 +
13634 +static void send_whether_debugging(struct user_helper_data *uhd)
13635 +{
13636 +       static u8 is_debugging = 1;
13637 +
13638 +       toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING,
13639 +                       &is_debugging, sizeof(u8));
13640 +}
13641 +
13642 +/*
13643 + * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we
13644 + * are hibernating.
13645 + */
13646 +static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid)
13647 +{
13648 +       struct task_struct *t;
13649 +
13650 +       if (uhd->debug)
13651 +               printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid);
13652 +
13653 +       read_lock(&tasklist_lock);
13654 +       t = find_task_by_pid_type_ns(PIDTYPE_PID, pid, &init_pid_ns);
13655 +       if (!t) {
13656 +               read_unlock(&tasklist_lock);
13657 +               printk(KERN_INFO "Strange. Can't find the userspace task %d.\n",
13658 +                               pid);
13659 +               return -EINVAL;
13660 +       }
13661 +
13662 +       t->flags |= PF_NOFREEZE;
13663 +
13664 +       read_unlock(&tasklist_lock);
13665 +       uhd->pid = pid;
13666 +
13667 +       toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0);
13668 +
13669 +       return 0;
13670 +}
13671 +
13672 +/*
13673 + * Called when the userspace process has informed us that it's ready to roll.
13674 + */
13675 +static int nl_ready(struct user_helper_data *uhd, u32 version)
13676 +{
13677 +       if (version != uhd->interface_version) {
13678 +               printk(KERN_INFO "%s userspace process using invalid interface"
13679 +                               " version (%d - kernel wants %d). Trying to "
13680 +                               "continue without it.\n",
13681 +                               uhd->name, version, uhd->interface_version);
13682 +               if (uhd->not_ready)
13683 +                       uhd->not_ready();
13684 +               return -EINVAL;
13685 +       }
13686 +
13687 +       complete(&uhd->wait_for_process);
13688 +
13689 +       return 0;
13690 +}
13691 +
13692 +void toi_netlink_close_complete(struct user_helper_data *uhd)
13693 +{
13694 +       if (uhd->nl) {
13695 +               netlink_kernel_release(uhd->nl);
13696 +               uhd->nl = NULL;
13697 +       }
13698 +
13699 +       while (uhd->emerg_skbs) {
13700 +               struct sk_buff *next = uhd->emerg_skbs->next;
13701 +               kfree_skb(uhd->emerg_skbs);
13702 +               uhd->emerg_skbs = next;
13703 +       }
13704 +
13705 +       uhd->pid = -1;
13706 +}
13707 +EXPORT_SYMBOL_GPL(toi_netlink_close_complete);
13708 +
13709 +static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd,
13710 +               struct sk_buff *skb, struct nlmsghdr *nlh)
13711 +{
13712 +       int type = nlh->nlmsg_type;
13713 +       int *data;
13714 +       int err;
13715 +
13716 +       if (uhd->debug)
13717 +               printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n",
13718 +                               type);
13719 +
13720 +       /* Let the more specific handler go first. It returns
13721 +        * 1 for valid messages that it doesn't handle. */
13722 +       err = uhd->rcv_msg(skb, nlh);
13723 +       if (err != 1)
13724 +               return err;
13725 +
13726 +       /* Only allow one task to receive NOFREEZE privileges */
13727 +       if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) {
13728 +               printk(KERN_INFO "Received extra nofreeze me requests.\n");
13729 +               return -EBUSY;
13730 +       }
13731 +
13732 +       data = NLMSG_DATA(nlh);
13733 +
13734 +       switch (type) {
13735 +       case NETLINK_MSG_NOFREEZE_ME:
13736 +               return nl_set_nofreeze(uhd, nlh->nlmsg_pid);
13737 +       case NETLINK_MSG_GET_DEBUGGING:
13738 +               send_whether_debugging(uhd);
13739 +               return 0;
13740 +       case NETLINK_MSG_READY:
13741 +               if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) {
13742 +                       printk(KERN_INFO "Invalid ready message.\n");
13743 +                       if (uhd->not_ready)
13744 +                               uhd->not_ready();
13745 +                       return -EINVAL;
13746 +               }
13747 +               return nl_ready(uhd, (u32) *data);
13748 +       case NETLINK_MSG_CLEANUP:
13749 +               toi_netlink_close_complete(uhd);
13750 +               return 0;
13751 +       }
13752 +
13753 +       return -EINVAL;
13754 +}
13755 +
13756 +static void toi_user_rcv_skb(struct sk_buff *skb)
13757 +{
13758 +       int err;
13759 +       struct nlmsghdr *nlh;
13760 +       struct user_helper_data *uhd = uhd_list;
13761 +
13762 +       while (uhd && uhd->netlink_id != skb->sk->sk_protocol)
13763 +               uhd = uhd->next;
13764 +
13765 +       if (!uhd)
13766 +               return;
13767 +
13768 +       while (skb->len >= NLMSG_SPACE(0)) {
13769 +               u32 rlen;
13770 +
13771 +               nlh = (struct nlmsghdr *) skb->data;
13772 +               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
13773 +                       return;
13774 +
13775 +               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
13776 +               if (rlen > skb->len)
13777 +                       rlen = skb->len;
13778 +
13779 +               err = toi_nl_gen_rcv_msg(uhd, skb, nlh);
13780 +               if (err)
13781 +                       netlink_ack(skb, nlh, err);
13782 +               else if (nlh->nlmsg_flags & NLM_F_ACK)
13783 +                       netlink_ack(skb, nlh, 0);
13784 +               skb_pull(skb, rlen);
13785 +       }
13786 +}
13787 +
13788 +static int netlink_prepare(struct user_helper_data *uhd)
13789 +{
13790 +       uhd->next = uhd_list;
13791 +       uhd_list = uhd;
13792 +
13793 +       uhd->sock_seq = 0x42c0ffee;
13794 +       uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, 0,
13795 +                       toi_user_rcv_skb, NULL, THIS_MODULE);
13796 +       if (!uhd->nl) {
13797 +               printk(KERN_INFO "Failed to allocate netlink socket for %s.\n",
13798 +                               uhd->name);
13799 +               return -ENOMEM;
13800 +       }
13801 +
13802 +       toi_fill_skb_pool(uhd);
13803 +
13804 +       return 0;
13805 +}
13806 +
13807 +void toi_netlink_close(struct user_helper_data *uhd)
13808 +{
13809 +       struct task_struct *t;
13810 +
13811 +       read_lock(&tasklist_lock);
13812 +       t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns);
13813 +       if (t)
13814 +               t->flags &= ~PF_NOFREEZE;
13815 +       read_unlock(&tasklist_lock);
13816 +
13817 +       toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0);
13818 +}
13819 +EXPORT_SYMBOL_GPL(toi_netlink_close);
13820 +
13821 +int toi_netlink_setup(struct user_helper_data *uhd)
13822 +{
13823 +       /* In case userui didn't clean up properly on us */
13824 +       toi_netlink_close_complete(uhd);
13825 +
13826 +       if (netlink_prepare(uhd) < 0) {
13827 +               printk(KERN_INFO "Netlink prepare failed.\n");
13828 +               return 1;
13829 +       }
13830 +
13831 +       if (toi_launch_userspace_program(uhd->program, uhd->netlink_id,
13832 +                               UMH_WAIT_EXEC, uhd->debug) < 0) {
13833 +               printk(KERN_INFO "Launch userspace program failed.\n");
13834 +               toi_netlink_close_complete(uhd);
13835 +               return 1;
13836 +       }
13837 +
13838 +       /* Wait 2 seconds for the userspace process to make contact */
13839 +       wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ);
13840 +
13841 +       if (uhd->pid == -1) {
13842 +               printk(KERN_INFO "%s: Failed to contact userspace process.\n",
13843 +                               uhd->name);
13844 +               toi_netlink_close_complete(uhd);
13845 +               return 1;
13846 +       }
13847 +
13848 +       return 0;
13849 +}
13850 +EXPORT_SYMBOL_GPL(toi_netlink_setup);
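
A sketch of the lifecycle this file expects (names and values
hypothetical): fill in user_helper_data, initialise the completion,
and call toi_netlink_setup(), which launches the helper and waits up
to two seconds for its NETLINK_MSG_READY handshake.

    static int example_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
    {
            return 1;       /* unhandled: defer to the generic handler */
    }

    static struct user_helper_data example_uhd = {
            .name              = "example",
            .program           = "/usr/local/sbin/example_helper",
            .netlink_id        = 99,        /* hypothetical protocol id */
            .pool_limit        = 6,
            .skb_size          = 4096,
            .interface_version = 1,
            .rcv_msg           = example_rcv_msg,
    };

    static int example_start(void)
    {
            init_completion(&example_uhd.wait_for_process);
            return toi_netlink_setup(&example_uhd); /* 0 once contacted */
    }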
13851 diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h
13852 new file mode 100644
13853 index 0000000..37e174b
13854 --- /dev/null
13855 +++ b/kernel/power/tuxonice_netlink.h
13856 @@ -0,0 +1,62 @@
13857 +/*
13858 + * kernel/power/tuxonice_netlink.h
13859 + *
13860 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
13861 + *
13862 + * This file is released under the GPLv2.
13863 + *
13864 + * Declarations for functions for communicating with a userspace helper
13865 + * via netlink.
13866 + */
13867 +
13868 +#include <linux/netlink.h>
13869 +#include <net/sock.h>
13870 +
13871 +#define NETLINK_MSG_BASE 0x10
13872 +
13873 +#define NETLINK_MSG_READY 0x10
13874 +#define NETLINK_MSG_NOFREEZE_ME 0x16
13875 +#define NETLINK_MSG_GET_DEBUGGING 0x19
13876 +#define NETLINK_MSG_CLEANUP 0x24
13877 +#define NETLINK_MSG_NOFREEZE_ACK 0x27
13878 +#define NETLINK_MSG_IS_DEBUGGING 0x28
13879 +
13880 +struct user_helper_data {
13881 +       int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh);
13882 +       void (*not_ready) (void);
13883 +       struct sock *nl;
13884 +       u32 sock_seq;
13885 +       pid_t pid;
13886 +       char *comm;
13887 +       char program[256];
13888 +       int pool_level;
13889 +       int pool_limit;
13890 +       struct sk_buff *emerg_skbs;
13891 +       int skb_size;
13892 +       int netlink_id;
13893 +       char *name;
13894 +       struct user_helper_data *next;
13895 +       struct completion wait_for_process;
13896 +       u32 interface_version;
13897 +       int must_init;
13898 +       int debug;
13899 +};
13900 +
13901 +#ifdef CONFIG_NET
13902 +int toi_netlink_setup(struct user_helper_data *uhd);
13903 +void toi_netlink_close(struct user_helper_data *uhd);
13904 +void toi_send_netlink_message(struct user_helper_data *uhd,
13905 +               int type, void *params, size_t len);
13906 +void toi_netlink_close_complete(struct user_helper_data *uhd);
13907 +#else
13908 +static inline int toi_netlink_setup(struct user_helper_data *uhd)
13909 +{
13910 +       return 0;
13911 +}
13912 +
13913 +static inline void toi_netlink_close(struct user_helper_data *uhd) { }
13914 +static inline void toi_send_netlink_message(struct user_helper_data *uhd,
13915 +               int type, void *params, size_t len) { }
13916 +static inline void toi_netlink_close_complete(struct user_helper_data *uhd)
13917 +       { }
13918 +#endif
13919 diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c
13920 new file mode 100644
13921 index 0000000..934bb27
13922 --- /dev/null
13923 +++ b/kernel/power/tuxonice_pagedir.c
13924 @@ -0,0 +1,361 @@
13925 +/*
13926 + * kernel/power/tuxonice_pagedir.c
13927 + *
13928 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
13929 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
13930 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
13931 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
13932 + *
13933 + * This file is released under the GPLv2.
13934 + *
13935 + * Routines for handling pagesets.
13936 + * Note that pbes aren't actually stored as such. They're stored as
13937 + * bitmaps and extents.
13938 + */
13939 +
13940 +#include <linux/suspend.h>
13941 +#include <linux/highmem.h>
13942 +#include <linux/bootmem.h>
13943 +#include <linux/hardirq.h>
13944 +#include <linux/sched.h>
13945 +#include <asm/tlbflush.h>
13946 +
13947 +#include "tuxonice_pageflags.h"
13948 +#include "tuxonice_ui.h"
13949 +#include "tuxonice_pagedir.h"
13950 +#include "tuxonice_prepare_image.h"
13951 +#include "tuxonice.h"
13952 +#include "power.h"
13953 +#include "tuxonice_builtin.h"
13954 +#include "tuxonice_alloc.h"
13955 +
13956 +static int ptoi_pfn;
13957 +static struct pbe *this_low_pbe;
13958 +static struct pbe **last_low_pbe_ptr;
13959 +static struct memory_bitmap dup_map1, dup_map2;
13960 +
13961 +void toi_reset_alt_image_pageset2_pfn(void)
13962 +{
13963 +       memory_bm_position_reset(&pageset2_map);
13964 +}
13965 +
13966 +static struct page *first_conflicting_page;
13967 +
13968 +/*
13969 + * free_conflicting_pages
13970 + */
13971 +
13972 +static void free_conflicting_pages(void)
13973 +{
13974 +       while (first_conflicting_page) {
13975 +               struct page *next =
13976 +                       *((struct page **) kmap(first_conflicting_page));
13977 +               kunmap(first_conflicting_page);
13978 +               toi__free_page(29, first_conflicting_page);
13979 +               first_conflicting_page = next;
13980 +       }
13981 +}
13982 +
13983 +/* __toi_get_nonconflicting_page
13984 + *
13985 + * Description: Gets order zero pages that won't be overwritten
13986 + *             while copying the original pages.
13987 + */
13988 +
13989 +struct page *___toi_get_nonconflicting_page(int can_be_highmem)
13990 +{
13991 +       struct page *page;
13992 +       gfp_t flags = TOI_ATOMIC_GFP;
13993 +       if (can_be_highmem)
13994 +               flags |= __GFP_HIGHMEM;
13995 +
13997 +       if (test_toi_state(TOI_LOADING_ALT_IMAGE) &&
13998 +                       pageset2_map.zone_bm_list &&
13999 +                       (ptoi_pfn != BM_END_OF_MAP)) {
14000 +               do {
14001 +                       ptoi_pfn = memory_bm_next_pfn(&pageset2_map);
14002 +                       if (ptoi_pfn != BM_END_OF_MAP) {
14003 +                               page = pfn_to_page(ptoi_pfn);
14004 +                               if (!PagePageset1(page) &&
14005 +                                   (can_be_highmem || !PageHighMem(page)))
14006 +                                       return page;
14007 +                       }
14008 +               } while (ptoi_pfn != BM_END_OF_MAP);
14009 +       }
14010 +
14011 +       do {
14012 +               page = toi_alloc_page(29, flags);
14013 +               if (!page) {
14014 +                       printk(KERN_INFO "Failed to get nonconflicting "
14015 +                                       "page.\n");
14016 +                       return NULL;
14017 +               }
14018 +               if (PagePageset1(page)) {
14019 +                       struct page **next = (struct page **) kmap(page);
14020 +                       *next = first_conflicting_page;
14021 +                       first_conflicting_page = page;
14022 +                       kunmap(page);
14023 +               }
14024 +       } while (PagePageset1(page));
14025 +
14026 +       return page;
14027 +}
14028 +
14029 +unsigned long __toi_get_nonconflicting_page(void)
14030 +{
14031 +       struct page *page = ___toi_get_nonconflicting_page(0);
14032 +       return page ? (unsigned long) page_address(page) : 0;
14033 +}
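
Two details worth noting: rejected (conflicting) pages are chained
through their own contents, so the bookkeeping costs no extra memory,
and callers simply treat these routines as page allocators. A usage
sketch (function name hypothetical):

    static int example_get_restore_buffer(void **buf)
    {
            /* A lowmem page guaranteed not to be overwritten by the
             * atomic restore. */
            *buf = (void *) __toi_get_nonconflicting_page();
            return *buf ? 0 : -ENOMEM;
    }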
14034 +
14035 +static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe,
14036 +               int highmem)
14037 +{
14038 +       if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1))
14039 +                    + 2 * sizeof(struct pbe)) > PAGE_SIZE) {
14040 +               struct page *new_page =
14041 +                       ___toi_get_nonconflicting_page(highmem);
14042 +               if (!new_page)
14043 +                       return ERR_PTR(-ENOMEM);
14044 +               this_pbe = (struct pbe *) kmap(new_page);
14045 +               memset(this_pbe, 0, PAGE_SIZE);
14046 +               *page_ptr = new_page;
14047 +       } else
14048 +               this_pbe++;
14049 +
14050 +       return this_pbe;
14051 +}
14052 +
14053 +/* get_pageset1_load_addresses
14054 + *
14055 + * Description: We check here that pagedir & pages it points to won't collide
14056 + *             with pages where we're going to restore from the loaded pages
14057 + *             later.
14058 + * Returns:    Zero on success, one if couldn't find enough pages (shouldn't
14059 + *             happen).
14060 + */
14061 +
14062 +int toi_get_pageset1_load_addresses(void)
14063 +{
14064 +       int pfn, highallocd = 0, lowallocd = 0;
14065 +       int low_needed = pagedir1.size - get_highmem_size(pagedir1);
14066 +       int high_needed = get_highmem_size(pagedir1);
14067 +       int low_pages_for_highmem = 0;
14068 +       gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM;
14069 +       struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL,
14070 +                   *low_pbe_page;
14071 +       struct pbe **last_high_pbe_ptr = &restore_highmem_pblist,
14072 +                  *this_high_pbe = NULL;
14073 +       int orig_low_pfn, orig_high_pfn;
14074 +       int high_pbes_done = 0, low_pbes_done = 0;
14075 +       int low_direct = 0, high_direct = 0;
14076 +       int high_to_free, low_to_free, result = 0;
14077 +
14078 +       /*
14079 +        * We need to duplicate pageset1's map because memory_bm_next_pfn's state
14080 +        * gets stomped on by the PagePageset1() test in setup_pbes.
14081 +        */
14082 +       memory_bm_create(&dup_map1, GFP_KERNEL, 0);
14083 +       memory_bm_dup(&pageset1_map, &dup_map1);
14084 +
14085 +       memory_bm_create(&dup_map2, GFP_KERNEL, 0);
14086 +       memory_bm_dup(&pageset1_map, &dup_map2);
14087 +
14088 +       memory_bm_position_reset(&pageset1_map);
14089 +       memory_bm_position_reset(&dup_map1);
14090 +       memory_bm_position_reset(&dup_map2);
14091 +
14092 +       last_low_pbe_ptr = &restore_pblist;
14093 +
14094 +       /* First, allocate pages for the start of our pbe lists. */
14095 +       if (high_needed) {
14096 +               high_pbe_page = ___toi_get_nonconflicting_page(1);
14097 +               if (!high_pbe_page) {
14098 +                       result = -ENOMEM;
14099 +                       goto out;
14100 +               }
14101 +               this_high_pbe = (struct pbe *) kmap(high_pbe_page);
14102 +               memset(this_high_pbe, 0, PAGE_SIZE);
14103 +       }
14104 +
14105 +       low_pbe_page = ___toi_get_nonconflicting_page(0);
14106 +       if (!low_pbe_page) {
14107 +               result = -ENOMEM;
14108 +               goto out;
14109 +       }
14110 +       this_low_pbe = (struct pbe *) page_address(low_pbe_page);
14111 +
14112 +       /*
14113 +        * Next, allocate all possible memory to find where we can
14114 +        * load data directly into destination pages. I'd like to do
14115 +        * this in bigger chunks, but then we can't free pages
14116 +        * individually later.
14117 +        */
14118 +
14119 +       do {
14120 +               page = toi_alloc_page(30, flags);
14121 +               if (page)
14122 +                       SetPagePageset1Copy(page);
14123 +       } while (page);
14124 +
14125 +       /*
14126 +        * Find out how many high- and lowmem pages we allocated above,
14127 +        * and how many pages we can reload directly to their original
14128 +        * location.
14129 +        */
14130 +       BITMAP_FOR_EACH_SET(pageset1_copy_map, pfn) {
14131 +               int is_high;
14132 +               page = pfn_to_page(pfn);
14133 +               is_high = PageHighMem(page);
14134 +
14135 +               if (PagePageset1(page)) {
14136 +                       if (test_action_state(TOI_NO_DIRECT_LOAD)) {
14137 +                               ClearPagePageset1Copy(page);
14138 +                               toi__free_page(30, page);
14139 +                               continue;
14140 +                       } else {
14141 +                               if (is_high)
14142 +                                       high_direct++;
14143 +                               else
14144 +                                       low_direct++;
14145 +                       }
14146 +               } else {
14147 +                       if (is_high)
14148 +                               highallocd++;
14149 +                       else
14150 +                               lowallocd++;
14151 +               }
14152 +       }
14153 +
14154 +       high_needed -= high_direct;
14155 +       low_needed -= low_direct;
14156 +
14157 +       /*
14158 +        * Do we need to use some lowmem pages for the copies of highmem
14159 +        * pages?
14160 +        */
14161 +       if (high_needed > highallocd) {
14162 +               low_pages_for_highmem = high_needed - highallocd;
14163 +               high_needed -= low_pages_for_highmem;
14164 +               low_needed += low_pages_for_highmem;
14165 +       }
14166 +
14167 +       high_to_free = highallocd - high_needed;
14168 +       low_to_free = lowallocd - low_needed;
14169 +
14170 +       /*
14171 +        * Now generate our pbes (which will be used for the atomic restore),
14172 +        * and free unneeded pages.
14173 +        */
14174 +       BITMAP_FOR_EACH_SET(pageset1_copy_map, pfn) {
14175 +               int is_high;
14176 +               page = pfn_to_page(pfn);
14177 +               is_high = PageHighMem(page);
14178 +
14179 +               if (PagePageset1(page))
14180 +                       continue;
14181 +
14182 +               /* Free the page? */
14183 +               if ((is_high && high_to_free) ||
14184 +                   (!is_high && low_to_free)) {
14185 +                       ClearPagePageset1Copy(page);
14186 +                       toi__free_page(30, page);
14187 +                       if (is_high)
14188 +                               high_to_free--;
14189 +                       else
14190 +                               low_to_free--;
14191 +                       continue;
14192 +               }
14193 +
14194 +               /* Nope. We're going to use this page. Add a pbe. */
14195 +               if (is_high || low_pages_for_highmem) {
14196 +                       struct page *orig_page;
14197 +                       high_pbes_done++;
14198 +                       if (!is_high)
14199 +                               low_pages_for_highmem--;
14200 +                       do {
14201 +                               orig_high_pfn = memory_bm_next_pfn(&dup_map1);
14202 +                               BUG_ON(orig_high_pfn == BM_END_OF_MAP);
14203 +                               orig_page = pfn_to_page(orig_high_pfn);
14204 +                       } while (!PageHighMem(orig_page) ||
14205 +                                       load_direct(orig_page));
14206 +
14207 +                       this_high_pbe->orig_address = orig_page;
14208 +                       this_high_pbe->address = page;
14209 +                       this_high_pbe->next = NULL;
14210 +                       if (last_high_pbe_page != high_pbe_page) {
14211 +                               *last_high_pbe_ptr =
14212 +                                       (struct pbe *) high_pbe_page;
14213 +                               if (!last_high_pbe_page)
14214 +                                       last_high_pbe_page = high_pbe_page;
14215 +                       } else
14216 +                               *last_high_pbe_ptr = this_high_pbe;
14217 +                       last_high_pbe_ptr = &this_high_pbe->next;
14218 +                       if (last_high_pbe_page != high_pbe_page) {
14219 +                               kunmap(last_high_pbe_page);
14220 +                               last_high_pbe_page = high_pbe_page;
14221 +                       }
14222 +                       this_high_pbe = get_next_pbe(&high_pbe_page,
14223 +                                       this_high_pbe, 1);
14224 +                       if (IS_ERR(this_high_pbe)) {
14225 +                               printk(KERN_INFO "Failed to allocate a highmem pbe.\n");
14226 +                               result = -ENOMEM;
14227 +                               goto out;
14228 +                       }
14229 +               } else {
14230 +                       struct page *orig_page;
14231 +                       low_pbes_done++;
14232 +                       do {
14233 +                               orig_low_pfn = memory_bm_next_pfn(&dup_map2);
14234 +                               BUG_ON(orig_low_pfn == BM_END_OF_MAP);
14235 +                               orig_page = pfn_to_page(orig_low_pfn);
14236 +                       } while (PageHighMem(orig_page) ||
14237 +                                       load_direct(orig_page));
14238 +
14239 +                       this_low_pbe->orig_address = page_address(orig_page);
14240 +                       this_low_pbe->address = page_address(page);
14241 +                       this_low_pbe->next = NULL;
14242 +                       *last_low_pbe_ptr = this_low_pbe;
14243 +                       last_low_pbe_ptr = &this_low_pbe->next;
14244 +                       this_low_pbe = get_next_pbe(&low_pbe_page,
14245 +                                       this_low_pbe, 0);
14246 +                       if (IS_ERR(this_low_pbe)) {
14247 +                               printk(KERN_INFO "Failed to allocate a lowmem pbe.\n");
14248 +                               result = -ENOMEM;
14249 +                               goto out;
14249 +                       }
14250 +               }
14251 +       }
14252 +
14253 +       if (high_pbe_page)
14254 +               kunmap(high_pbe_page);
14255 +
14256 +       if (last_high_pbe_page != high_pbe_page) {
14257 +               if (last_high_pbe_page)
14258 +                       kunmap(last_high_pbe_page);
14259 +               toi__free_page(29, high_pbe_page);
14260 +       }
14261 +
14262 +       free_conflicting_pages();
14263 +
14264 +out:
14265 +       memory_bm_free(&dup_map1, 0);
14266 +       memory_bm_free(&dup_map2, 0);
14267 +       return result;
14268 +}
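
A worked example of the accounting above, with hypothetical figures:
pagedir1 holds 120 highmem and 300 lowmem pages, of which 20 highmem
and 50 lowmem pages can be loaded directly, leaving high_needed = 100
and low_needed = 250. If the allocation loop obtained only 80 highmem
pages, low_pages_for_highmem becomes 20, high_needed drops to 80 and
low_needed rises to 270; any surplus on either side is freed before
the pbes are generated.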
14269 +
14270 +int add_boot_kernel_data_pbe(void)
14271 +{
14272 +       this_low_pbe->address = (char *) __toi_get_nonconflicting_page();
14273 +       if (!this_low_pbe->address) {
14274 +               printk(KERN_INFO "Failed to get bkd atomic restore buffer.\n");
14275 +               return -ENOMEM;
14276 +       }
14277 +
14278 +       toi_bkd.size = sizeof(toi_bkd);
14279 +       memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd));
14280 +
14281 +       *last_low_pbe_ptr = this_low_pbe;
14282 +       this_low_pbe->orig_address = (char *) boot_kernel_data_buffer;
14283 +       this_low_pbe->next = NULL;
14284 +       return 0;
14285 +}
14286 diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h
14287 new file mode 100644
14288 index 0000000..9d0d929
14289 --- /dev/null
14290 +++ b/kernel/power/tuxonice_pagedir.h
14291 @@ -0,0 +1,50 @@
14292 +/*
14293 + * kernel/power/tuxonice_pagedir.h
14294 + *
14295 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
14296 + *
14297 + * This file is released under the GPLv2.
14298 + *
14299 + * Declarations for routines for handling pagesets.
14300 + */
14301 +
14302 +#ifndef KERNEL_POWER_PAGEDIR_H
14303 +#define KERNEL_POWER_PAGEDIR_H
14304 +
14305 +/* Pagedir
14306 + *
14307 + * Contains the metadata for a set of pages saved in the image.
14308 + */
14309 +
14310 +struct pagedir {
14311 +       int id;
14312 +       long size;
14313 +#ifdef CONFIG_HIGHMEM
14314 +       long size_high;
14315 +#endif
14316 +};
14317 +
14318 +#ifdef CONFIG_HIGHMEM
14319 +#define get_highmem_size(pagedir) (pagedir.size_high)
14320 +#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0)
14321 +#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0)
14322 +#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high)
14323 +#else
14324 +#define get_highmem_size(pagedir) (0)
14325 +#define set_highmem_size(pagedir, sz) do { } while (0)
14326 +#define inc_highmem_size(pagedir) do { } while (0)
14327 +#define get_lowmem_size(pagedir) (pagedir.size)
14328 +#endif
14329 +
14330 +extern struct pagedir pagedir1, pagedir2;
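
The accessors let callers account highmem pages without #ifdefs. A
minimal usage sketch (function name hypothetical), classifying a page
into pagedir1:

    static inline void example_count_page(struct page *page)
    {
            pagedir1.size++;
            if (PageHighMem(page))
                    inc_highmem_size(pagedir1);
    }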
14331 +
14332 +extern void toi_copy_pageset1(void);
14333 +
14334 +extern int toi_get_pageset1_load_addresses(void);
14335 +
14336 +extern unsigned long __toi_get_nonconflicting_page(void);
14337 +struct page *___toi_get_nonconflicting_page(int can_be_highmem);
14338 +
14339 +extern void toi_reset_alt_image_pageset2_pfn(void);
14340 +extern int add_boot_kernel_data_pbe(void);
14341 +#endif
14342 diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c
14343 new file mode 100644
14344 index 0000000..23e1873
14345 --- /dev/null
14346 +++ b/kernel/power/tuxonice_pageflags.c
14347 @@ -0,0 +1,46 @@
14348 +/*
14349 + * kernel/power/tuxonice_pageflags.c
14350 + *
14351 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
14352 + *
14353 + * This file is released under the GPLv2.
14354 + *
14355 + * Routines for serialising and relocating pageflags in which we
14356 + * store our image metadata.
14357 + */
14358 +
14359 +#include <linux/kernel.h>
14360 +#include <linux/mm.h>
14361 +#include <linux/module.h>
14362 +#include <linux/bitops.h>
14363 +#include <linux/list.h>
14364 +#include <linux/suspend.h>
14365 +#include "tuxonice_pageflags.h"
14366 +#include "tuxonice_modules.h"
14367 +#include "tuxonice_pagedir.h"
14368 +#include "tuxonice.h"
14369 +
14370 +struct memory_bitmap pageset2_map;
14371 +struct memory_bitmap page_resave_map;
14372 +struct memory_bitmap io_map;
14373 +struct memory_bitmap nosave_map;
14374 +struct memory_bitmap free_map;
14375 +
14376 +int toi_pageflags_space_needed(void)
14377 +{
14378 +       int total = sizeof(unsigned int);
14379 +       struct zone_bitmap *zone_bm;
14380 +       struct bm_block *bb;
14381 +
14384 +       for (zone_bm = pageset1_map.zone_bm_list; zone_bm;
14385 +                       zone_bm = zone_bm->next) {
14386 +               total += 2 * sizeof(unsigned long) + sizeof(unsigned int);
14387 +
14388 +               for (bb = zone_bm->bm_blocks; bb; bb = bb->next)
14389 +                       total += PAGE_SIZE;
14390 +       }
14391 +
14392 +       return total;
14393 +}
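
A worked example of the sizing (hypothetical layout: one zone whose
bitmap spans three bm_blocks, on a 64-bit machine with 4k pages):

    sizeof(unsigned int)                                /* =     4 */
    + 2 * sizeof(unsigned long) + sizeof(unsigned int)  /* =    20 */
    + 3 * PAGE_SIZE                                     /* = 12288 */

for a total of 12312 bytes.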
14394 diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h
14395 new file mode 100644
14396 index 0000000..84142d2
14397 --- /dev/null
14398 +++ b/kernel/power/tuxonice_pageflags.h
14399 @@ -0,0 +1,62 @@
14400 +/*
14401 + * kernel/power/tuxonice_pageflags.h
14402 + *
14403 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
14404 + *
14405 + * This file is released under the GPLv2.
14406 + */
14407 +
14408 +#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H
14409 +#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H
14410 +
14411 +#include "power.h"
14412 +
14413 +extern struct memory_bitmap pageset1_map;
14414 +extern struct memory_bitmap pageset1_copy_map;
14415 +extern struct memory_bitmap pageset2_map;
14416 +extern struct memory_bitmap page_resave_map;
14417 +extern struct memory_bitmap io_map;
14418 +extern struct memory_bitmap nosave_map;
14419 +extern struct memory_bitmap free_map;
14420 +
14421 +#define PagePageset1(page) (memory_bm_test_bit(&pageset1_map, page_to_pfn(page)))
14422 +#define SetPagePageset1(page) (memory_bm_set_bit(&pageset1_map, page_to_pfn(page)))
14423 +#define ClearPagePageset1(page) (memory_bm_clear_bit(&pageset1_map, page_to_pfn(page)))
14424 +
14425 +#define PagePageset1Copy(page) (memory_bm_test_bit(&pageset1_copy_map, page_to_pfn(page)))
14426 +#define SetPagePageset1Copy(page) (memory_bm_set_bit(&pageset1_copy_map, page_to_pfn(page)))
14427 +#define ClearPagePageset1Copy(page) \
14428 +       (memory_bm_clear_bit(&pageset1_copy_map, page_to_pfn(page)))
14429 +
14430 +#define PagePageset2(page) (memory_bm_test_bit(&pageset2_map, page_to_pfn(page)))
14431 +#define SetPagePageset2(page) (memory_bm_set_bit(&pageset2_map, page_to_pfn(page)))
14432 +#define ClearPagePageset2(page) (memory_bm_clear_bit(&pageset2_map, page_to_pfn(page)))
14433 +
14434 +#define PageWasRW(page) (memory_bm_test_bit(&pageset2_map, page_to_pfn(page)))
14435 +#define SetPageWasRW(page) (memory_bm_set_bit(&pageset2_map, page_to_pfn(page)))
14436 +#define ClearPageWasRW(page) (memory_bm_clear_bit(&pageset2_map, page_to_pfn(page)))
14437 +
14438 +#define PageResave(page) (page_resave_map.zone_bm_list ? \
14439 +       memory_bm_test_bit(&page_resave_map, page_to_pfn(page)) : 0)
14440 +#define SetPageResave(page) (memory_bm_set_bit(&page_resave_map, page_to_pfn(page)))
14441 +#define ClearPageResave(page) (memory_bm_clear_bit(&page_resave_map, page_to_pfn(page)))
14442 +
14443 +#define PageNosave(page) (nosave_map.zone_bm_list ? \
14444 +               memory_bm_test_bit(&nosave_map, page_to_pfn(page)) : 0)
14445 +#define SetPageNosave(page) (memory_bm_set_bit(&nosave_map, page_to_pfn(page)))
14446 +#define ClearPageNosave(page) (memory_bm_clear_bit(&nosave_map, page_to_pfn(page)))
14447 +
14448 +#define PageNosaveFree(page) (free_map.zone_bm_list ? \
14449 +               memory_bm_test_bit(&free_map, page_to_pfn(page)) : 0)
14450 +#define SetPageNosaveFree(page) (memory_bm_set_bit(&free_map, page_to_pfn(page)))
14451 +#define ClearPageNosaveFree(page) (memory_bm_clear_bit(&free_map, page_to_pfn(page)))
14452 +
14453 +extern void save_pageflags(struct memory_bitmap *pagemap);
14454 +extern int load_pageflags(struct memory_bitmap *pagemap);
14455 +extern int toi_pageflags_space_needed(void);
14456 +
14457 +#define BITMAP_FOR_EACH_SET(map, pfn) \
14458 +       memory_bm_position_reset(&map); \
14459 +       for (pfn = memory_bm_next_pfn(&map); pfn != BM_END_OF_MAP; \
14460 +               pfn = memory_bm_next_pfn(&map))
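+
+/*
+ * Typical use (handle_pfn() is a hypothetical consumer; see
+ * toi_recalculate_image_contents() for a real one):
+ *
+ *     unsigned long pfn;
+ *
+ *     BITMAP_FOR_EACH_SET(pageset2_map, pfn)
+ *             handle_pfn(pfn);
+ *
+ * Note that the macro expands to two statements (a position reset and
+ * the for loop), so it must not be the unbraced body of an if/else.
+ */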
14461 +#endif
14462 diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c
14463 new file mode 100644
14464 index 0000000..d1d5f07
14465 --- /dev/null
14466 +++ b/kernel/power/tuxonice_power_off.c
14467 @@ -0,0 +1,281 @@
14468 +/*
14469 + * kernel/power/tuxonice_power_off.c
14470 + *
14471 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
14472 + *
14473 + * This file is released under the GPLv2.
14474 + *
14475 + * Support for powering down.
14476 + */
14477 +
14478 +#include <linux/device.h>
14479 +#include <linux/suspend.h>
14480 +#include <linux/mm.h>
14481 +#include <linux/pm.h>
14482 +#include <linux/reboot.h>
14483 +#include <linux/cpu.h>
14484 +#include <linux/console.h>
14485 +#include <linux/fs.h>
14486 +#include "tuxonice.h"
14487 +#include "tuxonice_ui.h"
14488 +#include "tuxonice_power_off.h"
14489 +#include "tuxonice_sysfs.h"
14490 +#include "tuxonice_modules.h"
14491 +#include "tuxonice_io.h"
14492 +
14493 +unsigned long toi_poweroff_method; /* 0 - Kernel power off */
14494 +EXPORT_SYMBOL_GPL(toi_poweroff_method);
14495 +
14496 +static int wake_delay;
14497 +static char lid_state_file[256], wake_alarm_dir[256];
14498 +static struct file *lid_file, *alarm_file, *epoch_file;
14499 +static int post_wake_state = -1;
14500 +
14501 +static int did_suspend_to_both;
14502 +
14503 +/*
14504 + * __toi_power_down
14505 + * Functionality   : Powers down or reboots the computer once the image
14506 + *                   has been written to disk.
14507 + * Key Assumptions : Able to reboot/power down via the code called, or
14508 + *                   that the warning emitted if those calls fail will be
14509 + *                   visible to the user (i.e. printk resumes devices).
14510 + */
14511 +
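+/*
+ * Method values match the powerdown_method sysfs entry (0-5):
+ * 0 = plain kernel power off, 3 = suspend to RAM after saving the image
+ * ("suspend to both"), 4 = platform (ACPI S4) hibernation entry,
+ * 5 = historic entry only. Values with no case below simply fall
+ * through to the generic power off.
+ */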
14512 +static void __toi_power_down(int method)
14513 +{
14514 +       int error;
14515 +
14516 +       toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." :
14517 +                       "Powering down.");
14518 +
14519 +       if (test_result_state(TOI_ABORTED))
14520 +               goto out;
14521 +
14522 +       if (test_action_state(TOI_REBOOT))
14523 +               kernel_restart(NULL);
14524 +
14525 +       switch (method) {
14526 +       case 0:
14527 +               break;
14528 +       case 3:
14529 +               /*
14530 +                * Re-read the overwritten part of pageset2 to make post-resume
14531 +                * faster.
14532 +                */
14533 +               if (read_pageset2(1))
14534 +                       panic("Attempt to reload pagedir 2 failed. Try rebooting.");
14535 +
14536 +               error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
14537 +               if (!error) {
14538 +                       error = suspend_devices_and_enter(PM_SUSPEND_MEM);
14539 +                       if (!error)
14540 +                               did_suspend_to_both = 1;
14541 +               }
14542 +               pm_notifier_call_chain(PM_POST_SUSPEND);
14543 +
14544 +               /* Success - we're now post-resume-from-ram */
14545 +               if (did_suspend_to_both)
14546 +                       return;
14547 +
14548 +               /* Failed to suspend to ram - do normal power off */
14549 +               break;
14550 +       case 4:
14551 +               /*
14552 +                * If this succeeds, it doesn't return. If it fails, fall
14553 +                * through to a simple powerdown.
14554 +                */
14555 +               hibernation_platform_enter();
14556 +               break;
14557 +       case 5:
14558 +               /* Historic entry only now */
14559 +               break;
14560 +       }
14561 +
14562 +       if (method && method != 5)
14563 +               toi_cond_pause(1,
14564 +                       "Falling back to alternate power off method.");
14565 +
14566 +       if (test_result_state(TOI_ABORTED))
14567 +               goto out;
14568 +
14569 +       kernel_power_off();
14570 +       kernel_halt();
14571 +       toi_cond_pause(1, "Powerdown failed.");
14572 +       while (1)
14573 +               cpu_relax();
14574 +
14575 +out:
14576 +       if (read_pageset2(1))
14577 +               panic("Attempt to reload pagedir 2 failed. Try rebooting.");
14578 +       return;
14579 +}
14580 +
14581 +#define CLOSE_FILE(file) \
14582 +       if (file) { \
14583 +               filp_close(file, NULL); file = NULL; \
14584 +       }
14585 +
14586 +static void powerdown_cleanup(int toi_or_resume)
14587 +{
14588 +       if (!toi_or_resume)
14589 +               return;
14590 +
14591 +       CLOSE_FILE(lid_file);
14592 +       CLOSE_FILE(alarm_file);
14593 +       CLOSE_FILE(epoch_file);
14594 +}
14595 +
14596 +static void open_file(char *format, char *arg, struct file **var, int mode,
14597 +               char *desc)
14598 +{
14599 +       char buf[256];
14600 +
14601 +       if (strlen(arg)) {
14602 +               snprintf(buf, sizeof(buf), format, arg);
14603 +               *var = filp_open(buf, mode, 0);
14604 +               if (IS_ERR(*var) || !*var) {
14605 +                       printk(KERN_INFO "Failed to open %s file '%s' (%p).\n",
14606 +                               desc, buf, *var);
14607 +                       *var = NULL;
14608 +               }
14609 +       }
14610 +}
14611 +
14612 +static int powerdown_init(int toi_or_resume)
14613 +{
14614 +       if (!toi_or_resume)
14615 +               return 0;
14616 +
14617 +       did_suspend_to_both = 0;
14618 +
14619 +       open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file,
14620 +                       O_RDONLY, "lid");
14621 +
14622 +       if (strlen(wake_alarm_dir)) {
14623 +               open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir,
14624 +                               &alarm_file, O_WRONLY, "alarm");
14625 +
14626 +               open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir,
14627 +                               &epoch_file, O_RDONLY, "epoch");
14628 +       }
14629 +
14630 +       return 0;
14631 +}
14632 +
14633 +static int lid_closed(void)
14634 +{
14635 +       char array[25];
14636 +       ssize_t size;
14637 +       loff_t pos = 0;
14638 +
14639 +       if (!lid_file)
14640 +               return 0;
14641 +
14642 +       size = vfs_read(lid_file, (char __user *) array, sizeof(array) - 1, &pos);
14643 +       if ((int) size < 1) {
14644 +               printk(KERN_INFO "Failed to read lid state file (%d).\n",
14645 +                       (int) size);
14646 +               return 0;
14647 +       }
14648 +       array[size] = '\0'; /* vfs_read() does not NUL-terminate */
14649 +       if (!strcmp(array, "state:      closed\n"))
14650 +               return 1;
14651 +
14652 +       return 0;
14653 +}
14654 +
14655 +static void write_alarm_file(int value)
14656 +{
14657 +       ssize_t size;
14658 +       char buf[40];
14659 +       loff_t pos = 0;
14660 +
14661 +       if (!alarm_file)
14662 +               return;
14663 +
14664 +       sprintf(buf, "%d\n", value);
14665 +
14666 +       size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos);
14667 +
14668 +       if (size < 0)
14669 +               printk(KERN_INFO "Error %d writing alarm value %s.\n",
14670 +                               (int) size, buf);
14671 +}
14672 +
14673 +/**
14674 + * toi_check_resleep: See whether to powerdown again after waking.
14675 + *
14676 + * After waking, check whether we should powerdown again in a (usually
14677 + * different) way. We only do this if the lid switch is still closed.
14678 + */
14679 +void toi_check_resleep(void)
14680 +{
14681 +       /* We only return if we suspended to ram and woke. */
14682 +       if (lid_closed() && post_wake_state >= 0)
14683 +               __toi_power_down(post_wake_state);
14684 +}
14685 +
14686 +void toi_power_down(void)
14687 +{
14688 +       if (alarm_file && wake_delay) {
14689 +               char array[25];
14690 +               loff_t pos = 0;
14691 +               ssize_t size = vfs_read(epoch_file, (char __user *) array,
14692 +                               sizeof(array) - 1, &pos);
14693 +
14694 +               if (((int) size) < 1)
14695 +                       printk(KERN_INFO "Failed to read epoch file (%d).\n",
14696 +                                       (int) size);
14697 +               else {
14698 +                       unsigned long since_epoch;
14699 +                       array[size] = '\0'; /* NUL-terminate before parsing */
14700 +                       since_epoch = simple_strtol(array, NULL, 0);
14700 +
14701 +                       /* Clear any wakeup time. */
14702 +                       write_alarm_file(0);
14703 +
14704 +                       /* Set new wakeup time. */
14705 +                       write_alarm_file(since_epoch + wake_delay);
14706 +               }
14707 +       }
14708 +
14709 +       __toi_power_down(toi_poweroff_method);
14710 +
14711 +       toi_check_resleep();
14712 +}
14713 +EXPORT_SYMBOL_GPL(toi_power_down);
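+
+/*
+ * Illustrative usage from userspace (device names are examples and the
+ * /sys/power/tuxonice paths assume the usual sysfs location):
+ *
+ *     echo lid/LID0 > /sys/power/tuxonice/lid_file
+ *     echo rtc0 > /sys/power/tuxonice/wake_alarm_dir
+ *     echo 300 > /sys/power/tuxonice/wake_delay
+ *
+ * would request an RTC wakeup five minutes after the image is written.
+ */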
14714 +
14715 +static struct toi_sysfs_data sysfs_params[] = {
14716 +#if defined(CONFIG_ACPI)
14717 +       SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL),
14718 +       SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL),
14719 +       SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL),
14720 +       SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0,
14721 +                       NULL),
14722 +       SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0),
14723 +       SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both,
14724 +               0, 0, 0, NULL)
14725 +#endif
14726 +};
14727 +
14728 +static struct toi_module_ops powerdown_ops = {
14729 +       .type                           = MISC_HIDDEN_MODULE,
14730 +       .name                           = "poweroff",
14731 +       .initialise                     = powerdown_init,
14732 +       .cleanup                        = powerdown_cleanup,
14733 +       .directory                      = "[ROOT]",
14734 +       .module                         = THIS_MODULE,
14735 +       .sysfs_data                     = sysfs_params,
14736 +       .num_sysfs_entries              = sizeof(sysfs_params) /
14737 +               sizeof(struct toi_sysfs_data),
14738 +};
14739 +
14740 +int toi_poweroff_init(void)
14741 +{
14742 +       return toi_register_module(&powerdown_ops);
14743 +}
14744 +
14745 +void toi_poweroff_exit(void)
14746 +{
14747 +       toi_unregister_module(&powerdown_ops);
14748 +}
14749 diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h
14750 new file mode 100644
14751 index 0000000..a85633a
14752 --- /dev/null
14753 +++ b/kernel/power/tuxonice_power_off.h
14754 @@ -0,0 +1,24 @@
14755 +/*
14756 + * kernel/power/tuxonice_power_off.h
14757 + *
14758 + * Copyright (C) 2006-2008 Nigel Cunningham (nigel at tuxonice net)
14759 + *
14760 + * This file is released under the GPLv2.
14761 + *
14762 + * Support for powering down.
14763 + */
14764 +
14765 +int toi_pm_state_finish(void);
14766 +void toi_power_down(void);
14767 +extern unsigned long toi_poweroff_method;
14768 +int toi_poweroff_init(void);
14769 +void toi_poweroff_exit(void);
14770 +void toi_check_resleep(void);
14771 +
14772 +extern int platform_begin(int platform_mode);
14773 +extern int platform_pre_snapshot(int platform_mode);
14774 +extern void platform_leave(int platform_mode);
14775 +extern void platform_end(int platform_mode);
14776 +extern void platform_finish(int platform_mode);
14777 +extern int platform_pre_restore(int platform_mode);
14778 +extern void platform_restore_cleanup(int platform_mode);
14779 diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c
14780 new file mode 100644
14781 index 0000000..a49adde
14782 --- /dev/null
14783 +++ b/kernel/power/tuxonice_prepare_image.c
14784 @@ -0,0 +1,1041 @@
14785 +/*
14786 + * kernel/power/tuxonice_prepare_image.c
14787 + *
14788 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
14789 + *
14790 + * This file is released under the GPLv2.
14791 + *
14792 + * We need to eat memory until we can:
14793 + * 1. Perform the save without changing anything (RAM_NEEDED < #pages)
14794 + * 2. Fit it all in available space (toiActiveAllocator->available_space() >=
14795 + *    main_storage_needed())
14796 + * 3. Reload the pagedir and pageset1 to places that don't collide with their
14797 + *    final destinations, not knowing to what extent the resumed kernel will
14798 + *    overlap with the one loaded at boot time. I think the resumed kernel
14799 + *    should overlap completely, but I don't want to rely on this as it is
14800 + *    an unproven assumption. We therefore assume there will be no overlap at
14801 + *    all (worse case).
14802 + * 4. Meet the user's requested limit (if any) on the size of the image.
14803 + *    The limit is in MB, so pages/256 (assuming 4K pages).
14804 + *
14805 + */
14806 +
14807 +#include <linux/module.h>
14808 +#include <linux/highmem.h>
14809 +#include <linux/freezer.h>
14810 +#include <linux/hardirq.h>
14811 +#include <linux/mmzone.h>
14812 +#include <linux/console.h>
14813 +
14814 +#include "tuxonice_pageflags.h"
14815 +#include "tuxonice_modules.h"
14816 +#include "tuxonice_io.h"
14817 +#include "tuxonice_ui.h"
14818 +#include "tuxonice_extent.h"
14819 +#include "tuxonice_prepare_image.h"
14820 +#include "tuxonice_block_io.h"
14821 +#include "tuxonice.h"
14822 +#include "tuxonice_checksum.h"
14823 +#include "tuxonice_sysfs.h"
14824 +#include "tuxonice_alloc.h"
14825 +
14826 +static long num_nosave, header_space_allocated, main_storage_allocated,
14827 +          storage_available;
14828 +long extra_pd1_pages_allowance = CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE;
14829 +int image_size_limit;
14830 +static int no_ps2_needed;
14831 +
14832 +struct attention_list {
14833 +       struct task_struct *task;
14834 +       struct attention_list *next;
14835 +};
14836 +
14837 +static struct attention_list *attention_list;
14838 +
14839 +#define PAGESET1 0
14840 +#define PAGESET2 1
14841 +
14842 +void free_attention_list(void)
14843 +{
14844 +       struct attention_list *last = NULL;
14845 +
14846 +       while (attention_list) {
14847 +               last = attention_list;
14848 +               attention_list = attention_list->next;
14849 +               toi_kfree(6, last);
14850 +       }
14851 +}
14852 +
14853 +static int build_attention_list(void)
14854 +{
14855 +       int i, task_count = 0;
14856 +       struct task_struct *p;
14857 +       struct attention_list *next;
14858 +
14859 +       /*
14860 +        * Count all processes marked PF_NOFREEZE, plus the current task.
14861 +        */
14862 +       read_lock(&tasklist_lock);
14863 +       for_each_process(p)
14864 +               if ((p->flags & PF_NOFREEZE) || p == current)
14865 +                       task_count++;
14866 +       read_unlock(&tasklist_lock);
14867 +
14868 +       /*
14869 +        * Allocate attention list structs.
14870 +        */
14871 +       for (i = 0; i < task_count; i++) {
14872 +               struct attention_list *this =
14873 +                       toi_kzalloc(6, sizeof(struct attention_list),
14874 +                                       TOI_WAIT_GFP);
14875 +               if (!this) {
14876 +                       printk(KERN_INFO "Failed to allocate slab for "
14877 +                                       "attention list.\n");
14878 +                       free_attention_list();
14879 +                       return 1;
14880 +               }
14881 +               this->next = NULL;
14882 +               if (attention_list)
14883 +                       this->next = attention_list;
14884 +               attention_list = this;
14885 +       }
14886 +
14887 +       next = attention_list;
14888 +       read_lock(&tasklist_lock);
14889 +       for_each_process(p)
14890 +               if ((p->flags & PF_NOFREEZE) || p == current) {
14891 +                       next->task = p;
14892 +                       next = next->next;
14893 +               }
14894 +       read_unlock(&tasklist_lock);
14895 +       return 0;
14896 +}
14897 +
14898 +static void pageset2_full(void)
14899 +{
14900 +       struct zone *zone;
14901 +       unsigned long flags;
14902 +
14903 +       for_each_zone(zone) {
14904 +               spin_lock_irqsave(&zone->lru_lock, flags);
14905 +               if (zone_page_state(zone, NR_INACTIVE)) {
14906 +                       struct page *page;
14907 +                       list_for_each_entry(page, &zone->inactive_list, lru)
14908 +                               SetPagePageset2(page);
14909 +               }
14910 +               if (zone_page_state(zone, NR_ACTIVE)) {
14911 +                       struct page *page;
14912 +                       list_for_each_entry(page, &zone->active_list, lru)
14913 +                               SetPagePageset2(page);
14914 +               }
14915 +               spin_unlock_irqrestore(&zone->lru_lock, flags);
14916 +       }
14917 +}
14918 +
14919 +/*
14920 + * toi_mark_task_as_pageset
14921 + * Functionality   : Marks all the saveable pages belonging to a given process
14922 + *                  as belonging to a particular pageset.
14923 + */
14924 +
14925 +static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2)
14926 +{
14927 +       struct vm_area_struct *vma;
14928 +       struct mm_struct *mm;
14929 +
14930 +       mm = t->active_mm;
14931 +
14932 +       if (!mm || !mm->mmap)
14933 +               return;
14934 +
14935 +       if (!irqs_disabled())
14936 +               down_read(&mm->mmap_sem);
14937 +
14938 +       for (vma = mm->mmap; vma; vma = vma->vm_next) {
14939 +               unsigned long posn;
14940 +
14941 +               if (vma->vm_flags & (VM_PFNMAP | VM_IO | VM_RESERVED) ||
14942 +                   !vma->vm_start)
14943 +                       continue;
14944 +
14945 +               for (posn = vma->vm_start; posn < vma->vm_end;
14946 +                               posn += PAGE_SIZE) {
14947 +                       struct page *page = follow_page(vma, posn, 0);
14948 +                       if (!page)
14949 +                               continue;
14950 +
14951 +                       if (pageset2)
14952 +                               SetPagePageset2(page);
14953 +                       else {
14954 +                               ClearPagePageset2(page);
14955 +                               SetPagePageset1(page);
14956 +                       }
14957 +               }
14958 +       }
14959 +
14960 +       if (!irqs_disabled())
14961 +               up_read(&mm->mmap_sem);
14962 +}
14963 +
14964 +/* toi_mark_pages_for_pageset2
14965 + *
14966 + * Description:        Mark unshared pages in processes not needed for hibernate
14967 + *             so that they can be written out in a separate pagedir.
14968 + *             HighMem pages are simply marked as pageset2. They won't be
14969 + *             needed during hibernate.
14970 + */
14971 +
14972 +static void toi_mark_pages_for_pageset2(void)
14973 +{
14974 +       struct task_struct *p;
14975 +       struct attention_list *this = attention_list;
14976 +
14977 +       memory_bm_clear(&pageset2_map);
14978 +
14979 +       if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed)
14980 +               return;
14981 +
14982 +       if (test_action_state(TOI_PAGESET2_FULL))
14983 +               pageset2_full();
14984 +       else {
14985 +               read_lock(&tasklist_lock);
14986 +               for_each_process(p) {
14987 +                       if (!p->mm || (p->flags & PF_KTHREAD))
14988 +                               continue;
14989 +
14990 +                       toi_mark_task_as_pageset(p, PAGESET2);
14991 +               }
14992 +               read_unlock(&tasklist_lock);
14993 +       }
14994 +
14995 +       /*
14996 +        * Because the tasks in attention_list are ones related to hibernating,
14997 +        * we know that they won't go away under us.
14998 +        */
14999 +
15000 +       while (this) {
15001 +               if (!test_result_state(TOI_ABORTED))
15002 +                       toi_mark_task_as_pageset(this->task, PAGESET1);
15003 +               this = this->next;
15004 +       }
15005 +}
15006 +
15007 +/*
15008 + * The atomic copy of pageset1 is stored in pageset2 pages.
15009 + * But if pageset1 is larger (normally only just after boot),
15010 + * we need to allocate extra pages to store the atomic copy.
15011 + * The following data struct and functions are used to handle
15012 + * the allocation and freeing of that memory.
15013 + */
15014 +
15015 +static long extra_pages_allocated;
15016 +
15017 +struct extras {
15018 +       struct page *page;
15019 +       int order;
15020 +       struct extras *next;
15021 +};
15022 +
15023 +static struct extras *extras_list;
15024 +
15025 +/* toi_free_extra_pagedir_memory
15026 + *
15027 + * Description:        Free previously allocated extra pagedir memory.
15028 + */
15029 +void toi_free_extra_pagedir_memory(void)
15030 +{
15031 +       /* Free allocated pages */
15032 +       while (extras_list) {
15033 +               struct extras *this = extras_list;
15034 +               int i;
15035 +
15036 +               extras_list = this->next;
15037 +
15038 +               for (i = 0; i < (1 << this->order); i++)
15039 +                       ClearPageNosave(this->page + i);
15040 +
15041 +               toi_free_pages(9, this->page, this->order);
15042 +               toi_kfree(7, this);
15043 +       }
15044 +
15045 +       extra_pages_allocated = 0;
15046 +}
15047 +
15048 +/* toi_allocate_extra_pagedir_memory
15049 + *
15050 + * Description:        Allocate memory for making the atomic copy of pagedir1 in the
15051 + *             case where it is bigger than pagedir2.
15052 + * Arguments:  int     num_to_alloc: Number of extra pages needed.
15053 + * Result:     int.    Number of extra pages we now have allocated.
15054 + */
15055 +static int toi_allocate_extra_pagedir_memory(int extra_pages_needed)
15056 +{
15057 +       int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated;
15058 +       gfp_t flags = TOI_ATOMIC_GFP;
15059 +
15060 +       if (num_to_alloc < 1)
15061 +               return 0;
15062 +
15063 +       order = fls(num_to_alloc);
15064 +       if (order >= MAX_ORDER)
15065 +               order = MAX_ORDER - 1;
15066 +
15067 +       while (num_to_alloc) {
15068 +               struct page *newpage;
15069 +               unsigned long virt;
15070 +               struct extras *extras_entry;
15071 +
15072 +               while ((1 << order) > num_to_alloc)
15073 +                       order--;
15074 +
15075 +               extras_entry = (struct extras *) toi_kzalloc(7,
15076 +                       sizeof(struct extras), TOI_ATOMIC_GFP);
15077 +
15078 +               if (!extras_entry)
15079 +                       return extra_pages_allocated;
15080 +
15081 +               virt = toi_get_free_pages(9, flags, order);
15082 +               while (!virt && order) {
15083 +                       order--;
15084 +                       virt = toi_get_free_pages(9, flags, order);
15085 +               }
15086 +
15087 +               if (!virt) {
15088 +                       toi_kfree(7, extras_entry);
15089 +                       return extra_pages_allocated;
15090 +               }
15091 +
15092 +               newpage = virt_to_page(virt);
15093 +
15094 +               extras_entry->page = newpage;
15095 +               extras_entry->order = order;
15096 +               extras_entry->next = NULL;
15097 +
15098 +               if (extras_list)
15099 +                       extras_entry->next = extras_list;
15100 +
15101 +               extras_list = extras_entry;
15102 +
15103 +               for (j = 0; j < (1 << order); j++) {
15104 +                       SetPageNosave(newpage + j);
15105 +                       SetPagePageset1Copy(newpage + j);
15106 +               }
15107 +
15108 +               extra_pages_allocated += (1 << order);
15109 +               num_to_alloc -= (1 << order);
15110 +       }
15111 +
15112 +       return extra_pages_allocated;
15113 +}
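+
+/*
+ * Worked example: a request for 13 extra pages is normally satisfied by
+ * order-3, order-2 and order-0 allocations (8 + 4 + 1 pages), with the
+ * order reduced further whenever an allocation fails.
+ */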
15114 +
15115 +/*
15116 + * real_nr_free_pages: Count free pages (including pcp lists) in the zones
15117 + * selected by zone_idx_mask (1 << zone_idx() bits; all bits set = all zones).
15118 + */
15119 +long real_nr_free_pages(unsigned long zone_idx_mask)
15120 +{
15121 +       struct zone *zone;
15122 +       int result = 0, cpu;
15123 +
15124 +       /* PCP lists */
15125 +       for_each_zone(zone) {
15126 +               if (!populated_zone(zone))
15127 +                       continue;
15128 +
15129 +               if (!(zone_idx_mask & (1 << zone_idx(zone))))
15130 +                       continue;
15131 +
15132 +               for_each_online_cpu(cpu) {
15133 +                       struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
15134 +                       struct per_cpu_pages *pcp = &pset->pcp;
15135 +                       result += pcp->count;
15136 +               }
15137 +
15138 +               result += zone_page_state(zone, NR_FREE_PAGES);
15139 +       }
15140 +       return result;
15141 +}
15142 +EXPORT_SYMBOL_GPL(real_nr_free_pages);
15143 +
15144 +/*
15145 + * Discover how much extra memory will be required by the drivers
15146 + * when they're asked to hibernate. We can then ensure that amount
15147 + * of memory is available when we really want it.
15148 + */
15149 +static void get_extra_pd1_allowance(void)
15150 +{
15151 +       long orig_num_free = real_nr_free_pages(all_zones_mask), final;
15152 +
15153 +       toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
15154 +
15155 +       suspend_console();
15156 +       device_suspend(PMSG_FREEZE);
15157 +       device_pm_lock();
15158 +       local_irq_disable(); /* irqs might have been re-enabled on us */
15159 +       device_power_down(PMSG_FREEZE);
15160 +
15161 +       final = real_nr_free_pages(all_zones_mask);
15162 +
15163 +       device_power_up(PMSG_THAW);
15164 +       local_irq_enable();
15165 +       device_pm_unlock();
15166 +       device_resume(PMSG_THAW);
15167 +       resume_console();
15168 +
15169 +       extra_pd1_pages_allowance = max(
15170 +               orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE,
15171 +               (long) MIN_EXTRA_PAGES_ALLOWANCE);
15172 +}
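+
+/*
+ * Worked example: if 2000 pages were free before the trial device
+ * suspend and 1500 afterwards, the allowance becomes
+ * max(500 + MIN_EXTRA_PAGES_ALLOWANCE, MIN_EXTRA_PAGES_ALLOWANCE),
+ * i.e. 1000 pages with MIN_EXTRA_PAGES_ALLOWANCE at its defined
+ * value of 500.
+ */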
15173 +
15174 +/*
15175 + * Amount of storage needed, possibly taking into account the
15176 + * expected compression ratio and possibly also ignoring our
15177 + * allowance for extra pages.
15178 + */
15179 +static long main_storage_needed(int use_ecr,
15180 +               int ignore_extra_pd1_allow)
15181 +{
15182 +       return (pagedir1.size + pagedir2.size +
15183 +         (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) *
15184 +        (use_ecr ? toi_expected_compression_ratio() : 100) / 100;
15185 +}
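+
+/*
+ * Worked example (figures invented for illustration): with
+ * pagedir1.size + pagedir2.size = 50000 pages, an allowance of 500 and
+ * an expected compression ratio of 50%, main_storage_needed(1, 0)
+ * returns (50500 * 50) / 100 = 25250 pages.
+ */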
15186 +
15187 +/*
15188 + * Storage needed for the image header: computed in bytes, returned in pages.
15189 + */
15190 +static long header_storage_needed(void)
15191 +{
15192 +       long bytes = (int) sizeof(struct toi_header) +
15193 +                       toi_header_storage_for_modules() +
15194 +                       toi_pageflags_space_needed();
15195 +
15196 +       return DIV_ROUND_UP(bytes, PAGE_SIZE);
15197 +}
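+
+/*
+ * e.g. a 9000 byte header on a machine with 4K pages needs
+ * DIV_ROUND_UP(9000, 4096) = 3 pages.
+ */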
15198 +
15199 +/*
15200 + * When freeing memory, pages from either pageset might be freed.
15201 + *
15202 + * When seeking to free memory to be able to hibernate, for every ps1 page
15203 + * freed, we need 2 less pages for the atomic copy because there is one less
15204 + * page to copy and one more page into which data can be copied.
15205 + *
15206 + * Freeing ps2 pages saves us nothing directly. No more memory is available
15207 + * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but
15208 + * that's too much work to figure out.
15209 + *
15210 + * => ps1_to_free functions
15211 + *
15212 + * Of course if we just want to reduce the image size, because of storage
15213 + * limitations or an image size limit either ps will do.
15214 + *
15215 + * => any_to_free function
15216 + */
15217 +
15218 +static long highpages_ps1_to_free(void)
15219 +{
15220 +       return max_t(long, 0, DIV_ROUND_UP(get_highmem_size(pagedir1) -
15221 +               get_highmem_size(pagedir2), 2) - real_nr_free_high_pages());
15222 +}
15223 +
15224 +static long lowpages_ps1_to_free(void)
15225 +{
15226 +       return max_t(long, 0, DIV_ROUND_UP(get_lowmem_size(pagedir1) +
15227 +               extra_pd1_pages_allowance + MIN_FREE_RAM +
15228 +               toi_memory_for_modules(0) - get_lowmem_size(pagedir2) -
15229 +               real_nr_free_low_pages() - extra_pages_allocated, 2));
15230 +}
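+
+/*
+ * The divide-by-two above encodes the reasoning from the comment block
+ * earlier: each ps1 page freed both removes a page from the copy source
+ * and provides a page for the copy destination.
+ */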
15231 +
15232 +static long current_image_size(void)
15233 +{
15234 +       return pagedir1.size + pagedir2.size + header_space_allocated;
15235 +}
15236 +
15237 +static long storage_still_required(void)
15238 +{
15239 +       return max_t(long, 0, main_storage_needed(1, 1) - storage_available);
15240 +}
15241 +
15242 +static long ram_still_required(void)
15243 +{
15244 +       return max_t(long, 0, MIN_FREE_RAM + toi_memory_for_modules(0) -
15245 +               real_nr_free_low_pages() + 2 * extra_pd1_pages_allowance);
15246 +}
15247 +
15248 +static long any_to_free(int use_image_size_limit)
15249 +{
15250 +       long user_limit = (use_image_size_limit && image_size_limit > 0) ?
15251 +                       max_t(long, 0, current_image_size() -
15252 +                                       (image_size_limit << 8)) : 0,
15253 +               storage_limit = storage_still_required(),
15254 +               ram_limit = ram_still_required(),
15255 +               first_max = max(user_limit, storage_limit);
15256 +
15257 +       return max(first_max, ram_limit);
15258 +}
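+
+/*
+ * image_size_limit is expressed in MB; with 4K pages one MB is 256
+ * pages, hence the << 8 conversion above.
+ */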
15259 +
15260 +static int need_pageset2(void)
15261 +{
15262 +       return (real_nr_free_low_pages() + extra_pages_allocated -
15263 +               2 * extra_pd1_pages_allowance - MIN_FREE_RAM -
15264 +                toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size;
15265 +}
15266 +
15267 +/* amount_needed
15268 + *
15269 + * Calculates the amount by which the image size needs to be reduced to meet
15270 + * our constraints.
15271 + */
15272 +static long amount_needed(int use_image_size_limit)
15273 +{
15274 +       return max(highpages_ps1_to_free() + lowpages_ps1_to_free(),
15275 +                       any_to_free(use_image_size_limit));
15276 +}
15277 +
15278 +static long image_not_ready(int use_image_size_limit)
15279 +{
15280 +       toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
15281 +               "Amount still needed (%ld) > 0:%d. Header: %ld < %ld: %d,"
15282 +               " Storage allocd: %ld < %ld: %d.\n",
15283 +                       amount_needed(use_image_size_limit),
15284 +                       (amount_needed(use_image_size_limit) > 0),
15285 +                       header_space_allocated, header_storage_needed(),
15286 +                       header_space_allocated < header_storage_needed(),
15287 +                       main_storage_allocated,
15288 +                       main_storage_needed(1, 1),
15289 +                       main_storage_allocated < main_storage_needed(1, 1));
15290 +
15291 +       toi_cond_pause(0, NULL);
15292 +
15293 +       return (amount_needed(use_image_size_limit) > 0) ||
15294 +               header_space_allocated < header_storage_needed() ||
15295 +                main_storage_allocated < main_storage_needed(1, 1);
15296 +}
15297 +
15298 +static void display_failure_reason(int tries_exceeded)
15299 +{
15300 +       long storage_required = storage_still_required(),
15301 +           ram_required = ram_still_required(),
15302 +           high_ps1 = highpages_ps1_to_free(),
15303 +           low_ps1 = lowpages_ps1_to_free();
15304 +
15305 +       printk(KERN_INFO "Failed to prepare the image because...\n");
15306 +
15307 +       if (!storage_available) {
15308 +               printk(KERN_INFO "- You need some storage available to be "
15309 +                               "able to hibernate.\n");
15310 +               return;
15311 +       }
15312 +
15313 +       if (tries_exceeded)
15314 +               printk(KERN_INFO "- The maximum number of iterations was "
15315 +                               "reached without successfully preparing the "
15316 +                               "image.\n");
15317 +
15318 +       if (header_space_allocated < header_storage_needed()) {
15319 +               printk(KERN_INFO "- Insufficient header storage allocated. "
15320 +                               "Need %ld, have %ld.\n",
15321 +                               header_storage_needed(),
15322 +                               header_space_allocated);
15323 +               set_abort_result(TOI_INSUFFICIENT_STORAGE);
15324 +       }
15325 +
15326 +       if (storage_required) {
15327 +               printk(KERN_INFO " - We need at least %ld pages of storage "
15328 +                               "(ignoring the header), but only have %ld.\n",
15329 +                               main_storage_needed(1, 1),
15330 +                               main_storage_allocated);
15331 +               set_abort_result(TOI_INSUFFICIENT_STORAGE);
15332 +       }
15333 +
15334 +       if (ram_required) {
15335 +               printk(KERN_INFO " - We need %ld more free pages of low "
15336 +                               "memory.\n", ram_required);
15337 +               printk(KERN_INFO "     Minimum free     : %8d\n", MIN_FREE_RAM);
15338 +               printk(KERN_INFO "   + Reqd. by modules : %8ld\n",
15339 +                               toi_memory_for_modules(0));
15340 +               printk(KERN_INFO "   - Currently free   : %8ld\n",
15341 +                               real_nr_free_low_pages());
15342 +               printk(KERN_INFO "   + 2 * extra allow  : %8ld\n",
15343 +                               2 * extra_pd1_pages_allowance);
15344 +               printk(KERN_INFO "                      : ========\n");
15345 +               printk(KERN_INFO "     Still needed     : %8ld\n",
15346 +                               ram_required);
15347 +
15348 +               /* Print breakdown of memory needed for modules */
15349 +               toi_memory_for_modules(1);
15350 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
15351 +       }
15352 +
15353 +       if (high_ps1) {
15354 +               printk(KERN_INFO "- We need to free %ld highmem pageset 1 "
15355 +                               "pages.\n", high_ps1);
15356 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
15357 +       }
15358 +
15359 +       if (low_ps1) {
15360 +               printk(KERN_INFO " - We need to free %ld lowmem pageset 1 "
15361 +                               "pages.\n", low_ps1);
15362 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
15363 +       }
15364 +}
15365 +
15366 +static void display_stats(int always, int sub_extra_pd1_allow)
15367 +{
15368 +       char buffer[255];
15369 +       snprintf(buffer, 254,
15370 +               "Free:%ld(%ld). Sets:%ld(%ld),%ld(%ld). Header:%ld/%ld. "
15371 +               "Nosave:%ld-%ld=%ld. Storage:%lu/%lu(%lu=>%lu). "
15372 +               "Needed:%ld,%ld,%ld(%d,%ld,%ld,%ld) (PS2:%s)\n",
15373 +
15374 +               /* Free */
15375 +               real_nr_free_pages(all_zones_mask),
15376 +               real_nr_free_low_pages(),
15377 +
15378 +               /* Sets */
15379 +               pagedir1.size, pagedir1.size - get_highmem_size(pagedir1),
15380 +               pagedir2.size, pagedir2.size - get_highmem_size(pagedir2),
15381 +
15382 +               /* Header */
15383 +               header_space_allocated, header_storage_needed(),
15384 +
15385 +               /* Nosave */
15386 +               num_nosave, extra_pages_allocated,
15387 +               num_nosave - extra_pages_allocated,
15388 +
15389 +               /* Storage */
15390 +               main_storage_allocated,
15391 +               storage_available,
15392 +               main_storage_needed(1, sub_extra_pd1_allow),
15393 +               main_storage_needed(1, 1),
15394 +
15395 +               /* Needed */
15396 +               lowpages_ps1_to_free(), highpages_ps1_to_free(),
15397 +               any_to_free(1),
15398 +               MIN_FREE_RAM, toi_memory_for_modules(0),
15399 +               extra_pd1_pages_allowance, ((long) image_size_limit) << 8,
15400 +
15401 +               need_pageset2() ? "yes" : "no");
15402 +
15403 +       if (always)
15404 +               printk("%s", buffer);
15405 +       else
15406 +               toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer);
15407 +}
15408 +
15409 +/* generate_free_page_map
15410 + *
15411 + * Description:        This routine generates a bitmap of free pages from the
15412 + *             lists used by the memory manager. We then use the bitmap
15413 + *             to quickly calculate which pages to save and in which
15414 + *             pagesets.
15415 + */
15416 +static void generate_free_page_map(void)
15417 +{
15418 +       int order, pfn, cpu, t;
15419 +       unsigned long flags, i;
15420 +       struct zone *zone;
15421 +       struct list_head *curr;
15422 +
15423 +       for_each_zone(zone) {
15424 +               if (!populated_zone(zone))
15425 +                       continue;
15426 +
15427 +               spin_lock_irqsave(&zone->lock, flags);
15428 +
15429 +               for (i = 0; i < zone->spanned_pages; i++)
15430 +                       ClearPageNosaveFree(pfn_to_page(
15431 +                                               ZONE_START(zone) + i));
15432 +
15433 +               for_each_migratetype_order(order, t) {
15434 +                       list_for_each(curr,
15435 +                                       &zone->free_area[order].free_list[t]) {
15436 +                               unsigned long j;
15437 +
15438 +                               pfn = page_to_pfn(list_entry(curr, struct page,
15439 +                                                       lru));
15440 +                               for (j = 0; j < (1UL << order); j++)
15441 +                                       SetPageNosaveFree(pfn_to_page(pfn + j));
15442 +                       }
15443 +               }
15444 +
15445 +               for_each_online_cpu(cpu) {
15446 +                       struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
15447 +                       struct per_cpu_pages *pcp = &pset->pcp;
15448 +                       struct page *page;
15449 +
15450 +                       list_for_each_entry(page, &pcp->list, lru)
15451 +                               SetPageNosaveFree(page);
15452 +               }
15453 +
15454 +               spin_unlock_irqrestore(&zone->lock, flags);
15455 +       }
15456 +}
15457 +
15458 +/* size_of_free_region
15459 + *
15460 + * Description:        Return the number of pages that are free, beginning with and
15461 + *             including this one.
15462 + */
15463 +static int size_of_free_region(struct page *page)
15464 +{
15465 +       struct zone *zone = page_zone(page);
15466 +       unsigned long this_pfn = page_to_pfn(page),
15467 +                     orig_pfn = this_pfn,
15468 +                     end_pfn = ZONE_START(zone) + zone->spanned_pages - 1;
15469 +
15470 +       while (this_pfn <= end_pfn && PageNosaveFree(pfn_to_page(this_pfn)))
15471 +               this_pfn++;
15472 +
15473 +       return this_pfn - orig_pfn;
15474 +}
15475 +
15476 +/* flag_image_pages
15477 + *
15478 + * This routine generates our lists of pages to be stored in each
15479 + * pageset. Since we store the data using extents, and adding new
15480 + * extents might allocate a new extent page, this routine may well
15481 + * be called more than once.
15482 + */
15483 +static void flag_image_pages(int atomic_copy)
15484 +{
15485 +       int num_free = 0;
15486 +       unsigned long loop;
15487 +       struct zone *zone;
15488 +
15489 +       pagedir1.size = 0;
15490 +       pagedir2.size = 0;
15491 +
15492 +       set_highmem_size(pagedir1, 0);
15493 +       set_highmem_size(pagedir2, 0);
15494 +
15495 +       num_nosave = 0;
15496 +
15497 +       memory_bm_clear(&pageset1_map);
15498 +
15499 +       generate_free_page_map();
15500 +
15501 +       /*
15502 +        * Pages not to be saved are marked Nosave irrespective of being
15503 +        * reserved.
15504 +        */
15505 +       for_each_zone(zone) {
15506 +               int highmem = is_highmem(zone);
15507 +
15508 +               if (!populated_zone(zone))
15509 +                       continue;
15510 +
15511 +               for (loop = 0; loop < zone->spanned_pages; loop++) {
15512 +                       unsigned long pfn = ZONE_START(zone) + loop;
15513 +                       struct page *page;
15514 +                       int chunk_size;
15515 +
15516 +                       if (!pfn_valid(pfn))
15517 +                               continue;
15518 +
15519 +                       page = pfn_to_page(pfn);
15520 +
15521 +                       chunk_size = size_of_free_region(page);
15522 +                       if (chunk_size) {
15523 +                               num_free += chunk_size;
15524 +                               loop += chunk_size - 1;
15525 +                               continue;
15526 +                       }
15527 +
15528 +                       if (highmem)
15529 +                               page = saveable_highmem_page(pfn);
15530 +                       else
15531 +                               page = saveable_page(pfn);
15532 +
15533 +                       if (!page || PageNosave(page)) {
15534 +                               num_nosave++;
15535 +                               continue;
15536 +                       }
15537 +
15538 +                       if (PagePageset2(page)) {
15539 +                               pagedir2.size++;
15540 +                               if (PageHighMem(page))
15541 +                                       inc_highmem_size(pagedir2);
15542 +                               else
15543 +                                       SetPagePageset1Copy(page);
15544 +                               if (PageResave(page)) {
15545 +                                       SetPagePageset1(page);
15546 +                                       ClearPagePageset1Copy(page);
15547 +                                       pagedir1.size++;
15548 +                                       if (PageHighMem(page))
15549 +                                               inc_highmem_size(pagedir1);
15550 +                               }
15551 +                       } else {
15552 +                               pagedir1.size++;
15553 +                               SetPagePageset1(page);
15554 +                               if (PageHighMem(page))
15555 +                                       inc_highmem_size(pagedir1);
15556 +                       }
15557 +               }
15558 +       }
15559 +
15560 +       if (!atomic_copy)
15561 +               toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0,
15562 +                       "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)"
15563 +                                               " + NumFree (%d) = %d.\n",
15564 +                       pagedir1.size, pagedir2.size, num_nosave, num_free,
15565 +                       pagedir1.size + pagedir2.size + num_nosave + num_free);
15566 +}
15567 +
15568 +void toi_recalculate_image_contents(int atomic_copy)
15569 +{
15570 +       memory_bm_clear(&pageset1_map);
15571 +       if (!atomic_copy) {
15572 +               unsigned long pfn;
15573 +               BITMAP_FOR_EACH_SET(pageset2_map, pfn)
15574 +                       ClearPagePageset1Copy(pfn_to_page(pfn));
15575 +               /* Need to call this before getting pageset1_size! */
15576 +               toi_mark_pages_for_pageset2();
15577 +       }
15578 +       flag_image_pages(atomic_copy);
15579 +
15580 +       if (!atomic_copy) {
15581 +               storage_available = toiActiveAllocator->storage_available();
15582 +               display_stats(0, 0);
15583 +       }
15584 +}
15585 +
15586 +/* update_image
15587 + *
15588 + * Allocate [more] memory and storage for the image.
15589 + */
15590 +static void update_image(int ps2_recalc)
15591 +{
15592 +       int wanted, got;
15593 +       long seek;
15594 +
15595 +       toi_recalculate_image_contents(0);
15596 +
15597 +       /* Include allowance for growth in pagedir1 while writing pagedir 2 */
15598 +       wanted = pagedir1.size + extra_pd1_pages_allowance -
15599 +               get_lowmem_size(pagedir2);
15600 +       if (wanted > extra_pages_allocated) {
15601 +               got = toi_allocate_extra_pagedir_memory(wanted);
15602 +               if (wanted < got) {
15603 +                       toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
15604 +                               "Want %d extra pages for pageset1, got %d.\n",
15605 +                               wanted, got);
15606 +                       return;
15607 +               }
15608 +       }
15609 +
15610 +       if (ps2_recalc)
15611 +               goto recalc;
15612 +
15613 +       thaw_kernel_threads();
15614 +
15615 +       /*
15616 +        * Allocate remaining storage space, if possible, up to the
15617 +        * maximum we know we'll need. It's okay to allocate the
15618 +        * maximum if the writer is the swapwriter, but
15619 +        * we don't want to grab all available space on an NFS share.
15620 +        * We therefore ignore the expected compression ratio here,
15621 +        * thereby trying to allocate the maximum image size we could
15622 +        * need (assuming compression doesn't expand the image), but
15623 +        * don't complain if we can't get the full amount we're after.
15624 +        */
15625 +
15626 +       storage_available = toiActiveAllocator->storage_available();
15627 +
15628 +       header_space_allocated = header_storage_needed();
15629 +
15630 +       toiActiveAllocator->reserve_header_space(header_space_allocated);
15631 +
15632 +       seek = min(storage_available, main_storage_needed(0, 0));
15633 +
15634 +       toiActiveAllocator->allocate_storage(seek);
15635 +
15636 +       main_storage_allocated = toiActiveAllocator->storage_allocated();
15637 +
15638 +       if (freeze_processes())
15639 +               set_abort_result(TOI_FREEZING_FAILED);
15640 +
15641 +recalc:
15642 +       toi_recalculate_image_contents(0);
15643 +}
15644 +
15645 +/* attempt_to_freeze
15646 + *
15647 + * Try to freeze processes.
15648 + */
15649 +
15650 +static int attempt_to_freeze(void)
15651 +{
15652 +       int result;
15653 +
15654 +       /* Stop processes before checking again */
15655 +       thaw_processes();
15656 +       toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing "
15657 +                       "filesystems.");
15658 +       result = freeze_processes();
15659 +
15660 +       if (result)
15661 +               set_abort_result(TOI_FREEZING_FAILED);
15662 +
15663 +       return result;
15664 +}
15665 +
15666 +/* eat_memory
15667 + *
15668 + * Try to free some memory in order to meet hard or soft constraints on
15669 + * the image characteristics.
15670 + *
15671 + * Hard constraints:
15672 + * - Pageset1 must be < half of memory;
15673 + * - We must have enough memory free at resume time to have pageset1
15674 + *   be able to be loaded in pages that don't conflict with where it has to
15675 + *   be restored.
15676 + * Soft constraints:
15677 + * - User-specified image size limit.
15678 + */
15679 +static void eat_memory(void)
15680 +{
15681 +       long amount_wanted = 0;
15682 +       int did_eat_memory = 0;
15683 +
15684 +       /*
15685 +        * Note that if we have enough storage space and enough free memory, we
15686 +        * may exit without eating anything. We give up when the last 10
15687 +        * iterations ate no extra pages, because we're not going to get much
15688 +        * more anyway and what little we would get would take a lot of time.
15689 +        *
15690 +        * We freeze processes before beginning, and then unfreeze them if we
15691 +        * need to eat memory until we think we have enough. If our attempts
15692 +        * to freeze fail, we give up and abort.
15693 +        */
15694 +
15695 +       toi_recalculate_image_contents(0);
15696 +       amount_wanted = amount_needed(1);
15697 +
15698 +       switch (image_size_limit) {
15699 +       case -1: /* Don't eat any memory */
15700 +               if (amount_wanted > 0) {
15701 +                       set_abort_result(TOI_WOULD_EAT_MEMORY);
15702 +                       return;
15703 +               }
15704 +               break;
15705 +       case -2:  /* Free caches only */
15706 +               drop_pagecache();
15707 +               toi_recalculate_image_contents(0);
15708 +               amount_wanted = amount_needed(1);
15709 +               did_eat_memory = 1;
15710 +               break;
15711 +       default:
15712 +               break;
15713 +       }
15714 +
15715 +       if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) &&
15716 +                       image_size_limit != -1) {
15717 +
15718 +               toi_prepare_status(CLEAR_BAR,
15719 +                               "Seeking to free %ldMB of memory.",
15720 +                               MB(amount_wanted));
15721 +
15722 +               thaw_kernel_threads();
15723 +
15724 +               shrink_all_memory(amount_wanted);
15725 +
15726 +               did_eat_memory = 1;
15727 +
15728 +               toi_recalculate_image_contents(0);
15729 +
15730 +               amount_wanted = amount_needed(1);
15731 +
15732 +               toi_cond_pause(0, NULL);
15733 +
15734 +               if (freeze_processes())
15735 +                       set_abort_result(TOI_FREEZING_FAILED);
15736 +       }
15737 +
15738 +       if (did_eat_memory)
15739 +               toi_recalculate_image_contents(0);
15740 +}
15741 +
15742 +/* toi_prepare_image
15743 + *
15744 + * Entry point to the whole image preparation section.
15745 + *
15746 + * We do four things:
15747 + * - Freeze processes;
15748 + * - Ensure image size constraints are met;
15749 + * - Complete all the preparation for saving the image,
15750 + *   including allocation of storage. The only memory
15751 + *   that should be needed when we're finished is that
15752 + *   for actually storing the image (and we know how
15753 + *   much is needed for that because the modules tell
15754 + *   us).
15755 + * - Make sure that all dirty buffers are written out.
15756 + */
15757 +#define MAX_TRIES 2
15758 +int toi_prepare_image(void)
15759 +{
15760 +       int result = 1, tries = 1;
15761 +
15762 +       header_space_allocated = 0;
15763 +       main_storage_allocated = 0;
15764 +       no_ps2_needed = 0;
15765 +
15766 +       if (attempt_to_freeze())
15767 +               return 1;
15768 +
15769 +       if (!extra_pd1_pages_allowance)
15770 +               get_extra_pd1_allowance();
15771 +
15772 +       storage_available = toiActiveAllocator->storage_available();
15773 +
15774 +       if (!storage_available) {
15775 +               printk(KERN_INFO "No storage available. Didn't try to prepare "
15776 +                               "an image.\n");
15777 +               display_failure_reason(0);
15778 +               set_abort_result(TOI_NOSTORAGE_AVAILABLE);
15779 +               return 1;
15780 +       }
15781 +
15782 +       if (build_attention_list()) {
15783 +               abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
15784 +                               "Unable to successfully prepare the image.\n");
15785 +               return 1;
15786 +       }
15787 +
15788 +       do {
15789 +               toi_prepare_status(CLEAR_BAR,
15790 +                               "Preparing Image. Try %d.", tries);
15791 +
15792 +               eat_memory();
15793 +
15794 +               if (test_result_state(TOI_ABORTED))
15795 +                       break;
15796 +
15797 +               update_image(0);
15798 +
15799 +               tries++;
15800 +
15801 +       } while (image_not_ready(1) && tries <= MAX_TRIES &&
15802 +                       !test_result_state(TOI_ABORTED));
15803 +
15804 +       result = image_not_ready(0);
15805 +
15806 +       if (!test_result_state(TOI_ABORTED)) {
15807 +               if (result) {
15808 +                       display_stats(1, 0);
15809 +                       display_failure_reason(tries > MAX_TRIES);
15810 +                       abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
15811 +                               "Unable to successfully prepare the image.\n");
15812 +               } else {
15813 +                       /* Pageset 2 needed? */
15814 +                       if (!need_pageset2() &&
15815 +                                 test_action_state(TOI_NO_PS2_IF_UNNEEDED)) {
15816 +                               no_ps2_needed = 1;
15817 +                               update_image(1);
15818 +                       }
15819 +
15820 +                       toi_cond_pause(1, "Image preparation complete.");
15821 +               }
15822 +       }
15823 +
15824 +       return result ? result : allocate_checksum_pages();
15825 +}
15826 diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h
15827 new file mode 100644
15828 index 0000000..46eda88
15829 --- /dev/null
15830 +++ b/kernel/power/tuxonice_prepare_image.h
15831 @@ -0,0 +1,35 @@
15832 +/*
15833 + * kernel/power/tuxonice_prepare_image.h
15834 + *
15835 + * Copyright (C) 2003-2008 Nigel Cunningham (nigel at tuxonice net)
15836 + *
15837 + * This file is released under the GPLv2.
15838 + *
15839 + */
15840 +
15841 +#include <asm/sections.h>
15842 +
15843 +extern int toi_prepare_image(void);
15844 +extern void toi_recalculate_image_contents(int storage_available);
15845 +extern long real_nr_free_pages(unsigned long zone_idx_mask);
15846 +extern int image_size_limit;
15847 +extern void toi_free_extra_pagedir_memory(void);
15848 +extern long extra_pd1_pages_allowance;
15849 +extern void free_attention_list(void);
15850 +
15851 +#define MIN_FREE_RAM 100
15852 +#define MIN_EXTRA_PAGES_ALLOWANCE 500
15853 +
15854 +#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))
15855 +#ifdef CONFIG_HIGHMEM
15856 +#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM))
15857 +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \
15858 +                                               (1 << ZONE_HIGHMEM)))
15859 +#else
15860 +#define real_nr_free_high_pages() (0)
15861 +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask))
15862 +
15863 +/* For eat_memory function */
15864 +#define ZONE_HIGHMEM (MAX_NR_ZONES + 1)
15865 +#endif
15866 +
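
The zone mask macros above are plain bit arithmetic over zone indices. A small demonstration with assumed values (MAX_NR_ZONES = 4, ZONE_HIGHMEM = 3; the real constants depend on the kernel configuration):

#include <stdio.h>

/* Assumed values, for illustration only. */
#define MAX_NR_ZONES 4
#define ZONE_HIGHMEM 3

#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))

int main(void)
{
        unsigned long highmem_mask = 1UL << ZONE_HIGHMEM;
        unsigned long lowmem_mask = all_zones_mask - highmem_mask;

        /* Prints 0xf, 0x8 and 0x7 respectively with the values above. */
        printf("all:  %#lx\n", all_zones_mask);
        printf("high: %#lx\n", highmem_mask);
        printf("low:  %#lx\n", lowmem_mask);
        return 0;
}
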
15867 diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c
15868 new file mode 100644
15869 index 0000000..5dafc95
15870 --- /dev/null
15871 +++ b/kernel/power/tuxonice_storage.c
15872 @@ -0,0 +1,282 @@
15873 +/*
15874 + * kernel/power/tuxonice_storage.c
15875 + *
15876 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
15877 + *
15878 + * This file is released under the GPLv2.
15879 + *
15880 + * Routines for talking to a userspace program that manages storage.
15881 + *
15882 + * The kernel side:
15883 + * - starts the userspace program;
15884 + * - sends messages telling it when to open and close the connection;
15885 + * - tells it when to quit;
15886 + *
15887 + * The user space side:
15888 + * - passes messages regarding status;
15889 + *
15890 + */
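
For a sense of the other side of this conversation, here is a minimal sketch of a userspace storage manager. The NETLINK_TOI_USM value is an assumption (the patch defines the real number elsewhere); the USM_MSG_* values match tuxonice_storage.h below, and the actual storage activation and error handling are omitted:

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#define NETLINK_TOI_USM 28      /* assumption: the patch defines the real number */
#define USM_MSG_CONNECT 0x30    /* from tuxonice_storage.h below */
#define USM_MSG_SUCCESS 0x40

int main(void)
{
        char buf[256];
        struct sockaddr_nl local = { .nl_family = AF_NETLINK };
        struct sockaddr_nl kernel = { .nl_family = AF_NETLINK }; /* pid 0: kernel */
        struct nlmsghdr *nlh = (struct nlmsghdr *) buf;
        int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_TOI_USM);

        if (sock < 0 || bind(sock, (struct sockaddr *) &local, sizeof(local)))
                return 1;

        /* Block until the kernel asks us to open the connection. */
        if (recv(sock, buf, sizeof(buf), 0) < (long) sizeof(*nlh))
                return 1;

        if (nlh->nlmsg_type == USM_MSG_CONNECT) {
                /* Activate storage (mknod, lvm, ...), then report back. */
                memset(buf, 0, sizeof(buf));
                nlh->nlmsg_len = NLMSG_LENGTH(0);
                nlh->nlmsg_type = USM_MSG_SUCCESS;
                sendto(sock, nlh, nlh->nlmsg_len, 0,
                       (struct sockaddr *) &kernel, sizeof(kernel));
        }

        close(sock);
        return 0;
}
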
15891 +
15892 +#include <linux/suspend.h>
15893 +#include <linux/freezer.h>
15894 +
15895 +#include "tuxonice_sysfs.h"
15896 +#include "tuxonice_modules.h"
15897 +#include "tuxonice_netlink.h"
15898 +#include "tuxonice_storage.h"
15899 +#include "tuxonice_ui.h"
15900 +
15901 +static struct user_helper_data usm_helper_data;
15902 +static struct toi_module_ops usm_ops;
15903 +static int message_received, usm_prepare_count;
15904 +static int storage_manager_last_action, storage_manager_action;
15905 +
15906 +static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
15907 +{
15908 +       int type;
15909 +       int *data;
15910 +
15911 +       type = nlh->nlmsg_type;
15912 +
15913 +       /* A control message: ignore it */
15914 +       if (type < NETLINK_MSG_BASE)
15915 +               return 0;
15916 +
15917 +       /* Unknown message: reply with EINVAL */
15918 +       if (type >= USM_MSG_MAX)
15919 +               return -EINVAL;
15920 +
15921 +       /* All operations require privileges, even GET */
15922 +       if (security_netlink_recv(skb, CAP_NET_ADMIN))
15923 +               return -EPERM;
15924 +
15925 +       /* Only allow one task to receive NOFREEZE privileges */
15926 +       if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1)
15927 +               return -EBUSY;
15928 +
15929 +       data = (int *) NLMSG_DATA(nlh);
15930 +
15931 +       switch (type) {
15932 +       case USM_MSG_SUCCESS:
15933 +       case USM_MSG_FAILED:
15934 +               message_received = type;
15935 +               complete(&usm_helper_data.wait_for_process);
15936 +               break;
15937 +       default:
15938 +               printk(KERN_INFO "Storage manager doesn't recognise "
15939 +                               "message %d.\n", type);
15940 +       }
15941 +
15942 +       return 1;
15943 +}
15944 +
15945 +#ifdef CONFIG_NET
15946 +static int activations;
15947 +
15948 +int toi_activate_storage(int force)
15949 +{
15950 +       int tries = 1;
15951 +
15952 +       if (usm_helper_data.pid == -1 || !usm_ops.enabled)
15953 +               return 0;
15954 +
15955 +       message_received = 0;
15956 +       activations++;
15957 +
15958 +       if (activations > 1 && !force)
15959 +               return 0;
15960 +
15961 +       while ((!message_received || message_received == USM_MSG_FAILED) &&
15962 +                       tries < 2) {
15963 +               toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt "
15964 +                               "%d.\n", tries);
15965 +
15966 +               init_completion(&usm_helper_data.wait_for_process);
15967 +
15968 +               toi_send_netlink_message(&usm_helper_data,
15969 +                       USM_MSG_CONNECT,
15970 +                       NULL, 0);
15971 +
15972 +               /* Wait 2 seconds for the userspace process to make contact */
15973 +               wait_for_completion_timeout(&usm_helper_data.wait_for_process,
15974 +                               2*HZ);
15975 +
15976 +               tries++;
15977 +       }
15978 +
15979 +       return 0;
15980 +}
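
The init_completion()/wait_for_completion_timeout() pairing above has no direct userspace twin; as a rough POSIX analogy only (not the kernel mechanism), the two-second bound corresponds to the 2*HZ timeout:

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int done;

static void *helper(void *arg)
{
        (void) arg;
        pthread_mutex_lock(&lock);
        done = 1;                       /* "message received" */
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        struct timespec deadline;
        pthread_t thread;
        int timed_out = 0;

        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += 2;           /* cf. wait_for_completion_timeout(.., 2*HZ) */

        pthread_create(&thread, NULL, helper, NULL);

        pthread_mutex_lock(&lock);
        while (!done && !timed_out)
                if (pthread_cond_timedwait(&cond, &lock, &deadline))
                        timed_out = 1;
        pthread_mutex_unlock(&lock);

        pthread_join(thread, NULL);
        printf(timed_out ? "No contact within 2s.\n" : "Contact made.\n");
        return 0;
}
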
15981 +
15982 +int toi_deactivate_storage(int force)
15983 +{
15984 +       if (usm_helper_data.pid == -1 || !usm_ops.enabled)
15985 +               return 0;
15986 +
15987 +       message_received = 0;
15988 +       activations--;
15989 +
15990 +       if (activations && !force)
15991 +               return 0;
15992 +
15993 +       init_completion(&usm_helper_data.wait_for_process);
15994 +
15995 +       toi_send_netlink_message(&usm_helper_data,
15996 +                       USM_MSG_DISCONNECT,
15997 +                       NULL, 0);
15998 +
15999 +       wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ);
16000 +
16001 +       if (!message_received || message_received == USM_MSG_FAILED) {
16002 +               printk(KERN_INFO "Failed to disconnect storage; returning failure.\n");
16003 +               return 1;
16004 +       }
16005 +
16006 +       return 0;
16007 +}
16008 +#endif
16009 +
16010 +static void storage_manager_simulate(void)
16011 +{
16012 +       printk(KERN_INFO "--- Storage manager simulate ---\n");
16013 +       toi_prepare_usm();
16014 +       schedule();
16015 +       printk(KERN_INFO "--- Activate storage 1 ---\n");
16016 +       toi_activate_storage(1);
16017 +       schedule();
16018 +       printk(KERN_INFO "--- Deactivate storage 1 ---\n");
16019 +       toi_deactivate_storage(1);
16020 +       schedule();
16021 +       printk(KERN_INFO "--- Cleanup usm ---\n");
16022 +       toi_cleanup_usm();
16023 +       schedule();
16024 +       printk(KERN_INFO "--- Storage manager simulate ends ---\n");
16025 +}
16026 +
16027 +static int usm_storage_needed(void)
16028 +{
16029 +       return strlen(usm_helper_data.program);
16030 +}
16031 +
16032 +static int usm_save_config_info(char *buf)
16033 +{
16034 +       int len = strlen(usm_helper_data.program);
16035 +       memcpy(buf, usm_helper_data.program, len);
16036 +       return len;
16037 +}
16038 +
16039 +static void usm_load_config_info(char *buf, int size)
16040 +{
16041 +       /* Don't load the saved path if one has already been set */
16042 +       if (usm_helper_data.program[0])
16043 +               return;
16044 +
16045 +       memcpy(usm_helper_data.program, buf, size);
16046 +}
16047 +
16048 +static int usm_memory_needed(void)
16049 +{
16050 +       /* ballpark figure of 32 pages */
16051 +       return 32 * PAGE_SIZE;
16052 +}
16053 +
16054 +/* toi_prepare_usm: start the userspace storage manager, if configured.
16055 + */
16056 +int toi_prepare_usm(void)
16057 +{
16058 +       usm_prepare_count++;
16059 +
16060 +       if (usm_prepare_count > 1 || !usm_ops.enabled)
16061 +               return 0;
16062 +
16063 +       usm_helper_data.pid = -1;
16064 +
16065 +       if (!*usm_helper_data.program)
16066 +               return 0;
16067 +
16068 +       toi_netlink_setup(&usm_helper_data);
16069 +
16070 +       if (usm_helper_data.pid == -1)
16071 +               printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't"
16072 +                               " start it.\n");
16073 +
16074 +       toi_activate_storage(0);
16075 +
16076 +       return usm_helper_data.pid != -1;
16077 +}
16078 +
16079 +void toi_cleanup_usm(void)
16080 +{
16081 +       usm_prepare_count--;
16082 +
16083 +       if (usm_helper_data.pid > -1 && !usm_prepare_count) {
16084 +               toi_deactivate_storage(0);
16085 +               toi_netlink_close(&usm_helper_data);
16086 +       }
16087 +}
16088 +
16089 +static void storage_manager_activate(void)
16090 +{
16091 +       if (storage_manager_action == storage_manager_last_action)
16092 +               return;
16093 +
16094 +       if (storage_manager_action)
16095 +               toi_prepare_usm();
16096 +       else
16097 +               toi_cleanup_usm();
16098 +
16099 +       storage_manager_last_action = storage_manager_action;
16100 +}
16101 +
16102 +/*
16103 + * Storage manager specific /sys/power/tuxonice entries.
16104 + */
16105 +
16106 +static struct toi_sysfs_data sysfs_params[] = {
16107 +       SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate),
16108 +       SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL),
16109 +       SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0,
16110 +               NULL),
16111 +       SYSFS_INT("activate_storage", SYSFS_RW, &storage_manager_action, 0, 1,
16112 +                       0, storage_manager_activate)
16113 +};
16114 +
16115 +static struct toi_module_ops usm_ops = {
16116 +       .type                           = MISC_MODULE,
16117 +       .name                           = "usm",
16118 +       .directory                      = "storage_manager",
16119 +       .module                         = THIS_MODULE,
16120 +       .storage_needed                 = usm_storage_needed,
16121 +       .save_config_info               = usm_save_config_info,
16122 +       .load_config_info               = usm_load_config_info,
16123 +       .memory_needed                  = usm_memory_needed,
16124 +
16125 +       .sysfs_data                     = sysfs_params,
16126 +       .num_sysfs_entries              = sizeof(sysfs_params) /
16127 +               sizeof(struct toi_sysfs_data),
16128 +};
16129 +
16130 +/* toi_usm_init
16131 + * Description: Boot time initialisation for the storage manager.
16132 + */
16133 +int toi_usm_init(void)
16134 +{
16135 +       usm_helper_data.nl = NULL;
16136 +       usm_helper_data.program[0] = '\0';
16137 +       usm_helper_data.pid = -1;
16138 +       usm_helper_data.skb_size = 0;
16139 +       usm_helper_data.pool_limit = 6;
16140 +       usm_helper_data.netlink_id = NETLINK_TOI_USM;
16141 +       usm_helper_data.name = "userspace storage manager";
16142 +       usm_helper_data.rcv_msg = usm_user_rcv_msg;
16143 +       usm_helper_data.interface_version = 2;
16144 +       usm_helper_data.must_init = 0;
16145 +       init_completion(&usm_helper_data.wait_for_process);
16146 +
16147 +       return toi_register_module(&usm_ops);
16148 +}
16149 +
16150 +void toi_usm_exit(void)
16151 +{
16152 +       toi_netlink_close_complete(&usm_helper_data);
16153 +       toi_unregister_module(&usm_ops);
16154 +}
16155 diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h
16156 new file mode 100644
16157 index 0000000..5477056
16158 --- /dev/null
16159 +++ b/kernel/power/tuxonice_storage.h
16160 @@ -0,0 +1,46 @@
16161 +/*
16162 + * kernel/power/tuxonice_storage.h
16163 + *
16164 + * Copyright (C) 2005-2008 Nigel Cunningham (nigel at tuxonice net)
16165 + *
16166 + * This file is released under the GPLv2.
16167 + */
16168 +
16169 +#ifdef CONFIG_NET
16170 +int toi_prepare_usm(void);
16171 +void toi_cleanup_usm(void);
16172 +
16173 +int toi_activate_storage(int force);
16174 +int toi_deactivate_storage(int force);
16175 +extern int toi_usm_init(void);
16176 +extern void toi_usm_exit(void);
16177 +#else
16178 +static inline int toi_usm_init(void) { return 0; }
16179 +static inline void toi_usm_exit(void) { }
16180 +
16181 +static inline int toi_activate_storage(int force)
16182 +{
16183 +       return 0;
16184 +}
16185 +
16186 +static inline int toi_deactivate_storage(int force)
16187 +{
16188 +       return 0;
16189 +}
16190 +
16191 +static inline int toi_prepare_usm(void) { return 0; }
16192 +static inline void toi_cleanup_usm(void) { }
16193 +#endif
16194 +
16195 +enum {
16196 +       USM_MSG_BASE = 0x10,
16197 +
16198 +       /* Kernel -> Userspace */
16199 +       USM_MSG_CONNECT = 0x30,
16200 +       USM_MSG_DISCONNECT = 0x31,
16201 +       USM_MSG_SUCCESS = 0x40,
16202 +       USM_MSG_FAILED = 0x41,
16203 +
16204 +       USM_MSG_MAX,
16205 +};
16206 +
16214 diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c
16215 new file mode 100644
16216 index 0000000..aa21de9
16217 --- /dev/null
16218 +++ b/kernel/power/tuxonice_swap.c
16219 @@ -0,0 +1,1279 @@
16220 +/*
16221 + * kernel/power/tuxonice_swap.c
16222 + *
16223 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
16224 + *
16225 + * Distributed under GPLv2.
16226 + *
16227 + * This file encapsulates functions for usage of swap space as a
16228 + * backing store.
16229 + */
16230 +
16231 +#include <linux/suspend.h>
16232 +#include <linux/module.h>
16233 +#include <linux/blkdev.h>
16234 +#include <linux/swapops.h>
16235 +#include <linux/swap.h>
16236 +#include <linux/syscalls.h>
16237 +
16238 +#include "tuxonice.h"
16239 +#include "tuxonice_sysfs.h"
16240 +#include "tuxonice_modules.h"
16241 +#include "tuxonice_io.h"
16242 +#include "tuxonice_ui.h"
16243 +#include "tuxonice_extent.h"
16244 +#include "tuxonice_block_io.h"
16245 +#include "tuxonice_alloc.h"
16246 +#include "tuxonice_builtin.h"
16247 +
16248 +static struct toi_module_ops toi_swapops;
16249 +
16250 +/* --- Struct of pages stored on disk */
16251 +
16252 +struct sig_data {
16253 +       dev_t device;
16254 +       unsigned long sector;
16255 +       int resume_attempted;
16256 +       int orig_sig_type;
16257 +};
16258 +
16259 +union diskpage {
16260 +       union swap_header swh;  /* swh.magic is the only member used */
16261 +       struct sig_data sig_data;
16262 +};
16263 +
16264 +union p_diskpage {
16265 +       union diskpage *pointer;
16266 +       char *ptr;
16267 +       unsigned long address;
16268 +};
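
union p_diskpage is just a convenience for addressing one page as a structure pointer, a byte buffer or a number. A standalone sketch of the idiom, with a cut-down diskpage (the real one wraps union swap_header rather than a bare magic array):

#include <stdio.h>

/* Cut-down stand-ins; the real sig_data and diskpage are defined above. */
struct sig_data {
        unsigned int device;
        unsigned long sector;
        int resume_attempted;
        int orig_sig_type;
};

union diskpage {
        char magic[10];
        struct sig_data sig_data;
};

union p_diskpage {
        union diskpage *pointer;
        char *ptr;
        unsigned long address;
};

int main(void)
{
        static char page[4096];
        union p_diskpage view;

        view.ptr = page;                        /* one page, three views */
        view.pointer->sig_data.sector = 42;
        view.pointer->sig_data.resume_attempted = 0;

        printf("page at %#lx, sector %lu\n",
               view.address, view.pointer->sig_data.sector);
        return 0;
}
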
16269 +
16270 +enum {
16271 +       IMAGE_SIGNATURE,
16272 +       NO_IMAGE_SIGNATURE,
16273 +       TRIED_RESUME,
16274 +       NO_TRIED_RESUME,
16275 +};
16276 +
16277 +/*
16278 + * Both of these hold versions of the swap header page. current_signature_page
16279 + * points to the data we read from disk at the start of hibernating or checking
16280 + * whether to resume. no_image_signature_contents is the copy stored in the image
16281 + * header, showing what the swap header page looked like when hibernating began.
16282 + */
16283 +static char *current_signature_page;
16284 +static char no_image_signature_contents[sizeof(struct sig_data)];
16285 +
16286 +/* Devices used for swap */
16287 +static struct toi_bdev_info devinfo[MAX_SWAPFILES];
16288 +
16289 +/* Extent chains for swap & blocks */
16290 +static struct hibernate_extent_chain swapextents;
16291 +static struct hibernate_extent_chain block_chain[MAX_SWAPFILES];
16292 +
16293 +static dev_t header_dev_t;
16294 +static struct block_device *header_block_device;
16295 +static unsigned long headerblock;
16296 +
16297 +/* For swapfile automatically swapon/off'd. */
16298 +static char swapfilename[256] = "";
16299 +static int toi_swapon_status;
16300 +
16301 +/* Header Page Information */
16302 +static long header_pages_reserved;
16303 +
16304 +/* Swap Pages */
16305 +static long swap_pages_allocated;
16306 +
16307 +/* User Specified Parameters. */
16308 +
16309 +static unsigned long resume_firstblock;
16310 +static dev_t resume_swap_dev_t;
16311 +static struct block_device *resume_block_device;
16312 +
16313 +static struct sysinfo swapinfo;
16314 +
16315 +/* Block devices open. */
16316 +struct bdev_opened {
16317 +       dev_t device;
16318 +       struct block_device *bdev;
16319 +};
16320 +
16321 +/*
16322 + * Entry MAX_SWAPFILES is the resume block device, which may
16323 + * be a swap device not enabled when we hibernate.
16324 + * Entry MAX_SWAPFILES + 1 is the header block device, which
16325 + * is needed before we find out which slot it occupies.
16326 + *
16327 + * We use a struct separate from devinfo so that we can track
16328 + * the bdevs we open, because if we need to abort resuming
16329 + * prior to the atomic restore, they need to be closed, but
16330 + * closing them after successfully resuming would be wrong.
16331 + */
16332 +static struct bdev_opened *bdevs_opened[MAX_SWAPFILES + 2];
16333 +
16334 +/**
16335 + * close_bdev: Close a swap bdev.
16336 + *
16337 + * i: The swap entry number to close.
16338 + */
16339 +static void close_bdev(int i)
16340 +{
16341 +       struct bdev_opened *this = bdevs_opened[i];
16342 +
16343 +       if (!this)
16344 +               return;
16345 +
16346 +       blkdev_put(this->bdev);
16347 +       toi_kfree(8, this);
16348 +       bdevs_opened[i] = NULL;
16349 +}
16350 +
16351 +/**
16352 + * close_bdevs: Close all bdevs we opened.
16353 + *
16354 + * Close all bdevs that we opened and reset the related vars.
16355 + */
16356 +static void close_bdevs(void)
16357 +{
16358 +       int i;
16359 +
16360 +       for (i = 0; i < MAX_SWAPFILES + 2; i++)
16361 +               close_bdev(i);
16362 +
16363 +       resume_block_device = header_block_device = NULL;
16364 +}
16365 +
16366 +/**
16367 + * open_bdev: Open a bdev at resume time.
16368 + *
16369 + * index: The swap index. May be MAX_SWAPFILES for the resume_dev_t
16370 + * (the user can have resume= pointing at a swap partition/file that isn't
16371 + * swapon'd when they hibernate), or MAX_SWAPFILES+1 for the first page of
16372 + * the header. The latter will be from a swap partition that was enabled when
16373 + * we hibernated, but we don't know its real index until we read that page.
16374 + * dev_t: The device major/minor.
16375 + * display_errs: Whether to display errors if something goes wrong.
16376 + *
16377 + * We stored a dev_t in the image header. Open the matching device without
16378 + * requiring /dev/<whatever> in most cases and record the details needed
16379 + * to close it later and avoid duplicating work.
16380 + */
16381 +static struct block_device *open_bdev(int index, dev_t device, int display_errs)
16382 +{
16383 +       struct bdev_opened *this;
16384 +       struct block_device *bdev;
16385 +
16386 +       if (bdevs_opened[index]) {
16387 +               if (bdevs_opened[index]->device == device)
16388 +                       return bdevs_opened[index]->bdev;
16389 +
16390 +               close_bdev(index);
16391 +       }
16392 +
16393 +       bdev = toi_open_by_devnum(device, FMODE_READ);
16394 +
16395 +       if (IS_ERR(bdev) || !bdev) {
16396 +               if (display_errs)
16397 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
16398 +                               "Failed to get access to block device "
16399 +                               "\"%x\" (error %d).\n Maybe you need "
16400 +                               "to run mknod and/or lvmsetup in an "
16401 +                               "initrd/ramfs?", device, (int) PTR_ERR(bdev));
16402 +               return ERR_PTR(-EINVAL);
16403 +       }
16404 +
16405 +       this = toi_kzalloc(8, sizeof(struct bdev_opened), GFP_KERNEL);
16406 +       if (!this) {
16407 +               printk(KERN_WARNING "TuxOnIce: Failed to allocate memory for "
16408 +                               "opening a bdev.");
16409 +               blkdev_put(bdev);
16410 +               return ERR_PTR(-ENOMEM);
16411 +       }
16412 +
16413 +       bdevs_opened[index] = this;
16414 +       this->device = device;
16415 +       this->bdev = bdev;
16416 +
16417 +       return bdev;
16418 +}
16419 +
16420 +/**
16421 + * enable_swapfile: Swapon the user specified swapfile prior to hibernating.
16422 + *
16423 + * Activate the given swapfile if it wasn't already enabled. Remember whether
16424 + * we really did swapon it for swapoffing later.
16425 + */
16426 +static void enable_swapfile(void)
16427 +{
16428 +       int activateswapresult = -EINVAL;
16429 +
16430 +       if (swapfilename[0]) {
16431 +               /* Attempt to swap on with maximum priority */
16432 +               activateswapresult = sys_swapon(swapfilename, 0xFFFF);
16433 +               if (activateswapresult && activateswapresult != -EBUSY)
16434 +                       printk(KERN_WARNING "TuxOnIce: The swapfile/partition specified by "
16435 +                               "/sys/power/tuxonice/swap/swapfile "
16436 +                               "(%s) could not be turned on (error %d). "
16437 +                               "Attempting to continue.\n",
16438 +                               swapfilename, activateswapresult);
16439 +               if (!activateswapresult)
16440 +                       toi_swapon_status = 1;
16441 +       }
16442 +}
16443 +
16444 +/**
16445 + * disable_swapfile: Swapoff any file swaponed at the start of the cycle.
16446 + *
16447 + * If we did successfully swapon a file at the start of the cycle, swapoff
16448 + * it now (finishing up).
16449 + */
16450 +static void disable_swapfile(void)
16451 +{
16452 +       if (!toi_swapon_status)
16453 +               return;
16454 +
16455 +       sys_swapoff(swapfilename);
16456 +       toi_swapon_status = 0;
16457 +}
16458 +
16459 +/**
16460 + * try_to_parse_resume_device: Try to parse resume=
16461 + *
16462 + * Any "swap:" has been stripped away and we just have the path to deal with.
16463 + * We attempt to do name_to_dev_t, open and stat the file. Having opened the
16464 + * file, get the struct block_device * to match.
16465 + */
16466 +static int try_to_parse_resume_device(char *commandline, int quiet)
16467 +{
16468 +       struct kstat stat;
16469 +       int error = 0;
16470 +
16471 +       resume_swap_dev_t = name_to_dev_t(commandline);
16472 +
16473 +       if (!resume_swap_dev_t) {
16474 +               struct file *file = filp_open(commandline,
16475 +                               O_RDONLY|O_LARGEFILE, 0);
16476 +
16477 +               if (!IS_ERR(file) && file) {
16478 +                       vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
16479 +                       filp_close(file, NULL);
16480 +               } else
16481 +                       error = vfs_stat(commandline, &stat);
16482 +               if (!error)
16483 +                       resume_swap_dev_t = stat.rdev;
16484 +       }
16485 +
16486 +       if (!resume_swap_dev_t) {
16487 +               if (quiet)
16488 +                       return 1;
16489 +
16490 +               if (test_toi_state(TOI_TRYING_TO_RESUME))
16491 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
16492 +                         "Failed to translate \"%s\" into a device id.\n",
16493 +                         commandline);
16494 +               else
16495 +                       printk(KERN_INFO "TuxOnIce: Can't translate \"%s\" into a device "
16496 +                                       "id yet.\n", commandline);
16497 +               return 1;
16498 +       }
16499 +
16500 +       resume_block_device = open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 0);
16501 +       if (IS_ERR(resume_block_device)) {
16502 +               if (!quiet)
16503 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
16504 +                               "Failed to get access to \"%s\", where"
16505 +                               " the swap header should be found.",
16506 +                               commandline);
16507 +               return 1;
16508 +       }
16509 +
16510 +       return 0;
16511 +}
16512 +
16513 +/*
16514 + * If we have read part of the image, we might have filled memory with
16515 + * data that should be zeroed out.
16516 + */
16517 +static void toi_swap_noresume_reset(void)
16518 +{
16519 +       toi_bio_ops.rw_cleanup(READ);
16520 +       memset((char *) &devinfo, 0, sizeof(devinfo));
16521 +}
16522 +
16523 +static int get_current_signature(void)
16524 +{
16525 +       int result;
16526 +
16527 +       if (current_signature_page)
16528 +               return 0;
16529 +
16530 +       current_signature_page = (char *) toi_get_zeroed_page(38,
16531 +                       TOI_ATOMIC_GFP);
16532 +       if (!current_signature_page)
16533 +               return -ENOMEM;
16534 +
16535 +       result = toi_bio_ops.bdev_page_io(READ, resume_block_device,
16536 +               resume_firstblock, virt_to_page(current_signature_page));
16537 +
16538 +       return result;
16539 +}
16540 +
16541 +static int parse_signature(void)
16542 +{
16543 +       union p_diskpage swap_header_page;
16544 +       struct sig_data *sig;
16545 +       int type;
16546 +       char *swap_header;
16547 +       const char *sigs[] = {
16548 +               "SWAP-SPACE", "SWAPSPACE2", "S1SUSP", "S2SUSP", "S1SUSPEND"
16549 +       };
16550 +
16551 +       if (!current_signature_page) {
16552 +               int result = get_current_signature();
16553 +
16554 +               if (result)
16555 +                       return result;
16556 +       }
16557 +
16558 +       swap_header_page = (union p_diskpage) current_signature_page;
16559 +       sig = (struct sig_data *) current_signature_page;
16560 +       swap_header = swap_header_page.pointer->swh.magic.magic;
16561 +
16562 +       for (type = 0; type < 5; type++)
16563 +               if (!memcmp(sigs[type], swap_header, strlen(sigs[type])))
16564 +                       return type;
16565 +
16566 +       if (memcmp(tuxonice_signature, swap_header, sizeof(tuxonice_signature)))
16567 +               return -1;
16568 +
16569 +       header_dev_t = sig->device;
16570 +       clear_toi_state(TOI_RESUMED_BEFORE);
16571 +       if (sig->resume_attempted)
16572 +               set_toi_state(TOI_RESUMED_BEFORE);
16573 +       headerblock = sig->sector;
16574 +
16575 +       return 10;
16576 +}
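
The classification above is a series of memcmp() calls against known magic strings, with 10 reserved for TuxOnIce's own signature. A standalone sketch, assuming the usual swap layout where the magic occupies the last 10 bytes of the header page:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Return the index of the matching signature, or -1. */
static int classify(const char *page)
{
        static const char *sigs[] = {
                "SWAP-SPACE", "SWAPSPACE2", "S1SUSP", "S2SUSP", "S1SUSPEND"
        };
        const char *magic = page + PAGE_SIZE - 10;
        size_t i;

        for (i = 0; i < sizeof(sigs) / sizeof(sigs[0]); i++)
                if (!memcmp(sigs[i], magic, strlen(sigs[i])))
                        return (int) i;
        return -1;
}

int main(void)
{
        static char page[PAGE_SIZE];

        memcpy(page + PAGE_SIZE - 10, "SWAPSPACE2", 10);
        printf("type %d\n", classify(page));    /* prints "type 1" */
        return 0;
}
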
16577 +
16578 +static void forget_signatures(void)
16579 +{
16580 +       if (current_signature_page) {
16581 +               toi_free_page(38, (unsigned long) current_signature_page);
16582 +               current_signature_page = NULL;
16583 +       }
16584 +}
16585 +
16586 +/*
16587 + * write_modified_signature
16588 + *
16589 + * Write a (potentially) modified signature page without forgetting the
16590 + * original contents.
16591 + */
16592 +static int write_modified_signature(int modification)
16593 +{
16594 +       union p_diskpage swap_header_page;
16595 +       struct swap_info_struct *si;
16596 +       int result;
16597 +       char *orig_sig;
16598 +
16599 +       /* In case we haven't already */
16600 +       result = get_current_signature();
16601 +
16602 +       if (result)
16603 +               return result;
16604 +
16605 +       swap_header_page.address = toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
16606 +
16607 +       if (!swap_header_page.address)
16608 +               return -ENOMEM;
16609 +
16610 +       memcpy(swap_header_page.ptr, current_signature_page, PAGE_SIZE);
16611 +
16612 +       switch (modification) {
16613 +       case IMAGE_SIGNATURE:
16614 +
16615 +               memcpy(no_image_signature_contents, swap_header_page.ptr,
16616 +                               sizeof(no_image_signature_contents));
16617 +
16618 +               /* Get the details of the header first page. */
16619 +               toi_extent_state_goto_start(&toi_writer_posn);
16620 +               toi_bio_ops.forward_one_page(1);
16621 +
16622 +               si = get_swap_info_struct(toi_writer_posn.current_chain);
16623 +
16624 +               /* Prepare the signature */
16625 +               swap_header_page.pointer->sig_data.device = si->bdev->bd_dev;
16626 +               swap_header_page.pointer->sig_data.sector =
16627 +                       toi_writer_posn.current_offset;
16628 +               swap_header_page.pointer->sig_data.resume_attempted = 0;
16629 +               swap_header_page.pointer->sig_data.orig_sig_type =
16630 +                       parse_signature();
16631 +
16632 +               memcpy(swap_header_page.pointer->swh.magic.magic,
16633 +                               tuxonice_signature, sizeof(tuxonice_signature));
16634 +
16635 +               break;
16636 +       case NO_IMAGE_SIGNATURE:
16637 +               if (!swap_header_page.pointer->sig_data.orig_sig_type)
16638 +                       orig_sig = "SWAP-SPACE";
16639 +               else
16640 +                       orig_sig = "SWAPSPACE2";
16641 +
16642 +               memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10);
16643 +               memcpy(swap_header_page.ptr, no_image_signature_contents,
16644 +                               sizeof(no_image_signature_contents));
16645 +               break;
16646 +       case TRIED_RESUME:
16647 +               swap_header_page.pointer->sig_data.resume_attempted = 1;
16648 +               break;
16649 +       case NO_TRIED_RESUME:
16650 +               swap_header_page.pointer->sig_data.resume_attempted = 0;
16651 +               break;
16652 +       }
16653 +
16654 +       result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
16655 +               resume_firstblock, virt_to_page(swap_header_page.address));
16656 +
16657 +       memcpy(current_signature_page, swap_header_page.ptr, PAGE_SIZE);
16658 +
16659 +       toi_free_page(38, swap_header_page.address);
16660 +
16661 +       return result;
16662 +}
16663 +
16664 +/*
16665 + * apply_header_reservation
16666 + *
16667 + * Use 0 (READ) to forward_one_page so it doesn't complain if we haven't
16668 + * allocated storage yet.
16669 + */
16670 +static int apply_header_reservation(void)
16671 +{
16672 +       int i;
16673 +
16674 +       toi_extent_state_goto_start(&toi_writer_posn);
16675 +       toi_bio_ops.forward_one_page(0); /* To first page */
16676 +
16677 +       for (i = 0; i < header_pages_reserved; i++)
16678 +               if (toi_bio_ops.forward_one_page(0))
16679 +                       return -ENOSPC;
16680 +
16681 +       /* The end of header pages will be the start of pageset 2;
16682 +        * we are now sitting on the first pageset2 page. */
16683 +       toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]);
16684 +       return 0;
16685 +}
16686 +
16687 +static void toi_swap_reserve_header_space(int request)
16688 +{
16689 +       header_pages_reserved = (long) request;
16690 +
16691 +       /* If we've already allocated storage (hence ignoring return value): */
16692 +       apply_header_reservation();
16693 +}
16694 +
16695 +static void free_block_chains(void)
16696 +{
16697 +       int i;
16698 +
16699 +       for (i = 0; i < MAX_SWAPFILES; i++)
16700 +               if (block_chain[i].first)
16701 +                       toi_put_extent_chain(&block_chain[i]);
16702 +}
16703 +
16704 +static int add_blocks_to_extent_chain(int chain, int start, int end)
16705 +{
16706 +       if (test_action_state(TOI_TEST_BIO))
16707 +               printk(KERN_INFO "Adding extent chain %d %d-%d.\n", chain,
16708 +                               start << devinfo[chain].bmap_shift,
16709 +                               end << devinfo[chain].bmap_shift);
16710 +
16711 +       if (toi_add_to_extent_chain(&block_chain[chain], start, end)) {
16712 +               free_block_chains();
16713 +               return -ENOMEM;
16714 +       }
16715 +
16716 +       return 0;
16717 +}
16718 +
16719 +
16720 +static int get_main_pool_phys_params(void)
16721 +{
16722 +       struct hibernate_extent *extentpointer = NULL;
16723 +       unsigned long address;
16724 +       int extent_min = -1, extent_max = -1, last_chain = -1;
16725 +
16726 +       free_block_chains();
16727 +
16728 +       toi_extent_for_each(&swapextents, extentpointer, address) {
16729 +               swp_entry_t swap_address = (swp_entry_t) { address };
16730 +               pgoff_t offset = swp_offset(swap_address);
16731 +               unsigned swapfilenum = swp_type(swap_address);
16732 +               struct swap_info_struct *sis =
16733 +                       get_swap_info_struct(swapfilenum);
16734 +               sector_t new_sector = map_swap_page(sis, offset);
16735 +
16736 +               if ((new_sector == extent_max + 1) &&
16737 +                   (last_chain == swapfilenum)) {
16738 +                       extent_max++;
16739 +                       continue;
16740 +               }
16741 +
16742 +               if (extent_min > -1 && add_blocks_to_extent_chain(last_chain,
16743 +                                       extent_min, extent_max))
16744 +                       return -ENOMEM;
16745 +
16746 +               extent_min = extent_max = new_sector;
16747 +               last_chain = swapfilenum;
16748 +       }
16749 +
16750 +       if (extent_min > -1 && add_blocks_to_extent_chain(last_chain,
16751 +                               extent_min, extent_max))
16752 +               return -ENOMEM;
16753 +
16754 +       return apply_header_reservation();
16755 +}
16756 +
16757 +static long raw_to_real(long raw)
16758 +{
16759 +       long result;
16760 +
16761 +       result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) +
16762 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
16763 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
16764 +
16765 +       return result < 0 ? 0 : result;
16766 +}
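
Each stored page costs sizeof(unsigned long) + sizeof(int) bytes of index metadata, and raw_to_real() deducts the whole pages that metadata consumes. A worked example, assuming 4096-byte pages and a 64-bit build (12 bytes per page, so roughly one metadata page per 342 data pages):

#include <stdio.h>

#define PAGE_SIZE 4096  /* assumed; matches common x86 configs */

/* The same arithmetic as raw_to_real() above, in standalone form. */
static long raw_to_real(long raw)
{
        long per_page = sizeof(unsigned long) + sizeof(int);
        long result = raw - (raw * per_page +
                        (PAGE_SIZE + per_page + 1)) / (PAGE_SIZE + per_page);

        return result < 0 ? 0 : result;
}

int main(void)
{
        /*
         * On 64-bit, per_page is 12 bytes, so 100000 raw pages lose
         * 293 pages to metadata, leaving 99707 usable ones.
         */
        printf("%ld -> %ld\n", 100000L, raw_to_real(100000));
        return 0;
}
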
16767 +
16768 +static int toi_swap_storage_allocated(void)
16769 +{
16770 +       return (int) raw_to_real(swap_pages_allocated - header_pages_reserved);
16771 +}
16772 +
16773 +/*
16774 + * We can't just remember the value from allocation time, because other
16775 + * processes might have allocated swap in the mean time.
16776 + */
16777 +static int toi_swap_storage_available(void)
16778 +{
16779 +       si_swapinfo(&swapinfo);
16780 +       return (int) raw_to_real((long) swapinfo.freeswap +
16781 +                       swap_pages_allocated - header_pages_reserved);
16782 +}
16783 +
16784 +static int toi_swap_initialise(int starting_cycle)
16785 +{
16786 +       if (!starting_cycle)
16787 +               return 0;
16788 +
16789 +       enable_swapfile();
16790 +
16791 +       if (resume_swap_dev_t && !resume_block_device &&
16792 +           IS_ERR(resume_block_device =
16793 +                       open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 1)))
16794 +               return 1;
16795 +
16796 +       return 0;
16797 +}
16798 +
16799 +static void toi_swap_cleanup(int ending_cycle)
16800 +{
16801 +       if (ending_cycle)
16802 +               disable_swapfile();
16803 +
16804 +       close_bdevs();
16805 +
16806 +       forget_signatures();
16807 +}
16808 +
16809 +static int toi_swap_release_storage(void)
16810 +{
16811 +       if (test_action_state(TOI_KEEP_IMAGE) &&
16812 +           test_toi_state(TOI_NOW_RESUMING))
16813 +               return 0;
16814 +
16815 +       header_pages_reserved = 0;
16816 +       swap_pages_allocated = 0;
16817 +
16818 +       if (swapextents.first) {
16819 +               /* Free swap entries */
16820 +               struct hibernate_extent *extentpointer;
16821 +               unsigned long extentvalue;
16822 +               toi_extent_for_each(&swapextents, extentpointer,
16823 +                               extentvalue)
16824 +                       swap_free((swp_entry_t) { extentvalue });
16825 +
16826 +               toi_put_extent_chain(&swapextents);
16827 +
16828 +               free_block_chains();
16829 +       }
16830 +
16831 +       return 0;
16832 +}
16833 +
16834 +static void free_swap_range(unsigned long min, unsigned long max)
16835 +{
16836 +       int j;
16837 +
16838 +       for (j = min; j <= max; j++)
16839 +               swap_free((swp_entry_t) { j });
16840 +}
16841 +
16842 +/*
16843 + * Round robin allocation (used where swap storage has the same priority)
16844 + * could make this very inefficient, so we track extents allocated on
16845 + * a per-swapfile basis.
16846 + *
16847 + * We ignore here the fact that some space is for the header and doesn't
16848 + * have the overhead. It will only rarely make a 1 page difference.
16849 + */
16850 +static int toi_swap_allocate_storage(int request)
16851 +{
16852 +       int i, result = 0, to_add[MAX_SWAPFILES], pages_to_get, extra_pages,
16853 +           gotten = 0;
16854 +       unsigned long extent_min[MAX_SWAPFILES], extent_max[MAX_SWAPFILES];
16855 +
16856 +       extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
16857 +                              + sizeof(int)), PAGE_SIZE);
16858 +       pages_to_get = request + extra_pages - swapextents.size;
16859 +
16860 +       if (pages_to_get < 1)
16861 +               return 0;
16862 +
16863 +       for (i = 0; i < MAX_SWAPFILES; i++) {
16864 +               struct swap_info_struct *si = get_swap_info_struct(i);
16865 +               to_add[i] = 0;
16866 +               if (!si->bdev)
16867 +                       continue;
16868 +               devinfo[i].bdev = si->bdev;
16869 +               devinfo[i].dev_t = si->bdev->bd_dev;
16870 +               devinfo[i].bmap_shift = 3;
16871 +               devinfo[i].blocks_per_page = 1;
16872 +       }
16873 +
16874 +       for (i = 0; i < pages_to_get; i++) {
16875 +               swp_entry_t entry;
16876 +               unsigned long new_value;
16877 +               unsigned swapfilenum;
16878 +
16879 +               entry = get_swap_page();
16880 +               if (!entry.val)
16881 +                       break;
16882 +
16883 +               swapfilenum = swp_type(entry);
16884 +               new_value = entry.val;
16885 +
16886 +               if (!to_add[swapfilenum]) {
16887 +                       to_add[swapfilenum] = 1;
16888 +                       extent_min[swapfilenum] = new_value;
16889 +                       extent_max[swapfilenum] = new_value;
16890 +                       gotten++;
16891 +                       continue;
16892 +               }
16893 +
16894 +               if (new_value == extent_max[swapfilenum] + 1) {
16895 +                       extent_max[swapfilenum]++;
16896 +                       gotten++;
16897 +                       continue;
16898 +               }
16899 +
16900 +               if (toi_add_to_extent_chain(&swapextents,
16901 +                                       extent_min[swapfilenum],
16902 +                                       extent_max[swapfilenum])) {
16903 +                       printk(KERN_INFO "Failed to allocate extent for "
16904 +                                       "%lu-%lu.\n", extent_min[swapfilenum],
16905 +                                       extent_max[swapfilenum]);
16906 +                       free_swap_range(extent_min[swapfilenum],
16907 +                                       extent_max[swapfilenum]);
16908 +                       swap_free(entry);
16909 +                       gotten -= (extent_max[swapfilenum] -
16910 +                                       extent_min[swapfilenum] + 1);
16911 +                       /* Don't try to add again below */
16912 +                       to_add[swapfilenum] = 0;
16913 +                       break;
16914 +               } else {
16915 +                       extent_min[swapfilenum] = new_value;
16916 +                       extent_max[swapfilenum] = new_value;
16917 +                       gotten++;
16918 +               }
16919 +       }
16920 +
16921 +       for (i = 0; i < MAX_SWAPFILES; i++) {
16922 +               if (!to_add[i] || !toi_add_to_extent_chain(&swapextents,
16923 +                                       extent_min[i], extent_max[i]))
16924 +                       continue;
16925 +
16926 +               free_swap_range(extent_min[i], extent_max[i]);
16927 +               gotten -= (extent_max[i] - extent_min[i] + 1);
16928 +               break;
16929 +       }
16930 +
16931 +       if (gotten < pages_to_get)
16932 +               result = -ENOSPC;
16933 +
16934 +       swap_pages_allocated += (long) gotten;
16935 +
16936 +       return result ? result : get_main_pool_phys_params();
16937 +}
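
The extent_min/extent_max bookkeeping above is a run-length encoding of what the allocator hands back: consecutive pages extend the current extent, and a discontinuity flushes it. A standalone sketch, with a made-up sequence standing in for successive get_swap_page() results (flush() and have_extent are illustrative names):

#include <stdio.h>

static void flush(unsigned long min, unsigned long max)
{
        printf("extent %lu-%lu (%lu pages)\n", min, max, max - min + 1);
}

int main(void)
{
        /* Stand-in for successive swap page offsets. */
        unsigned long pages[] = { 10, 11, 12, 40, 41, 97 };
        unsigned long min = 0, max = 0;
        int have_extent = 0;
        size_t i;

        for (i = 0; i < sizeof(pages) / sizeof(pages[0]); i++) {
                if (have_extent && pages[i] == max + 1) {
                        max++;          /* consecutive: extend the extent */
                        continue;
                }
                if (have_extent)
                        flush(min, max);
                min = max = pages[i];   /* discontinuity: start a new one */
                have_extent = 1;
        }
        if (have_extent)
                flush(min, max);        /* final partial extent */
        return 0;
}
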
16938 +
16939 +static int toi_swap_write_header_init(void)
16940 +{
16941 +       int i, result;
16942 +       struct swap_info_struct *si;
16943 +
16944 +       toi_bio_ops.rw_init(WRITE, 0);
16945 +       toi_writer_buffer_posn = 0;
16946 +
16947 +       /* Info needed to bootstrap goes at the start of the header.
16948 +        * First we save the positions and devinfo, including the number
16949 +        * of header pages. Then we save the structs containing data needed
16950 +        * for reading the header pages back.
16951 +        * Note that even if the header takes more than one page, when we
16952 +        * read back the info, we will have restored the location of the
16953 +        * next header page by the time we go to use it.
16954 +        */
16955 +
16956 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
16957 +                       (char *) &no_image_signature_contents,
16958 +                       sizeof(struct sig_data));
16959 +
16960 +       if (result)
16961 +               return result;
16962 +
16963 +       /* Forwarding one page will be done prior to the read */
16964 +       for (i = 0; i < MAX_SWAPFILES; i++) {
16965 +               si = get_swap_info_struct(i);
16966 +               if (si->swap_file)
16967 +                       devinfo[i].dev_t = si->bdev->bd_dev;
16968 +               else
16969 +                       devinfo[i].dev_t = (dev_t) 0;
16970 +       }
16971 +
16972 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
16973 +                       (char *) &toi_writer_posn_save,
16974 +                       sizeof(toi_writer_posn_save));
16975 +
16976 +       if (result)
16977 +               return result;
16978 +
16979 +       result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
16980 +                       (char *) &devinfo, sizeof(devinfo));
16981 +
16982 +       if (result)
16983 +               return result;
16984 +
16985 +       for (i = 0; i < MAX_SWAPFILES; i++)
16986 +               toi_serialise_extent_chain(&toi_swapops, &block_chain[i]);
16987 +
16988 +       return 0;
16989 +}
16990 +
16991 +static int toi_swap_write_header_cleanup(void)
16992 +{
16993 +       /* Write any unsaved data */
16994 +       if (toi_writer_buffer_posn)
16995 +               toi_bio_ops.write_header_chunk_finish();
16996 +
16997 +       toi_bio_ops.finish_all_io();
16998 +
16999 +       /* Set the signature to say we have an image */
17000 +       return write_modified_signature(IMAGE_SIGNATURE);
17001 +}
17002 +
17003 +/* ------------------------- HEADER READING ------------------------- */
17004 +
17005 +/*
17006 + * read_header_init()
17007 + *
17008 + * Description:
17009 + * 1. Attempt to read the device specified with resume=.
17010 + * 2. Check the contents of the swap header for our signature.
17011 + * 3. Warn, ignore, reset and/or continue as appropriate.
17012 + * 4. If continuing, read the toi_swap configuration section
17013 + *    of the header and set up block device info so we can read
17014 + *    the rest of the header & image.
17015 + *
17016 + * Returns:
17017 + * May not return if the user chooses to reboot at a warning.
17018 + * -EINVAL if we cannot resume at this time. Booting should continue
17019 + * normally.
17020 + */
17021 +
17022 +static int toi_swap_read_header_init(void)
17023 +{
17024 +       int i, result = 0;
17025 +       toi_writer_buffer_posn = 0;
17026 +
17027 +       if (!header_dev_t) {
17028 +               printk(KERN_INFO "read_header_init called when we haven't "
17029 +                               "verified there is an image!\n");
17030 +               return -EINVAL;
17031 +       }
17032 +
17033 +       /*
17034 +        * If the header is not on the resume_swap_dev_t, get the resume device
17035 +        * first.
17036 +        */
17037 +       if (header_dev_t != resume_swap_dev_t) {
17038 +               header_block_device = open_bdev(MAX_SWAPFILES + 1,
17039 +                               header_dev_t, 1);
17040 +
17041 +               if (IS_ERR(header_block_device))
17042 +                       return PTR_ERR(header_block_device);
17043 +       } else
17044 +               header_block_device = resume_block_device;
17045 +
17046 +       toi_bio_ops.read_header_init();
17047 +
17048 +       /*
17049 +        * Read toi_swap configuration.
17050 +        * Headerblock size taken into account already.
17051 +        */
17052 +       result = toi_bio_ops.bdev_page_io(READ, header_block_device,
17053 +                       headerblock << 3,
17054 +                       virt_to_page((unsigned long) toi_writer_buffer));
17055 +       if (result)
17056 +               return result;
17057 +
17058 +       memcpy(&no_image_signature_contents, toi_writer_buffer,
17059 +                       sizeof(no_image_signature_contents));
17060 +
17061 +       toi_writer_buffer_posn = sizeof(no_image_signature_contents);
17062 +
17063 +       memcpy(&toi_writer_posn_save, toi_writer_buffer +
17064 +                       toi_writer_buffer_posn, sizeof(toi_writer_posn_save));
17065 +
17066 +       toi_writer_buffer_posn += sizeof(toi_writer_posn_save);
17067 +
17068 +       memcpy(&devinfo, toi_writer_buffer + toi_writer_buffer_posn,
17069 +                       sizeof(devinfo));
17070 +
17071 +       toi_writer_buffer_posn += sizeof(devinfo);
17072 +
17073 +       /* Restore device info */
17074 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17075 +               dev_t thisdevice = devinfo[i].dev_t;
17076 +               struct block_device *bdev_result;
17077 +
17078 +               devinfo[i].bdev = NULL;
17079 +
17080 +               if (!thisdevice)
17081 +                       continue;
17082 +
17083 +               if (thisdevice == resume_swap_dev_t) {
17084 +                       devinfo[i].bdev = resume_block_device;
17085 +                       continue;
17086 +               }
17087 +
17088 +               if (thisdevice == header_dev_t) {
17089 +                       devinfo[i].bdev = header_block_device;
17090 +                       continue;
17091 +               }
17092 +
17093 +               bdev_result = open_bdev(i, thisdevice, 1);
17094 +               if (IS_ERR(bdev_result))
17095 +                       return PTR_ERR(bdev_result);
17096 +               devinfo[i].bdev = bdevs_opened[i]->bdev;
17097 +       }
17098 +
17099 +       toi_extent_state_goto_start(&toi_writer_posn);
17100 +       toi_bio_ops.set_extra_page_forward();
17101 +
17102 +       for (i = 0; i < MAX_SWAPFILES && !result; i++)
17103 +               result = toi_load_extent_chain(&block_chain[i]);
17104 +
17105 +       return result;
17106 +}
17107 +
17108 +static int toi_swap_read_header_cleanup(void)
17109 +{
17110 +       toi_bio_ops.rw_cleanup(READ);
17111 +       return 0;
17112 +}
17113 +
17114 +/*
17115 + * toi_swap_memory_needed
17116 + *
17117 + * Description:
17118 + * Returns the number of bytes of RAM needed for this
17119 + * code to do its work. (Used when calculating whether
17120 + * we have enough memory to be able to hibernate & resume).
17121 + *
17122 + */
17123 +static int toi_swap_memory_needed(void)
17124 +{
17125 +       return 1;
17126 +}
17127 +
17128 +/*
17129 + * Print debug info
17130 + *
17131 + * Description: Report the allocator's status and available swap.
17132 + */
17133 +static int toi_swap_print_debug_stats(char *buffer, int size)
17134 +{
17135 +       int len = 0;
17136 +       struct sysinfo sysinfo;
17137 +
17138 +       if (toiActiveAllocator != &toi_swapops) {
17139 +               len = scnprintf(buffer, size,
17140 +                               "- SwapAllocator inactive.\n");
17141 +               return len;
17142 +       }
17143 +
17144 +       len = scnprintf(buffer, size, "- SwapAllocator active.\n");
17145 +       if (swapfilename[0])
17146 +               len += scnprintf(buffer+len, size-len,
17147 +                       "  Attempting to automatically swapon: %s.\n",
17148 +                       swapfilename);
17149 +
17150 +       si_swapinfo(&sysinfo);
17151 +
17152 +       len += scnprintf(buffer+len, size-len,
17153 +                       "  Swap available for image: %d pages.\n",
17154 +                       (int) sysinfo.freeswap + toi_swap_storage_allocated());
17155 +
17156 +       return len;
17157 +}
17158 +
17159 +/*
17160 + * Storage needed
17161 + *
17162 + * Returns amount of space in the swap header required
17163 + * for toi_swap's data. This ignores the links between
17164 + * pages, which we factor in when allocating the space.
17165 + *
17166 + * We ensure the space is allocated, but actually save the
17167 + * data from write_header_init and therefore don't also define a
17168 + * save_config_info routine.
17169 + */
17170 +static int toi_swap_storage_needed(void)
17171 +{
17172 +       int i, result;
17173 +       result = sizeof(toi_writer_posn_save) + sizeof(devinfo);
17174 +
17175 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17176 +               result += 3 * sizeof(int);
17177 +               result += (2 * sizeof(unsigned long) *
17178 +                       block_chain[i].num_extents);
17179 +       }
17180 +
17181 +       return result;
17182 +}
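
So the reservation grows by three ints of chain metadata per swapfile plus two unsigned longs per extent. A small illustration of that arithmetic (MAX_SWAPFILES is assumed to be 32 here; the real value is a kernel constant, and the posn_save/devinfo portion is left out):

#include <stdio.h>

#define MAX_SWAPFILES 32        /* assumed for illustration */

int main(void)
{
        /* Mirror of the loop above, with made-up extent counts. */
        int num_extents[MAX_SWAPFILES] = { 4, 1 };      /* rest are zero */
        int i, result = 0;

        for (i = 0; i < MAX_SWAPFILES; i++) {
                result += 3 * sizeof(int);
                result += 2 * sizeof(unsigned long) * num_extents[i];
        }

        /* 32 * 12 + 16 * (4 + 1) = 464 bytes on a 64-bit build. */
        printf("header bytes for extent chains: %d\n", result);
        return 0;
}
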
17183 +
17184 +/*
17185 + * Image_exists
17186 + *
17187 + * Returns -1 if we don't know, otherwise 0 (no) or 1 (yes).
17188 + */
17189 +static int toi_swap_image_exists(int quiet)
17190 +{
17191 +       int signature_found;
17192 +
17193 +       if (!resume_swap_dev_t) {
17194 +               if (!quiet)
17195 +                       printk(KERN_INFO "Not even trying to read header "
17196 +                               "because resume_swap_dev_t is not set.\n");
17197 +               return -1;
17198 +       }
17199 +
17200 +       if (!resume_block_device &&
17201 +           IS_ERR(resume_block_device =
17202 +                       open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 1))) {
17203 +               if (!quiet)
17204 +                       printk(KERN_INFO "Failed to open resume dev_t (%x).\n",
17205 +                               resume_swap_dev_t);
17206 +               return -1;
17207 +       }
17208 +
17209 +       signature_found = parse_signature();
17210 +
17211 +       switch (signature_found) {
17212 +       case -ENOMEM:
17213 +               return -1;
17214 +       case -1:
17215 +               if (!quiet)
17216 +                       printk(KERN_ERR "TuxOnIce: Unable to find a signature."
17217 +                               " Could you have moved a swap file?\n");
17218 +               return -1;
17219 +       case 0:
17220 +       case 1:
17221 +               if (!quiet)
17222 +                       printk(KERN_INFO "TuxOnIce: Normal swapspace found.\n");
17223 +               return 0;
17224 +       case 2:
17225 +       case 3:
17226 +       case 4:
17227 +               if (!quiet)
17228 +                       printk(KERN_INFO "TuxOnIce: Detected another "
17229 +                               "implementation's signature.\n");
17230 +               return 0;
17231 +       case 10:
17232 +               if (!quiet)
17233 +                       printk(KERN_INFO "TuxOnIce: Detected TuxOnIce binary "
17234 +                               "signature.\n");
17235 +               return 1;
17236 +       }
17237 +
17238 +       BUG();
17239 +       return 0;
17240 +}
17241 +
17242 +/* toi_swap_remove_image
17243 + * Invalidate the image by restoring the original swap signature.
17244 + */
17245 +static int toi_swap_remove_image(void)
17246 +{
17247 +       /*
17248 +        * If nr_hibernates == 0, we must be booting, so no swap pages
17249 +        * will be recorded as used yet.
17250 +        */
17251 +
17252 +       if (nr_hibernates)
17253 +               toi_swap_release_storage();
17254 +
17255 +       /*
17256 +        * We don't do a sanity check here: we want to restore the swap
17257 +        * whatever version of the kernel made the hibernate image.
17258 +        *
17259 +        * We need to write swap, but swap may not be enabled so
17260 +        * we write the device directly
17261 +        *
17262 +        * If we don't have a current_signature_page, we didn't
17263 +        * read an image header, so don't change anything.
17264 +        */
17265 +
17266 +       return toi_swap_image_exists(1) ?
17267 +               write_modified_signature(NO_IMAGE_SIGNATURE) : 0;
17268 +}
17269 +
17270 +/*
17271 + * Mark resume attempted.
17272 + *
17273 + * Record that we tried to resume from this image. We have already read the
17274 + * signature in. We just need to write the modified version.
17275 + */
17276 +static int toi_swap_mark_resume_attempted(int mark)
17277 +{
17278 +       if (!resume_swap_dev_t) {
17279 +               printk(KERN_INFO "Not even trying to record attempt at resuming"
17280 +                               " because resume_swap_dev_t is not set.\n");
17281 +               return -ENODEV;
17282 +       }
17283 +
17284 +       return write_modified_signature(mark ? TRIED_RESUME : NO_TRIED_RESUME);
17285 +}
17286 +
17287 +/*
17288 + * Parse Image Location
17289 + *
17290 + * Attempt to parse a resume= parameter.
17291 + * Swap Writer accepts:
17292 + * resume=swap:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
17293 + *
17294 + * Where:
17295 + * DEVNAME is convertible to a dev_t by name_to_dev_t
17296 + * FIRSTBLOCK is the location of the first block in the swap file
17297 + * (specifying one for a swap partition is nonsensical but not prohibited).
17298 + * Data is validated by attempting to read a swap header from the
17299 + * location given. Failure will result in toi_swap refusing to
17300 + * save an image, and a reboot with correct parameters will be
17301 + * necessary.
17302 + */
17303 +static int toi_swap_parse_sig_location(char *commandline,
17304 +               int only_allocator, int quiet)
17305 +{
17306 +       char *thischar, *devstart, *colon = NULL;
17307 +       int signature_found, result = -EINVAL, temp_result;
17308 +
17309 +       if (strncmp(commandline, "swap:", 5)) {
17310 +               /*
17311 +                * Failing swap:, we'll take a simple
17312 +                * resume=/dev/hda2, but fall through to
17313 +                * other allocators if /dev/ isn't matched.
17314 +                */
17315 +               if (strncmp(commandline, "/dev/", 5))
17316 +                       return 1;
17317 +       } else
17318 +               commandline += 5;
17319 +
17320 +       devstart = thischar = commandline;
17321 +       while ((*thischar != ':') && (*thischar != '@') &&
17322 +               ((thischar - commandline) < 250) && (*thischar))
17323 +               thischar++;
17324 +
17325 +       if (*thischar == ':') {
17326 +               colon = thischar;
17327 +               *colon = 0;
17328 +               thischar++;
17329 +       }
17330 +
17331 +       while ((thischar - commandline) < 250 && *thischar)
17332 +               thischar++;
17333 +
17334 +       if (colon)
17335 +               resume_firstblock = (int) simple_strtoul(colon + 1, NULL, 0);
17336 +       else
17337 +               resume_firstblock = 0;
17338 +
17339 +       clear_toi_state(TOI_CAN_HIBERNATE);
17340 +       clear_toi_state(TOI_CAN_RESUME);
17341 +
17342 +       temp_result = try_to_parse_resume_device(devstart, quiet);
17343 +
17344 +       if (colon)
17345 +               *colon = ':';
17346 +
17347 +       if (temp_result)
17348 +               return -EINVAL;
17349 +
17350 +       signature_found = toi_swap_image_exists(quiet);
17351 +
17352 +       if (signature_found != -1) {
17353 +               result = 0;
17354 +
17355 +               toi_bio_ops.set_devinfo(devinfo);
17356 +               toi_writer_posn.chains = &block_chain[0];
17357 +               toi_writer_posn.num_chains = MAX_SWAPFILES;
17358 +               set_toi_state(TOI_CAN_HIBERNATE);
17359 +               set_toi_state(TOI_CAN_RESUME);
17360 +       } else
17361 +               if (!quiet)
17362 +                       printk(KERN_ERR "TuxOnIce: SwapAllocator: No swap "
17363 +                               "signature found at %s.\n", devstart);
17364 +       return result;
17365 +}
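
A standalone version of the resume= splitting done above; parse_resume() is a simplified, hypothetical helper (the @BLOCKSIZE suffix handling and the 250-character bound are dropped):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Split "swap:DEVNAME[:FIRSTBLOCK]". Returns 0 and fills the outputs. */
static int parse_resume(char *arg, char **dev, unsigned long *firstblock)
{
        char *colon;

        if (!strncmp(arg, "swap:", 5))
                arg += 5;
        else if (strncmp(arg, "/dev/", 5))
                return 1;               /* hand off to another allocator */

        colon = strchr(arg, ':');
        if (colon) {
                *colon = '\0';
                *firstblock = strtoul(colon + 1, NULL, 0);
        } else {
                *firstblock = 0;
        }

        *dev = arg;
        return 0;
}

int main(void)
{
        char arg[] = "swap:/dev/hda2:0x10";
        char *dev;
        unsigned long firstblock;

        if (!parse_resume(arg, &dev, &firstblock))
                printf("device %s, first block %#lx\n", dev, firstblock);
        return 0;
}
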
17366 +
17367 +static int header_locations_read_sysfs(const char *page, int count)
17368 +{
17369 +       int i, printedpartitionsmessage = 0, len = 0, haveswap = 0;
17370 +       struct inode *swapf = NULL;
17371 +       int zone;
17372 +       char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL);
17373 +       char *path, *output = (char *) page;
17374 +       int path_len;
17375 +
17376 +       if (!page || !path_page)
17377 +               return 0;
17378 +
17379 +       for (i = 0; i < MAX_SWAPFILES; i++) {
17380 +               struct swap_info_struct *si =  get_swap_info_struct(i);
17381 +
17382 +               if (!si->swap_file)
17383 +                       continue;
17384 +
17385 +               if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) {
17386 +                       haveswap = 1;
17387 +                       if (!printedpartitionsmessage) {
17388 +                               len += sprintf(output + len,
17389 +                                       "For swap partitions, simply use the "
17390 +                                       "format: resume=swap:/dev/hda1.\n");
17391 +                               printedpartitionsmessage = 1;
17392 +                       }
17393 +               } else {
17394 +                       path_len = 0;
17395 +
17396 +                       path = d_path(&si->swap_file->f_path, path_page,
17397 +                                       PAGE_SIZE);
17398 +                       path_len = snprintf(path_page, 31, "%s", path);
17399 +
17400 +                       haveswap = 1;
17401 +                       swapf = si->swap_file->f_mapping->host;
17402 +                       zone = bmap(swapf, 0);
17403 +                       if (!zone) {
17404 +                               len += sprintf(output + len,
17405 +                                       "Swapfile %s has been corrupted. Re-run"
17406 +                                       " mkswap on it and try again.\n",
17407 +                                       path_page);
17408 +                       } else {
17409 +                               char name_buffer[255];
17410 +                               len += sprintf(output + len,
17411 +                                       "For swapfile `%s`,"
17412 +                                       " use resume=swap:/dev/%s:0x%x.\n",
17413 +                                       path_page,
17414 +                                       bdevname(si->bdev, name_buffer),
17415 +                                       zone << (swapf->i_blkbits - 9));
17416 +                       }
17417 +               }
17418 +       }
17419 +
17420 +       if (!haveswap)
17421 +               len = sprintf(output, "You need to turn on swap partitions "
17422 +                               "before examining this file.\n");
17423 +
17424 +       toi_free_page(10, (unsigned long) path_page);
17425 +       return len;
17426 +}
17427 +
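
The FIRSTBLOCK value suggested by header_locations_read_sysfs() is the swap
file's first filesystem block (from bmap()) converted to 512-byte sectors.
A stand-alone sketch of that conversion, with hypothetical numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned long zone = 4200;	/* hypothetical first block from bmap() */
		unsigned int i_blkbits = 12;	/* 4096-byte filesystem blocks */
		unsigned long sector = zone << (i_blkbits - 9);	/* 9 = log2(512) */

		/* Matches the "resume=swap:/dev/...:0x%x" hint printed above. */
		printf("resume=swap:/dev/sda1:0x%lx\n", sector);
		return 0;
	}
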
17428 +static struct toi_sysfs_data sysfs_params[] = {
17429 +       SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL),
17430 +       SYSFS_CUSTOM("headerlocations", SYSFS_READONLY,
17431 +                       header_locations_read_sysfs, NULL, 0, NULL),
17432 +       SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0,
17433 +                       attempt_to_parse_resume_device2),
17434 +};
17435 +
17436 +static struct toi_module_ops toi_swapops = {
17437 +       .type                                   = WRITER_MODULE,
17438 +       .name                                   = "swap storage",
17439 +       .directory                              = "swap",
17440 +       .module                                 = THIS_MODULE,
17441 +       .memory_needed                          = toi_swap_memory_needed,
17442 +       .print_debug_info                       = toi_swap_print_debug_stats,
17443 +       .storage_needed                         = toi_swap_storage_needed,
17444 +       .initialise                             = toi_swap_initialise,
17445 +       .cleanup                                = toi_swap_cleanup,
17446 +
17447 +       .noresume_reset         = toi_swap_noresume_reset,
17448 +       .storage_available      = toi_swap_storage_available,
17449 +       .storage_allocated      = toi_swap_storage_allocated,
17450 +       .release_storage        = toi_swap_release_storage,
17451 +       .reserve_header_space   = toi_swap_reserve_header_space,
17452 +       .allocate_storage       = toi_swap_allocate_storage,
17453 +       .image_exists           = toi_swap_image_exists,
17454 +       .mark_resume_attempted  = toi_swap_mark_resume_attempted,
17455 +       .write_header_init      = toi_swap_write_header_init,
17456 +       .write_header_cleanup   = toi_swap_write_header_cleanup,
17457 +       .read_header_init       = toi_swap_read_header_init,
17458 +       .read_header_cleanup    = toi_swap_read_header_cleanup,
17459 +       .remove_image           = toi_swap_remove_image,
17460 +       .parse_sig_location     = toi_swap_parse_sig_location,
17461 +
17462 +       .sysfs_data             = sysfs_params,
17463 +       .num_sysfs_entries      = sizeof(sysfs_params) /
17464 +               sizeof(struct toi_sysfs_data),
17465 +};
17466 +
17467 +/* ---- Registration ---- */
17468 +static __init int toi_swap_load(void)
17469 +{
17470 +       toi_swapops.rw_init = toi_bio_ops.rw_init;
17471 +       toi_swapops.rw_cleanup = toi_bio_ops.rw_cleanup;
17472 +       toi_swapops.read_page = toi_bio_ops.read_page;
17473 +       toi_swapops.write_page = toi_bio_ops.write_page;
17474 +       toi_swapops.rw_header_chunk = toi_bio_ops.rw_header_chunk;
17475 +       toi_swapops.rw_header_chunk_noreadahead =
17476 +               toi_bio_ops.rw_header_chunk_noreadahead;
17477 +       toi_swapops.io_flusher = toi_bio_ops.io_flusher;
17478 +       toi_swapops.update_throughput_throttle = toi_bio_ops.update_throughput_throttle;
17479 +       toi_swapops.monitor_outstanding_io = toi_bio_ops.monitor_outstanding_io;
17480 +       toi_swapops.finish_all_io = toi_bio_ops.finish_all_io;
17481 +
17482 +       return toi_register_module(&toi_swapops);
17483 +}
17484 +
17485 +#ifdef MODULE
17486 +static __exit void toi_swap_unload(void)
17487 +{
17488 +       toi_unregister_module(&toi_swapops);
17489 +}
17490 +
17491 +module_init(toi_swap_load);
17492 +module_exit(toi_swap_unload);
17493 +MODULE_LICENSE("GPL");
17494 +MODULE_AUTHOR("Nigel Cunningham");
17495 +MODULE_DESCRIPTION("TuxOnIce SwapAllocator");
17496 +#else
17497 +late_initcall(toi_swap_load);
17498 +#endif
17499 diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c
17500 new file mode 100644
17501 index 0000000..4f64dc7
17502 --- /dev/null
17503 +++ b/kernel/power/tuxonice_sysfs.c
17504 @@ -0,0 +1,324 @@
17505 +/*
17506 + * kernel/power/tuxonice_sysfs.c
17507 + *
17508 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
17509 + *
17510 + * This file is released under the GPLv2.
17511 + *
17512 + * This file contains support for sysfs entries for tuning TuxOnIce.
17513 + *
17514 + * We have a generic handler that deals with the most common cases, and
17515 + * hooks for special handlers to use.
17516 + */
17517 +
17518 +#include <linux/suspend.h>
17519 +#include <linux/module.h>
17520 +
17521 +#include "tuxonice_sysfs.h"
17522 +#include "tuxonice.h"
17523 +#include "tuxonice_storage.h"
17524 +#include "tuxonice_alloc.h"
17525 +
17526 +static int toi_sysfs_initialised;
17527 +
17528 +static void toi_initialise_sysfs(void);
17529 +
17530 +static struct toi_sysfs_data sysfs_params[];
17531 +
17532 +#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr)
17533 +
17534 +static void toi_main_wrapper(void)
17535 +{
17536 +       _toi_try_hibernate();
17537 +}
17538 +
17539 +static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr,
17540 +                             char *page)
17541 +{
17542 +       struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
17543 +       int len = 0;
17544 +       int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ;
17545 +
17546 +       if (full_prep && toi_start_anything(0))
17547 +               return -EBUSY;
17548 +
17549 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
17550 +               toi_prepare_usm();
17551 +
17552 +       switch (sysfs_data->type) {
17553 +       case TOI_SYSFS_DATA_CUSTOM:
17554 +               len = (sysfs_data->data.special.read_sysfs) ?
17555 +                       (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE)
17556 +                       : 0;
17557 +               break;
17558 +       case TOI_SYSFS_DATA_BIT:
17559 +               len = sprintf(page, "%d\n",
17560 +                       -test_bit(sysfs_data->data.bit.bit,
17561 +                               sysfs_data->data.bit.bit_vector));
17562 +               break;
17563 +       case TOI_SYSFS_DATA_INTEGER:
17564 +               len = sprintf(page, "%d\n",
17565 +                       *(sysfs_data->data.integer.variable));
17566 +               break;
17567 +       case TOI_SYSFS_DATA_LONG:
17568 +               len = sprintf(page, "%ld\n",
17569 +                       *(sysfs_data->data.a_long.variable));
17570 +               break;
17571 +       case TOI_SYSFS_DATA_UL:
17572 +               len = sprintf(page, "%lu\n",
17573 +                       *(sysfs_data->data.ul.variable));
17574 +               break;
17575 +       case TOI_SYSFS_DATA_STRING:
17576 +               len = sprintf(page, "%s\n",
17577 +                       sysfs_data->data.string.variable);
17578 +               break;
17579 +       }
17580 +
17581 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
17582 +               toi_cleanup_usm();
17583 +
17584 +       if (full_prep)
17585 +               toi_finish_anything(0);
17586 +
17587 +       return len;
17588 +}
17589 +
17590 +#define BOUND(_variable, _type) do { \
17591 +       if (*_variable < sysfs_data->data._type.minimum) \
17592 +               *_variable = sysfs_data->data._type.minimum; \
17593 +       else if (*_variable > sysfs_data->data._type.maximum) \
17594 +               *_variable = sysfs_data->data._type.maximum; \
17595 +} while (0)
17596 +
17597 +static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr,
17598 +               const char *my_buf, size_t count)
17599 +{
17600 +       int assigned_temp_buffer = 0, result = count;
17601 +       struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
17602 +
17603 +       if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME)))
17604 +               return -EBUSY;
17605 +
17606 +       ((char *) my_buf)[count] = 0;
17607 +
17608 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
17609 +               toi_prepare_usm();
17610 +
17611 +       switch (sysfs_data->type) {
17612 +       case TOI_SYSFS_DATA_CUSTOM:
17613 +               if (sysfs_data->data.special.write_sysfs)
17614 +                       result = (sysfs_data->data.special.write_sysfs)(my_buf,
17615 +                                       count);
17616 +               break;
17617 +       case TOI_SYSFS_DATA_BIT:
17618 +               {
17619 +               int value = simple_strtoul(my_buf, NULL, 0);
17620 +               if (value)
17621 +                       set_bit(sysfs_data->data.bit.bit,
17622 +                               (sysfs_data->data.bit.bit_vector));
17623 +               else
17624 +                       clear_bit(sysfs_data->data.bit.bit,
17625 +                               (sysfs_data->data.bit.bit_vector));
17626 +               }
17627 +               break;
17628 +       case TOI_SYSFS_DATA_INTEGER:
17629 +               {
17630 +                       int *variable =
17631 +                               sysfs_data->data.integer.variable;
17632 +                       *variable = simple_strtol(my_buf, NULL, 0);
17633 +                       BOUND(variable, integer);
17634 +                       break;
17635 +               }
17636 +       case TOI_SYSFS_DATA_LONG:
17637 +               {
17638 +                       long *variable =
17639 +                               sysfs_data->data.a_long.variable;
17640 +                       *variable = simple_strtol(my_buf, NULL, 0);
17641 +                       BOUND(variable, a_long);
17642 +                       break;
17643 +               }
17644 +       case TOI_SYSFS_DATA_UL:
17645 +               {
17646 +                       unsigned long *variable =
17647 +                               sysfs_data->data.ul.variable;
17648 +                       *variable = simple_strtoul(my_buf, NULL, 0);
17649 +                       BOUND(variable, ul);
17650 +                       break;
17651 +               }
17652 +
17653 +       case TOI_SYSFS_DATA_STRING:
17654 +               {
17655 +                       int copy_len = count;
17656 +                       char *variable =
17657 +                               sysfs_data->data.string.variable;
17658 +
17659 +                       if (sysfs_data->data.string.max_length &&
17660 +                           (copy_len > sysfs_data->data.string.max_length))
17661 +                               copy_len = sysfs_data->data.string.max_length;
17662 +
17663 +                       if (!variable) {
17664 +                               variable = (char *) toi_get_zeroed_page(31,
17665 +                                               TOI_ATOMIC_GFP);
17666 +                               sysfs_data->data.string.variable = variable;
17667 +                               assigned_temp_buffer = 1;
17668 +                       }
17669 +                       strncpy(variable, my_buf, copy_len);
17670 +                       if (copy_len && my_buf[copy_len - 1] == '\n')
17671 +                               variable[copy_len - 1] = 0;
17672 +                       variable[copy_len] = 0;
17673 +               }
17674 +               break;
17675 +       }
17676 +
17677 +       /* Side effect routine? */
17678 +       if (sysfs_data->write_side_effect)
17679 +               sysfs_data->write_side_effect();
17680 +
17681 +       /* Free temporary buffers */
17682 +       if (assigned_temp_buffer) {
17683 +               toi_free_page(31,
17684 +                       (unsigned long) sysfs_data->data.string.variable);
17685 +               sysfs_data->data.string.variable = NULL;
17686 +       }
17687 +
17688 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
17689 +               toi_cleanup_usm();
17690 +
17691 +       toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME);
17692 +
17693 +       return result;
17694 +}
17695 +
17696 +static struct sysfs_ops toi_sysfs_ops = {
17697 +       .show   = &toi_attr_show,
17698 +       .store  = &toi_attr_store,
17699 +};
17700 +
17701 +static struct kobj_type toi_ktype = {
17702 +       .sysfs_ops      = &toi_sysfs_ops,
17703 +};
17704 +
17705 +struct kobject *tuxonice_kobj;
17706 +
17707 +/* Non-module sysfs entries.
17708 + *
17709 + * This array contains entries that are automatically registered at
17710 + * boot. Modules and the console code register their own entries separately.
17711 + */
17712 +
17713 +static struct toi_sysfs_data sysfs_params[] = {
17714 +       SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL,
17715 +               SYSFS_HIBERNATING, toi_main_wrapper),
17716 +       SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL,
17717 +               SYSFS_RESUMING, __toi_try_resume)
17718 +};
17719 +
17720 +void remove_toi_sysdir(struct kobject *kobj)
17721 +{
17722 +       if (!kobj)
17723 +               return;
17724 +
17725 +       kobject_put(kobj);
17726 +}
17727 +
17728 +struct kobject *make_toi_sysdir(char *name)
17729 +{
17730 +       struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj);
17731 +
17732 +       if (!kobj) {
17733 +               printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs "
17734 +                               "dir!\n");
17735 +               return NULL;
17736 +       }
17737 +
17738 +       kobj->ktype = &toi_ktype;
17739 +
17740 +       return kobj;
17741 +}
17742 +
17743 +/* toi_register_sysfs_file
17744 + *
17745 + * Helper for registering a new /sys/power/tuxonice entry.
17746 + */
17747 +
17748 +int toi_register_sysfs_file(
17749 +               struct kobject *kobj,
17750 +               struct toi_sysfs_data *toi_sysfs_data)
17751 +{
17752 +       int result;
17753 +
17754 +       if (!toi_sysfs_initialised)
17755 +               toi_initialise_sysfs();
17756 +
17757 +       result = sysfs_create_file(kobj, &toi_sysfs_data->attr);
17758 +       if (result)
17759 +               printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s "
17760 +                       "returned %d.\n",
17761 +                       toi_sysfs_data->attr.name, result);
17762 +       kobj->ktype = &toi_ktype;
17763 +
17764 +       return result;
17765 +}
17766 +EXPORT_SYMBOL_GPL(toi_register_sysfs_file);
17767 +
17768 +/* toi_unregister_sysfs_file
17769 + *
17770 + * Helper for removing unwanted /sys/power/tuxonice entries.
17771 + *
17772 + */
17773 +void toi_unregister_sysfs_file(struct kobject *kobj,
17774 +               struct toi_sysfs_data *toi_sysfs_data)
17775 +{
17776 +       sysfs_remove_file(kobj, &toi_sysfs_data->attr);
17777 +}
17778 +EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file);
17779 +
17780 +void toi_cleanup_sysfs(void)
17781 +{
17782 +       int i,
17783 +           numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
17784 +
17785 +       if (!toi_sysfs_initialised)
17786 +               return;
17787 +
17788 +       for (i = 0; i < numfiles; i++)
17789 +               toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
17790 +
17791 +       kobject_put(tuxonice_kobj);
17792 +       toi_sysfs_initialised = 0;
17793 +}
17794 +
17795 +/* toi_initialise_sysfs
17796 + *
17797 + * Initialise the /sys/power/tuxonice directory.
17798 + */
17799 +
17800 +static void toi_initialise_sysfs(void)
17801 +{
17802 +       int i;
17803 +       int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
17804 +
17805 +       if (toi_sysfs_initialised)
17806 +               return;
17807 +
17808 +       /* Make our TuxOnIce directory a child of /sys/power */
17809 +       tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj);
17810 +       if (!tuxonice_kobj)
17811 +               return;
17812 +
17813 +       toi_sysfs_initialised = 1;
17814 +
17815 +       for (i = 0; i < numfiles; i++)
17816 +               toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
17817 +}
17818 +
17819 +int toi_sysfs_init(void)
17820 +{
17821 +       toi_initialise_sysfs();
17822 +       return 0;
17823 +}
17824 +
17825 +void toi_sysfs_exit(void)
17826 +{
17827 +       toi_cleanup_sysfs();
17828 +}
17829 diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h
17830 new file mode 100644
17831 index 0000000..2fea1cc
17832 --- /dev/null
17833 +++ b/kernel/power/tuxonice_sysfs.h
17834 @@ -0,0 +1,138 @@
17835 +/*
17836 + * kernel/power/tuxonice_sysfs.h
17837 + *
17838 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
17839 + *
17840 + * This file is released under the GPLv2.
17841 + */
17842 +
17843 +#include <linux/sysfs.h>
17844 +#include "power.h"
17845 +
17846 +struct toi_sysfs_data {
17847 +       struct attribute attr;
17848 +       int type;
17849 +       int flags;
17850 +       union {
17851 +               struct {
17852 +                       unsigned long *bit_vector;
17853 +                       int bit;
17854 +               } bit;
17855 +               struct {
17856 +                       int *variable;
17857 +                       int minimum;
17858 +                       int maximum;
17859 +               } integer;
17860 +               struct {
17861 +                       long *variable;
17862 +                       long minimum;
17863 +                       long maximum;
17864 +               } a_long;
17865 +               struct {
17866 +                       unsigned long *variable;
17867 +                       unsigned long minimum;
17868 +                       unsigned long maximum;
17869 +               } ul;
17870 +               struct {
17871 +                       char *variable;
17872 +                       int max_length;
17873 +               } string;
17874 +               struct {
17875 +                       int (*read_sysfs) (const char *buffer, int count);
17876 +                       int (*write_sysfs) (const char *buffer, int count);
17877 +                       void *data;
17878 +               } special;
17879 +       } data;
17880 +
17881 +       /* Side effect routine. Used, e.g., for reparsing the
17882 +        * resume= entry when it changes. */
17883 +       void (*write_side_effect) (void);
17884 +       struct list_head sysfs_data_list;
17885 +};
17886 +
17887 +enum {
17888 +       TOI_SYSFS_DATA_NONE = 1,
17889 +       TOI_SYSFS_DATA_CUSTOM,
17890 +       TOI_SYSFS_DATA_BIT,
17891 +       TOI_SYSFS_DATA_INTEGER,
17892 +       TOI_SYSFS_DATA_UL,
17893 +       TOI_SYSFS_DATA_LONG,
17894 +       TOI_SYSFS_DATA_STRING
17895 +};
17896 +
17897 +#define SYSFS_WRITEONLY 0200
17898 +#define SYSFS_READONLY 0444
17899 +#define SYSFS_RW 0644
17900 +
17901 +#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \
17902 +       .attr = {.name  = _name , .mode   = _mode }, \
17903 +       .type = TOI_SYSFS_DATA_BIT, \
17904 +       .flags = _flags, \
17905 +       .data = { .bit = { .bit_vector = _ul, .bit = _bit } } }
17906 +
17907 +#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \
17908 +       .attr = {.name  = _name , .mode   = _mode }, \
17909 +       .type = TOI_SYSFS_DATA_INTEGER, \
17910 +       .flags = _flags, \
17911 +       .data = { .integer = { .variable = _int, .minimum = _min, \
17912 +                       .maximum = _max } }, \
17913 +       .write_side_effect = _wse }
17914 +
17915 +#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \
17916 +       .attr = {.name  = _name , .mode   = _mode }, \
17917 +       .type = TOI_SYSFS_DATA_UL, \
17918 +       .flags = _flags, \
17919 +       .data = { .ul = { .variable = _ul, .minimum = _min, \
17920 +                       .maximum = _max } } }
17921 +
17922 +#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \
17923 +       .attr = {.name  = _name , .mode   = _mode }, \
17924 +       .type = TOI_SYSFS_DATA_LONG, \
17925 +       .flags = _flags, \
17926 +       .data = { .a_long = { .variable = _long, .minimum = _min, \
17927 +                       .maximum = _max } } }
17928 +
17929 +#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \
17930 +       .attr = {.name  = _name , .mode   = _mode }, \
17931 +       .type = TOI_SYSFS_DATA_STRING, \
17932 +       .flags = _flags, \
17933 +       .data = { .string = { .variable = _string, .max_length = _max_len } }, \
17934 +       .write_side_effect = _wse }
17935 +
17936 +#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \
17937 +       .attr = {.name  = _name , .mode   = _mode }, \
17938 +       .type = TOI_SYSFS_DATA_CUSTOM, \
17939 +       .flags = _flags, \
17940 +       .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \
17941 +       .write_side_effect = _wse }
17942 +
17943 +#define SYSFS_NONE(_name, _wse) { \
17944 +       .attr = {.name  = _name , .mode   = SYSFS_WRITEONLY }, \
17945 +       .type = TOI_SYSFS_DATA_NONE, \
17946 +       .write_side_effect = _wse, \
17947 +}
17948 +
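
A minimal sketch of how a module declares tunables with these macros; the
variables here are hypothetical (compare the sysfs_params arrays in
tuxonice_swap.c and tuxonice_sysfs.c above):

	static int example_timeout = 5;		/* hypothetical tunable */
	static char example_name[32];		/* hypothetical tunable */

	static struct toi_sysfs_data example_params[] = {
		/* Writes are clamped to 0..60; no write side effect. */
		SYSFS_INT("timeout", SYSFS_RW, &example_timeout, 0, 60, 0, NULL),
		/* max_length excludes the terminating NUL. */
		SYSFS_STRING("name", SYSFS_RW, example_name, 31, 0, NULL),
	};
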
17949 +/* Flags */
17950 +#define SYSFS_NEEDS_SM_FOR_READ 1
17951 +#define SYSFS_NEEDS_SM_FOR_WRITE 2
17952 +#define SYSFS_HIBERNATE 4
17953 +#define SYSFS_RESUME 8
17954 +#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME)
17955 +#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE)
17956 +#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE)
17957 +#define SYSFS_NEEDS_SM_FOR_BOTH \
17958 + (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE)
17959 +
17960 +int toi_register_sysfs_file(struct kobject *kobj,
17961 +               struct toi_sysfs_data *toi_sysfs_data);
17962 +void toi_unregister_sysfs_file(struct kobject *kobj,
17963 +               struct toi_sysfs_data *toi_sysfs_data);
17964 +
17965 +extern struct kobject *tuxonice_kobj;
17966 +
17967 +struct kobject *make_toi_sysdir(char *name);
17968 +void remove_toi_sysdir(struct kobject *obj);
17969 +extern void toi_cleanup_sysfs(void);
17970 +
17971 +extern int toi_sysfs_init(void);
17972 +extern void toi_sysfs_exit(void);
17973 diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c
17974 new file mode 100644
17975 index 0000000..4da4afd
17976 --- /dev/null
17977 +++ b/kernel/power/tuxonice_ui.c
17978 @@ -0,0 +1,250 @@
17979 +/*
17980 + * kernel/power/tuxonice_ui.c
17981 + *
17982 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
17983 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
17984 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
17985 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
17986 + *
17987 + * This file is released under the GPLv2.
17988 + *
17989 + * Routines for TuxOnIce's user interface.
17990 + *
17991 + * The user interface code talks to a userspace program via a
17992 + * netlink socket.
17993 + *
17994 + * The kernel side:
17995 + * - starts the userui program;
17996 + * - sends text messages and progress bar status;
17997 + *
17998 + * The user space side:
17999 + * - passes messages regarding user requests (abort, toggle reboot etc)
18000 + *
18001 + */
18002 +
18003 +#define __KERNEL_SYSCALLS__
18004 +
18005 +#include <linux/reboot.h>
18006 +
18007 +#include "tuxonice_sysfs.h"
18008 +#include "tuxonice_modules.h"
18009 +#include "tuxonice.h"
18010 +#include "tuxonice_ui.h"
18011 +#include "tuxonice_netlink.h"
18012 +#include "tuxonice_power_off.h"
18013 +#include "tuxonice_builtin.h"
18014 +
18015 +static char local_printf_buf[1024];    /* Same as printk - should be safe */
18016 +struct ui_ops *toi_current_ui;
18017 +EXPORT_SYMBOL_GPL(toi_current_ui);
18018 +
18019 +/**
18020 + * toi_wait_for_keypress - Wait for keypress via userui or /dev/console.
18021 + *
18022 + * @timeout: Maximum time to wait.
18023 + *
18024 + * Wait for a keypress, either from userui or /dev/console if userui isn't
18025 + * available. The non-userui path is mainly for boot time, before userui
18026 + * has been started, when we have an important warning to give to
18027 + * the user.
18028 + */
18029 +static char toi_wait_for_keypress(int timeout)
18030 +{
18031 +       if (toi_current_ui && toi_current_ui->wait_for_key(timeout))
18032 +               return ' ';
18033 +
18034 +       return toi_wait_for_keypress_dev_console(timeout);
18035 +}
18036 +
18037 +/* toi_early_boot_message()
18038 + * Description:        Handle errors early in the process of booting.
18039 + *             The user may press C to continue booting, perhaps
18040 + *             invalidating the image, or space to reboot.
18041 + *             This works from either the serial console or normally
18042 + *             attached keyboard.
18043 + *
18044 + *             Note that we come in here from init, while the kernel is
18045 + *             locked. If we want to get events from the serial console,
18046 + *             we need to temporarily unlock the kernel.
18047 + *
18048 + *             toi_early_boot_message may also be called post-boot.
18049 + *             In this case, it simply printks the message and returns.
18050 + *
18051 + * Arguments:  int     Whether we are able to erase the image.
18052 + *             int     default_answer. What to do when we timeout. This
18053 + *                     will normally be continue, but the user might
18054 + *                     provide command line options (__setup) to override
18055 + *                     particular cases.
18056 + *             Char *. Pointer to a string explaining why we're moaning.
18057 + */
18058 +
18059 +#define say(message, a...) printk(KERN_EMERG message, ##a)
18060 +
18061 +void toi_early_boot_message(int message_detail, int default_answer,
18062 +       char *warning_reason, ...)
18063 +{
18064 +       unsigned long orig_state = get_toi_state(), continue_req = 0;
18065 +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
18066 +       unsigned long orig_loglevel = console_loglevel;
18067 +       int can_ask = 1;
18068 +#else
18069 +       int can_ask = 0;
18070 +#endif
18071 +
18072 +       va_list args;
18073 +       int printed_len;
18074 +
18075 +       if (!toi_wait) {
18076 +               set_toi_state(TOI_CONTINUE_REQ);
18077 +               can_ask = 0;
18078 +       }
18079 +
18080 +       if (warning_reason) {
18081 +               va_start(args, warning_reason);
18082 +               printed_len = vsnprintf(local_printf_buf,
18083 +                               sizeof(local_printf_buf),
18084 +                               warning_reason,
18085 +                               args);
18086 +               va_end(args);
18087 +       }
18088 +
18089 +       if (!test_toi_state(TOI_BOOT_TIME)) {
18090 +               printk("TuxOnIce: %s\n", local_printf_buf);
18091 +               return;
18092 +       }
18093 +
18094 +       if (!can_ask) {
18095 +               continue_req = !!default_answer;
18096 +               goto post_ask;
18097 +       }
18098 +
18099 +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
18100 +       console_loglevel = 7;
18101 +
18102 +       say("=== TuxOnIce ===\n\n");
18103 +       if (warning_reason) {
18104 +               say("BIG FAT WARNING!! %s\n\n", local_printf_buf);
18105 +               switch (message_detail) {
18106 +               case 0:
18107 +                       say("If you continue booting, note that any image WILL "
18108 +                               "NOT BE REMOVED.\nTuxOnIce is unable to do so "
18109 +                               "because the appropriate modules aren't\n"
18110 +                               "loaded. You should manually remove the image "
18111 +                               "to avoid any\npossibility of corrupting your "
18112 +                               "filesystem(s) later.\n");
18113 +                       break;
18114 +               case 1:
18115 +                       say("If you want to use the current TuxOnIce image, "
18116 +                               "reboot and try\nagain with the same kernel "
18117 +                               "that you hibernated from. If you want\n"
18118 +                               "to forget that image, continue and the image "
18119 +                               "will be erased.\n");
18120 +                       break;
18121 +               }
18122 +               say("Press SPACE to reboot or C to continue booting with "
18123 +                       "this kernel\n\n");
18124 +               if (toi_wait > 0)
18125 +                       say("Default action if you don't select one in %d "
18126 +                               "seconds is: %s.\n",
18127 +                               toi_wait,
18128 +                               default_answer == TOI_CONTINUE_REQ ?
18129 +                               "continue booting" : "reboot");
18130 +       } else {
18131 +               say("BIG FAT WARNING!!\n\n"
18132 +                       "You have tried to resume from this image before.\n"
18133 +                       "If it failed once, it may well fail again.\n"
18134 +                       "Would you like to remove the image and boot "
18135 +                       "normally?\nThis will be equivalent to entering "
18136 +                       "noresume on the\nkernel command line.\n\n"
18137 +                       "Press SPACE to remove the image or C to continue "
18138 +                       "resuming.\n\n");
18139 +               if (toi_wait > 0)
18140 +                       say("Default action if you don't select one in %d "
18141 +                               "seconds is: %s.\n", toi_wait,
18142 +                               !!default_answer ?
18143 +                               "continue resuming" : "remove the image");
18144 +       }
18145 +       console_loglevel = orig_loglevel;
18146 +
18147 +       set_toi_state(TOI_SANITY_CHECK_PROMPT);
18148 +       clear_toi_state(TOI_CONTINUE_REQ);
18149 +
18150 +       if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */
18151 +               continue_req = !!default_answer;
18152 +       else
18153 +               continue_req = test_toi_state(TOI_CONTINUE_REQ);
18154 +
18155 +#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */
18156 +
18157 +post_ask:
18158 +       if ((warning_reason) && (!continue_req))
18159 +               machine_restart(NULL);
18160 +
18161 +       restore_toi_state(orig_state);
18162 +       if (continue_req)
18163 +               set_toi_state(TOI_CONTINUE_REQ);
18164 +}
18165 +EXPORT_SYMBOL_GPL(toi_early_boot_message);
18166 +#undef say
18167 +
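
A hypothetical call site, for illustration only (the message text is
invented; message_detail 1 selects the "reboot and try the same kernel"
advice above, and TOI_CONTINUE_REQ makes continuing the timeout default):

	/* Hypothetical example, not part of the patch. */
	toi_early_boot_message(1, TOI_CONTINUE_REQ,
		"Image was written by a different kernel (%s).", "2.6.25-toi");
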
18168 +/*
18169 + * User interface specific /sys/power/tuxonice entries.
18170 + */
18171 +
18172 +static struct toi_sysfs_data sysfs_params[] = {
18173 +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
18174 +       SYSFS_INT("default_console_level", SYSFS_RW,
18175 +                       &toi_bkd.toi_default_console_level, 0, 7, 0, NULL),
18176 +       SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0,
18177 +                       1 << 30, 0),
18178 +       SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL,
18179 +                       0)
18180 +#endif
18181 +};
18182 +
18183 +static struct toi_module_ops userui_ops = {
18184 +       .type                           = MISC_HIDDEN_MODULE,
18185 +       .name                           = "printk ui",
18186 +       .directory                      = "user_interface",
18187 +       .module                         = THIS_MODULE,
18188 +       .sysfs_data                     = sysfs_params,
18189 +       .num_sysfs_entries              = sizeof(sysfs_params) /
18190 +               sizeof(struct toi_sysfs_data),
18191 +};
18192 +
18193 +int toi_register_ui_ops(struct ui_ops *this_ui)
18194 +{
18195 +       if (toi_current_ui) {
18196 +               printk(KERN_INFO "Only one TuxOnIce user interface module can "
18197 +                               "be loaded at a time.\n");
18198 +               return -EBUSY;
18199 +       }
18200 +
18201 +       toi_current_ui = this_ui;
18202 +
18203 +       return 0;
18204 +}
18205 +EXPORT_SYMBOL_GPL(toi_register_ui_ops);
18206 +
18207 +void toi_remove_ui_ops(struct ui_ops *this_ui)
18208 +{
18209 +       if (toi_current_ui != this_ui)
18210 +               return;
18211 +
18212 +       toi_current_ui = NULL;
18213 +}
18214 +EXPORT_SYMBOL_GPL(toi_remove_ui_ops);
18215 +
18216 +/* toi_console_sysfs_init
18217 + * Description: Boot time initialisation for user interface.
18218 + */
18219 +
18220 +int toi_ui_init(void)
18221 +{
18222 +       return toi_register_module(&userui_ops);
18223 +}
18224 +
18225 +void toi_ui_exit(void)
18226 +{
18227 +       toi_unregister_module(&userui_ops);
18228 +}
18229 diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h
18230 new file mode 100644
18231 index 0000000..884d0c5
18232 --- /dev/null
18233 +++ b/kernel/power/tuxonice_ui.h
18234 @@ -0,0 +1,105 @@
18235 +/*
18236 + * kernel/power/tuxonice_ui.h
18237 + *
18238 + * Copyright (C) 2004-2008 Nigel Cunningham (nigel at tuxonice net)
18239 + */
18240 +
18241 +enum {
18242 +       DONT_CLEAR_BAR,
18243 +       CLEAR_BAR
18244 +};
18245 +
18246 +enum {
18247 +       /* Userspace -> Kernel */
18248 +       USERUI_MSG_ABORT = 0x11,
18249 +       USERUI_MSG_SET_STATE = 0x12,
18250 +       USERUI_MSG_GET_STATE = 0x13,
18251 +       USERUI_MSG_GET_DEBUG_STATE = 0x14,
18252 +       USERUI_MSG_SET_DEBUG_STATE = 0x15,
18253 +       USERUI_MSG_SPACE = 0x18,
18254 +       USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A,
18255 +       USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B,
18256 +       USERUI_MSG_GET_LOGLEVEL = 0x1C,
18257 +       USERUI_MSG_SET_LOGLEVEL = 0x1D,
18258 +       USERUI_MSG_PRINTK = 0x1E,
18259 +
18260 +       /* Kernel -> Userspace */
18261 +       USERUI_MSG_MESSAGE = 0x21,
18262 +       USERUI_MSG_PROGRESS = 0x22,
18263 +       USERUI_MSG_POST_ATOMIC_RESTORE = 0x25,
18264 +
18265 +       USERUI_MSG_MAX,
18266 +};
18267 +
18268 +struct userui_msg_params {
18269 +       u32 a, b, c, d;
18270 +       char text[255];
18271 +};
18272 +
18273 +struct ui_ops {
18274 +       char (*wait_for_key) (int timeout);
18275 +       u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...);
18276 +       void (*prepare_status) (int clearbar, const char *fmt, ...);
18277 +       void (*cond_pause) (int pause, char *message);
18278 +       void (*abort)(int result_code, const char *fmt, ...);
18279 +       void (*prepare)(void);
18280 +       void (*cleanup)(void);
18281 +       void (*post_atomic_restore)(void);
18282 +       void (*message)(u32 section, u32 level, u32 normally_logged,
18283 +                       const char *fmt, ...);
18284 +};
18285 +
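
A skeleton implementation for illustration. Every hook is filled in because
the wrapper macros below only check that toi_current_ui is non-NULL, not
the individual pointers; all names here are hypothetical:

	static char stub_wait_for_key(int timeout) { return 0; }
	static u32 stub_update_status(u32 value, u32 maximum,
			const char *fmt, ...) { return maximum; }
	static void stub_prepare_status(int clearbar, const char *fmt, ...) { }
	static void stub_cond_pause(int pause, char *message) { }
	static void stub_abort(int result_code, const char *fmt, ...) { }
	static void stub_noop(void) { }
	static void stub_message(u32 section, u32 level, u32 normally_logged,
			const char *fmt, ...) { }

	static struct ui_ops example_ui_ops = {
		.wait_for_key		= stub_wait_for_key,
		.update_status		= stub_update_status,
		.prepare_status		= stub_prepare_status,
		.cond_pause		= stub_cond_pause,
		.abort			= stub_abort,
		.prepare		= stub_noop,
		.cleanup		= stub_noop,
		.post_atomic_restore	= stub_noop,
		.message		= stub_message,
	};

	/* Paired with toi_register_ui_ops()/toi_remove_ui_ops() at module
	 * init/exit. */
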
18286 +extern struct ui_ops *toi_current_ui;
18287 +
18288 +#define toi_update_status(val, max, fmt, args...) \
18289 + (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \
18290 +       max)
18291 +
18292 +#define toi_ui_post_atomic_restore(void) \
18293 +       do { if (toi_current_ui) \
18294 +               (toi_current_ui->post_atomic_restore)(); \
18295 +       } while (0)
18296 +
18297 +#define toi_prepare_console(void) \
18298 +       do { pm_prepare_console(); \
18299 +            if (toi_current_ui) \
18300 +               (toi_current_ui->prepare)(); \
18301 +       } while (0)
18302 +
18303 +#define toi_cleanup_console(void) \
18304 +       do { pm_restore_console(); \
18305 +            if (toi_current_ui) \
18306 +               (toi_current_ui->cleanup)(); \
18307 +       } while (0)
18308 +
18309 +#define abort_hibernate(result, fmt, args...) \
18310 +       do { if (toi_current_ui) \
18311 +               (toi_current_ui->abort)(result, fmt, ##args); \
18312 +            else { \
18313 +               set_abort_result(result); \
18314 +            } \
18315 +       } while (0)
18316 +
18317 +#define toi_cond_pause(pause, message) \
18318 +       do { if (toi_current_ui) \
18319 +               (toi_current_ui->cond_pause)(pause, message); \
18320 +       } while (0)
18321 +
18322 +#define toi_prepare_status(clear, fmt, args...) \
18323 +       do { if (toi_current_ui) \
18324 +               (toi_current_ui->prepare_status)(clear, fmt, ##args); \
18325 +            else \
18326 +               printk(KERN_ERR fmt "%s", ##args, "\n"); \
18327 +       } while (0)
18328 +
18329 +#define toi_message(sn, lev, log, fmt, a...) \
18330 +do { \
18331 +       if (toi_current_ui && (!sn || test_debug_state(sn))) \
18332 +               toi_current_ui->message(sn, lev, log, fmt, ##a); \
18333 +} while (0)
18334 +
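
Typical use of the helpers above, as a hypothetical kernel-side fragment
(the function and its counters are invented for the example):

	static void example_write_phase(int total)
	{
		int i;

		toi_prepare_status(CLEAR_BAR, "Writing caches...");
		for (i = 0; i < total; i++)
			toi_message(0, TOI_STATUS, 1,
				"Wrote page %d/%d.", i + 1, total);
		toi_cond_pause(0, "Write phase complete.");
	}
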
18335 +__exit void toi_ui_cleanup(void);
18336 +extern int toi_ui_init(void);
18337 +extern void toi_ui_exit(void);
18338 +extern int toi_register_ui_ops(struct ui_ops *this_ui);
18339 +extern void toi_remove_ui_ops(struct ui_ops *this_ui);
18340 diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c
18341 new file mode 100644
18342 index 0000000..7cc5666
18343 --- /dev/null
18344 +++ b/kernel/power/tuxonice_userui.c
18345 @@ -0,0 +1,663 @@
18346 +/*
18347 + * kernel/power/user_ui.c
18348 + *
18349 + * Copyright (C) 2005-2007 Bernard Blackham
18350 + * Copyright (C) 2002-2008 Nigel Cunningham (nigel at tuxonice net)
18351 + *
18352 + * This file is released under the GPLv2.
18353 + *
18354 + * Routines for TuxOnIce's user interface.
18355 + *
18356 + * The user interface code talks to a userspace program via a
18357 + * netlink socket.
18358 + *
18359 + * The kernel side:
18360 + * - starts the userui program;
18361 + * - sends text messages and progress bar status;
18362 + *
18363 + * The user space side:
18364 + * - passes messages regarding user requests (abort, toggle reboot etc)
18365 + *
18366 + */
18367 +
18368 +#define __KERNEL_SYSCALLS__
18369 +
18370 +#include <linux/suspend.h>
18371 +#include <linux/freezer.h>
18372 +#include <linux/console.h>
18373 +#include <linux/ctype.h>
18374 +#include <linux/tty.h>
18375 +#include <linux/vt_kern.h>
18376 +#include <linux/module.h>
18377 +#include <linux/reboot.h>
18378 +#include <linux/kmod.h>
18379 +#include <linux/security.h>
18380 +#include <linux/syscalls.h>
18381 +
18382 +#include "tuxonice_sysfs.h"
18383 +#include "tuxonice_modules.h"
18384 +#include "tuxonice.h"
18385 +#include "tuxonice_ui.h"
18386 +#include "tuxonice_netlink.h"
18387 +#include "tuxonice_power_off.h"
18388 +
18389 +static char local_printf_buf[1024];    /* Same as printk - should be safe */
18390 +
18391 +static struct user_helper_data ui_helper_data;
18392 +static struct toi_module_ops userui_ops;
18393 +static int orig_kmsg;
18394 +
18395 +static char lastheader[512];
18396 +static int lastheader_message_len;
18397 +static int ui_helper_changed; /* Used at resume-time so don't overwrite value
18398 +                               set from initrd/ramfs. */
18399 +
18400 +/* Number of distinct progress amounts that userspace can display */
18401 +static int progress_granularity = 30;
18402 +
18403 +static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key);
18404 +
18405 +/**
18406 + * ui_nl_set_state - Update toi_action based on a message from userui.
18407 + *
18408 + * @n: The bit (1 << bit) to set.
18409 + */
18410 +static void ui_nl_set_state(int n)
18411 +{
18412 +       /* Only let them change certain settings */
18413 +       static const u32 toi_action_mask =
18414 +               (1 << TOI_REBOOT) | (1 << TOI_PAUSE) |
18415 +               (1 << TOI_LOGALL) |
18416 +               (1 << TOI_SINGLESTEP) |
18417 +               (1 << TOI_PAUSE_NEAR_PAGESET_END);
18418 +
18419 +       toi_bkd.toi_action = (toi_bkd.toi_action & (~toi_action_mask)) |
18420 +               (n & toi_action_mask);
18421 +
18422 +       if (!test_action_state(TOI_PAUSE) &&
18423 +                       !test_action_state(TOI_SINGLESTEP))
18424 +               wake_up_interruptible(&userui_wait_for_key);
18425 +}
18426 +
18427 +/**
18428 + * userui_post_atomic_restore - Tell userui that atomic restore just happened.
18429 + *
18430 + * Tell userui that atomic restore just occurred, so that it can do things like
18431 + * redrawing the screen, re-getting settings and so on.
18432 + */
18433 +static void userui_post_atomic_restore(void)
18434 +{
18435 +       toi_send_netlink_message(&ui_helper_data,
18436 +                       USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0);
18437 +}
18438 +
18439 +/**
18440 + * userui_storage_needed - Report how much memory in image header is needed.
18441 + */
18442 +static int userui_storage_needed(void)
18443 +{
18444 +       return sizeof(ui_helper_data.program) + 1 + sizeof(int);
18445 +}
18446 +
18447 +/**
18448 + * userui_save_config_info - Fill buffer with config info for image header.
18449 + *
18450 + * @buf: Buffer into which to put the config info we want to save.
18451 + */
18452 +static int userui_save_config_info(char *buf)
18453 +{
18454 +       *((int *) buf) = progress_granularity;
18455 +       memcpy(buf + sizeof(int), ui_helper_data.program,
18456 +                       sizeof(ui_helper_data.program));
18457 +       return sizeof(ui_helper_data.program) + sizeof(int) + 1;
18458 +}
18459 +
18460 +/**
18461 + * userui_load_config_info - Restore config info from buffer.
18462 + *
18463 + * @buf: Buffer containing header info loaded.
18464 + * @size: Size of data loaded for this module.
18465 + */
18466 +static void userui_load_config_info(char *buf, int size)
18467 +{
18468 +       progress_granularity = *((int *) buf);
18469 +       size -= sizeof(int);
18470 +
18471 +       /* Don't load the saved path if one has already been set */
18472 +       if (ui_helper_changed)
18473 +               return;
18474 +
18475 +       if (size > sizeof(ui_helper_data.program))
18476 +               size = sizeof(ui_helper_data.program);
18477 +
18478 +       memcpy(ui_helper_data.program, buf + sizeof(int), size);
18479 +       ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0';
18480 +}
18481 +
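
The two routines above share a simple layout: an int (progress_granularity)
followed by the userui program path. A stand-alone sketch of writing it
(the path is hypothetical):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[sizeof(int) + 256];
		int granularity = 30;
		const char *program = "/usr/local/sbin/tuxoniceui"; /* hypothetical */

		/* int first, then the path, as in userui_save_config_info()
		 * (the kernel routine saves the whole fixed-size array). */
		memcpy(buf, &granularity, sizeof(int));
		strncpy(buf + sizeof(int), program, 255);
		buf[sizeof(buf) - 1] = '\0';

		printf("saved %zu bytes\n",
			sizeof(int) + strlen(buf + sizeof(int)) + 1);
		return 0;
	}
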
18482 +/**
18483 + * set_ui_program_set: Record that userui program was changed.
18484 + *
18485 + * Side effect routine for when the userui program is set. In an initrd or
18486 + * ramfs, the user may set a location for the userui program. If this happens,
18487 + * we don't want to reload the value that was saved in the image header. This
18488 + * routine allows us to flag that we shouldn't restore the program name from
18489 + * the image header.
18490 + */
18491 +static void set_ui_program_set(void)
18492 +{
18493 +       ui_helper_changed = 1;
18494 +}
18495 +
18496 +/**
18497 + * userui_memory_needed - Tell core how much memory to reserve for us.
18498 + */
18499 +static int userui_memory_needed(void)
18500 +{
18501 +       /* ballpark figure of 128 pages */
18502 +       return 128 * PAGE_SIZE;
18503 +}
18504 +
18505 +/**
18506 + * userui_update_status - Update the progress bar and (if on) in-bar message.
18507 + *
18508 + * @value: Current progress percentage numerator.
18509 + * @maximum: Current progress percentage denominator.
18510 + * @fmt: Message to be displayed in the middle of the progress bar.
18511 + *
18512 + * Note that a NULL message does not mean that any previous message is erased!
18513 + * For that, you need toi_prepare_status with clearbar on.
18514 + *
18515 + * Returns an unsigned long, being the next numerator (as determined by the
18516 + * maximum and progress granularity) where status needs to be updated.
18517 + * This is to reduce unnecessary calls to update_status.
18518 + */
18519 +static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...)
18520 +{
18521 +       static u32 last_step = 9999;
18522 +       struct userui_msg_params msg;
18523 +       u32 this_step, next_update;
18524 +       int bitshift;
18525 +
18526 +       if (ui_helper_data.pid == -1)
18527 +               return 0;
18528 +
18529 +       if ((!maximum) || (!progress_granularity))
18530 +               return maximum;
18531 +
18532 +       if (value < 0)
18533 +               value = 0;
18534 +
18535 +       if (value > maximum)
18536 +               value = maximum;
18537 +
18538 +       /* Try to avoid math problems - we can't do 64 bit math here
18539 +        * (and shouldn't need it - anyone got screen resolution
18540 +        * of 65536 pixels or more?) */
18541 +       bitshift = fls(maximum) - 16;
18542 +       if (bitshift > 0) {
18543 +               u32 temp_maximum = maximum >> bitshift;
18544 +               u32 temp_value = value >> bitshift;
18545 +               this_step = (u32)
18546 +                       (temp_value * progress_granularity / temp_maximum);
18547 +               next_update = (((this_step + 1) * temp_maximum /
18548 +                                       progress_granularity) + 1) << bitshift;
18549 +       } else {
18550 +               this_step = (u32) (value * progress_granularity / maximum);
18551 +               next_update = ((this_step + 1) * maximum /
18552 +                               progress_granularity) + 1;
18553 +       }
18554 +
18555 +       if (this_step == last_step)
18556 +               return next_update;
18557 +
18558 +       memset(&msg, 0, sizeof(msg));
18559 +
18560 +       msg.a = this_step;
18561 +       msg.b = progress_granularity;
18562 +
18563 +       if (fmt) {
18564 +               va_list args;
18565 +               va_start(args, fmt);
18566 +               vsnprintf(msg.text, sizeof(msg.text), fmt, args);
18567 +               va_end(args);
18568 +               msg.text[sizeof(msg.text)-1] = '\0';
18569 +       }
18570 +
18571 +       toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS,
18572 +                       &msg, sizeof(msg));
18573 +       last_step = this_step;
18574 +
18575 +       return next_update;
18576 +}
18577 +
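
Worked numbers for the scaling above: shifting both operands down first
keeps value * progress_granularity within 32 bits even for very large
maxima. With maximum = 200000, fls(200000) = 18, so bitshift = 2:

	#include <stdio.h>

	int main(void)
	{
		unsigned int value = 70000, maximum = 200000, granularity = 30;
		int bitshift = 2;				/* fls(200000) - 16 */
		unsigned int tmax = maximum >> bitshift;	/* 50000 */
		unsigned int tval = value >> bitshift;		/* 17500 */
		unsigned int step = tval * granularity / tmax;	/* 10 */
		unsigned int next = (((step + 1) * tmax / granularity) + 1)
					<< bitshift;		/* 73336 */

		printf("step %u/%u, next update at %u\n", step, granularity, next);
		return 0;
	}
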
18578 +/**
18579 + * userui_message - Display a message without necessarily logging it.
18580 + *
18581 + * @section: Type of message. Messages can be filtered by type.
18582 + * @level: Degree of importance of the message. Lower values = higher priority.
18583 + * @normally_logged: Whether logged even if log_everything is off.
18584 + * @fmt: Message (and parameters).
18585 + *
18586 + * This function is intended to do the same job as printk, but without normally
18587 + * logging what is printed. The point is to be able to get debugging info on
18588 + * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M"
18589 + *
18590 + * It may be called from an interrupt context - can't sleep!
18591 + */
18592 +static void userui_message(u32 section, u32 level, u32 normally_logged,
18593 +               const char *fmt, ...)
18594 +{
18595 +       struct userui_msg_params msg;
18596 +
18597 +       if ((level) && (level > console_loglevel))
18598 +               return;
18599 +
18600 +       memset(&msg, 0, sizeof(msg));
18601 +
18602 +       msg.a = section;
18603 +       msg.b = level;
18604 +       msg.c = normally_logged;
18605 +
18606 +       if (fmt) {
18607 +               va_list args;
18608 +               va_start(args, fmt);
18609 +               vsnprintf(msg.text, sizeof(msg.text), fmt, args);
18610 +               va_end(args);
18611 +               msg.text[sizeof(msg.text)-1] = '\0';
18612 +       }
18613 +
18614 +       if (test_action_state(TOI_LOGALL))
18615 +               printk(KERN_INFO "%s\n", msg.text);
18616 +
18617 +       toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE,
18618 +                       &msg, sizeof(msg));
18619 +}
18620 +
18621 +/**
18622 + * wait_for_key_via_userui - Wait for userui to receive a keypress.
18623 + */
18624 +static void wait_for_key_via_userui(void)
18625 +{
18626 +       DECLARE_WAITQUEUE(wait, current);
18627 +
18628 +       add_wait_queue(&userui_wait_for_key, &wait);
18629 +       set_current_state(TASK_INTERRUPTIBLE);
18630 +
18631 +       schedule();     /* wait queue entry and task state set up above */
18632 +
18633 +       set_current_state(TASK_RUNNING);
18634 +       remove_wait_queue(&userui_wait_for_key, &wait);
18635 +}
18636 +
18637 +/**
18638 + * userui_prepare_status - Display high level messages.
18639 + *
18640 + * @clearbar: Whether to clear the progress bar.
18641 + * @fmt...: New message for the title.
18642 + *
18643 + * Prepare the 'nice display', drawing the header and version, along with the
18644 + * current action and perhaps also resetting the progress bar.
18645 + */
18646 +static void userui_prepare_status(int clearbar, const char *fmt, ...)
18647 +{
18648 +       va_list args;
18649 +
18650 +       if (fmt) {
18651 +               va_start(args, fmt);
18652 +               lastheader_message_len = vsnprintf(lastheader, 512, fmt, args);
18653 +               va_end(args);
18654 +       }
18655 +
18656 +       if (clearbar)
18657 +               toi_update_status(0, 1, NULL);
18658 +
18659 +       if (ui_helper_data.pid == -1)
18660 +               printk(KERN_EMERG "%s\n", lastheader);
18661 +       else
18662 +               toi_message(0, TOI_STATUS, 1, lastheader, NULL);
18663 +}
18664 +
18665 +/**
18666 + * toi_wait_for_keypress - Wait for keypress via userui.
18667 + *
18668 + * @timeout: Maximum time to wait.
18669 + *
18670 + * Wait for a keypress from userui.
18671 + *
18672 + * FIXME: Implement timeout?
18673 + */
18674 +static char userui_wait_for_keypress(int timeout)
18675 +{
18676 +       char key = '\0';
18677 +
18678 +       if (ui_helper_data.pid != -1) {
18679 +               wait_for_key_via_userui();
18680 +               key = ' ';
18681 +       }
18682 +
18683 +       return key;
18684 +}
18685 +
18686 +/**
18687 + * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it.
18688 + *
18689 + * @result_code: Reason why we're aborting (1 << bit).
18690 + * @fmt: Message to display if telling the user what's going on.
18691 + *
18692 + * Abort a cycle. If this wasn't at the user's request (and we're displaying
18693 + * output), tell the user why and wait for them to acknowledge the message.
18694 + */
18695 +static void userui_abort_hibernate(int result_code, const char *fmt, ...)
18696 +{
18697 +       va_list args;
18698 +       int printed_len = 0;
18699 +
18700 +       set_result_state(result_code);
18701 +
18702 +       if (test_result_state(TOI_ABORTED))
18703 +               return;
18704 +
18705 +       set_result_state(TOI_ABORTED);
18706 +
18707 +       if (test_result_state(TOI_ABORT_REQUESTED))
18708 +               return;
18709 +
18710 +       va_start(args, fmt);
18711 +       printed_len = vsnprintf(local_printf_buf, sizeof(local_printf_buf),
18712 +                       fmt, args);
18713 +       va_end(args);
18714 +       if (ui_helper_data.pid != -1)
18715 +               printed_len = sprintf(local_printf_buf + printed_len,
18716 +                                       " (Press SPACE to continue)");
18717 +
18718 +       toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf);
18719 +
18720 +       if (ui_helper_data.pid != -1)
18721 +               userui_wait_for_keypress(0);
18722 +}
18723 +
18724 +/**
18725 + * request_abort_hibernate - Abort hibernating or resuming at user request.
18726 + *
18727 + * Handle the user requesting the cancellation of a hibernation or resume by
18728 + * pressing escape.
18729 + */
18730 +static void request_abort_hibernate(void)
18731 +{
18732 +       if (test_result_state(TOI_ABORT_REQUESTED))
18733 +               return;
18734 +
18735 +       if (test_toi_state(TOI_NOW_RESUMING)) {
18736 +               toi_prepare_status(CLEAR_BAR, "Escape pressed. "
18737 +                                       "Powering down again.");
18738 +               set_toi_state(TOI_STOP_RESUME);
18739 +               while (!test_toi_state(TOI_IO_STOPPED))
18740 +                       schedule();
18741 +               if (toiActiveAllocator->mark_resume_attempted)
18742 +                       toiActiveAllocator->mark_resume_attempted(0);
18743 +               toi_power_down();
18744 +       }
18745 +
18746 +       toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :"
18747 +                                       " ABORTING HIBERNATION ---");
18748 +       set_abort_result(TOI_ABORT_REQUESTED);
18749 +       wake_up_interruptible(&userui_wait_for_key);
18750 +}
18751 +
18752 +/**
18753 + * userui_user_rcv_msg - Receive a netlink message from userui.
18754 + *
18755 + * @skb: skb received.
18756 + * @nlh: Netlink header received.
18757 + */
18758 +static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
18759 +{
18760 +       int type;
18761 +       int *data;
18762 +
18763 +       type = nlh->nlmsg_type;
18764 +
18765 +       /* A control message: ignore them */
18766 +       if (type < NETLINK_MSG_BASE)
18767 +               return 0;
18768 +
18769 +       /* Unknown message: reply with EINVAL */
18770 +       if (type >= USERUI_MSG_MAX)
18771 +               return -EINVAL;
18772 +
18773 +       /* All operations require privileges, even GET */
18774 +       if (security_netlink_recv(skb, CAP_NET_ADMIN))
18775 +               return -EPERM;
18776 +
18777 +       /* Only allow one task to receive NOFREEZE privileges */
18778 +       if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) {
18779 +               printk(KERN_INFO "Got NOFREEZE_ME request when "
18780 +                       "ui_helper_data.pid is %d.\n", ui_helper_data.pid);
18781 +               return -EBUSY;
18782 +       }
18783 +
18784 +       data = (int *) NLMSG_DATA(nlh);
18785 +
18786 +       switch (type) {
18787 +       case USERUI_MSG_ABORT:
18788 +               request_abort_hibernate();
18789 +               return 0;
18790 +       case USERUI_MSG_GET_STATE:
18791 +               toi_send_netlink_message(&ui_helper_data,
18792 +                               USERUI_MSG_GET_STATE, &toi_bkd.toi_action,
18793 +                               sizeof(toi_bkd.toi_action));
18794 +               return 0;
18795 +       case USERUI_MSG_GET_DEBUG_STATE:
18796 +               toi_send_netlink_message(&ui_helper_data,
18797 +                               USERUI_MSG_GET_DEBUG_STATE,
18798 +                               &toi_bkd.toi_debug_state,
18799 +                               sizeof(toi_bkd.toi_debug_state));
18800 +               return 0;
18801 +       case USERUI_MSG_SET_STATE:
18802 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
18803 +                       return -EINVAL;
18804 +               ui_nl_set_state(*data);
18805 +               return 0;
18806 +       case USERUI_MSG_SET_DEBUG_STATE:
18807 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
18808 +                       return -EINVAL;
18809 +               toi_bkd.toi_debug_state = (*data);
18810 +               return 0;
18811 +       case USERUI_MSG_SPACE:
18812 +               wake_up_interruptible(&userui_wait_for_key);
18813 +               return 0;
18814 +       case USERUI_MSG_GET_POWERDOWN_METHOD:
18815 +               toi_send_netlink_message(&ui_helper_data,
18816 +                               USERUI_MSG_GET_POWERDOWN_METHOD,
18817 +                               &toi_poweroff_method,
18818 +                               sizeof(toi_poweroff_method));
18819 +               return 0;
18820 +       case USERUI_MSG_SET_POWERDOWN_METHOD:
18821 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
18822 +                       return -EINVAL;
18823 +               toi_poweroff_method = (*data);
18824 +               return 0;
18825 +       case USERUI_MSG_GET_LOGLEVEL:
18826 +               toi_send_netlink_message(&ui_helper_data,
18827 +                               USERUI_MSG_GET_LOGLEVEL,
18828 +                               &toi_bkd.toi_default_console_level,
18829 +                               sizeof(toi_bkd.toi_default_console_level));
18830 +               return 0;
18831 +       case USERUI_MSG_SET_LOGLEVEL:
18832 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
18833 +                       return -EINVAL;
18834 +               toi_bkd.toi_default_console_level = (*data);
18835 +               return 0;
18836 +       case USERUI_MSG_PRINTK:
18837 +               printk("%s", (char *) data);
18838 +               return 0;
18839 +       }
18840 +
18841 +       /* Unhandled here */
18842 +       return 1;
18843 +}
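For context, the handler above is only the kernel half of the protocol; the userui helper drives it from userspace over a netlink socket. Below is a minimal sketch of an abort request. The numeric values of NETLINK_TOI_USERUI and USERUI_MSG_ABORT are placeholders (the real definitions live in the userui headers elsewhere in this patch), and error handling is elided:

    /* Userspace sketch: ask the kernel to abort hibernation. Placeholder
     * constants; needs CAP_NET_ADMIN to pass security_netlink_recv(). */
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #define NETLINK_TOI_USERUI  28      /* placeholder value */
    #define USERUI_MSG_ABORT    0x11    /* placeholder value */

    int main(void)
    {
            struct sockaddr_nl dest = { .nl_family = AF_NETLINK }; /* pid 0 == kernel */
            struct nlmsghdr nlh;
            int sk = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_TOI_USERUI);

            memset(&nlh, 0, sizeof(nlh));
            nlh.nlmsg_len = NLMSG_LENGTH(0);   /* header only, no payload */
            nlh.nlmsg_type = USERUI_MSG_ABORT; /* routed to request_abort_hibernate() */
            nlh.nlmsg_pid = getpid();

            sendto(sk, &nlh, nlh.nlmsg_len, 0,
                   (struct sockaddr *) &dest, sizeof(dest));
            close(sk);
            return 0;
    }

Sending USERUI_MSG_SPACE the same way releases the userui_cond_pause() loop further down.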
18844 +
18845 +/**
18846 + * userui_cond_pause - Possibly pause at user request.
18847 + *
18848 + * @pause: Whether to pause or just display the message.
18849 + * @message: Message to display at the start of pausing.
18850 + *
18851 + * Potentially pause and wait for the user to tell us to continue. We normally
18852 + * only pause when @pause is set. While paused, the user can change the
18853 + * loglevel, toggle the display of debugging sections, and so on, before
18854 + * pressing SPACE to continue.
18855 + */
18856 +static void userui_cond_pause(int pause, char *message)
18857 +{
18858 +       int displayed_message = 0, last_key = 0;
18859 +
18860 +       while (last_key != 32 &&
18861 +               ui_helper_data.pid != -1 &&
18862 +               ((test_action_state(TOI_PAUSE) && pause) ||
18863 +                (test_action_state(TOI_SINGLESTEP)))) {
18864 +               if (!displayed_message) {
18865 +                       toi_prepare_status(DONT_CLEAR_BAR,
18866 +                          "%s Press SPACE to continue.%s",
18867 +                          message ? message : "",
18868 +                          (test_action_state(TOI_SINGLESTEP)) ?
18869 +                          " Single step on." : "");
18870 +                       displayed_message = 1;
18871 +               }
18872 +               last_key = userui_wait_for_keypress(0);
18873 +       }
18874 +       schedule();
18875 +}
18876 +
18877 +/**
18878 + * userui_prepare_console - Prepare the console for use.
18879 + *
18880 + * Prepare a console for use, saving current kmsg settings and attempting to
18881 + * start userui. Console loglevel changes are handled by userui.
18882 + */
18883 +static void userui_prepare_console(void)
18884 +{
18885 +       orig_kmsg = kmsg_redirect;
18886 +       kmsg_redirect = fg_console + 1;
18887 +
18888 +       ui_helper_data.pid = -1;
18889 +
18890 +       if (!userui_ops.enabled) {
18891 +               printk(KERN_INFO "TuxOnIce: Userui disabled.\n");
18892 +               return;
18893 +       }
18894 +
18895 +       if (*ui_helper_data.program)
18896 +               toi_netlink_setup(&ui_helper_data);
18897 +       else
18898 +               printk(KERN_INFO "TuxOnIce: Userui program not configured.\n");
18899 +}
18900 +
18901 +/**
18902 + * userui_cleanup_console - Cleanup after a cycle.
18903 + *
18904 + * Tell userui to clean up, and restore kmsg_redirect to its original value.
18905 + */
18906 +
18907 +static void userui_cleanup_console(void)
18908 +{
18909 +       if (ui_helper_data.pid > -1)
18910 +               toi_netlink_close(&ui_helper_data);
18911 +
18912 +       kmsg_redirect = orig_kmsg;
18913 +}
18914 +
18915 +/*
18916 + * User interface specific /sys/power/tuxonice entries.
18917 + */
18918 +
18919 +static struct toi_sysfs_data sysfs_params[] = {
18920 +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
18921 +       SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action,
18922 +                       TOI_CAN_CANCEL, 0),
18923 +       SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action,
18924 +                       TOI_PAUSE, 0),
18925 +       SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL),
18926 +       SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
18927 +                       2048, 0, NULL),
18928 +       SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0,
18929 +                       set_ui_program_set),
18930 +       SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL)
18931 +#endif
18932 +};
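These entries surface as files under the TuxOnIce sysfs tree, in the "user_interface" directory named by userui_ops below. A sketch of flipping the pause bit from userspace, assuming the usual /sys/power/tuxonice root:

    /* Userspace sketch: set "pause_between_steps" (the TOI_PAUSE bit that
     * userui_cond_pause() above tests). Path assumed from the standard
     * /sys/power/tuxonice layout. */
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/power/tuxonice/user_interface/pause_between_steps",
                          O_WRONLY);

            if (fd < 0)
                    return 1;
            write(fd, "1", 1);      /* "0" clears the bit again */
            close(fd);
            return 0;
    }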
18933 +
18934 +static struct toi_module_ops userui_ops = {
18935 +       .type                           = MISC_MODULE,
18936 +       .name                           = "userui",
18937 +       .shared_directory               = "user_interface",
18938 +       .module                         = THIS_MODULE,
18939 +       .storage_needed                 = userui_storage_needed,
18940 +       .save_config_info               = userui_save_config_info,
18941 +       .load_config_info               = userui_load_config_info,
18942 +       .memory_needed                  = userui_memory_needed,
18943 +       .sysfs_data                     = sysfs_params,
18944 +       .num_sysfs_entries              = sizeof(sysfs_params) /
18945 +               sizeof(struct toi_sysfs_data),
18946 +};
18947 +
18948 +static struct ui_ops my_ui_ops = {
18949 +       .post_atomic_restore            = userui_post_atomic_restore,
18950 +       .update_status                  = userui_update_status,
18951 +       .message                        = userui_message,
18952 +       .prepare_status                 = userui_prepare_status,
18953 +       .abort                          = userui_abort_hibernate,
18954 +       .cond_pause                     = userui_cond_pause,
18955 +       .prepare                        = userui_prepare_console,
18956 +       .cleanup                        = userui_cleanup_console,
18957 +       .wait_for_key                   = userui_wait_for_keypress,
18958 +};
18959 +
18960 +/**
18961 + * toi_user_ui_init - Boot time initialisation for user interface.
18962 + *
18963 + * Invoked from the core init routine.
18964 + */
18965 +static __init int toi_user_ui_init(void)
18966 +{
18967 +       int result;
18968 +
18969 +       ui_helper_data.nl = NULL;
18970 +       strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255);
18971 +       ui_helper_data.pid = -1;
18972 +       ui_helper_data.skb_size = sizeof(struct userui_msg_params);
18973 +       ui_helper_data.pool_limit = 6;
18974 +       ui_helper_data.netlink_id = NETLINK_TOI_USERUI;
18975 +       ui_helper_data.name = "userspace ui";
18976 +       ui_helper_data.rcv_msg = userui_user_rcv_msg;
18977 +       ui_helper_data.interface_version = 8;
18978 +       ui_helper_data.must_init = 0;
18979 +       ui_helper_data.not_ready = userui_cleanup_console;
18980 +       init_completion(&ui_helper_data.wait_for_process);
18981 +       result = toi_register_module(&userui_ops);
18982 +       if (!result)
18983 +               result = toi_register_ui_ops(&my_ui_ops);
18984 +       if (result)
18985 +               toi_unregister_module(&userui_ops);
18986 +
18987 +       return result;
18988 +}
18989 +
18990 +#ifdef MODULE
18991 +/**
18992 + * toi_user_ui_exit - Clean up when the module is unloaded.
18993 + */
18994 +static __exit void toi_user_ui_exit(void)
18995 +{
18996 +       toi_netlink_close_complete(&ui_helper_data);
18997 +       toi_remove_ui_ops(&my_ui_ops);
18998 +       toi_unregister_module(&userui_ops);
18999 +}
19000 +
19001 +module_init(toi_user_ui_init);
19002 +module_exit(toi_user_ui_exit);
19003 +MODULE_AUTHOR("Nigel Cunningham");
19004 +MODULE_DESCRIPTION("TuxOnIce Userui Support");
19005 +MODULE_LICENSE("GPL");
19006 +#else
19007 +late_initcall(toi_user_ui_init);
19008 +#endif
19009 diff --git a/kernel/power/user.c b/kernel/power/user.c
19010 index a6332a3..66f4eb6 100644
19011 --- a/kernel/power/user.c
19012 +++ b/kernel/power/user.c
19013 @@ -64,6 +64,7 @@ static struct snapshot_data {
19014  } snapshot_state;
19015  
19016  atomic_t snapshot_device_available = ATOMIC_INIT(1);
19017 +EXPORT_SYMBOL_GPL(snapshot_device_available);
19018  
19019  static int snapshot_open(struct inode *inode, struct file *filp)
19020  {
19021 diff --git a/kernel/printk.c b/kernel/printk.c
19022 index b51b156..bc78cb3 100644
19023 --- a/kernel/printk.c
19024 +++ b/kernel/printk.c
19025 @@ -32,6 +32,7 @@
19026  #include <linux/security.h>
19027  #include <linux/bootmem.h>
19028  #include <linux/syscalls.h>
19029 +#include <linux/suspend.h>
19030  
19031  #include <asm/uaccess.h>
19032  
19033 @@ -59,6 +60,7 @@ int console_printk[4] = {
19034         MINIMUM_CONSOLE_LOGLEVEL,       /* minimum_console_loglevel */
19035         DEFAULT_CONSOLE_LOGLEVEL,       /* default_console_loglevel */
19036  };
19037 +EXPORT_SYMBOL_GPL(console_printk);
19038  
19039  /*
19040   * Low level drivers may need that to know if they can schedule in
19041 @@ -937,6 +939,7 @@ void suspend_console(void)
19042         acquire_console_sem();
19043         console_suspended = 1;
19044  }
19045 +EXPORT_SYMBOL_GPL(suspend_console);
19046  
19047  void resume_console(void)
19048  {
19049 @@ -945,6 +948,7 @@ void resume_console(void)
19050         console_suspended = 0;
19051         release_console_sem();
19052  }
19053 +EXPORT_SYMBOL_GPL(resume_console);
19054  
19055  /**
19056   * acquire_console_sem - lock the console system for exclusive use.
19057 diff --git a/kernel/timer.c b/kernel/timer.c
19058 index 03bc7f1..d70831d 100644
19059 --- a/kernel/timer.c
19060 +++ b/kernel/timer.c
19061 @@ -37,6 +37,8 @@
19062  #include <linux/delay.h>
19063  #include <linux/tick.h>
19064  #include <linux/kallsyms.h>
19065 +#include <linux/notifier.h>
19066 +#include <linux/suspend.h>
19067  
19068  #include <asm/uaccess.h>
19069  #include <asm/unistd.h>
19070 @@ -1002,6 +1004,59 @@ unsigned long avenrun[3];
19071  
19072  EXPORT_SYMBOL(avenrun);
19073  
19074 +#ifdef CONFIG_PM
19075 +static unsigned long avenrun_save[3];
19076 +/*
19077 + * save_avenrun - Record the values prior to starting a hibernation cycle.
19078 + * We do this to make the work done during hibernation invisible to
19079 + * userspace post-resume. Some programs, including some MTAs, watch the load
19080 + * average and stop work until it drops. Without this, they would stop
19081 + * working for a while post-resume, unnecessarily.
19082 + */
19083 +
19084 +static void save_avenrun(void)
19085 +{
19086 +       avenrun_save[0] = avenrun[0];
19087 +       avenrun_save[1] = avenrun[1];
19088 +       avenrun_save[2] = avenrun[2];
19089 +}
19090 +
19091 +static void restore_avenrun(void)
19092 +{
19093 +       if (!avenrun_save[0])
19094 +               return;
19095 +
19096 +       avenrun[0] = avenrun_save[0];
19097 +       avenrun[1] = avenrun_save[1];
19098 +       avenrun[2] = avenrun_save[2];
19099 +
19100 +       avenrun_save[0] = 0;
19101 +}
19102 +
19103 +static int avenrun_pm_callback(struct notifier_block *nfb,
19104 +                                       unsigned long action,
19105 +                                       void *ignored)
19106 +{
19107 +       switch (action) {
19108 +       case PM_HIBERNATION_PREPARE:
19109 +               save_avenrun();
19110 +               return NOTIFY_OK;
19111 +       case PM_POST_HIBERNATION:
19112 +               restore_avenrun();
19113 +               return NOTIFY_OK;
19114 +       }
19115 +
19116 +       return NOTIFY_DONE;
19117 +}
19118 +
19119 +static void register_pm_notifier_callback(void)
19120 +{
19121 +       pm_notifier(avenrun_pm_callback, 0);
19122 +}
19123 +#else
19124 +static inline void register_pm_notifier_callback(void) { }
19125 +#endif
19126 +
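The pm_notifier() helper used in register_pm_notifier_callback() declares a static notifier_block and registers it in one step. The same pattern, reduced to an illustrative sketch with made-up names (not part of this patch):

    #include <linux/init.h>
    #include <linux/notifier.h>
    #include <linux/suspend.h>

    static unsigned long my_stat, my_stat_save;

    /* Same shape as avenrun_pm_callback() above: snapshot before the image
     * is written, restore when the cycle ends, pass on other PM events. */
    static int my_pm_callback(struct notifier_block *nfb,
                              unsigned long action, void *ignored)
    {
            switch (action) {
            case PM_HIBERNATION_PREPARE:
                    my_stat_save = my_stat;
                    return NOTIFY_OK;
            case PM_POST_HIBERNATION:
                    my_stat = my_stat_save;
                    return NOTIFY_OK;
            }
            return NOTIFY_DONE;
    }

    static int __init my_subsys_init(void)
    {
            pm_notifier(my_pm_callback, 0); /* declares + registers the block */
            return 0;
    }
    late_initcall(my_subsys_init);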
19127  /*
19128   * calc_load - given tick count, update the avenrun load estimates.
19129   * This is called while holding a write_lock on xtime_lock.
19130 @@ -1495,6 +1550,7 @@ void __init init_timers(void)
19131         BUG_ON(err == NOTIFY_BAD);
19132         register_cpu_notifier(&timers_nb);
19133         open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
19134 +       register_pm_notifier_callback();
19135  }
19136  
19137  /**
19138 diff --git a/mm/bootmem.c b/mm/bootmem.c
19139 index ad8eec6..5d7d040 100644
19140 --- a/mm/bootmem.c
19141 +++ b/mm/bootmem.c
19142 @@ -22,6 +22,7 @@
19143  unsigned long max_low_pfn;
19144  unsigned long min_low_pfn;
19145  unsigned long max_pfn;
19146 +EXPORT_SYMBOL_GPL(max_pfn);
19147  
19148  #ifdef CONFIG_CRASH_DUMP
19149  /*
19150 diff --git a/mm/highmem.c b/mm/highmem.c
19151 index e16e152..cab4227 100644
19152 --- a/mm/highmem.c
19153 +++ b/mm/highmem.c
19154 @@ -58,6 +58,7 @@ unsigned int nr_free_highpages (void)
19155  
19156         return pages;
19157  }
19158 +EXPORT_SYMBOL_GPL(nr_free_highpages);
19159  
19160  static int pkmap_count[LAST_PKMAP];
19161  static unsigned int last_pkmap_nr;
19162 diff --git a/mm/memory.c b/mm/memory.c
19163 index 1002f47..034de70 100644
19164 --- a/mm/memory.c
19165 +++ b/mm/memory.c
19166 @@ -1110,6 +1110,7 @@ no_page_table:
19167         }
19168         return page;
19169  }
19170 +EXPORT_SYMBOL_GPL(follow_page);
19171  
19172  /* Can we do the FOLL_ANON optimization? */
19173  static inline int use_zero_page(struct vm_area_struct *vma)
19174 diff --git a/mm/mmzone.c b/mm/mmzone.c
19175 index 16ce8b9..8dd207b 100644
19176 --- a/mm/mmzone.c
19177 +++ b/mm/mmzone.c
19178 @@ -13,6 +13,7 @@ struct pglist_data *first_online_pgdat(void)
19179  {
19180         return NODE_DATA(first_online_node);
19181  }
19182 +EXPORT_SYMBOL_GPL(first_online_pgdat);
19183  
19184  struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
19185  {
19186 @@ -22,6 +23,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
19187                 return NULL;
19188         return NODE_DATA(nid);
19189  }
19190 +EXPORT_SYMBOL_GPL(next_online_pgdat);
19191  
19192  /*
19193   * next_zone - helper magic for for_each_zone()
19194 @@ -41,6 +43,7 @@ struct zone *next_zone(struct zone *zone)
19195         }
19196         return zone;
19197  }
19198 +EXPORT_SYMBOL_GPL(next_zone);
19199  
19200  static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes)
19201  {
19202 diff --git a/mm/page-writeback.c b/mm/page-writeback.c
19203 index 24de8b6..048fada 100644
19204 --- a/mm/page-writeback.c
19205 +++ b/mm/page-writeback.c
19206 @@ -93,6 +93,7 @@ int dirty_expire_interval = 30 * HZ;
19207   * Flag that makes the machine dump writes/reads and block dirtyings.
19208   */
19209  int block_dump;
19210 +EXPORT_SYMBOL_GPL(block_dump);
19211  
19212  /*
19213   * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
19214 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
19215 index 51daae5..6c73ebd 100644
19216 --- a/mm/page_alloc.c
19217 +++ b/mm/page_alloc.c
19218 @@ -1791,6 +1791,26 @@ static unsigned int nr_free_zone_pages(int offset)
19219         return sum;
19220  }
19221  
19222 +static unsigned int nr_unallocated_zone_pages(int offset)
19223 +{
19224 +       struct zoneref *z;
19225 +       struct zone *zone;
19226 +
19227 +       /* Just pick one node, since fallback list is circular */
19228 +       unsigned int sum = 0;
19229 +
19230 +       struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
19231 +
19232 +       for_each_zone_zonelist(zone, z, zonelist, offset) {
19233 +               unsigned long high = zone->pages_high;
19234 +               unsigned long left = zone_page_state(zone, NR_FREE_PAGES);
19235 +               if (left > high)
19236 +                       sum += left - high;
19237 +       }
19238 +
19239 +       return sum;
19240 +}
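Where the existing nr_free_zone_pages() sums present pages above each zone's high watermark (an upper bound on capacity), this helper counts only pages that are actually free right now and above the watermark: memory that can be claimed without pushing any zone into reclaim. A worked example with invented numbers (userspace sketch, not kernel code):

    #include <stdio.h>

    /* Invented numbers: each lowmem zone contributes max(free - high, 0);
     * highmem is excluded because gfp_zone(GFP_USER) stops at ZONE_NORMAL. */
    int main(void)
    {
            struct { const char *name; unsigned long free, high; } zones[] = {
                    { "ZONE_DMA",    1000,  300 },  /* contributes  700 */
                    { "ZONE_NORMAL", 9000, 1500 },  /* contributes 7500 */
            };
            unsigned long sum = 0;
            unsigned int i;

            for (i = 0; i < sizeof(zones) / sizeof(zones[0]); i++)
                    if (zones[i].free > zones[i].high)
                            sum += zones[i].free - zones[i].high;

            printf("unallocated: %lu pages\n", sum);  /* prints 8200 */
            return 0;
    }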
19241 +
19242  /*
19243   * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
19244   */
19245 @@ -1801,6 +1821,15 @@ unsigned int nr_free_buffer_pages(void)
19246  EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
19247  
19248  /*
19249 + * Free RAM within ZONE_DMA and ZONE_NORMAL above the zones' high watermarks
19250 + */
19251 +unsigned int nr_unallocated_buffer_pages(void)
19252 +{
19253 +       return nr_unallocated_zone_pages(gfp_zone(GFP_USER));
19254 +}
19255 +EXPORT_SYMBOL_GPL(nr_unallocated_buffer_pages);
19256 +
19257 +/*
19258   * Amount of free RAM allocatable within all zones
19259   */
19260  unsigned int nr_free_pagecache_pages(void)
19261 diff --git a/mm/swapfile.c b/mm/swapfile.c
19262 index 1e330f2..dcead18 100644
19263 --- a/mm/swapfile.c
19264 +++ b/mm/swapfile.c
19265 @@ -213,6 +213,7 @@ noswap:
19266         spin_unlock(&swap_lock);
19267         return (swp_entry_t) {0};
19268  }
19269 +EXPORT_SYMBOL_GPL(get_swap_page);
19270  
19271  swp_entry_t get_swap_page_of_type(int type)
19272  {
19273 @@ -305,6 +306,7 @@ void swap_free(swp_entry_t entry)
19274                 spin_unlock(&swap_lock);
19275         }
19276  }
19277 +EXPORT_SYMBOL_GPL(swap_free);
19278  
19279  /*
19280   * How many references to page are currently swapped out?
19281 @@ -982,6 +984,7 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
19282                 BUG_ON(se == start_se);         /* It *must* be present */
19283         }
19284  }
19285 +EXPORT_SYMBOL_GPL(map_swap_page);
19286  
19287  #ifdef CONFIG_HIBERNATION
19288  /*
19289 @@ -1342,6 +1345,7 @@ out_dput:
19290  out:
19291         return err;
19292  }
19293 +EXPORT_SYMBOL_GPL(sys_swapoff);
19294  
19295  #ifdef CONFIG_PROC_FS
19296  /* iterator */
19297 @@ -1732,6 +1736,7 @@ out:
19298         }
19299         return error;
19300  }
19301 +EXPORT_SYMBOL_GPL(sys_swapon);
19302  
19303  void si_swapinfo(struct sysinfo *val)
19304  {
19305 @@ -1749,6 +1754,7 @@ void si_swapinfo(struct sysinfo *val)
19306         val->totalswap = total_swap_pages + nr_to_be_unused;
19307         spin_unlock(&swap_lock);
19308  }
19309 +EXPORT_SYMBOL_GPL(si_swapinfo);
19310  
19311  /*
19312   * Verify that a swap entry is valid and increment its swap map count.
19313 @@ -1797,6 +1803,7 @@ get_swap_info_struct(unsigned type)
19314  {
19315         return &swap_info[type];
19316  }
19317 +EXPORT_SYMBOL_GPL(get_swap_info_struct);
19318  
19319  /*
19320   * swap_lock prevents swap_map being freed. Don't grab an extra
19321 diff --git a/mm/vmscan.c b/mm/vmscan.c
19322 index 1ff1a58..6d718c1 100644
19323 --- a/mm/vmscan.c
19324 +++ b/mm/vmscan.c
19325 @@ -1749,6 +1749,9 @@ void wakeup_kswapd(struct zone *zone, int order)
19326         if (!populated_zone(zone))
19327                 return;
19328  
19329 +       if (freezer_is_on())
19330 +               return;
19331 +
19332         pgdat = zone->zone_pgdat;
19333         if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
19334                 return;
19335 @@ -1910,6 +1913,7 @@ out:
19336  
19337         return ret;
19338  }
19339 +EXPORT_SYMBOL_GPL(shrink_all_memory);
19340  #endif
19341  
19342  /* It's optimal to keep kswapds on the same CPUs as their memory, but