1 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
2 index 1808f11..8e8d3b7 100644
3 --- a/Documentation/kernel-parameters.txt
4 +++ b/Documentation/kernel-parameters.txt
5 @@ -2773,6 +2773,9 @@ and is between 256 and 4096 characters. It is defined in the file
6                                         HIGHMEM regardless of setting
7                                         of CONFIG_HIGHPTE.
8  
9 +       uuid_debug=     (Boolean) whether to enable debugging of TuxOnIce's
10 +                       uuid support.
11 +
12         vdso=           [X86,SH]
13                         vdso=2: enable compat VDSO (default with COMPAT_VDSO)
14                         vdso=1: enable VDSO (default)
15 diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt
16 new file mode 100644
17 index 0000000..7a96186
18 --- /dev/null
19 +++ b/Documentation/power/tuxonice-internals.txt
20 @@ -0,0 +1,477 @@
21 +                  TuxOnIce 3.0 Internal Documentation.
22 +                       Updated to 26 March 2009
23 +
24 +1.  Introduction.
25 +
26 +    TuxOnIce 3.0 is an addition to the Linux Kernel, designed to
27 +    allow the user to quickly shut down and quickly boot a computer, without
28 +    needing to close documents or programs. It is equivalent to the
29 +    hibernate facility in some laptops. This implementation, however,
30 +    requires no special BIOS or hardware support.
31 +
32 +    The code in these files is based upon the original implementation
33 +    prepared by Gabor Kuti and additional work by Pavel Machek and a
34 +    host of others. This code has been substantially reworked by Nigel
35 +    Cunningham, again with the help and testing of many others, not the
36 +    least of whom is Michael Frank. At its heart, however, the operation is
37 +    essentially the same as Gabor's version.
38 +
39 +2.  Overview of operation.
40 +
41 +    The basic sequence of operations is as follows:
42 +
43 +       a. Quiesce all other activity.
44 +       b. Ensure enough memory and storage space are available, and attempt
45 +          to free memory/storage if necessary.
46 +       c. Allocate the required memory and storage space.
47 +       d. Write the image.
48 +       e. Power down.
49 +
50 +    There are a number of complicating factors which mean that things are
51 +    not as simple as the above would imply, however...
52 +
53 +    o The activity of each process must be stopped at a point where it will
54 +    not be holding locks necessary for saving the image, or unexpectedly
55 +    restart operations due to something like a timeout and thereby make
56 +    our image inconsistent.
57 +
58 +    o It is desirable that we sync outstanding I/O to disk before calculating
59 +    image statistics. This reduces corruption if one should suspend but
60 +    then not resume, and also makes later parts of the operation safer (see
61 +    below).
62 +
63 +    o We need to get as close as we can to an atomic copy of the data.
64 +    Inconsistencies in the image will result in inconsistent memory contents at
65 +    resume time, and thus in instability of the system and/or file system
66 +    corruption. This would appear to imply a maximum image size of one half of
67 +    the amount of RAM, but we have a solution... (again, below).
68 +
69 +    o In 2.6, we choose to play nicely with the other suspend-to-disk
70 +    implementations.
71 +
72 +3.  Detailed description of internals.
73 +
74 +    a. Quiescing activity.
75 +
76 +    Safely quiescing the system is achieved using three separate but related
77 +    aspects.
78 +
79 +    First, we note that the vast majority of processes don't need to run during
80 +    suspend. They can be 'frozen'. We therefore implement a refrigerator
81 +    routine, which processes enter and in which they remain until the cycle is
82 +    complete. Processes enter the refrigerator via try_to_freeze() invocations
83 +    at appropriate places.  A process cannot be frozen in any old place. It
84 +    must not be holding locks that will be needed for writing the image or
85 +    freezing other processes. For this reason, userspace processes generally
86 +    enter the refrigerator via the signal handling code, and kernel threads at
87 +    the place in their event loops where they drop locks and yield to other
88 +    processes or sleep.
89 +
90 +    The task of freezing processes is complicated by the fact that there can be
91 +    interdependencies between processes. Freezing process A before process B may
92 +    mean that process B cannot be frozen, because it blocks waiting for
93 +    process A rather than in the refrigerator. This issue is seen where
94 +    userspace waits on freezeable kernel threads or fuse filesystem threads. To
95 +    address this issue, we implement the following algorithm for quiescing
96 +    activity:
97 +
98 +       - Freeze filesystems (including fuse - userspace programs starting
99 +               new requests are immediately frozen; programs already running
100 +               requests complete their work before being frozen in the next
101 +               step)
102 +       - Freeze userspace
103 +       - Thaw filesystems (this is safe now that userspace is frozen and no
104 +               fuse requests are outstanding).
105 +       - Invoke sys_sync (noop on fuse).
106 +       - Freeze filesystems
107 +       - Freeze kernel threads
108 +
109 +    If we need to free memory, we thaw kernel threads and filesystems, but not
110 +    userspace. We can then free caches without worrying about deadlocks due to
111 +    swap files being on frozen filesystems or such like.
112 +
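+    In code terms, that sequence might look like the following sketch (the
+    helper names are illustrative rather than the exact functions used):
+
+       freeze_filesystems();           /* new fuse requests block at once */
+       freeze_userspace();
+       thaw_filesystems();             /* safe: no fuse requests pending */
+       sys_sync();                     /* noop on fuse */
+       freeze_filesystems();
+       freeze_kernel_threads();
+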
113 +    b. Ensure enough memory & storage are available.
114 +
115 +    We have a number of constraints to meet in order to be able to successfully
116 +    suspend and resume.
117 +
118 +    First, the image will be written in two parts, described below. One of these
119 +    parts needs to have an atomic copy made, which of course implies a maximum
120 +    size of one half of the amount of system memory. The other part ('pageset')
121 +    is not atomically copied, and can therefore be as large or small as desired.
122 +
123 +    Second, we have constraints on the amount of storage available. In these
124 +    calculations, we may also consider any compression that will be done. The
125 +    cryptoapi module allows the user to configure an expected compression ratio.
126 +
127 +    Third, the user can specify an arbitrary limit on the image size, in
128 +    megabytes. This limit is treated as a soft limit, so that we don't fail the
129 +    attempt to suspend if we cannot meet this constraint.
130 +
131 +    c. Allocate the required memory and storage space.
132 +
133 +    Having done the initial freeze, we determine whether the above constraints
134 +    are met, and seek to allocate the metadata for the image. If the constraints
135 +    are not met, or we fail to allocate the required space for the metadata, we
136 +    seek to free the amount of memory that we calculate is needed and try again.
137 +    We allow up to four iterations of this loop before aborting the cycle. If we
138 +    do fail, it should only be because of a bug in TuxOnIce's calculations.
139 +
140 +    These steps are merged together in the prepare_image function, found in
141 +    prepare_image.c. The functions are merged because of the cyclical nature
142 +    of the problem of calculating how much memory and storage is needed. Since
143 +    the data structures containing the information about the image must
144 +    themselves take memory and use storage, the amount of memory and storage
145 +    required changes as we prepare the image. Since the changes are not large,
146 +    only one or two iterations will be required to achieve a solution.
147 +
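+    Schematically, the loop looks something like this (a hedged sketch with
+    illustrative names, not the exact routines in prepare_image.c):
+
+       for (tries = 0; tries < 4; tries++) {
+               calculate_image_requirements();
+               if (constraints_met() && allocate_metadata() == 0)
+                       return 0;       /* ready to write the image */
+               free_some_memory();     /* thaw kthreads/fs, shrink caches */
+       }
+       return -ENOMEM;                 /* should only be a TuxOnIce bug */
+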
148 +    The recursive nature of the algorithm is minimised by keeping user space
149 +    frozen while preparing the image, and by the fact that our records of which
150 +    pages are to be saved and which pageset they are saved in use bitmaps (so
151 +    that changes in number or fragmentation of the pages to be saved don't
152 +    feed back via changes in the amount of memory needed for metadata). The
153 +    recursiveness is thus limited to any extra slab pages allocated to store the
154 +    extents that record storage used, and the effects of seeking to free memory.
155 +
156 +    d. Write the image.
157 +
158 +    We previously mentioned the need to create an atomic copy of the data, and
159 +    the half-of-memory limitation that is implied in this. This limitation is
160 +    circumvented by dividing the memory to be saved into two parts, called
161 +    pagesets.
162 +
163 +    Pageset2 contains most of the page cache - the pages on the active and
164 +    inactive LRU lists that aren't needed or modified while TuxOnIce is
165 +    running, so they can be safely written without an atomic copy. They are
166 +    therefore saved first and reloaded last. While saving these pages,
167 +    TuxOnIce carefully ensures that the work of writing the pages doesn't make
168 +    the image inconsistent. With the support for Kernel (Video) Mode Setting
169 +    going into the kernel at the time of writing, we need to check for pages
170 +    on the LRU that are used by KMS, and exclude them from pageset2. They are
171 +    atomically copied as part of pageset 1.
172 +
173 +    Once pageset2 has been saved, we prepare to do the atomic copy of remaining
174 +    memory. As part of the preparation, we power down drivers, thereby providing
175 +    them with the opportunity to have their state recorded in the image. The
176 +    amount of memory allocated by drivers for this is usually negligible, but if
177 +    DRI is in use, video drivers may require significant amounts. Ideally we
178 +    would be able to query drivers while preparing the image as to the amount of
179 +    memory they will need. Unfortunately no such mechanism exists at the time of
180 +    writing. For this reason, TuxOnIce allows the user to set an
181 +    'extra_pages_allowance', which is used to seek to ensure sufficient memory
182 +    is available for drivers at this point. TuxOnIce also lets the user set this
183 +    value to 0. In this case, a test driver suspend is done while preparing the
184 +    image, and the difference (plus a margin) used instead. TuxOnIce will also
185 +    automatically restart the hibernation process (twice at most) if it finds
186 +    that the extra pages allowance is not sufficient. It will then use what was
187 +    actually needed (plus a margin, again). Failure to hibernate should thus
188 +    be an extremely rare occurrence.
189 +
190 +    Having suspended the drivers, we save the CPU context before making an
191 +    atomic copy of pageset1, resuming the drivers and saving the atomic copy.
192 +    After saving the two pagesets, we just need to save our metadata before
193 +    powering down.
194 +
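+    In outline, the write phase might be sketched as follows (again with
+    illustrative rather than actual function names):
+
+       write_pageset(2);               /* LRU pages; saved without copying */
+       suspend_drivers();
+       save_processor_state();
+       atomic_copy_pageset1();
+       resume_drivers();
+       write_pageset(1);               /* write out the atomic copy */
+       write_image_header();
+       power_down();
+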
195 +    As we mentioned earlier, the contents of pageset2 pages aren't needed once
196 +    they've been saved. We therefore use them as the destination of our atomic
197 +    copy. In the unlikely event that pageset1 is larger, extra pages are
198 +    allocated while the image is being prepared. This is normally only a real
199 +    possibility when the system has just been booted and the page cache is
200 +    small.
201 +
202 +    This is where we need to be careful about syncing, however. Pageset2 will
203 +    probably contain filesystem meta data. If this is overwritten with pageset1
204 +    and then a sync occurs, the filesystem will be corrupted - at least until
205 +    resume time and another sync of the restored data. Since there is a
206 +    possibility that the user might not resume or (may it never be!) that
207 +    TuxOnIce might oops, we do our utmost to avoid syncing filesystems after
208 +    copying pageset1.
209 +
210 +    e. Power down.
211 +
212 +    Powering down uses standard kernel routines. TuxOnIce supports powering down
213 +    using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off.
214 +    Supporting suspend to ram (S3) as a power off option might sound strange,
215 +    but it allows the user to quickly get their system up and running again if
216 +    the battery doesn't run out (we just need to re-read the overwritten pages)
217 +    and if the battery does run out (or the user removes power), they can still
218 +    resume.
219 +
220 +4.  Data Structures.
221 +
222 +    TuxOnIce uses three main structures to store its metadata and configuration
223 +    information:
224 +
225 +    a) Pageflags bitmaps.
226 +
227 +    TuxOnIce records which pages will be in pageset1, pageset2, the destination
228 +    of the atomic copy and the source of the atomically restored image using
229 +    bitmaps. The code used is that written for swsusp, with small improvements
230 +    to match TuxOnIce's requirements.
231 +
232 +    The pageset1 bitmap is thus easily stored in the image header for use at
233 +    resume time.
234 +
235 +    As mentioned above, using bitmaps also means that the amount of memory and
236 +    storage required for recording the above information is constant. This
237 +    greatly simplifies the work of preparing the image. In earlier versions of
238 +    TuxOnIce, extents were used to record which pages would be stored. In that
239 +    case, however, eating memory could result in greater fragmentation of the
240 +    lists of pages, which in turn required more memory to store the extents and
241 +    more storage in the image header. These could in turn require further
242 +    freeing of memory, and another iteration. All of this complexity is removed
243 +    by having bitmaps.
244 +
245 +    Bitmaps also make a lot of sense because TuxOnIce only ever iterates
246 +    through the lists. There is therefore no cost to not being able to find the
247 +    nth page in order 0 time. We only need to worry about the cost of finding
248 +    the n+1th page, given the location of the nth page. Bitwise optimisations
249 +    help here.
250 +
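+    A sketch of the iteration pattern, using the kernel's generic bitmap
+    helpers rather than TuxOnIce's actual routines (save_page is a
+    hypothetical stand-in for the real work):
+
+       unsigned long pfn = find_first_bit(pageset1_map, max_pfn);
+
+       while (pfn < max_pfn) {
+               save_page(pfn);
+               pfn = find_next_bit(pageset1_map, max_pfn, pfn + 1);
+       }
+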
251 +    b) Extents for block data.
252 +
253 +    TuxOnIce supports writing the image to multiple block devices. In the case
254 +    of swap, multiple partitions and/or files may be in use, and we happily use
255 +    them all (with the exception of compcache pages, which we allocate but do
256 +    not use). This use of multiple block devices is accomplished as follows:
257 +
258 +    Whatever the actual source of the allocated storage, the destination of the
259 +    image can be viewed in terms of one or more block devices, and on each
260 +    device, a list of sectors. To simplify matters, we only use contiguous,
261 +    PAGE_SIZE aligned sectors, like the swap code does.
262 +
263 +    Since sector numbers on each bdev may well not start at 0, it makes much
264 +    more sense to use extents here. Contiguous ranges of pages can thus be
265 +    represented in the extents by contiguous values.
266 +
267 +    Variations in block size are taken into account in transforming this data
268 +    into the parameters for bio submission.
269 +
270 +    We can thus implement a layer of abstraction wherein the core of TuxOnIce
271 +    doesn't have to worry about which device we're currently writing to or
272 +    where in the device we are. It simply requests that the next page in the
273 +    pageset or header be written, leaving the details to this lower layer.
274 +    The lower layer remembers where in the sequence of devices and blocks each
275 +    pageset starts. The header always starts at the beginning of the allocated
276 +    storage.
277 +
278 +    So extents are:
279 +
280 +    struct extent {
281 +      unsigned long minimum, maximum;
282 +      struct extent *next;
283 +    }
284 +
285 +    These are combined into chains of extents for a device:
286 +
287 +    struct extent_chain {
288 +      int size; /* size of the extent ie sum (max-min+1) */
289 +      int allocs, frees;
290 +      char *name;
291 +      struct extent *first, *last_touched;
292 +    };
293 +
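+    A hedged sketch of how a newly allocated block might be appended to a
+    chain, merging with the previous extent when contiguous (the helper is
+    hypothetical, and the real code also updates allocs and frees):
+
+    static int chain_add_block(struct extent_chain *chain, unsigned long block)
+    {
+      struct extent *last = chain->last_touched;
+
+      if (last && block == last->maximum + 1) {
+        last->maximum = block;          /* extend the extent in place */
+      } else {
+        struct extent *new = kmalloc(sizeof(struct extent), GFP_KERNEL);
+
+        if (!new)
+          return -ENOMEM;
+        new->minimum = new->maximum = block;
+        new->next = NULL;
+        if (last)
+          last->next = new;
+        else
+          chain->first = new;
+        chain->last_touched = new;
+      }
+      chain->size++;                    /* sum of (max - min + 1) */
+      return 0;
+    }
+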
294 +    For each bdev, we need to store a little more info:
295 +
296 +    struct suspend_bdev_info {
297 +       struct block_device *bdev;
298 +       dev_t dev_t;
299 +       int bmap_shift;
300 +       int blocks_per_page;
301 +    };
302 +
303 +    The dev_t is used to identify the device in the stored image. As a result,
304 +    we expect devices at resume time to have the same major and minor numbers
305 +    as they had while suspending.  This is primarily a concern where the user
306 +    utilises LVM for storage, as they will need to dmsetup their partitions in
307 +    such a way as to maintain this consistency at resume time.
308 +
309 +    bmap_shift and blocks_per_page apply the effects of variations in blocks
310 +    per page settings for the filesystem and underlying bdev. For most
311 +    filesystems, these are the same, but for xfs, they can have independent
312 +    values.
313 +
314 +    Combining these two structures together, we have everything we need to
315 +    record what devices and what blocks on each device are being used to
316 +    store the image, and to submit i/o using bio_submit.
317 +
318 +    The last elements in the picture are a means of recording how the storage
319 +    is being used.
320 +
321 +    We do this first and foremost by implementing a layer of abstraction on
322 +    top of the devices and extent chains which allows us to view however many
323 +    devices there might be as one long storage tape, with a single 'head' that
324 +    tracks a 'current position' on the tape:
325 +
326 +    struct extent_iterate_state {
327 +      struct extent_chain *chains;
328 +      int num_chains;
329 +      int current_chain;
330 +      struct extent *current_extent;
331 +      unsigned long current_offset;
332 +    };
333 +
334 +    That is, *chains points to an array of size num_chains of extent chains.
335 +    For the filewriter, this is always a single chain. For the swapwriter, the
336 +    array is of size MAX_SWAPFILES.
337 +
338 +    current_chain, current_extent and current_offset thus point to the current
339 +    index in the chains array (and into a matching array of struct
340 +    suspend_bdev_info), the current extent in that chain (to optimise access),
341 +    and the current offset within that extent.
342 +
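+    Advancing the 'head' by one page can then be sketched as follows (a
+    simplified, hypothetical version of the real iterate code):
+
+    static int state_advance(struct extent_iterate_state *state)
+    {
+      if (++state->current_offset > state->current_extent->maximum) {
+        state->current_extent = state->current_extent->next;
+        if (!state->current_extent) {
+          if (++state->current_chain == state->num_chains)
+            return -ENODATA;            /* fell off the end of the tape */
+          state->current_extent = state->chains[state->current_chain].first;
+        }
+        state->current_offset = state->current_extent->minimum;
+      }
+      return 0;
+    }
+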
343 +    The image is divided into three parts:
344 +    - The header
345 +    - Pageset 1
346 +    - Pageset 2
347 +
348 +    The header always starts at the first device and first block. We know its
349 +    size before we begin to save the image because we carefully account for
350 +    everything that will be stored in it.
351 +
352 +    The second pageset (LRU) is stored first. It begins on the next page after
353 +    the end of the header.
354 +
355 +    The first pageset is stored second. Its start location is only known once
356 +    pageset2 has been saved, since pageset2 may be compressed as it is written.
357 +    This location is thus recorded at the end of saving pageset2. It is page
358 +    aligned also.
359 +
360 +    Since this information is needed at resume time, and the location of extents
361 +    in memory will differ at resume time, this needs to be stored in a portable
362 +    way:
363 +
364 +    struct extent_iterate_saved_state {
365 +        int chain_num;
366 +        int extent_num;
367 +        unsigned long offset;
368 +    };
369 +
370 +    We can thus implement a layer of abstraction wherein the core of TuxOnIce
371 +    doesn't have to worry about which device we're currently writing to or
372 +    where in the device we are. It simply requests that the next page in the
373 +    pageset or header be written, leaving the details to this layer, and
374 +    invokes the routines to remember and restore the position, without having
375 +    to worry about the details of how the data is arranged on disk or such like.
376 +
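+    Saving the position is then just a matter of turning the current extent
+    pointer back into an index (a hedged sketch, not the exact routine):
+
+    static void state_save_position(struct extent_iterate_state *state,
+                                    struct extent_iterate_saved_state *saved)
+    {
+      struct extent *this = state->chains[state->current_chain].first;
+      int extent_num = 0;
+
+      while (this != state->current_extent) {
+        this = this->next;
+        extent_num++;
+      }
+
+      saved->chain_num = state->current_chain;
+      saved->extent_num = extent_num;
+      saved->offset = state->current_offset;
+    }
+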
377 +    c) Modules
378 +
379 +    One aim in designing TuxOnIce was to make it flexible. We wanted to allow
380 +    for the implementation of different methods of transforming a page to be
381 +    written to disk and different methods of getting the pages stored.
382 +
383 +    In early versions (the betas and perhaps Suspend1), compression support was
384 +    inlined in the image writing code, and the data structures and code for
385 +    managing swap were intertwined with the rest of the code. A number of people
386 +    had expressed interest in implementing image encryption, and alternative
387 +    methods of storing the image.
388 +
389 +    In order to achieve this, TuxOnIce was given a modular design.
390 +
391 +    A module is a single file which encapsulates the functionality needed
392 +    to transform a pageset of data (encryption or compression, for example),
393 +    or to write the pageset to a device. The former type of module is called
394 +    a 'page-transformer', the latter a 'writer'.
395 +
396 +    Modules are linked together in pipeline fashion. There may be zero or more
397 +    page transformers in a pipeline, and there is always exactly one writer.
398 +    The pipeline follows this pattern:
399 +
400 +               ---------------------------------
401 +               |          TuxOnIce Core        |
402 +               ---------------------------------
403 +                               |
404 +                               |
405 +               ---------------------------------
406 +               |       Page transformer 1      |
407 +               ---------------------------------
408 +                               |
409 +                               |
410 +               ---------------------------------
411 +               |       Page transformer 2      |
412 +               ---------------------------------
413 +                               |
414 +                               |
415 +               ---------------------------------
416 +               |            Writer             |
417 +               ---------------------------------
418 +
419 +    During the writing of an image, the core code feeds pages one at a time
420 +    to the first module. This module performs whatever transformations it
421 +    implements on the incoming data, completely consuming the incoming data and
422 +    feeding output in a similar manner to the next module.
423 +
424 +    All routines are SMP safe, and the final result of the transformations is
425 +    written with an index (provided by the core) and size of the output by the
426 +    writer. As a result, we can have multithreaded I/O without needing to
427 +    worry about the sequence in which pages are written (or read).
428 +
429 +    During reading, the pipeline works in the reverse direction. The core code
430 +    calls the first module with the address of a buffer which should be filled.
431 +    (Note that the buffer size is always PAGE_SIZE at this time). This module
432 +    will in turn request data from the next module and so on down until the
433 +    writer is made to read from the stored image.
434 +
435 +    Part of the definition of the structure of a module thus looks like this:
436 +
437 +        int (*rw_init) (int rw, int stream_number);
438 +        int (*rw_cleanup) (int rw);
439 +        int (*write_chunk) (struct page *buffer_page);
440 +        int (*read_chunk) (struct page *buffer_page, int sync);
441 +
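+    The write side of a cycle might thus be driven along these lines (a
+    hedged sketch; the real core also handles errors and multiple threads):
+
+       first_module->rw_init(WRITE, pageset_number);
+       for_each_page_in_pageset(page)  /* hypothetical iterator */
+               first_module->write_chunk(page);
+       first_module->rw_cleanup(WRITE);
+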
442 +    It should be noted that the _cleanup routine may be called before the
443 +    full stream of data has been read or written. While writing the image,
444 +    the user may (depending upon settings) choose to abort suspending, and
445 +    if we are in the midst of writing the last portion of the image, a portion
446 +    of the second pageset may be reread. This may also happen if an error
447 +    occurs and we seek to abort the process of writing the image.
448 +
449 +    The modular design is also useful in a number of other ways. It provides
450 +    a means whereby we can add support for:
451 +
452 +    - providing overall initialisation and cleanup routines;
453 +    - serialising configuration information in the image header;
454 +    - providing debugging information to the user;
455 +    - determining memory and image storage requirements;
456 +    - dis/enabling components at run-time;
457 +    - configuring the module (see below);
458 +
459 +    ...and routines for writers specific to their work:
460 +    - Parsing a resume= location;
461 +    - Determining whether an image exists;
462 +    - Marking a resume as having been attempted;
463 +    - Invalidating an image;
464 +
465 +    Since some parts of the core - the user interface and storage manager
466 +    support - have use for some of these functions, they are registered as
467 +    'miscellaneous' modules as well.
468 +
469 +    d) Sysfs data structures.
470 +
471 +    This brings us naturally to support for configuring TuxOnIce. We desired to
472 +    provide a way to make TuxOnIce as flexible and configurable as possible.
473 +    The user shouldn't have to reboot just because they now want to hibernate to
474 +    a file instead of a partition, for example.
475 +
476 +    To accomplish this, TuxOnIce implements a very generic means whereby the
477 +    core and modules can register new sysfs entries. All TuxOnIce entries use
478 +    a single _store and _show routine, both of which are found in
479 +    tuxonice_sysfs.c in the kernel/power directory. These routines handle the
480 +    most common operations - getting and setting the values of bits, integers,
481 +    longs, unsigned longs and strings in one place, and allow overrides for
482 +    customised get and set options as well as side-effect routines for all
483 +    reads and writes.
484 +
485 +    When combined with some simple macros, a new sysfs entry can then be defined
486 +    in just a couple of lines:
487 +
488 +        SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
489 +                        2048, 0, NULL),
490 +
491 +    This defines a sysfs entry named "progress_granularity" which is rw and
492 +    allows the user to access an integer stored at &progress_granularity, giving
493 +    it a value between 1 and 2048 inclusive.
494 +
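+    Under the covers, such a macro simply fills in a description record
+    which the shared _store and _show routines interpret. A simplified,
+    hypothetical sketch of the idea (see tuxonice_sysfs.c for the real
+    layout):
+
+    struct toi_sysfs_entry {
+      struct attribute attr;            /* name and mode */
+      int type;                         /* bit, int, long, string, custom */
+      int *int_var;                     /* target of an integer entry */
+      int int_min, int_max;             /* bounds checked in _store */
+      void (*write_side_effect)(void);  /* optional hook after a write */
+    };
+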
495 +    Sysfs entries are registered under /sys/power/tuxonice, and entries for
496 +    modules are located in a subdirectory named after the module.
497 +
498 diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt
499 new file mode 100644
500 index 0000000..3bf0575
501 --- /dev/null
502 +++ b/Documentation/power/tuxonice.txt
503 @@ -0,0 +1,948 @@
504 +       --- TuxOnIce, version 3.0 ---
505 +
506 +1.  What is it?
507 +2.  Why would you want it?
508 +3.  What do you need to use it?
509 +4.  Why not just use the version already in the kernel?
510 +5.  How do you use it?
511 +6.  What do all those entries in /sys/power/tuxonice do?
512 +7.  How do you get support?
513 +8.  I think I've found a bug. What should I do?
514 +9.  When will XXX be supported?
515 +10. How does it work?
516 +11. Who wrote TuxOnIce?
517 +
518 +1. What is it?
519 +
520 +   Imagine you're sitting at your computer, working away. For some reason, you
521 +   need to turn off your computer for a while - perhaps it's time to go home
522 +   for the day. When you come back to your computer next, you're going to want
523 +   to carry on where you left off. Now imagine that you could push a button and
524 +   have your computer store the contents of its memory to disk and power down.
525 +   Then, when you next start up your computer, it loads that image back into
526 +   memory and you can carry on from where you were, just as if you'd never
527 +   turned the computer off. Starting up takes far less time, with no reopening
528 +   of applications or finding what directory you put that file in yesterday.
529 +   That's what TuxOnIce does.
530 +
531 +   TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who,
532 +   with some help from Pavel Machek, got an early version going in 1999. The
533 +   project was then taken over by Florent Chabaud while still in alpha version
534 +   numbers. Nigel Cunningham came on the scene when Florent was unable to
535 +   continue, moving the project into betas, then 1.0, 2.0 and so on up to
536 +   the present series. During the 2.0 series, the name was contracted to
537 +   Suspend2 and the website suspend2.net created. Beginning around July 2007,
538 +   a transition to calling the software TuxOnIce was made, to seek to help
539 +   make it clear that TuxOnIce is more concerned with hibernation than suspend
540 +   to ram.
541 +
542 +   Pavel Machek's swsusp code, which was merged around 2.5.17, retains the
543 +   original name, and was essentially a fork of the beta code until Rafael
544 +   Wysocki came on the scene in 2005 and began to improve it further.
545 +
546 +2. Why would you want it?
547 +
548 +   Why wouldn't you want it?
549 +
550 +   Being able to save the state of your system and quickly restore it improves
551 +   your productivity - you get a useful system in far less time than through
552 +   the normal boot process. You also get to be completely 'green', using zero
553 +   power, or as close to that as possible (the computer may still provide
554 +   minimal power to some devices, so they can initiate a power on, but that
555 +   will be the same amount of power as would be used if you told the computer
556 +   to shut down.)
557 +
558 +3. What do you need to use it?
559 +
560 +   a. Kernel Support.
561 +
562 +   i) The TuxOnIce patch.
563 +
564 +   TuxOnIce is part of the Linux Kernel. This version is not part of Linus's
565 +   2.6 tree at the moment, so you will need to download the kernel source and
566 +   apply the latest patch. Having done that, enable the appropriate options in
567 +   make [menu|x]config (under Power Management Options - look for "Enhanced
568 +   Hibernation"), compile and install your kernel. TuxOnIce works with SMP,
569 +   Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64.
570 +
571 +   TuxOnIce patches are available from http://tuxonice.net.
572 +
573 +   ii) Compression support.
574 +
575 +   Compression support is implemented via the cryptoapi. You will therefore want
576 +   to select any Cryptoapi transforms that you want to use on your image from
577 +   the Cryptoapi menu while configuring your kernel. We recommend the use of the
578 +   LZO compression method - it is very fast and still achieves good compression.
579 +
580 +   You can also tell TuxOnIce to write its image to an encrypted and/or
581 +   compressed filesystem/swap partition. In that case, you don't need to do
582 +   anything special for TuxOnIce when it comes to kernel configuration.
583 +
584 +   iii) Configuring other options.
585 +
586 +   While you're configuring your kernel, try to configure as much as possible
587 +   to build as modules. We recommend this because there are a number of drivers
588 +   that are still in the process of implementing proper power management
589 +   support. In those cases, the best way to work around this is
590 +   to build them as modules and remove the modules while hibernating. You might
591 +   also bug the driver authors to get their support up to speed, or even help!
592 +
593 +   b. Storage.
594 +
595 +   i) Swap.
596 +
597 +   TuxOnIce can store the hibernation image in your swap partition, a swap file or
598 +   a combination thereof. Whichever combination you choose, you will probably
599 +   want to create enough swap space to store the largest image you could have,
600 +   plus the space you'd normally use for swap. A good rule of thumb would be
601 +   to calculate the amount of swap you'd want without using TuxOnIce, and then
602 +   add the amount of memory you have. This swapspace can be arranged in any way
603 +   you'd like. It can be in one partition or file, or spread over a number. The
604 +   only requirement is that they be active when you start a hibernation cycle.
605 +
606 +   There is one exception to this requirement. TuxOnIce has the ability to turn
607 +   on one swap file or partition at the start of hibernating and turn it back off
608 +   at the end. If you want to ensure you have enough memory to store an image
609 +   when your memory is fully used, you might want to make one swap partition or
610 +   file for 'normal' use, and another for TuxOnIce to activate & deactivate
611 +   automatically. (Further details below).
612 +
613 +   ii) Normal files.
614 +
615 +   TuxOnIce includes a 'file allocator'. The file allocator can store your
616 +   image in a simple file. Since Linux has the concept of everything being a
617 +   file, this is more powerful than it initially sounds. If, for example, you
618 +   were to set up a network block device file, you could hibernate to a network
619 +   server. This has been tested and works to a point, but nbd itself isn't
620 +   stateless enough for our purposes.
621 +
622 +   Take extra care when setting up the file allocator. If you just type
623 +   commands without thinking and then try to hibernate, you could cause
624 +   irreversible corruption on your filesystems! Make sure you have backups.
625 +
626 +   Most people will only want to hibernate to a local file. To achieve that, do
627 +   something along the lines of:
628 +
629 +   echo "TuxOnIce" > /hibernation-file
630 +   dd if=/dev/zero bs=1M count=512 >> /hibernation-file
631 +
632 +   This will create a 512MB file called /hibernation-file. To get TuxOnIce to use
633 +   it:
634 +
635 +   echo /hibernation-file > /sys/power/tuxonice/file/target
636 +
637 +   Then
638 +
639 +   cat /sys/power/tuxonice/resume
640 +
641 +   Put the results of this into your bootloader's configuration (see also step
642 +   C, below):
643 +
644 +   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
645 +   # cat /sys/power/tuxonice/resume
646 +   file:/dev/hda2:0x1e001
647 +
648 +   In this example, we would edit the append= line of our lilo.conf|menu.lst
649 +   so that it included:
650 +
651 +   resume=file:/dev/hda2:0x1e001
652 +   ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
653 +
654 +   For those who are thinking 'Could I make the file sparse?', the answer is
655 +   'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in
656 +   a sparse file while hibernating. In the longer term (post merge!), I'd like
657 +   to change things so that the file could be dynamically resized and have
658 +   holes filled as needed. Right now, however, that's not possible and not a
659 +   priority.
660 +
661 +   c. Bootloader configuration.
662 +
663 +   Using TuxOnIce also requires that you add an extra parameter to
664 +   your lilo.conf or equivalent. Here's an example for a swap partition:
665 +
666 +   append="resume=swap:/dev/hda1"
667 +
668 +   This would tell TuxOnIce that /dev/hda1 is a swap partition you
669 +   have. TuxOnIce will use the swap signature of this partition as a
670 +   pointer to your data when you hibernate. This means that (in this example)
671 +   /dev/hda1 doesn't need to be _the_ swap partition where all of your data
672 +   is actually stored. It just needs to be a swap partition that has a
673 +   valid signature.
674 +
675 +   You don't need to have a swap partition for this purpose. TuxOnIce
676 +   can also use a swap file, but usage is a little more complex. Having made
677 +   your swap file, turn it on and do
678 +
679 +   cat /sys/power/tuxonice/swap/headerlocations
680 +
681 +   (this assumes you've already compiled your kernel with TuxOnIce
682 +   support and booted it). The results of the cat command will tell you
683 +   what you need to put in lilo.conf:
684 +
685 +   For swap partitions like /dev/hda1, simply use resume=/dev/hda1.
686 +   For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d.
687 +
688 +   If the swapfile changes for any reason (it is moved to a different
689 +   location, it is deleted and recreated, or the filesystem is
690 +   defragmented) then you will have to check
691 +   /sys/power/tuxonice/swap/headerlocations for a new resume_block value.
692 +
693 +   Once you've compiled and installed the kernel and adjusted your bootloader
694 +   configuration, you should only need to reboot for the most basic part
695 +   of TuxOnIce to be ready.
696 +
697 +   If you only compile in the swap allocator, or only compile in the file
698 +   allocator, you don't need to add the "swap:" part of the resume=
699 +   parameters above. resume=/dev/hda2:0x242d will work just as well. If you
700 +   have compiled both and your storage is on swap, you can also use this
701 +   format (the swap allocator is the default allocator).
702 +
703 +   When compiling your kernel, one of the options in the 'Power Management
704 +   Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is
705 +   called 'Default resume partition'. This can be used to set a default value
706 +   for the resume= parameter.
707 +
708 +   d. The hibernate script.
709 +
710 +   Since the driver model in 2.6 kernels is still being developed, you may need
711 +   to do more than just configure TuxOnIce. Users of TuxOnIce usually start the
712 +   process via a script which prepares for the hibernation cycle, tells the
713 +   kernel to do its stuff and then restores things afterwards. This script might
714 +   involve:
715 +
716 +   - Switching to a text console and back if X doesn't like the video card
717 +     status on resume.
718 +   - Un/reloading drivers that don't play well with hibernation.
719 +
720 +   Note that you might not be able to unload some drivers if there are
721 +   processes using them. You might have to kill off processes that hold
722 +   devices open. Hint: if your X server accesses a USB mouse, doing a
723 +   'chvt' to a text console releases the device and you can unload the
724 +   module.
725 +
726 +   Check out the latest script (available on tuxonice.net).
727 +
728 +   e. The userspace user interface.
729 +
730 +   TuxOnIce has very limited support for displaying status if you only apply
731 +   the kernel patch - it can printk messages, but that is all. In addition,
732 +   some of the functions mentioned in this document (such as cancelling a cycle
733 +   or performing interactive debugging) are unavailable. To utilise these
734 +   functions, or simply get a nice display, you need the 'userui' component.
735 +   Userui comes in three flavours, usplash, fbsplash and text. Text should
736 +   work on any console. Usplash and fbsplash require the appropriate
737 +   (distro specific?) support.
738 +
739 +   To utilise a userui, TuxOnIce just needs to be told where to find the
740 +   userspace binary:
741 +
742 +   echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program
743 +
744 +   The hibernate script can do this for you, and a default value for this
745 +   setting can be configured when compiling the kernel. This path is also
746 +   stored in the image header, so if you have an initrd or initramfs, you can
747 +   use the userui during the first part of resuming (prior to the atomic
748 +   restore) by putting the binary in the same path in your initrd/ramfs.
749 +   Alternatively, you can put it in a different location and do an echo
750 +   similar to the above prior to the echo > do_resume. The value saved in the
751 +   image header will then be ignored.
752 +
753 +4. Why not just use the version already in the kernel?
754 +
755 +   The version in the vanilla kernel has a number of drawbacks. The most
756 +   serious of these are:
757 +       - it has a maximum image size of 1/2 total memory;
758 +       - it doesn't allocate storage until after it has snapshotted memory.
759 +         This means that you can't be sure hibernating will work until you
760 +         see it start to write the image;
761 +       - it does not allow you to press escape to cancel a cycle;
762 +       - it does not allow you to press escape to cancel resuming;
763 +       - it does not allow you to automatically swapon a file when
764 +         starting a cycle;
765 +       - it does not allow you to use multiple swap partitions or files;
766 +       - it does not allow you to use ordinary files;
767 +       - it just invalidates an image and continues to boot if you
768 +         accidentally boot the wrong kernel after hibernating;
769 +       - it doesn't support any sort of nice display while hibernating;
770 +       - it is moving toward requiring that you have an initrd/initramfs
771 +         to ever have a hope of resuming (uswsusp). While uswsusp will
772 +         address some of the concerns above, it won't address all of them,
773 +         and will be more complicated to get set up;
774 +       - it doesn't have support for suspend-to-both (write a hibernation
775 +         image, then suspend to ram; I think this is known as ReadySafe
776 +         under M$).
777 +
778 +5. How do you use it?
779 +
780 +   A hibernation cycle can be started directly by doing:
781 +
782 +       echo > /sys/power/tuxonice/do_hibernate
783 +
784 +   In practice, though, you'll probably want to use the hibernate script
785 +   to unload modules, configure the kernel the way you like it and so on.
786 +   In that case, you'd do (as root):
787 +
788 +       hibernate
789 +
790 +   See the hibernate script's man page for more details on the options it
791 +   takes.
792 +
793 +   If you're using the text or splash user interface modules, one feature of
794 +   TuxOnIce that you might find useful is that you can press Escape at any time
795 +   during hibernating, and the process will be aborted.
796 +
797 +   Due to the way hibernation works, this means you'll have your system back and
798 +   perfectly usable almost instantly. The only exception is when it's at the
799 +   very end of writing the image. Then it will need to reload a small (usually
800 +   4-50MBs, depending upon the image characteristics) portion first.
801 +
802 +   Likewise, when resuming, you can press escape and resuming will be aborted.
803 +   The computer will then power down again, according to the powerdown or
804 +   reboot settings in force at that time.
805 +
806 +   You can change the settings for powering down while the image is being
807 +   written by pressing 'R' to toggle rebooting and 'O' to toggle between
808 +   suspending to ram and powering down completely.
809 +
810 +   If you run into problems with resuming, adding the "noresume" option to
811 +   the kernel command line will let you skip the resume step and recover your
812 +   system. This option shouldn't normally be needed, because TuxOnIce modifies
813 +   the image header prior to the atomic restore, and will thus prompt you
814 +   if it detects that you've tried to resume an image before (this flag is
815 +   removed if you press Escape to cancel a resume, so you won't be prompted
816 +   then).
817 +
818 +   Recent kernels (2.6.24 onwards) add support for resuming from a different
819 +   kernel to the one that was hibernated (thanks to Rafael for his work on
820 +   this - I've just embraced and enhanced the support for TuxOnIce). This
821 +   should further reduce the need for you to use the noresume option.
822 +
823 +6. What do all those entries in /sys/power/tuxonice do?
824 +
825 +   /sys/power/tuxonice is the directory which contains files you can use to
826 +   tune and configure TuxOnIce to your liking. The exact contents of
827 +   the directory will depend upon the version of TuxOnIce you're
828 +   running and the options you selected at compile time. In the following
829 +   descriptions, names in brackets refer to compile time options.
830 +   (Note that they're all dependent upon you having selected CONFIG_TUXONICE
831 +   in the first place!).
832 +
833 +   Since changing some of these settings can pose security risks, the
834 +   writeable ones are accessible only to the root user. You may want to
835 +   configure sudo to allow you to invoke your hibernate script as an ordinary
836 +   user.
837 +
838 +   - alloc/failure_test
839 +
840 +   This debugging option provides a way of testing TuxOnIce's handling of
841 +   memory allocation failures. Each allocation type that TuxOnIce makes has
842 +   been given a unique number (see the source code). Echo the appropriate
843 +   number into this entry, and when TuxOnIce attempts to do that allocation,
844 +   it will pretend there was a failure and act accordingly.
845 +
846 +   - alloc/find_max_mem_allocated
847 +
848 +   This debugging option will cause TuxOnIce to find the maximum amount of
849 +   memory it used during a cycle, and report that information in debugging
850 +   information at the end of the cycle.
851 +
852 +   - alt_resume_param
853 +
854 +   Instead of powering down after writing a hibernation image, TuxOnIce
855 +   supports resuming from a different image. This entry lets you set the
856 +   location of the signature for that image (the resume= value you'd use
857 +   for it). Using an alternate image and keep_image mode, you can do things
858 +   like using an alternate image to power down an uninterruptible power
859 +   supply.
860 +
861 +   - block_io/target_outstanding_io
862 +
863 +   This value controls the amount of memory that the block I/O code says it
864 +   needs when the core code is calculating how much memory is needed for
865 +   hibernating and for resuming. It doesn't directly control the amount of
866 +   I/O that is submitted at any one time - that depends on the amount of
867 +   available memory (we may have more available than we asked for), the
868 +   throughput that is being achieved and the ability of the CPU to keep up
869 +   with disk throughput (particularly where we're compressing pages).
870 +
871 +   - checksum/enabled
872 +
873 +   Use cryptoapi hashing routines to verify that Pageset2 pages don't change
874 +   while we're saving the first part of the image, and to get any pages that
875 +   do change resaved in the atomic copy. This should normally not be needed,
876 +   but if you're seeing issues, please enable this. If your issues stop you
877 +   being able to resume, enable this option, hibernate and cancel the cycle
878 +   after the atomic copy is done. If the debugging info shows a non-zero
879 +   number of pages resaved, please report this to Nigel.
880 +
881 +   - compression/algorithm
882 +
883 +   Set the cryptoapi algorithm used for compressing the image.
884 +
885 +   - compression/expected_compression
886 +
887 +   This value allows you to set an expected compression ratio, which TuxOnIce
888 +   will use in calculating whether it meets constraints on the image size. If
889 +   this expected compression ratio is not attained, the hibernation cycle will
890 +   abort, so it is wise to allow some spare. You can see what compression
891 +   ratio is achieved in the logs after hibernating.
892 +
893 +   - debug_info:
894 +
895 +   This file returns information about your configuration that may be helpful
896 +   in diagnosing problems with hibernating.
897 +
898 +   - did_suspend_to_both:
899 +
900 +   This file can be used when you hibernate with powerdown method 3 (ie suspend
901 +   to ram after writing the image). There can be two outcomes in this case. We
902 +   can resume from the suspend-to-ram before the battery runs out, or we can run
903 +   out of juice and end up resuming like normal. This entry lets you find out,
904 +   post resume, which way we went. If the value is 1, we resumed from suspend
905 +   to ram. This can be useful when actions need to be run post suspend-to-ram
906 +   that don't need to be run if we did the normal resume from power off.
907 +
908 +   - do_hibernate:
909 +
910 +   When anything is written to this file, the kernel side of TuxOnIce will
911 +   begin to attempt to write an image to disk and power down. You'll normally
912 +   want to run the hibernate script instead, to get modules unloaded first.
913 +
914 +   - do_resume:
915 +
916 +   When anything is written to this file TuxOnIce will attempt to read and
917 +   restore an image. If there is no image, it will return almost immediately.
918 +   If an image exists, the echo > will never return. Instead, the original
919 +   kernel context will be restored and the original echo > do_hibernate will
920 +   return.
921 +
922 +   - */enabled
923 +
924 +   These options can be used to temporarily disable various parts of TuxOnIce.
925 +
926 +   - extra_pages_allowance
927 +
928 +   When TuxOnIce does its atomic copy, it calls the driver model suspend
929 +   and resume methods. If you have DRI enabled with a driver such as fglrx,
930 +   this can result in the driver allocating a substantial amount of memory
931 +   for storing its state. Extra_pages_allowance tells TuxOnIce how much
932 +   extra memory it should ensure is available for those allocations. If
933 +   your attempts at hibernating end with a message in dmesg indicating that
934 +   insufficient extra pages were allowed, you need to increase this value.
935 +
936 +   - file/target:
937 +
938 +   Read this value to get the current setting. Write to it to point TuxOnIce
939 +   at a new storage location for the file allocator. See section 3.b.ii above
940 +   for details of how to set up the file allocator.
941 +
942 +   - freezer_test
943 +
944 +   This entry can be used to get TuxOnIce to just test the freezer and prepare
945 +   an image without actually doing a hibernation cycle. It is useful for
946 +   diagnosing freezing and image preparation issues.
947 +
948 +   - full_pageset2
949 +
950 +   TuxOnIce divides the pages that are stored in an image into two sets. The
951 +   difference between the two sets is that pages in pageset 1 are atomically
952 +   copied, and pages in pageset 2 are written to disk without being copied
953 +   first. A page CAN be written to disk without being copied first if and only
954 +   if its contents will not be modified or used at any time after userspace
955 +   processes are frozen. A page MUST be in pageset 1 if its contents are
956 +   modified or used at any time after userspace processes have been frozen.
957 +
958 +   Normally (ie if this option is enabled), TuxOnIce will put all pages on the
959 +   per-zone LRUs in pageset2, then remove those pages used by any userspace
960 +   user interface helper and TuxOnIce storage manager that are running,
961 +   together with pages used by the GEM memory manager introduced around 2.6.28
962 +   kernels.
963 +
964 +   If this option is disabled, a much more conservative approach will be taken.
965 +   The only pages in pageset2 will be those belonging to userspace processes,
966 +   with the exclusion of those belonging to the TuxOnIce userspace helpers
967 +   mentioned above. This will result in a much smaller pageset2, and will
968 +   therefore result in smaller images than are possible with this option
969 +   enabled.
970 +
971 +   - ignore_rootfs
972 +
973 +   TuxOnIce records which device is mounted as the root filesystem when
974 +   writing the hibernation image. It will normally check at resume time that
975 +   this device isn't already mounted - that would be a cause of filesystem
976 +   corruption. In some particular cases (RAM based root filesystems), you
977 +   might want to disable this check. This option allows you to do that.
978 +
979 +   - image_exists:
980 +
981 +   Can be used in a script to determine whether a valid image exists at the
982 +   location currently pointed to by resume=. Returns up to three lines.
983 +   The first is whether an image exists (-1 for unsure, otherwise 0 or 1).
984 +   If an image exists, additional lines will return the machine and version.
985 +   Echoing anything to this entry removes any current image.
986 +
987 +   - image_size_limit:
988 +
989 +   The maximum size of hibernation image written to disk, measured in megabytes
990 +   (1024*1024).
991 +
992 +   - last_result:
993 +
994 +   The result of the last hibernation cycle, as defined in
995 +   include/linux/suspend-debug.h with the values SUSPEND_ABORTED to
996 +   SUSPEND_KEPT_IMAGE. This is a bitmask.
997 +
998 +   - late_cpu_hotplug:
999 +
1000 +   This sysfs entry controls whether cpu hotplugging is done - as normal - just
1001 +   before (unplug) and after (replug) the atomic copy/restore (so that all
1002 +   CPUs/cores are available for multithreaded I/O). The alternative is to
1003 +   unplug all secondary CPUs/cores at the start of hibernating/resuming, and
1004 +   replug them at the end of resuming. No multithreaded I/O will be possible in
1005 +   this configuration, but the odd machine has been reported to require it.
1006 +
1007 +   - lid_file:
1008 +
1009 +   This determines which ACPI button file we look in to determine whether the
1010 +   lid is open or closed after resuming from suspend to disk or power off.
1011 +   If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state
1012 +   and check its contents at the appropriate moment. See post_wake_state below
1013 +   for more details on how this entry is used.
1014 +
1015 +   - log_everything (CONFIG_PM_DEBUG):
1016 +
1017 +   Setting this option results in all messages printed being logged. Normally,
1018 +   only a subset are logged, so as to not slow the process and not clutter the
1019 +   logs. Useful for debugging. It can be toggled during a cycle by pressing
1020 +   'L'.
1021 +
1022 +   - no_load_direct:
1023 +
1024 +   This is a debugging option. If, when loading the atomically copied pages of
1025 +   an image, TuxOnIce finds that the destination address for a page is free,
1026 +   it will normally allocate the page, load the data directly into that
1027 +   address and skip it in the atomic restore. If this option is disabled, the
1028 +   page will be loaded somewhere else and atomically restored like other pages.
1029 +
1030 +   - no_flusher_thread:
1031 +
1032 +   When doing multithreaded I/O (see below), the first online CPU can be used
1033 +   to _just_ submit compressed pages when writing the image, rather than
1034 +   compressing and submitting data. This option is normally disabled, but has
1035 +   been included because Nigel would like to see whether it will be more useful
1036 +   as the number of cores/cpus in computers increases.
1037 +
1038 +   - no_multithreaded_io:
1039 +
1040 +   TuxOnIce will normally create one thread per cpu/core on your computer,
1041 +   each of which will then perform I/O. This will generally result in
1042 +   throughput that's the maximum the storage medium can handle. There
1043 +   shouldn't be any reason to disable multithreaded I/O now, but this option
1044 +   has been retained for debugging purposes.
1045 +
1046 +   - no_pageset2
1047 +
1048 +   See the entry for full_pageset2 above for an explanation of pagesets.
1049 +   Enabling this option causes TuxOnIce to do an atomic copy of all pages,
1050 +   thereby limiting the maximum image size to 1/2 of memory, as swsusp does.
1051 +
1052 +   - no_pageset2_if_unneeded
1053 +
1054 +   See the entry for full_pageset2 above for an explanation of pagesets.
1055 +   Enabling this option causes TuxOnIce to act like no_pageset2 was enabled
1056 +   if and only if it isn't needed anyway. This option may still make TuxOnIce
1057 +   less reliable because pageset2 pages are normally used to store the
1058 +   atomic copy - drivers that want to do allocations of larger amounts of
1059 +   memory in one shot will be more likely to find that those amounts aren't
1060 +   available if this option is enabled.
1061 +
1062 +   - pause_between_steps (CONFIG_PM_DEBUG):
1063 +
1064 +   This option is used during debugging, to make TuxOnIce pause between
1065 +   each step of the process. It is ignored when the nice display is on.
1066 +
1067 +   - post_wake_state:
1068 +
1069 +   TuxOnIce provides support for automatically waking after a user-selected
1070 +   delay, and using a different powerdown method if the lid is still closed.
1071 +   (Yes, we're assuming a laptop).  This entry lets you choose what state
1072 +   should be entered next. The values are those described under
1073 +   powerdown_method, below. It can be used to suspend to RAM after hibernating,
1074 +   then power down properly after (say) 20 minutes. It can also be used to power down
1075 +   properly, then wake at (say) 6.30am and suspend to RAM until you're ready
1076 +   to use the machine.
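+
+   For example, to suspend to RAM after writing the image and then power
+   down properly after 20 minutes (a sketch; it assumes wake_delay is in
+   seconds and uses the powerdown_method values described below):
+
+   echo 3 > /sys/power/tuxonice/powerdown_method
+   echo 1200 > /sys/power/tuxonice/wake_delay
+   echo 5 > /sys/power/tuxonice/post_wake_state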
1077 +
1078 +   - powerdown_method:
1079 +
1080 +   Used to select a method by which TuxOnIce should powerdown after writing the
1081 +   image. Currently:
1082 +
1083 +   0: Don't use ACPI to power off.
1084 +   3: Attempt to enter Suspend-to-ram.
1085 +   4: Attempt to enter ACPI S4 mode.
1086 +   5: Attempt to power down via ACPI S5 mode.
1087 +
1088 +   Note that these options are highly dependent upon your hardware & software:
1089 +
1090 +   3: When successful, your machine suspends to RAM instead of powering off.
1091 +      The advantage of using this mode is that it doesn't matter whether your
1092 +      battery has enough charge to make it through to your next resume. If it
1093 +      lasts, you will simply resume from suspend to ram (and the image on disk
1094 +      will be discarded). If the battery runs out, you will resume from disk
1095 +      instead. The disadvantage is that it takes longer than a normal
1096 +      suspend-to-ram to enter the state, since the suspend-to-disk image needs
1097 +      to be written first.
1098 +   4/5: When successful, your machine will be off and consume (almost) no power.
1099 +      But it might still react to some external events like opening the lid or
1100 +      traffic on a network or USB device. For the BIOS, resume is then the same
1101 +      as a warm boot, similar to a situation where you used the command `reboot'
1102 +      to reboot your machine. If your machine has problems on warm boot or if
1103 +      you want to protect your machine with the BIOS password, this is probably
1104 +      not the right choice. Mode 4 may be necessary on some machines where ACPI
1105 +      wake up methods need to be run to properly reinitialise hardware after a
1106 +      hibernation cycle.
1107 +   0: Switch the machine completely off. The only possible wakeup is the power
1108 +      button. For the BIOS, resume is then the same as a cold boot; in
1109 +      particular, you would have to provide your BIOS boot password if your
1110 +      machine uses that feature for booting.
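+
+   For example, if your machine has trouble with warm boots, you might select
+   a full power-off (an illustrative use of this entry):
+
+   echo 0 > /sys/power/tuxonice/powerdown_method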
1111 +
1112 +   - progressbar_granularity_limit:
1113 +
1114 +   This option can be used to limit the granularity of the progress bar
1115 +   displayed with a bootsplash screen. The value is the maximum number of
1116 +   steps. That is, 10 will make the progress bar jump in 10% increments.
1117 +
1118 +   - reboot:
1119 +
1120 +   This option causes TuxOnIce to reboot rather than powering down
1121 +   at the end of saving an image. It can be toggled during a cycle by pressing
1122 +   'R'.
1123 +
1124 +   - resume:
1125 +
1126 +   This sysfs entry can be used to read and set the location in which TuxOnIce
1127 +   will look for the signature of an image - the value set using resume= at
1128 +   boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By
1129 +   writing to this file as well as modifying your bootloader's configuration
1130 +   file (eg menu.lst), you can set or reset the location of your image or the
1131 +   method of storing the image without rebooting.
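+
+   For example, to point TuxOnIce at a different swap partition without
+   rebooting (a sketch; the device name and the swap allocator's
+   "swap:<device>" syntax are illustrative):
+
+   echo swap:/dev/hda2 > /sys/power/tuxonice/resume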
1132 +
1133 +   - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP):
1134 +
1135 +   This option makes
1136 +
1137 +     echo disk > /sys/power/state
1138 +
1139 +   activate TuxOnIce instead of swsusp. Regardless of whether this option is
1140 +   enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce
1141 +   to check for an image too. This is due to the fact that at resume time, we
1142 +   can't know whether this option was enabled until we see if an image is there
1143 +   for us to resume from. (And when an image exists, we don't care whether we
1144 +   did replace swsusp anyway - we just want to resume).
1145 +
1146 +   - resume_commandline:
1147 +
1148 +   This entry can be read after resuming to see the commandline that was used
1149 +   when resuming began. You might use this to set up two bootloader entries
1150 +   that are the same apart from the fact that one includes an extra append=
1151 +   argument "at_work=1". You could then grep resume_commandline in your
1152 +   post-resume scripts and configure networking (for example) differently
1153 +   depending upon whether you're at home or work. resume_commandline can be
1154 +   set to arbitrary text if you wish to remove sensitive contents.
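+
+   A post-resume script might then do something like this (a sketch;
+   setup_work_networking is a hypothetical helper):
+
+   if grep -q at_work=1 /sys/power/tuxonice/resume_commandline; then
+           setup_work_networking
+   fi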
1155 +
1156 +   - swap/swapfilename:
1157 +
1158 +   This entry is used to specify the swapfile or partition that
1159 +   TuxOnIce will attempt to swapon/swapoff automatically. Thus, if
1160 +   I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically
1161 +   for my hibernation image, I would
1162 +
1163 +   echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile
1164 +
1165 +   /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the
1166 +   swapon and swapoff occur while other processes are frozen (including kswapd)
1167 +   so this swap file will not be used up when attempting to free memory. The
1168 +   partition/file is also given the highest priority, so other swapfiles/partitions
1169 +   will only be used to save the image when this one is filled.
1170 +
1171 +   The value of this file is used by headerlocations along with any currently
1172 +   activated swapfiles/partitions.
1173 +
1174 +   - swap/headerlocations:
1175 +
1176 +   This option tells you the resume= options to use for swap devices you
1177 +   currently have activated. It is particularly useful when you only want to
1178 +   use a swap file to store your image. See above for further details.
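+
+   For example, you might simply read it to find the value to put in your
+   bootloader configuration:
+
+   cat /sys/power/tuxonice/swap/headerlocations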
1179 +
1180 +   - test_bio:
1181 +
1182 +   This is a debugging option. When enabled, TuxOnIce will not hibernate.
1183 +   Instead, when asked to write an image, it will skip the atomic copy,
1184 +   just doing the writing of the image and then returning control to the
1185 +   user at the point where it would have powered off. This is useful for
1186 +   testing throughput in different configurations.
1187 +
1188 +   - test_filter_speed:
1189 +
1190 +   This is a debugging option. When enabled, TuxOnIce will not hibernate.
1191 +   Instead, when asked to write an image, it will not write anything or do
1192 +   an atomic copy, but will only run any enabled compression algorithm on the
1193 +   data that would have been written (the source pages of the atomic copy in
1194 +   the case of pageset 1). This is useful for comparing the performance of
1195 +   compression algorithms and for determining the extent to which an upgrade
1196 +   to your storage method would improve hibernation speed.
1197 +
1198 +   - user_interface/debug_sections (CONFIG_PM_DEBUG):
1199 +
1200 +   This value, together with the console log level, controls what debugging
1201 +   information is displayed. The console log level determines the level of
1202 +   detail, and this value determines what detail is displayed. This value is
1203 +   a bit vector, and the meaning of the bits can be found in the kernel tree
1204 +   in include/linux/tuxonice.h. It can be overridden using the kernel's
1205 +   command line option suspend_dbg.
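+
+   For example (the value is illustrative; see include/linux/tuxonice.h for
+   the meaning of each bit):
+
+   echo 511 > /sys/power/tuxonice/user_interface/debug_sections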
1206 +
1207 +   - user_interface/default_console_level (CONFIG_PM_DEBUG):
1208 +
1209 +   This determines the value of the console log level at the start of a
1210 +   hibernation cycle. If debugging is compiled in, the console log level can be
1211 +   changed during a cycle by pressing the digit keys. Meanings are:
1212 +
1213 +   0: Nice display.
1214 +   1: Nice display plus numerical progress.
1215 +   2: Errors only.
1216 +   3: Low level debugging info.
1217 +   4: Medium level debugging info.
1218 +   5: High level debugging info.
1219 +   6: Verbose debugging info.
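+
+   For example, to start cycles with verbose debugging information:
+
+   echo 6 > /sys/power/tuxonice/user_interface/default_console_level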
1220 +
1221 +   - user_interface/enable_escape:
1222 +
1223 +   Setting this to "1" will enable you to abort a hibernation cycle or resume
1224 +   by pressing escape; "0" (default) disables this feature. Note that enabling
1225 +   this option means that you cannot initiate a hibernation cycle and then walk
1226 +   away from your computer, expecting it to be secure. With this feature
1227 +   disabled, you can validly have this expectation once TuxOnIce begins to
1228 +   write the image to disk. (Prior to this point, it is possible that TuxOnIce
1229 +   might abort because of a failure to freeze all processes or because
1230 +   constraints on its ability to save the image are not met).
1231 +
1232 +   - user_interface/program:
1233 +
1234 +   This entry is used to tell TuxOnIce what userspace program to use for
1235 +   providing a user interface while hibernating. The program uses a netlink
1236 +   socket to pass messages back and forth to the kernel, providing all of the
1237 +   functionality formerly implemented in the kernel's user interface components.
1238 +
1239 +   - version:
1240 +
1241 +   The version of TuxOnIce you have compiled into the currently running kernel.
1242 +
1243 +   - wake_alarm_dir:
1244 +
1245 +   As mentioned above (post_wake_state), TuxOnIce supports automatically waking
1246 +   after some delay. This entry allows you to select which wake alarm to use.
1247 +   It should contain the value "rtc0" if you want to use
1248 +   /sys/class/rtc/rtc0.
1249 +
1250 +   - wake_delay:
1251 +
1252 +   This value determines the delay from the end of writing the image until the
1253 +   wake alarm is triggered. You can set an absolute time by writing the desired
1254 +   time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values
1255 +   empty.
1256 +
1257 +   Note that for the wakeup to actually occur, you may need to modify entries
1258 +   in /proc/acpi/wakeup. This is done by echoing the name of the button in the
1259 +   first column (eg PBTN) into the file.
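+
+   Putting it together (a sketch; it again assumes wake_delay is in seconds
+   and that your RTC is rtc0):
+
+   echo rtc0 > /sys/power/tuxonice/wake_alarm_dir
+   echo 300 > /sys/power/tuxonice/wake_delay
+   echo PBTN > /proc/acpi/wakeup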
1260 +
1261 +7. How do you get support?
1262 +
1263 +   Glad you asked. TuxOnIce is being actively maintained and supported
1264 +   by Nigel (the guy doing most of the kernel coding at the moment), Bernard
1265 +   (who maintains the hibernate script and userspace user interface components)
1266 +   and its users.
1267 +
1268 +   Resources available include HowTos, FAQs and a Wiki, all available via
1269 +   tuxonice.net.  You can find the mailing lists there.
1270 +
1271 +8. I think I've found a bug. What should I do?
1272 +
1273 +   By far and away, the most common problems people have with TuxOnIce
1274 +   relate to drivers not having adequate power management support. In this
1275 +   case, it is not a bug with TuxOnIce, but we can still help you. As we
1276 +   mentioned above, such issues can usually be worked around by building the
1277 +   functionality as modules and unloading them while hibernating. Please visit
1278 +   the Wiki for up-to-date lists of known issues and workarounds.
1279 +
1280 +   If this information doesn't help, try running:
1281 +
1282 +   hibernate --bug-report
1283 +
1284 +   ...and sending the output to the users' mailing list.
1285 +
1286 +   Good information on how to provide us with useful information from an
1287 +   oops is found in the file REPORTING-BUGS, in the top level directory
1288 +   of the kernel tree. If you get an oops, please especially note the
1289 +   information about running what is printed on the screen through ksymoops.
1290 +   The raw information is useless.
1291 +
1292 +9. When will XXX be supported?
1293 +
1294 +   If there's a feature missing from TuxOnIce that you'd like, feel free to
1295 +   ask. We try to be obliging, within reason.
1296 +
1297 +   Patches are welcome. Please send to the list.
1298 +
1299 +10. How does it work?
1300 +
1301 +   TuxOnIce does its work in a number of steps.
1302 +
1303 +   a. Freezing system activity.
1304 +
1305 +   The first main stage in hibernating is to stop all other activity. This is
1306 +   achieved in stages. Processes are considered in four groups, which we will
1307 +   describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE
1308 +   flag, kernel threads without this flag, userspace processes with the
1309 +   PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are
1310 +   untouched by the refrigerator code. They are allowed to run while hibernating
1311 +   and resuming, and are used to support user interaction, storage access or the
1312 +   like. Other kernel threads (those unneeded while hibernating) are frozen last.
1313 +   This leaves us with userspace processes that need to be frozen. When a
1314 +   process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on
1315 +   that process for the duration of that call. Processes that have this flag are
1316 +   frozen after processes without it, so that we can seek to ensure that dirty
1317 +   data is synced to disk as quickly as possible in a situation where other
1318 +   processes may be submitting writes at the same time. Freezing the processes
1319 +   that are submitting data stops new I/O from being submitted. Syncthreads can
1320 +   then cleanly finish their work. So the order is:
1321 +
1322 +   - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE;
1323 +   - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE);
1324 +   - Kernel processes without PF_NOFREEZE.
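+
+   In outline, the ordering amounts to something like this (a pseudo-code
+   sketch; freeze() and freezes_in_group() are illustrative names, not the
+   actual kernel helpers):
+
+   /* pass 1: ordinary userspace; 2: PF_SYNCTHREAD; 3: kernel threads */
+   for (pass = 1; pass <= 3; pass++)
+           for_each_process(p)
+                   if (!(p->flags & PF_NOFREEZE) && freezes_in_group(p, pass))
+                           freeze(p);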
1325 +
1326 +   b. Eating memory.
1327 +
1328 +   For a successful hibernation cycle, you need to have enough disk space to store the
1329 +   image and enough memory for the various limitations of TuxOnIce's
1330 +   algorithm. You can also specify a maximum image size. In order to meet
1331 +   those constraints, TuxOnIce may 'eat' memory. If, after freezing
1332 +   processes, the constraints aren't met, TuxOnIce will thaw all the
1333 +   other processes and begin to eat memory until its calculations indicate
1334 +   the constraints are met. It will then freeze processes again and recheck
1335 +   its calculations.
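+
+   That is, roughly (a pseudo-code sketch; constraints_met() and eat_memory()
+   are illustrative names, not the actual functions):
+
+   freeze_processes();
+   while (!constraints_met()) {
+           thaw_processes();
+           eat_memory();
+           freeze_processes();
+   }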
1336 +
1337 +   c. Allocation of storage.
1338 +
1339 +   Next, TuxOnIce allocates the storage that will be used to save
1340 +   the image.
1341 +
1342 +   The core of TuxOnIce knows nothing about how or where pages are stored. We
1343 +   therefore request the active allocator (remember you might have compiled in
1344 +   more than one!) to allocate enough storage for our expected image size. If
1345 +   this request cannot be fulfilled, we eat more memory and try again. If it
1346 +   is fulfilled, we seek to allocate additional storage, just in case our
1347 +   expected compression ratio (if any) isn't achieved. This time, however, we
1348 +   just continue if we can't allocate enough storage.
1349 +
1350 +   If these calls to our allocator change the characteristics of the image
1351 +   such that we haven't allocated enough memory, we also loop. (The allocator
1352 +   may well need to allocate space for its storage information).
1353 +
1354 +   d. Write the first part of the image.
1355 +
1356 +   TuxOnIce stores the image in two sets of pages called 'pagesets'.
1357 +   Pageset 2 contains pages on the active and inactive lists; essentially
1358 +   the page cache. Pageset 1 contains all other pages, including the kernel.
1359 +   We use two pagesets for one important reason: We need to make an atomic copy
1360 +   of the kernel to ensure consistency of the image. Without a second pageset,
1361 +   that would limit us to an image that was at most half the amount of memory
1362 +   available. Using two pagesets allows us to store a full image. Since pageset
1363 +   2 pages won't be needed in saving pageset 1, we first save pageset 2 pages.
1364 +   We can then make our atomic copy of the remaining pages using both pageset 2
1365 +   pages and any other pages that are free. While saving both pagesets, we are
1366 +   careful not to corrupt the image. Among other things, we use lowlevel block
1367 +   I/O routines that don't change the pagecache contents.
1368 +
1369 +   The next step, then, is writing pageset 2.
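+
+   As a worked example (illustrative figures): on a machine with 2GB of RAM
+   where 1.2GB is page cache, pageset 2 holds that 1.2GB and pageset 1 the
+   remaining 0.8GB. Once pageset 2 has been written, its pages plus any free
+   pages give us plenty of room for the 0.8GB atomic copy, so the full 2GB
+   image can be saved, where a single atomic copy would limit us to 1GB.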
1370 +
1371 +   e. Suspending drivers and storing processor context.
1372 +
1373 +   Having written pageset2, TuxOnIce calls the power management functions to
1374 +   notify drivers of the hibernation, and saves the processor state in preparation
1375 +   for the atomic copy of memory we are about to make.
1376 +
1377 +   f. Atomic copy.
1378 +
1379 +   At this stage, everything else but the TuxOnIce code is halted. Processes
1380 +   are frozen or idling, drivers are quiesced and have stored (ideally and where
1381 +   necessary) their configuration in memory we are about to atomically copy.
1382 +   In our lowlevel architecture specific code, we have saved the CPU state.
1383 +   We can therefore now do our atomic copy before resuming drivers etc.
1384 +
1385 +   g. Save the atomic copy (pageset 1).
1386 +
1387 +   TuxOnIce can then write the atomic copy of the remaining pages. Since we
1388 +   have copied the pages into other locations, we can continue to use the
1389 +   normal block I/O routines without fear of corrupting our image.
1390 +
1391 +   h. Save the image header.
1392 +
1393 +   Nearly there! We save our settings and other parameters needed for
1394 +   reloading pageset 1 in an 'image header'. We also tell our allocator to
1395 +   serialise its data at this stage, so that it can reread the image at resume
1396 +   time.
1397 +
1398 +   i. Set the image header.
1399 +
1400 +   Finally, we edit the header at our resume= location. The signature is
1401 +   changed by the allocator to reflect the fact that an image exists, and to
1402 +   point to the start of that data if necessary (swap allocator).
1403 +
1404 +   j. Power down.
1405 +
1406 +   Or reboot if we're debugging and the appropriate option is selected.
1407 +
1408 +   Whew!
1409 +
1410 +   Reloading the image.
1411 +   --------------------
1412 +
1413 +   Reloading the image is essentially the reverse of all the above. We load
1414 +   our copy of pageset 1, being careful to choose locations that aren't going
1415 +   to be overwritten as we copy it back (We start very early in the boot
1416 +   process, so there are no other processes to quiesce here). We then copy
1417 +   pageset 1 back to its original location in memory and restore the process
1418 +   context. We are now running with the original kernel. Next, we reload the
1419 +   pageset 2 pages, free the memory and swap used by TuxOnIce, restore
1420 +   the pageset header and restart processes. Sounds easy in comparison to
1421 +   hibernating, doesn't it!
1422 +
1423 +   There is of course more to TuxOnIce than this, but this explanation
1424 +   should be a good start. If there's interest, I'll write further
1425 +   documentation on range pages and the low level I/O.
1426 +
1427 +11. Who wrote TuxOnIce?
1428 +
1429 +   (Answer based on the writings of Florent Chabaud, credits in files and
1430 +   Nigel's limited knowledge; apologies to anyone missed out!)
1431 +
1432 +   The main developers of TuxOnIce have been...
1433 +
1434 +   Gabor Kuti
1435 +   Pavel Machek
1436 +   Florent Chabaud
1437 +   Bernard Blackham
1438 +   Nigel Cunningham
1439 +
1440 +   Significant portions of swsusp, the code in the vanilla kernel which
1441 +   TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should
1442 +   also be expressed to him.
1443 +
1444 +   The above mentioned developers have been aided in their efforts by a host
1445 +   of hundreds, if not thousands of testers and people who have submitted bug
1446 +   fixes & suggestions. Of special note are the efforts of Michael Frank, who
1447 +   had his computers repetitively hibernate and resume for literally tens of
1448 +   thousands of cycles and developed scripts to stress the system and test
1449 +   TuxOnIce far beyond the point most of us (Nigel included!) would consider
1450 +   testing. His efforts have contributed as much to TuxOnIce as any of the
1451 +   names above.
1452 diff --git a/MAINTAINERS b/MAINTAINERS
1453 index 6d119c9..a2f6ce6 100644
1454 --- a/MAINTAINERS
1455 +++ b/MAINTAINERS
1456 @@ -5679,6 +5679,13 @@ S:       Maintained
1457  F:     drivers/tc/
1458  F:     include/linux/tc.h
1459  
1460 +TUXONICE (ENHANCED HIBERNATION)
1461 +P:     Nigel Cunningham
1462 +M:     nigel@tuxonice.net
1463 +L:     tuxonice-devel@tuxonice.net
1464 +W:     http://tuxonice.net
1465 +S:     Maintained
1466 +
1467  U14-34F SCSI DRIVER
1468  M:     Dario Ballabio <ballabio_dario@emc.com>
1469  L:     linux-scsi@vger.kernel.org
1470 diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
1471 index 9fc02dc..a99d7e7 100644
1472 --- a/arch/powerpc/mm/pgtable_32.c
1473 +++ b/arch/powerpc/mm/pgtable_32.c
1474 @@ -427,6 +427,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1475  
1476         change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
1477  }
1478 +EXPORT_SYMBOL_GPL(kernel_map_pages);
1479  #endif /* CONFIG_DEBUG_PAGEALLOC */
1480  
1481  static int fixmaps;
1482 diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
1483 index 8e1aac8..84568af 100644
1484 --- a/arch/x86/kernel/reboot.c
1485 +++ b/arch/x86/kernel/reboot.c
1486 @@ -718,6 +718,7 @@ void machine_restart(char *cmd)
1487  {
1488         machine_ops.restart(cmd);
1489  }
1490 +EXPORT_SYMBOL_GPL(machine_restart);
1491  
1492  void machine_halt(void)
1493  {
1494 diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
1495 index 532e793..bad27ae 100644
1496 --- a/arch/x86/mm/pageattr.c
1497 +++ b/arch/x86/mm/pageattr.c
1498 @@ -1354,6 +1354,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
1499          */
1500         __flush_tlb_all();
1501  }
1502 +EXPORT_SYMBOL_GPL(kernel_map_pages);
1503  
1504  #ifdef CONFIG_HIBERNATION
1505  
1506 @@ -1368,7 +1369,7 @@ bool kernel_page_present(struct page *page)
1507         pte = lookup_address((unsigned long)page_address(page), &level);
1508         return (pte_val(*pte) & _PAGE_PRESENT);
1509  }
1510 -
1511 +EXPORT_SYMBOL_GPL(kernel_page_present);
1512  #endif /* CONFIG_HIBERNATION */
1513  
1514  #endif /* CONFIG_DEBUG_PAGEALLOC */
1515 diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
1516 index 1290ba5..2280003 100644
1517 --- a/arch/x86/power/cpu.c
1518 +++ b/arch/x86/power/cpu.c
1519 @@ -114,9 +114,7 @@ void save_processor_state(void)
1520  {
1521         __save_processor_state(&saved_context);
1522  }
1523 -#ifdef CONFIG_X86_32
1524  EXPORT_SYMBOL(save_processor_state);
1525 -#endif
1526  
1527  static void do_fpu_end(void)
1528  {
1529 diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
1530 index 3769079..4dabd68 100644
1531 --- a/arch/x86/power/hibernate_32.c
1532 +++ b/arch/x86/power/hibernate_32.c
1533 @@ -9,6 +9,7 @@
1534  #include <linux/gfp.h>
1535  #include <linux/suspend.h>
1536  #include <linux/bootmem.h>
1537 +#include <linux/module.h>
1538  
1539  #include <asm/system.h>
1540  #include <asm/page.h>
1541 @@ -164,6 +165,7 @@ int swsusp_arch_resume(void)
1542         restore_image();
1543         return 0;
1544  }
1545 +EXPORT_SYMBOL_GPL(swsusp_arch_resume);
1546  
1547  /*
1548   *     pfn_is_nosave - check if given pfn is in the 'nosave' section
1549 diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
1550 index d24f983..803b20a 100644
1551 --- a/arch/x86/power/hibernate_64.c
1552 +++ b/arch/x86/power/hibernate_64.c
1553 @@ -11,6 +11,7 @@
1554  #include <linux/gfp.h>
1555  #include <linux/smp.h>
1556  #include <linux/suspend.h>
1557 +#include <linux/module.h>
1558  #include <asm/proto.h>
1559  #include <asm/page.h>
1560  #include <asm/pgtable.h>
1561 @@ -119,6 +120,7 @@ int swsusp_arch_resume(void)
1562         restore_image();
1563         return 0;
1564  }
1565 +EXPORT_SYMBOL_GPL(swsusp_arch_resume);
1566  
1567  /*
1568   *     pfn_is_nosave - check if given pfn is in the 'nosave' section
1569 @@ -169,3 +171,4 @@ int arch_hibernation_header_restore(void *addr)
1570         restore_cr3 = rdr->cr3;
1571         return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
1572  }
1573 +EXPORT_SYMBOL_GPL(arch_hibernation_header_restore);
1574 diff --git a/block/Makefile b/block/Makefile
1575 index 0bb499a..49f36d0 100644
1576 --- a/block/Makefile
1577 +++ b/block/Makefile
1578 @@ -5,7 +5,8 @@
1579  obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
1580                         blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
1581                         blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
1582 -                       blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
1583 +                       blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \
1584 +                       uuid.o
1585  
1586  obj-$(CONFIG_BLK_DEV_BSG)      += bsg.o
1587  obj-$(CONFIG_BLK_CGROUP)       += blk-cgroup.o
1588 diff --git a/block/blk-core.c b/block/blk-core.c
1589 index f84cce4..6c28098 100644
1590 --- a/block/blk-core.c
1591 +++ b/block/blk-core.c
1592 @@ -37,6 +37,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
1593  EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
1594  EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
1595  
1596 +int trap_non_toi_io;
1597 +EXPORT_SYMBOL_GPL(trap_non_toi_io);
1598 +
1599  static int __make_request(struct request_queue *q, struct bio *bio);
1600  
1601  /*
1602 @@ -1582,6 +1585,9 @@ void submit_bio(int rw, struct bio *bio)
1603  
1604         bio->bi_rw |= rw;
1605  
1606 +       if (unlikely(trap_non_toi_io))
1607 +               BUG_ON(!bio_rw_flagged(bio, BIO_RW_TUXONICE));
1608 +
1609         /*
1610          * If it's a regular read/write or a barrier with data attached,
1611          * go through the normal accounting stuff before submission.
1612 diff --git a/block/genhd.c b/block/genhd.c
1613 index 59a2db6..6875d7d 100644
1614 --- a/block/genhd.c
1615 +++ b/block/genhd.c
1616 @@ -18,6 +18,8 @@
1617  #include <linux/buffer_head.h>
1618  #include <linux/mutex.h>
1619  #include <linux/idr.h>
1620 +#include <linux/ctype.h>
1621 +#include <linux/fs_uuid.h>
1622  
1623  #include "blk.h"
1624  
1625 @@ -1286,3 +1288,84 @@ int invalidate_partition(struct gendisk *disk, int partno)
1626  }
1627  
1628  EXPORT_SYMBOL(invalidate_partition);
1629 +
1630 +dev_t blk_lookup_fs_info(struct fs_info *seek)
1631 +{
1632 +       dev_t devt = MKDEV(0, 0);
1633 +       struct class_dev_iter iter;
1634 +       struct device *dev;
1635 +       int best_score = 0;
1636 +
1637 +       class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1638 +       while (best_score < 3 && (dev = class_dev_iter_next(&iter))) {
1639 +               struct gendisk *disk = dev_to_disk(dev);
1640 +               struct disk_part_iter piter;
1641 +               struct hd_struct *part;
1642 +
1643 +               disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
1644 +
1645 +               while (best_score < 3 && (part = disk_part_iter_next(&piter))) {
1646 +                       int score = part_matches_fs_info(part, seek);
1647 +                       if (score > best_score) {
1648 +                               devt = part_devt(part);
1649 +                               best_score = score;
1650 +                       }
1651 +               }
1652 +               disk_part_iter_exit(&piter);
1653 +       }
1654 +       class_dev_iter_exit(&iter);
1655 +       return devt;
1656 +}
1657 +EXPORT_SYMBOL_GPL(blk_lookup_fs_info);
1658 +
1659 +/* Caller uses NULL, key to start. For each match found, we return a bdev on
1660 + * which we have done blkdev_get, and we do the blkdev_put on block devices
1661 + * that are passed to us. When no more matches are found, we return NULL.
1662 + */
1663 +struct block_device *next_bdev_of_type(struct block_device *last,
1664 +       const char *key)
1665 +{
1666 +       dev_t devt = MKDEV(0, 0);
1667 +       struct class_dev_iter iter;
1668 +       struct device *dev;
1669 +       struct block_device *next = NULL, *bdev;
1670 +       int got_last = 0;
1671 +
1672 +       if (!key)
1673 +               goto out;
1674 +
1675 +       class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1676 +       while (!devt && (dev = class_dev_iter_next(&iter))) {
1677 +               struct gendisk *disk = dev_to_disk(dev);
1678 +               struct disk_part_iter piter;
1679 +               struct hd_struct *part;
1680 +
1681 +               disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
1682 +
1683 +               while ((part = disk_part_iter_next(&piter))) {
1684 +                       bdev = bdget(part_devt(part));
1685 +                       if (last && !got_last) {
1686 +                               if (last == bdev)
1687 +                                       got_last = 1;
1688 +                               continue;
1689 +                       }
1690 +
1691 +                       if (blkdev_get(bdev, FMODE_READ))
1692 +                               continue;
1693 +
1694 +                       if (bdev_matches_key(bdev, key)) {
1695 +                               next = bdev;
1696 +                               break;
1697 +                       }
1698 +
1699 +                       blkdev_put(bdev, FMODE_READ);
1700 +               }
1701 +               disk_part_iter_exit(&piter);
1702 +       }
1703 +       class_dev_iter_exit(&iter);
1704 +out:
1705 +       if (last)
1706 +               blkdev_put(last, FMODE_READ);
1707 +       return next;
1708 +}
1709 +EXPORT_SYMBOL_GPL(next_bdev_of_type);
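+
+/* An illustrative (not in-tree) use of the iteration protocol described
+ * above, where examine() is a hypothetical caller helper. Each call puts
+ * the bdev passed in, so a caller that stops iterating early must
+ * blkdev_put() the last bdev itself:
+ *
+ *     struct block_device *bdev = next_bdev_of_type(NULL, "swap");
+ *
+ *     while (bdev) {
+ *             examine(bdev);
+ *             bdev = next_bdev_of_type(bdev, "swap");
+ *     }
+ */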
1710 diff --git a/block/uuid.c b/block/uuid.c
1711 new file mode 100644
1712 index 0000000..37fd8e4
1713 --- /dev/null
1714 +++ b/block/uuid.c
1715 @@ -0,0 +1,492 @@
1716 +#include <linux/blkdev.h>
1717 +#include <linux/ctype.h>
1718 +#include <linux/fs_uuid.h>
1719 +#include <linux/slab.h>
1720 +
1721 +static int debug_enabled;
1722 +
1723 +#define PRINTK(fmt, args...) do {                                      \
1724 +       if (debug_enabled)                                              \
1725 +               printk(KERN_DEBUG fmt, ## args);                        \
1726 +       } while(0)
1727 +
1728 +#define PRINT_HEX_DUMP(v1, v2, v3, v4, v5, v6, v7, v8)                 \
1729 +       do {                                                            \
1730 +               if (debug_enabled)                                      \
1731 +                       print_hex_dump(v1, v2, v3, v4, v5, v6, v7, v8); \
1732 +       } while(0)
1733 +
1734 +/*
1735 + * Simple UUID translation
1736 + */
1737 +
1738 +struct uuid_info {
1739 +       const char *key;
1740 +       const char *name;
1741 +       long bkoff;
1742 +       unsigned sboff;
1743 +       unsigned sig_len;
1744 +       const char *magic;
1745 +       int uuid_offset;
1746 +       int last_mount_offset;
1747 +       int last_mount_size;
1748 +};
1749 +
1750 +/*
1751 + * Based on libuuid's blkid_magic array. Note that I don't
1752 + * have uuid offsets for all of these yet - missing ones are 0x0.
1753 + * Further information welcome.
1754 + *
1755 + * Rearranged by page of fs signature for optimisation.
1756 + */
1757 +static struct uuid_info uuid_list[] = {
1758 + { NULL, "oracleasm", 0, 32, 8, "ORCLDISK", 0x0, 0, 0 },
1759 + { "ntfs", "ntfs", 0, 3, 8, "NTFS    ", 0x0, 0, 0 },
1760 + { "vfat", "vfat", 0, 0x52, 5, "MSWIN", 0x0, 0, 0 },
1761 + { "vfat", "vfat", 0, 0x52, 8, "FAT32   ", 0x0, 0, 0 },
1762 + { "vfat", "vfat", 0, 0x36, 5, "MSDOS", 0x0, 0, 0 },
1763 + { "vfat", "vfat", 0, 0x36, 8, "FAT16   ", 0x0, 0, 0 },
1764 + { "vfat", "vfat", 0, 0x36, 8, "FAT12   ", 0x0, 0, 0 },
1765 + { "vfat", "vfat", 0, 0, 1, "\353", 0x0, 0, 0 },
1766 + { "vfat", "vfat", 0, 0, 1, "\351", 0x0, 0, 0 },
1767 + { "vfat", "vfat", 0, 0x1fe, 2, "\125\252", 0x0, 0, 0 },
1768 + { "xfs", "xfs", 0, 0, 4, "XFSB", 0x14, 0, 0 },
1769 + { "romfs", "romfs", 0, 0, 8, "-rom1fs-", 0x0, 0, 0 },
1770 + { "bfs", "bfs", 0, 0, 4, "\316\372\173\033", 0, 0, 0 },
1771 + { "cramfs", "cramfs", 0, 0, 4, "E=\315\050", 0x0, 0, 0 },
1772 + { "qnx4", "qnx4", 0, 4, 6, "QNX4FS", 0, 0, 0 },
1773 + { NULL, "crypt_LUKS", 0, 0, 6, "LUKS\xba\xbe", 0x0, 0, 0 },
1774 + { "squashfs", "squashfs", 0, 0, 4, "sqsh", 0, 0, 0 },
1775 + { "squashfs", "squashfs", 0, 0, 4, "hsqs", 0, 0, 0 },
1776 + { "ocfs", "ocfs", 0, 8, 9, "OracleCFS", 0x0, 0, 0 },
1777 + { "lvm2pv", "lvm2pv", 0, 0x018, 8, "LVM2 001", 0x0, 0, 0 },
1778 + { "sysv", "sysv", 0, 0x3f8, 4, "\020~\030\375", 0, 0, 0 },
1779 + { "ext", "ext", 1, 0x38, 2, "\123\357", 0x468, 0x42c, 4 },
1780 + { "minix", "minix", 1, 0x10, 2, "\177\023", 0, 0, 0 },
1781 + { "minix", "minix", 1, 0x10, 2, "\217\023", 0, 0, 0 },
1782 + { "minix", "minix", 1, 0x10, 2, "\150\044", 0, 0, 0 },
1783 + { "minix", "minix", 1, 0x10, 2, "\170\044", 0, 0, 0 },
1784 + { "lvm2pv", "lvm2pv", 1, 0x018, 8, "LVM2 001", 0x0, 0, 0 },
1785 + { "vxfs", "vxfs", 1, 0, 4, "\365\374\001\245", 0, 0, 0 },
1786 + { "hfsplus", "hfsplus", 1, 0, 2, "BD", 0x0, 0, 0 },
1787 + { "hfsplus", "hfsplus", 1, 0, 2, "H+", 0x0, 0, 0 },
1788 + { "hfsplus", "hfsplus", 1, 0, 2, "HX", 0x0, 0, 0 },
1789 + { "hfs", "hfs", 1, 0, 2, "BD", 0x0, 0, 0 },
1790 + { "ocfs2", "ocfs2", 1, 0, 6, "OCFSV2", 0x0, 0, 0 },
1791 + { "lvm2pv", "lvm2pv", 0, 0x218, 8, "LVM2 001", 0x0, 0, 0 },
1792 + { "lvm2pv", "lvm2pv", 1, 0x218, 8, "LVM2 001", 0x0, 0, 0 },
1793 + { "ocfs2", "ocfs2", 2, 0, 6, "OCFSV2", 0x0, 0, 0 },
1794 + { "swap", "swap", 0, 0xff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
1795 + { "swap", "swap", 0, 0xff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
1796 + { "swap", "swsuspend", 0, 0xff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
1797 + { "swap", "swsuspend", 0, 0xff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
1798 + { "swap", "swsuspend", 0, 0xff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
1799 + { "ocfs2", "ocfs2", 4, 0, 6, "OCFSV2", 0x0, 0, 0 },
1800 + { "ocfs2", "ocfs2", 8, 0, 6, "OCFSV2", 0x0, 0, 0 },
1801 + { "hpfs", "hpfs", 8, 0, 4, "I\350\225\371", 0, 0, 0 },
1802 + { "reiserfs", "reiserfs", 8, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 },
1803 + { "reiserfs", "reiserfs", 8, 20, 8, "ReIsErFs", 0x10054, 0, 0 },
1804 + { "zfs", "zfs", 8, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 },
1805 + { "zfs", "zfs", 8, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 },
1806 + { "ufs", "ufs", 8, 0x55c, 4, "T\031\001\000", 0, 0, 0 },
1807 + { "swap", "swap", 0, 0x1ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
1808 + { "swap", "swap", 0, 0x1ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
1809 + { "swap", "swsuspend", 0, 0x1ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
1810 + { "swap", "swsuspend", 0, 0x1ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
1811 + { "swap", "swsuspend", 0, 0x1ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
1812 + { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr2Fs", 0x10054, 0, 0 },
1813 + { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr3Fs", 0x10054, 0, 0 },
1814 + { "reiserfs", "reiserfs", 64, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 },
1815 + { "reiser4", "reiser4", 64, 0, 7, "ReIsEr4", 0x100544, 0, 0 },
1816 + { "gfs2", "gfs2", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 },
1817 + { "gfs", "gfs", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 },
1818 + { "btrfs", "btrfs", 64, 0x40, 8, "_BHRfS_M", 0x0, 0, 0 },
1819 + { "swap", "swap", 0, 0x3ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
1820 + { "swap", "swap", 0, 0x3ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
1821 + { "swap", "swsuspend", 0, 0x3ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
1822 + { "swap", "swsuspend", 0, 0x3ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
1823 + { "swap", "swsuspend", 0, 0x3ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
1824 + { "udf", "udf", 32, 1, 5, "BEA01", 0x0, 0, 0 },
1825 + { "udf", "udf", 32, 1, 5, "BOOT2", 0x0, 0, 0 },
1826 + { "udf", "udf", 32, 1, 5, "CD001", 0x0, 0, 0 },
1827 + { "udf", "udf", 32, 1, 5, "CDW02", 0x0, 0, 0 },
1828 + { "udf", "udf", 32, 1, 5, "NSR02", 0x0, 0, 0 },
1829 + { "udf", "udf", 32, 1, 5, "NSR03", 0x0, 0, 0 },
1830 + { "udf", "udf", 32, 1, 5, "TEA01", 0x0, 0, 0 },
1831 + { "iso9660", "iso9660", 32, 1, 5, "CD001", 0x0, 0, 0 },
1832 + { "iso9660", "iso9660", 32, 9, 5, "CDROM", 0x0, 0, 0 },
1833 + { "jfs", "jfs", 32, 0, 4, "JFS1", 0x88, 0, 0 },
1834 + { "swap", "swap", 0, 0x7ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
1835 + { "swap", "swap", 0, 0x7ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
1836 + { "swap", "swsuspend", 0, 0x7ff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
1837 + { "swap", "swsuspend", 0, 0x7ff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
1838 + { "swap", "swsuspend", 0, 0x7ff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
1839 + { "swap", "swap", 0, 0xfff6, 10, "SWAP-SPACE", 0x40c, 0, 0 },
1840 + { "swap", "swap", 0, 0xfff6, 10, "SWAPSPACE2", 0x40c, 0, 0 },
1841 + { "swap", "swsuspend", 0, 0xfff6, 9, "S1SUSPEND", 0x40c, 0, 0 },
1842 + { "swap", "swsuspend", 0, 0xfff6, 9, "S2SUSPEND", 0x40c, 0, 0 },
1843 + { "swap", "swsuspend", 0, 0xfff6, 9, "ULSUSPEND", 0x40c, 0, 0 },
1844 + { "zfs", "zfs", 264, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 },
1845 + { "zfs", "zfs", 264, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 },
1846 + { NULL, NULL, 0, 0, 0, NULL, 0x0, 0, 0 }
1847 +};
1848 +
1849 +static int null_uuid(const char *uuid)
1850 +{
1851 +       int i;
1852 +
1853 +       for (i = 0; i < 16 && !uuid[i]; i++);
1854 +
1855 +       return (i == 16);
1856 +}
1857 +
1858 +
1859 +static void uuid_end_bio(struct bio *bio, int err)
1860 +{
1861 +       struct page *page = bio->bi_io_vec[0].bv_page;
1862 +
1863 +       if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1864 +               SetPageError(page);
1865 +
1866 +       unlock_page(page);
1867 +       bio_put(bio);
1868 +}
1869 +
1870 +
1871 +/**
1872 + * read_bdev_page - submit a BIO request to read one page
1873 + * @dev: The block device we're using.
1874 + * @page_num: The page we're reading.
1875 + *
1876 + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
1877 + * textbook - allocate and initialize the bio. If we're writing, make sure
1878 + * the page is marked as dirty. Then submit it and carry on."
1879 + **/
1880 +static struct page *read_bdev_page(struct block_device *dev, int page_num)
1881 +{
1882 +       struct bio *bio = NULL;
1883 +       struct page *page = alloc_page(GFP_NOFS);
1884 +
1885 +       if (!page) {
1886 +               printk(KERN_ERR "Failed to allocate a page for reading data "
1887 +                               "in UUID checks.\n");
1888 +               return NULL;
1889 +       }
1890 +
1891 +       bio = bio_alloc(GFP_NOFS, 1);
1892 +       bio->bi_bdev = dev;
1893 +       bio->bi_sector = page_num << 3;
1894 +       bio->bi_end_io = uuid_end_bio;
1895 +
1896 +       PRINTK("Submitting bio on device %lx, page %d.\n",
1897 +                       (unsigned long) dev->bd_dev, page_num);
1898 +
1899 +       if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1900 +               printk(KERN_DEBUG "ERROR: adding page to bio at %d\n",
1901 +                               page_num);
1902 +               bio_put(bio);
1903 +               __free_page(page);
1904 +               printk(KERN_DEBUG "read_bdev_page freed page %p (in error "
1905 +                               "path).\n", page);
1906 +               return NULL;
1907 +       }
1908 +
1909 +       lock_page(page);
1910 +       submit_bio(READ | (1 << BIO_RW_SYNCIO) |
1911 +                       (1 << BIO_RW_UNPLUG), bio);
1912 +
1913 +       wait_on_page_locked(page);
1914 +       if (PageError(page)) {
1915 +               __free_page(page);
1916 +               page = NULL;
1917 +       }
1918 +       return page;
1919 +}
1920 +
1921 +int bdev_matches_key(struct block_device *bdev, const char *key)
1922 +{
1923 +       unsigned char *data = NULL;
1924 +       struct page *data_page = NULL;
1925 +
1926 +       int dev_offset, pg_num, pg_off, i;
1927 +       int last_pg_num = -1;
1928 +       int result = 0;
1929 +       char buf[50];
1930 +
1931 +       if (null_uuid(key)) {
1932 +               PRINTK("Refusing to find a NULL key.\n");
1933 +               return 0;
1934 +       }
1935 +
1936 +       if (!bdev->bd_disk) {
1937 +               bdevname(bdev, buf);
1938 +               PRINTK("bdev %s has no bd_disk.\n", buf);
1939 +               return 0;
1940 +       }
1941 +
1942 +       if (!bdev->bd_disk->queue) {
1943 +               bdevname(bdev, buf);
1944 +               PRINTK("bdev %s has no queue.\n", buf);
1945 +               return 0;
1946 +       }
1947 +
1948 +       for (i = 0; uuid_list[i].name; i++) {
1949 +               struct uuid_info *dat = &uuid_list[i];
1950 +
1951 +               if (!dat->key || strcmp(dat->key, key))
1952 +                       continue;
1953 +
1954 +               dev_offset = (dat->bkoff << 10) + dat->sboff;
1955 +               pg_num = dev_offset >> 12;
1956 +               pg_off = dev_offset & 0xfff;
1957 +
1958 +               if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1)
1959 +                       continue;
1960 +
1961 +               if (pg_num != last_pg_num) {
1962 +                       if (data_page)
1963 +                               __free_page(data_page);
1964 +                       data_page = read_bdev_page(bdev, pg_num);
1965 +                       if (!data_page)
1966 +                               continue;
1967 +                       data = page_address(data_page);
1968 +               }
1969 +
1970 +               last_pg_num = pg_num;
1971 +
1972 +               if (strncmp(&data[pg_off], dat->magic, dat->sig_len))
1973 +                       continue;
1974 +
1975 +               result = 1;
1976 +               break;
1977 +       }
1978 +
1979 +       if (data_page)
1980 +               __free_page(data_page);
1981 +
1982 +       return result;
1983 +}
1984 +
1985 +/* 
1986 + * part_matches_fs_info - Does the given partition match the details given?
1987 + *
1988 + * Returns a score saying how good the match is.
1989 + * 0 = no UUID match.
1990 + * 1 = UUID but last mount time differs.
1991 + * 2 = UUID, last mount time but not dev_t
1992 + * 3 = perfect match
1993 + *
1994 + * This lets us cope elegantly with probing resulting in dev_ts changing
1995 + * from boot to boot, and with the case where a user copies a partition
1996 + * (UUID is non-unique), and we need to check the last mount time of the
1997 + * correct partition.
1998 + */
1999 +int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek)
2000 +{
2001 +       struct block_device *bdev;
2002 +       struct fs_info *got;
2003 +       int result = 0;
2004 +       char buf[50];
2005 +
2006 +       if (null_uuid((char *) &seek->uuid)) {
2007 +               PRINTK("Refusing to find a NULL uuid.\n");
2008 +               return 0;
2009 +       }
2010 +
2011 +       bdev = bdget(part_devt(part));
2012 +
2013 +       PRINTK("part_matches fs info considering %x.\n", part_devt(part));
2014 +
2015 +       if (blkdev_get(bdev, FMODE_READ)) {
2016 +               PRINTK("blkdev_get failed.\n");
2017 +               return 0;
2018 +       }
2019 +
2020 +       if (!bdev->bd_disk) {
2021 +               bdevname(bdev, buf);
2022 +               PRINTK("bdev %s has no bd_disk.\n", buf);
2023 +               goto out;
2024 +       }
2025 +
2026 +       if (!bdev->bd_disk->queue) {
2027 +               bdevname(bdev, buf);
2028 +               PRINTK("bdev %s has no queue.\n", buf);
2029 +               goto out;
2030 +       }
2031 +
2032 +       got = fs_info_from_block_dev(bdev);
2033 +
2034 +       if (got && !memcmp(got->uuid, seek->uuid, 16)) {
2035 +               PRINTK(" Having matching UUID.\n");
2036 +               PRINTK(" Got: LMS %d, LM %p.\n", got->last_mount_size, got->last_mount);
2037 +               PRINTK(" Seek: LMS %d, LM %p.\n", seek->last_mount_size, seek->last_mount);
2038 +               result = 1;
2039 +
2040 +               if (got->last_mount_size == seek->last_mount_size &&
2041 +                   got->last_mount && seek->last_mount &&
2042 +                   !memcmp(got->last_mount, seek->last_mount,
2043 +                           got->last_mount_size)) {
2044 +                       result = 2;
2045 +
2046 +                       PRINTK(" Matching last mount time.\n");
2047 +
2048 +                       if (part_devt(part) == seek->dev_t) {
2049 +                               result = 3;
2050 +                               PRINTK(" Matching dev_t.\n");
2051 +                       } else
2052 +                               PRINTK("Dev_ts differ (%x vs %x).\n", part_devt(part), seek->dev_t);
2053 +               }
2054 +       }
2055 +
2056 +       PRINTK(" Score for %x is %d.\n", part_devt(part), result);
2057 +       free_fs_info(got);
2058 +out:
2059 +       blkdev_put(bdev, FMODE_READ);
2060 +       return result;
2061 +}
2062 +
2063 +void free_fs_info(struct fs_info *fs_info)
2064 +{
2065 +       if (!fs_info || IS_ERR(fs_info))
2066 +               return;
2067 +
2068 +       if (fs_info->last_mount)
2069 +               kfree(fs_info->last_mount);
2070 +
2071 +       kfree(fs_info);
2072 +}
2073 +EXPORT_SYMBOL_GPL(free_fs_info);
2074 +
2075 +struct fs_info *fs_info_from_block_dev(struct block_device *bdev)
2076 +{
2077 +       unsigned char *data = NULL;
2078 +       struct page *data_page = NULL;
2079 +
2080 +       int dev_offset, pg_num, pg_off;
2081 +       int uuid_pg_num, uuid_pg_off, i;
2082 +       unsigned char *uuid_data = NULL;
2083 +       struct page *uuid_data_page = NULL;
2084 +
2085 +       int last_pg_num = -1, last_uuid_pg_num = 0;
2086 +       char buf[50];
2087 +       struct fs_info *fs_info = NULL;
2088 +
2089 +       bdevname(bdev, buf);
2090 +
2091 +       PRINTK("uuid_from_block_dev looking for partition type of %s.\n", buf);
2092 +
2093 +       for (i = 0; uuid_list[i].name; i++) {
2094 +               struct uuid_info *dat = &uuid_list[i];
2095 +               dev_offset = (dat->bkoff << 10) + dat->sboff;
2096 +               pg_num = dev_offset >> 12;
2097 +               pg_off = dev_offset & 0xfff;
2098 +               uuid_pg_num = dat->uuid_offset >> 12;
2099 +               uuid_pg_off = dat->uuid_offset & 0xfff;
2100 +
2101 +               if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1)
2102 +                       continue;
2103 +
2104 +               /* Ignore partition types with no UUID offset */
2105 +               if (!dat->uuid_offset)
2106 +                       continue;
2107 +
2108 +               if (pg_num != last_pg_num) {
2109 +                       if (data_page)
2110 +                               __free_page(data_page);
2111 +                       data_page = read_bdev_page(bdev, pg_num);
2112 +                       if (!data_page)
2113 +                               continue;
2114 +                       data = page_address(data_page);
2115 +               }
2116 +
2117 +               last_pg_num = pg_num;
2118 +
2119 +               if (strncmp(&data[pg_off], dat->magic, dat->sig_len))
2120 +                       continue;
2121 +
2122 +               PRINTK("This partition looks like %s.\n", dat->name);
2123 +
2124 +               fs_info = kzalloc(sizeof(struct fs_info), GFP_KERNEL);
2125 +
2126 +               if (!fs_info) {
2127 +                       PRINTK("Failed to allocate fs_info struct.\n");
2128 +                       fs_info = ERR_PTR(-ENOMEM);
2129 +                       break;
2130 +               }
2131 +
2132 +               /* UUID can't be off the end of the disk */
2133 +               if ((uuid_pg_num > bdev->bd_part->nr_sects >> 3) ||
2134 +                               !dat->uuid_offset)
2135 +                       goto no_uuid;
2136 +
2137 +               if (!uuid_data || uuid_pg_num != last_uuid_pg_num) {
2138 +                       if (uuid_data_page)
2139 +                               __free_page(uuid_data_page);
2140 +                       uuid_data_page = read_bdev_page(bdev, uuid_pg_num);
2141 +                       if (!uuid_data_page)
2142 +                               continue;
2143 +                       uuid_data = page_address(uuid_data_page);
2144 +               }
2145 +
2146 +               last_uuid_pg_num = uuid_pg_num;
2147 +               memcpy(&fs_info->uuid, &uuid_data[uuid_pg_off], 16);
2148 +               fs_info->dev_t = bdev->bd_dev;
2149 +
2150 +no_uuid:
2151 +               PRINT_HEX_DUMP(KERN_EMERG, "fs_info_from_block_dev "
2152 +                               "returning uuid ", DUMP_PREFIX_NONE, 16, 1,
2153 +                               fs_info->uuid, 16, 0);
2154 +
2155 +               if (dat->last_mount_size) {
2156 +                       int pg = dat->last_mount_offset >> 12, sz;
2157 +                       int off = dat->last_mount_offset & 0xfff;
2158 +                       struct page *last_mount = read_bdev_page(bdev, pg);
2159 +                       unsigned char *last_mount_data;
2160 +                       char *ptr;
2161 +
2162 +                       if (!last_mount) {
2163 +                               fs_info = ERR_PTR(-ENOMEM);
2164 +                               break;
2165 +                       }
2166 +                       last_mount_data = page_address(last_mount);
2167 +                       sz = dat->last_mount_size;
2168 +                       ptr = kmalloc(sz, GFP_KERNEL);
2169 +
2170 +                       if (!ptr) {
2171 +                               printk(KERN_EMERG "fs_info_from_block_dev "
2172 +                                       "failed to get memory for last mount "
2173 +                                       "timestamp.");
2174 +                               free_fs_info(fs_info);
2175 +                               fs_info = ERR_PTR(-ENOMEM);
2176 +                       } else {
2177 +                               fs_info->last_mount = ptr;
2178 +                               fs_info->last_mount_size = sz;
2179 +                               memcpy(ptr, &last_mount_data[off], sz);
2180 +                       }
2181 +
2182 +                       __free_page(last_mount);
2183 +               }
2184 +               break;
2185 +       }
2186 +
2187 +       if (data_page)
2188 +               __free_page(data_page);
2189 +
2190 +       if (uuid_data_page)
2191 +               __free_page(uuid_data_page);
2192 +
2193 +       return fs_info;
2194 +}
2195 +EXPORT_SYMBOL_GPL(fs_info_from_block_dev);
2196 +
2197 +static int __init uuid_debug_setup(char *str)
2198 +{
2199 +       int value;
2200 +
2201 +       if (sscanf(str, "=%d", &value))
2202 +               debug_enabled = value;
2203 +
2204 +       return 1;
2205 +}
2206 +
2207 +__setup("uuid_debug", uuid_debug_setup);
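+
+/* Boot with "uuid_debug=1" on the kernel command line to enable the
+ * PRINTK/PRINT_HEX_DUMP debugging output above. */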
2208 diff --git a/crypto/Kconfig b/crypto/Kconfig
2209 index 9d9434f..b5911be 100644
2210 --- a/crypto/Kconfig
2211 +++ b/crypto/Kconfig
2212 @@ -816,6 +816,13 @@ config CRYPTO_LZO
2213         help
2214           This is the LZO algorithm.
2215  
2216 +config CRYPTO_LZF
2217 +       tristate "LZF compression algorithm"
2218 +       select CRYPTO_ALGAPI
2219 +       help
2220 +         This is the LZF algorithm. It is especially useful for TuxOnIce,
2221 +         because it achieves good compression quickly.
2222 +
2223  comment "Random Number Generation"
2224  
2225  config CRYPTO_ANSI_CPRNG
2226 diff --git a/crypto/Makefile b/crypto/Makefile
2227 index d7e6441..76b9a9e 100644
2228 --- a/crypto/Makefile
2229 +++ b/crypto/Makefile
2230 @@ -78,6 +78,7 @@ obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
2231  obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
2232  obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
2233  obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
2234 +obj-$(CONFIG_CRYPTO_LZF) += lzf.o
2235  obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o
2236  obj-$(CONFIG_CRYPTO_LZO) += lzo.o
2237  obj-$(CONFIG_CRYPTO_RNG2) += rng.o
2238 diff --git a/crypto/lzf.c b/crypto/lzf.c
2239 new file mode 100644
2240 index 0000000..ccaf83a
2241 --- /dev/null
2242 +++ b/crypto/lzf.c
2243 @@ -0,0 +1,326 @@
2244 +/*
2245 + * Cryptoapi LZF compression module.
2246 + *
2247 + * Copyright (c) 2004-2008 Nigel Cunningham <nigel at tuxonice net>
2248 + *
2249 + * based on the deflate.c file:
2250 + *
2251 + * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
2252 + *
2253 + * and upon the LZF compression module donated to the TuxOnIce project with
2254 + * the following copyright:
2255 + *
2256 + * This program is free software; you can redistribute it and/or modify it
2257 + * under the terms of the GNU General Public License as published by the Free
2258 + * Software Foundation; either version 2 of the License, or (at your option)
2259 + * any later version.
2260 + * Copyright (c) 2000-2003 Marc Alexander Lehmann <pcg@goof.com>
2261 + *
2262 + * Redistribution and use in source and binary forms, with or without modifica-
2263 + * tion, are permitted provided that the following conditions are met:
2264 + *
2265 + *   1.  Redistributions of source code must retain the above copyright notice,
2266 + *       this list of conditions and the following disclaimer.
2267 + *
2268 + *   2.  Redistributions in binary form must reproduce the above copyright
2269 + *       notice, this list of conditions and the following disclaimer in the
2270 + *       documentation and/or other materials provided with the distribution.
2271 + *
2272 + *   3.  The name of the author may not be used to endorse or promote products
2273 + *       derived from this software without specific prior written permission.
2274 + *
2275 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
2276 + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
2277 + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
2278 + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
2279 + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
2280 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
2281 + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
2282 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
2283 + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
2284 + * OF THE POSSIBILITY OF SUCH DAMAGE.
2285 + *
2286 + * Alternatively, the contents of this file may be used under the terms of
2287 + * the GNU General Public License version 2 (the "GPL"), in which case the
2288 + * provisions of the GPL are applicable instead of the above. If you wish to
2289 + * allow the use of your version of this file only under the terms of the
2290 + * GPL and not to allow others to use your version of this file under the
2291 + * BSD license, indicate your decision by deleting the provisions above and
2292 + * replace them with the notice and other provisions required by the GPL. If
2293 + * you do not delete the provisions above, a recipient may use your version
2294 + * of this file under either the BSD or the GPL.
2295 + */
2296 +
2297 +#include <linux/kernel.h>
2298 +#include <linux/module.h>
2299 +#include <linux/init.h>
2301 +#include <linux/crypto.h>
2302 +#include <linux/err.h>
2303 +#include <linux/vmalloc.h>
2304 +#include <linux/string.h>
2305 +
2306 +struct lzf_ctx {
2307 +       void *hbuf;
2308 +       unsigned int bufofs;
2309 +};
2310 +
2311 +/*
2312 + * size of hashtable is (1 << hlog) * sizeof (char *)
2313 + * decompression is independent of the hash table size
2314 + * the difference between 15 and 14 is very small
2315 + * for small blocks (and 14 is also faster).
2316 + * For a low-memory configuration, use hlog == 13;
2317 + * For best compression, use 15 or 16.
2318 + */
2319 +static const int hlog = 13;
2320 +
2321 +/*
2322 + * don't play with this unless you benchmark!
2323 + * decompression is not dependent on the hash function
2324 + * the hashing function might seem strange, just believe me
2325 + * it works ;)
2326 + */
2327 +static inline u16 first(const u8 *p)
2328 +{
2329 +       return ((p[0]) << 8) + p[1];
2330 +}
2331 +
2332 +static inline u16 next(u8 v, const u8 *p)
2333 +{
2334 +       return ((v) << 8) + p[2];
2335 +}
2336 +
2337 +static inline u32 idx(unsigned int h)
2338 +{
2339 +       return (((h ^ (h << 5)) >> (3*8 - hlog)) + h*3) & ((1 << hlog) - 1);
2340 +}
2341 +
2342 +/*
2343 + * IDX works because it is very similar to a multiplicative hash, e.g.
2344 + * (h * 57321 >> (3*8 - hlog))
2345 + * the next one is also quite good, albeit slow ;)
2346 + * (int)(cos(h & 0xffffff) * 1e6)
2347 + */
2348 +
2349 +static const int max_lit = (1 <<  5);
2350 +static const int max_off = (1 << 13);
2351 +static const int max_ref = ((1 <<  8) + (1 << 3));
2352 +
2353 +/*
2354 + * compressed format
2355 + *
2356 + * 000LLLLL <L+1>    ; literal
2357 + * LLLOOOOO oooooooo ; backref L
2358 + * 111OOOOO LLLLLLLL oooooooo ; backref L+7
2359 + *
2360 + */
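+
+/* Worked example (illustrative): five literal bytes "hello" are emitted as
+ * 0x04 'h' 'e' 'l' 'l' 'o' (the stored literal length is L-1). A 4-byte
+ * match at distance 100 stores len - 2 = 2 and off = 99, giving the two
+ * bytes 0x40 0x63: (off >> 8) + (len << 5), then the low byte of off.
+ */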
2361 +
2362 +static void lzf_compress_exit(struct crypto_tfm *tfm)
2363 +{
2364 +       struct lzf_ctx *ctx = crypto_tfm_ctx(tfm);
2365 +
2366 +       if (!ctx->hbuf)
2367 +               return;
2368 +
2369 +       vfree(ctx->hbuf);
2370 +       ctx->hbuf = NULL;
2371 +}
2372 +
2373 +static int lzf_compress_init(struct crypto_tfm *tfm)
2374 +{
2375 +       struct lzf_ctx *ctx = crypto_tfm_ctx(tfm);
2376 +
2377 +       /* Get LZF ready to go */
2378 +       ctx->hbuf = vmalloc_32((1 << hlog) * sizeof(char *));
2379 +       if (ctx->hbuf)
2380 +               return 0;
2381 +
2382 +       printk(KERN_WARNING "Failed to allocate %ld bytes for lzf workspace\n",
2383 +                       (long) ((1 << hlog) * sizeof(char *)));
2384 +       return -ENOMEM;
2385 +}
2386 +
2387 +static int lzf_compress(struct crypto_tfm *tfm, const u8 *in_data,
2388 +               unsigned int in_len, u8 *out_data, unsigned int *out_len)
2389 +{
2390 +       struct lzf_ctx *ctx = crypto_tfm_ctx(tfm);
2391 +       const u8 **htab = ctx->hbuf;
2392 +       const u8 **hslot;
2393 +       const u8 *ip = in_data;
2394 +       u8 *op = out_data;
2395 +       const u8 *in_end = ip + in_len;
2396 +       u8 *out_end = op + *out_len - 3;
2397 +       const u8 *ref;
2398 +
2399 +       unsigned int hval = first(ip);
2400 +       unsigned long off;
2401 +       int lit = 0;
2402 +
2403 +       memset(htab, 0, (1 << hlog) * sizeof(*htab));
2404 +
2405 +       for (;;) {
2406 +               if (ip < in_end - 2) {
2407 +                       hval = next(hval, ip);
2408 +                       hslot = htab + idx(hval);
2409 +                       ref = *hslot;
2410 +                       *hslot = ip;
2411 +
2412 +                       off = ip - ref - 1;
2413 +                       if (off < max_off
2414 +                           && ip + 4 < in_end && ref > in_data
2415 +                           && *(u16 *) ref == *(u16 *) ip && ref[2] == ip[2]
2416 +                           ) {
2417 +                               /* match found at *ref++ */
2418 +                               unsigned int len = 2;
2419 +                               unsigned int maxlen = in_end - ip - len;
2420 +                               maxlen = maxlen > max_ref ? max_ref : maxlen;
2421 +
2422 +                               do {
2423 +                                       len++;
2424 +                               } while (len < maxlen && ref[len] == ip[len]);
2425 +
2426 +                               if (op + lit + 1 + 3 >= out_end) {
2427 +                                       *out_len = PAGE_SIZE;
2428 +                                       return 0;
2429 +                               }
2430 +
2431 +                               if (lit) {
2432 +                                       *op++ = lit - 1;
2433 +                                       lit = -lit;
2434 +                                       do {
2435 +                                               *op++ = ip[lit];
2436 +                                       } while (++lit);
2437 +                               }
2438 +
2439 +                               len -= 2;
2440 +                               ip++;
2441 +
2442 +                               if (len < 7) {
2443 +                                       *op++ = (off >> 8) + (len << 5);
2444 +                               } else {
2445 +                                       *op++ = (off >> 8) + (7 << 5);
2446 +                                       *op++ = len - 7;
2447 +                               }
2448 +
2449 +                               *op++ = off;
2450 +
2451 +                               ip += len;
2452 +                               hval = first(ip);
2453 +                               hval = next(hval, ip);
2454 +                               htab[idx(hval)] = ip;
2455 +                               ip++;
2456 +                               continue;
2457 +                       }
2458 +               } else if (ip == in_end)
2459 +                       break;
2460 +
2461 +               /* one more literal byte we must copy */
2462 +               lit++;
2463 +               ip++;
2464 +
2465 +               if (lit == max_lit) {
2466 +                       if (op + 1 + max_lit >= out_end) {
2467 +                               *out_len = PAGE_SIZE;
2468 +                               return 0;
2469 +                       }
2470 +
2471 +                       *op++ = max_lit - 1;
2472 +                       memcpy(op, ip - max_lit, max_lit);
2473 +                       op += max_lit;
2474 +                       lit = 0;
2475 +               }
2476 +       }
2477 +
2478 +       if (lit) {
2479 +               if (op + lit + 1 >= out_end) {
2480 +                       *out_len = PAGE_SIZE;
2481 +                       return 0;
2482 +               }
2483 +
2484 +               *op++ = lit - 1;
2485 +               lit = -lit;
2486 +               do {
2487 +                       *op++ = ip[lit];
2488 +               } while (++lit);
2489 +       }
2490 +
2491 +       *out_len = op - out_data;
2492 +       return 0;
2493 +}
2494 +
2495 +static int lzf_decompress(struct crypto_tfm *tfm, const u8 *src,
2496 +               unsigned int slen, u8 *dst, unsigned int *dlen)
2497 +{
2498 +       u8 const *ip = src;
2499 +       u8 *op = dst;
2500 +       u8 const *const in_end = ip + slen;
2501 +       u8 *const out_end = op + *dlen;
2502 +
2503 +       *dlen = PAGE_SIZE;
2504 +       do {
2505 +               unsigned int ctrl = *ip++;
2506 +
2507 +               if (ctrl < (1 << 5)) {
2508 +                       /* literal run */
2509 +                       ctrl++;
2510 +
2511 +                       if (op + ctrl > out_end)
2512 +                               return 0;
2513 +                       memcpy(op, ip, ctrl);
2514 +                       op += ctrl;
2515 +                       ip += ctrl;
2516 +               } else {        /* back reference */
2517 +
2518 +                       unsigned int len = ctrl >> 5;
2519 +
2520 +                       u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
2521 +
2522 +                       if (len == 7)
2523 +                               len += *ip++;
2524 +
2525 +                       ref -= *ip++;
2526 +                       len += 2;
2527 +
2528 +                       if (op + len > out_end || ref < (u8 *) dst)
2529 +                               return 0;
2530 +
2531 +                       do {
2532 +                               *op++ = *ref++;
2533 +                       } while (--len);
2534 +               }
2535 +       } while (op < out_end && ip < in_end);
2536 +
2537 +       *dlen = op - (u8 *) dst;
2538 +       return 0;
2539 +}
2540 +
2541 +static struct crypto_alg alg = {
2542 +       .cra_name = "lzf",
2543 +       .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
2544 +       .cra_ctxsize = sizeof(struct lzf_ctx),
2545 +       .cra_module = THIS_MODULE,
2546 +       .cra_list = LIST_HEAD_INIT(alg.cra_list),
2547 +       .cra_init = lzf_compress_init,
2548 +       .cra_exit = lzf_compress_exit,
2549 +       .cra_u = { .compress = {
2550 +       .coa_compress = lzf_compress,
2551 +       .coa_decompress = lzf_decompress } }
2552 +};
2553 +
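/*
 * A minimal sketch (an assumption about how a client such as
 * tuxonice_compress.c would drive this algorithm; not code from the
 * patch) of using the registered "lzf" algorithm through the
 * cryptoapi compression interface.  Note the module's convention
 * above: an out_len of PAGE_SIZE signals an incompressible buffer.
 */
#include <linux/crypto.h>
#include <linux/err.h>

static int lzf_roundtrip_example(const u8 *src, unsigned int slen,
				 u8 *tmp, u8 *out)
{
	struct crypto_comp *tfm = crypto_alloc_comp("lzf", 0, 0);
	unsigned int clen = PAGE_SIZE, dlen = PAGE_SIZE;
	int ret;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_comp_compress(tfm, src, slen, tmp, &clen);
	if (!ret && clen < PAGE_SIZE)	/* clen == PAGE_SIZE: incompressible */
		ret = crypto_comp_decompress(tfm, tmp, clen, out, &dlen);

	crypto_free_comp(tfm);
	return ret;
}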
2554 +static int __init init(void)
2555 +{
2556 +       return crypto_register_alg(&alg);
2557 +}
2558 +
2559 +static void __exit fini(void)
2560 +{
2561 +       crypto_unregister_alg(&alg);
2562 +}
2563 +
2564 +module_init(init);
2565 +module_exit(fini);
2566 +
2567 +MODULE_LICENSE("GPL");
2568 +MODULE_DESCRIPTION("LZF Compression Algorithm");
2569 +MODULE_AUTHOR("Marc Alexander Lehmann & Nigel Cunningham");
2570 diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
2571 index 941fcb8..1a2a2e7 100644
2572 --- a/drivers/base/power/main.c
2573 +++ b/drivers/base/power/main.c
2574 @@ -69,6 +69,7 @@ void device_pm_lock(void)
2575  {
2576         mutex_lock(&dpm_list_mtx);
2577  }
2578 +EXPORT_SYMBOL_GPL(device_pm_lock);
2579  
2580  /**
2581   * device_pm_unlock - Unlock the list of active devices used by the PM core.
2582 @@ -77,6 +78,7 @@ void device_pm_unlock(void)
2583  {
2584         mutex_unlock(&dpm_list_mtx);
2585  }
2586 +EXPORT_SYMBOL_GPL(device_pm_unlock);
2587  
2588  /**
2589   * device_pm_add - Add a device to the PM core's list of active devices.
2590 diff --git a/drivers/char/vt.c b/drivers/char/vt.c
2591 index 7cdb6ee..f114914 100644
2592 --- a/drivers/char/vt.c
2593 +++ b/drivers/char/vt.c
2594 @@ -2461,6 +2461,7 @@ int vt_kmsg_redirect(int new)
2595         else
2596                 return kmsg_con;
2597  }
2598 +EXPORT_SYMBOL_GPL(vt_kmsg_redirect);
2599  
2600  /*
2601   *     Console on virtual terminal
2602 diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
2603 index 33dad3f..47fb186 100644
2604 --- a/drivers/gpu/drm/drm_gem.c
2605 +++ b/drivers/gpu/drm/drm_gem.c
2606 @@ -133,7 +133,8 @@ int drm_gem_object_init(struct drm_device *dev,
2607         BUG_ON((size & (PAGE_SIZE - 1)) != 0);
2608  
2609         obj->dev = dev;
2610 -       obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
2611 +       obj->filp = shmem_file_setup("drm mm object", size,
2612 +                       VM_NORESERVE | VM_ATOMIC_COPY);
2613         if (IS_ERR(obj->filp))
2614                 return -ENOMEM;
2615  
2616 diff --git a/drivers/md/md.c b/drivers/md/md.c
2617 index 46b3a04..883513f 100644
2618 --- a/drivers/md/md.c
2619 +++ b/drivers/md/md.c
2620 @@ -6602,6 +6602,9 @@ void md_do_sync(mddev_t *mddev)
2621                 mddev->curr_resync = 2;
2622  
2623         try_again:
2624 +               while (freezer_is_on())
2625 +                       yield();
2626 +
2627                 if (kthread_should_stop())
2628                         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
2629  
2630 @@ -6624,6 +6627,10 @@ void md_do_sync(mddev_t *mddev)
2631                                          * time 'round when curr_resync == 2
2632                                          */
2633                                         continue;
2634 +
2635 +                               while (freezer_is_on())
2636 +                                       yield();
2637 +
2638                                 /* We need to wait 'interruptible' so as not to
2639                                  * contribute to the load average, and not to
2640                                  * be caught by 'softlockup'
2641 @@ -6636,6 +6643,7 @@ void md_do_sync(mddev_t *mddev)
2642                                                " share one or more physical units)\n",
2643                                                desc, mdname(mddev), mdname(mddev2));
2644                                         mddev_put(mddev2);
2645 +                                       try_to_freeze();
2646                                         if (signal_pending(current))
2647                                                 flush_signals(current);
2648                                         schedule();
2649 @@ -6745,6 +6753,9 @@ void md_do_sync(mddev_t *mddev)
2650                                                  || kthread_should_stop());
2651                 }
2652  
2653 +               while (freezer_is_on())
2654 +                       yield();
2655 +
2656                 if (kthread_should_stop())
2657                         goto interrupted;
2658  
2659 @@ -6789,6 +6800,9 @@ void md_do_sync(mddev_t *mddev)
2660                         last_mark = next;
2661                 }
2662  
2663 +               while (freezer_is_on())
2664 +                       yield();
2665 +
2666  
2667                 if (kthread_should_stop())
2668                         goto interrupted;
2669 diff --git a/fs/block_dev.c b/fs/block_dev.c
2670 index 99d6af8..f270494 100644
2671 --- a/fs/block_dev.c
2672 +++ b/fs/block_dev.c
2673 @@ -295,6 +295,93 @@ out:
2674  }
2675  EXPORT_SYMBOL(thaw_bdev);
2676  
2677 +#ifdef CONFIG_FS_FREEZER_DEBUG
2678 +#define FS_PRINTK(fmt, args...) printk(fmt, ## args)
2679 +#else
2680 +#define FS_PRINTK(fmt, args...)
2681 +#endif
2682 +
2683 +/* #define DEBUG_FS_FREEZING */
2684 +
2685 +/**
2686 + * freeze_filesystems - lock all filesystems and force them into a consistent
2687 + * state
2688 + * @which:     What combination of fuse & non-fuse to freeze.
2689 + */
2690 +void freeze_filesystems(int which)
2691 +{
2692 +       struct super_block *sb;
2693 +
2694 +       lockdep_off();
2695 +
2696 +       /*
2697 +        * Freeze in reverse order so filesystems dependent upon others are
2698 +        * frozen in the right order (e.g. loopback on ext3).
2699 +        */
2700 +       list_for_each_entry_reverse(sb, &super_blocks, s_list) {
2701 +               FS_PRINTK(KERN_INFO "Considering %s.%s: (root %p, bdev %x)",
2702 +                       sb->s_type->name ? sb->s_type->name : "?",
2703 +                       sb->s_subtype ? sb->s_subtype : "", sb->s_root,
2704 +                       sb->s_bdev ? sb->s_bdev->bd_dev : 0);
2705 +
2706 +               if (sb->s_type->fs_flags & FS_IS_FUSE &&
2707 +                   sb->s_frozen == SB_UNFROZEN &&
2708 +                   which & FS_FREEZER_FUSE) {
2709 +                       sb->s_frozen = SB_FREEZE_TRANS;
2710 +                       sb->s_flags |= MS_FROZEN;
2711 +                       FS_PRINTK("Fuse filesystem done.\n");
2712 +                       continue;
2713 +               }
2714 +
2715 +               if (!sb->s_root || !sb->s_bdev ||
2716 +                   (sb->s_frozen == SB_FREEZE_TRANS) ||
2717 +                   (sb->s_flags & MS_RDONLY) ||
2718 +                   (sb->s_flags & MS_FROZEN) ||
2719 +                   !(which & FS_FREEZER_NORMAL)) {
2720 +                       FS_PRINTK(KERN_INFO "Nope.\n");
2721 +                       continue;
2722 +               }
2723 +
2724 +               FS_PRINTK(KERN_INFO "Freezing %x... ", sb->s_bdev->bd_dev);
2725 +               freeze_bdev(sb->s_bdev);
2726 +               sb->s_flags |= MS_FROZEN;
2727 +               FS_PRINTK(KERN_INFO "Done.\n");
2728 +       }
2729 +
2730 +       lockdep_on();
2731 +}
2732 +
2733 +/**
2734 + * thaw_filesystems - unlock all filesystems
2735 + * @which:     What combination of fuse & non-fuse to thaw.
2736 + */
2737 +void thaw_filesystems(int which)
2738 +{
2739 +       struct super_block *sb;
2740 +
2741 +       lockdep_off();
2742 +
2743 +       list_for_each_entry(sb, &super_blocks, s_list) {
2744 +               if (!(sb->s_flags & MS_FROZEN))
2745 +                       continue;
2746 +
2747 +               if (sb->s_type->fs_flags & FS_IS_FUSE) {
2748 +                       if (!(which & FS_FREEZER_FUSE))
2749 +                               continue;
2750 +
2751 +                       sb->s_frozen = SB_UNFROZEN;
2752 +               } else {
2753 +                       if (!(which & FS_FREEZER_NORMAL))
2754 +                               continue;
2755 +
2756 +                       thaw_bdev(sb->s_bdev, sb);
2757 +               }
2758 +               sb->s_flags &= ~MS_FROZEN;
2759 +       }
2760 +
2761 +       lockdep_on();
2762 +}
2763 +
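/*
 * Rough sketch of the intended calling pattern (an assumption based on
 * the FS_FREEZER_* masks this patch adds to include/linux/fs.h, not
 * code from the patch): freeze everything before writing the image,
 * thaw everything afterwards.
 */
static void hibernate_fs_example(void)
{
	freeze_filesystems(FS_FREEZER_ALL);	/* fuse + bdev-backed */

	/* ... atomic copy and image write happen here ... */

	thaw_filesystems(FS_FREEZER_ALL);
}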
2764  static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
2765  {
2766         return block_write_full_page(page, blkdev_get_block, wbc);
2767 diff --git a/fs/drop_caches.c b/fs/drop_caches.c
2768 index 83c4f60..8f7ec03 100644
2769 --- a/fs/drop_caches.c
2770 +++ b/fs/drop_caches.c
2771 @@ -8,6 +8,7 @@
2772  #include <linux/writeback.h>
2773  #include <linux/sysctl.h>
2774  #include <linux/gfp.h>
2775 +#include <linux/module.h>
2776  
2777  /* A global variable is a bit ugly, but it keeps the code simple */
2778  int sysctl_drop_caches;
2779 @@ -42,6 +43,13 @@ static void drop_slab(void)
2780         } while (nr_objects > 10);
2781  }
2782  
2783 +/* For TuxOnIce */
2784 +void drop_pagecache(void)
2785 +{
2786 +       iterate_supers(drop_pagecache_sb, NULL);
2787 +}
2788 +EXPORT_SYMBOL_GPL(drop_pagecache);
2789 +
2790  int drop_caches_sysctl_handler(ctl_table *table, int write,
2791         void __user *buffer, size_t *length, loff_t *ppos)
2792  {
2793 diff --git a/fs/fuse/control.c b/fs/fuse/control.c
2794 index 3773fd6..6272b60 100644
2795 --- a/fs/fuse/control.c
2796 +++ b/fs/fuse/control.c
2797 @@ -341,6 +341,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
2798  static struct file_system_type fuse_ctl_fs_type = {
2799         .owner          = THIS_MODULE,
2800         .name           = "fusectl",
2801 +       .fs_flags       = FS_IS_FUSE,
2802         .get_sb         = fuse_ctl_get_sb,
2803         .kill_sb        = fuse_ctl_kill_sb,
2804  };
2805 diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
2806 index 9424796..25c6277 100644
2807 --- a/fs/fuse/dev.c
2808 +++ b/fs/fuse/dev.c
2809 @@ -7,6 +7,7 @@
2810  */
2811  
2812  #include "fuse_i.h"
2813 +#include "fuse.h"
2814  
2815  #include <linux/init.h>
2816  #include <linux/module.h>
2817 @@ -16,6 +17,7 @@
2818  #include <linux/pagemap.h>
2819  #include <linux/file.h>
2820  #include <linux/slab.h>
2821 +#include <linux/freezer.h>
2822  #include <linux/pipe_fs_i.h>
2823  #include <linux/swap.h>
2824  #include <linux/splice.h>
2825 @@ -961,6 +963,8 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
2826         struct fuse_in *in;
2827         unsigned reqsize;
2828  
2829 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_dev_read");
2830 +
2831   restart:
2832         spin_lock(&fc->lock);
2833         err = -EAGAIN;
2834 @@ -1395,6 +1399,9 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
2835         if (!fc)
2836                 return -EPERM;
2837  
2838 +       FUSE_MIGHT_FREEZE(iocb->ki_filp->f_mapping->host->i_sb,
2839 +                       "fuse_dev_write");
2840 +
2841         fuse_copy_init(&cs, fc, 0, iov, nr_segs);
2842  
2843         return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
2844 diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
2845 index 3cdc5f7..725cb5a 100644
2846 --- a/fs/fuse/dir.c
2847 +++ b/fs/fuse/dir.c
2848 @@ -7,12 +7,14 @@
2849  */
2850  
2851  #include "fuse_i.h"
2852 +#include "fuse.h"
2853  
2854  #include <linux/pagemap.h>
2855  #include <linux/file.h>
2856  #include <linux/gfp.h>
2857  #include <linux/sched.h>
2858  #include <linux/namei.h>
2859 +#include <linux/freezer.h>
2860  
2861  #if BITS_PER_LONG >= 64
2862  static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
2863 @@ -174,6 +176,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
2864                         return 0;
2865  
2866                 fc = get_fuse_conn(inode);
2867 +
2868 +               FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_dentry_revalidate");
2869 +
2870                 req = fuse_get_req(fc);
2871                 if (IS_ERR(req))
2872                         return 0;
2873 @@ -268,6 +273,8 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
2874         if (name->len > FUSE_NAME_MAX)
2875                 goto out;
2876  
2877 +       FUSE_MIGHT_FREEZE(sb, "fuse_lookup_name");
2878 +
2879         req = fuse_get_req(fc);
2880         err = PTR_ERR(req);
2881         if (IS_ERR(req))
2882 @@ -331,6 +338,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
2883         if (err)
2884                 goto out_err;
2885  
2886 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_lookup");
2887 +
2888         err = -EIO;
2889         if (inode && get_node_id(inode) == FUSE_ROOT_ID)
2890                 goto out_iput;
2891 @@ -392,6 +401,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
2892         if (IS_ERR(forget_req))
2893                 return PTR_ERR(forget_req);
2894  
2895 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_create_open");
2896 +
2897         req = fuse_get_req(fc);
2898         err = PTR_ERR(req);
2899         if (IS_ERR(req))
2900 @@ -485,6 +496,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
2901         int err;
2902         struct fuse_req *forget_req;
2903  
2904 +       FUSE_MIGHT_FREEZE(dir->i_sb, "create_new_entry");
2905 +
2906         forget_req = fuse_get_req(fc);
2907         if (IS_ERR(forget_req)) {
2908                 fuse_put_request(fc, req);
2909 @@ -587,7 +600,11 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
2910  {
2911         struct fuse_mkdir_in inarg;
2912         struct fuse_conn *fc = get_fuse_conn(dir);
2913 -       struct fuse_req *req = fuse_get_req(fc);
2914 +       struct fuse_req *req;
2915 +
2916 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_mkdir");
2917 +
2918 +       req = fuse_get_req(fc);
2919         if (IS_ERR(req))
2920                 return PTR_ERR(req);
2921  
2922 @@ -611,7 +628,11 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
2923  {
2924         struct fuse_conn *fc = get_fuse_conn(dir);
2925         unsigned len = strlen(link) + 1;
2926 -       struct fuse_req *req = fuse_get_req(fc);
2927 +       struct fuse_req *req;
2928 +
2929 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_symlink");
2930 +
2931 +       req = fuse_get_req(fc);
2932         if (IS_ERR(req))
2933                 return PTR_ERR(req);
2934  
2935 @@ -628,7 +649,11 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
2936  {
2937         int err;
2938         struct fuse_conn *fc = get_fuse_conn(dir);
2939 -       struct fuse_req *req = fuse_get_req(fc);
2940 +       struct fuse_req *req;
2941 +
2942 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_unlink");
2943 +
2944 +       req = fuse_get_req(fc);
2945         if (IS_ERR(req))
2946                 return PTR_ERR(req);
2947  
2948 @@ -661,7 +686,11 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
2949  {
2950         int err;
2951         struct fuse_conn *fc = get_fuse_conn(dir);
2952 -       struct fuse_req *req = fuse_get_req(fc);
2953 +       struct fuse_req *req;
2954 +
2955 +       FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_rmdir");
2956 +
2957 +       req = fuse_get_req(fc);
2958         if (IS_ERR(req))
2959                 return PTR_ERR(req);
2960  
2961 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
2962 index ada0ade..ca89e06 100644
2963 --- a/fs/fuse/file.c
2964 +++ b/fs/fuse/file.c
2965 @@ -7,11 +7,13 @@
2966  */
2967  
2968  #include "fuse_i.h"
2969 +#include "fuse.h"
2970  
2971  #include <linux/pagemap.h>
2972  #include <linux/slab.h>
2973  #include <linux/kernel.h>
2974  #include <linux/sched.h>
2975 +#include <linux/freezer.h>
2976  #include <linux/module.h>
2977  
2978  static const struct file_operations fuse_direct_io_file_operations;
2979 @@ -109,6 +111,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
2980         int err;
2981         int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
2982  
2983 +       FUSE_MIGHT_FREEZE(file->f_path.dentry->d_inode->i_sb, "fuse_send_open");
2984 +
2985         ff = fuse_file_alloc(fc);
2986         if (!ff)
2987                 return -ENOMEM;
2988 @@ -316,6 +320,8 @@ static int fuse_flush(struct file *file, fl_owner_t id)
2989         if (fc->no_flush)
2990                 return 0;
2991  
2992 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_flush");
2993 +
2994         req = fuse_get_req_nofail(fc, file);
2995         memset(&inarg, 0, sizeof(inarg));
2996         inarg.fh = ff->fh;
2997 @@ -366,6 +372,8 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
2998         if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
2999                 return 0;
3000  
3001 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_fsync_common");
3002 +
3003         /*
3004          * Start writeback against all dirty pages of the inode, then
3005          * wait for all outstanding writes, before sending the FSYNC
3006 @@ -473,6 +481,8 @@ static int fuse_readpage(struct file *file, struct page *page)
3007         if (is_bad_inode(inode))
3008                 goto out;
3009  
3010 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_readpage");
3011 +
3012         /*
3013          * Page writeback can extend beyond the liftime of the
3014          * page-cache page, so make sure we read a properly synced
3015 @@ -586,6 +596,9 @@ static int fuse_readpages_fill(void *_data, struct page *page)
3016         struct inode *inode = data->inode;
3017         struct fuse_conn *fc = get_fuse_conn(inode);
3018  
3019 +       FUSE_MIGHT_FREEZE(data->file->f_mapping->host->i_sb,
3020 +                       "fuse_readpages_fill");
3021 +
3022         fuse_wait_on_page_writeback(inode, page->index);
3023  
3024         if (req->num_pages &&
3025 @@ -617,6 +630,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
3026         if (is_bad_inode(inode))
3027                 goto out;
3028  
3029 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_readpages");
3030 +
3031         data.file = file;
3032         data.inode = inode;
3033         data.req = fuse_get_req(fc);
3034 @@ -730,6 +745,8 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
3035         if (is_bad_inode(inode))
3036                 return -EIO;
3037  
3038 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_buffered_write");
3039 +
3040         /*
3041          * Make sure writepages on the same page are not mixed up with
3042          * plain writes.
3043 @@ -889,6 +906,8 @@ static ssize_t fuse_perform_write(struct file *file,
3044                 struct fuse_req *req;
3045                 ssize_t count;
3046  
3047 +               FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_perform_write");
3048 +
3049                 req = fuse_get_req(fc);
3050                 if (IS_ERR(req)) {
3051                         err = PTR_ERR(req);
3052 @@ -1033,6 +1052,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
3053         ssize_t res = 0;
3054         struct fuse_req *req;
3055  
3056 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_direct_io");
3057 +
3058         req = fuse_get_req(fc);
3059         if (IS_ERR(req))
3060                 return PTR_ERR(req);
3061 @@ -1420,6 +1441,8 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
3062         struct fuse_lk_out outarg;
3063         int err;
3064  
3065 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_getlk");
3066 +
3067         req = fuse_get_req(fc);
3068         if (IS_ERR(req))
3069                 return PTR_ERR(req);
3070 @@ -1455,6 +1478,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
3071         if (fl->fl_flags & FL_CLOSE)
3072                 return 0;
3073  
3074 +       FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_setlk");
3075 +
3076         req = fuse_get_req(fc);
3077         if (IS_ERR(req))
3078                 return PTR_ERR(req);
3079 @@ -1521,6 +1546,8 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
3080         if (!inode->i_sb->s_bdev || fc->no_bmap)
3081                 return 0;
3082  
3083 +       FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_bmap");
3084 +
3085         req = fuse_get_req(fc);
3086         if (IS_ERR(req))
3087                 return 0;
3088 diff --git a/fs/fuse/fuse.h b/fs/fuse/fuse.h
3089 new file mode 100644
3090 index 0000000..170e49a
3091 --- /dev/null
3092 +++ b/fs/fuse/fuse.h
3093 @@ -0,0 +1,13 @@
3094 +#define FUSE_MIGHT_FREEZE(superblock, desc) \
3095 +do { \
3096 +       int printed = 0; \
3097 +       while (superblock->s_frozen != SB_UNFROZEN) { \
3098 +               if (!printed) { \
3099 +                       printk(KERN_INFO "%d frozen in " desc ".\n", \
3100 +                                               current->pid); \
3101 +                       printed = 1; \
3102 +               } \
3103 +               try_to_freeze(); \
3104 +               yield(); \
3105 +       } \
3106 +} while (0)
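/*
 * Illustration (not part of the patch): at a call site such as
 * FUSE_MIGHT_FREEZE(sb, "fuse_dev_read"), the macro above expands to
 * roughly the following -- spin until the superblock is thawed,
 * freezing this task whenever the freezer asks:
 *
 *	int printed = 0;
 *	while (sb->s_frozen != SB_UNFROZEN) {
 *		if (!printed) {
 *			printk(KERN_INFO "%d frozen in fuse_dev_read.\n",
 *						current->pid);
 *			printed = 1;
 *		}
 *		try_to_freeze();
 *		yield();
 *	}
 */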
3107 diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
3108 index ec14d19..2a82a08 100644
3109 --- a/fs/fuse/inode.c
3110 +++ b/fs/fuse/inode.c
3111 @@ -1062,7 +1062,7 @@ static void fuse_kill_sb_anon(struct super_block *sb)
3112  static struct file_system_type fuse_fs_type = {
3113         .owner          = THIS_MODULE,
3114         .name           = "fuse",
3115 -       .fs_flags       = FS_HAS_SUBTYPE,
3116 +       .fs_flags       = FS_HAS_SUBTYPE | FS_IS_FUSE,
3117         .get_sb         = fuse_get_sb,
3118         .kill_sb        = fuse_kill_sb_anon,
3119  };
3120 @@ -1094,7 +1094,7 @@ static struct file_system_type fuseblk_fs_type = {
3121         .name           = "fuseblk",
3122         .get_sb         = fuse_get_sb_blk,
3123         .kill_sb        = fuse_kill_sb_blk,
3124 -       .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
3125 +       .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_IS_FUSE,
3126  };
3127  
3128  static inline int register_fuseblk(void)
3129 diff --git a/fs/namei.c b/fs/namei.c
3130 index 868d0cb..325b6cf 100644
3131 --- a/fs/namei.c
3132 +++ b/fs/namei.c
3133 @@ -2256,6 +2256,8 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
3134         if (!dir->i_op->unlink)
3135                 return -EPERM;
3136  
3137 +       vfs_check_frozen(dir->i_sb, SB_FREEZE_WRITE);
3138 +
3139         mutex_lock(&dentry->d_inode->i_mutex);
3140         if (d_mountpoint(dentry))
3141                 error = -EBUSY;
3142 diff --git a/fs/super.c b/fs/super.c
3143 index 5c35bc7..7c2e6e8 100644
3144 --- a/fs/super.c
3145 +++ b/fs/super.c
3146 @@ -34,6 +34,8 @@
3147  
3148  
3149  LIST_HEAD(super_blocks);
3150 +EXPORT_SYMBOL_GPL(super_blocks);
3151 +
3152  DEFINE_SPINLOCK(sb_lock);
3153  
3154  /**
3155 diff --git a/include/linux/Kbuild b/include/linux/Kbuild
3156 index 2fc8e14..709a571 100644
3157 --- a/include/linux/Kbuild
3158 +++ b/include/linux/Kbuild
3159 @@ -216,6 +216,7 @@ unifdef-y += filter.h
3160  unifdef-y += flat.h
3161  unifdef-y += futex.h
3162  unifdef-y += fs.h
3163 +unifdef-y += freezer.h
3164  unifdef-y += gameport.h
3165  unifdef-y += generic_serial.h
3166  unifdef-y += hdlcdrv.h
3167 diff --git a/include/linux/bio.h b/include/linux/bio.h
3168 index 7fc5606..07e9b97 100644
3169 --- a/include/linux/bio.h
3170 +++ b/include/linux/bio.h
3171 @@ -175,8 +175,11 @@ enum bio_rw_flags {
3172         BIO_RW_META,
3173         BIO_RW_DISCARD,
3174         BIO_RW_NOIDLE,
3175 +       BIO_RW_TUXONICE,
3176  };
3177  
3178 +extern int trap_non_toi_io;
3179 +
3180  /*
3181   * First four bits must match between bio->bi_rw and rq->cmd_flags, make
3182   * that explicit here.
3183 diff --git a/include/linux/freezer.h b/include/linux/freezer.h
3184 index da7e52b..a45b332 100644
3185 --- a/include/linux/freezer.h
3186 +++ b/include/linux/freezer.h
3187 @@ -124,6 +124,19 @@ static inline void set_freezable(void)
3188         current->flags &= ~PF_NOFREEZE;
3189  }
3190  
3191 +extern int freezer_state;
3192 +#define FREEZER_OFF 0
3193 +#define FREEZER_FILESYSTEMS_FROZEN 1
3194 +#define FREEZER_USERSPACE_FROZEN 2
3195 +#define FREEZER_FULLY_ON 3
3196 +
3197 +static inline int freezer_is_on(void)
3198 +{
3199 +       return freezer_state == FREEZER_FULLY_ON;
3200 +}
3201 +
3202 +extern void thaw_kernel_threads(void);
3203 +
3204  /*
3205   * Tell the freezer that the current task should be frozen by it and that it
3206   * should send a fake signal to the task to freeze it.
3207 @@ -175,6 +188,8 @@ static inline int freeze_processes(void) { BUG(); return 0; }
3208  static inline void thaw_processes(void) {}
3209  
3210  static inline int try_to_freeze(void) { return 0; }
3211 +static inline int freezer_is_on(void) { return 0; }
3212 +static inline void thaw_kernel_threads(void) { }
3213  
3214  static inline void freezer_do_not_count(void) {}
3215  static inline void freezer_count(void) {}
3216 diff --git a/include/linux/fs.h b/include/linux/fs.h
3217 index 471e1ff..63da27b 100644
3218 --- a/include/linux/fs.h
3219 +++ b/include/linux/fs.h
3220 @@ -176,6 +176,7 @@ struct inodes_stat_t {
3221  #define FS_REQUIRES_DEV 1 
3222  #define FS_BINARY_MOUNTDATA 2
3223  #define FS_HAS_SUBTYPE 4
3224 +#define FS_IS_FUSE     8       /* Fuse filesystem - bdev freeze these too */
3225  #define FS_REVAL_DOT   16384   /* Check the paths ".", ".." for staleness */
3226  #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move()
3227                                          * during rename() internally.
3228 @@ -209,6 +210,7 @@ struct inodes_stat_t {
3229  #define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
3230  #define MS_I_VERSION   (1<<23) /* Update inode I_version field */
3231  #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
3232 +#define MS_FROZEN      (1<<25) /* Frozen by freeze_filesystems() */
3233  #define MS_BORN                (1<<29)
3234  #define MS_ACTIVE      (1<<30)
3235  #define MS_NOUSER      (1<<31)
3236 @@ -235,6 +237,8 @@ struct inodes_stat_t {
3237  #define S_NOCMTIME     128     /* Do not update file c/mtime */
3238  #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
3239  #define S_PRIVATE      512     /* Inode is fs-internal */
3240 +#define S_ATOMIC_COPY  1024    /* Pages mapped with this inode need to be
3241 +                                  atomically copied (gem) */
3242  
3243  /*
3244   * Note that nosuid etc flags are inode-specific: setting some file-system
3245 @@ -382,6 +386,7 @@ struct inodes_stat_t {
3246  #include <linux/capability.h>
3247  #include <linux/semaphore.h>
3248  #include <linux/fiemap.h>
3249 +#include <linux/freezer.h>
3250  
3251  #include <asm/atomic.h>
3252  #include <asm/byteorder.h>
3253 @@ -1395,8 +1400,11 @@ enum {
3254         SB_FREEZE_TRANS = 2,
3255  };
3256  
3257 -#define vfs_check_frozen(sb, level) \
3258 -       wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
3259 +#define vfs_check_frozen(sb, level) do { \
3260 +       freezer_do_not_count(); \
3261 +       wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))); \
3262 +       freezer_count(); \
3263 +} while (0)
3264  
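/*
 * Why the wrapper above matters (reasoning, not code from the patch):
 * without the freezer_do_not_count()/freezer_count() bracketing, a
 * task sleeping in wait_event() on a frozen superblock would itself
 * refuse to freeze, deadlocking the freezer against the filesystem
 * freeze.  The bracketing tells the freezer to skip this task while
 * it waits:
 *
 *	freezer_do_not_count();
 *	wait_event(sb->s_wait_unfrozen, sb->s_frozen < SB_FREEZE_WRITE);
 *	freezer_count();
 */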
3265  #define get_fs_excl() atomic_inc(&current->fs_excl)
3266  #define put_fs_excl() atomic_dec(&current->fs_excl)
3267 @@ -1954,6 +1962,13 @@ extern struct super_block *freeze_bdev(struct block_device *);
3268  extern void emergency_thaw_all(void);
3269  extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
3270  extern int fsync_bdev(struct block_device *);
3271 +extern int fsync_super(struct super_block *);
3272 +extern int fsync_no_super(struct block_device *);
3273 +#define FS_FREEZER_FUSE 1
3274 +#define FS_FREEZER_NORMAL 2
3275 +#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL)
3276 +void freeze_filesystems(int which);
3277 +void thaw_filesystems(int which);
3278  #else
3279  static inline void bd_forget(struct inode *inode) {}
3280  static inline int sync_blockdev(struct block_device *bdev) { return 0; }
3281 diff --git a/include/linux/fs_uuid.h b/include/linux/fs_uuid.h
3282 new file mode 100644
3283 index 0000000..3234135
3284 --- /dev/null
3285 +++ b/include/linux/fs_uuid.h
3286 @@ -0,0 +1,19 @@
3287 +#include <linux/device.h>
3288 +
3289 +struct hd_struct;
3290 +struct block_device;
3291 +
3292 +struct fs_info {
3293 +       char uuid[16];
3294 +       dev_t dev_t;
3295 +       char *last_mount;
3296 +       int last_mount_size;
3297 +};
3298 +
3299 +int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek);
3300 +dev_t blk_lookup_fs_info(struct fs_info *seek);
3301 +struct fs_info *fs_info_from_block_dev(struct block_device *bdev);
3302 +void free_fs_info(struct fs_info *fs_info);
3303 +int bdev_matches_key(struct block_device *bdev, const char *key);
3304 +struct block_device *next_bdev_of_type(struct block_device *last,
3305 +       const char *key);
3306 diff --git a/include/linux/mm.h b/include/linux/mm.h
3307 index b969efb..1e63042 100644
3308 --- a/include/linux/mm.h
3309 +++ b/include/linux/mm.h
3310 @@ -98,6 +98,7 @@ extern unsigned int kobjsize(const void *objp);
3311  #define VM_HUGETLB     0x00400000      /* Huge TLB Page VM */
3312  #define VM_NONLINEAR   0x00800000      /* Is non-linear (remap_file_pages) */
3313  #define VM_MAPPED_COPY 0x01000000      /* T if mapped copy of data (nommu mmap) */
3314 +#define VM_ATOMIC_COPY 0x01000000      /* TOI should do atomic copy (mmu) */
3315  #define VM_INSERTPAGE  0x02000000      /* The vma has had "vm_insert_page()" done on it */
3316  #define VM_ALWAYSDUMP  0x04000000      /* Always include in core dumps */
3317  
3318 @@ -1424,6 +1425,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
3319                                         void __user *, size_t *, loff_t *);
3320  unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
3321                         unsigned long lru_pages);
3322 +void drop_pagecache(void);
3323  
3324  #ifndef CONFIG_MMU
3325  #define randomize_va_space 0
3326 diff --git a/include/linux/netlink.h b/include/linux/netlink.h
3327 index 59d0669..5efa8e0 100644
3328 --- a/include/linux/netlink.h
3329 +++ b/include/linux/netlink.h
3330 @@ -24,6 +24,8 @@
3331  /* leave room for NETLINK_DM (DM Events) */
3332  #define NETLINK_SCSITRANSPORT  18      /* SCSI Transports */
3333  #define NETLINK_ECRYPTFS       19
3334 +#define NETLINK_TOI_USERUI     20      /* TuxOnIce's userui */
3335 +#define NETLINK_TOI_USM                21      /* Userspace storage manager */
3336  
3337  #define MAX_LINKS 32           
3338  
3339 diff --git a/include/linux/suspend.h b/include/linux/suspend.h
3340 index 5e781d8..a1c07f3 100644
3341 --- a/include/linux/suspend.h
3342 +++ b/include/linux/suspend.h
3343 @@ -329,4 +329,70 @@ static inline void unlock_system_sleep(void)
3344  }
3345  #endif
3346  
3347 +enum {
3348 +       TOI_CAN_HIBERNATE,
3349 +       TOI_CAN_RESUME,
3350 +       TOI_RESUME_DEVICE_OK,
3351 +       TOI_NORESUME_SPECIFIED,
3352 +       TOI_SANITY_CHECK_PROMPT,
3353 +       TOI_CONTINUE_REQ,
3354 +       TOI_RESUMED_BEFORE,
3355 +       TOI_BOOT_TIME,
3356 +       TOI_NOW_RESUMING,
3357 +       TOI_IGNORE_LOGLEVEL,
3358 +       TOI_TRYING_TO_RESUME,
3359 +       TOI_LOADING_ALT_IMAGE,
3360 +       TOI_STOP_RESUME,
3361 +       TOI_IO_STOPPED,
3362 +       TOI_NOTIFIERS_PREPARE,
3363 +       TOI_CLUSTER_MODE,
3364 +       TOI_BOOT_KERNEL,
3365 +};
3366 +
3367 +#ifdef CONFIG_TOI
3368 +
3369 +/* Used in init dir files */
3370 +extern unsigned long toi_state;
3371 +#define set_toi_state(bit) (set_bit(bit, &toi_state))
3372 +#define clear_toi_state(bit) (clear_bit(bit, &toi_state))
3373 +#define test_toi_state(bit) (test_bit(bit, &toi_state))
3374 +extern int toi_running;
3375 +
3376 +#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action))
3377 +extern int try_tuxonice_hibernate(void);
3378 +
3379 +#else /* !CONFIG_TOI */
3380 +
3381 +#define toi_state              (0)
3382 +#define set_toi_state(bit) do { } while (0)
3383 +#define clear_toi_state(bit) do { } while (0)
3384 +#define test_toi_state(bit) (0)
3385 +#define toi_running (0)
3386 +
3387 +static inline int try_tuxonice_hibernate(void) { return 0; }
3388 +#define test_action_state(bit) (0)
3389 +
3390 +#endif /* CONFIG_TOI */
3391 +
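/*
 * Usage sketch (illustrative, not from the patch): because the
 * macros above degrade to no-ops when CONFIG_TOI is unset, resume
 * code can record progress through the state bits unconditionally:
 *
 *	set_toi_state(TOI_TRYING_TO_RESUME);
 *	if (test_toi_state(TOI_NORESUME_SPECIFIED))
 *		return;
 *	...
 *	clear_toi_state(TOI_TRYING_TO_RESUME);
 */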
3392 +#ifdef CONFIG_HIBERNATION
3393 +#ifdef CONFIG_TOI
3394 +extern void try_tuxonice_resume(void);
3395 +#else
3396 +#define try_tuxonice_resume() do { } while (0)
3397 +#endif
3398 +
3399 +extern int resume_attempted;
3400 +extern int software_resume(void);
3401 +
3402 +static inline void check_resume_attempted(void)
3403 +{
3404 +       if (resume_attempted)
3405 +               return;
3406 +
3407 +       software_resume();
3408 +}
3409 +#else
3410 +#define check_resume_attempted() do { } while (0)
3411 +#define resume_attempted (0)
3412 +#endif
3413  #endif /* _LINUX_SUSPEND_H */
3414 diff --git a/include/linux/swap.h b/include/linux/swap.h
3415 index ff4acea..5aa8559 100644
3416 --- a/include/linux/swap.h
3417 +++ b/include/linux/swap.h
3418 @@ -198,6 +198,7 @@ struct swap_list_t {
3419  extern unsigned long totalram_pages;
3420  extern unsigned long totalreserve_pages;
3421  extern unsigned int nr_free_buffer_pages(void);
3422 +extern unsigned int nr_unallocated_buffer_pages(void);
3423  extern unsigned int nr_free_pagecache_pages(void);
3424  
3425  /* Definition of global_page_state not available yet */
3426 @@ -248,6 +249,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
3427                                                 int nid);
3428  extern int __isolate_lru_page(struct page *page, int mode, int file);
3429  extern unsigned long shrink_all_memory(unsigned long nr_pages);
3430 +extern unsigned long shrink_memory_mask(unsigned long nr_to_reclaim,
3431 +               gfp_t mask);
3432  extern int vm_swappiness;
3433  extern int remove_mapping(struct address_space *mapping, struct page *page);
3434  extern long vm_total_pages;
3435 @@ -327,8 +330,10 @@ extern void swapcache_free(swp_entry_t, struct page *page);
3436  extern int free_swap_and_cache(swp_entry_t);
3437  extern int swap_type_of(dev_t, sector_t, struct block_device **);
3438  extern unsigned int count_swap_pages(int, int);
3439 +extern sector_t map_swap_entry(swp_entry_t entry, struct block_device **);
3440  extern sector_t map_swap_page(struct page *, struct block_device **);
3441  extern sector_t swapdev_block(int, pgoff_t);
3442 +extern struct swap_info_struct *get_swap_info_struct(unsigned);
3443  extern int reuse_swap_page(struct page *);
3444  extern int try_to_free_swap(struct page *);
3445  struct backing_dev_info;
3446 diff --git a/init/do_mounts.c b/init/do_mounts.c
3447 index 02e3ca4..5af8c3e 100644
3448 --- a/init/do_mounts.c
3449 +++ b/init/do_mounts.c
3450 @@ -144,6 +144,7 @@ fail:
3451  done:
3452         return res;
3453  }
3454 +EXPORT_SYMBOL_GPL(name_to_dev_t);
3455  
3456  static int __init root_dev_setup(char *line)
3457  {
3458 @@ -414,6 +415,8 @@ void __init prepare_namespace(void)
3459         if (is_floppy && rd_doload && rd_load_disk(0))
3460                 ROOT_DEV = Root_RAM0;
3461  
3462 +       check_resume_attempted();
3463 +
3464         mount_root();
3465  out:
3466         devtmpfs_mount("dev");
3467 diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
3468 index 2b10853..ec3e087 100644
3469 --- a/init/do_mounts_initrd.c
3470 +++ b/init/do_mounts_initrd.c
3471 @@ -6,6 +6,7 @@
3472  #include <linux/romfs_fs.h>
3473  #include <linux/initrd.h>
3474  #include <linux/sched.h>
3475 +#include <linux/suspend.h>
3476  #include <linux/freezer.h>
3477  
3478  #include "do_mounts.h"
3479 @@ -64,6 +65,11 @@ static void __init handle_initrd(void)
3480  
3481         current->flags &= ~PF_FREEZER_SKIP;
3482  
3483 +       if (!resume_attempted)
3484 +               printk(KERN_ERR "TuxOnIce: No attempt was made to resume from "
3485 +                               "any image that might exist.\n");
3486 +       clear_toi_state(TOI_BOOT_TIME);
3487 +
3488         /* move initrd to rootfs' /old */
3489         sys_fchdir(old_fd);
3490         sys_mount("/", ".", NULL, MS_MOVE, NULL);
3491 diff --git a/init/main.c b/init/main.c
3492 index 3bdb152..f74eb5b 100644
3493 --- a/init/main.c
3494 +++ b/init/main.c
3495 @@ -117,6 +117,7 @@ extern void softirq_init(void);
3496  char __initdata boot_command_line[COMMAND_LINE_SIZE];
3497  /* Untouched saved command line (eg. for /proc) */
3498  char *saved_command_line;
3499 +EXPORT_SYMBOL_GPL(saved_command_line);
3500  /* Command line for parameter parsing */
3501  static char *static_command_line;
3502  
3503 diff --git a/kernel/cpu.c b/kernel/cpu.c
3504 index 97d1b42..b6e21bb 100644
3505 --- a/kernel/cpu.c
3506 +++ b/kernel/cpu.c
3507 @@ -428,6 +428,7 @@ int disable_nonboot_cpus(void)
3508         cpu_maps_update_done();
3509         return error;
3510  }
3511 +EXPORT_SYMBOL_GPL(disable_nonboot_cpus);
3512  
3513  void __weak arch_enable_nonboot_cpus_begin(void)
3514  {
3515 @@ -466,6 +467,7 @@ void __ref enable_nonboot_cpus(void)
3516  out:
3517         cpu_maps_update_done();
3518  }
3519 +EXPORT_SYMBOL_GPL(enable_nonboot_cpus);
3520  
3521  static int alloc_frozen_cpus(void)
3522  {
3523 diff --git a/kernel/kmod.c b/kernel/kmod.c
3524 index 6e9b196..19247e0 100644
3525 --- a/kernel/kmod.c
3526 +++ b/kernel/kmod.c
3527 @@ -290,6 +290,7 @@ int usermodehelper_disable(void)
3528         usermodehelper_disabled = 0;
3529         return -EAGAIN;
3530  }
3531 +EXPORT_SYMBOL_GPL(usermodehelper_disable);
3532  
3533  /**
3534   * usermodehelper_enable - allow new helpers to be started again
3535 @@ -298,6 +299,7 @@ void usermodehelper_enable(void)
3536  {
3537         usermodehelper_disabled = 0;
3538  }
3539 +EXPORT_SYMBOL_GPL(usermodehelper_enable);
3540  
3541  static void helper_lock(void)
3542  {
3543 diff --git a/kernel/pid.c b/kernel/pid.c
3544 index e9fd8c1..32d2697 100644
3545 --- a/kernel/pid.c
3546 +++ b/kernel/pid.c
3547 @@ -384,6 +384,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
3548  {
3549         return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
3550  }
3551 +EXPORT_SYMBOL_GPL(find_task_by_pid_ns);
3552  
3553  struct task_struct *find_task_by_vpid(pid_t vnr)
3554  {
3555 diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
3556 index 5c36ea9..cd32677 100644
3557 --- a/kernel/power/Kconfig
3558 +++ b/kernel/power/Kconfig
3559 @@ -47,6 +47,13 @@ config CAN_PM_TRACE
3560         def_bool y
3561         depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
3562  
3563 +config FS_FREEZER_DEBUG
3564 +       bool "Filesystem freezer debugging"
3565 +       depends on PM_DEBUG
3566 +       default n
3567 +       ---help---
3568 +       This option enables debugging of the filesystem freezing code.
3569 +
3570  config PM_TRACE
3571         bool
3572         help
3573 @@ -197,6 +204,238 @@ config PM_STD_PARTITION
3574           suspended image to. It will simply pick the first available swap 
3575           device.
3576  
3577 +menuconfig TOI_CORE
3578 +       tristate "Enhanced Hibernation (TuxOnIce)"
3579 +       depends on HIBERNATION
3580 +       default y
3581 +       ---help---
3582 +         TuxOnIce is the 'new and improved' suspend support.
3583 +
3584 +         See the TuxOnIce home page (tuxonice.net)
3585 +         for FAQs, HOWTOs and other documentation.
3586 +
3587 +       comment "Image Storage (you need at least one allocator)"
3588 +               depends on TOI_CORE
3589 +
3590 +       config TOI_FILE
3591 +               tristate "File Allocator"
3592 +               depends on TOI_CORE
3593 +               default y
3594 +               ---help---
3595 +                 This option enables support for storing an image in a
3596 +                 simple file. You might want this if your swap is
3597 +                 sometimes full enough that you don't have enough spare
3598 +                 space to store an image.
3599 +
3600 +       config TOI_SWAP
3601 +               tristate "Swap Allocator"
3602 +               depends on TOI_CORE && SWAP
3603 +               default y
3604 +               ---help---
3605 +                 This option enables support for storing an image in your
3606 +                 swap space.
3607 +
3608 +       comment "General Options"
3609 +               depends on TOI_CORE
3610 +
3611 +       config TOI_CRYPTO
3612 +               tristate "Compression support"
3613 +               depends on TOI_CORE && CRYPTO
3614 +               default y
3615 +               ---help---
3616 +                 This option adds support for using cryptoapi compression
3617 +                 algorithms. Compression is particularly useful as it can
3618 +                 more than double your suspend and resume speed (depending
3619 +                 upon how well your image compresses).
3620 +
3621 +                 You probably want this, so say Y here.
3622 +
3623 +       comment "No compression support available without Cryptoapi support."
3624 +               depends on TOI_CORE && !CRYPTO
3625 +
3626 +       config TOI_USERUI
3627 +               tristate "Userspace User Interface support"
3628 +               depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE)
3629 +               default y
3630 +               ---help---
3631 +                 This option enables support for a userspace-based user interface
3632 +                 to TuxOnIce, which allows you to have a nice display while suspending
3633 +                 and resuming, and also enables features such as pressing escape to
3634 +                 cancel a cycle or interactive debugging.
3635 +
3636 +       config TOI_USERUI_DEFAULT_PATH
3637 +               string "Default userui program location"
3638 +               default "/usr/local/sbin/tuxoniceui_text"
3639 +               depends on TOI_USERUI
3640 +               ---help---
3641 +                 This entry allows you to specify a default path to the userui binary.
3642 +
3643 +       config TOI_KEEP_IMAGE
3644 +               bool "Allow Keep Image Mode"
3645 +               depends on TOI_CORE
3646 +               ---help---
3647 +                 This option allows you to keep an image and reuse it. It is intended
3648 +                 __ONLY__ for use with systems where all filesystems are mounted read-
3649 +                 only (kiosks, for example). To use it, compile this option in and boot
3650 +                 normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend.
3651 +                 When you resume, the image will not be removed. You will be unable to turn
3652 +                 off swap partitions (assuming you are using the swap allocator), but future
3653 +                 suspends simply do a power-down. The image can be updated using the
3654 +                 kernel command line parameter suspend_act= to turn off the keep image
3655 +                 bit. Keep image mode is a little less user friendly on purpose - it
3656 +                 bit. Keep image mode is a little less user-friendly on purpose - it
3657 +
3658 +       config TOI_REPLACE_SWSUSP
3659 +               bool "Replace swsusp by default"
3660 +               default y
3661 +               depends on TOI_CORE
3662 +               ---help---
3663 +                 TuxOnIce can replace swsusp. This option makes that the default state,
3664 +                 requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want
3665 +                 to use the vanilla kernel functionality. Note that your initrd/ramfs will
3666 +                 need to do this before trying to resume, too.
3667 +                 With overriding swsusp enabled, echoing 'disk' to /sys/power/state will
3668 +                 start a TuxOnIce cycle. If resume= doesn't specify an allocator and both
3669 +                 the swap and file allocators are compiled in, the swap allocator will be
3670 +                 used by default.
3671 +
3672 +       config TOI_IGNORE_LATE_INITCALL
3673 +               bool "Wait for initrd/ramfs to run, by default"
3674 +               default n
3675 +               depends on TOI_CORE
3676 +               ---help---
3677 +                 When booting, TuxOnIce can check for an image and start to resume prior
3678 +                 to any initrd/ramfs running (via a late initcall).
3679 +
3680 +                 If you don't have an initrd/ramfs, this is what you want to happen -
3681 +                 otherwise you won't be able to safely resume. You should set this option
3682 +                 to 'No'.
3683 +
3684 +                 If, however, you want your initrd/ramfs to run anyway before resuming,
3685 +                 you need to tell TuxOnIce to ignore that earlier opportunity to resume.
3686 +                 This can be done either by using this compile-time option, or by
3687 +                 overriding this option with the boot-time parameter toi_initramfs_resume_only=1.
3688 +
3689 +                 Note that if TuxOnIce can't resume at the earlier opportunity, the
3690 +                 value of this option won't matter - the initramfs/initrd (if any) will
3691 +                 run anyway.
3692 +
3693 +       menuconfig TOI_CLUSTER
3694 +               tristate "Cluster support"
3695 +               default n
3696 +               depends on TOI_CORE && NET && BROKEN
3697 +               ---help---
3698 +                 Support for linking multiple machines in a cluster so that they suspend
3699 +                 and resume together.
3700 +
3701 +       config TOI_DEFAULT_CLUSTER_INTERFACE
3702 +               string "Default cluster interface"
3703 +               depends on TOI_CLUSTER
3704 +               ---help---
3705 +                 The default interface on which to communicate with other nodes in
3706 +                 the cluster.
3707 +
3708 +                 If no value is set here, cluster support will be disabled by default.
3709 +
3710 +       config TOI_DEFAULT_CLUSTER_KEY
3711 +               string "Default cluster key"
3712 +               default "Default"
3713 +               depends on TOI_CLUSTER
3714 +               ---help---
3715 +                 The default key used by this node. All nodes in the same cluster
3716 +                 have the same key. Multiple clusters may coexist on the same lan
3717 +                 by using different values for this key.
3718 +
3719 +       config TOI_CLUSTER_IMAGE_TIMEOUT
3720 +               int "Timeout when checking for image"
3721 +               default 15
3722 +               depends on TOI_CLUSTER
3723 +               ---help---
3724 +                 Timeout (seconds) before continuing to boot when waiting to see
3725 +                 whether other nodes might have an image. Set to -1 to wait
3726 +                 indefinitely. If WAIT_UNTIL_NODES is non-zero, we might continue
3727 +                 booting sooner than this timeout.
3728 +
3729 +       config TOI_CLUSTER_WAIT_UNTIL_NODES
3730 +               int "Nodes without image before continuing"
3731 +               default 0
3732 +               depends on TOI_CLUSTER
3733 +               ---help---
3734 +                 When booting and no image is found, we wait to see if other nodes
3735 +                 have an image before continuing to boot. This value lets us
3736 +                 continue after seeing a certain number of nodes without an image,
3737 +                 instead of continuing to wait for the timeout. Set to 0 to only
3738 +                 use the timeout.
3739 +
3740 +       config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE
3741 +               string "Default pre-hibernate script"
3742 +               depends on TOI_CLUSTER
3743 +               ---help---
3744 +                 The default script to be called when starting to hibernate.
3745 +
3746 +       config TOI_DEFAULT_CLUSTER_POST_HIBERNATE
3747 +               string "Default post-hibernate script"
3748 +               depends on TOI_CLUSTER
3749 +               ---help---
3750 +                 The default script to be called after resuming from hibernation.
3751 +
3752 +       config TOI_DEFAULT_WAIT
3753 +               int "Default waiting time for emergency boot messages"
3754 +               default "25"
3755 +               range -1 32768
3756 +               depends on TOI_CORE
3757 +               help
3758 +                 TuxOnIce can display warnings very early in the process of resuming,
3759 +                 if (for example) it appears that you have booted a kernel that doesn't
3760 +                 match an image on disk. It can then give you the opportunity to either
3761 +                 continue booting that kernel, or reboot the machine. This option can be
3762 +                 used to control how long to wait in such circumstances. -1 means wait
3763 +                 forever. 0 means don't wait at all (do the default action, which will
3764 +                 generally be to continue booting and remove the image). Values of 1 or
3765 +                 more indicate a number of seconds (up to 255) to wait before doing the
3766 +                 default.
3767 +
3768 +       config TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE
3769 +               int "Default extra pages allowance"
3770 +               default "2000"
3771 +               range 500 32768
3772 +               depends on TOI_CORE
3773 +               help
3774 +                 This value controls the default for the allowance TuxOnIce makes for
3775 +                 drivers to allocate extra memory during the atomic copy. The default
3776 +                 value of 2000 will be okay in most cases. If you are using
3777 +                 DRI, the easiest way to find what value to use is to try to hibernate
3778 +                 and look at how many pages were actually needed in the sysfs entry
3779 +                 /sys/power/tuxonice/debug_info (first number on the last line), adding
3780 +                 a little extra because the value is not always the same.
3781 +
3782 +       config TOI_CHECKSUM
3783 +               bool "Checksum pageset2"
3784 +               default n
3785 +               depends on TOI_CORE
3786 +               select CRYPTO
3787 +               select CRYPTO_ALGAPI
3788 +               select CRYPTO_MD4
3789 +               ---help---
3790 +                 Adds support for checksumming pageset2 pages, to ensure you really get an
3791 +                 atomic copy. Since some filesystems (XFS especially) change metadata even
3792 +                 when there's no other activity, we need this to check for pages that have
3793 +                 been changed while we were saving the page cache. If your debugging output
3794 +                 always says no pages were resaved, you may be able to safely disable this
3795 +                 option.
3796 +
3797 +config TOI
3798 +       bool
3799 +       depends on TOI_CORE!=n
3800 +       default y
3801 +
3802 +config TOI_EXPORTS
3803 +       bool
3804 +       depends on TOI_SWAP=m || TOI_FILE=m || \
3805 +               TOI_CRYPTO=m || TOI_CLUSTER=m || \
3806 +               TOI_USERUI=m || TOI_CORE=m
3807 +       default y
3808 +
3809  config APM_EMULATION
3810         tristate "Advanced Power Management Emulation"
3811         depends on PM && SYS_SUPPORTS_APM_EMULATION
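
The -1 / 0 / N semantics of TOI_DEFAULT_WAIT above follow a common kernel
idiom. A minimal sketch of how such a value is typically consumed
(illustrative only -- key_pressed() is a hypothetical poll helper, not
something this patch defines):

	/* Returns 1 if the user responded, 0 if the default should be taken. */
	static int wait_for_response(int timeout_seconds)
	{
		if (timeout_seconds == 0)
			return 0;			/* don't wait at all */

		while (timeout_seconds) {
			if (key_pressed())		/* hypothetical poll helper */
				return 1;
			ssleep(1);
			if (timeout_seconds > 0)	/* -1 never counts down */
				timeout_seconds--;
		}
		return 0;				/* timed out */
	}
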
3812 diff --git a/kernel/power/Makefile b/kernel/power/Makefile
3813 index 524e058..3d736f4 100644
3814 --- a/kernel/power/Makefile
3815 +++ b/kernel/power/Makefile
3816 @@ -3,6 +3,35 @@ ifeq ($(CONFIG_PM_DEBUG),y)
3817  EXTRA_CFLAGS   +=      -DDEBUG
3818  endif
3819  
3820 +tuxonice_core-y := tuxonice_modules.o
3821 +
3822 +obj-$(CONFIG_TOI)              += tuxonice_builtin.o
3823 +
3824 +tuxonice_core-$(CONFIG_PM_DEBUG)       += tuxonice_alloc.o
3825 +
3826 +# Compile these in after allocation debugging, if used.
3827 +
3828 +tuxonice_core-y += tuxonice_sysfs.o tuxonice_highlevel.o \
3829 +               tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \
3830 +               tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \
3831 +               tuxonice_power_off.o tuxonice_atomic_copy.o
3832 +
3833 +tuxonice_core-$(CONFIG_TOI_CHECKSUM)   += tuxonice_checksum.o
3834 +
3835 +tuxonice_core-$(CONFIG_NET)    += tuxonice_storage.o tuxonice_netlink.o
3836 +
3837 +obj-$(CONFIG_TOI_CORE)         += tuxonice_core.o
3838 +obj-$(CONFIG_TOI_CRYPTO)       += tuxonice_compress.o
3839 +
3840 +tuxonice_bio-y := tuxonice_bio_core.o tuxonice_bio_chains.o \
3841 +               tuxonice_bio_signature.o
3842 +
3843 +obj-$(CONFIG_TOI_SWAP)         += tuxonice_bio.o tuxonice_swap.o
3844 +obj-$(CONFIG_TOI_FILE)         += tuxonice_bio.o tuxonice_file.o
3845 +obj-$(CONFIG_TOI_CLUSTER)      += tuxonice_cluster.o
3846 +
3847 +obj-$(CONFIG_TOI_USERUI)       += tuxonice_userui.o
3848 +
3849  obj-$(CONFIG_PM)               += main.o
3850  obj-$(CONFIG_PM_SLEEP)         += console.o
3851  obj-$(CONFIG_FREEZER)          += process.o
3852 diff --git a/kernel/power/console.c b/kernel/power/console.c
3853 index 218e5af..95a6bdc 100644
3854 --- a/kernel/power/console.c
3855 +++ b/kernel/power/console.c
3856 @@ -24,6 +24,7 @@ int pm_prepare_console(void)
3857         orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
3858         return 0;
3859  }
3860 +EXPORT_SYMBOL_GPL(pm_prepare_console);
3861  
3862  void pm_restore_console(void)
3863  {
3864 @@ -32,4 +33,5 @@ void pm_restore_console(void)
3865                 vt_kmsg_redirect(orig_kmsg);
3866         }
3867  }
3868 +EXPORT_SYMBOL_GPL(pm_restore_console);
3869  #endif
3870 diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
3871 index aa9e916..4a836b4 100644
3872 --- a/kernel/power/hibernate.c
3873 +++ b/kernel/power/hibernate.c
3874 @@ -26,11 +26,12 @@
3875  #include <scsi/scsi_scan.h>
3876  #include <asm/suspend.h>
3877  
3878 -#include "power.h"
3879 +#include "tuxonice.h"
3880  
3881  
3882  static int noresume = 0;
3883 -static char resume_file[256] = CONFIG_PM_STD_PARTITION;
3884 +char resume_file[256] = CONFIG_PM_STD_PARTITION;
3885 +EXPORT_SYMBOL_GPL(resume_file);
3886  dev_t swsusp_resume_device;
3887  sector_t swsusp_resume_block;
3888  int in_suspend __nosavedata = 0;
3889 @@ -117,55 +118,60 @@ static int hibernation_test(int level) { return 0; }
3890   *     hibernation
3891   */
3892  
3893 -static int platform_begin(int platform_mode)
3894 +int platform_begin(int platform_mode)
3895  {
3896         return (platform_mode && hibernation_ops) ?
3897                 hibernation_ops->begin() : 0;
3898  }
3899 +EXPORT_SYMBOL_GPL(platform_begin);
3900  
3901  /**
3902   *     platform_end - tell the platform driver that we've entered the
3903   *     working state
3904   */
3905  
3906 -static void platform_end(int platform_mode)
3907 +void platform_end(int platform_mode)
3908  {
3909         if (platform_mode && hibernation_ops)
3910                 hibernation_ops->end();
3911  }
3912 +EXPORT_SYMBOL_GPL(platform_end);
3913  
3914  /**
3915   *     platform_pre_snapshot - prepare the machine for hibernation using the
3916   *     platform driver if so configured and return an error code if it fails
3917   */
3918  
3919 -static int platform_pre_snapshot(int platform_mode)
3920 +int platform_pre_snapshot(int platform_mode)
3921  {
3922         return (platform_mode && hibernation_ops) ?
3923                 hibernation_ops->pre_snapshot() : 0;
3924  }
3925 +EXPORT_SYMBOL_GPL(platform_pre_snapshot);
3926  
3927  /**
3928   *     platform_leave - prepare the machine for switching to the normal mode
3929   *     of operation using the platform driver (called with interrupts disabled)
3930   */
3931  
3932 -static void platform_leave(int platform_mode)
3933 +void platform_leave(int platform_mode)
3934  {
3935         if (platform_mode && hibernation_ops)
3936                 hibernation_ops->leave();
3937  }
3938 +EXPORT_SYMBOL_GPL(platform_leave);
3939  
3940  /**
3941   *     platform_finish - switch the machine to the normal mode of operation
3942   *     using the platform driver (must be called after platform_prepare())
3943   */
3944  
3945 -static void platform_finish(int platform_mode)
3946 +void platform_finish(int platform_mode)
3947  {
3948         if (platform_mode && hibernation_ops)
3949                 hibernation_ops->finish();
3950  }
3951 +EXPORT_SYMBOL_GPL(platform_finish);
3952  
3953  /**
3954   *     platform_pre_restore - prepare the platform for the restoration from a
3955 @@ -173,11 +179,12 @@ static void platform_finish(int platform_mode)
3956   *     called, platform_restore_cleanup() must be called.
3957   */
3958  
3959 -static int platform_pre_restore(int platform_mode)
3960 +int platform_pre_restore(int platform_mode)
3961  {
3962         return (platform_mode && hibernation_ops) ?
3963                 hibernation_ops->pre_restore() : 0;
3964  }
3965 +EXPORT_SYMBOL_GPL(platform_pre_restore);
3966  
3967  /**
3968   *     platform_restore_cleanup - switch the platform to the normal mode of
3969 @@ -186,22 +193,24 @@ static int platform_pre_restore(int platform_mode)
3970   *     regardless of the result of platform_pre_restore().
3971   */
3972  
3973 -static void platform_restore_cleanup(int platform_mode)
3974 +void platform_restore_cleanup(int platform_mode)
3975  {
3976         if (platform_mode && hibernation_ops)
3977                 hibernation_ops->restore_cleanup();
3978  }
3979 +EXPORT_SYMBOL_GPL(platform_restore_cleanup);
3980  
3981  /**
3982   *     platform_recover - recover the platform from a failure to suspend
3983   *     devices.
3984   */
3985  
3986 -static void platform_recover(int platform_mode)
3987 +void platform_recover(int platform_mode)
3988  {
3989         if (platform_mode && hibernation_ops && hibernation_ops->recover)
3990                 hibernation_ops->recover();
3991  }
3992 +EXPORT_SYMBOL_GPL(platform_recover);
3993  
3994  /**
3995   *     swsusp_show_speed - print the time elapsed between two events.
3996 @@ -535,6 +544,7 @@ int hibernation_platform_enter(void)
3997  
3998         return error;
3999  }
4000 +EXPORT_SYMBOL_GPL(hibernation_platform_enter);
4001  
4002  /**
4003   *     power_down - Shut the machine down for hibernation.
4004 @@ -586,6 +596,9 @@ int hibernate(void)
4005  {
4006         int error;
4007  
4008 +       if (test_action_state(TOI_REPLACE_SWSUSP))
4009 +               return try_tuxonice_hibernate();
4010 +
4011         mutex_lock(&pm_mutex);
4012         /* The snapshot device should not be opened while we're running */
4013         if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
4014 @@ -666,11 +679,19 @@ int hibernate(void)
4015   *
4016   */
4017  
4018 -static int software_resume(void)
4019 +int software_resume(void)
4020  {
4021         int error;
4022         unsigned int flags;
4023  
4024 +       resume_attempted = 1;
4025 +
4026 +       /*
4027 +        * We can't know (until an image header - if any - is loaded) whether
4028 +        * we did override swsusp. We therefore ensure that both are tried.
4029 +        */
4030 +       try_tuxonice_resume();
4031 +
4032         /*
4033          * If the user said "noresume".. bail out early.
4034          */
4035 @@ -999,6 +1020,7 @@ static int __init resume_offset_setup(char *str)
4036  static int __init noresume_setup(char *str)
4037  {
4038         noresume = 1;
4039 +       set_toi_state(TOI_NORESUME_SPECIFIED);
4040         return 1;
4041  }
4042  
4043 diff --git a/kernel/power/main.c b/kernel/power/main.c
4044 index b58800b..d23adf9 100644
4045 --- a/kernel/power/main.c
4046 +++ b/kernel/power/main.c
4047 @@ -16,6 +16,7 @@
4048  #include "power.h"
4049  
4050  DEFINE_MUTEX(pm_mutex);
4051 +EXPORT_SYMBOL_GPL(pm_mutex);
4052  
4053  unsigned int pm_flags;
4054  EXPORT_SYMBOL(pm_flags);
4055 @@ -24,7 +25,8 @@ EXPORT_SYMBOL(pm_flags);
4056  
4057  /* Routines for PM-transition notifications */
4058  
4059 -static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
4060 +BLOCKING_NOTIFIER_HEAD(pm_chain_head);
4061 +EXPORT_SYMBOL_GPL(pm_chain_head);
4062  
4063  int register_pm_notifier(struct notifier_block *nb)
4064  {
4065 @@ -43,6 +45,7 @@ int pm_notifier_call_chain(unsigned long val)
4066         return (blocking_notifier_call_chain(&pm_chain_head, val, NULL)
4067                         == NOTIFY_BAD) ? -EINVAL : 0;
4068  }
4069 +EXPORT_SYMBOL_GPL(pm_notifier_call_chain);
4070  
4071  /* If set, devices may be suspended and resumed asynchronously. */
4072  int pm_async_enabled = 1;
4073 @@ -136,6 +139,7 @@ power_attr(pm_test);
4074  #endif /* CONFIG_PM_SLEEP */
4075  
4076  struct kobject *power_kobj;
4077 +EXPORT_SYMBOL_GPL(power_kobj);
4078  
4079  /**
4080   *     state - control system power state.
4081 diff --git a/kernel/power/power.h b/kernel/power/power.h
4082 index 006270f..a5e538f 100644
4083 --- a/kernel/power/power.h
4084 +++ b/kernel/power/power.h
4085 @@ -31,8 +31,12 @@ static inline char *check_image_kernel(struct swsusp_info *info)
4086         return arch_hibernation_header_restore(info) ?
4087                         "architecture specific data" : NULL;
4088  }
4089 +#else
4090 +extern char *check_image_kernel(struct swsusp_info *info);
4091  #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
4092 +extern int init_header(struct swsusp_info *info);
4093  
4094 +extern char resume_file[256];
4095  /*
4096   * Keep some memory free so that I/O operations can succeed without paging
4097   * [Might this be more than 4 MB?]
4098 @@ -49,6 +53,7 @@ static inline char *check_image_kernel(struct swsusp_info *info)
4099  extern int hibernation_snapshot(int platform_mode);
4100  extern int hibernation_restore(int platform_mode);
4101  extern int hibernation_platform_enter(void);
4102 +extern void platform_recover(int platform_mode);
4103  #endif
4104  
4105  extern int pfn_is_nosave(unsigned long);
4106 @@ -63,6 +68,8 @@ static struct kobj_attribute _name##_attr = { \
4107         .store  = _name##_store,                \
4108  }
4109  
4110 +extern struct pbe *restore_pblist;
4111 +
4112  /* Preferred image size in bytes (default 500 MB) */
4113  extern unsigned long image_size;
4114  extern int in_suspend;
4115 @@ -233,3 +240,86 @@ static inline void suspend_thaw_processes(void)
4116  {
4117  }
4118  #endif
4119 +
4120 +extern struct page *saveable_page(struct zone *z, unsigned long p);
4121 +#ifdef CONFIG_HIGHMEM
4122 +extern struct page *saveable_highmem_page(struct zone *z, unsigned long p);
4123 +#else
4124 +static
4125 +inline struct page *saveable_highmem_page(struct zone *z, unsigned long p)
4126 +{
4127 +       return NULL;
4128 +}
4129 +#endif
4130 +
4131 +#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe))
4132 +extern struct list_head nosave_regions;
4133 +
4134 +/**
4135 + *     This structure represents a range of page frames the contents of which
4136 + *     should not be saved during the suspend.
4137 + */
4138 +
4139 +struct nosave_region {
4140 +       struct list_head list;
4141 +       unsigned long start_pfn;
4142 +       unsigned long end_pfn;
4143 +};
4144 +
4145 +#ifndef PHYS_PFN_OFFSET
4146 +#define PHYS_PFN_OFFSET 0
4147 +#endif
4148 +
4149 +#define ZONE_START(thiszone) ((thiszone)->zone_start_pfn - PHYS_PFN_OFFSET)
4150 +
4151 +#define BM_END_OF_MAP  (~0UL)
4152 +
4153 +#define BM_BITS_PER_BLOCK      (PAGE_SIZE * BITS_PER_BYTE)
4154 +
4155 +struct bm_block {
4156 +       struct list_head hook;          /* hook into a list of bitmap blocks */
4157 +       unsigned long start_pfn;        /* pfn represented by the first bit */
4158 +       unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
4159 +       unsigned long *data;    /* bitmap representing pages */
4160 +};
4161 +
4162 +/* struct bm_position is used for browsing memory bitmaps */
4163 +
4164 +struct bm_position {
4165 +       struct bm_block *block;
4166 +       int bit;
4167 +};
4168 +
4169 +struct memory_bitmap {
4170 +       struct list_head blocks;        /* list of bitmap blocks */
4171 +       struct linked_page *p_list;     /* list of pages used to store zone
4172 +                                        * bitmap objects and bitmap block
4173 +                                        * objects
4174 +                                        */
4175 +       struct bm_position cur;         /* most recently used bit position */
4176 +       struct bm_position iter;        /* most recently used bit position
4177 +                                        * when iterating over a bitmap.
4178 +                                        */
4179 +};
4180 +
4181 +extern int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
4182 +               int safe_needed);
4183 +extern void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
4184 +extern void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn);
4185 +extern void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn);
4186 +extern int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn);
4187 +extern unsigned long memory_bm_next_pfn(struct memory_bitmap *bm);
4188 +extern void memory_bm_position_reset(struct memory_bitmap *bm);
4189 +extern void memory_bm_clear(struct memory_bitmap *bm);
4190 +extern void memory_bm_copy(struct memory_bitmap *source,
4191 +               struct memory_bitmap *dest);
4192 +extern void memory_bm_dup(struct memory_bitmap *source,
4193 +               struct memory_bitmap *dest);
4194 +
4195 +#ifdef CONFIG_TOI
4196 +struct toi_module_ops;
4197 +extern int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
4198 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
4199 +extern int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
4200 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
4201 +#endif
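
The block above exports the memory bitmap API to the TuxOnIce modules. Note
that the position has been split into cur (used by the bit set/clear/test
helpers) and iter (used by memory_bm_next_pfn()), so random-access bit
operations no longer disturb a walk in progress. A minimal sketch of the
iteration idiom, mirroring what memory_bm_clear() does in snapshot.c below
(count_set_pfns() itself is illustrative, not part of the patch):

	static unsigned int count_set_pfns(struct memory_bitmap *bm)
	{
		unsigned int n = 0;
		unsigned long pfn;

		memory_bm_position_reset(bm);
		for (pfn = memory_bm_next_pfn(bm); pfn != BM_END_OF_MAP;
		     pfn = memory_bm_next_pfn(bm))
			n++;	/* one set bit per saveable pfn */
		return n;
	}
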
4202 diff --git a/kernel/power/process.c b/kernel/power/process.c
4203 index 71ae290..8733143 100644
4204 --- a/kernel/power/process.c
4205 +++ b/kernel/power/process.c
4206 @@ -15,6 +15,13 @@
4207  #include <linux/syscalls.h>
4208  #include <linux/freezer.h>
4209  #include <linux/delay.h>
4210 +#include <linux/buffer_head.h>
4211 +
4212 +int freezer_state;
4213 +EXPORT_SYMBOL_GPL(freezer_state);
4214 +
4215 +int freezer_sync = 1;
4216 +EXPORT_SYMBOL_GPL(freezer_sync);
4217  
4218  /* 
4219   * Timeout for stopping processes
4220 @@ -112,17 +119,26 @@ int freeze_processes(void)
4221  {
4222         int error;
4223  
4224 -       printk("Freezing user space processes ... ");
4225 +       printk(KERN_INFO "Stopping fuse filesystems.\n");
4226 +       freeze_filesystems(FS_FREEZER_FUSE);
4227 +       freezer_state = FREEZER_FILESYSTEMS_FROZEN;
4228 +       printk(KERN_INFO "Freezing user space processes ... ");
4229         error = try_to_freeze_tasks(true);
4230         if (error)
4231                 goto Exit;
4232         printk("done.\n");
4233  
4234 -       printk("Freezing remaining freezable tasks ... ");
4235 +       if (freezer_sync)
4236 +               sys_sync();
4237 +       printk(KERN_INFO "Stopping normal filesystems.\n");
4238 +       freeze_filesystems(FS_FREEZER_NORMAL);
4239 +       freezer_state = FREEZER_USERSPACE_FROZEN;
4240 +       printk(KERN_INFO "Freezing remaining freezable tasks ... ");
4241         error = try_to_freeze_tasks(false);
4242         if (error)
4243                 goto Exit;
4244         printk("done.");
4245 +       freezer_state = FREEZER_FULLY_ON;
4246  
4247         oom_killer_disable();
4248   Exit:
4249 @@ -131,6 +147,7 @@ int freeze_processes(void)
4250  
4251         return error;
4252  }
4253 +EXPORT_SYMBOL_GPL(freeze_processes);
4254  
4255  static void thaw_tasks(bool nosig_only)
4256  {
4257 @@ -154,12 +171,39 @@ static void thaw_tasks(bool nosig_only)
4258  
4259  void thaw_processes(void)
4260  {
4261 +       int old_state = freezer_state;
4262 +
4263 +       if (old_state == FREEZER_OFF)
4264 +               return;
4265 +
4266 +       freezer_state = FREEZER_OFF;
4267 +
4268         oom_killer_enable();
4269  
4270 +       printk(KERN_INFO "Restarting all filesystems ...\n");
4271 +       thaw_filesystems(FS_FREEZER_ALL);
4272 +
4273 +       printk(KERN_INFO "Restarting tasks ... ");
4274 +       if (old_state == FREEZER_FULLY_ON)
4275 +               thaw_tasks(true);
4276 +
4277         printk("Restarting tasks ... ");
4278 -       thaw_tasks(true);
4279         thaw_tasks(false);
4280         schedule();
4281         printk("done.\n");
4282  }
4283 +EXPORT_SYMBOL_GPL(thaw_processes);
4284  
4285 +void thaw_kernel_threads(void)
4286 +{
4287 +       freezer_state = FREEZER_USERSPACE_FROZEN;
4288 +       printk(KERN_INFO "Restarting normal filesystems.\n");
4289 +       thaw_filesystems(FS_FREEZER_NORMAL);
4290 +       thaw_tasks(true);
4291 +}
4292 +
4293 +/*
4294 + * It's ugly putting this EXPORT down here, but it's necessary so that it
4295 + * doesn't matter whether the fs-freezing patch is applied or not.
4296 + */
4297 +EXPORT_SYMBOL_GPL(thaw_kernel_threads);
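
Taken together, the process.c hunks thread a small state machine through the
freezer. A sketch of the progression (the FREEZER_* and FS_FREEZER_*
constants come from the fs-freezer support added elsewhere in this patch):

	FREEZER_OFF
	  -> freeze fuse filesystems            -> FREEZER_FILESYSTEMS_FROZEN
	  -> freeze userspace, sync (optional),
	     freeze normal filesystems          -> FREEZER_USERSPACE_FROZEN
	  -> freeze remaining freezable tasks   -> FREEZER_FULLY_ON

	thaw_processes() unwinds from whichever state was reached and ends at
	FREEZER_OFF; thaw_kernel_threads() only drops back to
	FREEZER_USERSPACE_FROZEN, leaving userspace frozen.
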
4298 diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
4299 index 25ce010..4fde437 100644
4300 --- a/kernel/power/snapshot.c
4301 +++ b/kernel/power/snapshot.c
4302 @@ -35,6 +35,8 @@
4303  #include <asm/io.h>
4304  
4305  #include "power.h"
4306 +#include "tuxonice_builtin.h"
4307 +#include "tuxonice_pagedir.h"
4308  
4309  static int swsusp_page_is_free(struct page *);
4310  static void swsusp_set_page_forbidden(struct page *);
4311 @@ -54,6 +56,10 @@ unsigned long image_size = 500 * 1024 * 1024;
4312   * directly to their "original" page frames.
4313   */
4314  struct pbe *restore_pblist;
4315 +EXPORT_SYMBOL_GPL(restore_pblist);
4316 +
4317 +int resume_attempted;
4318 +EXPORT_SYMBOL_GPL(resume_attempted);
4319  
4320  /* Pointer to an auxiliary buffer (1 page) */
4321  static void *buffer;
4322 @@ -96,6 +102,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed)
4323  
4324  unsigned long get_safe_page(gfp_t gfp_mask)
4325  {
4326 +       if (toi_running)
4327 +               return toi_get_nonconflicting_page();
4328 +
4329         return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
4330  }
4331  
4332 @@ -232,47 +241,22 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
4333   *     the represented memory area.
4334   */
4335  
4336 -#define BM_END_OF_MAP  (~0UL)
4337 -
4338 -#define BM_BITS_PER_BLOCK      (PAGE_SIZE * BITS_PER_BYTE)
4339 -
4340 -struct bm_block {
4341 -       struct list_head hook;  /* hook into a list of bitmap blocks */
4342 -       unsigned long start_pfn;        /* pfn represented by the first bit */
4343 -       unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
4344 -       unsigned long *data;    /* bitmap representing pages */
4345 -};
4346 -
4347  static inline unsigned long bm_block_bits(struct bm_block *bb)
4348  {
4349         return bb->end_pfn - bb->start_pfn;
4350  }
4351  
4352 -/* strcut bm_position is used for browsing memory bitmaps */
4353 -
4354 -struct bm_position {
4355 -       struct bm_block *block;
4356 -       int bit;
4357 -};
4358 -
4359 -struct memory_bitmap {
4360 -       struct list_head blocks;        /* list of bitmap blocks */
4361 -       struct linked_page *p_list;     /* list of pages used to store zone
4362 -                                        * bitmap objects and bitmap block
4363 -                                        * objects
4364 -                                        */
4365 -       struct bm_position cur; /* most recently used bit position */
4366 -};
4367 -
4368  /* Functions that operate on memory bitmaps */
4369  
4370 -static void memory_bm_position_reset(struct memory_bitmap *bm)
4371 +void memory_bm_position_reset(struct memory_bitmap *bm)
4372  {
4373         bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
4374         bm->cur.bit = 0;
4375 -}
4376  
4377 -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
4378 +       bm->iter.block = list_entry(bm->blocks.next, struct bm_block, hook);
4379 +       bm->iter.bit = 0;
4380 +}
4381 +EXPORT_SYMBOL_GPL(memory_bm_position_reset);
4382  
4383  /**
4384   *     create_bm_block_list - create a list of block bitmap objects
4385 @@ -380,7 +364,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
4386  /**
4387    *    memory_bm_create - allocate memory for a memory bitmap
4388    */
4389 -static int
4390 +int
4391  memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
4392  {
4393         struct chain_allocator ca;
4394 @@ -436,11 +420,12 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
4395         memory_bm_free(bm, PG_UNSAFE_CLEAR);
4396         goto Exit;
4397  }
4398 +EXPORT_SYMBOL_GPL(memory_bm_create);
4399  
4400  /**
4401    *    memory_bm_free - free memory occupied by the memory bitmap @bm
4402    */
4403 -static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
4404 +void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
4405  {
4406         struct bm_block *bb;
4407  
4408 @@ -452,6 +437,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
4409  
4410         INIT_LIST_HEAD(&bm->blocks);
4411  }
4412 +EXPORT_SYMBOL_GPL(memory_bm_free);
4413  
4414  /**
4415   *     memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
4416 @@ -490,7 +476,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
4417         return 0;
4418  }
4419  
4420 -static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
4421 +void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
4422  {
4423         void *addr;
4424         unsigned int bit;
4425 @@ -500,6 +486,7 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
4426         BUG_ON(error);
4427         set_bit(bit, addr);
4428  }
4429 +EXPORT_SYMBOL_GPL(memory_bm_set_bit);
4430  
4431  static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
4432  {
4433 @@ -513,7 +500,7 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
4434         return error;
4435  }
4436  
4437 -static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
4438 +void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
4439  {
4440         void *addr;
4441         unsigned int bit;
4442 @@ -523,8 +510,9 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
4443         BUG_ON(error);
4444         clear_bit(bit, addr);
4445  }
4446 +EXPORT_SYMBOL_GPL(memory_bm_clear_bit);
4447  
4448 -static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
4449 +int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
4450  {
4451         void *addr;
4452         unsigned int bit;
4453 @@ -534,6 +522,7 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
4454         BUG_ON(error);
4455         return test_bit(bit, addr);
4456  }
4457 +EXPORT_SYMBOL_GPL(memory_bm_test_bit);
4458  
4459  static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
4460  {
4461 @@ -552,43 +541,178 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
4462   *     this function.
4463   */
4464  
4465 -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
4466 +unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
4467  {
4468         struct bm_block *bb;
4469         int bit;
4470  
4471 -       bb = bm->cur.block;
4472 +       bb = bm->iter.block;
4473         do {
4474 -               bit = bm->cur.bit;
4475 +               bit = bm->iter.bit;
4476                 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
4477                 if (bit < bm_block_bits(bb))
4478                         goto Return_pfn;
4479  
4480                 bb = list_entry(bb->hook.next, struct bm_block, hook);
4481 -               bm->cur.block = bb;
4482 -               bm->cur.bit = 0;
4483 +               bm->iter.block = bb;
4484 +               bm->iter.bit = 0;
4485         } while (&bb->hook != &bm->blocks);
4486  
4487         memory_bm_position_reset(bm);
4488         return BM_END_OF_MAP;
4489  
4490   Return_pfn:
4491 -       bm->cur.bit = bit + 1;
4492 +       bm->iter.bit = bit + 1;
4493         return bb->start_pfn + bit;
4494  }
4495 +EXPORT_SYMBOL_GPL(memory_bm_next_pfn);
4496  
4497 -/**
4498 - *     This structure represents a range of page frames the contents of which
4499 - *     should not be saved during the suspend.
4500 - */
4501 +void memory_bm_clear(struct memory_bitmap *bm)
4502 +{
4503 +       unsigned long pfn;
4504  
4505 -struct nosave_region {
4506 -       struct list_head list;
4507 -       unsigned long start_pfn;
4508 -       unsigned long end_pfn;
4509 -};
4510 +       memory_bm_position_reset(bm);
4511 +       pfn = memory_bm_next_pfn(bm);
4512 +       while (pfn != BM_END_OF_MAP) {
4513 +               memory_bm_clear_bit(bm, pfn);
4514 +               pfn = memory_bm_next_pfn(bm);
4515 +       }
4516 +}
4517 +EXPORT_SYMBOL_GPL(memory_bm_clear);
4518 +
4519 +void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest)
4520 +{
4521 +       unsigned long pfn;
4522 +
4523 +       memory_bm_position_reset(source);
4524 +       pfn = memory_bm_next_pfn(source);
4525 +       while (pfn != BM_END_OF_MAP) {
4526 +               memory_bm_set_bit(dest, pfn);
4527 +               pfn = memory_bm_next_pfn(source);
4528 +       }
4529 +}
4530 +EXPORT_SYMBOL_GPL(memory_bm_copy);
4531 +
4532 +void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest)
4533 +{
4534 +       memory_bm_clear(dest);
4535 +       memory_bm_copy(source, dest);
4536 +}
4537 +EXPORT_SYMBOL_GPL(memory_bm_dup);
4538 +
4539 +#ifdef CONFIG_TOI
4540 +#define DEFINE_MEMORY_BITMAP(name) \
4541 +struct memory_bitmap *name; \
4542 +EXPORT_SYMBOL_GPL(name)
4543 +
4544 +DEFINE_MEMORY_BITMAP(pageset1_map);
4545 +DEFINE_MEMORY_BITMAP(pageset1_copy_map);
4546 +DEFINE_MEMORY_BITMAP(pageset2_map);
4547 +DEFINE_MEMORY_BITMAP(page_resave_map);
4548 +DEFINE_MEMORY_BITMAP(io_map);
4549 +DEFINE_MEMORY_BITMAP(nosave_map);
4550 +DEFINE_MEMORY_BITMAP(free_map);
4551 +
4552 +int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
4553 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
4554 +{
4555 +       int result = 0;
4556 +       unsigned int nr = 0;
4557 +       struct bm_block *bb;
4558 +
4559 +       if (!bm)
4560 +               return result;
4561  
4562 -static LIST_HEAD(nosave_regions);
4563 +       list_for_each_entry(bb, &bm->blocks, hook)
4564 +               nr++;
4565 +
4566 +       result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int));
4567 +       if (result)
4568 +               return result;
4569 +
4570 +       list_for_each_entry(bb, &bm->blocks, hook) {
4571 +               result = (*rw_chunk)(WRITE, NULL, (char *) &bb->start_pfn,
4572 +                               2 * sizeof(unsigned long));
4573 +               if (result)
4574 +                       return result;
4575 +
4576 +               result = (*rw_chunk)(WRITE, NULL, (char *) bb->data, PAGE_SIZE);
4577 +               if (result)
4578 +                       return result;
4579 +       }
4580 +
4581 +       return 0;
4582 +}
4583 +EXPORT_SYMBOL_GPL(memory_bm_write);
4584 +
4585 +int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
4586 +       (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
4587 +{
4588 +       int result = 0;
4589 +       unsigned int nr, i;
4590 +       struct bm_block *bb;
4591 +
4592 +       if (!bm)
4593 +               return result;
4594 +
4595 +       result = memory_bm_create(bm, GFP_KERNEL, 0);
4596 +
4597 +       if (result)
4598 +               return result;
4599 +
4600 +       result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int));
4601 +       if (result)
4602 +               goto Free;
4603 +
4604 +       for (i = 0; i < nr; i++) {
4605 +               unsigned long pfn;
4606 +
4607 +               result = (*rw_chunk)(READ, NULL, (char *) &pfn,
4608 +                               sizeof(unsigned long));
4609 +               if (result)
4610 +                       goto Free;
4611 +
4612 +               list_for_each_entry(bb, &bm->blocks, hook)
4613 +                       if (bb->start_pfn == pfn)
4614 +                               break;
4615 +
4616 +               if (&bb->hook == &bm->blocks) {
4617 +                       printk(KERN_ERR
4618 +                               "TuxOnIce: Failed to load memory bitmap.\n");
4619 +                       result = -EINVAL;
4620 +                       goto Free;
4621 +               }
4622 +
4623 +               result = (*rw_chunk)(READ, NULL, (char *) &pfn,
4624 +                               sizeof(unsigned long));
4625 +               if (result)
4626 +                       goto Free;
4627 +
4628 +               if (pfn != bb->end_pfn) {
4629 +                       printk(KERN_ERR
4630 +                               "TuxOnIce: Failed to load memory bitmap. "
4631 +                               "End PFN doesn't match what was saved.\n");
4632 +                       result = -EINVAL;
4633 +                       goto Free;
4634 +               }
4635 +
4636 +               result = (*rw_chunk)(READ, NULL, (char *) bb->data, PAGE_SIZE);
4637 +
4638 +               if (result)
4639 +                       goto Free;
4640 +       }
4641 +
4642 +       return 0;
4643 +
4644 +Free:
4645 +       memory_bm_free(bm, PG_ANY);
4646 +       return result;
4647 +}
4648 +EXPORT_SYMBOL_GPL(memory_bm_read);
4649 +#endif
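
/*
 * On-media layout produced by memory_bm_write() and consumed by
 * memory_bm_read() above (a restatement of the code, not a separate spec):
 *
 *	unsigned int	nr;			number of bitmap blocks
 *	then, nr times:
 *	unsigned long	start_pfn;		first pfn the block covers
 *	unsigned long	end_pfn;		last pfn covered, plus one
 *	char		data[PAGE_SIZE];	the bitmap bits themselves
 *
 * memory_bm_read() rebuilds the bitmap with memory_bm_create() first, so
 * the saved blocks must match the current memory layout; a start or end
 * pfn mismatch aborts the load with -EINVAL.
 */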
4650 +
4651 +LIST_HEAD(nosave_regions);
4652 +EXPORT_SYMBOL_GPL(nosave_regions);
4653  
4654  /**
4655   *     register_nosave_region - register a range of page frames the contents
4656 @@ -824,7 +948,7 @@ static unsigned int count_free_highmem_pages(void)
4657   *     We should save the page if it isn't Nosave or NosaveFree, or Reserved,
4658   *     and it isn't a part of a free chunk of pages.
4659   */
4660 -static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
4661 +struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
4662  {
4663         struct page *page;
4664  
4665 @@ -843,6 +967,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
4666  
4667         return page;
4668  }
4669 +EXPORT_SYMBOL_GPL(saveable_highmem_page);
4670  
4671  /**
4672   *     count_highmem_pages - compute the total number of saveable highmem
4673 @@ -868,11 +993,6 @@ static unsigned int count_highmem_pages(void)
4674         }
4675         return n;
4676  }
4677 -#else
4678 -static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
4679 -{
4680 -       return NULL;
4681 -}
4682  #endif /* CONFIG_HIGHMEM */
4683  
4684  /**
4685 @@ -883,7 +1003,7 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
4686   *     of pages statically defined as 'unsaveable', and it isn't a part of
4687   *     a free chunk of pages.
4688   */
4689 -static struct page *saveable_page(struct zone *zone, unsigned long pfn)
4690 +struct page *saveable_page(struct zone *zone, unsigned long pfn)
4691  {
4692         struct page *page;
4693  
4694 @@ -905,6 +1025,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
4695  
4696         return page;
4697  }
4698 +EXPORT_SYMBOL_GPL(saveable_page);
4699  
4700  /**
4701   *     count_data_pages - compute the total number of saveable non-highmem
4702 @@ -1501,6 +1622,9 @@ asmlinkage int swsusp_save(void)
4703  {
4704         unsigned int nr_pages, nr_highmem;
4705  
4706 +       if (toi_running)
4707 +               return toi_post_context_save();
4708 +
4709         printk(KERN_INFO "PM: Creating hibernation image:\n");
4710  
4711         drain_local_pages(NULL);
4712 @@ -1541,14 +1665,14 @@ asmlinkage int swsusp_save(void)
4713  }
4714  
4715  #ifndef CONFIG_ARCH_HIBERNATION_HEADER
4716 -static int init_header_complete(struct swsusp_info *info)
4717 +int init_header_complete(struct swsusp_info *info)
4718  {
4719         memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
4720         info->version_code = LINUX_VERSION_CODE;
4721         return 0;
4722  }
4723  
4724 -static char *check_image_kernel(struct swsusp_info *info)
4725 +char *check_image_kernel(struct swsusp_info *info)
4726  {
4727         if (info->version_code != LINUX_VERSION_CODE)
4728                 return "kernel version";
4729 @@ -1562,6 +1686,7 @@ static char *check_image_kernel(struct swsusp_info *info)
4730                 return "machine";
4731         return NULL;
4732  }
4733 +EXPORT_SYMBOL_GPL(check_image_kernel);
4734  #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
4735  
4736  unsigned long snapshot_get_image_size(void)
4737 @@ -1569,7 +1694,7 @@ unsigned long snapshot_get_image_size(void)
4738         return nr_copy_pages + nr_meta_pages + 1;
4739  }
4740  
4741 -static int init_header(struct swsusp_info *info)
4742 +int init_header(struct swsusp_info *info)
4743  {
4744         memset(info, 0, sizeof(struct swsusp_info));
4745         info->num_physpages = num_physpages;
4746 @@ -1579,6 +1704,7 @@ static int init_header(struct swsusp_info *info)
4747         info->size <<= PAGE_SHIFT;
4748         return init_header_complete(info);
4749  }
4750 +EXPORT_SYMBOL_GPL(init_header);
4751  
4752  /**
4753   *     pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
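
The get_safe_page() and swsusp_save() hunks above show the interception
pattern this patch uses throughout: when a TuxOnIce cycle is active, the
stock swsusp entry points defer to their TuxOnIce counterparts, so the two
implementations can coexist in one kernel:

	if (toi_running)
		return toi_post_context_save();
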
4754 diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
4755 index 56e7dbb..9618d42 100644
4756 --- a/kernel/power/suspend.c
4757 +++ b/kernel/power/suspend.c
4758 @@ -230,6 +230,7 @@ int suspend_devices_and_enter(suspend_state_t state)
4759                 suspend_ops->recover();
4760         goto Resume_devices;
4761  }
4762 +EXPORT_SYMBOL_GPL(suspend_devices_and_enter);
4763  
4764  /**
4765   *     suspend_finish - Do final work before exiting suspend sequence.
4766 diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h
4767 new file mode 100644
4768 index 0000000..537291e
4769 --- /dev/null
4770 +++ b/kernel/power/tuxonice.h
4771 @@ -0,0 +1,211 @@
4772 +/*
4773 + * kernel/power/tuxonice.h
4774 + *
4775 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
4776 + *
4777 + * This file is released under the GPLv2.
4778 + *
4779 + * It contains declarations used throughout swsusp.
4780 + *
4781 + */
4782 +
4783 +#ifndef KERNEL_POWER_TOI_H
4784 +#define KERNEL_POWER_TOI_H
4785 +
4786 +#include <linux/delay.h>
4787 +#include <linux/bootmem.h>
4788 +#include <linux/suspend.h>
4789 +#include <linux/fs.h>
4790 +#include <linux/kmod.h>
4791 +#include <asm/setup.h>
4792 +#include "tuxonice_pageflags.h"
4793 +#include "power.h"
4794 +
4795 +#define TOI_CORE_VERSION "3.1.1.1"
4796 +#define        TOI_HEADER_VERSION 3
4797 +#define MY_BOOT_KERNEL_DATA_VERSION 3
4798 +
4799 +struct toi_boot_kernel_data {
4800 +       int version;
4801 +       int size;
4802 +       unsigned long toi_action;
4803 +       unsigned long toi_debug_state;
4804 +       u32 toi_default_console_level;
4805 +       int toi_io_time[2][2];
4806 +       char toi_nosave_commandline[COMMAND_LINE_SIZE];
4807 +       unsigned long pages_used[33];
4808 +       unsigned long compress_bytes_in;
4809 +       unsigned long compress_bytes_out;
4810 +};
4811 +
4812 +extern struct toi_boot_kernel_data toi_bkd;
4813 +
4814 +/* Location of the boot kernel data struct in the kernel being resumed */
4815 +extern unsigned long boot_kernel_data_buffer;
4816 +
4817 +/*              == Action states ==            */
4818 +
4819 +enum {
4820 +       TOI_REBOOT,
4821 +       TOI_PAUSE,
4822 +       TOI_LOGALL,
4823 +       TOI_CAN_CANCEL,
4824 +       TOI_KEEP_IMAGE,
4825 +       TOI_FREEZER_TEST,
4826 +       TOI_SINGLESTEP,
4827 +       TOI_PAUSE_NEAR_PAGESET_END,
4828 +       TOI_TEST_FILTER_SPEED,
4829 +       TOI_TEST_BIO,
4830 +       TOI_NO_PAGESET2,
4831 +       TOI_IGNORE_ROOTFS,
4832 +       TOI_REPLACE_SWSUSP,
4833 +       TOI_PAGESET2_FULL,
4834 +       TOI_ABORT_ON_RESAVE_NEEDED,
4835 +       TOI_NO_MULTITHREADED_IO,
4836 +       TOI_NO_DIRECT_LOAD, /* Obsolete */
4837 +       TOI_LATE_CPU_HOTPLUG,
4838 +       TOI_GET_MAX_MEM_ALLOCD,
4839 +       TOI_NO_FLUSHER_THREAD,
4840 +       TOI_NO_PS2_IF_UNNEEDED
4841 +};
4842 +
4843 +#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action))
4844 +
4845 +/*              == Result states ==            */
4846 +
4847 +enum {
4848 +       TOI_ABORTED,
4849 +       TOI_ABORT_REQUESTED,
4850 +       TOI_NOSTORAGE_AVAILABLE,
4851 +       TOI_INSUFFICIENT_STORAGE,
4852 +       TOI_FREEZING_FAILED,
4853 +       TOI_KEPT_IMAGE,
4854 +       TOI_WOULD_EAT_MEMORY,
4855 +       TOI_UNABLE_TO_FREE_ENOUGH_MEMORY,
4856 +       TOI_PM_SEM,
4857 +       TOI_DEVICE_REFUSED,
4858 +       TOI_SYSDEV_REFUSED,
4859 +       TOI_EXTRA_PAGES_ALLOW_TOO_SMALL,
4860 +       TOI_UNABLE_TO_PREPARE_IMAGE,
4861 +       TOI_FAILED_MODULE_INIT,
4862 +       TOI_FAILED_MODULE_CLEANUP,
4863 +       TOI_FAILED_IO,
4864 +       TOI_OUT_OF_MEMORY,
4865 +       TOI_IMAGE_ERROR,
4866 +       TOI_PLATFORM_PREP_FAILED,
4867 +       TOI_CPU_HOTPLUG_FAILED,
4868 +       TOI_ARCH_PREPARE_FAILED,
4869 +       TOI_RESAVE_NEEDED,
4870 +       TOI_CANT_SUSPEND,
4871 +       TOI_NOTIFIERS_PREPARE_FAILED,
4872 +       TOI_PRE_SNAPSHOT_FAILED,
4873 +       TOI_PRE_RESTORE_FAILED,
4874 +       TOI_USERMODE_HELPERS_ERR,
4875 +       TOI_CANT_USE_ALT_RESUME,
4876 +       TOI_HEADER_TOO_BIG,
4877 +       TOI_NUM_RESULT_STATES   /* Used in printing debug info only */
4878 +};
4879 +
4880 +extern unsigned long toi_result;
4881 +
4882 +#define set_result_state(bit) (test_and_set_bit(bit, &toi_result))
4883 +#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \
4884 +                               test_and_set_bit(bit, &toi_result))
4885 +#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result))
4886 +#define test_result_state(bit) (test_bit(bit, &toi_result))
4887 +
4888 +/*      == Debug sections and levels ==        */
4889 +
4890 +/* debugging levels. */
4891 +enum {
4892 +       TOI_STATUS = 0,
4893 +       TOI_ERROR = 2,
4894 +       TOI_LOW,
4895 +       TOI_MEDIUM,
4896 +       TOI_HIGH,
4897 +       TOI_VERBOSE,
4898 +};
4899 +
4900 +enum {
4901 +       TOI_ANY_SECTION,
4902 +       TOI_EAT_MEMORY,
4903 +       TOI_IO,
4904 +       TOI_HEADER,
4905 +       TOI_WRITER,
4906 +       TOI_MEMORY,
4907 +};
4908 +
4909 +#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state))
4910 +#define clear_debug_state(bit) \
4911 +       (test_and_clear_bit(bit, &toi_bkd.toi_debug_state))
4912 +#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state))
4913 +
4914 +/*             == Steps in hibernating ==      */
4915 +
4916 +enum {
4917 +       STEP_HIBERNATE_PREPARE_IMAGE,
4918 +       STEP_HIBERNATE_SAVE_IMAGE,
4919 +       STEP_HIBERNATE_POWERDOWN,
4920 +       STEP_RESUME_CAN_RESUME,
4921 +       STEP_RESUME_LOAD_PS1,
4922 +       STEP_RESUME_DO_RESTORE,
4923 +       STEP_RESUME_READ_PS2,
4924 +       STEP_RESUME_GO,
4925 +       STEP_RESUME_ALT_IMAGE,
4926 +       STEP_CLEANUP,
4927 +       STEP_QUIET_CLEANUP
4928 +};
4929 +
4930 +/*             == TuxOnIce states ==
4931 +       (see also include/linux/suspend.h)      */
4932 +
4933 +#define get_toi_state()  (toi_state)
4934 +#define restore_toi_state(saved_state) \
4935 +       do { toi_state = saved_state; } while (0)
4936 +
4937 +/*             == Module support ==            */
4938 +
4939 +struct toi_core_fns {
4940 +       int (*post_context_save)(void);
4941 +       unsigned long (*get_nonconflicting_page)(void);
4942 +       int (*try_hibernate)(void);
4943 +       void (*try_resume)(void);
4944 +};
4945 +
4946 +extern struct toi_core_fns *toi_core_fns;
4947 +
4948 +/*             == All else ==                  */
4949 +#define KB(x) ((x) << (PAGE_SHIFT - 10))
4950 +#define MB(x) ((x) >> (20 - PAGE_SHIFT))
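
/*
 * Worked example, assuming 4 KiB pages (PAGE_SHIFT == 12):
 *	KB(x) == (x) << 2	3000 pages -> 12000 KB
 *	MB(x) == (x) >> 8	3000 pages ->    11 MB (integer truncation)
 */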
4951 +
4952 +extern int toi_start_anything(int toi_or_resume);
4953 +extern void toi_finish_anything(int toi_or_resume);
4954 +
4955 +extern int save_image_part1(void);
4956 +extern int toi_atomic_restore(void);
4957 +
4958 +extern int toi_try_hibernate(void);
4959 +extern void toi_try_resume(void);
4960 +
4961 +extern int __toi_post_context_save(void);
4962 +
4963 +extern unsigned int nr_hibernates;
4964 +extern char alt_resume_param[256];
4965 +
4966 +extern void copyback_post(void);
4967 +extern int toi_hibernate(void);
4968 +extern unsigned long extra_pd1_pages_used;
4969 +
4970 +#define SECTOR_SIZE 512
4971 +
4972 +extern void toi_early_boot_message(int can_erase_image, int default_answer,
4973 +       char *warning_reason, ...);
4974 +
4975 +extern int do_check_can_resume(void);
4976 +extern int do_toi_step(int step);
4977 +extern int toi_launch_userspace_program(char *command, int channel_no,
4978 +               enum umh_wait wait, int debug);
4979 +
4980 +extern char tuxonice_signature[9];
4981 +extern int freezer_sync;
4982 +#endif
4983 diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c
4984 new file mode 100644
4985 index 0000000..891c5b2
4986 --- /dev/null
4987 +++ b/kernel/power/tuxonice_alloc.c
4988 @@ -0,0 +1,313 @@
4989 +/*
4990 + * kernel/power/tuxonice_alloc.c
4991 + *
4992 + * Copyright (C) 2008-2010 Nigel Cunningham (nigel at tuxonice net)
4993 + *
4994 + * This file is released under the GPLv2.
4995 + *
4996 + */
4997 +
4998 +#ifdef CONFIG_PM_DEBUG
4999 +#include <linux/module.h>
5000 +#include <linux/slab.h>
5001 +#include "tuxonice_modules.h"
5002 +#include "tuxonice_alloc.h"
5003 +#include "tuxonice_sysfs.h"
5004 +#include "tuxonice.h"
5005 +
5006 +#define TOI_ALLOC_PATHS 40
5007 +
5008 +static DEFINE_MUTEX(toi_alloc_mutex);
5009 +
5010 +static struct toi_module_ops toi_alloc_ops;
5011 +
5012 +static int toi_fail_num;
5013 +static int trace_allocs;
5014 +static atomic_t toi_alloc_count[TOI_ALLOC_PATHS],
5015 +               toi_free_count[TOI_ALLOC_PATHS],
5016 +               toi_test_count[TOI_ALLOC_PATHS],
5017 +               toi_fail_count[TOI_ALLOC_PATHS];
5018 +static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS];
5019 +static int cur_allocd, max_allocd;
5020 +
5021 +static char *toi_alloc_desc[TOI_ALLOC_PATHS] = {
5022 +       "", /* 0 */
5023 +       "get_io_info_struct",
5024 +       "extent",
5025 +       "extent (loading chain)",
5026 +       "userui channel",
5027 +       "userui arg", /* 5 */
5028 +       "attention list metadata",
5029 +       "extra pagedir memory metadata",
5030 +       "bdev metadata",
5031 +       "extra pagedir memory",
5032 +       "header_locations_read", /* 10 */
5033 +       "bio queue",
5034 +       "prepare_readahead",
5035 +       "i/o buffer",
5036 +       "writer buffer in bio_init",
5037 +       "checksum buffer", /* 15 */
5038 +       "compression buffer",
5039 +       "filewriter signature op",
5040 +       "set resume param alloc1",
5041 +       "set resume param alloc2",
5042 +       "debugging info buffer", /* 20 */
5043 +       "check can resume buffer",
5044 +       "write module config buffer",
5045 +       "read module config buffer",
5046 +       "write image header buffer",
5047 +       "read pageset1 buffer", /* 25 */
5048 +       "get_have_image_data buffer",
5049 +       "checksum page",
5050 +       "worker rw loop",
5051 +       "get nonconflicting page",
5052 +       "ps1 load addresses", /* 30 */
5053 +       "remove swap image",
5054 +       "swap image exists",
5055 +       "swap parse sig location",
5056 +       "sysfs kobj",
5057 +       "swap mark resume attempted buffer", /* 35 */
5058 +       "cluster member",
5059 +       "boot kernel data buffer",
5060 +       "setting swap signature",
5061 +       "block i/o bdev struct"
5062 +};
5063 +
5064 +#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \
5065 +       do { \
5066 +               BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \
5067 +               \
5068 +               if (FAIL_NUM == toi_fail_num) { \
5069 +                       atomic_inc(&toi_test_count[FAIL_NUM]); \
5070 +                       toi_fail_num = 0; \
5071 +                       return FAIL_VAL; \
5072 +               } \
5073 +       } while (0)
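
/*
 * Fault-injection flow: writing a path index to the "failure_test" sysfs
 * entry (declared in sysfs_params below) sets toi_fail_num; the next
 * allocation on that path takes the MIGHT_FAIL() branch, counts the test,
 * clears toi_fail_num (so exactly one failure is simulated) and returns
 * the caller-supplied failure value.
 */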
5074 +
5075 +static void alloc_update_stats(int fail_num, void *result, int size)
5076 +{
5077 +       if (!result) {
5078 +               atomic_inc(&toi_fail_count[fail_num]);
5079 +               return;
5080 +       }
5081 +
5082 +       atomic_inc(&toi_alloc_count[fail_num]);
5083 +       if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
5084 +               mutex_lock(&toi_alloc_mutex);
5085 +               toi_cur_allocd[fail_num]++;
5086 +               cur_allocd += size;
5087 +               if (unlikely(cur_allocd > max_allocd)) {
5088 +                       int i;
5089 +
5090 +                       for (i = 0; i < TOI_ALLOC_PATHS; i++)
5091 +                               toi_max_allocd[i] = toi_cur_allocd[i];
5092 +                       max_allocd = cur_allocd;
5093 +               }
5094 +               mutex_unlock(&toi_alloc_mutex);
5095 +       }
5096 +}
5097 +
5098 +static void free_update_stats(int fail_num, int size)
5099 +{
5100 +       BUG_ON(fail_num >= TOI_ALLOC_PATHS);
5101 +       atomic_inc(&toi_free_count[fail_num]);
5102 +       if (unlikely(atomic_read(&toi_free_count[fail_num]) >
5103 +                               atomic_read(&toi_alloc_count[fail_num])))
5104 +               dump_stack();
5105 +       if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
5106 +               mutex_lock(&toi_alloc_mutex);
5107 +               cur_allocd -= size;
5108 +               toi_cur_allocd[fail_num]--;
5109 +               mutex_unlock(&toi_alloc_mutex);
5110 +       }
5111 +}
5112 +
5113 +void *toi_kzalloc(int fail_num, size_t size, gfp_t flags)
5114 +{
5115 +       void *result;
5116 +
5117 +       if (toi_alloc_ops.enabled)
5118 +               MIGHT_FAIL(fail_num, NULL);
5119 +       result = kzalloc(size, flags);
5120 +       if (toi_alloc_ops.enabled)
5121 +               alloc_update_stats(fail_num, result, size);
5122 +       if (fail_num == trace_allocs)
5123 +               dump_stack();
5124 +       return result;
5125 +}
5126 +EXPORT_SYMBOL_GPL(toi_kzalloc);
5127 +
5128 +unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
5129 +               unsigned int order)
5130 +{
5131 +       unsigned long result;
5132 +
5133 +       if (toi_alloc_ops.enabled)
5134 +               MIGHT_FAIL(fail_num, 0);
5135 +       result = __get_free_pages(mask, order);
5136 +       if (toi_alloc_ops.enabled)
5137 +               alloc_update_stats(fail_num, (void *) result,
5138 +                               PAGE_SIZE << order);
5139 +       if (fail_num == trace_allocs)
5140 +               dump_stack();
5141 +       return result;
5142 +}
5143 +EXPORT_SYMBOL_GPL(toi_get_free_pages);
5144 +
5145 +struct page *toi_alloc_page(int fail_num, gfp_t mask)
5146 +{
5147 +       struct page *result;
5148 +
5149 +       if (toi_alloc_ops.enabled)
5150 +               MIGHT_FAIL(fail_num, NULL);
5151 +       result = alloc_page(mask);
5152 +       if (toi_alloc_ops.enabled)
5153 +               alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
5154 +       if (fail_num == trace_allocs)
5155 +               dump_stack();
5156 +       return result;
5157 +}
5158 +EXPORT_SYMBOL_GPL(toi_alloc_page);
5159 +
5160 +unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask)
5161 +{
5162 +       unsigned long result;
5163 +
5164 +       if (fail_num == trace_allocs)
5165 +               dump_stack();
5166 +       if (toi_alloc_ops.enabled)
5167 +               MIGHT_FAIL(fail_num, 0);
5168 +       result = get_zeroed_page(mask);
5169 +       if (toi_alloc_ops.enabled)
5170 +               alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
5171 +       if (fail_num == trace_allocs)
5172 +               dump_stack();
5173 +       return result;
5174 +}
5175 +EXPORT_SYMBOL_GPL(toi_get_zeroed_page);
5176 +
5177 +void toi_kfree(int fail_num, const void *arg, int size)
5178 +{
5179 +       if (arg && toi_alloc_ops.enabled)
5180 +               free_update_stats(fail_num, size);
5181 +
5182 +       if (fail_num == trace_allocs)
5183 +               dump_stack();
5184 +       kfree(arg);
5185 +}
5186 +EXPORT_SYMBOL_GPL(toi_kfree);
5187 +
5188 +void toi_free_page(int fail_num, unsigned long virt)
5189 +{
5190 +       if (virt && toi_alloc_ops.enabled)
5191 +               free_update_stats(fail_num, PAGE_SIZE);
5192 +
5193 +       if (fail_num == trace_allocs)
5194 +               dump_stack();
5195 +       free_page(virt);
5196 +}
5197 +EXPORT_SYMBOL_GPL(toi_free_page);
5198 +
5199 +void toi__free_page(int fail_num, struct page *page)
5200 +{
5201 +       if (page && toi_alloc_ops.enabled)
5202 +               free_update_stats(fail_num, PAGE_SIZE);
5203 +
5204 +       if (fail_num == trace_allocs)
5205 +               dump_stack();
5206 +       __free_page(page);
5207 +}
5208 +EXPORT_SYMBOL_GPL(toi__free_page);
5209 +
5210 +void toi_free_pages(int fail_num, struct page *page, int order)
5211 +{
5212 +       if (page && toi_alloc_ops.enabled)
5213 +               free_update_stats(fail_num, PAGE_SIZE << order);
5214 +
5215 +       if (fail_num == trace_allocs)
5216 +               dump_stack();
5217 +       __free_pages(page, order);
5218 +}
5219 +
5220 +void toi_alloc_print_debug_stats(void)
5221 +{
5222 +       int i, header_done = 0;
5223 +
5224 +       if (!toi_alloc_ops.enabled)
5225 +               return;
5226 +
5227 +       for (i = 0; i < TOI_ALLOC_PATHS; i++)
5228 +               if (atomic_read(&toi_alloc_count[i]) !=
5229 +                   atomic_read(&toi_free_count[i])) {
5230 +                       if (!header_done) {
5231 +                               printk(KERN_INFO "Idx  Allocs   Frees   Tests "
5232 +                                       "  Fails     Max Description\n");
5233 +                               header_done = 1;
5234 +                       }
5235 +
5236 +                       printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i,
5237 +                               atomic_read(&toi_alloc_count[i]),
5238 +                               atomic_read(&toi_free_count[i]),
5239 +                               atomic_read(&toi_test_count[i]),
5240 +                               atomic_read(&toi_fail_count[i]),
5241 +                               toi_max_allocd[i],
5242 +                               toi_alloc_desc[i]);
5243 +               }
5244 +}
5245 +EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats);
5246 +
5247 +static int toi_alloc_initialise(int starting_cycle)
5248 +{
5249 +       int i;
5250 +
5251 +       if (!starting_cycle)
5252 +               return 0;
5253 +
5254 +       for (i = 0; i < TOI_ALLOC_PATHS; i++) {
5255 +               atomic_set(&toi_alloc_count[i], 0);
5256 +               atomic_set(&toi_free_count[i], 0);
5257 +               atomic_set(&toi_test_count[i], 0);
5258 +               atomic_set(&toi_fail_count[i], 0);
5259 +               toi_cur_allocd[i] = 0;
5260 +               toi_max_allocd[i] = 0;
5261 +       }
5262 +
5263 +       max_allocd = 0;
5264 +       cur_allocd = 0;
5265 +       return 0;
5266 +}
5267 +
5268 +static struct toi_sysfs_data sysfs_params[] = {
5269 +       SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL),
5270 +       SYSFS_INT("trace", SYSFS_RW, &trace_allocs, 0, TOI_ALLOC_PATHS, 0,
5271 +                       NULL),
5272 +       SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action,
5273 +                       TOI_GET_MAX_MEM_ALLOCD, 0),
5274 +       SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0,
5275 +                       NULL)
5276 +};
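
/*
 * Given the "alloc" directory name below, these entries should appear as
 * /sys/power/tuxonice/alloc/{failure_test,trace,find_max_mem_allocated,
 * enabled} -- the path is inferred from the sysfs layout referenced in
 * the Kconfig help text, not spelled out here.
 */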
5277 +
5278 +static struct toi_module_ops toi_alloc_ops = {
5279 +       .type                                   = MISC_HIDDEN_MODULE,
5280 +       .name                                   = "allocation debugging",
5281 +       .directory                              = "alloc",
5282 +       .module                                 = THIS_MODULE,
5283 +       .early                                  = 1,
5284 +       .initialise                             = toi_alloc_initialise,
5285 +
5286 +       .sysfs_data             = sysfs_params,
5287 +       .num_sysfs_entries      = sizeof(sysfs_params) /
5288 +               sizeof(struct toi_sysfs_data),
5289 +};
5290 +
5291 +int toi_alloc_init(void)
5292 +{
5293 +       int result = toi_register_module(&toi_alloc_ops);
5294 +       return result;
5295 +}
5296 +
5297 +void toi_alloc_exit(void)
5298 +{
5299 +       toi_unregister_module(&toi_alloc_ops);
5300 +}
5301 +#endif
5302 diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h
5303 new file mode 100644
5304 index 0000000..77e0f0d
5305 --- /dev/null
5306 +++ b/kernel/power/tuxonice_alloc.h
5307 @@ -0,0 +1,52 @@
5308 +/*
5309 + * kernel/power/tuxonice_alloc.h
5310 + *
5311 + * Copyright (C) 2008-2010 Nigel Cunningham (nigel at tuxonice net)
5312 + *
5313 + * This file is released under the GPLv2.
5314 + *
5315 + */
5316 +
5317 +#include <linux/slab.h>
5318 +#define TOI_WAIT_GFP (GFP_NOFS | __GFP_NOWARN)
5319 +#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN)
5320 +
5321 +#ifdef CONFIG_PM_DEBUG
5322 +extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags);
5323 +extern void toi_kfree(int fail_num, const void *arg, int size);
5324 +
5325 +extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
5326 +               unsigned int order);
5327 +#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0)
5328 +extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask);
5329 +extern void toi_free_page(int fail_num, unsigned long buf);
5330 +extern void toi__free_page(int fail_num, struct page *page);
5331 +extern void toi_free_pages(int fail_num, struct page *page, int order);
5332 +extern struct page *toi_alloc_page(int fail_num, gfp_t mask);
5333 +extern int toi_alloc_init(void);
5334 +extern void toi_alloc_exit(void);
5335 +
5336 +extern void toi_alloc_print_debug_stats(void);
5337 +
5338 +#else /* CONFIG_PM_DEBUG */
5339 +
5340 +#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS))
5341 +#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN))
5342 +
5343 +#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER)
5344 +#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS)
5345 +#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS)
5346 +#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0)
5347 +#define toi__free_page(FAIL, PAGE) __free_page(PAGE)
5348 +#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER)
5349 +#define toi_alloc_page(FAIL, MASK) alloc_page(MASK)
5350 +static inline int toi_alloc_init(void)
5351 +{
5352 +       return 0;
5353 +}
5354 +
5355 +static inline void toi_alloc_exit(void) { }
5356 +
5357 +static inline void toi_alloc_print_debug_stats(void) { }
5358 +
5359 +#endif
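
The header above gives every allocation site a numeric path tag so the debug
build can balance per-path alloc/free counters, while non-debug builds fall
back to the plain allocators. A minimal usage sketch; the function and the
choice of path 13 ("i/o buffer" in toi_alloc_desc[]) are illustrative only:

	static int example_tagged_alloc(void)
	{
		unsigned long buf = toi_get_free_page(13, TOI_WAIT_GFP);

		if (!buf)
			return -ENOMEM;
		/* ... use the page ... */
		toi_free_page(13, buf);	/* same tag keeps the counters balanced */
		return 0;
	}
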
5360 diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c
5361 new file mode 100644
5362 index 0000000..1807f8b
5363 --- /dev/null
5364 +++ b/kernel/power/tuxonice_atomic_copy.c
5365 @@ -0,0 +1,418 @@
5366 +/*
5367 + * kernel/power/tuxonice_atomic_copy.c
5368 + *
5369 + * Copyright 2004-2010 Nigel Cunningham (nigel at tuxonice net)
5370 + *
5371 + * Distributed under GPLv2.
5372 + *
5373 + * Routines for doing the atomic save/restore.
5374 + */
5375 +
5376 +#include <linux/suspend.h>
5377 +#include <linux/highmem.h>
5378 +#include <linux/cpu.h>
5379 +#include <linux/freezer.h>
5380 +#include <linux/console.h>
5381 +#include <asm/suspend.h>
5382 +#include "tuxonice.h"
5383 +#include "tuxonice_storage.h"
5384 +#include "tuxonice_power_off.h"
5385 +#include "tuxonice_ui.h"
5386 +#include "tuxonice_io.h"
5387 +#include "tuxonice_prepare_image.h"
5388 +#include "tuxonice_pageflags.h"
5389 +#include "tuxonice_checksum.h"
5390 +#include "tuxonice_builtin.h"
5391 +#include "tuxonice_atomic_copy.h"
5392 +#include "tuxonice_alloc.h"
5393 +#include "tuxonice_modules.h"
5394 +
5395 +unsigned long extra_pd1_pages_used;
5396 +
5397 +/**
5398 + * free_pbe_list - free page backup entries used by the atomic copy code.
5399 + * @list:      List to free.
5400 + * @highmem:   Whether the list is in highmem.
5401 + *
5402 + * Normally, this function isn't used. If, however, we need to abort before
5403 + * doing the atomic copy, we use this to free the pbes previously allocated.
5404 + **/
5405 +static void free_pbe_list(struct pbe **list, int highmem)
5406 +{
5407 +       while (*list) {
5408 +               int i;
5409 +               struct pbe *free_pbe, *next_page = NULL;
5410 +               struct page *page;
5411 +
5412 +               if (highmem) {
5413 +                       page = (struct page *) *list;
5414 +                       free_pbe = (struct pbe *) kmap(page);
5415 +               } else {
5416 +                       page = virt_to_page(*list);
5417 +                       free_pbe = *list;
5418 +               }
5419 +
5420 +               for (i = 0; i < PBES_PER_PAGE; i++) {
5421 +                       if (!free_pbe)
5422 +                               break;
5423 +                       if (highmem)
5424 +                               toi__free_page(29, free_pbe->address);
5425 +                       else
5426 +                               toi_free_page(29,
5427 +                                       (unsigned long) free_pbe->address);
5428 +                       free_pbe = free_pbe->next;
5429 +               }
5430 +
5431 +               if (highmem) {
5432 +                       if (free_pbe)
5433 +                               next_page = free_pbe;
5434 +                       kunmap(page);
5435 +               } else {
5436 +                       if (free_pbe)
5437 +                               next_page = free_pbe;
5438 +               }
5439 +
5440 +               toi__free_page(29, page);
5441 +               *list = (struct pbe *) next_page;
5442 +       }
5443 +}
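+
+/*
+ * Layout note (inferred from the loop above): pbes are packed
+ * PBES_PER_PAGE to a page and chained through ->next. The entry after
+ * the last one in a page links to the page holding the remainder of
+ * the list (for highmem chains the stored value is the next struct
+ * page itself, hence the casts).
+ */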
5444 +
5445 +/**
5446 + * copyback_post - post atomic-restore actions
5447 + *
5448 + * After doing the atomic restore, we have a few more things to do:
5449 + *     1) We want to retain some values across the restore, so we now copy
5450 + *     these from the nosave variables to the normal ones.
5451 + *     2) Set the status flags.
5452 + *     3) Resume devices.
5453 + *     4) Tell userui so it can redraw & restore settings.
5454 + *     5) Reread the page cache.
5455 + **/
5456 +void copyback_post(void)
5457 +{
5458 +       struct toi_boot_kernel_data *bkd =
5459 +               (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
5460 +
5461 +       /*
5462 +        * The boot kernel's data may be larger (newer version) or
5463 +        * smaller (older version) than ours. Copy the minimum
5464 +        * of the two sizes, so that we don't overwrite valid values
5465 +        * from pre-atomic copy.
5466 +        */
5467 +
5468 +       memcpy(&toi_bkd, (char *) boot_kernel_data_buffer,
5469 +                       min_t(int, sizeof(struct toi_boot_kernel_data),
5470 +                               bkd->size));
5471 +
5472 +       if (toi_activate_storage(1))
5473 +               panic("Failed to reactivate our storage.");
5474 +
5475 +       toi_post_atomic_restore_modules(bkd);
5476 +
5477 +       toi_cond_pause(1, "About to reload secondary pagedir.");
5478 +
5479 +       if (read_pageset2(0))
5480 +               panic("Unable to successfully reread the page cache.");
5481 +
5482 +       /*
5483 +        * If the user wants to sleep again after resuming from full-off,
5484 +        * it's most likely to be in order to suspend to ram, so we'll
5485 +        * do this check after loading pageset2, to give them the fastest
5486 +        * wakeup when they are ready to use the computer again.
5487 +        */
5488 +       toi_check_resleep();
5489 +}
5490 +
5491 +/**
5492 + * toi_copy_pageset1 - do the atomic copy of pageset1
5493 + *
5494 + * Make the atomic copy of pageset1. We can't use copy_page (as we once did)
5495 + * because we can't be sure what side effects it has. On my old Duron, with
5496 + * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt
5497 + * count at resume time 4 instead of 3.
5498 + *
5499 + * We don't want to call kmap_atomic unconditionally because it has the side
5500 + * effect of incrementing the preempt count, which will leave it one too high
5501 + * post resume (the page containing the preempt count will be copied after
5502 + * it's incremented). This is essentially the same problem.
5503 + **/
5504 +void toi_copy_pageset1(void)
5505 +{
5506 +       int i;
5507 +       unsigned long source_index, dest_index;
5508 +
5509 +       memory_bm_position_reset(pageset1_map);
5510 +       memory_bm_position_reset(pageset1_copy_map);
5511 +
5512 +       source_index = memory_bm_next_pfn(pageset1_map);
5513 +       dest_index = memory_bm_next_pfn(pageset1_copy_map);
5514 +
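+       /*
+        * Both bitmaps are walked in lockstep: the i'th pfn returned by
+        * pageset1_map is copied to the i'th pfn from pageset1_copy_map.
+        */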
5515 +       for (i = 0; i < pagedir1.size; i++) {
5516 +               unsigned long *origvirt, *copyvirt;
5517 +               struct page *origpage, *copypage;
5518 +               int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1,
5519 +                   was_present1, was_present2;
5520 +
5521 +               origpage = pfn_to_page(source_index);
5522 +               copypage = pfn_to_page(dest_index);
5523 +
5524 +               origvirt = PageHighMem(origpage) ?
5525 +                       kmap_atomic(origpage, KM_USER0) :
5526 +                       page_address(origpage);
5527 +
5528 +               copyvirt = PageHighMem(copypage) ?
5529 +                       kmap_atomic(copypage, KM_USER1) :
5530 +                       page_address(copypage);
5531 +
5532 +               was_present1 = kernel_page_present(origpage);
5533 +               if (!was_present1)
5534 +                       kernel_map_pages(origpage, 1, 1);
5535 +
5536 +               was_present2 = kernel_page_present(copypage);
5537 +               if (!was_present2)
5538 +                       kernel_map_pages(copypage, 1, 1);
5539 +
5540 +               while (loop >= 0) {
5541 +                       *(copyvirt + loop) = *(origvirt + loop);
5542 +                       loop--;
5543 +               }
5544 +
5545 +               if (!was_present1)
5546 +                       kernel_map_pages(origpage, 1, 0);
5547 +
5548 +               if (!was_present2)
5549 +                       kernel_map_pages(copypage, 1, 0);
5550 +
5551 +               if (PageHighMem(origpage))
5552 +                       kunmap_atomic(origvirt, KM_USER0);
5553 +
5554 +               if (PageHighMem(copypage))
5555 +                       kunmap_atomic(copyvirt, KM_USER1);
5556 +
5557 +               source_index = memory_bm_next_pfn(pageset1_map);
5558 +               dest_index = memory_bm_next_pfn(pageset1_copy_map);
5559 +       }
5560 +}
5561 +
5562 +/**
5563 + * __toi_post_context_save - steps after saving the cpu context
5564 + *
5565 + * Steps taken after saving the CPU state to make the actual
5566 + * atomic copy.
5567 + *
5568 + * Called from swsusp_save in snapshot.c via toi_post_context_save.
5569 + **/
5570 +int __toi_post_context_save(void)
5571 +{
5572 +       unsigned long old_ps1_size = pagedir1.size;
5573 +
5574 +       check_checksums();
5575 +
5576 +       free_checksum_pages();
5577 +
5578 +       toi_recalculate_image_contents(1);
5579 +
5580 +       extra_pd1_pages_used = pagedir1.size > old_ps1_size ?
5581 +               pagedir1.size - old_ps1_size : 0;
5582 +
5583 +       if (extra_pd1_pages_used > extra_pd1_pages_allowance) {
5584 +               printk(KERN_INFO "Pageset1 has grown by %lu pages. "
5585 +                       "extra_pages_allowance is currently only %lu.\n",
5586 +                       pagedir1.size - old_ps1_size,
5587 +                       extra_pd1_pages_allowance);
5588 +
5589 +               /*
5590 +                * Highlevel code will see this, clear the state and
5591 +                * retry if we haven't already done so twice.
5592 +                */
5593 +               set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
5594 +               return 1;
5595 +       }
5596 +
5597 +       if (!test_action_state(TOI_TEST_FILTER_SPEED) &&
5598 +           !test_action_state(TOI_TEST_BIO))
5599 +               toi_copy_pageset1();
5600 +
5601 +       return 0;
5602 +}
5603 +
5604 +/**
5605 + * toi_hibernate - high level code for doing the atomic copy
5606 + *
5607 + * High-level code which prepares to do the atomic copy. Loosely based
5608 + * on the swsusp version, but with the following twists:
5609 + *     - We set toi_running so the swsusp code uses our code paths.
5610 + *     - We give better feedback regarding what goes wrong if there is a
5611 + *       problem.
5612 + *     - We use an extra function to call the assembly, just in case this code
5613 + *       is in a module (return address).
5614 + **/
5615 +int toi_hibernate(void)
5616 +{
5617 +       int error;
5618 +
5619 +       toi_running = 1; /* For the swsusp code we use :< */
5620 +
5621 +       error = toi_lowlevel_builtin();
5622 +
5623 +       toi_running = 0;
5624 +       return error;
5625 +}
5626 +
5627 +/**
5628 + * toi_atomic_restore - prepare to do the atomic restore
5629 + *
5630 + * Get ready to do the atomic restore. This part gets us into the same
5631 + * state we were in prior to calling do_toi_lowlevel while
5632 + * hibernating: secondary cpus hot-unplugged and processes frozen,
5633 + * before starting the thread that will do the restore.
5634 + **/
5635 +int toi_atomic_restore(void)
5636 +{
5637 +       int error;
5638 +
5639 +       toi_running = 1;
5640 +
5641 +       toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore.");
5642 +
5643 +       memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line,
5644 +               strlen(saved_command_line));
5645 +
5646 +       toi_pre_atomic_restore_modules(&toi_bkd);
5647 +
5648 +       if (add_boot_kernel_data_pbe())
5649 +               goto Failed;
5650 +
5651 +       toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
5652 +
5653 +       if (toi_go_atomic(PMSG_QUIESCE, 0))
5654 +               goto Failed;
5655 +
5656 +       /* We'll ignore saved state, but this gets preempt count (etc) right */
5657 +       save_processor_state();
5658 +
5659 +       error = swsusp_arch_resume();
5660 +       /*
5661 +        * Code below is only ever reached in case of failure. Otherwise
5662 + * execution continues at the place where swsusp_arch_suspend was called.
5663 + *
5664 + * We don't know whether it's safe to continue (this shouldn't happen),
5665 + * so let's err on the side of caution.
5666 +        */
5667 +       BUG();
5668 +
5669 +Failed:
5670 +       free_pbe_list(&restore_pblist, 0);
5671 +#ifdef CONFIG_HIGHMEM
5672 +       free_pbe_list(&restore_highmem_pblist, 1);
5673 +#endif
5674 +       toi_running = 0;
5675 +       return 1;
5676 +}
5677 +
5678 +/**
5679 + * toi_go_atomic - do the actual atomic copy/restore
5680 + * @state:        The state to use for dpm_suspend_start & power_down calls.
5681 + * @suspend_time:  Whether we're suspending or resuming.
5682 + **/
5683 +int toi_go_atomic(pm_message_t state, int suspend_time)
5684 +{
5685 +       if (suspend_time && platform_begin(1)) {
5686 +               set_abort_result(TOI_PLATFORM_PREP_FAILED);
5687 +               return 1;
5688 +       }
5689 +
5690 +       suspend_console();
5691 +
5692 +       if (dpm_suspend_start(state)) {
5693 +               set_abort_result(TOI_DEVICE_REFUSED);
5694 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
5695 +               return 1;
5696 +       }
5697 +
5698 +       if (suspend_time && arch_prepare_suspend()) {
5699 +               set_abort_result(TOI_ARCH_PREPARE_FAILED);
5700 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1);
5701 +               return 1;
5702 +       }
5703 +
5704 +       /* At this point, dpm_suspend_start() has been called, but *not*
5705 +        * dpm_suspend_noirq(). We *must* dpm_suspend_noirq() now.
5706 +        * Otherwise, drivers for some devices (e.g. interrupt controllers)
5707 +        * become desynchronized with the actual state of the hardware
5708 +        * at resume time, and evil weirdness ensues.
5709 +        */
5710 +
5711 +       if (dpm_suspend_noirq(state)) {
5712 +               set_abort_result(TOI_DEVICE_REFUSED);
5713 +               toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1);
5714 +               return 1;
5715 +       }
5716 +
5717 +       if (suspend_time && platform_pre_snapshot(1)) {
5718 +               set_abort_result(TOI_PRE_SNAPSHOT_FAILED);
5719 +               toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1);
5720 +               return 1;
5721 +       }
5722 +
5723 +       if (!suspend_time && platform_pre_restore(1)) {
5724 +               set_abort_result(TOI_PRE_RESTORE_FAILED);
5725 +               toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1);
5726 +               return 1;
5727 +       }
5728 +
5729 +       if (test_action_state(TOI_LATE_CPU_HOTPLUG)) {
5730 +               if (disable_nonboot_cpus()) {
5731 +                       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
5732 +                       toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG,
5733 +                                       suspend_time, 1);
5734 +                       return 1;
5735 +               }
5736 +       }
5737 +
5738 +       local_irq_disable();
5739 +
5740 +       if (sysdev_suspend(state)) {
5741 +               set_abort_result(TOI_SYSDEV_REFUSED);
5742 +               toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1);
5743 +               return 1;
5744 +       }
5745 +
5746 +       return 0;
5747 +}
5748 +
5749 +/**
5750 + * toi_end_atomic - post atomic copy/restore routines
5751 + * @stage:             What step to start at.
5752 + * @suspend_time:      Whether we're suspending or resuming.
5753 + * @error:             Whether we're recovering from an error.
5754 + **/
5755 +void toi_end_atomic(int stage, int suspend_time, int error)
5756 +{
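+       /* Deliberate fall-through: each stage performs the steps below it too. */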
5757 +       switch (stage) {
5758 +       case ATOMIC_ALL_STEPS:
5759 +               if (!suspend_time)
5760 +                       platform_leave(1);
5761 +               sysdev_resume();
5762 +       case ATOMIC_STEP_IRQS:
5763 +               local_irq_enable();
5764 +       case ATOMIC_STEP_CPU_HOTPLUG:
5765 +               if (test_action_state(TOI_LATE_CPU_HOTPLUG))
5766 +                       enable_nonboot_cpus();
5767 +               platform_restore_cleanup(1);
5768 +       case ATOMIC_STEP_PLATFORM_FINISH:
5769 +               platform_finish(1);
5770 +               dpm_resume_noirq(suspend_time ?
5771 +                       (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
5772 +       case ATOMIC_STEP_DEVICE_RESUME:
5773 +               if (suspend_time && (error & 2))
5774 +                       platform_recover(1);
5775 +               dpm_resume_end(suspend_time ?
5776 +                       ((error & 1) ? PMSG_RECOVER : PMSG_THAW) :
5777 +                       PMSG_RESTORE);
5778 +               resume_console();
5779 +               platform_end(1);
5780 +
5781 +               toi_prepare_status(DONT_CLEAR_BAR, "Post atomic.");
5782 +       }
5783 +}
5784 diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h
5785 new file mode 100644
5786 index 0000000..e61b27b
5787 --- /dev/null
5788 +++ b/kernel/power/tuxonice_atomic_copy.h
5789 @@ -0,0 +1,20 @@
5790 +/*
5791 + * kernel/power/tuxonice_atomic_copy.h
5792 + *
5793 + * Copyright 2008-2010 Nigel Cunningham (nigel at tuxonice net)
5794 + *
5795 + * Distributed under GPLv2.
5796 + *
5797 + * Routines for doing the atomic save/restore.
5798 + */
5799 +
5800 +enum {
5801 +       ATOMIC_ALL_STEPS,
5802 +       ATOMIC_STEP_IRQS,
5803 +       ATOMIC_STEP_CPU_HOTPLUG,
5804 +       ATOMIC_STEP_PLATFORM_FINISH,
5805 +       ATOMIC_STEP_DEVICE_RESUME,
5806 +};
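+
+/*
+ * The stages above are ordered so that toi_end_atomic() can be entered
+ * at any point and fall through all of the remaining clean-up steps.
+ */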
5807 +
5808 +int toi_go_atomic(pm_message_t state, int suspend_time);
5809 +void toi_end_atomic(int stage, int suspend_time, int error);
5810 diff --git a/kernel/power/tuxonice_bio.h b/kernel/power/tuxonice_bio.h
5811 new file mode 100644
5812 index 0000000..9627ccc
5813 --- /dev/null
5814 +++ b/kernel/power/tuxonice_bio.h
5815 @@ -0,0 +1,77 @@
5816 +/*
5817 + * kernel/power/tuxonice_bio.h
5818 + *
5819 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
5820 + *
5821 + * Distributed under GPLv2.
5822 + *
5823 + * This file contains declarations for functions exported from
5824 + * tuxonice_bio.c, which contains low-level I/O functions.
5825 + */
5826 +
5827 +#include <linux/buffer_head.h>
5828 +#include "tuxonice_extent.h"
5829 +
5830 +void toi_put_extent_chain(struct hibernate_extent_chain *chain);
5831 +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
5832 +               unsigned long start, unsigned long end);
5833 +
5834 +struct hibernate_extent_saved_state {
5835 +       int extent_num;
5836 +       struct hibernate_extent *extent_ptr;
5837 +       unsigned long offset;
5838 +};
5839 +
5840 +struct toi_bdev_info {
5841 +       struct toi_bdev_info *next;
5842 +       struct hibernate_extent_chain blocks;
5843 +       struct block_device *bdev;
5844 +       struct toi_module_ops *allocator;
5845 +       int allocator_index;
5846 +       struct hibernate_extent_chain allocations;
5847 +       char name[266]; /* "swap on " or "file " + up to 256 chars */
5848 +
5849 +       /* Saved in header */
5850 +       char uuid[17];
5851 +       dev_t dev_t;
5852 +       int prio;
5853 +       int bmap_shift;
5854 +       int blocks_per_page;
5855 +       unsigned long pages_used;
5856 +       struct hibernate_extent_saved_state saved_state[4];
5857 +};
5858 +
5859 +struct toi_extent_iterate_state {
5860 +       struct toi_bdev_info *current_chain;
5861 +       int num_chains;
5862 +       int saved_chain_number[4];
5863 +       struct toi_bdev_info *saved_chain_ptr[4];
5864 +};
5865 +
5866 +/*
5867 + * Our exported interface, so that the swapwriter and filewriter don't
5868 + * need to duplicate these functions.
5869 + */
5870 +struct toi_bio_ops {
5871 +       int (*bdev_page_io) (int rw, struct block_device *bdev, long pos,
5872 +                       struct page *page);
5873 +       int (*register_storage)(struct toi_bdev_info *new);
5874 +       void (*free_storage)(void);
5875 +};
5876 +
5877 +struct toi_allocator_ops {
5878 +       unsigned long (*toi_swap_storage_available) (void);
5879 +};
5880 +
5881 +extern struct toi_bio_ops toi_bio_ops;
5882 +
5883 +extern char *toi_writer_buffer;
5884 +extern int toi_writer_buffer_posn;
5885 +
5886 +struct toi_bio_allocator_ops {
5887 +       int (*register_storage) (void);
5888 +       unsigned long (*storage_available)(void);
5889 +       int (*allocate_storage) (struct toi_bdev_info *, unsigned long);
5890 +       int (*bmap) (struct toi_bdev_info *);
5891 +       void (*free_storage) (struct toi_bdev_info *);
5892 +};
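+
+/*
+ * Sketch only (hypothetical, not part of TuxOnIce): a storage backend
+ * implementing this interface would fill the ops in roughly like so,
+ * with each example_* function being an imaginary stub:
+ *
+ *     static struct toi_bio_allocator_ops example_bio_allocator_ops = {
+ *             .register_storage  = example_register_storage,
+ *             .storage_available = example_storage_available,
+ *             .allocate_storage  = example_allocate_storage,
+ *             .bmap              = example_bmap,
+ *             .free_storage      = example_free_storage,
+ *     };
+ */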
5893 diff --git a/kernel/power/tuxonice_bio_chains.c b/kernel/power/tuxonice_bio_chains.c
5894 new file mode 100644
5895 index 0000000..2ac2042
5896 --- /dev/null
5897 +++ b/kernel/power/tuxonice_bio_chains.c
5898 @@ -0,0 +1,1044 @@
5899 +/*
5900 + * kernel/power/tuxonice_bio_chains.c
5901 + *
5902 + * Copyright (C) 2009-2010 Nigel Cunningham (nigel at tuxonice net)
5903 + *
5904 + * Distributed under GPLv2.
5905 + *
5906 + */
5907 +
5908 +#include <linux/mm_types.h>
5909 +#include "tuxonice_bio.h"
5910 +#include "tuxonice_bio_internal.h"
5911 +#include "tuxonice_alloc.h"
5912 +#include "tuxonice_ui.h"
5913 +#include "tuxonice.h"
5914 +#include "tuxonice_io.h"
5915 +
5916 +static struct toi_bdev_info *prio_chain_head;
5917 +static int num_chains;
5918 +
5919 +/* Pointer to current entry being loaded/saved. */
5920 +struct toi_extent_iterate_state toi_writer_posn;
5921 +
5922 +#define metadata_size (sizeof(struct toi_bdev_info) - \
5923 +               offsetof(struct toi_bdev_info, uuid))
5924 +
5925 +/*
5926 + * Maps the current stream to the saved-state slot that marks the start
5927 + * of the following section: after section 0 (the header) comes the
5928 + * position saved in slot 2, hence next_section[0] = 2.
5927 + */
5928 +static int next_section[3] = { 2, 3, 1 };
5929 +
5930 +/**
5931 + * dump_block_chains - print the contents of the bdev info array.
5932 + **/
5933 +void dump_block_chains(void)
5934 +{
5935 +       int i = 0;
5936 +       int j;
5937 +       struct toi_bdev_info *cur_chain = prio_chain_head;
5938 +
5939 +       while (cur_chain) {
5940 +               struct hibernate_extent *this = cur_chain->blocks.first;
5941 +
5942 +               printk(KERN_DEBUG "Chain %d (prio %d):", i, cur_chain->prio);
5943 +
5944 +               while (this) {
5945 +                       printk(KERN_CONT " [%lu-%lu]%s", this->start,
5946 +                                       this->end, this->next ? "," : "");
5947 +                       this = this->next;
5948 +               }
5949 +
5950 +               printk(KERN_CONT "\n");
5951 +               cur_chain = cur_chain->next;
5952 +               i++;
5953 +       }
5954 +
5955 +       printk(KERN_DEBUG "Saved states:\n");
5956 +       for (i = 0; i < 4; i++) {
5957 +               printk(KERN_DEBUG "Slot %d: Chain %d.\n",
5958 +                       i, toi_writer_posn.saved_chain_number[i]);
5959 +
5960 +               cur_chain = prio_chain_head;
5961 +               j = 0;
5962 +               while (cur_chain) {
5963 +                       printk(KERN_DEBUG " Chain %d: Extent %d. Offset %lu.\n",
5964 +                                       j, cur_chain->saved_state[i].extent_num,
5965 +                                       cur_chain->saved_state[i].offset);
5966 +                       cur_chain = cur_chain->next;
5967 +                       j++;
5968 +               }
5969 +               printk(KERN_CONT "\n");
5970 +       }
5971 +}
5972 +
5973 +/**
5974 + * toi_extent_chain_next - advance the current chain's position one block
5975 + *
5976 + * Move the current chain's offset forward by one, stepping to the next
5977 + * extent (or ending the chain) when the current extent is exhausted.
5975 + **/
5976 +static void toi_extent_chain_next(void)
5977 +{
5978 +       struct toi_bdev_info *this = toi_writer_posn.current_chain;
5979 +
5980 +       if (!this->blocks.current_extent)
5981 +               return;
5982 +
5983 +       if (this->blocks.current_offset == this->blocks.current_extent->end) {
5984 +               if (this->blocks.current_extent->next) {
5985 +                       this->blocks.current_extent =
5986 +                               this->blocks.current_extent->next;
5987 +                       this->blocks.current_offset =
5988 +                               this->blocks.current_extent->start;
5989 +               } else {
5990 +                       this->blocks.current_extent = NULL;
5991 +                       this->blocks.current_offset = 0;
5992 +               }
5993 +       } else
5994 +               this->blocks.current_offset++;
5995 +}
5996 +
5997 +/**
5998 + * __find_next_chain_same_prio - find another usable chain of equal priority
5999 + *
6000 + * Walk through the list of chains, wrapping to the head, looking for
6001 + * another chain of the same priority that still has extents to use.
6002 + * If we get back to the starting chain, return it again.
6003 + **/
6001 +static struct toi_bdev_info *__find_next_chain_same_prio(void)
6002 +{
6003 +       struct toi_bdev_info *start_chain = toi_writer_posn.current_chain;
6004 +       struct toi_bdev_info *this = start_chain;
6005 +       int orig_prio = this->prio;
6006 +
6007 +       do {
6008 +               this = this->next;
6009 +
6010 +               if (!this)
6011 +                       this = prio_chain_head;
6012 +
6013 +               /* Back on original chain? Use it again. */
6014 +               if (this == start_chain)
6015 +                       return start_chain;
6016 +
6017 +       } while (!this->blocks.current_extent || this->prio != orig_prio);
6018 +
6019 +       return this;
6020 +}
6021 +
6022 +static void find_next_chain(void)
6023 +{
6024 +       struct toi_bdev_info *this;
6025 +
6026 +       this = __find_next_chain_same_prio();
6027 +
6028 +       /*
6029 +        * If we didn't get another chain of the same priority that we
6030 +        * can use, look for the next priority.
6031 +        */
6032 +       while (this && !this->blocks.current_extent)
6033 +               this = this->next;
6034 +
6035 +       toi_writer_posn.current_chain = this;
6036 +}
6037 +
6038 +/**
6039 + * toi_extent_state_next - go to the next extent
6040 + * @blocks: The number of blocks to advance.
6041 + * @current_stream: The stream being read or written; the header
6042 + *                  (stream 0) is never striped across chains.
6042 + *
6043 + * Given a state, progress to the next valid entry. We may begin in an
6044 + * invalid state, as we do when invoked after extent_state_goto_start below.
6045 + *
6046 + * When using compression and expected_compression > 0, we let the image size
6047 + * be larger than storage, so we can validly run out of data to return.
6048 + **/
6049 +static unsigned long toi_extent_state_next(int blocks, int current_stream)
6050 +{
6051 +       int i;
6052 +
6053 +       if (!toi_writer_posn.current_chain)
6054 +               return -ENOSPC;
6055 +
6056 +       /* Assume chains always have lengths that are multiples of @blocks */
6057 +       for (i = 0; i < blocks; i++)
6058 +               toi_extent_chain_next();
6059 +
6060 +       /* The header stream is not striped */
6061 +       if (current_stream ||
6062 +           !toi_writer_posn.current_chain->blocks.current_extent)
6063 +               find_next_chain();
6064 +
6065 +       return toi_writer_posn.current_chain ? 0 : -ENOSPC;
6066 +}
6067 +
6068 +static void toi_insert_chain_in_prio_list(struct toi_bdev_info *this)
6069 +{
6070 +       struct toi_bdev_info **prev_ptr;
6071 +       struct toi_bdev_info *cur;
6072 +
6073 +       /* Loop through the existing chain, finding where to insert it */
6074 +       prev_ptr = &prio_chain_head;
6075 +       cur = prio_chain_head;
6076 +
6077 +       while (cur && cur->prio >= this->prio) {
6078 +               prev_ptr = &cur->next;
6079 +               cur = cur->next;
6080 +       }
6081 +
6082 +       this->next = *prev_ptr;
6083 +       *prev_ptr = this;
6084 +
6088 +       num_chains++;
6089 +}
6090 +
6091 +/**
6092 + * toi_extent_state_goto_start - reinitialize the extent chain iterators
6093 + *
6094 + * Reset every chain to the first extent in its block list and make the
6095 + * highest-priority chain the current one.
6094 + **/
6095 +void toi_extent_state_goto_start(void)
6096 +{
6097 +       struct toi_bdev_info *this = prio_chain_head;
6098 +
6099 +       while (this) {
6100 +               toi_message(TOI_IO, TOI_VERBOSE, 0,
6101 +                       "Setting current extent to %p.", this->blocks.first);
6102 +               this->blocks.current_extent = this->blocks.first;
6103 +               if (this->blocks.current_extent) {
6104 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
6105 +                                       "Setting current offset to %lu.",
6106 +                                       this->blocks.current_extent->start);
6107 +                       this->blocks.current_offset =
6108 +                               this->blocks.current_extent->start;
6109 +               }
6110 +
6111 +               this = this->next;
6112 +       }
6113 +
6114 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Setting current chain to %p.",
6115 +                       prio_chain_head);
6116 +       toi_writer_posn.current_chain = prio_chain_head;
6117 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Leaving extent state goto start.");
6118 +}
6119 +
6120 +/**
6121 + * toi_extent_state_save - save state of the iterator
6122 + * @slot:      Saved-state slot (0-3) to populate
6123 + *
6124 + * Record each chain's current position in a format that can be used
6125 + * with relocated chains (at resume time).
6128 + **/
6129 +void toi_extent_state_save(int slot)
6130 +{
6131 +       struct toi_bdev_info *cur_chain = prio_chain_head;
6132 +       struct hibernate_extent *extent;
6133 +       struct hibernate_extent_saved_state *chain_state;
6134 +       int i = 0;
6135 +
6136 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_extent_state_save, slot %d.",
6137 +                       slot);
6138 +
6139 +       if (!toi_writer_posn.current_chain) {
6140 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "No current chain => "
6141 +                               "chain_num = -1.");
6142 +               toi_writer_posn.saved_chain_number[slot] = -1;
6143 +               return;
6144 +       }
6145 +
6146 +       while (cur_chain) {
6147 +               i++;
6148 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Saving chain %d (%p) "
6149 +                               "state, slot %d.", i, cur_chain, slot);
6150 +
6151 +               chain_state = &cur_chain->saved_state[slot];
6152 +
6153 +               chain_state->offset = cur_chain->blocks.current_offset;
6154 +
6155 +               if (toi_writer_posn.current_chain == cur_chain) {
6156 +                       toi_writer_posn.saved_chain_number[slot] = i;
6157 +                       toi_message(TOI_IO, TOI_VERBOSE, 0, "This is the chain "
6158 +                                       "we were on => chain_num is %d.", i);
6159 +               }
6160 +
6161 +               if (!cur_chain->blocks.current_extent) {
6162 +                       chain_state->extent_num = 0;
6163 +                       toi_message(TOI_IO, TOI_VERBOSE, 0, "No current extent "
6164 +                                       "for this chain => extent_num %d is 0.",
6165 +                                       i);
6166 +                       cur_chain = cur_chain->next;
6167 +                       continue;
6168 +               }
6169 +
6170 +               extent = cur_chain->blocks.first;
6171 +               chain_state->extent_num = 1;
6172 +
6173 +               while (extent != cur_chain->blocks.current_extent) {
6174 +                       chain_state->extent_num++;
6175 +                       extent = extent->next;
6176 +               }
6177 +
6178 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "extent num %d is %d.", i,
6179 +                               chain_state->extent_num);
6180 +
6181 +               cur_chain = cur_chain->next;
6182 +       }
6183 +       toi_message(TOI_IO, TOI_VERBOSE, 0,
6184 +                       "Completed saving extent state slot %d.", slot);
6185 +}
6186 +
6187 +/**
6188 + * toi_extent_state_restore - restore the position saved by extent_state_save
6189 + * @slot:      Saved-state slot (0-3) to restore from
6191 + **/
6192 +void toi_extent_state_restore(int slot)
6193 +{
6194 +       int i = 0;
6195 +       struct toi_bdev_info *cur_chain = prio_chain_head;
6196 +       struct hibernate_extent_saved_state *chain_state;
6197 +
6198 +       toi_message(TOI_IO, TOI_VERBOSE, 0,
6199 +                       "toi_extent_state_restore - slot %d.", slot);
6200 +
6201 +       if (toi_writer_posn.saved_chain_number[slot] == -1) {
6202 +               toi_writer_posn.current_chain = NULL;
6203 +               return;
6204 +       }
6205 +
6206 +       while (cur_chain) {
6207 +               int posn;
6208 +               int j;
6209 +               i++;
6210 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Restoring chain %d (%p) "
6211 +                               "state, slot %d.", i, cur_chain, slot);
6212 +
6213 +               chain_state = &cur_chain->saved_state[slot];
6214 +
6215 +               posn = chain_state->extent_num;
6216 +
6217 +               cur_chain->blocks.current_extent = cur_chain->blocks.first;
6218 +               cur_chain->blocks.current_offset = chain_state->offset;
6219 +
6220 +               if (i == toi_writer_posn.saved_chain_number[slot]) {
6221 +                       toi_writer_posn.current_chain = cur_chain;
6222 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
6223 +                                       "Found current chain.");
6224 +               }
6225 +
6226 +               for (j = 0; j < 4; j++)
6227 +                       if (i == toi_writer_posn.saved_chain_number[j]) {
6228 +                               toi_writer_posn.saved_chain_ptr[j] = cur_chain;
6229 +                               toi_message(TOI_IO, TOI_VERBOSE, 0,
6230 +                                       "Found saved chain ptr %d (%p) (offset"
6231 +                                       " %lu).", j, cur_chain,
6232 +                                       cur_chain->saved_state[j].offset);
6233 +                       }
6234 +
6235 +               if (posn) {
6236 +                       while (--posn)
6237 +                               cur_chain->blocks.current_extent =
6238 +                                       cur_chain->blocks.current_extent->next;
6239 +               } else
6240 +                       cur_chain->blocks.current_extent = NULL;
6241 +
6242 +               cur_chain = cur_chain->next;
6243 +       }
6244 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Done.");
6245 +       if (test_action_state(TOI_LOGALL))
6246 +               dump_block_chains();
6247 +}
6248 +
6249 +/*
6250 + * Storage needed
6251 + *
6252 + * Returns the amount of space in the image header required
6253 + * for the chain data. This ignores the links between
6254 + * pages, which we factor in when allocating the space.
6255 + */
6256 +int toi_bio_devinfo_storage_needed(void)
6257 +{
6258 +       int result = sizeof(num_chains);
6259 +       struct toi_bdev_info *chain = prio_chain_head;
6260 +
6261 +       while (chain) {
6262 +               result += metadata_size;
6263 +
6264 +               /* Chain size */
6265 +               result += sizeof(int);
6266 +
6267 +               /* Extents */
6268 +               result += (2 * sizeof(unsigned long) *
6269 +                       chain->blocks.num_extents);
6270 +
6271 +               chain = chain->next;
6272 +       }
6273 +
6274 +       result += 4 * sizeof(int);
6275 +       return result;
6276 +}
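+
+/*
+ * Worked example (illustrative, 64-bit sizes assumed): two chains with
+ * 3 and 5 extents give
+ *     4 (num_chains) + 2 * metadata_size + 2 * 4 (chain sizes)
+ *     + (3 + 5) * 2 * 8 (extent start/end pairs) + 4 * 4 (chain numbers)
+ * = 156 + 2 * metadata_size bytes of header space.
+ */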
6277 +
6278 +static unsigned long chain_pages_used(struct toi_bdev_info *chain)
6279 +{
6280 +       struct hibernate_extent *this = chain->blocks.first;
6281 +       struct hibernate_extent_saved_state *state = &chain->saved_state[3];
6282 +       unsigned long size = 0;
6283 +       int extent_idx = 1;
6284 +
6285 +       if (!state->extent_num) {
6286 +               if (!this)
6287 +                       return 0;
6288 +               else
6289 +                       return chain->blocks.size;
6290 +       }
6291 +
6292 +       while (extent_idx < state->extent_num) {
6293 +               size += (this->end - this->start + 1);
6294 +               this = this->next;
6295 +               extent_idx++;
6296 +       }
6297 +
6298 +       /* We didn't use the one we're sitting on, so don't count it */
6299 +       return size + state->offset - this->start;
6300 +}
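+
+/*
+ * Example (illustrative): with extents [10-14] and [20-29], and slot 3
+ * recording extent_num 2 at offset 22, the first extent contributes 5
+ * pages and the partial second extent 22 - 20 = 2, i.e. 7 pages used.
+ */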
6301 +
6302 +/**
6303 + * toi_serialise_extent_chain - write a chain in the image
6304 + * @chain:     Chain to write.
6305 + **/
6306 +static int toi_serialise_extent_chain(struct toi_bdev_info *chain)
6307 +{
6308 +       struct hibernate_extent *this;
6309 +       int ret;
6310 +       int i = 1;
6311 +
6312 +       chain->pages_used = chain_pages_used(chain);
6313 +
6314 +       if (test_action_state(TOI_LOGALL))
6315 +               dump_block_chains();
6316 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Serialising chain (dev_t %lx).",
6317 +                       chain->dev_t);
6318 +       /* Device info -  dev_t, prio, bmap_shift, blocks per page, positions */
6319 +       ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
6320 +                       (char *) &chain->uuid, metadata_size);
6321 +       if (ret)
6322 +               return ret;
6323 +
6324 +       /* Num extents */
6325 +       ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
6326 +                       (char *) &chain->blocks.num_extents, sizeof(int));
6327 +       if (ret)
6328 +               return ret;
6329 +
6330 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "%d extents.",
6331 +                       chain->blocks.num_extents);
6332 +
6333 +       this = chain->blocks.first;
6334 +       while (this) {
6335 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent %d.", i);
6336 +               ret = toiActiveAllocator->rw_header_chunk(WRITE,
6337 +                               &toi_blockwriter_ops,
6338 +                               (char *) this, 2 * sizeof(this->start));
6339 +               if (ret)
6340 +                       return ret;
6341 +               this = this->next;
6342 +               i++;
6343 +       }
6344 +
6345 +       return ret;
6346 +}
6347 +
6348 +int toi_serialise_extent_chains(void)
6349 +{
6350 +       struct toi_bdev_info *this = prio_chain_head;
6351 +       int result;
6352 +
6353 +       /* Write the number of chains */
6354 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Write number of chains (%d)",
6355 +                       num_chains);
6356 +       result = toiActiveAllocator->rw_header_chunk(WRITE,
6357 +                       &toi_blockwriter_ops, (char *) &num_chains,
6358 +                       sizeof(int));
6359 +       if (result)
6360 +               return result;
6361 +
6362 +       /* Then the chains themselves */
6363 +       while (this) {
6364 +               result = toi_serialise_extent_chain(this);
6365 +               if (result)
6366 +                       return result;
6367 +               this = this->next;
6368 +       }
6369 +
6370 +       /*
6371 +        * Finally, the chain we should be on at the start of each
6372 +        * section.
6373 +        */
6374 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Saved chain numbers.");
6375 +       result = toiActiveAllocator->rw_header_chunk(WRITE,
6376 +                       &toi_blockwriter_ops,
6377 +                       (char *) &toi_writer_posn.saved_chain_number[0],
6378 +                       4 * sizeof(int));
6379 +
6380 +       return result;
6381 +}
6382 +
6383 +int toi_register_storage_chain(struct toi_bdev_info *new)
6384 +{
6385 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Inserting chain %p into list.",
6386 +                       new);
6387 +       toi_insert_chain_in_prio_list(new);
6388 +       return 0;
6389 +}
6390 +
6391 +static void free_bdev_info(struct toi_bdev_info *chain)
6392 +{
6393 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Free chain %p.", chain);
6394 +
6395 +       toi_message(TOI_IO, TOI_VERBOSE, 0, " - Block extents.");
6396 +       toi_put_extent_chain(&chain->blocks);
6397 +
6398 +       /*
6399 +        * The allocator may need to do more than just free the chains
6400 +        * (swap_free, for example). Don't call from boot kernel.
6401 +        */
6402 +       toi_message(TOI_IO, TOI_VERBOSE, 0, " - Allocator extents.");
6403 +       if (chain->allocator)
6404 +               chain->allocator->bio_allocator_ops->free_storage(chain);
6405 +
6406 +       /*
6407 +        * Dropping out of reading atomic copy? Need to undo
6408 +        * toi_open_by_devnum.
6409 +        */
6410 +       toi_message(TOI_IO, TOI_VERBOSE, 0, " - Bdev.");
6411 +       if (chain->bdev && !IS_ERR(chain->bdev) &&
6412 +                       chain->bdev != resume_block_device &&
6413 +                       chain->bdev != header_block_device &&
6414 +                       test_toi_state(TOI_TRYING_TO_RESUME))
6415 +               toi_close_bdev(chain->bdev);
6416 +
6417 +       /* Poison */
6418 +       toi_message(TOI_IO, TOI_VERBOSE, 0, " - Struct.");
6419 +       toi_kfree(39, chain, sizeof(*chain));
6420 +
6421 +       if (prio_chain_head == chain)
6422 +               prio_chain_head = NULL;
6423 +
6424 +       num_chains--;
6425 +}
6426 +
6427 +void free_all_bdev_info(void)
6428 +{
6429 +       struct toi_bdev_info *this = prio_chain_head;
6430 +
6431 +       while (this) {
6432 +               struct toi_bdev_info *next = this->next;
6433 +               free_bdev_info(this);
6434 +               this = next;
6435 +       }
6436 +
6437 +       memset((char *) &toi_writer_posn, 0, sizeof(toi_writer_posn));
6438 +       prio_chain_head = NULL;
6439 +}
6440 +
6441 +static void set_up_start_position(void)
6442 +{
6443 +       toi_writer_posn.current_chain = prio_chain_head;
6444 +       go_next_page(0, 0);
6445 +}
6446 +
6447 +/**
6448 + * toi_load_extent_chain - read back a chain saved in the image
6449 + * @index:     Index of the chain being loaded
6450 + * @num_loaded:        Running count of extents loaded across all chains
6451 + *
6452 + * The linked list of extents is reconstructed from the disk and the new
6453 + * chain is inserted into the priority list.
6453 + **/
6454 +int toi_load_extent_chain(int index, int *num_loaded)
6455 +{
6456 +       struct toi_bdev_info *chain = toi_kzalloc(39,
6457 +                       sizeof(struct toi_bdev_info), GFP_ATOMIC);
6458 +       struct hibernate_extent *this, *last = NULL;
6459 +       int i, ret;
6460 +
6461 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Loading extent chain %d.", index);
6462 +       /* Get dev_t, prio, bmap_shift, blocks per page, positions */
6463 +       ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
6464 +                       (char *) &chain->uuid, metadata_size);
6465 +
6466 +       if (ret) {
6467 +               printk(KERN_ERR "Failed to read extent chain metadata.\n");
6468 +               toi_kfree(39, chain, sizeof(*chain));
6469 +               return 1;
6470 +       }
6471 +
6472 +       toi_bkd.pages_used[index] = chain->pages_used;
6473 +
6474 +       ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
6475 +                       (char *) &chain->blocks.num_extents, sizeof(int));
6476 +       if (ret) {
6477 +               printk(KERN_ERR "Failed to read the size of extent chain.\n");
6478 +               toi_kfree(39, chain, sizeof(*chain));
6479 +               return 1;
6480 +       }
6481 +
6482 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "%d extents.",
6483 +                       chain->blocks.num_extents);
6484 +
6485 +       for (i = 0; i < chain->blocks.num_extents; i++) {
6486 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent %d.", i + 1);
6487 +
6488 +               this = toi_kzalloc(2, sizeof(struct hibernate_extent),
6489 +                               TOI_ATOMIC_GFP);
6490 +               if (!this) {
6491 +                       printk(KERN_INFO "Failed to allocate a new extent.\n");
6492 +                       free_bdev_info(chain);
6493 +                       return -ENOMEM;
6494 +               }
6495 +               this->next = NULL;
6496 +               /* Get the next page */
6497 +               ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
6498 +                               NULL, (char *) this, 2 * sizeof(this->start));
6499 +               if (ret) {
6500 +                       printk(KERN_INFO "Failed to read an extent.\n");
6501 +                       toi_kfree(2, this, sizeof(struct hibernate_extent));
6502 +                       free_bdev_info(chain);
6503 +                       return 1;
6504 +               }
6505 +
6506 +               if (last)
6507 +                       last->next = this;
6508 +               else {
6509 +                       char b1[32], b2[32], b3[32];
6510 +                       /*
6511 +                        * Open the bdev
6512 +                        */
6513 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
6514 +                               "Chain dev_t is %s. Resume dev_t is %s. Header"
6515 +                               " dev_t is %s.\n",
6516 +                               format_dev_t(b1, chain->dev_t),
6517 +                               format_dev_t(b2, resume_dev_t),
6518 +                               format_dev_t(b3, toi_sig_data->header_dev_t));
6519 +
6520 +                       if (chain->dev_t == resume_dev_t)
6521 +                               chain->bdev = resume_block_device;
6522 +                       else if (chain->dev_t == toi_sig_data->header_dev_t)
6523 +                               chain->bdev = header_block_device;
6524 +                       else {
6525 +                               chain->bdev = toi_open_bdev(chain->uuid,
6526 +                                               chain->dev_t, 1);
6527 +                               if (IS_ERR(chain->bdev)) {
6528 +                                       free_bdev_info(chain);
6529 +                                       return -ENODEV;
6530 +                               }
6531 +                       }
6532 +
6533 +                       toi_message(TOI_IO, TOI_VERBOSE, 0, "Chain bmap shift "
6534 +                                       "is %d and blocks per page is %d.",
6535 +                                       chain->bmap_shift,
6536 +                                       chain->blocks_per_page);
6537 +
6538 +                       chain->blocks.first = this;
6539 +
6540 +                       /*
6541 +                        * Couldn't do this earlier, but can't do
6542 +                        * goto_start now - we may have already used blocks
6543 +                        * in the first chain.
6544 +                        */
6545 +                       chain->blocks.current_extent = this;
6546 +                       chain->blocks.current_offset = this->start;
6547 +
6548 +                       /*
6549 +                        * Can't wait until we've read the whole chain
6550 +                        * before we insert it in the list. We might need
6551 +                        * this chain to read the next page in the header
6552 +                        */
6553 +                       toi_insert_chain_in_prio_list(chain);
6554 +               }
6555 +
6556 +               /*
6557 +                * We have to wait until 2 extents are loaded before setting up
6558 +                * properly because if the first extent has only one page, we
6559 +                * will need to put the position on the second extent. Sounds
6560 +                * obvious, but it wasn't!
6561 +                */
6562 +               (*num_loaded)++;
6563 +               if ((*num_loaded) == 2)
6564 +                       set_up_start_position();
6565 +               last = this;
6566 +       }
6567 +
6568 +       /*
6569 +        * Shouldn't get empty chains, but it's not impossible. Link them in so
6570 +        * they get freed properly later.
6571 +        */
6572 +       if (!chain->blocks.num_extents)
6573 +               toi_insert_chain_in_prio_list(chain);
6574 +
6575 +       if (!chain->blocks.current_extent) {
6576 +               chain->blocks.current_extent = chain->blocks.first;
6577 +               if (chain->blocks.current_extent)
6578 +                       chain->blocks.current_offset =
6579 +                               chain->blocks.current_extent->start;
6580 +       }
6581 +       return 0;
6582 +}
6583 +
6584 +int toi_load_extent_chains(void)
6585 +{
6586 +       int result;
6587 +       int to_load;
6588 +       int i;
6589 +       int extents_loaded = 0;
6590 +
6591 +       result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
6592 +                       (char *) &to_load,
6593 +                       sizeof(int));
6594 +       if (result)
6595 +               return result;
6596 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "%d chains to read.", to_load);
6597 +
6598 +       for (i = 0; i < to_load; i++) {
6599 +               toi_message(TOI_IO, TOI_VERBOSE, 0, " >> Loading chain %d/%d.",
6600 +                               i, to_load);
6601 +               result = toi_load_extent_chain(i, &extents_loaded);
6602 +               if (result)
6603 +                       return result;
6604 +       }
6605 +
6606 +       /* If we never got to a second extent, we still need to do this. */
6607 +       if (extents_loaded == 1)
6608 +               set_up_start_position();
6609 +
6610 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading saved chain numbers.");
6611 +       result = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
6612 +                       &toi_blockwriter_ops,
6613 +                       (char *) &toi_writer_posn.saved_chain_number[0],
6614 +                       4 * sizeof(int));
6615 +
6616 +       return result;
6617 +}
6618 +
6619 +static int toi_end_of_stream(int writing, int section_barrier)
6620 +{
6621 +       struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
6622 +       int compare_to = next_section[current_stream];
6623 +       struct toi_bdev_info *compare_chain =
6624 +               toi_writer_posn.saved_chain_ptr[compare_to];
6625 +       int compare_offset = compare_chain ?
6626 +               compare_chain->saved_state[compare_to].offset : 0;
6627 +
6628 +       if (!section_barrier)
6629 +               return 0;
6630 +
6631 +       if (!cur_chain)
6632 +               return 1;
6633 +
6634 +       if (cur_chain == compare_chain &&
6635 +           cur_chain->blocks.current_offset == compare_offset) {
6636 +               if (writing) {
6637 +                       if (!current_stream) {
6638 +                               debug_broken_header();
6639 +                               return 1;
6640 +                       }
6641 +               } else {
6642 +                       more_readahead = 0;
6643 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
6644 +                                       "Reached the end of stream %d "
6645 +                                       "(not an error).", current_stream);
6646 +                       return 1;
6647 +               }
6648 +       }
6649 +
6650 +       return 0;
6651 +}
6652 +
6653 +/**
6654 + * go_next_page - skip blocks to the start of the next page
6655 + * @writing: Whether we're reading or writing the image.
6656 + * @section_barrier: Whether the end of a section should be treated as a
6657 + *                   barrier (affects only the logging done here).
6656 + *
6657 + * Go forward one page.
6658 + **/
6659 +int go_next_page(int writing, int section_barrier)
6660 +{
6661 +       struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
6662 +       int max = cur_chain ? cur_chain->blocks_per_page : 1;
6663 +
6664 +       /* Go forward a page - or maybe two. Don't stripe the header,
6665 +        * so that bad fragmentation doesn't put the extent data containing
6666 +        * the location of the second page out of the first header page.
6667 +        */
6668 +       if (toi_extent_state_next(max, current_stream)) {
6669 +               /* Don't complain if readahead falls off the end */
6670 +               if (writing && section_barrier) {
6671 +                       toi_message(TOI_IO, TOI_VERBOSE, 0, "Extent state eof. "
6672 +                               "Expected compression ratio too optimistic?");
6673 +                       if (test_action_state(TOI_LOGALL))
6674 +                               dump_block_chains();
6675 +               }
6676 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Ran out of extents to "
6677 +                               "read/write. (Not necessarily a fatal error.)");
6678 +               return -ENOSPC;
6679 +       }
6680 +
6681 +       return 0;
6682 +}
6683 +
6684 +int devices_of_same_priority(struct toi_bdev_info *this)
6685 +{
6686 +       struct toi_bdev_info *check = prio_chain_head;
6687 +       int i = 0;
6688 +
6689 +       while (check) {
6690 +               if (check->prio == this->prio)
6691 +                       i++;
6692 +               check = check->next;
6693 +       }
6694 +
6695 +       return i;
6696 +}
6697 +
6698 +/**
6699 + * toi_bio_rw_page - do i/o on the next disk page in the image
6700 + * @writing: Whether reading or writing.
6701 + * @page: Page to do i/o on.
6702 + * @is_readahead: Whether we're doing readahead
6703 + * @free_group: The group used in allocating the page
6704 + *
6705 + * Submit a page for reading or writing, possibly readahead.
6706 + * Pass the group used in allocating the page as well, as it should
6707 + * be freed on completion of the bio if we're writing the page.
6708 + **/
6709 +int toi_bio_rw_page(int writing, struct page *page,
6710 +               int is_readahead, int free_group)
6711 +{
6712 +       int result = toi_end_of_stream(writing, 1);
6713 +       struct toi_bdev_info *dev_info = toi_writer_posn.current_chain;
6714 +
6715 +       if (result) {
6716 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Seeking to read/write "
6717 +                               "another page when stream has ended.");
6718 +               return -ENOSPC;
6719 +       }
6720 +
6721 +       toi_message(TOI_IO, TOI_VERBOSE, 0,
6722 +                       "%sing device %p, sector %lu << %d.",
6723 +                       writing ? "Writ" : "Read",
6724 +                       dev_info->bdev, dev_info->blocks.current_offset,
6725 +                       dev_info->bmap_shift);
6726 +
6727 +       result = toi_do_io(writing, dev_info->bdev,
6728 +               dev_info->blocks.current_offset << dev_info->bmap_shift,
6729 +               page, is_readahead, 0, free_group);
6730 +
6731 +       /* Ignore the result here - we'll check end of stream if we come in again */
6732 +       go_next_page(writing, 1);
6733 +
6734 +       if (result)
6735 +               printk(KERN_ERR "toi_do_io returned %d.\n", result);
6736 +       return result;
6737 +}
6738 +
6739 +dev_t get_header_dev_t(void)
6740 +{
6741 +       return prio_chain_head->dev_t;
6742 +}
6743 +
6744 +struct block_device *get_header_bdev(void)
6745 +{
6746 +       return prio_chain_head->bdev;
6747 +}
6748 +
6749 +unsigned long get_headerblock(void)
6750 +{
6751 +       return prio_chain_head->blocks.first->start <<
6752 +               prio_chain_head->bmap_shift;
6753 +}
6754 +
6755 +int get_main_pool_phys_params(void)
6756 +{
6757 +       struct toi_bdev_info *this = prio_chain_head;
6758 +       int result;
6759 +
6760 +       while (this) {
6761 +               result = this->allocator->bio_allocator_ops->bmap(this);
6762 +               if (result)
6763 +                       return result;
6764 +               this = this->next;
6765 +       }
6766 +
6767 +       return 0;
6768 +}
6769 +
6770 +static int apply_header_reservation(void)
6771 +{
6772 +       int i;
6773 +
6774 +       if (!header_pages_reserved) {
6775 +               toi_message(TOI_IO, TOI_VERBOSE, 0,
6776 +                               "No header pages reserved at the moment.");
6777 +               return 0;
6778 +       }
6779 +
6780 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Applying header reservation.");
6781 +
6782 +       /* Apply header space reservation */
6783 +       toi_extent_state_goto_start();
6784 +
6785 +       for (i = 0; i < header_pages_reserved; i++)
6786 +               if (go_next_page(1, 0))
6787 +                       return -ENOSPC;
6788 +
6789 +       /* The end of header pages will be the start of pageset 2 */
6790 +       toi_extent_state_save(2);
6791 +
6792 +       toi_message(TOI_IO, TOI_VERBOSE, 0,
6793 +                       "Finished applying header reservation.");
6794 +       return 0;
6795 +}
6796 +
6797 +static int toi_bio_register_storage(void)
6798 +{
6799 +       int result = 0;
6800 +       struct toi_module_ops *this_module;
6801 +
6802 +       list_for_each_entry(this_module, &toi_modules, module_list) {
6803 +               if (!this_module->enabled ||
6804 +                   this_module->type != BIO_ALLOCATOR_MODULE)
6805 +                       continue;
6806 +               toi_message(TOI_IO, TOI_VERBOSE, 0,
6807 +                               "Registering storage from %s.",
6808 +                               this_module->name);
6809 +               result = this_module->bio_allocator_ops->register_storage();
6810 +               if (result)
6811 +                       break;
6812 +       }
6813 +
6814 +       return result;
6815 +}
6816 +
6817 +int toi_bio_allocate_storage(unsigned long request)
6818 +{
6819 +       struct toi_bdev_info *chain = prio_chain_head;
6820 +       unsigned long to_get = request;
6821 +       unsigned long extra_pages, needed;
6822 +       int no_free = 0;
6823 +
6824 +       if (!chain) {
6825 +               int result = toi_bio_register_storage();
6826 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
6827 +                       "Registering storage.");
6828 +               if (result)
6829 +                       return 0;
6830 +               chain = prio_chain_head;
6831 +               if (!chain) {
6832 +                       printk(KERN_ERR "TuxOnIce: No storage was registered.\n");
6833 +                       return 0;
6834 +               }
6835 +       }
6836 +
6837 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
6838 +                       "Request is %lu pages.", request);
6839 +       extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
6840 +                              + sizeof(int)), PAGE_SIZE);
6841 +       needed = request + extra_pages + header_pages_reserved;
6842 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding %lu extra pages and %lu "
6843 +                       "for header => %lu.",
6844 +                       extra_pages, header_pages_reserved, needed);
6845 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Already allocated %lu pages.",
6846 +                       raw_pages_allocd);
6847 +
6848 +       to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : 0;
6849 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Need to get %lu pages.", to_get);
6850 +
6851 +       if (!to_get)
6852 +               return apply_header_reservation();
6853 +
6854 +       while (to_get && chain) {
6855 +               int num_group = devices_of_same_priority(chain);
6856 +               int divisor = num_group - no_free;
6857 +               int i;
6858 +               unsigned long portion = DIV_ROUND_UP(to_get, divisor);
6859 +               unsigned long got = 0;
6860 +               unsigned long got_this_round = 0;
6861 +               struct toi_bdev_info *top = chain;
6862 +
6863 +               toi_message(TOI_IO, TOI_VERBOSE, 0,
6864 +                               " Start of loop. To get is %lu. Divisor is %d.",
6865 +                               to_get, divisor);
6866 +               no_free = 0;
6867 +
6868 +               /*
6869 +                * We're aiming to spread the allocated storage as evenly
6870 +                * as possible, but we also want to get all the storage we
6871 +                * can off this priority.
6872 +                */
6873 +               for (i = 0; i < num_group; i++) {
6874 +                       struct toi_bio_allocator_ops *ops =
6875 +                               chain->allocator->bio_allocator_ops;
6876 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
6877 +                                       " Asking for %lu pages from chain %p.",
6878 +                                       portion, chain);
6879 +                       got = ops->allocate_storage(chain, portion);
6880 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
6881 +                                       " Got %lu pages from allocator %p.",
6882 +                                       got, chain);
6883 +                       if (!got)
6884 +                               no_free++;
6885 +                       got_this_round += got;
6886 +                       chain = chain->next;
6887 +               }
6888 +               toi_message(TOI_IO, TOI_VERBOSE, 0, " Loop finished. Got a "
6889 +                               "total of %lu pages from %d allocators.",
6890 +                               got_this_round, divisor - no_free);
6891 +
6892 +               raw_pages_allocd += got_this_round;
6893 +               to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd :
6894 +                       0;
6895 +
6896 +               /*
6897 +                * If we got anything from chains of this priority and we
6898 +                * still have storage to allocate, go over this priority
6899 +                * again.
6900 +                */
6901 +               if (got_this_round && to_get)
6902 +                       chain = top;
6903 +               else
6904 +                       no_free = 0;
6905 +       }
6906 +
6907 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Finished allocating. Calling "
6908 +                       "get_main_pool_phys_params");
6909 +       /* Now let swap allocator bmap the pages */
6910 +       get_main_pool_phys_params();
6911 +
6912 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Done. Reserving header.");
6913 +       return apply_header_reservation();
6914 +}
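+
+/*
+ * For example, to get 10 pages from three chains of equal priority, each
+ * is asked for DIV_ROUND_UP(10, 3) = 4 pages; if one returns nothing,
+ * the next pass divides whatever remains between the other two.
+ */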
6915 +
6916 +void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd)
6917 +{
6918 +       int i = 0;
6919 +       struct toi_bdev_info *cur_chain = prio_chain_head;
6920 +
6921 +       while (cur_chain) {
6922 +               cur_chain->pages_used = bkd->pages_used[i];
6923 +               cur_chain = cur_chain->next;
6924 +               i++;
6925 +       }
6926 +}
6927 +
6928 +int toi_bio_chains_debug_info(char *buffer, int size)
6929 +{
6930 +       /* Show what we actually used */
6931 +       struct toi_bdev_info *cur_chain = prio_chain_head;
6932 +       int len = 0;
6933 +
6934 +       while (cur_chain) {
6935 +               len += scnprintf(buffer + len, size - len, "  Used %lu pages "
6936 +                               "from %s.\n", cur_chain->pages_used,
6937 +                               cur_chain->name);
6938 +               cur_chain = cur_chain->next;
6939 +       }
6940 +
6941 +       return len;
6942 +}
6943 diff --git a/kernel/power/tuxonice_bio_core.c b/kernel/power/tuxonice_bio_core.c
6944 new file mode 100644
6945 index 0000000..414d249
6946 --- /dev/null
6947 +++ b/kernel/power/tuxonice_bio_core.c
6948 @@ -0,0 +1,1822 @@
6949 +/*
6950 + * kernel/power/tuxonice_bio_core.c
6951 + *
6952 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
6953 + *
6954 + * Distributed under GPLv2.
6955 + *
6956 + * This file contains block io functions for TuxOnIce. These are
6957 + * used by the swapwriter and it is planned that they will also
6958 + * be used by the NFSwriter.
6959 + *
6960 + */
6961 +
6962 +#include <linux/blkdev.h>
6963 +#include <linux/syscalls.h>
6964 +#include <linux/suspend.h>
6965 +#include <linux/ctype.h>
6966 +#include <linux/fs_uuid.h>
6967 +#include <scsi/scsi_scan.h>
6968 +
6969 +#include "tuxonice.h"
6970 +#include "tuxonice_sysfs.h"
6971 +#include "tuxonice_modules.h"
6972 +#include "tuxonice_prepare_image.h"
6973 +#include "tuxonice_bio.h"
6974 +#include "tuxonice_ui.h"
6975 +#include "tuxonice_alloc.h"
6976 +#include "tuxonice_io.h"
6977 +#include "tuxonice_builtin.h"
6978 +#include "tuxonice_bio_internal.h"
6979 +
6980 +#define MEMORY_ONLY 1
6981 +#define THROTTLE_WAIT 2
6982 +
6983 +/* #define MEASURE_MUTEX_CONTENTION */
6984 +#ifndef MEASURE_MUTEX_CONTENTION
6985 +#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
6986 +#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
6987 +#else
6988 +unsigned long mutex_times[2][2][NR_CPUS];
6989 +#define my_mutex_lock(index, the_lock) do { \
6990 +       int have_mutex; \
6991 +       have_mutex = mutex_trylock(the_lock); \
6992 +       if (!have_mutex) { \
6993 +               mutex_lock(the_lock); \
6994 +               mutex_times[index][0][smp_processor_id()]++; \
6995 +       } else { \
6996 +               mutex_times[index][1][smp_processor_id()]++; \
6997 +       }
6998 +
6999 +#define my_mutex_unlock(index, the_lock) \
7000 +       mutex_unlock(the_lock); \
7001 +} while (0)
7002 +#endif
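+
+/*
+ * When MEASURE_MUTEX_CONTENTION is defined, my_mutex_lock opens a do { }
+ * block that my_mutex_unlock closes, so the two must always be used as a
+ * matched pair within the same function.
+ */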
7003 +
7004 +static int page_idx, reset_idx;
7005 +
7006 +static int target_outstanding_io = 1024;
7007 +static int max_outstanding_writes, max_outstanding_reads;
7008 +
7009 +static struct page *bio_queue_head, *bio_queue_tail;
7010 +static atomic_t toi_bio_queue_size;
7011 +static DEFINE_SPINLOCK(bio_queue_lock);
7012 +
7013 +static int free_mem_throttle, throughput_throttle;
7014 +int more_readahead = 1;
7015 +static struct page *readahead_list_head, *readahead_list_tail;
7016 +
7017 +static struct page *waiting_on;
7018 +
7019 +static atomic_t toi_io_in_progress, toi_io_done;
7020 +static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
7021 +
7022 +int current_stream;
7023 +/* Not static, so that the allocators can setup and complete
7024 + * writing the header */
7025 +char *toi_writer_buffer;
7026 +int toi_writer_buffer_posn;
7027 +
7028 +static DEFINE_MUTEX(toi_bio_mutex);
7029 +static DEFINE_MUTEX(toi_bio_readahead_mutex);
7030 +
7031 +static struct task_struct *toi_queue_flusher;
7032 +static int toi_bio_queue_flush_pages(int dedicated_thread);
7033 +
7034 +struct toi_module_ops toi_blockwriter_ops;
7035 +
7036 +#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \
7037 +              atomic_read(&toi_bio_queue_size))
7038 +
7039 +unsigned long raw_pages_allocd, header_pages_reserved;
7040 +
7041 +/**
7042 + * set_free_mem_throttle - set the point where we pause to avoid oom.
7043 + *
7044 + * Initially, this value is zero, but when we first fail to allocate memory,
7045 + * we set it (plus a buffer) and thereafter throttle i/o once that limit is
7046 + * reached.
7047 + **/
7048 +static void set_free_mem_throttle(void)
7049 +{
7050 +       int new_throttle = nr_unallocated_buffer_pages() + 256;
7051 +
7052 +       if (new_throttle > free_mem_throttle)
7053 +               free_mem_throttle = new_throttle;
7054 +}
7055 +
7056 +#define NUM_REASONS 7
7057 +static atomic_t reasons[NUM_REASONS];
7058 +static char *reason_name[NUM_REASONS] = {
7059 +       "readahead not ready",
7060 +       "bio allocation",
7061 +       "synchronous I/O",
7062 +       "toi_bio_get_new_page",
7063 +       "memory low",
7064 +       "readahead buffer allocation",
7065 +       "throughput_throttle",
7066 +};
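+
+/*
+ * These indices match the reason numbers used with do_bio_wait() below,
+ * e.g. do_bio_wait(1) after a failed bio_alloc() and do_bio_wait(4) when
+ * free memory is running low; reasons[6] is bumped directly when the
+ * throughput throttle is hit.
+ */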
7067 +
7068 +/* User Specified Parameters. */
7069 +unsigned long resume_firstblock;
7070 +dev_t resume_dev_t;
7071 +struct block_device *resume_block_device;
7072 +static atomic_t resume_bdev_open_count;
7073 +
7074 +struct block_device *header_block_device;
7075 +
7076 +/**
7077 + * toi_open_bdev: Open a bdev at resume time.
7078 + *
7079 + * uuid: The uuid of the filesystem to open, if known (may be NULL).
7080 + * default_device: The dev_t to fall back to if there is no uuid or it
7081 + * can't be resolved. The user can have resume= pointing at a swap
7082 + * partition/file that isn't swapon'd when they hibernate; the first page
7083 + * of the header will be from a swap partition that was enabled when we
7084 + * hibernated, but we don't know its real index until we read that page.
7085 + * display_errs: Whether to display errors if opening fails.
7086 + *
7087 + * We stored a dev_t in the image header. Open the matching device without
7088 + * requiring /dev/<whatever> in most cases and record the details needed
7089 + * to close it later and avoid duplicating work.
7090 + */
7091 +struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
7092 +               int display_errs)
7093 +{
7094 +       struct block_device *bdev;
7095 +       dev_t device = default_device;
7096 +       char buf[32];
7097 +
7098 +       if (uuid) {
7099 +               struct fs_info seek;
7100 +               strncpy((char *) &seek.uuid, uuid, 16);
7101 +               seek.dev_t = 0;
7102 +               seek.last_mount_size = 0;
7103 +               device = blk_lookup_fs_info(&seek);
7104 +               if (!device) {
7105 +                       device = default_device;
7106 +                       printk(KERN_DEBUG "Unable to resolve uuid. Falling back"
7107 +                                       " to dev_t.\n");
7108 +               } else
7109 +                       printk(KERN_DEBUG "Resolved uuid to device %s.\n",
7110 +                                       format_dev_t(buf, device));
7111 +       }
7112 +
7113 +       if (!device) {
7114 +               printk(KERN_ERR "TuxOnIce attempting to open a "
7115 +                               "blank dev_t!\n");
7116 +               dump_stack();
7117 +               return NULL;
7118 +       }
7119 +       bdev = toi_open_by_devnum(device);
7120 +
7121 +       if (IS_ERR(bdev) || !bdev) {
7122 +               if (display_errs)
7123 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
7124 +                               "Failed to get access to block device "
7125 +                               "\"%x\" (error %d).\n Maybe you need "
7126 +                               "to run mknod and/or lvmsetup in an "
7127 +                               "initrd/ramfs?", device, (int) PTR_ERR(bdev));
7128 +               return ERR_PTR(-EINVAL);
7129 +       }
7130 +       toi_message(TOI_IO, TOI_VERBOSE, 0,
7131 +                       "TuxOnIce got bdev %p for dev_t %x.",
7132 +                       bdev, device);
7133 +
7134 +       return bdev;
7135 +}
7136 +
7137 +static void toi_bio_reserve_header_space(unsigned long request)
7138 +{
7139 +       header_pages_reserved = request;
7140 +}
7141 +
7142 +/**
7143 + * do_bio_wait - wait for some TuxOnIce I/O to complete
7144 + * @reason: The array index of the reason we're waiting.
7145 + *
7146 + * Wait for a particular page of I/O if we're after a particular page.
7147 + * If we're not after a particular page, wait instead for all in flight
7148 + * I/O to be completed or for us to have enough free memory to be able
7149 + * to submit more I/O.
7150 + *
7151 + * If we wait, we also update our statistics regarding why we waited.
7152 + **/
7153 +static void do_bio_wait(int reason)
7154 +{
7155 +       struct page *was_waiting_on = waiting_on;
7156 +
7157 +       /* On SMP, waiting_on can be reset, so we make a copy */
7158 +       if (was_waiting_on) {
7159 +               wait_on_page_locked(was_waiting_on);
7160 +               atomic_inc(&reasons[reason]);
7161 +       } else {
7162 +               atomic_inc(&reasons[reason]);
7163 +
7164 +               wait_event(num_in_progress_wait,
7165 +                       !atomic_read(&toi_io_in_progress) ||
7166 +                       nr_unallocated_buffer_pages() > free_mem_throttle);
7167 +       }
7168 +}
7169 +
7170 +/**
7171 + * throttle_if_needed - wait for I/O completion if throttle points are reached
7172 + * @flags: What to check and how to act.
7173 + *
7174 + * Check whether we need to wait for some I/O to complete. We always check
7175 + * whether we have enough memory available, but may also (depending upon
7176 + * @flags) check if the throughput throttle limit has been reached.
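+ *
+ * With THROTTLE_WAIT set, we wait for memory to become available rather
+ * than returning -ENOMEM; unless MEMORY_ONLY is set, we also honour the
+ * throughput throttle.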
7177 + **/
7178 +static int throttle_if_needed(int flags)
7179 +{
7180 +       int free_pages = nr_unallocated_buffer_pages();
7181 +
7182 +       /* Getting low on memory and I/O is in progress? */
7183 +       while (unlikely(free_pages < free_mem_throttle) &&
7184 +                       atomic_read(&toi_io_in_progress) &&
7185 +                       !test_result_state(TOI_ABORTED)) {
7186 +               if (!(flags & THROTTLE_WAIT))
7187 +                       return -ENOMEM;
7188 +               do_bio_wait(4);
7189 +               free_pages = nr_unallocated_buffer_pages();
7190 +       }
7191 +
7192 +       while (!(flags & MEMORY_ONLY) && throughput_throttle &&
7193 +               TOTAL_OUTSTANDING_IO >= throughput_throttle &&
7194 +               !test_result_state(TOI_ABORTED)) {
7195 +               int result = toi_bio_queue_flush_pages(0);
7196 +               if (result)
7197 +                       return result;
7198 +               atomic_inc(&reasons[6]);
7199 +               wait_event(num_in_progress_wait,
7200 +                       !atomic_read(&toi_io_in_progress) ||
7201 +                       TOTAL_OUTSTANDING_IO < throughput_throttle);
7202 +       }
7203 +
7204 +       return 0;
7205 +}
7206 +
7207 +/**
7208 + * update_throughput_throttle - update the raw throughput throttle
7209 + * @jif_index: The number of times this function has been called.
7210 + *
7211 + * This function is called four times per second by the core, and used to limit
7212 + * the amount of I/O we submit at once, spreading out our waiting through the
7213 + * whole job and letting userui get an opportunity to do its work.
7214 + *
7215 + * We don't start limiting I/O until 1/4s has gone so that we get a
7216 + * decent sample for our initial limit, and keep updating it because
7217 + * throughput may vary (on rotating media, eg) with our block number.
7218 + *
7219 + * We throttle to 1/10s worth of I/O.
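+ *
+ * For example, if 8000 pages of I/O have completed after two seconds
+ * (jif_index == 8), the throttle becomes 8000 * 2 / 5 / 8 = 400 pages -
+ * roughly what completed in the last tenth of a second.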
7220 + **/
7221 +static void update_throughput_throttle(int jif_index)
7222 +{
7223 +       int done = atomic_read(&toi_io_done);
7224 +       throughput_throttle = done * 2 / 5 / jif_index;
7225 +}
7226 +
7227 +/**
7228 + * toi_finish_all_io - wait for all outstanding i/o to complete
7229 + *
7230 + * Flush any queued but unsubmitted I/O and wait for it all to complete.
7231 + **/
7232 +static int toi_finish_all_io(void)
7233 +{
7234 +       int result = toi_bio_queue_flush_pages(0);
7235 +       wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO);
7236 +       return result;
7237 +}
7238 +
7239 +/**
7240 + * toi_end_bio - bio completion function.
7241 + * @bio: bio that has completed.
7242 + * @err: Error value. Yes, like end_swap_bio_read, we ignore it.
7243 + *
7244 + * Function called by the block driver from interrupt context when I/O is
7245 + * completed. If we were writing the page, we want to free it and will have
7246 + * set bio->bi_private to the parameter we should use in telling the page
7247 + * allocation accounting code what the page was allocated for. If we're
7248 + * reading the page, it will be in the singly linked list made from
7249 + * page->private pointers.
7250 + **/
7251 +static void toi_end_bio(struct bio *bio, int err)
7252 +{
7253 +       struct page *page = bio->bi_io_vec[0].bv_page;
7254 +
7255 +       BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
7256 +
7257 +       unlock_page(page);
7258 +       bio_put(bio);
7259 +
7260 +       if (waiting_on == page)
7261 +               waiting_on = NULL;
7262 +
7263 +       put_page(page);
7264 +
7265 +       if (bio->bi_private)
7266 +               toi__free_page((int) ((unsigned long) bio->bi_private), page);
7267 +
7268 +       bio_put(bio);
7269 +
7270 +       atomic_dec(&toi_io_in_progress);
7271 +       atomic_inc(&toi_io_done);
7272 +
7273 +       wake_up(&num_in_progress_wait);
7274 +}
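+
+/*
+ * Reference counting above: bio_alloc() gives the bio one reference and
+ * submit() takes a second via bio_get(), so the first bio_put() leaves
+ * the bio valid while bi_private is read and the second frees it.
+ */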
7275 +
7276 +/**
7277 + * submit - submit BIO request
7278 + * @writing: READ or WRITE.
7279 + * @dev: The block device we're using.
7280 + * @first_block: The first sector we're using.
7281 + * @page: The page being used for I/O.
7282 + * @free_group: If writing, the group that was used in allocating the page
7283 + *     and which will be used in freeing the page from the completion
7284 + *     routine.
7285 + *
7286 + * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
7287 + * textbook - allocate and initialize the bio. If we're writing, make sure
7288 + * the page is marked as dirty. Then submit it and carry on."
7289 + *
7290 + * If we're just testing the speed of our own code, we fake having done all
7291 + * the hard work and all toi_end_bio immediately.
7292 + **/
7293 +static int submit(int writing, struct block_device *dev, sector_t first_block,
7294 +               struct page *page, int free_group)
7295 +{
7296 +       struct bio *bio = NULL;
7297 +       int cur_outstanding_io, result;
7298 +
7299 +       /*
7300 +        * Shouldn't throttle if reading - can deadlock in the single
7301 +        * threaded case as pages are only freed when we use the
7302 +        * readahead.
7303 +        */
7304 +       if (writing) {
7305 +               result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT);
7306 +               if (result)
7307 +                       return result;
7308 +       }
7309 +
7310 +       while (!bio) {
7311 +               bio = bio_alloc(TOI_ATOMIC_GFP, 1);
7312 +               if (!bio) {
7313 +                       set_free_mem_throttle();
7314 +                       do_bio_wait(1);
7315 +               }
7316 +       }
7317 +
7318 +       bio->bi_bdev = dev;
7319 +       bio->bi_sector = first_block;
7320 +       bio->bi_private = (void *) ((unsigned long) free_group);
7321 +       bio->bi_end_io = toi_end_bio;
7322 +
7323 +       if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
7324 +               printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n",
7325 +                               (unsigned long long) first_block);
7326 +               bio_put(bio);
7327 +               return -EFAULT;
7328 +       }
7329 +
7330 +       bio_get(bio);
7331 +
7332 +       cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
7333 +       if (writing) {
7334 +               if (cur_outstanding_io > max_outstanding_writes)
7335 +                       max_outstanding_writes = cur_outstanding_io;
7336 +       } else {
7337 +               if (cur_outstanding_io > max_outstanding_reads)
7338 +                       max_outstanding_reads = cur_outstanding_io;
7339 +       }
7340 +
7342 +       if (unlikely(test_action_state(TOI_TEST_BIO))) {
7343 +               /* Fake having done the hard work */
7344 +               set_bit(BIO_UPTODATE, &bio->bi_flags);
7345 +               toi_end_bio(bio, 0);
7346 +       } else
7347 +               submit_bio(writing | (1 << BIO_RW_SYNCIO) |
7348 +                               (1 << BIO_RW_TUXONICE) |
7349 +                               (1 << BIO_RW_UNPLUG), bio);
7350 +
7351 +       return 0;
7352 +}
7353 +
7354 +/**
7355 + * toi_do_io: Prepare to do some i/o on a page and submit or batch it.
7356 + *
7357 + * @writing: Whether reading or writing.
7358 + * @bdev: The block device which we're using.
7359 + * @block0: The first sector we're reading or writing.
7360 + * @page: The page on which I/O is being done.
7361 + * @is_readahead: Whether we're adding the page to the readahead list.
7362 + * @syncio: Whether the i/o is being done synchronously.
+ * @free_group: If writing, the group to free the page to on I/O completion.
7363 + *
7364 + * Prepare and start a read or write operation.
7365 + *
7366 + * Note that we always work with our own page. If writing, we might be given a
7367 + * compression buffer that will immediately be used to start compressing the
7368 + * next page. For reading, we do readahead and therefore don't know the final
7369 + * address where the data needs to go.
7370 + **/
7371 +int toi_do_io(int writing, struct block_device *bdev, long block0,
7372 +       struct page *page, int is_readahead, int syncio, int free_group)
7373 +{
7374 +       page->private = 0;
7375 +
7376 +       /* Do here so we don't race against toi_bio_get_next_page_read */
7377 +       lock_page(page);
7378 +
7379 +       if (is_readahead) {
7380 +               if (readahead_list_head)
7381 +                       readahead_list_tail->private = (unsigned long) page;
7382 +               else
7383 +                       readahead_list_head = page;
7384 +
7385 +               readahead_list_tail = page;
7386 +       }
7387 +
7388 +       /* Done before submitting to avoid races. */
7389 +       if (syncio)
7390 +               waiting_on = page;
7391 +
7392 +       /* Submit the page */
7393 +       get_page(page);
7394 +
7395 +       if (submit(writing, bdev, block0, page, free_group))
7396 +               return -EFAULT;
7397 +
7398 +       if (syncio)
7399 +               do_bio_wait(2);
7400 +
7401 +       return 0;
7402 +}
7403 +
7404 +/**
7405 + * toi_bdev_page_io - simpler interface to do directly i/o on a single page
7406 + * @writing: Whether reading or writing.
7407 + * @bdev: Block device on which we're operating.
7408 + * @pos: Sector at which page to read or write starts.
7409 + * @page: Page to be read/written.
7410 + *
7411 + * A simple interface to submit a page of I/O and wait for its completion.
7412 + * The caller must free the page used.
7413 + **/
7414 +static int toi_bdev_page_io(int writing, struct block_device *bdev,
7415 +               long pos, struct page *page)
7416 +{
7417 +       return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
7418 +}
7419 +
7420 +/**
7421 + * toi_bio_memory_needed - report the amount of memory needed for block i/o
7422 + *
7423 + * We want to have at least enough memory so as to have target_outstanding_io
7424 + * or more transactions on the fly at once. If we can do more, fine.
7425 + **/
7426 +static int toi_bio_memory_needed(void)
7427 +{
7428 +       return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) +
7429 +                               sizeof(struct bio));
7430 +}
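+
+/*
+ * With 4K pages and the default target_outstanding_io of 1024, for
+ * example, that is 1024 * (4096 + sizeof(struct request) +
+ * sizeof(struct bio)) - a little over 4MB.
+ */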
7431 +
7432 +/**
7433 + * toi_bio_print_debug_stats - put out debugging info in the buffer provided
7434 + * @buffer: A buffer of size @size into which text should be placed.
7435 + * @size: The size of @buffer.
7436 + *
7437 + * Fill a buffer with debugging info. This is used for both our debug_info sysfs
7438 + * entry and for recording the same info in dmesg.
7439 + **/
7440 +static int toi_bio_print_debug_stats(char *buffer, int size)
7441 +{
7442 +       int len = 0;
7443 +
7444 +       if (toiActiveAllocator != &toi_blockwriter_ops) {
7445 +               len = scnprintf(buffer, size,
7446 +                               "- Block I/O inactive.\n");
7447 +               return len;
7448 +       }
7449 +
7450 +       len = scnprintf(buffer, size, "- Block I/O active.\n");
7451 +
7452 +       len += toi_bio_chains_debug_info(buffer + len, size - len);
7453 +
7454 +       len += scnprintf(buffer + len, size - len,
7455 +                       "- Max outstanding reads %d. Max writes %d.\n",
7456 +                       max_outstanding_reads, max_outstanding_writes);
7457 +
7458 +       len += scnprintf(buffer + len, size - len,
7459 +               "  Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
7460 +               target_outstanding_io,
7461 +               PAGE_SIZE, (unsigned int) sizeof(struct request),
7462 +               (unsigned int) sizeof(struct bio), toi_bio_memory_needed());
7463 +
7464 +#ifdef MEASURE_MUTEX_CONTENTION
7465 +       {
7466 +       int i;
7467 +
7468 +       len += scnprintf(buffer + len, size - len,
7469 +               "  Mutex contention while reading:\n  Contended      Free\n");
7470 +
7471 +       for_each_online_cpu(i)
7472 +               len += scnprintf(buffer + len, size - len,
7473 +               "  %9lu %9lu\n",
7474 +               mutex_times[0][0][i], mutex_times[0][1][i]);
7475 +
7476 +       len += scnprintf(buffer + len, size - len,
7477 +               "  Mutex contention while writing:\n  Contended      Free\n");
7478 +
7479 +       for_each_online_cpu(i)
7480 +               len += scnprintf(buffer + len, size - len,
7481 +               "  %9lu %9lu\n",
7482 +               mutex_times[1][0][i], mutex_times[1][1][i]);
7483 +
7484 +       }
7485 +#endif
7486 +
7487 +       return len + scnprintf(buffer + len, size - len,
7488 +               "  Free mem throttle point reached %d.\n", free_mem_throttle);
7489 +}
7490 +
7491 +static int total_header_bytes;
7492 +static int unowned;
7493 +
7494 +void debug_broken_header(void)
7495 +{
7496 +       printk(KERN_DEBUG "Image header too big for size allocated!\n");
7497 +       print_toi_header_storage_for_modules();
7498 +       printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed());
7499 +       printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header));
7500 +       printk(KERN_DEBUG "Total unowned : %d.\n", unowned);
7501 +       printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes,
7502 +                       DIV_ROUND_UP(total_header_bytes, PAGE_SIZE));
7503 +       printk(KERN_DEBUG "Space needed now : %ld.\n",
7504 +                       get_header_storage_needed());
7505 +       dump_block_chains();
7506 +       abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small.");
7507 +}
7508 +
7509 +/**
7510 + * toi_rw_init - prepare to read or write a stream in the image
7511 + * @writing: Whether reading or writing.
7512 + * @stream_number: Section of the image being processed.
7513 + *
7514 + * Prepare to read or write a section ('stream') in the image.
7515 + **/
7516 +static int toi_rw_init(int writing, int stream_number)
7517 +{
7518 +       if (stream_number)
7519 +               toi_extent_state_restore(stream_number);
7520 +       else
7521 +               toi_extent_state_goto_start();
7522 +
7523 +       if (writing) {
7524 +               reset_idx = 0;
7525 +               if (!current_stream)
7526 +                       page_idx = 0;
7527 +       } else {
7528 +               reset_idx = 1;
7529 +       }
7530 +
7531 +       atomic_set(&toi_io_done, 0);
7532 +       if (!toi_writer_buffer)
7533 +               toi_writer_buffer = (char *) toi_get_zeroed_page(11,
7534 +                               TOI_ATOMIC_GFP);
7535 +       toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;
7536 +
7537 +       current_stream = stream_number;
7538 +
7539 +       more_readahead = 1;
7540 +
7541 +       return toi_writer_buffer ? 0 : -ENOMEM;
7542 +}
7543 +
7544 +/**
7545 + * toi_bio_queue_write - queue a page for writing
7546 + * @full_buffer: Pointer to a page to be queued
7547 + *
7548 + * Add a page to the queue to be submitted. If we're the queue flusher,
7549 + * we'll do this once we've dropped toi_bio_mutex, so other threads can
7550 + * continue to submit I/O while we're on the slow path doing the actual
7551 + * submission.
7552 + **/
7553 +static void toi_bio_queue_write(char **full_buffer)
7554 +{
7555 +       struct page *page = virt_to_page(*full_buffer);
7556 +       unsigned long flags;
7557 +
7558 +       *full_buffer = NULL;
7559 +       page->private = 0;
7560 +
7561 +       spin_lock_irqsave(&bio_queue_lock, flags);
7562 +       if (!bio_queue_head)
7563 +               bio_queue_head = page;
7564 +       else
7565 +               bio_queue_tail->private = (unsigned long) page;
7566 +
7567 +       bio_queue_tail = page;
7568 +       atomic_inc(&toi_bio_queue_size);
7569 +
7570 +       spin_unlock_irqrestore(&bio_queue_lock, flags);
7571 +       wake_up(&toi_io_queue_flusher);
7572 +}
7573 +
7574 +/**
7575 + * toi_rw_cleanup - Cleanup after i/o.
7576 + * @writing: Whether we were reading or writing.
7577 + *
7578 + * Flush all I/O and clean everything up after reading or writing a
7579 + * section of the image.
7580 + **/
7581 +static int toi_rw_cleanup(int writing)
7582 +{
7583 +       int i, result = 0;
7584 +
7585 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_rw_cleanup.");
7586 +       if (writing) {
7587 +               if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED))
7588 +                       toi_bio_queue_write(&toi_writer_buffer);
7589 +
7590 +               while (bio_queue_head && !result)
7591 +                       result = toi_bio_queue_flush_pages(0);
7592 +
7593 +               if (result)
7594 +                       return result;
7595 +
7596 +               if (current_stream == 2)
7597 +                       toi_extent_state_save(1);
7598 +               else if (current_stream == 1)
7599 +                       toi_extent_state_save(3);
7600 +       }
7601 +
7602 +       result = toi_finish_all_io();
7603 +
7604 +       while (readahead_list_head) {
7605 +               void *next = (void *) readahead_list_head->private;
7606 +               toi__free_page(12, readahead_list_head);
7607 +               readahead_list_head = next;
7608 +       }
7609 +
7610 +       readahead_list_tail = NULL;
7611 +
7612 +       if (!current_stream)
7613 +               return result;
7614 +
7615 +       for (i = 0; i < NUM_REASONS; i++) {
7616 +               if (!atomic_read(&reasons[i]))
7617 +                       continue;
7618 +               printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n",
7619 +                               reason_name[i], atomic_read(&reasons[i]));
7620 +               atomic_set(&reasons[i], 0);
7621 +       }
7622 +
7623 +       current_stream = 0;
7624 +       return result;
7625 +}
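+
+/*
+ * Extent state bookkeeping: slot 2 (saved in apply_header_reservation())
+ * marks the start of pageset2, slot 1 is saved after writing stream 2 and
+ * slot 3 after stream 1; toi_rw_init() restores the matching slot when a
+ * stream is reopened.
+ */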
7626 +
7627 +/**
7628 + * toi_start_one_readahead - start one page of readahead
7629 + * @dedicated_thread: Is this a thread dedicated to doing readahead?
7630 + *
7631 + * Start one new page of readahead. If this is being called by a thread
7632 + * whose only job is to submit readahead, don't quit because we failed
7633 + * to allocate a page.
7634 + **/
7635 +static int toi_start_one_readahead(int dedicated_thread)
7636 +{
7637 +       char *buffer = NULL;
7638 +       int oom = 0, result;
7639 +
7640 +       result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0);
7641 +       if (result)
7642 +               return result;
7643 +
7644 +       mutex_lock(&toi_bio_readahead_mutex);
7645 +
7646 +       while (!buffer) {
7647 +               buffer = (char *) toi_get_zeroed_page(12,
7648 +                               TOI_ATOMIC_GFP);
7649 +               if (!buffer) {
7650 +                       if (oom && !dedicated_thread) {
7651 +                               mutex_unlock(&toi_bio_readahead_mutex);
7652 +                               return -ENOMEM;
7653 +                       }
7654 +
7655 +                       oom = 1;
7656 +                       set_free_mem_throttle();
7657 +                       do_bio_wait(5);
7658 +               }
7659 +       }
7660 +
7661 +       result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
7662 +       if (result == -ENOSPC)
7663 +               toi__free_page(12, virt_to_page(buffer));
7664 +       mutex_unlock(&toi_bio_readahead_mutex);
7665 +       if (result) {
7666 +               if (result == -ENOSPC)
7667 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
7668 +                                       "Last readahead page submitted.");
7669 +               else
7670 +                       printk(KERN_DEBUG "toi_bio_rw_page returned %d.\n",
7671 +                                       result);
7672 +       }
7673 +       return result;
7674 +}
7675 +
7676 +/**
7677 + * toi_start_new_readahead - start new readahead
7678 + * @dedicated_thread: Are we dedicated to this task?
7679 + *
7680 + * Start readahead of image pages.
7681 + *
7682 + * We can be called as a thread dedicated to this task (may be helpful on
7683 + * systems with lots of CPUs), in which case we don't exit until there's no
7684 + * more readahead.
7685 + *
7686 + * If this is not called by a dedicated thread, we top up our queue until
7687 + * there's no more readahead to submit, we've submitted the number given
7688 + * in target_outstanding_io or the number in progress exceeds the target
7689 + * outstanding I/O value.
7690 + *
7691 + * No mutex needed because this is only ever called by the first cpu.
7692 + **/
7693 +static int toi_start_new_readahead(int dedicated_thread)
7694 +{
7695 +       int last_result, num_submitted = 0;
7696 +
7697 +       /* Start a new readahead? */
7698 +       if (!more_readahead)
7699 +               return 0;
7700 +
7701 +       do {
7702 +               last_result = toi_start_one_readahead(dedicated_thread);
7703 +
7704 +               if (last_result) {
7705 +                       if (last_result == -ENOMEM || last_result == -ENOSPC)
7706 +                               return 0;
7707 +
7708 +                       printk(KERN_DEBUG
7709 +                               "Begin read chunk returned %d.\n",
7710 +                               last_result);
7711 +               } else
7712 +                       num_submitted++;
7713 +
7714 +       } while (more_readahead && !last_result &&
7715 +                (dedicated_thread ||
7716 +                 (num_submitted < target_outstanding_io &&
7717 +                  atomic_read(&toi_io_in_progress) < target_outstanding_io)));
7718 +
7719 +       return last_result;
7720 +}
7721 +
7722 +/**
7723 + * bio_io_flusher - start the dedicated I/O flushing routine
7724 + * @writing: Whether we're writing the image.
7725 + **/
7726 +static int bio_io_flusher(int writing)
7727 +{
7729 +       if (writing)
7730 +               return toi_bio_queue_flush_pages(1);
7731 +       else
7732 +               return toi_start_new_readahead(1);
7733 +}
7734 +
7735 +/**
7736 + * toi_bio_get_next_page_read - read a disk page, perhaps with readahead
7737 + * @no_readahead: Whether we can use readahead
7738 + *
7739 + * Read a page from disk, submitting readahead and cleaning up finished i/o
7740 + * while we wait for the page we're after.
7741 + **/
7742 +static int toi_bio_get_next_page_read(int no_readahead)
7743 +{
7744 +       unsigned long *virt;
7745 +       struct page *next;
7746 +
7747 +       /*
7748 +        * When reading the second page of the header, we have to
7749 +        * delay submitting the read until after we've gotten the
7750 +        * extents out of the first page.
7751 +        */
7752 +       if (unlikely(no_readahead && toi_start_one_readahead(0))) {
7753 +               printk(KERN_EMERG "No readahead and toi_start_one_readahead "
7754 +                               "returned non-zero.\n");
7755 +               return -EIO;
7756 +       }
7757 +
7758 +       if (unlikely(!readahead_list_head)) {
7759 +               /*
7760 +                * If the last page finishes exactly on the page
7761 +                * boundary, we will be called one extra time and
7762 +                * have no data to return. In this case, we should
7763 +                * not BUG(), like we used to!
7764 +                */
7765 +               if (!more_readahead) {
7766 +                       printk(KERN_EMERG "No more readahead.\n");
7767 +                       return -ENOSPC;
7768 +               }
7769 +               if (unlikely(toi_start_one_readahead(0))) {
7770 +                       printk(KERN_EMERG "No readahead and "
7771 +                        "toi_start_one_readahead returned non-zero.\n");
7772 +                       return -EIO;
7773 +               }
7774 +       }
7775 +
7776 +       if (PageLocked(readahead_list_head)) {
7777 +               waiting_on = readahead_list_head;
7778 +               do_bio_wait(0);
7779 +       }
7780 +
7781 +       virt = page_address(readahead_list_head);
7782 +       memcpy(toi_writer_buffer, virt, PAGE_SIZE);
7783 +
7784 +       next = (struct page *) readahead_list_head->private;
7785 +       toi__free_page(12, readahead_list_head);
7786 +       readahead_list_head = next;
7787 +       return 0;
7788 +}
7789 +
7790 +/**
7791 + * toi_bio_queue_flush_pages - flush the queue of pages queued for writing
7792 + * @dedicated_thread: Whether we're a dedicated thread
7793 + *
7794 + * Flush the queue of pages ready to be written to disk.
7795 + *
7796 + * If we're a dedicated thread, stay in here until told to leave,
7797 + * sleeping in wait_event.
7798 + *
7799 + * The first thread is normally the only one to come in here. Another
7800 + * thread can enter this routine too, though, via throttle_if_needed.
7801 + * Since that's the case, we must be careful to only have one thread
7802 + * doing this work at a time. Otherwise we have a race and could save
7803 + * pages out of order.
7804 + *
7805 + * If an error occurs, free all remaining pages without submitting them
7806 + * for I/O.
7807 + **/
7808 +
7809 +int toi_bio_queue_flush_pages(int dedicated_thread)
7810 +{
7811 +       unsigned long flags;
7812 +       int result = 0;
7813 +       static DEFINE_MUTEX(busy);
7814 +
7815 +       if (!mutex_trylock(&busy))
7816 +               return 0;
7817 +
7818 +top:
7819 +       spin_lock_irqsave(&bio_queue_lock, flags);
7820 +       while (bio_queue_head) {
7821 +               struct page *page = bio_queue_head;
7822 +               bio_queue_head = (struct page *) page->private;
7823 +               if (bio_queue_tail == page)
7824 +                       bio_queue_tail = NULL;
7825 +               atomic_dec(&toi_bio_queue_size);
7826 +               spin_unlock_irqrestore(&bio_queue_lock, flags);
7827 +
7828 +               /* Don't generate more error messages if already had one */
7829 +               if (!result)
7830 +                       result = toi_bio_rw_page(WRITE, page, 0, 11);
7831 +               /*
7832 +                * If writing the page failed, don't drop out.
7833 +                * Flush the rest of the queue too.
7834 +                */
7835 +               if (result)
7836 +                       toi__free_page(11, page);
7837 +               spin_lock_irqsave(&bio_queue_lock, flags);
7838 +       }
7839 +       spin_unlock_irqrestore(&bio_queue_lock, flags);
7840 +
7841 +       if (dedicated_thread) {
7842 +               wait_event(toi_io_queue_flusher, bio_queue_head ||
7843 +                               toi_bio_queue_flusher_should_finish);
7844 +               if (likely(!toi_bio_queue_flusher_should_finish))
7845 +                       goto top;
7846 +               toi_bio_queue_flusher_should_finish = 0;
7847 +       }
7848 +
7849 +       mutex_unlock(&busy);
7850 +       return result;
7851 +}
7852 +
7853 +/**
7854 + * toi_bio_get_new_page - get a new page for I/O
7855 + * @full_buffer: Pointer to a page to allocate.
7856 + **/
7857 +static int toi_bio_get_new_page(char **full_buffer)
7858 +{
7859 +       int result = throttle_if_needed(THROTTLE_WAIT);
7860 +       if (result)
7861 +               return result;
7862 +
7863 +       while (!*full_buffer) {
7864 +               *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
7865 +               if (!*full_buffer) {
7866 +                       set_free_mem_throttle();
7867 +                       do_bio_wait(3);
7868 +               }
7869 +       }
7870 +
7871 +       return 0;
7872 +}
7873 +
7874 +/**
7875 + * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O
7876 + * @writing:           Bool - whether writing (or reading).
7877 + * @buffer:            The start of the buffer to write or fill.
7878 + * @buffer_size:       The size of the buffer to write or fill.
7879 + * @no_readahead:      Don't try to start readahead (when getting extents).
7880 + **/
7881 +static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
7882 +               int no_readahead)
7883 +{
7884 +       int bytes_left = buffer_size, result = 0;
7885 +
7886 +       while (bytes_left) {
7887 +               char *source_start = buffer + buffer_size - bytes_left;
7888 +               char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
7889 +               int capacity = PAGE_SIZE - toi_writer_buffer_posn;
7890 +               char *to = writing ? dest_start : source_start;
7891 +               char *from = writing ? source_start : dest_start;
7892 +
7893 +               if (bytes_left <= capacity) {
7894 +                       memcpy(to, from, bytes_left);
7895 +                       toi_writer_buffer_posn += bytes_left;
7896 +                       return 0;
7897 +               }
7898 +
7899 +               /* Complete this page and start a new one */
7900 +               memcpy(to, from, capacity);
7901 +               bytes_left -= capacity;
7902 +
7903 +               if (!writing) {
7904 +                       /*
7905 +                        * Perform actual I/O:
7906 +                        * read readahead_list_head into toi_writer_buffer
7907 +                        */
7908 +                       int result = toi_bio_get_next_page_read(no_readahead);
7909 +                       if (result) {
7910 +                               printk(KERN_ERR "toi_bio_get_next_page_read "
7911 +                                               "returned %d.\n", result);
7912 +                               return result;
7913 +                       }
7914 +               } else {
7915 +                       toi_bio_queue_write(&toi_writer_buffer);
7916 +                       result = toi_bio_get_new_page(&toi_writer_buffer);
7917 +                       if (result) {
7918 +                               printk(KERN_ERR "toi_bio_get_new_page returned "
7919 +                                               "%d.\n", result);
7920 +                               return result;
7921 +                       }
7922 +               }
7923 +
7924 +               toi_writer_buffer_posn = 0;
7925 +               toi_cond_pause(0, NULL);
7926 +       }
7927 +
7928 +       return 0;
7929 +}
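+
+/*
+ * For example, writing a 12 byte chunk with toi_writer_buffer_posn at
+ * PAGE_SIZE - 8 copies 8 bytes, queues the full page for writing, then
+ * copies the remaining 4 bytes to the start of a fresh buffer page.
+ */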
7930 +
7931 +/**
7932 + * toi_bio_read_page - read a page of the image
7933 + * @pfn:               The pfn where the data belongs.
7934 + * @buffer_page:       The page containing the (possibly compressed) data.
7935 + * @buf_size:          The number of bytes on @buffer_page used (PAGE_SIZE).
7936 + *
7937 + * Read a (possibly compressed) page from the image, into buffer_page,
7938 + * returning its pfn and the buffer size.
7939 + **/
7940 +static int toi_bio_read_page(unsigned long *pfn, struct page *buffer_page,
7941 +               unsigned int *buf_size)
7942 +{
7943 +       int result = 0;
7944 +       int this_idx;
7945 +       char *buffer_virt = kmap(buffer_page);
7946 +
7947 +       /*
7948 +        * Only call start_new_readahead if we don't have a dedicated thread
7949 +        * and we're the queue flusher.
7950 +        */
7951 +       if (current == toi_queue_flusher && more_readahead) {
7952 +               int result2 = toi_start_new_readahead(0);
7953 +               if (result2) {
7954 +                       printk(KERN_DEBUG "Queue flusher and "
7955 +                        "toi_start_new_readahead returned non-zero.\n");
7956 +                       result = -EIO;
7957 +                       goto out;
7958 +               }
7959 +       }
7960 +
7961 +       my_mutex_lock(0, &toi_bio_mutex);
7962 +
7963 +       /*
7964 +        * Structure in the image:
7965 +        *      [page index|destination pfn|page size|page data]
7966 +        * buf_size is PAGE_SIZE
7967 +        */
7968 +       if (toi_rw_buffer(READ, (char *) &this_idx, sizeof(int), 0) ||
7969 +           toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) ||
7970 +           toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) ||
7971 +           toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
7972 +               abort_hibernate(TOI_FAILED_IO, "Read of data failed.");
7973 +               result = 1;
7974 +       }
7975 +
7976 +       if (reset_idx) {
7977 +               page_idx = this_idx;
7978 +               reset_idx = 0;
7979 +       } else {
7980 +               page_idx++;
7981 +               if (page_idx != this_idx)
7982 +                       printk(KERN_ERR "Got page index %d, expected %d.\n",
7983 +                                       this_idx, page_idx);
7984 +       }
7985 +
7986 +       my_mutex_unlock(0, &toi_bio_mutex);
7987 +out:
7988 +       kunmap(buffer_page);
7989 +       return result;
7990 +}
7991 +
7992 +/**
7993 + * toi_bio_write_page - write a page of the image
7994 + * @pfn:               The pfn where the data belongs.
7995 + * @buffer_page:       The page containing the (possibly compressed) data.
7996 + * @buf_size:  The number of bytes on @buffer_page used.
7997 + *
7998 + * Write a (possibly compressed) page to the image from the buffer, together
7999 + * with its index and buffer size.
8000 + **/
8001 +static int toi_bio_write_page(unsigned long pfn, struct page *buffer_page,
8002 +               unsigned int buf_size)
8003 +{
8004 +       char *buffer_virt;
8005 +       int result = 0, result2 = 0;
8006 +
8007 +       if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
8008 +               return 0;
8009 +
8010 +       my_mutex_lock(1, &toi_bio_mutex);
8011 +
8012 +       if (test_result_state(TOI_ABORTED)) {
8013 +               my_mutex_unlock(1, &toi_bio_mutex);
8014 +               return -EIO;
8015 +       }
8016 +
8017 +       buffer_virt = kmap(buffer_page);
8018 +       page_idx++;
8019 +
8020 +       /*
8021 +        * Structure in the image:
8022 +        *      [page index|destination pfn|page size|page data]
8023 +        * buf_size is PAGE_SIZE
8024 +        */
8025 +       if (toi_rw_buffer(WRITE, (char *) &page_idx, sizeof(int), 0) ||
8026 +           toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) ||
8027 +           toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) ||
8028 +           toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
8029 +               printk(KERN_DEBUG "toi_rw_buffer returned non-zero to "
8030 +                               "toi_bio_write_page.\n");
8031 +               result = -EIO;
8032 +       }
8033 +
8034 +       kunmap(buffer_page);
8035 +       my_mutex_unlock(1, &toi_bio_mutex);
8036 +
8037 +       if (current == toi_queue_flusher)
8038 +               result2 = toi_bio_queue_flush_pages(0);
8039 +
8040 +       return result ? result : result2;
8041 +}
8042 +
8043 +/**
8044 + * _toi_rw_header_chunk - read or write a portion of the image header
8045 + * @writing:           Whether reading or writing.
8046 + * @owner:             The module for which we're writing.
8047 + *                     Used for confirming that modules
8048 + *                     don't use more header space than they asked for.
8049 + * @buffer:            Address of the data to write.
8050 + * @buffer_size:       Size of the data buffer.
8051 + * @no_readahead:      Don't try to start readahead (when getting extents).
8052 + *
8053 + * Perform PAGE_SIZE I/O. Start readahead if needed.
8054 + **/
8055 +static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
8056 +               char *buffer, int buffer_size, int no_readahead)
8057 +{
8058 +       int result = 0;
8059 +
8060 +       if (owner) {
8061 +               owner->header_used += buffer_size;
8062 +               toi_message(TOI_HEADER, TOI_LOW, 1,
8063 +                       "Header: %s : %d bytes (%d/%d) from offset %d.",
8064 +                       owner->name,
8065 +                       buffer_size, owner->header_used,
8066 +                       owner->header_requested,
8067 +                       toi_writer_buffer_posn);
8068 +               if (owner->header_used > owner->header_requested && writing) {
8069 +                       printk(KERN_EMERG "TuxOnIce module %s is using more "
8070 +                               "header space (%u) than it requested (%u).\n",
8071 +                               owner->name,
8072 +                               owner->header_used,
8073 +                               owner->header_requested);
8074 +                       return buffer_size;
8075 +               }
8076 +       } else {
8077 +               unowned += buffer_size;
8078 +               toi_message(TOI_HEADER, TOI_LOW, 1,
8079 +                       "Header: (No owner): %d bytes (%d total so far) from "
8080 +                       "offset %d.", buffer_size, unowned,
8081 +                       toi_writer_buffer_posn);
8082 +       }
8083 +
8084 +       if (!writing && !no_readahead && more_readahead) {
8085 +               result = toi_start_new_readahead(0);
8086 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Start new readahead "
8087 +                               "returned %d.", result);
8088 +       }
8089 +
8090 +       if (!result) {
8091 +               result = toi_rw_buffer(writing, buffer, buffer_size,
8092 +                               no_readahead);
8093 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "rw_buffer returned "
8094 +                               "%d.", result);
8095 +       }
8096 +
8097 +       total_header_bytes += buffer_size;
8098 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning "
8099 +                       "%d.", result);
8100 +       return result;
8101 +}
8102 +
8103 +static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
8104 +               char *buffer, int size)
8105 +{
8106 +       return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
8107 +}
8108 +
8109 +static int toi_rw_header_chunk_noreadahead(int writing,
8110 +               struct toi_module_ops *owner, char *buffer, int size)
8111 +{
8112 +       return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
8113 +}
8114 +
8115 +/**
8116 + * toi_bio_storage_needed - get the amount of storage needed for my fns
8117 + **/
8118 +static int toi_bio_storage_needed(void)
8119 +{
8120 +       return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed();
8121 +}
8122 +
8123 +/**
8124 + * toi_bio_save_config_info - save block I/O config to image header
8125 + * @buf:       PAGE_SIZE'd buffer into which data should be saved.
8126 + **/
8127 +static int toi_bio_save_config_info(char *buf)
8128 +{
8129 +       int *ints = (int *) buf;
8130 +       ints[0] = target_outstanding_io;
8131 +       return sizeof(int);
8132 +}
8133 +
8134 +/**
8135 + * toi_bio_load_config_info - restore block I/O config
8136 + * @buf:       Data to be reloaded.
8137 + * @size:      Size of the buffer saved.
8138 + **/
8139 +static void toi_bio_load_config_info(char *buf, int size)
8140 +{
8141 +       int *ints = (int *) buf;
8142 +       target_outstanding_io  = ints[0];
8143 +}
8144 +
8145 +void close_resume_dev_t(int force)
8146 +{
8147 +       if (!resume_block_device)
8148 +               return;
8149 +
8150 +       if (force)
8151 +               atomic_set(&resume_bdev_open_count, 0);
8152 +       else
8153 +               atomic_dec(&resume_bdev_open_count);
8154 +
8155 +       if (!atomic_read(&resume_bdev_open_count)) {
8156 +               toi_close_bdev(resume_block_device);
8157 +               resume_block_device = NULL;
8158 +       }
8159 +}
8160 +
8161 +int open_resume_dev_t(int force, int quiet)
8162 +{
8163 +       if (force) {
8164 +               close_resume_dev_t(1);
8165 +               atomic_set(&resume_bdev_open_count, 1);
8166 +       } else
8167 +               atomic_inc(&resume_bdev_open_count);
8168 +
8169 +       if (resume_block_device)
8170 +               return 0;
8171 +
8172 +       resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0);
8173 +       if (IS_ERR(resume_block_device)) {
8174 +               if (!quiet)
8175 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
8176 +                               "Failed to open device %x, where"
8177 +                               " the header should be found.",
8178 +                               resume_dev_t);
8179 +               resume_block_device = NULL;
8180 +               atomic_set(&resume_bdev_open_count, 0);
8181 +               return 1;
8182 +       }
8183 +
8184 +       return 0;
8185 +}
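+
+/*
+ * open_resume_dev_t() and close_resume_dev_t() are reference counted:
+ * each open with force == 0 should be paired with a close, while
+ * force == 1 resets the count and reopens or closes unconditionally.
+ */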
8186 +
8187 +/**
8188 + * toi_bio_initialise - initialise bio code at start of some action
8189 + * @starting_cycle:    Whether starting a hibernation cycle, or just reading or
8190 + *                     writing a sysfs value.
8191 + **/
8192 +static int toi_bio_initialise(int starting_cycle)
8193 +{
8194 +       int result;
8195 +
8196 +       if (!starting_cycle || !resume_dev_t)
8197 +               return 0;
8198 +
8199 +       max_outstanding_writes = 0;
8200 +       max_outstanding_reads = 0;
8201 +       current_stream = 0;
8202 +       toi_queue_flusher = current;
8203 +#ifdef MEASURE_MUTEX_CONTENTION
8204 +       {
8205 +               int i, j, k;
8206 +
8207 +               for (i = 0; i < 2; i++)
8208 +                       for (j = 0; j < 2; j++)
8209 +                               for_each_online_cpu(k)
8210 +                                       mutex_times[i][j][k] = 0;
8211 +       }
8212 +#endif
8213 +       result = open_resume_dev_t(0, 1);
8214 +
8215 +       if (result)
8216 +               return result;
8217 +
8218 +       return get_signature_page();
8219 +}
8220 +
8221 +static unsigned long raw_to_real(unsigned long raw)
8222 +{
8223 +       unsigned long extra;
8224 +
8225 +       extra = (raw * (sizeof(unsigned long) + sizeof(int)) +
8226 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
8227 +               (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
8228 +
8229 +       return raw > extra ? raw - extra : 0;
8230 +}
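+
+/*
+ * Each data page stored costs sizeof(unsigned long) + sizeof(int) bytes
+ * of metadata, so raw storage shrinks by a factor of roughly
+ * PAGE_SIZE / (PAGE_SIZE + 12) - about 0.3% with 4K pages on 64 bit.
+ */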
8231 +
8232 +static unsigned long toi_bio_storage_available(void)
8233 +{
8234 +       unsigned long sum = 0;
8235 +       struct toi_module_ops *this_module;
8236 +
8237 +       list_for_each_entry(this_module, &toi_modules, module_list) {
8238 +               if (!this_module->enabled ||
8239 +                   this_module->type != BIO_ALLOCATOR_MODULE)
8240 +                       continue;
8241 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Seeking storage "
8242 +                               "available from %s.", this_module->name);
8243 +               sum += this_module->bio_allocator_ops->storage_available();
8244 +       }
8245 +
8246 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Total storage available is %lu "
8247 +                       "pages.", sum);
8248 +       return raw_to_real(sum - header_pages_reserved);
8249 +
8250 +}
8251 +
8252 +static unsigned long toi_bio_storage_allocated(void)
8253 +{
8254 +       return raw_pages_allocd > header_pages_reserved ?
8255 +               raw_to_real(raw_pages_allocd - header_pages_reserved) : 0;
8256 +}
8257 +
8258 +/*
8259 + * If we have read part of the image, we might have filled  memory with
8260 + * data that should be zeroed out.
8261 + */
8262 +static void toi_bio_noresume_reset(void)
8263 +{
8264 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_noresume_reset.");
8265 +       toi_rw_cleanup(READ);
8266 +       free_all_bdev_info();
8267 +}
8268 +
8269 +/**
8270 + * toi_bio_cleanup - cleanup after some action
8271 + * @finishing_cycle:   Whether completing a cycle.
8272 + **/
8273 +static void toi_bio_cleanup(int finishing_cycle)
8274 +{
8275 +       if (!finishing_cycle)
8276 +               return;
8277 +
8278 +       if (toi_writer_buffer) {
8279 +               toi_free_page(11, (unsigned long) toi_writer_buffer);
8280 +               toi_writer_buffer = NULL;
8281 +       }
8282 +
8283 +       forget_signature_page();
8284 +
8285 +       if (header_block_device && toi_sig_data &&
8286 +                       toi_sig_data->header_dev_t != resume_dev_t)
8287 +               toi_close_bdev(header_block_device);
8288 +
8289 +       header_block_device = NULL;
8290 +
8291 +       close_resume_dev_t(0);
8292 +}
8293 +
8294 +static int toi_bio_write_header_init(void)
8295 +{
8296 +       int result;
8297 +
8298 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_write_header_init");
8299 +       toi_rw_init(WRITE, 0);
8300 +       toi_writer_buffer_posn = 0;
8301 +
8302 +       /* Info needed to bootstrap goes at the start of the header.
8303 +        * First we save the positions and devinfo, including the number
8304 +        * of header pages. Then we save the structs containing the data
8305 +        * needed for reading the header pages back.
8306 +        * Note that even if this info takes more than one page, by the
8307 +        * time we go to use it when reading back, we will already have
8308 +        * restored the location of the next header page.
8309 +        */
8310 +
8311 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "serialise extent chains.");
8312 +       result = toi_serialise_extent_chains();
8313 +
8314 +       if (result)
8315 +               return result;
8316 +
8317 +       /*
8318 +        * Signature page hasn't been modified at this point. Write it in
8319 +        * the header so we can restore it later.
8320 +        */
8321 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "serialise signature page.");
8322 +       return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops,
8323 +                       (char *) toi_cur_sig_page,
8324 +                       PAGE_SIZE);
8325 +}
8326 +
8327 +static int toi_bio_write_header_cleanup(void)
8328 +{
8329 +       int result = 0;
8330 +
8331 +       if (toi_writer_buffer_posn)
8332 +               toi_bio_queue_write(&toi_writer_buffer);
8333 +
8334 +       result = toi_finish_all_io();
8335 +
8336 +       unowned = 0;
8337 +       total_header_bytes = 0;
8338 +
8339 +       /* Set the signature to say we have an image */
8340 +       if (!result)
8341 +               result = toi_bio_mark_have_image();
8342 +
8343 +       return result;
8344 +}
8345 +
8346 +/*
8347 + * toi_bio_read_header_init()
8348 + *
8349 + * Description:
8350 + * 1. Attempt to read the device specified with resume=.
8351 + * 2. Check the contents of the swap header for our signature.
8352 + * 3. Warn, ignore, reset and/or continue as appropriate.
8353 + * 4. If continuing, read the toi_swap configuration section
8354 + *    of the header and set up block device info so we can read
8355 + *    the rest of the header & image.
8356 + *
8357 + * Returns:
8358 + * May not return if the user chooses to reboot at a warning.
8359 + * -EINVAL if we cannot resume at this time. Booting should continue
8360 + * normally.
8361 + */
8362 +
8363 +static int toi_bio_read_header_init(void)
8364 +{
8365 +       int result = 0;
8366 +       char buf[32];
8367 +
8368 +       toi_writer_buffer_posn = 0;
8369 +
8370 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_read_header_init");
8371 +
8372 +       if (!toi_sig_data) {
8373 +               printk(KERN_INFO "toi_bio_read_header_init called when we "
8374 +                               "haven't verified there is an image!\n");
8375 +               return -EINVAL;
8376 +       }
8377 +
8378 +       /*
8379 +        * If the header is not on the resume_swap_dev_t, get the resume device
8380 +        * first.
8381 +        */
8382 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Header dev_t is %x.",
8383 +                       toi_sig_data->header_dev_t);
8384 +       if (toi_sig_data->have_uuid) {
8385 +               struct fs_info seek;
8386 +               dev_t device;
8387 +
8388 +               memcpy((char *) seek.uuid, toi_sig_data->header_uuid, 16);
8389 +               seek.dev_t = toi_sig_data->header_dev_t;
8390 +               seek.last_mount_size = 0;
8391 +               device = blk_lookup_fs_info(&seek);
8392 +               if (device) {
8393 +                       printk(KERN_INFO "Using dev_t %s, returned by blk_lookup_fs_info.\n",
8394 +                                       format_dev_t(buf, device));
8395 +                       toi_sig_data->header_dev_t = device;
8396 +               }
8397 +       }
8398 +       if (toi_sig_data->header_dev_t != resume_dev_t) {
8399 +               header_block_device = toi_open_bdev(NULL,
8400 +                               toi_sig_data->header_dev_t, 1);
8401 +
8402 +               if (IS_ERR(header_block_device))
8403 +                       return PTR_ERR(header_block_device);
8404 +       } else
8405 +               header_block_device = resume_block_device;
8406 +
8407 +       if (!toi_writer_buffer)
8408 +               toi_writer_buffer = (char *) toi_get_zeroed_page(11,
8409 +                               TOI_ATOMIC_GFP);
8410 +       more_readahead = 1;
8411 +
8412 +       /*
8413 +        * Read toi_swap configuration.
8414 +        * Header block size is already taken into account.
8415 +        */
8416 +       result = toi_bio_ops.bdev_page_io(READ, header_block_device,
8417 +                       toi_sig_data->first_header_block,
8418 +                       virt_to_page((unsigned long) toi_writer_buffer));
8419 +       if (result)
8420 +               return result;
8421 +
8422 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "load extent chains.");
8423 +       result = toi_load_extent_chains();
8424 +
8425 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "load original signature page.");
8426 +       toi_orig_sig_page = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
8427 +       if (!toi_orig_sig_page) {
8428 +               printk(KERN_ERR "Failed to allocate memory for the original"
8429 +                       " image signature.\n");
8430 +               return -ENOMEM;
8431 +       }
8432 +
8433 +       return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops,
8434 +                       (char *) toi_orig_sig_page,
8435 +                       PAGE_SIZE);
8436 +}
8437 +
8438 +static int toi_bio_read_header_cleanup(void)
8439 +{
8440 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup.");
8441 +       return toi_rw_cleanup(READ);
8442 +}
8443 +
8444 +/* Works only for digits and letters, but small and fast */
8445 +#define TOLOWER(x) ((x) | 0x20)
8446 +
8447 +/*
8448 + * The UUID must be 32 hex characters long. It may contain dashes,
8449 + * but nothing else.
8450 + */
8451 +char *uuid_from_commandline(char *commandline)
8452 +{
8453 +       int low = 0;
8454 +       char *result = NULL, *output, *ptr;
8455 +
8456 +       if (strncmp(commandline, "UUID=", 5))
8457 +               return NULL;
8458 +
8459 +       result = kzalloc(17, GFP_KERNEL);
8460 +       if (!result) {
8461 +               printk(KERN_ERR "Failed to kzalloc UUID text memory.\n");
8462 +               return NULL;
8463 +       }
8464 +
8465 +       ptr = commandline + 5;
8466 +       output = result;
8467 +
8468 +       while (*ptr && (output - result) < 16) {
8469 +               if (isxdigit(*ptr)) {
8470 +                       int value = isdigit(*ptr) ? *ptr - '0' :
8471 +                               TOLOWER(*ptr) - 'a' + 10;
8472 +                       if (low) {
8473 +                               *output += value;
8474 +                               output++;
8475 +                       } else {
8476 +                               *output = value << 4;
8477 +                       }
8478 +                       low = !low;
8479 +               } else if (*ptr != '-')
8480 +                       break;
8481 +               ptr++;
8482 +       }
8483 +
8484 +       if ((output - result) < 16 || *ptr) {
8485 +               printk(KERN_DEBUG "Found resume=UUID=, but the value looks "
8486 +                               "invalid.\n");
8487 +               kfree(result);
8488 +               result = NULL;
8489 +       }
8490 +
8491 +       return result;
8492 +}
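/*
 * Editorial usage sketch (hypothetical UUID value, not in the original
 * patch): the decoder above turns the textual command-line form into
 * the 16 raw bytes that blk_lookup_fs_info() compares against.
 */
static void example_uuid_decode(void)
{
	char *raw = uuid_from_commandline(
			"UUID=2c61e045-e5d6-4a4d-9ddb-4f1a9b4d5c30");

	if (raw) {
		/* raw[0] == 0x2c, raw[1] == 0x61, ..., raw[15] == 0x30 */
		kfree(raw);
	}
}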
8493 +
8494 +#define retry_if_fails(command) \
8495 +do { \
8496 +       command; \
8497 +       if (!resume_dev_t && !waited_for_device_probe) { \
8498 +               wait_for_device_probe(); \
8499 +               scsi_complete_async_scans(); \
8500 +               command; \
8501 +               waited_for_device_probe = 1; \
8502 +       } \
8503 +} while (0)
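/*
 * Editorial note, not in the original patch: retry_if_fails() runs its
 * argument once and, if no resume device was found on the first try,
 * waits for asynchronous device probing to finish before trying exactly
 * once more. For example,
 *
 *	retry_if_fails(resume_dev_t = name_to_dev_t(commandline));
 *
 * expands (in effect) to:
 *
 *	resume_dev_t = name_to_dev_t(commandline);
 *	if (!resume_dev_t && !waited_for_device_probe) {
 *		wait_for_device_probe();
 *		scsi_complete_async_scans();
 *		resume_dev_t = name_to_dev_t(commandline);
 *		waited_for_device_probe = 1;
 *	}
 */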
8504 +
8505 +/**
8506 + * try_to_open_resume_device: Try to parse and open resume=
8507 + *
8508 + * Any "swap:" has been stripped away and we just have the path to deal with.
8509 + * We attempt to do name_to_dev_t, open and stat the file. Having opened the
8510 + * file, get the struct block_device * to match.
8511 + */
8512 +static int try_to_open_resume_device(char *commandline, int quiet)
8513 +{
8514 +       struct kstat stat;
8515 +       int error = 0;
8516 +       char *uuid = uuid_from_commandline(commandline);
8517 +       int waited_for_device_probe = 0;
8518 +
8519 +       resume_dev_t = MKDEV(0, 0);
8520 +
8521 +       if (!strlen(commandline))
8522 +               retry_if_fails(toi_bio_scan_for_image(quiet));
8523 +
8524 +       if (uuid) {
8525 +               struct fs_info seek;
8526 +               memcpy((char *) &seek.uuid, uuid, 16);
8527 +               seek.dev_t = resume_dev_t;
8528 +               seek.last_mount_size = 0;
8529 +               retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek));
8530 +               kfree(uuid);
8531 +       }
8532 +
8533 +       if (!resume_dev_t)
8534 +               retry_if_fails(resume_dev_t = name_to_dev_t(commandline));
8535 +
8536 +       if (!resume_dev_t) {
8537 +               struct file *file = filp_open(commandline,
8538 +                               O_RDONLY|O_LARGEFILE, 0);
8539 +
8540 +               if (!IS_ERR(file) && file) {
8541 +                       vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
8542 +                       filp_close(file, NULL);
8543 +               } else
8544 +                       error = vfs_stat(commandline, &stat);
8545 +               if (!error)
8546 +                       resume_dev_t = stat.rdev;
8547 +       }
8548 +
8549 +       if (!resume_dev_t) {
8550 +               if (quiet)
8551 +                       return 1;
8552 +
8553 +               if (test_toi_state(TOI_TRYING_TO_RESUME))
8554 +                       toi_early_boot_message(1, toi_translate_err_default,
8555 +                         "Failed to translate \"%s\" into a device id.\n",
8556 +                         commandline);
8557 +               else
8558 +                       printk(KERN_INFO "TuxOnIce: Can't translate \"%s\" into a device "
8559 +                                       "id yet.\n", commandline);
8560 +               return 1;
8561 +       }
8562 +
8563 +       return open_resume_dev_t(1, quiet);
8564 +}
8565 +
8566 +/*
8567 + * Parse Image Location
8568 + *
8569 + * Attempt to parse a resume= parameter.
8570 + * Swap Writer accepts:
8571 + * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
8572 + *
8573 + * Where:
8574 + * DEVNAME is convertible to a dev_t by name_to_dev_t
8575 + * FIRSTBLOCK is the location of the first block in the swap file
8576 + * (specifying for a swap partition is nonsensical but not prohibited).
8577 + * Data is validated by attempting to read a swap header from the
8578 + * location given. Failure will result in toi_swap refusing to
8579 + * save an image, and a reboot with correct parameters will be
8580 + * necessary.
8581 + */
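/*
 * Editorial examples (hypothetical devices, not in the original patch)
 * of values the parser below accepts:
 *
 *	resume=swap:/dev/sda2		explicit swap allocator
 *	resume=/dev/sda2		bare device path
 *	resume=UUID=<32 hex digits>	resolved via blk_lookup_fs_info()
 *	resume=swap:/dev/sda1:0x10	device plus first block of a swap file
 *	resume=				empty: scan swap devices for an image
 */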
8582 +static int toi_bio_parse_sig_location(char *commandline,
8583 +               int only_allocator, int quiet)
8584 +{
8585 +       char *thischar, *devstart, *colon = NULL;
8586 +       int signature_found, result = -EINVAL, temp_result = 0;
8587 +
8588 +       if (strncmp(commandline, "swap:", 5) &&
8589 +           strncmp(commandline, "file:", 5)) {
8590 +               /*
8591 +                * Failing swap:, we'll take a simple resume=/dev/hda2, or a
8592 +                * blank value (scan) but fall through to other allocators
8593 +                * if /dev/ or UUID= isn't matched.
8594 +                */
8595 +               if (strncmp(commandline, "/dev/", 5) &&
8596 +                   strncmp(commandline, "UUID=", 5) &&
8597 +                   strlen(commandline))
8598 +                       return 1;
8599 +       } else
8600 +               commandline += 5;
8601 +
8602 +       devstart = commandline;
8603 +       thischar = commandline;
8604 +       while ((*thischar != ':') && (*thischar != '@') &&
8605 +               ((thischar - commandline) < 250) && (*thischar))
8606 +               thischar++;
8607 +
8608 +       if (*thischar == ':') {
8609 +               colon = thischar;
8610 +               *colon = 0;
8611 +               thischar++;
8612 +       }
8613 +
8614 +       while ((thischar - commandline) < 250 && *thischar)
8615 +               thischar++;
8616 +
8617 +       if (colon) {
8618 +               unsigned long block;
8619 +               temp_result = strict_strtoul(colon + 1, 0, &block);
8620 +               if (!temp_result)
8621 +                       resume_firstblock = block;
8622 +       } else
8623 +               resume_firstblock = 0;
8624 +
8625 +       clear_toi_state(TOI_CAN_HIBERNATE);
8626 +       clear_toi_state(TOI_CAN_RESUME);
8627 +
8628 +       if (!temp_result)
8629 +               temp_result = try_to_open_resume_device(devstart, quiet);
8630 +
8631 +       if (colon)
8632 +               *colon = ':';
8633 +
8634 +       /* No error if we only scanned */
8635 +       if (temp_result)
8636 +               return strlen(commandline) ? -EINVAL : 1;
8637 +
8638 +       signature_found = toi_bio_image_exists(quiet);
8639 +
8640 +       if (signature_found != -1) {
8641 +               result = 0;
8642 +               /*
8643 +                * TODO: If only file storage, CAN_HIBERNATE should only be
8644 +                * set if file allocator's target is valid.
8645 +                */
8646 +               set_toi_state(TOI_CAN_HIBERNATE);
8647 +               set_toi_state(TOI_CAN_RESUME);
8648 +       } else
8649 +               if (!quiet)
8650 +                       printk(KERN_ERR "TuxOnIce: Block I/O: No "
8651 +                               "signature found at %s.\n", devstart);
8652 +
8653 +       return result;
8654 +}
8655 +
8656 +static void toi_bio_release_storage(void)
8657 +{
8658 +       header_pages_reserved = 0;
8659 +       raw_pages_allocd = 0;
8660 +
8661 +       free_all_bdev_info();
8662 +}
8663 +
8664 +/**
8665 + * toi_bio_remove_image - restore the original signature and free storage
8666 + **/
8667 +static int toi_bio_remove_image(void)
8668 +{
8669 +       int result;
8670 +
8671 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_remove_image.");
8672 +
8673 +       result = toi_bio_restore_original_signature();
8674 +
8675 +       /*
8676 +        * We don't do a sanity check here: we want to restore the swap
8677 +        * signature whatever kernel version made the hibernate image.
8678 +        *
8679 +        * We need to write to swap, but swap may not be enabled, so
8680 +        * we write to the device directly.
8681 +        *
8682 +        * If we don't have a current signature page, we didn't
8683 +        * read an image header, so don't change anything.
8684 +        */
8685 +
8686 +       toi_bio_release_storage();
8687 +
8688 +       return result;
8689 +}
8690 +
8691 +struct toi_bio_ops toi_bio_ops = {
8692 +       .bdev_page_io = toi_bdev_page_io,
8693 +       .register_storage = toi_register_storage_chain,
8694 +       .free_storage = toi_bio_release_storage,
8695 +};
8696 +EXPORT_SYMBOL_GPL(toi_bio_ops);
8697 +
8698 +static struct toi_sysfs_data sysfs_params[] = {
8699 +       SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io,
8700 +                       0, 16384, 0, NULL),
8701 +};
8702 +
8703 +struct toi_module_ops toi_blockwriter_ops = {
8704 +       .type                           = WRITER_MODULE,
8705 +       .name                           = "block i/o",
8706 +       .directory                      = "block_io",
8707 +       .module                         = THIS_MODULE,
8708 +       .memory_needed                  = toi_bio_memory_needed,
8709 +       .print_debug_info               = toi_bio_print_debug_stats,
8710 +       .storage_needed                 = toi_bio_storage_needed,
8711 +       .save_config_info               = toi_bio_save_config_info,
8712 +       .load_config_info               = toi_bio_load_config_info,
8713 +       .initialise                     = toi_bio_initialise,
8714 +       .cleanup                        = toi_bio_cleanup,
8715 +       .post_atomic_restore            = toi_bio_chains_post_atomic,
8716 +
8717 +       .rw_init                        = toi_rw_init,
8718 +       .rw_cleanup                     = toi_rw_cleanup,
8719 +       .read_page                      = toi_bio_read_page,
8720 +       .write_page                     = toi_bio_write_page,
8721 +       .rw_header_chunk                = toi_rw_header_chunk,
8722 +       .rw_header_chunk_noreadahead    = toi_rw_header_chunk_noreadahead,
8723 +       .io_flusher                     = bio_io_flusher,
8724 +       .update_throughput_throttle     = update_throughput_throttle,
8725 +       .finish_all_io                  = toi_finish_all_io,
8726 +
8727 +       .noresume_reset                 = toi_bio_noresume_reset,
8728 +       .storage_available              = toi_bio_storage_available,
8729 +       .storage_allocated              = toi_bio_storage_allocated,
8730 +       .reserve_header_space           = toi_bio_reserve_header_space,
8731 +       .allocate_storage               = toi_bio_allocate_storage,
8732 +       .image_exists                   = toi_bio_image_exists,
8733 +       .mark_resume_attempted          = toi_bio_mark_resume_attempted,
8734 +       .write_header_init              = toi_bio_write_header_init,
8735 +       .write_header_cleanup           = toi_bio_write_header_cleanup,
8736 +       .read_header_init               = toi_bio_read_header_init,
8737 +       .read_header_cleanup            = toi_bio_read_header_cleanup,
8738 +       .get_header_version             = toi_bio_get_header_version,
8739 +       .remove_image                   = toi_bio_remove_image,
8740 +       .parse_sig_location             = toi_bio_parse_sig_location,
8741 +
8742 +       .sysfs_data                     = sysfs_params,
8743 +       .num_sysfs_entries              = sizeof(sysfs_params) /
8744 +               sizeof(struct toi_sysfs_data),
8745 +};
8746 +
8747 +/**
8748 + * toi_block_io_load - load time routine for block I/O module
8749 + *
8750 + * Register block i/o ops and sysfs entries.
8751 + **/
8752 +static __init int toi_block_io_load(void)
8753 +{
8754 +       return toi_register_module(&toi_blockwriter_ops);
8755 +}
8756 +
8757 +#ifdef MODULE
8758 +static __exit void toi_block_io_unload(void)
8759 +{
8760 +       toi_unregister_module(&toi_blockwriter_ops);
8761 +}
8762 +
8763 +module_init(toi_block_io_load);
8764 +module_exit(toi_block_io_unload);
8765 +MODULE_LICENSE("GPL");
8766 +MODULE_AUTHOR("Nigel Cunningham");
8767 +MODULE_DESCRIPTION("TuxOnIce block io functions");
8768 +#else
8769 +late_initcall(toi_block_io_load);
8770 +#endif
8771 diff --git a/kernel/power/tuxonice_bio_internal.h b/kernel/power/tuxonice_bio_internal.h
8772 new file mode 100644
8773 index 0000000..58c2481
8774 --- /dev/null
8775 +++ b/kernel/power/tuxonice_bio_internal.h
8776 @@ -0,0 +1,86 @@
8777 +/*
8778 + * kernel/power/tuxonice_bio_internal.h
8779 + *
8780 + * Copyright (C) 2009-2010 Nigel Cunningham (nigel at tuxonice net)
8781 + *
8782 + * Distributed under GPLv2.
8783 + *
8784 + * This file contains declarations for functions exported from
8785 + * tuxonice_bio.c, which contains low level io functions.
8786 + */
8787 +
8788 +/* Extent chains */
8789 +void toi_extent_state_goto_start(void);
8790 +void toi_extent_state_save(int slot);
8791 +int go_next_page(int writing, int section_barrier);
8792 +void toi_extent_state_restore(int slot);
8793 +void free_all_bdev_info(void);
8794 +int devices_of_same_priority(struct toi_bdev_info *this);
8795 +int toi_register_storage_chain(struct toi_bdev_info *new);
8796 +int toi_serialise_extent_chains(void);
8797 +int toi_load_extent_chains(void);
8798 +int toi_bio_rw_page(int writing, struct page *page, int is_readahead,
8799 +               int free_group);
8800 +int toi_bio_restore_original_signature(void);
8801 +int toi_bio_devinfo_storage_needed(void);
8802 +unsigned long get_headerblock(void);
8803 +dev_t get_header_dev_t(void);
8804 +struct block_device *get_header_bdev(void);
8805 +int toi_bio_allocate_storage(unsigned long request);
8806 +
8807 +/* Signature functions */
8808 +#define HaveImage "HaveImage"
8809 +#define NoImage "TuxOnIce"
8810 +#define sig_size (sizeof(HaveImage))
8811 +
8812 +struct sig_data {
8813 +       char sig[sig_size];
8814 +       int have_image;
8815 +       int resumed_before;
8816 +
8817 +       char have_uuid;
8818 +       char header_uuid[17];
8819 +       dev_t header_dev_t;
8820 +       unsigned long first_header_block;
8821 +
8822 +       /* Repeat the signature to be sure we have a header version */
8823 +       char sig2[sig_size];
8824 +       int header_version;
8825 +};
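/*
 * Editorial sketch, not in the original patch: the signature lives in a
 * single page (see get_signature_page()), so struct sig_data must fit
 * within PAGE_SIZE. A compile-time assertion such as the following
 * (BUILD_BUG_ON comes from <linux/kernel.h>) would document that
 * assumption:
 */
static inline void sig_data_fits_one_page(void)
{
	BUILD_BUG_ON(sizeof(struct sig_data) > PAGE_SIZE);
}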
8826 +
8827 +void forget_signature_page(void);
8828 +int toi_check_for_signature(void);
8829 +int toi_bio_image_exists(int quiet);
8830 +int get_signature_page(void);
8831 +int toi_bio_mark_resume_attempted(int);
8832 +extern char *toi_cur_sig_page;
8833 +extern char *toi_orig_sig_page;
8834 +int toi_bio_mark_have_image(void);
8835 +extern struct sig_data *toi_sig_data;
8836 +extern dev_t resume_dev_t;
8837 +extern struct block_device *resume_block_device;
8838 +extern struct block_device *header_block_device;
8839 +extern unsigned long resume_firstblock;
8840 +
8841 +struct block_device *open_bdev(dev_t device, int display_errs);
8842 +extern int current_stream;
8843 +extern int more_readahead;
8844 +int toi_do_io(int writing, struct block_device *bdev, long block0,
8845 +       struct page *page, int is_readahead, int syncio, int free_group);
8846 +int get_main_pool_phys_params(void);
8847 +
8848 +void toi_close_bdev(struct block_device *bdev);
8849 +struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
8850 +               int display_errs);
8851 +
8852 +extern struct toi_module_ops toi_blockwriter_ops;
8853 +void dump_block_chains(void);
8854 +void debug_broken_header(void);
8855 +extern unsigned long raw_pages_allocd, header_pages_reserved;
8856 +int toi_bio_chains_debug_info(char *buffer, int size);
8857 +void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd);
8858 +int toi_bio_scan_for_image(int quiet);
8859 +int toi_bio_get_header_version(void);
8860 +
8861 +void close_resume_dev_t(int force);
8862 +int open_resume_dev_t(int force, int quiet);
8863 diff --git a/kernel/power/tuxonice_bio_signature.c b/kernel/power/tuxonice_bio_signature.c
8864 new file mode 100644
8865 index 0000000..2ebee7e
8866 --- /dev/null
8867 +++ b/kernel/power/tuxonice_bio_signature.c
8868 @@ -0,0 +1,404 @@
8869 +/*
8870 + * kernel/power/tuxonice_bio_signature.c
8871 + *
8872 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
8873 + *
8874 + * Distributed under GPLv2.
8875 + *
8876 + */
8877 +
8878 +#include <linux/fs_uuid.h>
8879 +
8880 +#include "tuxonice.h"
8881 +#include "tuxonice_sysfs.h"
8882 +#include "tuxonice_modules.h"
8883 +#include "tuxonice_prepare_image.h"
8884 +#include "tuxonice_bio.h"
8885 +#include "tuxonice_ui.h"
8886 +#include "tuxonice_alloc.h"
8887 +#include "tuxonice_io.h"
8888 +#include "tuxonice_builtin.h"
8889 +#include "tuxonice_bio_internal.h"
8890 +
8891 +struct sig_data *toi_sig_data;
8892 +
8893 +/* Layouts used to interpret the signature page at the start of swap */
8894 +
8895 +struct old_sig_data {
8896 +       dev_t device;
8897 +       unsigned long sector;
8898 +       int resume_attempted;
8899 +       int orig_sig_type;
8900 +};
8901 +
8902 +union diskpage {
8903 +       union swap_header swh;  /* swh.magic is the only member used */
8904 +       struct sig_data sig_data;
8905 +       struct old_sig_data old_sig_data;
8906 +};
8907 +
8908 +union p_diskpage {
8909 +       union diskpage *pointer;
8910 +       char *ptr;
8911 +       unsigned long address;
8912 +};
8913 +
8914 +char *toi_cur_sig_page;
8915 +char *toi_orig_sig_page;
8916 +int have_image;
8917 +int have_old_image;
8918 +
8919 +int get_signature_page(void)
8920 +{
8921 +       if (!toi_cur_sig_page) {
8922 +               toi_message(TOI_IO, TOI_VERBOSE, 0,
8923 +                               "Allocating current signature page.");
8924 +               toi_cur_sig_page = (char *) toi_get_zeroed_page(38,
8925 +                       TOI_ATOMIC_GFP);
8926 +               if (!toi_cur_sig_page) {
8927 +                       printk(KERN_ERR "Failed to allocate memory for the "
8928 +                               "current image signature.\n");
8929 +                       return -ENOMEM;
8930 +               }
8931 +
8932 +               toi_sig_data = (struct sig_data *) toi_cur_sig_page;
8933 +       }
8934 +
8935 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading signature from dev %x,"
8936 +                       " sector %lu.",
8937 +                       resume_block_device->bd_dev, resume_firstblock);
8938 +
8939 +       return toi_bio_ops.bdev_page_io(READ, resume_block_device,
8940 +               resume_firstblock, virt_to_page(toi_cur_sig_page));
8941 +}
8942 +
8943 +void forget_signature_page(void)
8944 +{
8945 +       if (toi_cur_sig_page) {
8946 +               toi_sig_data = NULL;
8947 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_cur_sig_page"
8948 +                               " (%p).", toi_cur_sig_page);
8949 +               toi_free_page(38, (unsigned long) toi_cur_sig_page);
8950 +               toi_cur_sig_page = NULL;
8951 +       }
8952 +
8953 +       if (toi_orig_sig_page) {
8954 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_orig_sig_page"
8955 +                               " (%p).", toi_orig_sig_page);
8956 +               toi_free_page(38, (unsigned long) toi_orig_sig_page);
8957 +               toi_orig_sig_page = NULL;
8958 +       }
8959 +}
8960 +
8961 +/*
8962 + * We need to ensure we use the signature page that's currently on disk,
8963 + * so as not to remove the image header. Post-atomic-restore, the orig sig
8964 + * page will be empty, so we can use that as our way of knowing that we
8965 + * need to load the on-disk signature rather than the non-image sig in
8966 + * memory. (We're going to power down after writing the change, so it's safe.)
8967 + */
8968 +int toi_bio_mark_resume_attempted(int flag)
8969 +{
8970 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Make resume attempted = %d.",
8971 +                       flag);
8972 +       if (!toi_orig_sig_page) {
8973 +               forget_signature_page();
8974 +               get_signature_page();
8975 +       }
8976 +       toi_sig_data->resumed_before = flag;
8977 +       return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
8978 +               resume_firstblock, virt_to_page(toi_cur_sig_page));
8979 +}
8980 +
8981 +int toi_bio_mark_have_image(void)
8982 +{
8983 +       int result = 0;
8984 +       char buf[32];
8985 +       struct fs_info *fs_info;
8986 +
8987 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that an image exists.");
8988 +       memcpy(toi_sig_data->sig, tuxonice_signature,
8989 +                       sizeof(tuxonice_signature));
8990 +       toi_sig_data->have_image = 1;
8991 +       toi_sig_data->resumed_before = 0;
8992 +       toi_sig_data->header_dev_t = get_header_dev_t();
8993 +       toi_sig_data->have_uuid = 0;
8994 +
8995 +       fs_info = fs_info_from_block_dev(get_header_bdev());
8996 +       if (fs_info && !IS_ERR(fs_info)) {
8997 +               memcpy(toi_sig_data->header_uuid, &fs_info->uuid, 16);
8998 +               free_fs_info(fs_info);
8999 +       } else
9000 +               result = fs_info ? (int) PTR_ERR(fs_info) : -ENOENT;
9001 +
9002 +       if (!result) {
9003 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Got uuid for dev_t %s.",
9004 +                               format_dev_t(buf, get_header_dev_t()));
9005 +               toi_sig_data->have_uuid = 1;
9006 +       } else
9007 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Could not get uuid for "
9008 +                               "dev_t %s.",
9009 +                               format_dev_t(buf, get_header_dev_t()));
9010 +
9011 +       toi_sig_data->first_header_block = get_headerblock();
9012 +       have_image = 1;
9013 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is %x. First block "
9014 +                       "is %lu.", toi_sig_data->header_dev_t,
9015 +                       toi_sig_data->first_header_block);
9016 +
9017 +       memcpy(toi_sig_data->sig2, tuxonice_signature,
9018 +                       sizeof(tuxonice_signature));
9019 +       toi_sig_data->header_version = TOI_HEADER_VERSION;
9020 +
9021 +       return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
9022 +               resume_firstblock, virt_to_page(toi_cur_sig_page));
9023 +}
9024 +
9025 +int remove_old_signature(void)
9026 +{
9027 +       union p_diskpage swap_header_page = (union p_diskpage) toi_cur_sig_page;
9028 +       char *orig_sig;
9029 +       char *header_start = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
9030 +       int result;
9031 +       struct block_device *header_bdev;
9032 +       struct old_sig_data *old_sig_data =
9033 +               &swap_header_page.pointer->old_sig_data;
9034 +
9035 +       header_bdev = toi_open_bdev(NULL, old_sig_data->device, 1);
9036 +       result = toi_bio_ops.bdev_page_io(READ, header_bdev,
9037 +                       old_sig_data->sector, virt_to_page(header_start));
9038 +
9039 +       if (result)
9040 +               goto out;
9041 +
9042 +       /*
9043 +        * TODO: Get the original contents of the first bytes of the swap
9044 +        * header page.
9045 +        */
9046 +       if (!old_sig_data->orig_sig_type)
9047 +               orig_sig = "SWAP-SPACE";
9048 +       else
9049 +               orig_sig = "SWAPSPACE2";
9050 +
9051 +       memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10);
9052 +       memcpy(swap_header_page.ptr, header_start,
9053 +                       sizeof(no_image_signature_contents));
9054 +
9055 +       result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
9056 +               resume_firstblock, virt_to_page(swap_header_page.ptr));
9057 +
9058 +out:
9059 +       toi_close_bdev(header_bdev);
9060 +       have_old_image = 0;
9061 +       toi_free_page(38, (unsigned long) header_start);
9062 +       return result;
9063 +}
9064 +
9065 +/*
9066 + * toi_bio_restore_original_signature - restore the original signature
9067 + *
9068 + * At boot time (aborting pre atomic-restore), toi_orig_sig_page gets used.
9069 + * It will have the original signature page contents, stored in the image
9070 + * header. Post atomic-restore, we use :toi_cur_sig_page, which will contain
9071 + * the contents that were loaded when we started the cycle.
9072 + */
9073 +int toi_bio_restore_original_signature(void)
9074 +{
9075 +       char *use = toi_orig_sig_page ? toi_orig_sig_page : toi_cur_sig_page;
9076 +
9077 +       if (have_old_image)
9078 +               return remove_old_signature();
9079 +
9080 +       if (!use) {
9081 +               printk(KERN_WARNING "toi_bio_restore_original_signature: No signature "
9082 +                               "page loaded.\n");
9083 +               return 0;
9084 +       }
9085 +
9086 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that no image exists.");
9087 +       have_image = 0;
9088 +       toi_sig_data->have_image = 0;
9089 +       return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
9090 +               resume_firstblock, virt_to_page(use));
9091 +}
9092 +
9093 +/*
9094 + * toi_check_for_signature - See whether we have an image.
9095 + *
9096 + * Returns 0 if no image, 1 if there is one, 2 for a swsusp/uswsusp
9097 + * image, 3 for an old TuxOnIce image, -1 if indeterminate.
9097 + */
9098 +int toi_check_for_signature(void)
9099 +{
9100 +       union p_diskpage swap_header_page;
9101 +       int type;
9102 +       const char *normal_sigs[] = {"SWAP-SPACE", "SWAPSPACE2"};
9103 +       const char *swsusp_sigs[] = {"S1SUSP", "S2SUSP", "S1SUSPEND"};
9104 +       char *swap_header;
9105 +
9106 +       if (!toi_cur_sig_page) {
9107 +               int result = get_signature_page();
9108 +
9109 +               if (result)
9110 +                       return result;
9111 +       }
9112 +
9113 +       /*
9114 +        * Start by looking for the binary header.
9115 +        */
9116 +       if (!memcmp(tuxonice_signature, toi_cur_sig_page,
9117 +                               sizeof(tuxonice_signature))) {
9118 +               have_image = toi_sig_data->have_image;
9119 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Have binary signature. "
9120 +                               "Have image is %d.", have_image);
9121 +               if (have_image)
9122 +                       toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is "
9123 +                                       "%x. First block is %lu.",
9124 +                                       toi_sig_data->header_dev_t,
9125 +                                       toi_sig_data->first_header_block);
9126 +               return toi_sig_data->have_image;
9127 +       }
9128 +
9129 +       /*
9130 +        * Failing that, try old file allocator headers.
9131 +        */
9132 +
9133 +       if (!memcmp(HaveImage, toi_cur_sig_page, strlen(HaveImage))) {
9134 +               have_image = 1;
9135 +               return 1;
9136 +       }
9137 +
9138 +       have_image = 0;
9139 +
9140 +       if (!memcmp(NoImage, toi_cur_sig_page, strlen(NoImage)))
9141 +               return 0;
9142 +
9143 +       /*
9144 +        * Nope? How about swap?
9145 +        */
9146 +       swap_header_page = (union p_diskpage) toi_cur_sig_page;
9147 +       swap_header = swap_header_page.pointer->swh.magic.magic;
9148 +
9149 +       /* Normal swapspace? */
9150 +       for (type = 0; type < 2; type++)
9151 +               if (!memcmp(normal_sigs[type], swap_header,
9152 +                                       strlen(normal_sigs[type])))
9153 +                       return 0;
9154 +
9155 +       /* Swsusp or uswsusp? */
9156 +       for (type = 0; type < 3; type++)
9157 +               if (!memcmp(swsusp_sigs[type], swap_header,
9158 +                                       strlen(swsusp_sigs[type])))
9159 +                       return 2;
9160 +
9161 +       /* Old TuxOnIce version? */
9162 +       if (!memcmp(tuxonice_signature, swap_header,
9163 +                               sizeof(tuxonice_signature) - 1)) {
9164 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Found old TuxOnIce "
9165 +                               "signature.");
9166 +               have_old_image = 1;
9167 +               return 3;
9168 +       }
9169 +
9170 +       return -1;
9171 +}
9172 +
9173 +/*
9174 + * toi_bio_image_exists
9175 + *
9176 + * Returns -1 if we don't know, otherwise the toi_check_for_signature()
9177 + * result: 0 (no image), 1 (image), 2 (swsusp/uswsusp), 3 (old TuxOnIce).
9177 + */
9178 +int toi_bio_image_exists(int quiet)
9179 +{
9180 +       int result;
9181 +       char *msg = NULL;
9182 +
9183 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_image_exists.");
9184 +
9185 +       if (!resume_dev_t) {
9186 +               if (!quiet)
9187 +                       printk(KERN_INFO "Not even trying to read header "
9188 +                               "because resume_dev_t is not set.\n");
9189 +               return -1;
9190 +       }
9191 +
9192 +       if (open_resume_dev_t(0, quiet))
9193 +               return -1;
9194 +
9195 +       result = toi_check_for_signature();
9196 +
9197 +       clear_toi_state(TOI_RESUMED_BEFORE);
9198 +       if (toi_sig_data && toi_sig_data->resumed_before)
9199 +               set_toi_state(TOI_RESUMED_BEFORE);
9200 +
9201 +       if (quiet || result == -ENOMEM)
9202 +               return result;
9203 +
9204 +       if (result == -1)
9205 +               msg = "TuxOnIce: Unable to find a signature."
9206 +                               " Could you have moved a swap file?\n";
9207 +       else if (!result)
9208 +               msg = "TuxOnIce: No image found.\n";
9209 +       else if (result == 1)
9210 +               msg = "TuxOnIce: Image found.\n";
9211 +       else if (result == 2)
9212 +               msg = "TuxOnIce: uswsusp or swsusp image found.\n";
9213 +       else if (result == 3)
9214 +               msg = "TuxOnIce: Old implementation's signature found.\n";
9215 +
9216 +       printk(KERN_INFO "%s", msg);
9217 +
9218 +       return result;
9219 +}
9220 +
9221 +int toi_bio_scan_for_image(int quiet)
9222 +{
9223 +       struct block_device *bdev;
9224 +       char default_name[255] = "";
9225 +
9226 +       if (!quiet)
9227 +               printk(KERN_DEBUG "Scanning swap devices for TuxOnIce "
9228 +                               "signature...\n");
9229 +       for (bdev = next_bdev_of_type(NULL, "swap"); bdev;
9230 +                               bdev = next_bdev_of_type(bdev, "swap")) {
9231 +               int result;
9232 +               char name[255] = "";
9233 +               sprintf(name, "%u:%u", MAJOR(bdev->bd_dev),
9234 +                               MINOR(bdev->bd_dev));
9235 +               if (!quiet)
9236 +                       printk(KERN_DEBUG "- Trying %s.\n", name);
9237 +               resume_block_device = bdev;
9238 +               resume_dev_t = bdev->bd_dev;
9239 +
9240 +               result = toi_check_for_signature();
9241 +
9242 +               resume_block_device = NULL;
9243 +               resume_dev_t = MKDEV(0, 0);
9244 +
9245 +               if (!default_name[0])
9246 +                       strcpy(default_name, name);
9247 +
9248 +               if (result == 1) {
9249 +                       /* Got one! */
9250 +                       strcpy(resume_file, name);
9251 +                       next_bdev_of_type(bdev, NULL);
9252 +                       if (!quiet)
9253 +                               printk(KERN_DEBUG " ==> Image found on %s.\n",
9254 +                                               resume_file);
9255 +                       return 1;
9256 +               }
9257 +               forget_signature_page();
9258 +       }
9259 +
9260 +       if (!quiet)
9261 +               printk(KERN_DEBUG "TuxOnIce scan: No image found.\n");
9262 +       strcpy(resume_file, default_name);
9263 +       return 0;
9264 +}
9265 +
9266 +int toi_bio_get_header_version(void)
9267 +{
9268 +       return (memcmp(toi_sig_data->sig2, tuxonice_signature,
9269 +                               sizeof(tuxonice_signature))) ?
9270 +               0 : toi_sig_data->header_version;
9271 +}
9273 diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c
9274 new file mode 100644
9275 index 0000000..bc967d6
9276 --- /dev/null
9277 +++ b/kernel/power/tuxonice_builtin.c
9278 @@ -0,0 +1,372 @@
9279 +/*
9280 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
9281 + *
9282 + * This file is released under the GPLv2.
9283 + */
9284 +#include <linux/resume-trace.h>
9285 +#include <linux/kernel.h>
9286 +#include <linux/swap.h>
9287 +#include <linux/syscalls.h>
9288 +#include <linux/bio.h>
9289 +#include <linux/root_dev.h>
9290 +#include <linux/freezer.h>
9291 +#include <linux/reboot.h>
9292 +#include <linux/writeback.h>
9293 +#include <linux/tty.h>
9294 +#include <linux/crypto.h>
9295 +#include <linux/cpu.h>
9296 +#include <linux/ctype.h>
9297 +#include "tuxonice_io.h"
9298 +#include "tuxonice.h"
9299 +#include "tuxonice_extent.h"
9300 +#include "tuxonice_netlink.h"
9301 +#include "tuxonice_prepare_image.h"
9302 +#include "tuxonice_ui.h"
9303 +#include "tuxonice_sysfs.h"
9304 +#include "tuxonice_pagedir.h"
9305 +#include "tuxonice_modules.h"
9306 +#include "tuxonice_builtin.h"
9307 +#include "tuxonice_power_off.h"
9308 +
9309 +/*
9310 + * Highmem related functions (x86 only).
9311 + */
9312 +
9313 +#ifdef CONFIG_HIGHMEM
9314 +
9315 +/**
9316 + * copyback_high: Restore highmem pages.
9317 + *
9318 + * Highmem data and pbe lists are/can be stored in highmem.
9319 + * The format is slightly different to the lowmem pbe lists
9320 + * used for the assembly code: the last pbe in each page is
9321 + * a struct page * instead of struct pbe *, pointing to the
9322 + * next page where pbes are stored (or NULL if happens to be
9323 + * the end of the list). Since we don't want to generate
9324 + * unnecessary deltas against swsusp code, we use a cast
9325 + * instead of a union.
9326 + **/
9327 +
9328 +static void copyback_high(void)
9329 +{
9330 +       struct page *pbe_page = (struct page *) restore_highmem_pblist;
9331 +       struct pbe *this_pbe, *first_pbe;
9332 +       unsigned long *origpage, *copypage;
9333 +       int pbe_index = 1;
9334 +
9335 +       if (!pbe_page)
9336 +               return;
9337 +
9338 +       this_pbe = (struct pbe *) kmap_atomic(pbe_page, KM_BOUNCE_READ);
9339 +       first_pbe = this_pbe;
9340 +
9341 +       while (this_pbe) {
9342 +               int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1;
9343 +
9344 +               origpage = kmap_atomic((struct page *) this_pbe->orig_address,
9345 +                       KM_BIO_DST_IRQ);
9346 +               copypage = kmap_atomic((struct page *) this_pbe->address,
9347 +                       KM_BIO_SRC_IRQ);
9348 +
9349 +               while (loop >= 0) {
9350 +                       *(origpage + loop) = *(copypage + loop);
9351 +                       loop--;
9352 +               }
9353 +
9354 +               kunmap_atomic(origpage, KM_BIO_DST_IRQ);
9355 +               kunmap_atomic(copypage, KM_BIO_SRC_IRQ);
9356 +
9357 +               if (!this_pbe->next)
9358 +                       break;
9359 +
9360 +               if (pbe_index < PBES_PER_PAGE) {
9361 +                       this_pbe++;
9362 +                       pbe_index++;
9363 +               } else {
9364 +                       pbe_page = (struct page *) this_pbe->next;
9365 +                       kunmap_atomic(first_pbe, KM_BOUNCE_READ);
9366 +                       if (!pbe_page)
9367 +                               return;
9368 +                       this_pbe = (struct pbe *) kmap_atomic(pbe_page,
9369 +                                       KM_BOUNCE_READ);
9370 +                       first_pbe = this_pbe;
9371 +                       pbe_index = 1;
9372 +               }
9373 +       }
9374 +       kunmap_atomic(first_pbe, KM_BOUNCE_READ);
9375 +}
9376 +
9377 +#else /* CONFIG_HIGHMEM */
9378 +static void copyback_high(void) { }
9379 +#endif
9380 +
9381 +char toi_wait_for_keypress_dev_console(int timeout)
9382 +{
9383 +       int fd, this_timeout = 255;
9384 +       char key = '\0';
9385 +       struct termios t, t_backup;
9386 +
9387 +       /* We should be guaranteed /dev/console exists after populate_rootfs()
9388 +        * in init/main.c.
9389 +        */
9390 +       fd = sys_open("/dev/console", O_RDONLY, 0);
9391 +       if (fd < 0) {
9392 +               printk(KERN_INFO "Couldn't open /dev/console.\n");
9393 +               return key;
9394 +       }
9395 +
9396 +       if (sys_ioctl(fd, TCGETS, (long)&t) < 0)
9397 +               goto out_close;
9398 +
9399 +       memcpy(&t_backup, &t, sizeof(t));
9400 +
9401 +       t.c_lflag &= ~(ISIG|ICANON|ECHO);
9402 +       t.c_cc[VMIN] = 0;
9403 +
9404 +new_timeout:
9405 +       if (timeout > 0) {
9406 +               this_timeout = timeout < 26 ? timeout : 25;
9407 +               timeout -= this_timeout;
9408 +               this_timeout *= 10;
9409 +       }
9410 +
9411 +       t.c_cc[VTIME] = this_timeout;
9412 +
9413 +       if (sys_ioctl(fd, TCSETS, (long)&t) < 0)
9414 +               goto out_restore;
9415 +
9416 +       while (1) {
9417 +               if (sys_read(fd, &key, 1) <= 0) {
9418 +                       if (timeout)
9419 +                               goto new_timeout;
9420 +                       key = '\0';
9421 +                       break;
9422 +               }
9423 +               key = tolower(key);
9424 +               if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) {
9425 +                       if (key == 'c') {
9426 +                               set_toi_state(TOI_CONTINUE_REQ);
9427 +                               break;
9428 +                       } else if (key == ' ')
9429 +                               break;
9430 +               } else
9431 +                       break;
9432 +       }
9433 +
9434 +out_restore:
9435 +       sys_ioctl(fd, TCSETS, (long)&t_backup);
9436 +out_close:
9437 +       sys_close(fd);
9438 +
9439 +       return key;
9440 +}
9441 +EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console);
9442 +
9443 +struct toi_boot_kernel_data toi_bkd __nosavedata
9444 +               __attribute__((aligned(PAGE_SIZE))) = {
9445 +       MY_BOOT_KERNEL_DATA_VERSION,
9446 +       0,
9447 +#ifdef CONFIG_TOI_REPLACE_SWSUSP
9448 +       (1 << TOI_REPLACE_SWSUSP) |
9449 +#endif
9450 +       (1 << TOI_NO_FLUSHER_THREAD) |
9451 +       (1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG),
9452 +};
9453 +EXPORT_SYMBOL_GPL(toi_bkd);
9454 +
9455 +struct block_device *toi_open_by_devnum(dev_t dev)
9456 +{
9457 +       struct block_device *bdev = bdget(dev);
9458 +       int err = -ENOMEM;
9459 +       if (bdev)
9460 +               err = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY);
9461 +       return err ? ERR_PTR(err) : bdev;
9462 +}
9463 +EXPORT_SYMBOL_GPL(toi_open_by_devnum);
9464 +
9465 +/**
9466 + * toi_close_bdev: Close a swap bdev.
9467 + *
9468 + * @bdev: The block device to close.
9469 + */
9470 +void toi_close_bdev(struct block_device *bdev)
9471 +{
9472 +       blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
9473 +}
9474 +EXPORT_SYMBOL_GPL(toi_close_bdev);
9475 +
9476 +int toi_wait = CONFIG_TOI_DEFAULT_WAIT;
9477 +EXPORT_SYMBOL_GPL(toi_wait);
9478 +
9479 +struct toi_core_fns *toi_core_fns;
9480 +EXPORT_SYMBOL_GPL(toi_core_fns);
9481 +
9482 +unsigned long toi_result;
9483 +EXPORT_SYMBOL_GPL(toi_result);
9484 +
9485 +struct pagedir pagedir1 = {1};
9486 +EXPORT_SYMBOL_GPL(pagedir1);
9487 +
9488 +unsigned long toi_get_nonconflicting_page(void)
9489 +{
9490 +       return toi_core_fns->get_nonconflicting_page();
9491 +}
9492 +
9493 +int toi_post_context_save(void)
9494 +{
9495 +       return toi_core_fns->post_context_save();
9496 +}
9497 +
9498 +int try_tuxonice_hibernate(void)
9499 +{
9500 +       if (!toi_core_fns)
9501 +               return -ENODEV;
9502 +
9503 +       return toi_core_fns->try_hibernate();
9504 +}
9505 +
9506 +static int num_resume_calls;
9507 +#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL
9508 +static int ignore_late_initcall = 1;
9509 +#else
9510 +static int ignore_late_initcall;
9511 +#endif
9512 +
9513 +int toi_translate_err_default = TOI_CONTINUE_REQ;
9514 +EXPORT_SYMBOL_GPL(toi_translate_err_default);
9515 +
9516 +void try_tuxonice_resume(void)
9517 +{
9518 +       /* Don't let it wrap around eventually */
9519 +       if (num_resume_calls < 2)
9520 +               num_resume_calls++;
9521 +
9522 +       if (num_resume_calls == 1 && ignore_late_initcall) {
9523 +               printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n");
9524 +               return;
9525 +       }
9526 +
9527 +       if (toi_core_fns)
9528 +               toi_core_fns->try_resume();
9529 +       else
9530 +               printk(KERN_INFO "TuxOnIce core not loaded yet.\n");
9531 +}
9532 +
9533 +int toi_lowlevel_builtin(void)
9534 +{
9535 +       int error = 0;
9536 +
9537 +       save_processor_state();
9538 +       error = swsusp_arch_suspend();
9539 +       if (error)
9540 +               printk(KERN_ERR "Error %d hibernating\n", error);
9541 +
9542 +       /* Restore control flow appears here */
9543 +       if (!toi_in_hibernate) {
9544 +               copyback_high();
9545 +               set_toi_state(TOI_NOW_RESUMING);
9546 +       }
9547 +
9548 +       restore_processor_state();
9549 +
9550 +       return error;
9551 +}
9552 +EXPORT_SYMBOL_GPL(toi_lowlevel_builtin);
9553 +
9554 +unsigned long toi_compress_bytes_in;
9555 +EXPORT_SYMBOL_GPL(toi_compress_bytes_in);
9556 +
9557 +unsigned long toi_compress_bytes_out;
9558 +EXPORT_SYMBOL_GPL(toi_compress_bytes_out);
9559 +
9560 +unsigned long toi_state = ((1 << TOI_BOOT_TIME) |
9561 +               (1 << TOI_IGNORE_LOGLEVEL) |
9562 +               (1 << TOI_IO_STOPPED));
9563 +EXPORT_SYMBOL_GPL(toi_state);
9564 +
9565 +/* The number of hibernates we have started (some may have been cancelled) */
9566 +unsigned int nr_hibernates;
9567 +EXPORT_SYMBOL_GPL(nr_hibernates);
9568 +
9569 +int toi_running;
9570 +EXPORT_SYMBOL_GPL(toi_running);
9571 +
9572 +__nosavedata int toi_in_hibernate;
9573 +EXPORT_SYMBOL_GPL(toi_in_hibernate);
9574 +
9575 +__nosavedata struct pbe *restore_highmem_pblist;
9576 +EXPORT_SYMBOL_GPL(restore_highmem_pblist);
9577 +
9578 +void toi_read_lock_tasklist(void)
9579 +{
9580 +       read_lock(&tasklist_lock);
9581 +}
9582 +EXPORT_SYMBOL_GPL(toi_read_lock_tasklist);
9583 +
9584 +void toi_read_unlock_tasklist(void)
9585 +{
9586 +       read_unlock(&tasklist_lock);
9587 +}
9588 +EXPORT_SYMBOL_GPL(toi_read_unlock_tasklist);
9589 +
9590 +static int __init toi_wait_setup(char *str)
9591 +{
9592 +       int value;
9593 +
9594 +       if (sscanf(str, "=%d", &value)) {
9595 +               if (value < -1 || value > 255)
9596 +                       printk(KERN_INFO "TuxOnIce_wait outside range -1 to "
9597 +                                       "255.\n");
9598 +               else
9599 +                       toi_wait = value;
9600 +       }
9601 +
9602 +       return 1;
9603 +}
9604 +
9605 +__setup("toi_wait", toi_wait_setup);
9606 +
9607 +static int __init toi_translate_retry_setup(char *str)
9608 +{
9609 +       toi_translate_err_default = 0;
9610 +       return 1;
9611 +}
9612 +
9613 +__setup("toi_translate_retry", toi_translate_retry_setup);
9614 +
9615 +static int __init toi_debug_setup(char *str)
9616 +{
9617 +       toi_bkd.toi_action |= (1 << TOI_LOGALL) | (1 << TOI_PAUSE);
9618 +       toi_bkd.toi_debug_state = 255;
9619 +       toi_bkd.toi_default_console_level = 7;
9620 +       return 1;
9621 +}
9622 +
9623 +__setup("toi_debug_setup", toi_debug_setup);
9624 +
9625 +static int __init toi_ignore_late_initcall_setup(char *str)
9626 +{
9627 +       int value;
9628 +
9629 +       if (sscanf(str, "=%d", &value))
9630 +               ignore_late_initcall = value;
9631 +
9632 +       return 1;
9633 +}
9634 +
9635 +__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup);
9636 +
9637 +int toi_force_no_multithreaded;
9638 +EXPORT_SYMBOL_GPL(toi_force_no_multithreaded);
9639 +
9640 +static int __init toi_force_no_multithreaded_setup(char *str)
9641 +{
9642 +       int value;
9643 +
9644 +       if (sscanf(str, "=%d", &value))
9645 +               toi_force_no_multithreaded = value;
9646 +
9647 +       return 1;
9648 +}
9649 +
9650 +__setup("toi_no_multithreaded", toi_force_no_multithreaded_setup);
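/*
 * Editorial example (hypothetical values, not in the original patch):
 * the boot parameters declared in this file combine on the kernel
 * command line, e.g.
 *
 *	resume=swap:/dev/sda2 toi_wait=10 toi_no_multithreaded=1 \
 *		toi_initramfs_resume_only=1
 */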
9651 diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h
9652 new file mode 100644
9653 index 0000000..ab67d31
9654 --- /dev/null
9655 +++ b/kernel/power/tuxonice_builtin.h
9656 @@ -0,0 +1,32 @@
9657 +/*
9658 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
9659 + *
9660 + * This file is released under the GPLv2.
9661 + */
9662 +#include <asm/setup.h>
9663 +
9664 +extern struct toi_core_fns *toi_core_fns;
9665 +extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
9666 +extern unsigned int nr_hibernates;
9667 +extern int toi_in_hibernate;
9668 +
9669 +extern __nosavedata struct pbe *restore_highmem_pblist;
9670 +
9671 +int toi_lowlevel_builtin(void);
9672 +
9673 +#ifdef CONFIG_HIGHMEM
9674 +extern __nosavedata struct zone_data *toi_nosave_zone_list;
9675 +extern __nosavedata unsigned long toi_nosave_max_pfn;
9676 +#endif
9677 +
9678 +extern unsigned long toi_get_nonconflicting_page(void);
9679 +extern int toi_post_context_save(void);
9680 +
9681 +extern char toi_wait_for_keypress_dev_console(int timeout);
9682 +extern struct block_device *toi_open_by_devnum(dev_t dev);
9683 +extern void toi_close_bdev(struct block_device *bdev);
9684 +extern int toi_wait;
9685 +extern int toi_translate_err_default;
9686 +extern int toi_force_no_multithreaded;
9687 +extern void toi_read_lock_tasklist(void);
9688 +extern void toi_read_unlock_tasklist(void);
9689 diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c
9690 new file mode 100644
9691 index 0000000..3ec2c76
9692 --- /dev/null
9693 +++ b/kernel/power/tuxonice_checksum.c
9694 @@ -0,0 +1,377 @@
9695 +/*
9696 + * kernel/power/tuxonice_checksum.c
9697 + *
9698 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
9699 + *
9700 + * This file is released under the GPLv2.
9701 + *
9702 + * This file contains data checksum routines for TuxOnIce,
9703 + * using cryptoapi. They are used to locate any modifications
9704 + * made to pageset 2 while we're saving it.
9705 + */
9706 +
9707 +#include <linux/suspend.h>
9708 +#include <linux/highmem.h>
9709 +#include <linux/vmalloc.h>
9710 +#include <linux/crypto.h>
9711 +#include <linux/scatterlist.h>
9712 +
9713 +#include "tuxonice.h"
9714 +#include "tuxonice_modules.h"
9715 +#include "tuxonice_sysfs.h"
9716 +#include "tuxonice_io.h"
9717 +#include "tuxonice_pageflags.h"
9718 +#include "tuxonice_checksum.h"
9719 +#include "tuxonice_pagedir.h"
9720 +#include "tuxonice_alloc.h"
9721 +
9722 +static struct toi_module_ops toi_checksum_ops;
9723 +
9724 +/* Constant at the moment, but I might allow tuning later */
9725 +static char toi_checksum_name[32] = "md4";
9726 +/* Bytes per checksum */
9727 +#define CHECKSUM_SIZE (16)
9728 +
9729 +#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE)
9730 +
9731 +struct cpu_context {
9732 +       struct crypto_hash *transform;
9733 +       struct hash_desc desc;
9734 +       struct scatterlist sg[2];
9735 +       char *buf;
9736 +};
9737 +
9738 +static DEFINE_PER_CPU(struct cpu_context, contexts);
9739 +static int pages_allocated;
9740 +static unsigned long page_list;
9741 +
9742 +static int toi_num_resaved;
9743 +
9744 +static unsigned long this_checksum, next_page;
9745 +static int checksum_index;
9746 +
9747 +static inline int checksum_pages_needed(void)
9748 +{
9749 +       return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE);
9750 +}
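/*
 * Editorial worked example, not in the original patch: with 4 KiB pages
 * and the 16-byte md4 digest above, CHECKSUMS_PER_PAGE is
 * (4096 - 8) / 16 = 255 on a 64-bit kernel, so checksumming a pageset
 * of 100000 pages needs DIV_ROUND_UP(100000, 255) = 393 checksum pages.
 */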
9751 +
9752 +/* ---- Local buffer management ---- */
9753 +
9754 +/*
9755 + * toi_checksum_cleanup
9756 + *
9757 + * Frees memory allocated for our labours.
9758 + */
9759 +static void toi_checksum_cleanup(int ending_cycle)
9760 +{
9761 +       int cpu;
9762 +
9763 +       if (ending_cycle) {
9764 +               for_each_online_cpu(cpu) {
9765 +                       struct cpu_context *this = &per_cpu(contexts, cpu);
9766 +                       if (this->transform) {
9767 +                               crypto_free_hash(this->transform);
9768 +                               this->transform = NULL;
9769 +                               this->desc.tfm = NULL;
9770 +                       }
9771 +
9772 +                       if (this->buf) {
9773 +                               toi_free_page(27, (unsigned long) this->buf);
9774 +                               this->buf = NULL;
9775 +                       }
9776 +               }
9777 +       }
9778 +}
9779 +
9780 +/*
9781 + * toi_checksum_initialise
9782 + *
9783 + * Prepare to do some work by allocating buffers and transforms.
9784 + * Returns: Int: Zero on success; one if we could not set up
9785 + * checksumming.
9786 + */
9787 +static int toi_checksum_initialise(int starting_cycle)
9788 +{
9789 +       int cpu;
9790 +
9791 +       if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled)
9792 +               return 0;
9793 +
9794 +       if (!*toi_checksum_name) {
9795 +               printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n");
9796 +               return 1;
9797 +       }
9798 +
9799 +       for_each_online_cpu(cpu) {
9800 +               struct cpu_context *this = &per_cpu(contexts, cpu);
9801 +               struct page *page;
9802 +
9803 +               this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0);
9804 +               if (IS_ERR(this->transform)) {
9805 +                       printk(KERN_INFO "TuxOnIce: Failed to initialise the "
9806 +                               "%s checksum algorithm: %ld.\n",
9807 +                               toi_checksum_name, (long) this->transform);
9808 +                       this->transform = NULL;
9809 +                       return 1;
9810 +               }
9811 +
9812 +               this->desc.tfm = this->transform;
9813 +               this->desc.flags = 0;
9814 +
9815 +               page = toi_alloc_page(27, GFP_KERNEL);
9816 +               if (!page)
9817 +                       return 1;
9818 +               this->buf = page_address(page);
9819 +               sg_init_one(&this->sg[0], this->buf, PAGE_SIZE);
9820 +       }
9821 +       return 0;
9822 +}
9823 +
9824 +/*
9825 + * toi_checksum_print_debug_stats
9826 + * @buffer: Pointer to a buffer into which the debug info will be printed.
9827 + * @size: Size of the buffer.
9828 + *
9829 + * Print information to be recorded for debugging purposes into a buffer.
9830 + * Returns: Number of characters written to the buffer.
9831 + */
9832 +
9833 +static int toi_checksum_print_debug_stats(char *buffer, int size)
9834 +{
9835 +       int len;
9836 +
9837 +       if (!toi_checksum_ops.enabled)
9838 +               return scnprintf(buffer, size,
9839 +                       "- Checksumming disabled.\n");
9840 +
9841 +       len = scnprintf(buffer, size, "- Checksum method is '%s'.\n",
9842 +                       toi_checksum_name);
9843 +       len += scnprintf(buffer + len, size - len,
9844 +               "  %d pages resaved in atomic copy.\n", toi_num_resaved);
9845 +       return len;
9846 +}
9847 +
9848 +static int toi_checksum_memory_needed(void)
9849 +{
9850 +       return toi_checksum_ops.enabled ?
9851 +               checksum_pages_needed() << PAGE_SHIFT : 0;
9852 +}
9853 +
9854 +static int toi_checksum_storage_needed(void)
9855 +{
9856 +       if (toi_checksum_ops.enabled)
9857 +               return strlen(toi_checksum_name) + sizeof(int) + 1;
9858 +       else
9859 +               return 0;
9860 +}
9861 +
9862 +/*
9863 + * toi_checksum_save_config_info
9864 + * @buffer: Pointer to a buffer of size PAGE_SIZE.
9865 + *
9866 + * Save information needed when reloading the image at resume time.
9867 + * Returns: Number of bytes used for saving our data.
9868 + */
9869 +static int toi_checksum_save_config_info(char *buffer)
9870 +{
9871 +       int namelen = strlen(toi_checksum_name) + 1;
9872 +       int total_len;
9873 +
9874 +       *((unsigned int *) buffer) = namelen;
9875 +       strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen);
9876 +       total_len = sizeof(unsigned int) + namelen;
9877 +       return total_len;
9878 +}
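+/*
+ * Layout example (illustrative): for the default "md4", namelen is 4
+ * ("md4" plus the NUL), so the buffer holds the 4-byte length followed
+ * by "md4\0" and the function returns 4 + 4 = 8 bytes.
+ */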
9879 +
9880 +/* toi_checksum_load_config_info
9881 + * @buffer: Pointer to the start of the data.
9882 + * @size: Number of bytes that were saved.
9883 + *
9884 + * Description:        Reload information needed for dechecksumming the image at
9885 + * resume time.
9886 + */
9887 +static void toi_checksum_load_config_info(char *buffer, int size)
9888 +{
9889 +       int namelen;
9890 +
9891 +       namelen = *((unsigned int *) (buffer));
9892 +       if (namelen > sizeof(toi_checksum_name))
9893 +               namelen = sizeof(toi_checksum_name);
9894 +       strncpy(toi_checksum_name, buffer + sizeof(unsigned int), namelen);
9895 +}
9896 +
9897 +/*
9898 + * Free Checksum Memory
9899 + */
9900 +
9901 +void free_checksum_pages(void)
9902 +{
9903 +       while (pages_allocated) {
9904 +               unsigned long next = *((unsigned long *) page_list);
9905 +               ClearPageNosave(virt_to_page(page_list));
9906 +               toi_free_page(15, (unsigned long) page_list);
9907 +               page_list = next;
9908 +               pages_allocated--;
9909 +       }
9910 +}
9911 +
9912 +/*
9913 + * Allocate Checksum Memory
9914 + */
9915 +
9916 +int allocate_checksum_pages(void)
9917 +{
9918 +       int pages_needed = checksum_pages_needed();
9919 +
9920 +       if (!toi_checksum_ops.enabled)
9921 +               return 0;
9922 +
9923 +       while (pages_allocated < pages_needed) {
9924 +               unsigned long *new_page =
9925 +                 (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP);
9926 +               if (!new_page) {
9927 +                       printk(KERN_ERR "Unable to allocate checksum pages.\n");
9928 +                       return -ENOMEM;
9929 +               }
9930 +               SetPageNosave(virt_to_page(new_page));
9931 +               (*new_page) = page_list;
9932 +               page_list = (unsigned long) new_page;
9933 +               pages_allocated++;
9934 +       }
9935 +
9936 +       next_page = (unsigned long) page_list;
9937 +       checksum_index = 0;
9938 +
9939 +       return 0;
9940 +}
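+/*
+ * Chain layout sketch (illustrative): allocate_checksum_pages() builds a
+ * singly linked list threaded through the first word of each page, which
+ * tuxonice_get_next_checksum() then walks:
+ *
+ *   page_list -> [ next | cksum | cksum | ... ] -> [ next | ... ] -> 0
+ */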
9941 +
9942 +char *tuxonice_get_next_checksum(void)
9943 +{
9944 +       if (!toi_checksum_ops.enabled)
9945 +               return NULL;
9946 +
9947 +       if (checksum_index % CHECKSUMS_PER_PAGE)
9948 +               this_checksum += CHECKSUM_SIZE;
9949 +       else {
9950 +               this_checksum = next_page + sizeof(void *);
9951 +               next_page = *((unsigned long *) next_page);
9952 +       }
9953 +
9954 +       checksum_index++;
9955 +       return (char *) this_checksum;
9956 +}
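+/*
+ * Minimal usage sketch (illustrative only, not called anywhere): how the
+ * I/O path is expected to pair the two helpers when saving a pageset 2
+ * page; 'page' stands for the struct page being written.
+ */
+#if 0
+static int example_checksum_one_page(struct page *page)
+{
+       char *locn = tuxonice_get_next_checksum();
+
+       if (!locn)      /* Checksumming disabled */
+               return 0;
+
+       return tuxonice_calc_checksum(page, locn);
+}
+#endif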
9957 +
9958 +int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
9959 +{
9960 +       char *pa;
9961 +       int result, cpu = smp_processor_id();
9962 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
9963 +
9964 +       if (!toi_checksum_ops.enabled)
9965 +               return 0;
9966 +
9967 +       pa = kmap(page);
9968 +       memcpy(ctx->buf, pa, PAGE_SIZE);
9969 +       kunmap(page);
9970 +       result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
9971 +                                               checksum_locn);
9972 +       if (result)
9973 +               printk(KERN_ERR "TuxOnIce checksumming: crypto_hash_digest "
9974 +                               "returned %d.\n", result);
9975 +       return result;
9976 +}
9977 +/*
9978 + * Calculate checksums
9979 + */
9980 +
9981 +void check_checksums(void)
9982 +{
9983 +       unsigned long pfn;
9984 +       int index = 0, cpu = smp_processor_id();
9984 +       char current_checksum[CHECKSUM_SIZE];
9985 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
9986 +
9987 +       if (!toi_checksum_ops.enabled)
9988 +               return;
9989 +
9990 +       next_page = (unsigned long) page_list;
9991 +
9992 +       toi_num_resaved = 0;
9993 +       this_checksum = 0;
9994 +
9995 +       memory_bm_position_reset(pageset2_map);
9996 +       for (pfn = memory_bm_next_pfn(pageset2_map); pfn != BM_END_OF_MAP;
9997 +                       pfn = memory_bm_next_pfn(pageset2_map)) {
9998 +               int ret;
9999 +               char *pa;
10000 +               struct page *page = pfn_to_page(pfn);
10001 +
10002 +               if (index % CHECKSUMS_PER_PAGE) {
10003 +                       this_checksum += CHECKSUM_SIZE;
10004 +               } else {
10005 +                       this_checksum = next_page + sizeof(void *);
10006 +                       next_page = *((unsigned long *) next_page);
10007 +               }
10008 +
10009 +               /* Done when IRQs disabled so must be atomic */
10010 +               pa = kmap_atomic(page, KM_USER1);
10011 +               memcpy(ctx->buf, pa, PAGE_SIZE);
10012 +               kunmap_atomic(pa, KM_USER1);
10013 +               ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
10014 +                                                       current_checksum);
10015 +
10016 +               if (ret) {
10017 +                       printk(KERN_INFO "Digest failed. Returned %d.\n", ret);
10018 +                       return;
10019 +               }
10020 +
10021 +               if (memcmp(current_checksum, (char *) this_checksum,
10022 +                                                       CHECKSUM_SIZE)) {
10023 +                       SetPageResave(pfn_to_page(pfn));
10024 +                       toi_num_resaved++;
10025 +                       if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED))
10026 +                               set_abort_result(TOI_RESAVE_NEEDED);
10027 +               }
10028 +
10029 +               index++;
10030 +       }
10031 +}
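+/*
+ * Example of the resave path (illustrative): if a pageset 2 page was
+ * dirtied between checksumming and the atomic copy, the memcmp() above
+ * fails, the page is flagged with SetPageResave() and counted in
+ * toi_num_resaved, which toi_checksum_print_debug_stats() later reports.
+ */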
10032 +
10033 +static struct toi_sysfs_data sysfs_params[] = {
10034 +       SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0,
10035 +                       NULL),
10036 +       SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action,
10037 +                       TOI_ABORT_ON_RESAVE_NEEDED, 0)
10038 +};
10039 +
10040 +/*
10041 + * Ops structure.
10042 + */
10043 +static struct toi_module_ops toi_checksum_ops = {
10044 +       .type                   = MISC_MODULE,
10045 +       .name                   = "checksumming",
10046 +       .directory              = "checksum",
10047 +       .module                 = THIS_MODULE,
10048 +       .initialise             = toi_checksum_initialise,
10049 +       .cleanup                = toi_checksum_cleanup,
10050 +       .print_debug_info       = toi_checksum_print_debug_stats,
10051 +       .save_config_info       = toi_checksum_save_config_info,
10052 +       .load_config_info       = toi_checksum_load_config_info,
10053 +       .memory_needed          = toi_checksum_memory_needed,
10054 +       .storage_needed         = toi_checksum_storage_needed,
10055 +
10056 +       .sysfs_data             = sysfs_params,
10057 +       .num_sysfs_entries      = sizeof(sysfs_params) /
10058 +               sizeof(struct toi_sysfs_data),
10059 +};
10060 +
10061 +/* ---- Registration ---- */
10062 +int toi_checksum_init(void)
10063 +{
10064 +       int result = toi_register_module(&toi_checksum_ops);
10065 +       return result;
10066 +}
10067 +
10068 +void toi_checksum_exit(void)
10069 +{
10070 +       toi_unregister_module(&toi_checksum_ops);
10071 +}
10072 diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h
10073 new file mode 100644
10074 index 0000000..0f2812e
10075 --- /dev/null
10076 +++ b/kernel/power/tuxonice_checksum.h
10077 @@ -0,0 +1,31 @@
10078 +/*
10079 + * kernel/power/tuxonice_checksum.h
10080 + *
10081 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
10082 + *
10083 + * This file is released under the GPLv2.
10084 + *
10085 + * This file contains data checksum routines for TuxOnIce,
10086 + * using cryptoapi. They are used to locate any modifications
10087 + * made to pageset 2 while we're saving it.
10088 + */
10089 +
10090 +#if defined(CONFIG_TOI_CHECKSUM)
10091 +extern int toi_checksum_init(void);
10092 +extern void toi_checksum_exit(void);
10093 +void check_checksums(void);
10094 +int allocate_checksum_pages(void);
10095 +void free_checksum_pages(void);
10096 +char *tuxonice_get_next_checksum(void);
10097 +int tuxonice_calc_checksum(struct page *page, char *checksum_locn);
10098 +#else
10099 +static inline int toi_checksum_init(void) { return 0; }
10100 +static inline void toi_checksum_exit(void) { }
10101 +static inline void check_checksums(void) { }
10102 +static inline int allocate_checksum_pages(void) { return 0; }
10103 +static inline void free_checksum_pages(void) { }
10104 +static inline char *tuxonice_get_next_checksum(void) { return NULL; }
10105 +static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
10106 +       { return 0; }
10107 +#endif
10108 +
10109 diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c
10110 new file mode 100644
10111 index 0000000..0e5a262
10112 --- /dev/null
10113 +++ b/kernel/power/tuxonice_cluster.c
10114 @@ -0,0 +1,1069 @@
10115 +/*
10116 + * kernel/power/tuxonice_cluster.c
10117 + *
10118 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
10119 + *
10120 + * This file is released under the GPLv2.
10121 + *
10122 + * This file contains routines for cluster hibernation support.
10123 + *
10124 + * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
10125 + *
10126 + * How does it work?
10127 + *
10128 + * There is no 'master' node that tells everyone else what to do. All nodes
10129 + * send messages to the broadcast address/port, maintain a list of peers
10130 + * and figure out when to progress to the next step in hibernating or resuming.
10131 + * This makes us more fault tolerant when it comes to nodes coming and going
10132 + * (which may be more of an issue if we're hibernating when power supplies
10133 + * are unreliable).
10134 + *
10135 + * At boot time, we start a ktoiclusterd thread that handles communication with
10136 + * other nodes. This thread maintains a state machine that controls our progress
10137 + * through hibernating and resuming, keeping us in step with other nodes. Nodes
10138 + * are identified by their hw address.
10139 + *
10140 + * On startup, the node sends CLUSTER_PING on the configured interface's
10141 + * broadcast address, port $toi_cluster_port (see below) and begins to listen
10142 + * for other broadcast messages. CLUSTER_PING messages are repeated at
10143 + * intervals of 5 minutes, with a random offset to spread traffic out.
10144 + *
10145 + * A hibernation cycle is initiated from any node via
10146 + *
10147 + * echo > /sys/power/tuxonice/do_hibernate
10148 + *
10149 + * and (possibly) the hibernate script. At each step of the process, the node
10150 + * completes its work, and waits for all other nodes to signal completion of
10151 + * their work (or timeout) before progressing to the next step.
10152 + *
10153 + * Request/state  Action before reply   Possible reply  Next state
10154 + * HIBERNATE      capable, pre-script   HIBERNATE|ACK   NODE_PREP
10155 + *                                      HIBERNATE|NACK  INIT_0
10156 + *
10157 + * PREP           prepare_image         PREP|ACK        IMAGE_WRITE
10158 + *                                      PREP|NACK       INIT_0
10159 + *                                      ABORT           RUNNING
10160 + *
10161 + * IO             write image           IO|ACK          power off
10162 + *                                      ABORT           POST_RESUME
10163 + *
10164 + * (Boot time)    check for image       IMAGE|ACK       RESUME_PREP
10165 + *                                      (Note 1)
10166 + *                                      IMAGE|NACK      (Note 2)
10167 + *
10168 + * PREP           prepare read image    PREP|ACK        IMAGE_READ
10169 + *                                      PREP|NACK       (As NACK_IMAGE)
10170 + *
10171 + * IO             read image            IO|ACK          POST_RESUME
10172 + *
10173 + * POST_RESUME    thaw, post-script                     RUNNING
10174 + *
10175 + * INIT_0         init 0
10176 + *
10177 + * Other messages:
10178 + *
10179 + * - PING: Request for all other live nodes to send a PONG. Used at startup to
10180 + *   announce presence, when a node is suspected dead and periodically, in case
10181 + *   segments of the network are [un]plugged.
10182 + *
10183 + * - PONG: Response to a PING.
10184 + *
10185 + * - ABORT: Request to cancel writing an image.
10186 + *
10187 + * - BYE: Notification that this node is shutting down.
10188 + *
10189 + * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
10190 + * nodes which are slower to start up can get state synchronised. If a node
10191 + * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
10192 + * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
10193 + * must invalidate its image (if any) and boot normally.
10194 + *
10195 + * Note 2: May occur when one node lost power or powered off while others
10196 + * hibernated. This node waits for others to complete resuming (ACK_READ)
10197 + * before completing its boot, so that it appears as a failed node restarting.
10198 + *
10199 + * If any node has an image, then it also has a list of nodes that hibernated
10200 + * in synchronisation with it. The node will wait for other nodes to appear
10201 + * or timeout before beginning its restoration.
10202 + *
10203 + * If a node has no image, it needs to wait, in case other nodes which do have
10204 + * an image are going to resume, but are taking longer to announce their
10205 + * presence. For this reason, the user can specify a timeout value and a number
10206 + * of nodes detected before we just continue. (We might want to assume in a
10207 + * cluster of, say, 15 nodes, if 8 others have booted without finding an image,
10208 + * the remaining nodes will too. This might help in situations where some nodes
10209 + * are much slower to boot, or more subject to hardware failures or such like).
10210 + */
10211 +
10212 +#include <linux/suspend.h>
10213 +#include <linux/module.h>
10214 +#include <linux/moduleparam.h>
10215 +#include <linux/if.h>
10216 +#include <linux/rtnetlink.h>
10217 +#include <linux/ip.h>
10218 +#include <linux/udp.h>
10219 +#include <linux/in.h>
10220 +#include <linux/if_arp.h>
10221 +#include <linux/kthread.h>
10222 +#include <linux/wait.h>
10223 +#include <linux/netdevice.h>
10224 +#include <net/ip.h>
10225 +
10226 +#include "tuxonice.h"
10227 +#include "tuxonice_modules.h"
10228 +#include "tuxonice_sysfs.h"
10229 +#include "tuxonice_alloc.h"
10230 +#include "tuxonice_io.h"
10231 +
10232 +#if 1
10233 +#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
10234 +#else
10235 +#define PRINTK(a, b...) do { } while (0)
10236 +#endif
10237 +
10238 +static int loopback_mode;
10239 +static int num_local_nodes = 1;
10240 +#define MAX_LOCAL_NODES 8
10241 +#define SADDR (loopback_mode ? b->sid : h->saddr)
10242 +
10243 +#define MYNAME "TuxOnIce Clustering"
10244 +
10245 +enum cluster_message {
10246 +       MSG_ACK = 1,
10247 +       MSG_NACK = 2,
10248 +       MSG_PING = 4,
10249 +       MSG_ABORT = 8,
10250 +       MSG_BYE = 16,
10251 +       MSG_HIBERNATE = 32,
10252 +       MSG_IMAGE = 64,
10253 +       MSG_IO = 128,
10254 +       MSG_RUNNING = 256
10255 +};
10256 +
10257 +static char *str_message(int message)
10258 +{
10259 +       switch (message) {
10260 +       case 4:
10261 +               return "Ping";
10262 +       case 8:
10263 +               return "Abort";
10264 +       case 9:
10265 +               return "Abort acked";
10266 +       case 10:
10267 +               return "Abort nacked";
10268 +       case 16:
10269 +               return "Bye";
10270 +       case 17:
10271 +               return "Bye acked";
10272 +       case 18:
10273 +               return "Bye nacked";
10274 +       case 32:
10275 +               return "Hibernate request";
10276 +       case 33:
10277 +               return "Hibernate ack";
10278 +       case 34:
10279 +               return "Hibernate nack";
10280 +       case 64:
10281 +               return "Image exists?";
10282 +       case 65:
10283 +               return "Image does exist";
10284 +       case 66:
10285 +               return "No image here";
10286 +       case 128:
10287 +               return "I/O";
10288 +       case 129:
10289 +               return "I/O okay";
10290 +       case 130:
10291 +               return "I/O failed";
10292 +       case 256:
10293 +               return "Running";
10294 +       default:
10295 +               printk(KERN_ERR "Unrecognised message %d.\n", message);
10296 +               return "Unrecognised message (see dmesg)";
10297 +       }
10298 +}
10299 +
10300 +#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
10301 +#define MSG_STATE_MASK (~MSG_ACK_MASK)
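+/*
+ * Example (illustrative): a reply of (MSG_HIBERNATE | MSG_ACK) has the
+ * value 33; masking with MSG_STATE_MASK yields 32 (the hibernate
+ * request) and masking with MSG_ACK_MASK yields 1 (the ack bit), which
+ * is how toi_recv() splits incoming messages below.
+ */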
10302 +
10303 +struct node_info {
10304 +       struct list_head member_list;
10305 +       wait_queue_head_t member_events;
10306 +       spinlock_t member_list_lock;
10307 +       spinlock_t receive_lock;
10308 +       int peer_count, ignored_peer_count;
10309 +       struct toi_sysfs_data sysfs_data;
10310 +       enum cluster_message current_message;
10311 +};
10312 +
10313 +struct node_info node_array[MAX_LOCAL_NODES];
10314 +
10315 +struct cluster_member {
10316 +       __be32 addr;
10317 +       enum cluster_message message;
10318 +       struct list_head list;
10319 +       int ignore;
10320 +};
10321 +
10322 +#define toi_cluster_port_send 3501
10323 +#define toi_cluster_port_recv 3502
10324 +
10325 +static struct net_device *net_dev;
10326 +static struct toi_module_ops toi_cluster_ops;
10327 +
10328 +static int toi_recv(struct sk_buff *skb, struct net_device *dev,
10329 +               struct packet_type *pt, struct net_device *orig_dev);
10330 +
10331 +static struct packet_type toi_cluster_packet_type = {
10332 +       .type = __constant_htons(ETH_P_IP),
10333 +       .func = toi_recv,
10334 +};
10335 +
10336 +struct toi_pkt {               /* BOOTP packet format */
10337 +       struct iphdr iph;       /* IP header */
10338 +       struct udphdr udph;     /* UDP header */
10339 +       u8 htype;               /* HW address type */
10340 +       u8 hlen;                /* HW address length */
10341 +       __be32 xid;             /* Transaction ID */
10342 +       __be16 secs;            /* Seconds since we started */
10343 +       __be16 flags;           /* Just what it says */
10344 +       u8 hw_addr[16];         /* Sender's HW address */
10345 +       u16 message;            /* Message */
10346 +       unsigned long sid;      /* Source ID for loopback testing */
10347 +};
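+/*
+ * Wire-format note: the layout above deliberately mirrors a BOOTP packet,
+ * so each cluster message is a single broadcast UDP datagram. toi_recv()
+ * drops fragments, and reliability comes only from the periodic resend in
+ * kTOICluster().
+ */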
10348 +
10349 +static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;
10350 +
10351 +static int added_pack;
10352 +
10353 +static int others_have_image;
10354 +
10355 +/* Key used to allow multiple clusters on the same lan */
10356 +static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
10357 +static char pre_hibernate_script[256] =
10358 +       CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
10359 +static char post_hibernate_script[256] =
10360 +       CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;
10361 +
10362 +/*                     Timeouts and delays                     */
10363 +static unsigned long continue_delay = 5 * HZ;
10364 +static unsigned long cluster_message_timeout = 3 * HZ;
10365 +
10366 +/*             === Membership list ===         */
10367 +
10368 +static void print_member_info(int index)
10369 +{
10370 +       struct cluster_member *this;
10371 +
10372 +       printk(KERN_INFO "==> Dumping node %d.\n", index);
10373 +
10374 +       list_for_each_entry(this, &node_array[index].member_list, list)
10375 +               printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
10376 +                               NIPQUAD(this->addr),
10377 +                               str_message(this->message),
10378 +                               this->ignore ? "(Ignored)" : "");
10379 +       printk(KERN_INFO "== Done ==\n");
10380 +}
10381 +
10382 +static struct cluster_member *__find_member(int index, __be32 addr)
10383 +{
10384 +       struct cluster_member *this;
10385 +
10386 +       list_for_each_entry(this, &node_array[index].member_list, list) {
10387 +               if (this->addr != addr)
10388 +                       continue;
10389 +
10390 +               return this;
10391 +       }
10392 +
10393 +       return NULL;
10394 +}
10395 +
10396 +static void set_ignore(int index, __be32 addr, struct cluster_member *this)
10397 +{
10398 +       if (this->ignore) {
10399 +               PRINTK("Node %d already ignoring %d.%d.%d.%d.\n",
10400 +                               index, NIPQUAD(addr));
10401 +               return;
10402 +       }
10403 +
10404 +       PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n",
10405 +                               index, NIPQUAD(addr));
10406 +       this->ignore = 1;
10407 +       node_array[index].ignored_peer_count++;
10408 +}
10409 +
10410 +static int __add_update_member(int index, __be32 addr, int message)
10411 +{
10412 +       struct cluster_member *this;
10413 +
10414 +       this = __find_member(index, addr);
10415 +       if (this) {
10416 +               if (this->message != message) {
10417 +                       this->message = message;
10418 +                       if ((message & MSG_NACK) &&
10419 +                           (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
10420 +                               set_ignore(index, addr, this);
10421 +                       PRINTK("Node %d sees node %d.%d.%d.%d now sending "
10422 +                                       "%s.\n", index, NIPQUAD(addr),
10423 +                                       str_message(message));
10424 +                       wake_up(&node_array[index].member_events);
10425 +               }
10426 +               return 0;
10427 +       }
10428 +
10429 +       this = (struct cluster_member *) toi_kzalloc(36,
10430 +                       sizeof(struct cluster_member), GFP_KERNEL);
10431 +
10432 +       if (!this)
10433 +               return -1;
10434 +
10435 +       this->addr = addr;
10436 +       this->message = message;
10437 +       this->ignore = 0;
10438 +       INIT_LIST_HEAD(&this->list);
10439 +
10440 +       node_array[index].peer_count++;
10441 +
10442 +       PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
10443 +                       NIPQUAD(addr), str_message(message));
10444 +
10445 +       if ((message & MSG_NACK) &&
10446 +           (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
10447 +               set_ignore(index, addr, this);
10448 +       list_add_tail(&this->list, &node_array[index].member_list);
10449 +       return 1;
10450 +}
10451 +
10452 +static int add_update_member(int index, __be32 addr, int message)
10453 +{
10454 +       int result;
10455 +       unsigned long flags;
10456 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
10457 +       result = __add_update_member(index, addr, message);
10458 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
10459 +
10460 +       print_member_info(index);
10461 +
10462 +       wake_up(&node_array[index].member_events);
10463 +
10464 +       return result;
10465 +}
10466 +
10467 +static void del_member(int index, __be32 addr)
10468 +{
10469 +       struct cluster_member *this;
10470 +       unsigned long flags;
10471 +
10472 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
10473 +       this = __find_member(index, addr);
10474 +
10475 +       if (this) {
10476 +               list_del_init(&this->list);
10477 +               toi_kfree(36, this, sizeof(*this));
10478 +               node_array[index].peer_count--;
10479 +       }
10480 +
10481 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
10482 +}
10483 +
10484 +/*             === Message transmission ===    */
10485 +
10486 +static void toi_send_if(int message, unsigned long my_id);
10487 +
10488 +/*
10489 + *  Process received TOI packet.
10490 + */
10491 +static int toi_recv(struct sk_buff *skb, struct net_device *dev,
10492 +               struct packet_type *pt, struct net_device *orig_dev)
10493 +{
10494 +       struct toi_pkt *b;
10495 +       struct iphdr *h;
10496 +       int len, result, index;
10497 +       unsigned long addr, message, ack;
10498 +
10499 +       /* Perform verifications before taking the lock.  */
10500 +       if (skb->pkt_type == PACKET_OTHERHOST)
10501 +               goto drop;
10502 +
10503 +       if (dev != net_dev)
10504 +               goto drop;
10505 +
10506 +       skb = skb_share_check(skb, GFP_ATOMIC);
10507 +       if (!skb)
10508 +               return NET_RX_DROP;
10509 +
10510 +       if (!pskb_may_pull(skb,
10511 +                          sizeof(struct iphdr) +
10512 +                          sizeof(struct udphdr)))
10513 +               goto drop;
10514 +
10515 +       b = (struct toi_pkt *)skb_network_header(skb);
10516 +       h = &b->iph;
10517 +
10518 +       if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
10519 +               goto drop;
10520 +
10521 +       /* Fragments are not supported */
10522 +       if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
10523 +               if (net_ratelimit())
10524 +                       printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
10525 +                              "cluster message.\n");
10526 +               goto drop;
10527 +       }
10528 +
10529 +       if (skb->len < ntohs(h->tot_len))
10530 +               goto drop;
10531 +
10532 +       if (ip_fast_csum((char *) h, h->ihl))
10533 +               goto drop;
10534 +
10535 +       if (b->udph.source != htons(toi_cluster_port_send) ||
10536 +           b->udph.dest != htons(toi_cluster_port_recv))
10537 +               goto drop;
10538 +
10539 +       if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
10540 +               goto drop;
10541 +
10542 +       len = ntohs(b->udph.len) - sizeof(struct udphdr);
10543 +
10544 +       /* Ok the front looks good, make sure we can get at the rest.  */
10545 +       if (!pskb_may_pull(skb, skb->len))
10546 +               goto drop;
10547 +
10548 +       b = (struct toi_pkt *)skb_network_header(skb);
10549 +       h = &b->iph;
10550 +
10551 +       addr = SADDR;
10552 +       PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
10553 +                       str_message(b->message), NIPQUAD(addr));
10554 +
10555 +       message = b->message & MSG_STATE_MASK;
10556 +       ack = b->message & MSG_ACK_MASK;
10557 +
10558 +       for (index = 0; index < num_local_nodes; index++) {
10559 +               int new_message = node_array[index].current_message,
10560 +                   old_message = new_message;
10561 +
10562 +               if (index == SADDR || !old_message) {
10563 +                       PRINTK("Ignoring node %d (offline or self).\n", index);
10564 +                       continue;
10565 +               }
10566 +
10567 +               /* One message at a time, please. */
10568 +               spin_lock(&node_array[index].receive_lock);
10569 +
10570 +               result = add_update_member(index, SADDR, b->message);
10571 +               if (result == -1) {
10572 +                       printk(KERN_INFO "Failed to add new cluster member "
10573 +                                       NIPQUAD_FMT ".\n",
10574 +                                       NIPQUAD(addr));
10575 +                       goto drop_unlock;
10576 +               }
10577 +
10578 +               switch (b->message & MSG_STATE_MASK) {
10579 +               case MSG_PING:
10580 +                       break;
10581 +               case MSG_ABORT:
10582 +                       break;
10583 +               case MSG_BYE:
10584 +                       break;
10585 +               case MSG_HIBERNATE:
10586 +                       /* Can I hibernate? */
10587 +                       new_message = MSG_HIBERNATE |
10588 +                               ((index & 1) ? MSG_NACK : MSG_ACK);
10589 +                       break;
10590 +               case MSG_IMAGE:
10591 +                       /* Can I resume? */
10592 +                       new_message = MSG_IMAGE |
10593 +                               ((index & 1) ? MSG_NACK : MSG_ACK);
10594 +                       if (new_message != old_message)
10595 +                               printk(KERN_ERR "Setting whether I can resume "
10596 +                                               "to %d.\n", new_message);
10597 +                       break;
10598 +               case MSG_IO:
10599 +                       new_message = MSG_IO | MSG_ACK;
10600 +                       break;
10601 +               case MSG_RUNNING:
10602 +                       break;
10603 +               default:
10604 +                       if (net_ratelimit())
10605 +                               printk(KERN_ERR "Unrecognised TuxOnIce cluster"
10606 +                                       " message %d from " NIPQUAD_FMT ".\n",
10607 +                                       b->message, NIPQUAD(addr));
10608 +               }
10609 +
10610 +               if (old_message != new_message) {
10611 +                       node_array[index].current_message = new_message;
10612 +                       printk(KERN_INFO ">>> Sending new message for node "
10613 +                                       "%d.\n", index);
10614 +                       toi_send_if(new_message, index);
10615 +               } else if (!ack) {
10616 +                       printk(KERN_INFO ">>> Resending message for node %d.\n",
10617 +                                       index);
10618 +                       toi_send_if(new_message, index);
10619 +               }
10620 +drop_unlock:
10621 +               spin_unlock(&node_array[index].receive_lock);
10622 +       }
10623 +
10624 +drop:
10625 +       /* Throw the packet out. */
10626 +       kfree_skb(skb);
10627 +
10628 +       return 0;
10629 +}
10630 +
10631 +/*
10632 + *  Send cluster message to single interface.
10633 + */
10634 +static void toi_send_if(int message, unsigned long my_id)
10635 +{
10636 +       struct sk_buff *skb;
10637 +       struct toi_pkt *b;
10638 +       int hh_len = LL_RESERVED_SPACE(net_dev);
10639 +       struct iphdr *h;
10640 +
10641 +       /* Allocate packet */
10642 +       skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL);
10643 +       if (!skb)
10644 +               return;
10645 +       skb_reserve(skb, hh_len);
10646 +       b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt));
10647 +       memset(b, 0, sizeof(struct toi_pkt));
10648 +
10649 +       /* Construct IP header */
10650 +       skb_reset_network_header(skb);
10651 +       h = ip_hdr(skb);
10652 +       h->version = 4;
10653 +       h->ihl = 5;
10654 +       h->tot_len = htons(sizeof(struct toi_pkt));
10655 +       h->frag_off = htons(IP_DF);
10656 +       h->ttl = 64;
10657 +       h->protocol = IPPROTO_UDP;
10658 +       h->daddr = htonl(INADDR_BROADCAST);
10659 +       h->check = ip_fast_csum((unsigned char *) h, h->ihl);
10660 +
10661 +       /* Construct UDP header */
10662 +       b->udph.source = htons(toi_cluster_port_send);
10663 +       b->udph.dest = htons(toi_cluster_port_recv);
10664 +       b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
10665 +       /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
10666 +
10667 +       /* Construct message */
10668 +       b->message = message;
10669 +       b->sid = my_id;
10670 +       b->htype = net_dev->type; /* dev->type is u16; truncated to fit u8 */
10671 +       b->hlen = net_dev->addr_len;
10672 +       memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len);
10673 +       b->secs = htons(3); /* 3 seconds */
10674 +
10675 +       /* Chain packet down the line... */
10676 +       skb->dev = net_dev;
10677 +       skb->protocol = htons(ETH_P_IP);
10678 +       if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol),
10679 +                    net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) ||
10680 +                       dev_queue_xmit(skb) < 0)
10681 +               printk(KERN_INFO "E");
10682 +}
10683 +
10684 +/*     =========================================               */
10685 +
10686 +/*                     kTOICluster                     */
10687 +
10688 +static atomic_t num_cluster_threads;
10689 +static DECLARE_WAIT_QUEUE_HEAD(clusterd_events);
10690 +
10691 +static int kTOICluster(void *data)
10692 +{
10693 +       unsigned long my_id;
10694 +
10695 +       my_id = atomic_add_return(1, &num_cluster_threads) - 1;
10696 +       node_array[my_id].current_message = (unsigned long) data;
10697 +
10698 +       PRINTK("kTOICluster daemon %lu starting.\n", my_id);
10699 +
10700 +       current->flags |= PF_NOFREEZE;
10701 +
10702 +       while (node_array[my_id].current_message) {
10703 +               toi_send_if(node_array[my_id].current_message, my_id);
10704 +               sleep_on_timeout(&clusterd_events,
10705 +                               cluster_message_timeout);
10706 +               PRINTK("Link state %lu is %d.\n", my_id,
10707 +                               node_array[my_id].current_message);
10708 +       }
10709 +
10710 +       toi_send_if(MSG_BYE, my_id);
10711 +       atomic_dec(&num_cluster_threads);
10712 +       wake_up(&clusterd_events);
10713 +
10714 +       PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
10715 +       __set_current_state(TASK_RUNNING);
10716 +       return 0;
10717 +}
10718 +
10719 +static void kill_clusterd(void)
10720 +{
10721 +       int i;
10722 +
10723 +       for (i = 0; i < num_local_nodes; i++) {
10724 +               if (node_array[i].current_message) {
10725 +                       PRINTK("Seeking to kill clusterd %d.\n", i);
10726 +                       node_array[i].current_message = 0;
10727 +               }
10728 +       }
10729 +       wait_event(clusterd_events,
10730 +                       !atomic_read(&num_cluster_threads));
10731 +       PRINTK("All cluster daemons have exited.\n");
10732 +}
10733 +
10734 +static int peers_not_in_message(int index, int message, int precise)
10735 +{
10736 +       struct cluster_member *this;
10737 +       unsigned long flags;
10738 +       int result = 0;
10739 +
10740 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
10741 +       list_for_each_entry(this, &node_array[index].member_list, list) {
10742 +               if (this->ignore)
10743 +                       continue;
10744 +
10745 +               PRINTK("Peer %d.%d.%d.%d sending %s. "
10746 +                       "Seeking %s.\n",
10747 +                       NIPQUAD(this->addr),
10748 +                       str_message(this->message), str_message(message));
10749 +               if ((precise ? this->message :
10750 +                                       this->message & MSG_STATE_MASK) !=
10751 +                                       message)
10752 +                       result++;
10753 +       }
10754 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
10755 +       PRINTK("%d peers in sought message.\n", result);
10756 +       return result;
10757 +}
10758 +
10759 +static void reset_ignored(int index)
10760 +{
10761 +       struct cluster_member *this;
10762 +       unsigned long flags;
10763 +
10764 +       spin_lock_irqsave(&node_array[index].member_list_lock, flags);
10765 +       list_for_each_entry(this, &node_array[index].member_list, list)
10766 +               this->ignore = 0;
10767 +       node_array[index].ignored_peer_count = 0;
10768 +       spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
10769 +}
10770 +
10771 +static int peers_in_message(int index, int message, int precise)
10772 +{
10773 +       return node_array[index].peer_count -
10774 +               node_array[index].ignored_peer_count -
10775 +               peers_not_in_message(index, message, precise);
10776 +}
10777 +
10778 +static int time_to_continue(int index, unsigned long start, int message)
10779 +{
10780 +       int first = peers_not_in_message(index, message, 0);
10781 +       int second = peers_in_message(index, message, 1);
10782 +
10783 +       PRINTK("First part returns %d, second returns %d.\n", first, second);
10784 +
10785 +       if (!first && !second) {
10786 +               PRINTK("All peers answered message %d.\n",
10787 +                       message);
10788 +               return 1;
10789 +       }
10790 +
10791 +       if (time_after(jiffies, start + continue_delay)) {
10792 +               PRINTK("Timeout reached.\n");
10793 +               return 1;
10794 +       }
10795 +
10796 +       PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies,
10797 +                       start + continue_delay);
10798 +       return 0;
10799 +}
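+/*
+ * Example (illustrative): suppose node 0 sent MSG_HIBERNATE to four
+ * peers. A peer still broadcasting plain MSG_HIBERNATE keeps the
+ * "precise" count non-zero, and a peer in another state keeps the
+ * "imprecise" count non-zero; only when every peer has moved on to
+ * HIBERNATE|ACK or HIBERNATE|NACK, or continue_delay expires, does
+ * time_to_continue() return 1.
+ */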
10800 +
10801 +void toi_initiate_cluster_hibernate(void)
10802 +{
10803 +       int result;
10804 +       unsigned long start;
10805 +
10806 +       result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
10807 +       if (result)
10808 +               return;
10809 +
10810 +       toi_send_if(MSG_HIBERNATE, 0);
10811 +
10812 +       start = jiffies;
10813 +       wait_event(node_array[0].member_events,
10814 +                       time_to_continue(0, start, MSG_HIBERNATE));
10815 +
10816 +       if (test_action_state(TOI_FREEZER_TEST)) {
10817 +               toi_send_if(MSG_ABORT, 0);
10818 +
10819 +               start = jiffies;
10820 +               wait_event(node_array[0].member_events,
10821 +                       time_to_continue(0, start, MSG_RUNNING));
10822 +
10823 +               do_toi_step(STEP_QUIET_CLEANUP);
10824 +               return;
10825 +       }
10826 +
10827 +       toi_send_if(MSG_IO, 0);
10828 +
10829 +       result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
10830 +       if (result)
10831 +               return;
10832 +
10833 +       /* This code runs at resume time too! */
10834 +       if (toi_in_hibernate)
10835 +               result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
10836 +}
10837 +EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate);
10838 +
10839 +/* toi_cluster_print_debug_stats
10840 + *
10841 + * Description:        Print information to be recorded for debugging purposes into a
10842 + *             buffer.
10843 + * Arguments:  buffer: Pointer to a buffer into which the debug info will be
10844 + *                     printed.
10845 + *             size:   Size of the buffer.
10846 + * Returns:    Number of characters written to the buffer.
10847 + */
10848 +static int toi_cluster_print_debug_stats(char *buffer, int size)
10849 +{
10850 +       int len;
10851 +
10852 +       if (strlen(toi_cluster_iface))
10853 +               len = scnprintf(buffer, size,
10854 +                               "- Cluster interface is '%s'.\n",
10855 +                               toi_cluster_iface);
10856 +       else
10857 +               len = scnprintf(buffer, size,
10858 +                               "- Cluster support is disabled.\n");
10859 +       return len;
10860 +}
10861 +
10862 +/* cluster_memory_needed
10863 + *
10864 + * Description:        Tell the caller how much memory we need to operate during
10865 + *             hibernate/resume.
10866 + * Returns:    Unsigned long. Maximum number of bytes of memory required for
10867 + *             operation.
10868 + */
10869 +static int toi_cluster_memory_needed(void)
10870 +{
10871 +       return 0;
10872 +}
10873 +
10874 +static int toi_cluster_storage_needed(void)
10875 +{
10876 +       return 1 + strlen(toi_cluster_iface);
10877 +}
10878 +
10879 +/* toi_cluster_save_config_info
10880 + *
10881 + * Description:        Save information needed when reloading the image at resume time.
10882 + * Arguments:  Buffer:         Pointer to a buffer of size PAGE_SIZE.
10883 + * Returns:    Number of bytes used for saving our data.
10884 + */
10885 +static int toi_cluster_save_config_info(char *buffer)
10886 +{
10887 +       strcpy(buffer, toi_cluster_iface);
10888 +       return strlen(toi_cluster_iface + 1);
10889 +}
10890 +
10891 +/* toi_cluster_load_config_info
10892 + *
10893 + * Description:        Reload information needed for declustering the image at
10894 + *             resume time.
10895 + * Arguments:  Buffer:         Pointer to the start of the data.
10896 + *             Size:           Number of bytes that were saved.
10897 + */
10898 +static void toi_cluster_load_config_info(char *buffer, int size)
10899 +{
10900 +       strncpy(toi_cluster_iface, buffer, size);
10902 +}
10903 +
10904 +static void cluster_startup(void)
10905 +{
10906 +       int have_image = do_check_can_resume(), i;
10907 +       unsigned long start = jiffies, initial_message;
10908 +       struct task_struct *p;
10909 +
10910 +       initial_message = MSG_IMAGE;
10911 +
10912 +       /* XXX: debug override - assume we always have an image */
10913 +       have_image = 1;
10913 +
10914 +       for (i = 0; i < num_local_nodes; i++) {
10915 +               PRINTK("Starting ktoiclusterd %d.\n", i);
10916 +               p = kthread_create(kTOICluster, (void *) initial_message,
10917 +                               "ktoiclusterd/%d", i);
10918 +               if (IS_ERR(p)) {
10919 +                       printk(KERN_ERR "Failed to start ktoiclusterd.\n");
10920 +                       return;
10921 +               }
10922 +
10923 +               wake_up_process(p);
10924 +       }
10925 +
10926 +       /* Wait for delay or someone else sending first message */
10927 +       wait_event(node_array[0].member_events, time_to_continue(0, start,
10928 +                               MSG_IMAGE));
10929 +
10930 +       others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);
10931 +
10932 +       printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
10933 +               " %d.\n", have_image ? "" : "don't ", others_have_image);
10934 +
10935 +       if (have_image) {
10936 +               int result;
10937 +
10938 +               /* Start to resume */
10939 +               printk(KERN_INFO "  === Starting to resume ===  \n");
10940 +               node_array[0].current_message = MSG_IO;
10941 +               toi_send_if(MSG_IO, 0);
10942 +
10943 +               /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
10944 +               result = 0;
10945 +
10946 +               if (!result) {
10947 +                       /*
10948 +                        * Atomic restore - we'll come back in the hibernation
10949 +                        * path.
10950 +                        */
10951 +
10952 +                       /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
10953 +                       result = 0;
10954 +
10955 +                       /* do_toi_step(STEP_QUIET_CLEANUP); */
10956 +               }
10957 +
10958 +               node_array[0].current_message |= MSG_NACK;
10959 +
10960 +               /* For debugging - disable for real life? */
10961 +               wait_event(node_array[0].member_events,
10962 +                               time_to_continue(0, start, MSG_IO));
10963 +       }
10964 +
10965 +       if (others_have_image) {
10966 +               /* Wait for them to resume */
10967 +               printk(KERN_INFO "Waiting for other nodes to resume.\n");
10968 +               start = jiffies;
10969 +               wait_event(node_array[0].member_events,
10970 +                               time_to_continue(0, start, MSG_RUNNING));
10971 +               if (peers_not_in_message(0, MSG_RUNNING, 0))
10972 +                       printk(KERN_INFO "Timed out while waiting for other "
10973 +                                       "nodes to resume.\n");
10974 +       }
10975 +
10976 +       /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
10977 +        * as appropriate.
10978 +        *
10979 +        * If we don't have an image:
10980 +        * - Wait until someone else says they have one, or conditions are met
10981 +        *   for continuing to boot (n machines or t seconds).
10982 +        * - If anyone has an image, wait for them to resume before continuing
10983 +        *   to boot.
10984 +        *
10985 +        * If we have an image:
10986 +        * - Wait until conditions are met before continuing to resume (n
10987 +        *   machines or t seconds). Send RESUME_PREP and freeze processes.
10988 +        *   NACK_PREP if freezing fails (shouldn't) and follow logic for
10989 +        *   us having no image above. On success, wait for [N]ACK_PREP from
10990 +        *   other machines. Read image (including atomic restore) until done.
10991 +        *   Wait for ACK_READ from others (should never fail). Thaw processes
10992 +        *   and do post-resume. (The section after the atomic restore is done
10993 +        *   via the code for hibernating).
10994 +        */
10995 +
10996 +       node_array[0].current_message = MSG_RUNNING;
10997 +}
10998 +
10999 +/* toi_cluster_open_iface
11000 + *
11001 + * Description:        Prepare to use an interface.
11002 + */
11003 +
11004 +static int toi_cluster_open_iface(void)
11005 +{
11006 +       struct net_device *dev;
11007 +
11008 +       rtnl_lock();
11009 +
11010 +       for_each_netdev(&init_net, dev) {
11011 +               if (/* dev == &init_net.loopback_dev || */
11012 +                   strcmp(dev->name, toi_cluster_iface))
11013 +                       continue;
11014 +
11015 +               net_dev = dev;
11016 +               break;
11017 +       }
11018 +
11019 +       rtnl_unlock();
11020 +
11021 +       if (!net_dev) {
11022 +               printk(KERN_ERR MYNAME ": Device %s not found.\n",
11023 +                               toi_cluster_iface);
11024 +               return -ENODEV;
11025 +       }
11026 +
11027 +       dev_add_pack(&toi_cluster_packet_type);
11028 +       added_pack = 1;
11029 +
11030 +       loopback_mode = (net_dev == init_net.loopback_dev);
11031 +       num_local_nodes = loopback_mode ? 8 : 1;
11032 +
11033 +       PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
11034 +                       loopback_mode ? "on" : "off", num_local_nodes);
11035 +
11036 +       cluster_startup();
11037 +       return 0;
11038 +}
11039 +
11040 +/* toi_cluster_close_iface
11041 + *
11042 + * Description: Stop using an interface.
11043 + */
11044 +
11045 +static int toi_cluster_close_iface(void)
11046 +{
11047 +       kill_clusterd();
11048 +       if (added_pack) {
11049 +               dev_remove_pack(&toi_cluster_packet_type);
11050 +               added_pack = 0;
11051 +       }
11052 +       return 0;
11053 +}
11054 +
11055 +static void write_side_effect(void)
11056 +{
11057 +       if (toi_cluster_ops.enabled) {
11058 +               toi_cluster_open_iface();
11059 +               set_toi_state(TOI_CLUSTER_MODE);
11060 +       } else {
11061 +               toi_cluster_close_iface();
11062 +               clear_toi_state(TOI_CLUSTER_MODE);
11063 +       }
11064 +}
11065 +
11066 +static void node_write_side_effect(void)
11067 +{
11068 +}
11069 +
11070 +/*
11071 + * data for our sysfs entries.
11072 + */
11073 +static struct toi_sysfs_data sysfs_params[] = {
11074 +       SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0,
11075 +                       NULL),
11076 +       SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0,
11077 +                       write_side_effect),
11078 +       SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL),
11079 +       SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script,
11080 +                       256, 0, NULL),
11081 +       SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script,
11082 +                       256, 0, NULL),
11083 +       SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ,
11084 +                       0)
11085 +};
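+/*
+ * Illustrative usage from userspace (paths follow from the entries above
+ * and the "cluster" directory name below):
+ *
+ *   echo eth0 > /sys/power/tuxonice/cluster/interface
+ *   echo 1 > /sys/power/tuxonice/cluster/enabled
+ *
+ * Writing "enabled" fires write_side_effect(), which opens or closes the
+ * configured interface and toggles TOI_CLUSTER_MODE.
+ */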
11086 +
11087 +/*
11088 + * Ops structure.
11089 + */
11090 +
11091 +static struct toi_module_ops toi_cluster_ops = {
11092 +       .type                   = FILTER_MODULE,
11093 +       .name                   = "Cluster",
11094 +       .directory              = "cluster",
11095 +       .module                 = THIS_MODULE,
11096 +       .memory_needed          = toi_cluster_memory_needed,
11097 +       .print_debug_info       = toi_cluster_print_debug_stats,
11098 +       .save_config_info       = toi_cluster_save_config_info,
11099 +       .load_config_info       = toi_cluster_load_config_info,
11100 +       .storage_needed         = toi_cluster_storage_needed,
11101 +
11102 +       .sysfs_data             = sysfs_params,
11103 +       .num_sysfs_entries      = sizeof(sysfs_params) /
11104 +               sizeof(struct toi_sysfs_data),
11105 +};
11106 +
11107 +/* ---- Registration ---- */
11108 +
11109 +#ifdef MODULE
11110 +#define INIT static __init
11111 +#define EXIT static __exit
11112 +#else
11113 +#define INIT
11114 +#define EXIT
11115 +#endif
11116 +
11117 +INIT int toi_cluster_init(void)
11118 +{
11119 +       int temp = toi_register_module(&toi_cluster_ops), i;
11120 +       struct kobject *kobj = toi_cluster_ops.dir_kobj;
11121 +
11122 +       for (i = 0; i < MAX_LOCAL_NODES; i++) {
11123 +               node_array[i].current_message = 0;
11124 +               INIT_LIST_HEAD(&node_array[i].member_list);
11125 +               init_waitqueue_head(&node_array[i].member_events);
11126 +               spin_lock_init(&node_array[i].member_list_lock);
11127 +               spin_lock_init(&node_array[i].receive_lock);
11128 +
11129 +               /* Set up sysfs entry */
11130 +               /* "node_%d" with i < MAX_LOCAL_NODES fits in 8 bytes */
11131 +               node_array[i].sysfs_data.attr.name = toi_kzalloc(8, 8,
11132 +                               GFP_KERNEL);
11133 +               snprintf((char *) node_array[i].sysfs_data.attr.name, 8,
11134 +                               "node_%d", i);
11135 +               node_array[i].sysfs_data.attr.mode = SYSFS_RW;
11136 +               node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
11137 +               node_array[i].sysfs_data.flags = 0;
11138 +               node_array[i].sysfs_data.data.integer.variable =
11139 +                       (int *) &node_array[i].current_message;
11140 +               node_array[i].sysfs_data.data.integer.minimum = 0;
11141 +               node_array[i].sysfs_data.data.integer.maximum = INT_MAX;
11142 +               node_array[i].sysfs_data.write_side_effect =
11143 +                       node_write_side_effect;
11144 +               toi_register_sysfs_file(kobj, &node_array[i].sysfs_data);
11145 +       }
11146 +
11147 +       toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
11148 +
11149 +       if (toi_cluster_ops.enabled)
11150 +               toi_cluster_open_iface();
11151 +
11152 +       return temp;
11153 +}
11154 +
11155 +EXIT void toi_cluster_exit(void)
11156 +{
11157 +       int i;
11158 +       toi_cluster_close_iface();
11159 +
11160 +       for (i = 0; i < MAX_LOCAL_NODES; i++)
11161 +               toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj,
11162 +                               &node_array[i].sysfs_data);
11163 +       toi_unregister_module(&toi_cluster_ops);
11164 +}
11165 +
11166 +static int __init toi_cluster_iface_setup(char *iface)
11167 +{
11168 +       toi_cluster_ops.enabled = (*iface &&
11169 +                       strcmp(iface, "off"));
11170 +
11171 +       if (toi_cluster_ops.enabled)
11172 +               strlcpy(toi_cluster_iface, iface, IFNAMSIZ);
11173 +
11174 +       return 1;
11175 +}
11174 +
11175 +__setup("toi_cluster=", toi_cluster_iface_setup);
11176 +
11177 +#ifdef MODULE
11178 +MODULE_LICENSE("GPL");
11179 +module_init(toi_cluster_init);
11180 +module_exit(toi_cluster_exit);
11181 +MODULE_AUTHOR("Nigel Cunningham");
11182 +MODULE_DESCRIPTION("Cluster Support for TuxOnIce");
11183 +#endif
11184 diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h
11185 new file mode 100644
11186 index 0000000..051feb3
11187 --- /dev/null
11188 +++ b/kernel/power/tuxonice_cluster.h
11189 @@ -0,0 +1,18 @@
11190 +/*
11191 + * kernel/power/tuxonice_cluster.h
11192 + *
11193 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
11194 + *
11195 + * This file is released under the GPLv2.
11196 + */
11197 +
11198 +#ifdef CONFIG_TOI_CLUSTER
11199 +extern int toi_cluster_init(void);
11200 +extern void toi_cluster_exit(void);
11201 +extern void toi_initiate_cluster_hibernate(void);
11202 +#else
11203 +static inline int toi_cluster_init(void) { return 0; }
11204 +static inline void toi_cluster_exit(void) { }
11205 +static inline void toi_initiate_cluster_hibernate(void) { }
11206 +#endif
11207 +
11208 diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c
11209 new file mode 100644
11210 index 0000000..6bbc446
11211 --- /dev/null
11212 +++ b/kernel/power/tuxonice_compress.c
11213 @@ -0,0 +1,497 @@
11214 +/*
11215 + * kernel/power/tuxonice_compress.c
11216 + *
11217 + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net)
11218 + *
11219 + * This file is released under the GPLv2.
11220 + *
11221 + * This file contains data compression routines for TuxOnIce,
11222 + * using cryptoapi.
11223 + */
11224 +
11225 +#include <linux/suspend.h>
11226 +#include <linux/highmem.h>
11227 +#include <linux/vmalloc.h>
11228 +#include <linux/crypto.h>
11229 +
11230 +#include "tuxonice_builtin.h"
11231 +#include "tuxonice.h"
11232 +#include "tuxonice_modules.h"
11233 +#include "tuxonice_sysfs.h"
11234 +#include "tuxonice_io.h"
11235 +#include "tuxonice_ui.h"
11236 +#include "tuxonice_alloc.h"
11237 +
11238 +static int toi_expected_compression;
11239 +
11240 +static struct toi_module_ops toi_compression_ops;
11241 +static struct toi_module_ops *next_driver;
11242 +
11243 +static char toi_compressor_name[32] = "lzo";
11244 +
11245 +static DEFINE_MUTEX(stats_lock);
11246 +
11247 +struct cpu_context {
11248 +       u8 *page_buffer;
11249 +       struct crypto_comp *transform;
11250 +       unsigned int len;
11251 +       char *buffer_start;
11252 +       char *output_buffer;
11253 +       char *check_buffer;
11254 +};
11255 +
11256 +static DEFINE_PER_CPU(struct cpu_context, contexts);
11257 +static int toi_check_compression;
11258 +
11259 +/*
11260 + * toi_crypto_prepare
11261 + *
11262 + * Prepare to do some work by allocating buffers and transforms.
11263 + */
11264 +static int toi_compress_crypto_prepare(void)
11265 +{
11266 +       int cpu;
11267 +
11268 +       if (!*toi_compressor_name) {
11269 +               printk(KERN_INFO "TuxOnIce: Compression enabled but no "
11270 +                               "compressor name set.\n");
11271 +               return 1;
11272 +       }
11273 +
11274 +       for_each_online_cpu(cpu) {
11275 +               struct cpu_context *this = &per_cpu(contexts, cpu);
11276 +               this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0);
11277 +               if (IS_ERR(this->transform)) {
11278 +                       printk(KERN_INFO "TuxOnIce: Failed to initialise the "
11279 +                                       "%s compression transform.\n",
11280 +                                       toi_compressor_name);
11281 +                       this->transform = NULL;
11282 +                       return 1;
11283 +               }
11284 +
11285 +               this->page_buffer =
11286 +                       (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
11287 +
11288 +               if (!this->page_buffer) {
11289 +                       printk(KERN_ERR
11290 +                         "Failed to allocate a page buffer for TuxOnIce "
11291 +                         "compression driver.\n");
11292 +                       return -ENOMEM;
11293 +               }
11294 +
11295 +               this->output_buffer =
11296 +                       (char *) vmalloc_32(2 * PAGE_SIZE);
11297 +
11298 +               if (!this->output_buffer) {
11299 +                       printk(KERN_ERR
11300 +                         "Failed to allocate an output buffer for TuxOnIce "
11301 +                         "compression driver.\n");
11302 +                       return -ENOMEM;
11303 +               }
11304 +
11305 +               this->check_buffer =
11306 +                       (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
11307 +
11308 +               if (!this->check_buffer) {
11309 +                       printk(KERN_ERR
11310 +                         "Failed to allocate a check buffer for TuxOnIce "
11311 +                         "compression driver.\n");
11312 +                       return -ENOMEM;
11313 +               }
11314 +
11315 +       }
11316 +
11317 +       return 0;
11318 +}
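/*
 * Editor's note: a context is allocated per online CPU so that page
 * compression can proceed on several CPUs at once without sharing a
 * transform; the only shared state is the pair of byte counters, which
 * toi_compress_write_page() below guards with stats_lock.
 */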
11319 +
11320 +static int toi_compress_rw_cleanup(int writing)
11321 +{
11322 +       int cpu;
11323 +
11324 +       for_each_online_cpu(cpu) {
11325 +               struct cpu_context *this = &per_cpu(contexts, cpu);
11326 +               if (this->transform) {
11327 +                       crypto_free_comp(this->transform);
11328 +                       this->transform = NULL;
11329 +               }
11330 +
11331 +               if (this->page_buffer)
11332 +                       toi_free_page(16, (unsigned long) this->page_buffer);
11333 +
11334 +               this->page_buffer = NULL;
11335 +
11336 +               if (this->output_buffer)
11337 +                       vfree(this->output_buffer);
11338 +
11339 +               this->output_buffer = NULL;
11340 +
11341 +               if (this->check_buffer)
11342 +                       toi_free_page(16, (unsigned long) this->check_buffer);
11343 +
11344 +               this->check_buffer = NULL;
11345 +       }
11346 +
11347 +       return 0;
11348 +}
11349 +
11350 +/*
11351 + * toi_compress_init
11352 + */
11353 +
11354 +static int toi_compress_init(int toi_or_resume)
11355 +{
11356 +       if (!toi_or_resume)
11357 +               return 0;
11358 +
11359 +       toi_compress_bytes_in = 0;
11360 +       toi_compress_bytes_out = 0;
11361 +
11362 +       next_driver = toi_get_next_filter(&toi_compression_ops);
11363 +
11364 +       return next_driver ? 0 : -ECHILD;
11365 +}
11366 +
11367 +/*
11368 + * toi_compress_rw_init()
11369 + */
11370 +
11371 +static int toi_compress_rw_init(int rw, int stream_number)
11372 +{
11373 +       if (toi_compress_crypto_prepare()) {
11374 +               printk(KERN_ERR "Failed to initialise compression "
11375 +                               "algorithm.\n");
11376 +               if (rw == READ) {
11377 +                       printk(KERN_INFO "Unable to read the image.\n");
11378 +                       return -ENODEV;
11379 +               } else {
11380 +                       printk(KERN_INFO "Continuing without "
11381 +                               "compressing the image.\n");
11382 +                       toi_compression_ops.enabled = 0;
11383 +               }
11384 +       }
11385 +
11386 +       return 0;
11387 +}
11388 +
11389 +static int check_compression(struct cpu_context *ctx, struct page *buffer_page,
11390 +               int buf_size)
11391 +{
11392 +       char *original = kmap(buffer_page);
11393 +       int output_size = PAGE_SIZE, okay, ret;
11394 +
11395 +       ret = crypto_comp_decompress(ctx->transform, ctx->output_buffer,
11396 +                       ctx->len, ctx->check_buffer, &output_size);
11397 +       okay = (!ret && output_size == PAGE_SIZE &&
11398 +                       !memcmp(ctx->check_buffer, original, PAGE_SIZE));
11399 +
11400 +       if (!okay) {
11401 +               printk(KERN_ERR "Compression test failed.\n");
11402 +               print_hex_dump(KERN_ERR, "Original page: ", DUMP_PREFIX_NONE,
11403 +                               16, 1, original, PAGE_SIZE, 0);
11404 +               printk(KERN_ERR "\nOutput %d bytes. Result %d.", ctx->len, ret);
11405 +               print_hex_dump(KERN_ERR, "Compressed to: ", DUMP_PREFIX_NONE,
11406 +                               16, 1, ctx->output_buffer, ctx->len, 0);
11407 +               printk(KERN_ERR "\nRestored to %d bytes.\n", output_size);
11408 +               print_hex_dump(KERN_ERR, "Decompressed : ", DUMP_PREFIX_NONE,
11409 +                               16, 1, ctx->check_buffer, output_size, 0);
11410 +       }
11411 +       kunmap(buffer_page);
11412 +
11413 +       return okay;
11414 +}
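/*
 * Editor's note: check_compression() returns 1 when the round trip
 * (decompress and compare against the original page) succeeds and 0 on
 * failure; the caller below treats a zero result as a cue to fall back
 * to writing the page uncompressed.
 */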
11415 +
11416 +/*
11417 + * toi_compress_write_page()
11418 + *
11419 + * Compress a page of data, buffering output and passing on filled
11420 + * pages to the next module in the pipeline.
11421 + *
11422 + * Buffer_page:        Pointer to a buffer of size PAGE_SIZE, containing
11423 + * data to be compressed.
11424 + *
11425 + * Returns:    0 on success. Otherwise the error is that returned by later
11426 + *             modules, -ECHILD if we have a broken pipeline or -EIO if
11427 + *             zlib errs.
11428 + */
11429 +static int toi_compress_write_page(unsigned long index,
11430 +               struct page *buffer_page, unsigned int buf_size)
11431 +{
11432 +       int ret, cpu = smp_processor_id();
11433 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
11434 +
11435 +       if (!ctx->transform)
11436 +               return next_driver->write_page(index, buffer_page, buf_size);
11437 +
11438 +       ctx->buffer_start = kmap(buffer_page);
11439 +
11440 +       ctx->len = PAGE_SIZE;
11441 +
11442 +       ret = crypto_comp_compress(ctx->transform,
11443 +                       ctx->buffer_start, buf_size,
11444 +                       ctx->output_buffer, &ctx->len);
11445 +
11446 +       kunmap(buffer_page);
11447 +
11448 +       mutex_lock(&stats_lock);
11449 +       toi_compress_bytes_in += buf_size;
11450 +       toi_compress_bytes_out += ctx->len;
11451 +       mutex_unlock(&stats_lock);
11452 +
11453 +       if (!ret && ctx->len < buf_size) { /* some compression */
11454 +               if (unlikely(toi_check_compression)) {
11455 +                       ret = check_compression(ctx, buffer_page, buf_size);
11456 +                       if (!ret)
11457 +                               return next_driver->write_page(index,
11458 +                                               buffer_page, buf_size);
11459 +               }
11460 +
11461 +               memcpy(ctx->page_buffer, ctx->output_buffer, ctx->len);
11462 +               return next_driver->write_page(index,
11463 +                               virt_to_page(ctx->page_buffer),
11464 +                               ctx->len);
11465 +       } else
11466 +               return next_driver->write_page(index, buffer_page, buf_size);
11467 +}
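/*
 * Editor's note: the implicit framing convention here is that a stored
 * length equal to PAGE_SIZE marks an uncompressed page. That is why
 * toi_compress_read_page() below returns such pages untouched and only
 * runs the decompressor for shorter lengths.
 */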
11468 +
11469 +/*
11470 + * toi_compress_read_page()
11471 + * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
11472 + *
11473 + * Retrieve data from later modules and decompress it until the input buffer
11474 + * is filled.
11475 + * Returns: Zero on success; otherwise an error from this module or downstream.
11476 + */
11477 +static int toi_compress_read_page(unsigned long *index,
11478 +               struct page *buffer_page, unsigned int *buf_size)
11479 +{
11480 +       int ret, cpu = smp_processor_id();
11481 +       unsigned int len;
11482 +       unsigned int outlen = PAGE_SIZE;
11483 +       char *buffer_start;
11484 +       struct cpu_context *ctx = &per_cpu(contexts, cpu);
11485 +
11486 +       if (!ctx->transform)
11487 +               return next_driver->read_page(index, buffer_page, buf_size);
11488 +
11489 +       /*
11490 +        * All our reads must be synchronous - we can't decompress
11491 +        * data that hasn't been read yet.
11492 +        */
11493 +
11494 +       ret = next_driver->read_page(index, buffer_page, &len);
11495 +
11496 +       /* Error or uncompressed data */
11497 +       if (ret || len == PAGE_SIZE)
11498 +               return ret;
11499 +
11500 +       buffer_start = kmap(buffer_page);
11501 +       memcpy(ctx->page_buffer, buffer_start, len);
11502 +       ret = crypto_comp_decompress(
11503 +                       ctx->transform,
11504 +                       ctx->page_buffer,
11505 +                       len, buffer_start, &outlen);
11506 +       if (ret)
11507 +               abort_hibernate(TOI_FAILED_IO,
11508 +                       "Compress_read returned %d.\n", ret);
11509 +       else if (outlen != PAGE_SIZE) {
11510 +               abort_hibernate(TOI_FAILED_IO,
11511 +                       "Decompression yielded %d bytes instead of %ld.\n",
11512 +                       outlen, PAGE_SIZE);
11513 +               printk(KERN_ERR "Decompression yielded %d bytes instead of "
11514 +                               "%ld.\n", outlen, PAGE_SIZE);
11515 +               ret = -EIO;
11516 +               *buf_size = outlen;
11517 +       }
11518 +       kunmap(buffer_page);
11519 +       return ret;
11520 +}
11521 +
11522 +/*
11523 + * toi_compress_print_debug_stats
11524 + * @buffer: Pointer to a buffer into which the debug info will be printed.
11525 + * @size: Size of the buffer.
11526 + *
11527 + * Print information to be recorded for debugging purposes into a buffer.
11528 + * Returns: Number of characters written to the buffer.
11529 + */
11530 +
11531 +static int toi_compress_print_debug_stats(char *buffer, int size)
11532 +{
11533 +       unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT,
11534 +                     pages_out = toi_compress_bytes_out >> PAGE_SHIFT;
11535 +       int len;
11536 +
11537 +       /* Output the compression ratio achieved. */
11538 +       if (*toi_compressor_name)
11539 +               len = scnprintf(buffer, size, "- Compressor is '%s'.\n",
11540 +                               toi_compressor_name);
11541 +       else
11542 +               len = scnprintf(buffer, size, "- Compressor is not set.\n");
11543 +
11544 +       if (pages_in)
11545 +               len += scnprintf(buffer+len, size - len, "  Compressed "
11546 +                       "%lu bytes into %lu (%ld percent compression).\n",
11547 +                 toi_compress_bytes_in,
11548 +                 toi_compress_bytes_out,
11549 +                 (pages_in - pages_out) * 100 / pages_in);
11550 +       return len;
11551 +}
11552 +
11553 +/*
11554 + * toi_compress_memory_needed
11555 + *
11556 + * Tell the caller how much memory we need to operate during hibernate/resume.
11557 + * Returns: Maximum number of bytes of memory required for
11558 + * operation.
11559 + */
11560 +static int toi_compress_memory_needed(void)
11561 +{
11562 +       return 2 * PAGE_SIZE;
11563 +}
11564 +
11565 +static int toi_compress_storage_needed(void)
11566 +{
11567 +       return 4 * sizeof(unsigned long) + strlen(toi_compressor_name) + 1;
11568 +}
11569 +
11570 +/*
11571 + * toi_compress_save_config_info
11572 + * @buffer: Pointer to a buffer of size PAGE_SIZE.
11573 + *
11574 + * Save information needed when reloading the image at resume time.
11575 + * Returns: Number of bytes used for saving our data.
11576 + */
11577 +static int toi_compress_save_config_info(char *buffer)
11578 +{
11579 +       int namelen = strlen(toi_compressor_name) + 1;
11580 +       int total_len;
11581 +
11582 +       *((unsigned long *) buffer) = toi_compress_bytes_in;
11583 +       *((unsigned long *) (buffer + 1 * sizeof(unsigned long))) =
11584 +               toi_compress_bytes_out;
11585 +       *((unsigned long *) (buffer + 2 * sizeof(unsigned long))) =
11586 +               toi_expected_compression;
11587 +       *((unsigned long *) (buffer + 3 * sizeof(unsigned long))) = namelen;
11588 +       strncpy(buffer + 4 * sizeof(unsigned long), toi_compressor_name,
11589 +                                                               namelen);
11590 +       total_len = 4 * sizeof(unsigned long) + namelen;
11591 +       return total_len;
11592 +}
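/*
 * Editor's sketch (illustration only, not part of the patch): the buffer
 * written above is laid out as four unsigned longs followed by the
 * NUL-terminated compressor name. A hypothetical struct describing the
 * same bytes makes the offsets used by save/load easier to follow:
 */
struct toi_compress_config_layout {            /* hypothetical name */
        unsigned long bytes_in;                /* offset 0 */
        unsigned long bytes_out;               /* offset 1 * sizeof(unsigned long) */
        unsigned long expected_compression;    /* offset 2 * sizeof(unsigned long) */
        unsigned long namelen;                 /* offset 3 * sizeof(unsigned long) */
        char name[];                           /* namelen bytes, including the NUL */
};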
11593 +
11594 +/* toi_compress_load_config_info
11595 + * @buffer: Pointer to the start of the data.
11596 + * @size: Number of bytes that were saved.
11597 + *
11598 + * Description:        Reload information needed for decompressing the image at
11599 + * resume time.
11600 + */
11601 +static void toi_compress_load_config_info(char *buffer, int size)
11602 +{
11603 +       int namelen;
11604 +
11605 +       toi_compress_bytes_in = *((unsigned long *) buffer);
11606 +       toi_compress_bytes_out = *((unsigned long *) (buffer + 1 *
11607 +                               sizeof(unsigned long)));
11608 +       toi_expected_compression = *((unsigned long *) (buffer + 2 *
11609 +                               sizeof(unsigned long)));
11610 +       namelen = *((unsigned long *) (buffer + 3 * sizeof(unsigned long)));
11611 +       if (strncmp(toi_compressor_name, buffer + 4 * sizeof(unsigned long),
11612 +                               namelen))
11613 +               strncpy(toi_compressor_name, buffer + 4 * sizeof(unsigned long),
11614 +                       namelen);
11615 +       return;
11616 +}
11617 +
11618 +static void toi_compress_pre_atomic_restore(struct toi_boot_kernel_data *bkd)
11619 +{
11620 +       bkd->compress_bytes_in = toi_compress_bytes_in;
11621 +       bkd->compress_bytes_out = toi_compress_bytes_out;
11622 +}
11623 +
11624 +static void toi_compress_post_atomic_restore(struct toi_boot_kernel_data *bkd)
11625 +{
11626 +       toi_compress_bytes_in = bkd->compress_bytes_in;
11627 +       toi_compress_bytes_out = bkd->compress_bytes_out;
11628 +}
11629 +
11630 +/*
11631 + * toi_expected_compression_ratio
11632 + *
11633 + * Description:        Returns the expected ratio between data passed into this module
11634 + *             and the amount of data output when writing.
11635 + * Returns:    100 if the module is disabled. Otherwise the value set by the
11636 + *             user via our sysfs entry.
11637 + */
11638 +
11639 +static int toi_compress_expected_ratio(void)
11640 +{
11641 +       if (!toi_compression_ops.enabled)
11642 +               return 100;
11643 +       else
11644 +               return 100 - toi_expected_compression;
11645 +}
11646 +
11647 +/*
11648 + * data for our sysfs entries.
11649 + */
11650 +static struct toi_sysfs_data sysfs_params[] = {
11651 +       SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression,
11652 +                       0, 99, 0, NULL),
11653 +       SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0,
11654 +                       NULL),
11655 +       SYSFS_INT("check", SYSFS_RW, &toi_check_compression, 0, 1, 0,
11656 +                       NULL),
11657 +       SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL),
11658 +};
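/*
 * Editor's note: given the module's directory field below ("compression"),
 * these entries presumably appear as
 * /sys/power/tuxonice/compression/{expected_compression,enabled,check,algorithm},
 * with the integer ranges (0-99, 0-1, 0-1) enforced by the sysfs layer.
 */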
11659 +
11660 +/*
11661 + * Ops structure.
11662 + */
11663 +static struct toi_module_ops toi_compression_ops = {
11664 +       .type                   = FILTER_MODULE,
11665 +       .name                   = "compression",
11666 +       .directory              = "compression",
11667 +       .module                 = THIS_MODULE,
11668 +       .initialise             = toi_compress_init,
11669 +       .memory_needed          = toi_compress_memory_needed,
11670 +       .print_debug_info       = toi_compress_print_debug_stats,
11671 +       .save_config_info       = toi_compress_save_config_info,
11672 +       .load_config_info       = toi_compress_load_config_info,
11673 +       .storage_needed         = toi_compress_storage_needed,
11674 +       .expected_compression   = toi_compress_expected_ratio,
11675 +
11676 +       .pre_atomic_restore     = toi_compress_pre_atomic_restore,
11677 +       .post_atomic_restore    = toi_compress_post_atomic_restore,
11678 +
11679 +       .rw_init                = toi_compress_rw_init,
11680 +       .rw_cleanup             = toi_compress_rw_cleanup,
11681 +
11682 +       .write_page             = toi_compress_write_page,
11683 +       .read_page              = toi_compress_read_page,
11684 +
11685 +       .sysfs_data             = sysfs_params,
11686 +       .num_sysfs_entries      = sizeof(sysfs_params) /
11687 +               sizeof(struct toi_sysfs_data),
11688 +};
11689 +
11690 +/* ---- Registration ---- */
11691 +
11692 +static __init int toi_compress_load(void)
11693 +{
11694 +       return toi_register_module(&toi_compression_ops);
11695 +}
11696 +
11697 +#ifdef MODULE
11698 +static __exit void toi_compress_unload(void)
11699 +{
11700 +       toi_unregister_module(&toi_compression_ops);
11701 +}
11702 +
11703 +module_init(toi_compress_load);
11704 +module_exit(toi_compress_unload);
11705 +MODULE_LICENSE("GPL");
11706 +MODULE_AUTHOR("Nigel Cunningham");
11707 +MODULE_DESCRIPTION("Compression Support for TuxOnIce");
11708 +#else
11709 +late_initcall(toi_compress_load);
11710 +#endif
11711 diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c
11712 new file mode 100644
11713 index 0000000..e84572c
11714 --- /dev/null
11715 +++ b/kernel/power/tuxonice_extent.c
11716 @@ -0,0 +1,123 @@
11717 +/*
11718 + * kernel/power/tuxonice_extent.c
11719 + *
11720 + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net)
11721 + *
11722 + * Distributed under GPLv2.
11723 + *
11724 + * These functions encapsulate the manipulation of storage metadata.
11725 + */
11726 +
11727 +#include <linux/suspend.h>
11728 +#include "tuxonice_modules.h"
11729 +#include "tuxonice_extent.h"
11730 +#include "tuxonice_alloc.h"
11731 +#include "tuxonice_ui.h"
11732 +#include "tuxonice.h"
11733 +
11734 +/**
11735 + * toi_get_extent - return a free extent
11736 + *
11737 + * May fail, returning NULL instead.
11738 + **/
11739 +static struct hibernate_extent *toi_get_extent(void)
11740 +{
11741 +       return (struct hibernate_extent *) toi_kzalloc(2,
11742 +                       sizeof(struct hibernate_extent), TOI_ATOMIC_GFP);
11743 +}
11744 +
11745 +/**
11746 + * toi_put_extent_chain - free a whole chain of extents
11747 + * @chain:     Chain to free.
11748 + **/
11749 +void toi_put_extent_chain(struct hibernate_extent_chain *chain)
11750 +{
11751 +       struct hibernate_extent *this;
11752 +
11753 +       this = chain->first;
11754 +
11755 +       while (this) {
11756 +               struct hibernate_extent *next = this->next;
11757 +               toi_kfree(2, this, sizeof(*this));
11758 +               chain->num_extents--;
11759 +               this = next;
11760 +       }
11761 +
11762 +       chain->first = NULL;
11763 +       chain->last_touched = NULL;
11764 +       chain->current_extent = NULL;
11765 +       chain->size = 0;
11766 +}
11767 +EXPORT_SYMBOL_GPL(toi_put_extent_chain);
11768 +
11769 +/**
11770 + * toi_add_to_extent_chain - add an extent to an existing chain
11771 + * @chain:     Chain to which the extent should be added
11772 + * @start:     Start of the extent (first physical block)
11773 + * @end:       End of the extent (last physical block)
11774 + *
11775 + * The chain information is updated if the insertion is successful.
11776 + **/
11777 +int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
11778 +               unsigned long start, unsigned long end)
11779 +{
11780 +       struct hibernate_extent *new_ext = NULL, *cur_ext = NULL;
11781 +
11782 +       toi_message(TOI_IO, TOI_VERBOSE, 0,
11783 +               "Adding extent %lu-%lu to chain %p.\n", start, end, chain);
11784 +
11785 +       /* Find the right place in the chain */
11786 +       if (chain->last_touched && chain->last_touched->start < start)
11787 +               cur_ext = chain->last_touched;
11788 +       else if (chain->first && chain->first->start < start)
11789 +               cur_ext = chain->first;
11790 +
11791 +       if (cur_ext) {
11792 +               while (cur_ext->next && cur_ext->next->start < start)
11793 +                       cur_ext = cur_ext->next;
11794 +
11795 +               if (cur_ext->end == (start - 1)) {
11796 +                       struct hibernate_extent *next_ext = cur_ext->next;
11797 +                       cur_ext->end = end;
11798 +
11799 +                       /* Merge with the following one? */
11800 +                       if (next_ext && cur_ext->end + 1 == next_ext->start) {
11801 +                               cur_ext->end = next_ext->end;
11802 +                               cur_ext->next = next_ext->next;
11803 +                               toi_kfree(2, next_ext, sizeof(*next_ext));
11804 +                               chain->num_extents--;
11805 +                       }
11806 +
11807 +                       chain->last_touched = cur_ext;
11808 +                       chain->size += (end - start + 1);
11809 +
11810 +                       return 0;
11811 +               }
11812 +       }
11813 +
11814 +       new_ext = toi_get_extent();
11815 +       if (!new_ext) {
11816 +               printk(KERN_ERR "Error: unable to append a new extent to the "
11817 +                               "chain.\n");
11818 +               return -ENOMEM;
11819 +       }
11820 +
11821 +       chain->num_extents++;
11822 +       chain->size += (end - start + 1);
11823 +       new_ext->start = start;
11824 +       new_ext->end = end;
11825 +
11826 +       chain->last_touched = new_ext;
11827 +
11828 +       if (cur_ext) {
11829 +               new_ext->next = cur_ext->next;
11830 +               cur_ext->next = new_ext;
11831 +       } else {
11832 +               if (chain->first)
11833 +                       new_ext->next = chain->first;
11834 +               chain->first = new_ext;
11835 +       }
11836 +
11837 +       return 0;
11838 +}
11839 +EXPORT_SYMBOL_GPL(toi_add_to_extent_chain);
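/*
 * Editor's sketch (illustration only, not part of the patch): abutting
 * insertions are merged, so a zeroed chain that is handed 0-9 and then
 * 10-19 ends up holding the single extent 0-19:
 */
void example_extent_merge(void)                 /* hypothetical helper */
{
        struct hibernate_extent_chain chain = { 0 };

        toi_add_to_extent_chain(&chain, 0, 9);   /* first extent: 0-9 */
        toi_add_to_extent_chain(&chain, 10, 19); /* 10 abuts 9, merged to 0-19 */
        /* chain.num_extents == 1, chain.size == 20 */
        toi_put_extent_chain(&chain);            /* frees the extent */
}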
11840 diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h
11841 new file mode 100644
11842 index 0000000..157446c
11843 --- /dev/null
11844 +++ b/kernel/power/tuxonice_extent.h
11845 @@ -0,0 +1,44 @@
11846 +/*
11847 + * kernel/power/tuxonice_extent.h
11848 + *
11849 + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net)
11850 + *
11851 + * This file is released under the GPLv2.
11852 + *
11853 + * It contains declarations related to extents. Extents are
11854 + * TuxOnIce's method of storing some of the metadata for the image.
11855 + * See tuxonice_extent.c for more info.
11856 + *
11857 + */
11858 +
11859 +#include "tuxonice_modules.h"
11860 +
11861 +#ifndef EXTENT_H
11862 +#define EXTENT_H
11863 +
11864 +struct hibernate_extent {
11865 +       unsigned long start, end;
11866 +       struct hibernate_extent *next;
11867 +};
11868 +
11869 +struct hibernate_extent_chain {
11870 +       unsigned long size; /* size of the chain, i.e. sum of (end - start + 1) */
11871 +       int num_extents;
11872 +       struct hibernate_extent *first, *last_touched;
11873 +       struct hibernate_extent *current_extent;
11874 +       unsigned long current_offset;
11875 +};
11876 +
11877 +/* Simplify iterating through all the values in an extent chain */
11878 +#define toi_extent_for_each(extent_chain, extentpointer, value) \
11879 +if ((extent_chain)->first) \
11880 +       for ((extentpointer) = (extent_chain)->first, (value) = \
11881 +                       (extentpointer)->start; \
11882 +            ((extentpointer) && ((extentpointer)->next || (value) <= \
11883 +                                (extentpointer)->end)); \
11884 +            (((value) == (extentpointer)->end) ? \
11885 +               ((extentpointer) = (extentpointer)->next, (value) = \
11886 +                ((extentpointer) ? (extentpointer)->start : 0)) : \
11887 +                       (value)++))
11888 +
11889 +#endif
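/*
 * Editor's sketch (illustration only, not part of the patch): with a chain
 * holding the extents 3-5 and 9-10, the iterator visits the values
 * 3, 4, 5, 9, 10 in order:
 */
static inline void example_extent_walk(struct hibernate_extent_chain *chain)
{
        struct hibernate_extent *ext;           /* hypothetical names */
        unsigned long value;

        toi_extent_for_each(chain, ext, value)
                printk(KERN_DEBUG "block %lu\n", value);
}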
11890 diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c
11891 new file mode 100644
11892 index 0000000..7a4614a
11893 --- /dev/null
11894 +++ b/kernel/power/tuxonice_file.c
11895 @@ -0,0 +1,496 @@
11896 +/*
11897 + * kernel/power/tuxonice_file.c
11898 + *
11899 + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net)
11900 + *
11901 + * Distributed under GPLv2.
11902 + *
11903 + * This file encapsulates functions for using a simple file as a
11904 + * backing store. It is based upon the swap allocator, and works in
11905 + * the same basic way. Here, though, we have nothing to do with
11906 + * swapspace, and only one device to worry about.
11907 + *
11908 + * The user can just
11909 + *
11910 + * echo TuxOnIce > /path/to/my_file
11911 + *
11912 + * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file
11913 + *
11914 + * and
11915 + *
11916 + * echo /path/to/my_file > /sys/power/tuxonice/file/target
11917 + *
11918 + * then put what they find in /sys/power/tuxonice/resume
11919 + * as their resume= parameter in lilo.conf (and rerun lilo if using it).
11920 + *
11921 + * Having done this, they're ready to hibernate and resume.
11922 + *
11923 + * TODO:
11924 + * - File resizing.
11925 + */
11926 +
11927 +#include <linux/blkdev.h>
11928 +#include <linux/mount.h>
11929 +#include <linux/fs.h>
11930 +#include <linux/fs_uuid.h>
11931 +
11932 +#include "tuxonice.h"
11933 +#include "tuxonice_modules.h"
11934 +#include "tuxonice_bio.h"
11935 +#include "tuxonice_alloc.h"
11936 +#include "tuxonice_builtin.h"
11937 +#include "tuxonice_sysfs.h"
11938 +#include "tuxonice_ui.h"
11939 +#include "tuxonice_io.h"
11940 +
11941 +#define target_is_normal_file() (S_ISREG(target_inode->i_mode))
11942 +
11943 +static struct toi_module_ops toi_fileops;
11944 +
11945 +static struct file *target_file;
11946 +static struct block_device *toi_file_target_bdev;
11947 +static unsigned long pages_available, pages_allocated;
11948 +static char toi_file_target[256];
11949 +static struct inode *target_inode;
11950 +static int file_target_priority;
11951 +static int used_devt;
11952 +static int target_claim;
11953 +static dev_t toi_file_dev_t;
11954 +static int sig_page_index;
11955 +
11956 +/* For test_toi_file_target */
11957 +static struct toi_bdev_info *file_chain;
11958 +
11959 +static int has_contiguous_blocks(struct toi_bdev_info *dev_info, int page_num)
11960 +{
11961 +       int j;
11962 +       sector_t last = 0;
11963 +
11964 +       for (j = 0; j < dev_info->blocks_per_page; j++) {
11965 +               sector_t this = bmap(target_inode,
11966 +                               page_num * dev_info->blocks_per_page + j);
11967 +
11968 +               if (!this || (last && (last + 1) != this))
11969 +                       break;
11970 +
11971 +               last = this;
11972 +       }
11973 +
11974 +       return j == dev_info->blocks_per_page;
11975 +}
11976 +
11977 +static unsigned long get_usable_pages(struct toi_bdev_info *dev_info)
11978 +{
11979 +       unsigned long result = 0;
11980 +       struct block_device *bdev = dev_info->bdev;
11981 +       int i;
11982 +
11983 +       switch (target_inode->i_mode & S_IFMT) {
11984 +       case S_IFSOCK:
11985 +       case S_IFCHR:
11986 +       case S_IFIFO: /* Socket, Char, Fifo */
11987 +               return -1;
11988 +       case S_IFREG: /* Regular file: current size - holes + free
11989 +                        space on partition */
11990 +               for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) {
11991 +                       if (has_contiguous_blocks(dev_info, i))
11992 +                               result++;
11993 +               }
11994 +               break;
11995 +       case S_IFBLK: /* Block device */
11996 +               if (!bdev->bd_disk) {
11997 +                       toi_message(TOI_IO, TOI_VERBOSE, 0,
11998 +                                       "bdev->bd_disk null.");
11999 +                       return 0;
12000 +               }
12001 +
12002 +               result = (bdev->bd_part ?
12003 +                       bdev->bd_part->nr_sects :
12004 +                       get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9);
12005 +       }
12006 +
12007 +
12008 +       return result;
12009 +}
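/*
 * Editor's note: for the block-device case, the shift by PAGE_SHIFT - 9
 * converts 512-byte sectors to pages. With 4 KiB pages (PAGE_SHIFT == 12)
 * a partition of 1048576 sectors (512 MiB) yields 1048576 >> 3 == 131072
 * usable pages.
 */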
12010 +
12011 +static int toi_file_register_storage(void)
12012 +{
12013 +       struct toi_bdev_info *devinfo;
12014 +       int result = 0;
12015 +       struct fs_info *fs_info;
12016 +
12017 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_file_register_storage.");
12018 +       if (!strlen(toi_file_target)) {
12019 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Register file storage: "
12020 +                               "No target filename set.");
12021 +               return 0;
12022 +       }
12023 +
12024 +       target_file = filp_open(toi_file_target, O_RDONLY|O_LARGEFILE, 0);
12025 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "filp_open %s returned %p.",
12026 +                       toi_file_target, target_file);
12027 +
12028 +       if (IS_ERR(target_file) || !target_file) {
12029 +               target_file = NULL;
12030 +               toi_file_dev_t = name_to_dev_t(toi_file_target);
12031 +               if (!toi_file_dev_t) {
12032 +                       struct kstat stat;
12033 +                       int error = vfs_stat(toi_file_target, &stat);
12034 +                       printk(KERN_INFO "Open file %s returned %p and "
12035 +                                       "name_to_dev_t failed.\n",
12036 +                                       toi_file_target, target_file);
12037 +                       if (error) {
12038 +                               printk(KERN_INFO "Stat of the file also failed."
12039 +                                       " Nothing more we can do.\n");
12040 +                               return 0;
12041 +                       } else
12042 +                               toi_file_dev_t = stat.rdev;
12043 +               }
12044 +
12045 +               toi_file_target_bdev = toi_open_by_devnum(toi_file_dev_t);
12046 +               if (IS_ERR(toi_file_target_bdev)) {
12047 +                       printk(KERN_INFO "Got a dev_num (%lx) but failed to "
12048 +                                       "open it.\n",
12049 +                                       (unsigned long) toi_file_dev_t);
12050 +                       toi_file_target_bdev = NULL;
12051 +                       return 0;
12052 +               }
12053 +               used_devt = 1;
12054 +               target_inode = toi_file_target_bdev->bd_inode;
12055 +       } else
12056 +               target_inode = target_file->f_mapping->host;
12057 +
12058 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Succeeded in opening the target.");
12059 +       if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) ||
12060 +           S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) {
12061 +               printk(KERN_INFO "File support works with regular files,"
12062 +                               " character files and block devices.\n");
12063 +               /* Cleanup routine will undo the above */
12064 +               return 0;
12065 +       }
12066 +
12067 +       if (!used_devt) {
12068 +               if (S_ISBLK(target_inode->i_mode)) {
12069 +                       toi_file_target_bdev = I_BDEV(target_inode);
12070 +                       if (!bd_claim(toi_file_target_bdev, &toi_fileops))
12071 +                               target_claim = 1;
12072 +               } else
12073 +                       toi_file_target_bdev = target_inode->i_sb->s_bdev;
12074 +               if (!toi_file_target_bdev) {
12075 +                       printk(KERN_INFO "%s is not a valid file allocator "
12076 +                                       "target.\n", toi_file_target);
12077 +                       return 0;
12078 +               }
12079 +               toi_file_dev_t = toi_file_target_bdev->bd_dev;
12080 +       }
12081 +
12082 +       devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), GFP_ATOMIC);
12083 +       if (!devinfo) {
12084 +               printk(KERN_ERR "Failed to allocate a toi_bdev_info struct for the file allocator.\n");
12085 +               return -ENOMEM;
12086 +       }
12087 +
12088 +       devinfo->bdev = toi_file_target_bdev;
12089 +       devinfo->allocator = &toi_fileops;
12090 +       devinfo->allocator_index = 0;
12091 +
12092 +       fs_info = fs_info_from_block_dev(toi_file_target_bdev);
12093 +       if (fs_info && !IS_ERR(fs_info)) {
12094 +               memcpy(devinfo->uuid, &fs_info->uuid, 16);
12095 +               free_fs_info(fs_info);
12096 +       } else
12097 +               result = (int) PTR_ERR(fs_info);
12098 +
12099 +       /* Unlike swap code, only complain if fs_info_from_block_dev returned
12100 +        * -ENOMEM. The 'file' might be a full partition, so might validly not
12101 +        * have an identifiable type, UUID etc.
12102 +        */
12103 +       if (result)
12104 +               printk(KERN_DEBUG "Failed to get fs_info for file device (%d).\n",
12105 +                               result);
12106 +       devinfo->dev_t = toi_file_dev_t;
12107 +       devinfo->prio = file_target_priority;
12108 +       devinfo->bmap_shift = target_inode->i_blkbits - 9;
12109 +       devinfo->blocks_per_page =
12110 +               (1 << (PAGE_SHIFT - target_inode->i_blkbits));
12111 +       sprintf(devinfo->name, "file %s", toi_file_target);
12112 +       file_chain = devinfo;
12113 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Dev_t is %lx. Prio is %d. Bmap "
12114 +                       "shift is %d. Blocks per page %d.",
12115 +                       devinfo->dev_t, devinfo->prio, devinfo->bmap_shift,
12116 +                       devinfo->blocks_per_page);
12117 +
12118 +       /* Keep one aside for the signature */
12119 +       pages_available = get_usable_pages(devinfo) - 1;
12120 +
12121 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering file storage, %lu "
12122 +                       "pages.", pages_available);
12123 +
12124 +       toi_bio_ops.register_storage(devinfo);
12125 +       return 0;
12126 +}
12127 +
12128 +static unsigned long toi_file_storage_available(void)
12129 +{
12130 +       return pages_available;
12131 +}
12132 +
12133 +static int toi_file_allocate_storage(struct toi_bdev_info *chain,
12134 +               unsigned long request)
12135 +{
12136 +       unsigned long available = pages_available - pages_allocated;
12137 +       unsigned long to_add = min(available, request);
12138 +
12139 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Pages available is %lu. Allocated "
12140 +               "is %lu. Allocating %lu pages from file.",
12141 +               pages_available, pages_allocated, to_add);
12142 +       pages_allocated += to_add;
12143 +
12144 +       return to_add;
12145 +}
12146 +
12147 +/**
12148 + * __populate_block_list - add an extent to the chain
12149 + * @min:       Start of the extent (first physical block = sector)
12150 + * @max:       End of the extent (last physical block = sector)
12151 + *
12152 + * If TOI_TEST_BIO is set, print a debug message, outputting the min and max
12153 + * fs block numbers.
12154 + **/
12155 +static int __populate_block_list(struct toi_bdev_info *chain, int min, int max)
12156 +{
12157 +       if (test_action_state(TOI_TEST_BIO))
12158 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %d-%d.",
12159 +                       min << chain->bmap_shift,
12160 +                       ((max + 1) << chain->bmap_shift) - 1);
12161 +
12162 +       return toi_add_to_extent_chain(&chain->blocks, min, max);
12163 +}
12164 +
12165 +static int get_main_pool_phys_params(struct toi_bdev_info *chain)
12166 +{
12167 +       int i, extent_min = -1, extent_max = -1, result = 0, have_sig_page = 0;
12168 +       unsigned long pages_mapped = 0;
12169 +
12170 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Getting file allocator blocks.");
12171 +
12172 +       if (chain->blocks.first)
12173 +               toi_put_extent_chain(&chain->blocks);
12174 +
12175 +       if (!target_is_normal_file()) {
12176 +               result = (pages_available > 0) ?
12177 +                       __populate_block_list(chain, chain->blocks_per_page,
12178 +                               (pages_allocated + 1) *
12179 +                               chain->blocks_per_page - 1) : 0;
12180 +               return result;
12181 +       }
12182 +
12183 +       /*
12184 +        * FIXME: We are assuming the first page is contiguous. Is that
12185 +        * assumption always right?
12186 +        */
12187 +
12188 +       for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) {
12189 +               sector_t new_sector;
12190 +
12191 +               if (!has_contiguous_blocks(chain, i))
12192 +                       continue;
12193 +
12194 +               if (!have_sig_page) {
12195 +                       have_sig_page = 1;
12196 +                       sig_page_index = i;
12197 +                       continue;
12198 +               }
12199 +
12200 +               pages_mapped++;
12201 +
12202 +               /* Ignore first page - it has the header */
12203 +               if (pages_mapped == 1)
12204 +                       continue;
12205 +
12206 +               new_sector = bmap(target_inode, (i * chain->blocks_per_page));
12207 +
12208 +               /*
12209 +                * I'd love to be able to fill in holes and resize
12210 +                * files, but not yet...
12211 +                */
12212 +
12213 +               if (new_sector == extent_max + 1)
12214 +                       extent_max += chain->blocks_per_page;
12215 +               else {
12216 +                       if (extent_min > -1) {
12217 +                               result = __populate_block_list(chain,
12218 +                                               extent_min, extent_max);
12219 +                               if (result)
12220 +                                       return result;
12221 +                       }
12222 +
12223 +                       extent_min = new_sector;
12224 +                       extent_max = extent_min +
12225 +                               chain->blocks_per_page - 1;
12226 +               }
12227 +
12228 +               if (pages_mapped == pages_allocated)
12229 +                       break;
12230 +       }
12231 +
12232 +       if (extent_min > -1) {
12233 +               result = __populate_block_list(chain, extent_min, extent_max);
12234 +               if (result)
12235 +                       return result;
12236 +       }
12237 +
12238 +       return 0;
12239 +}
12240 +
12241 +static void toi_file_free_storage(struct toi_bdev_info *chain)
12242 +{
12243 +       pages_allocated = 0;
12244 +       file_chain = NULL;
12245 +}
12246 +
12247 +/**
12248 + * toi_file_print_debug_stats - print debug info
12249 + * @buffer:    Buffer to data to populate
12250 + * @size:      Size of the buffer
12251 + **/
12252 +static int toi_file_print_debug_stats(char *buffer, int size)
12253 +{
12254 +       int len = scnprintf(buffer, size, "- File Allocator active.\n");
12255 +
12256 +       len += scnprintf(buffer+len, size-len, "  Storage available for "
12257 +                       "image: %lu pages.\n", pages_available);
12258 +
12259 +       return len;
12260 +}
12261 +
12262 +static void toi_file_cleanup(int finishing_cycle)
12263 +{
12264 +       if (toi_file_target_bdev) {
12265 +               if (target_claim) {
12266 +                       bd_release(toi_file_target_bdev);
12267 +                       target_claim = 0;
12268 +               }
12269 +
12270 +               if (used_devt) {
12271 +                       blkdev_put(toi_file_target_bdev,
12272 +                                       FMODE_READ | FMODE_NDELAY);
12273 +                       used_devt = 0;
12274 +               }
12275 +               toi_file_target_bdev = NULL;
12276 +               target_inode = NULL;
12277 +       }
12278 +
12279 +       if (target_file) {
12280 +               filp_close(target_file, NULL);
12281 +               target_file = NULL;
12282 +       }
12283 +
12284 +       pages_available = 0;
12285 +}
12286 +
12287 +/**
12288 + * test_toi_file_target - sysfs callback for /sys/power/tuxonice/file/target
12289 + *
12290 + * Test whether the target file is valid for hibernating.
12291 + **/
12292 +static void test_toi_file_target(void)
12293 +{
12294 +       int result = toi_file_register_storage();
12295 +       sector_t sector;
12296 +       char buf[50];
12297 +       struct fs_info *fs_info;
12298 +
12299 +       if (result || !file_chain)
12300 +               return;
12301 +
12302 +       /* This doesn't mean we're in business. Is any storage available? */
12303 +       if (!pages_available)
12304 +               goto out;
12305 +
12306 +       toi_file_allocate_storage(file_chain, 1);
12307 +       result = get_main_pool_phys_params(file_chain);
12308 +       if (result)
12309 +               goto out;
12310 +
12311 +
12312 +       sector = bmap(target_inode, sig_page_index *
12313 +                       file_chain->blocks_per_page) << file_chain->bmap_shift;
12314 +
12315 +       /* Use the uuid, or the dev_t if that fails */
12316 +       fs_info = fs_info_from_block_dev(toi_file_target_bdev);
12317 +       if (!fs_info || IS_ERR(fs_info)) {
12318 +               bdevname(toi_file_target_bdev, buf);
12319 +               sprintf(resume_file, "/dev/%s:%llu", buf,
12320 +                               (unsigned long long) sector);
12321 +       } else {
12322 +               int i;
12323 +               hex_dump_to_buffer(fs_info->uuid, 16, 32, 1, buf, 50, 0);
12324 +
12325 +               /* Remove the spaces */
12326 +               for (i = 1; i < 16; i++) {
12327 +                       buf[2 * i] = buf[3 * i];
12328 +                       buf[2 * i + 1] = buf[3 * i + 1];
12329 +               }
12330 +               buf[32] = 0;
12331 +               sprintf(resume_file, "UUID=%s:0x%llx", buf,
12332 +                               (unsigned long long) sector);
12333 +               free_fs_info(fs_info);
12334 +       }
12335 +
12336 +       toi_attempt_to_parse_resume_device(0);
12337 +out:
12338 +       toi_file_free_storage(file_chain);
12339 +       toi_bio_ops.free_storage();
12340 +}
12341 +
12342 +static struct toi_sysfs_data sysfs_params[] = {
12343 +       SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256,
12344 +               SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target),
12345 +       SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, NULL),
12346 +       SYSFS_INT("priority", SYSFS_RW, &file_target_priority, -4095,
12347 +                       4096, 0, NULL),
12348 +};
12349 +
12350 +static struct toi_bio_allocator_ops toi_bio_fileops = {
12351 +       .register_storage                       = toi_file_register_storage,
12352 +       .storage_available                      = toi_file_storage_available,
12353 +       .allocate_storage                       = toi_file_allocate_storage,
12354 +       .bmap                                   = get_main_pool_phys_params,
12355 +       .free_storage                           = toi_file_free_storage,
12356 +};
12357 +
12358 +static struct toi_module_ops toi_fileops = {
12359 +       .type                                   = BIO_ALLOCATOR_MODULE,
12360 +       .name                                   = "file storage",
12361 +       .directory                              = "file",
12362 +       .module                                 = THIS_MODULE,
12363 +       .print_debug_info                       = toi_file_print_debug_stats,
12364 +       .cleanup                                = toi_file_cleanup,
12365 +       .bio_allocator_ops                      = &toi_bio_fileops,
12366 +
12367 +       .sysfs_data             = sysfs_params,
12368 +       .num_sysfs_entries      = sizeof(sysfs_params) /
12369 +               sizeof(struct toi_sysfs_data),
12370 +};
12371 +
12372 +/* ---- Registration ---- */
12373 +static __init int toi_file_load(void)
12374 +{
12375 +       return toi_register_module(&toi_fileops);
12376 +}
12377 +
12378 +#ifdef MODULE
12379 +static __exit void toi_file_unload(void)
12380 +{
12381 +       toi_unregister_module(&toi_fileops);
12382 +}
12383 +
12384 +module_init(toi_file_load);
12385 +module_exit(toi_file_unload);
12386 +MODULE_LICENSE("GPL");
12387 +MODULE_AUTHOR("Nigel Cunningham");
12388 +MODULE_DESCRIPTION("TuxOnIce FileAllocator");
12389 +#else
12390 +late_initcall(toi_file_load);
12391 +#endif
12392 diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c
12393 new file mode 100644
12394 index 0000000..c4bbb49
12395 --- /dev/null
12396 +++ b/kernel/power/tuxonice_highlevel.c
12397 @@ -0,0 +1,1313 @@
12398 +/*
12399 + * kernel/power/tuxonice_highlevel.c
12400 + */
12401 +/** \mainpage TuxOnIce.
12402 + *
12403 + * TuxOnIce provides support for saving and restoring an image of
12404 + * system memory to an arbitrary storage device, either on the local computer,
12405 + * or across some network. The support is entirely OS based, so TuxOnIce
12406 + * works without requiring BIOS, APM or ACPI support. The vast majority of the
12407 + * code is also architecture independent, so it should be very easy to port
12408 + * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem
12409 + * and preemption. Initramfses and initrds are also supported.
12410 + *
12411 + * TuxOnIce uses a modular design, in which the method of storing the image is
12412 + * completely abstracted from the core code, as are transformations on the data
12413 + * such as compression and/or encryption (multiple 'modules' can be used to
12414 + * provide arbitrary combinations of functionality). The user interface is also
12415 + * modular, so that arbitrarily simple or complex interfaces can be used to
12416 + * provide anything from debugging information through to eye candy.
12417 + *
12418 + * \section Copyright
12419 + *
12420 + * TuxOnIce is released under the GPLv2.
12421 + *
12422 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR>
12423 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR>
12424 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR>
12425 + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net)<BR>
12426 + *
12427 + * \section Credits
12428 + *
12429 + * Nigel would like to thank the following people for their work:
12430 + *
12431 + * Bernard Blackham <bernard@blackham.com.au><BR>
12432 + * Web page & Wiki administration, some coding. A person without whom
12433 + * TuxOnIce would not be where it is.
12434 + *
12435 + * Michael Frank <mhf@linuxmail.org><BR>
12436 + * Extensive testing and help with improving stability. I was constantly
12437 + * amazed by the quality and quantity of Michael's help.
12438 + *
12439 + * Pavel Machek <pavel@ucw.cz><BR>
12440 + * Modifications, defectiveness pointing, being with Gabor at the very
12441 + * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and
12442 + * 2.5.17. Even though Pavel and I disagree on the direction suspend to
12443 + * disk should take, I appreciate the valuable work he did in helping Gabor
12444 + * get the concept working.
12445 + *
12446 + * ..and of course the myriads of TuxOnIce users who have helped diagnose
12447 + * and fix bugs, made suggestions on how to improve the code, proofread
12448 + * documentation, and donated time and money.
12449 + *
12450 + * Thanks also to corporate sponsors:
12451 + *
12452 + * <B>Redhat.</B> Sometime employer from May 2006 (my fault, not Redhat's!).
12453 + *
12454 + * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who
12455 + * allowed him to work on TuxOnIce and PM related issues on company time.
12456 + *
12457 + * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct
12458 + * 2003 to Jan 2004.
12459 + *
12460 + * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing
12461 + * maintenance of SMP and Highmem support.
12462 + *
12463 + * <B>OSDL.</B> Provided access to various hardware configurations, and made
12464 + * occasional small donations to the project.
12465 + */
12466 +
12467 +#include <linux/suspend.h>
12468 +#include <linux/freezer.h>
12469 +#include <generated/utsrelease.h>
12470 +#include <linux/cpu.h>
12471 +#include <linux/console.h>
12472 +#include <linux/writeback.h>
12473 +#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */
12474 +#include <linux/bio.h>
12475 +
12476 +#include "tuxonice.h"
12477 +#include "tuxonice_modules.h"
12478 +#include "tuxonice_sysfs.h"
12479 +#include "tuxonice_prepare_image.h"
12480 +#include "tuxonice_io.h"
12481 +#include "tuxonice_ui.h"
12482 +#include "tuxonice_power_off.h"
12483 +#include "tuxonice_storage.h"
12484 +#include "tuxonice_checksum.h"
12485 +#include "tuxonice_builtin.h"
12486 +#include "tuxonice_atomic_copy.h"
12487 +#include "tuxonice_alloc.h"
12488 +#include "tuxonice_cluster.h"
12489 +
12490 +/*! Pageset metadata. */
12491 +struct pagedir pagedir2 = {2};
12492 +EXPORT_SYMBOL_GPL(pagedir2);
12493 +
12494 +static mm_segment_t oldfs;
12495 +static DEFINE_MUTEX(tuxonice_in_use);
12496 +static int block_dump_save;
12497 +
12498 +/* Binary signature indicating an image is present */
12499 +char tuxonice_signature[9] = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c";
12500 +EXPORT_SYMBOL_GPL(tuxonice_signature);
12501 +
12502 +unsigned long boot_kernel_data_buffer;
12503 +
12504 +static char *result_strings[] = {
12505 +       "Hibernation was aborted",
12506 +       "The user requested that we cancel the hibernation",
12507 +       "No storage was available",
12508 +       "Insufficient storage was available",
12509 +       "Freezing filesystems and/or tasks failed",
12510 +       "A pre-existing image was used",
12511 +       "We would free memory, but image size limit doesn't allow this",
12512 +       "Unable to free enough memory to hibernate",
12513 +       "Unable to obtain the Power Management Semaphore",
12514 +       "A device suspend/resume returned an error",
12515 +       "A system device suspend/resume returned an error",
12516 +       "The extra pages allowance is too small",
12517 +       "We were unable to successfully prepare an image",
12518 +       "TuxOnIce module initialisation failed",
12519 +       "TuxOnIce module cleanup failed",
12520 +       "I/O errors were encountered",
12521 +       "Ran out of memory",
12522 +       "An error was encountered while reading the image",
12523 +       "Platform preparation failed",
12524 +       "CPU Hotplugging failed",
12525 +       "Architecture specific preparation failed",
12526 +       "Pages needed resaving, but we were told to abort if this happens",
12527 +       "We can't hibernate at the moment (invalid resume= or filewriter "
12528 +               "target?)",
12529 +       "A hibernation preparation notifier chain member cancelled the "
12530 +               "hibernation",
12531 +       "Pre-snapshot preparation failed",
12532 +       "Pre-restore preparation failed",
12533 +       "Failed to disable usermode helpers",
12534 +       "Can't resume from alternate image",
12535 +       "Header reservation too small",
12536 +};
12537 +
12538 +/**
12539 + * toi_finish_anything - cleanup after doing anything
12540 + * @hibernate_or_resume:       Whether finishing a cycle or attempting to
12541 + *                             resume.
12542 + *
12543 + * This is our basic clean-up routine, matching start_anything below. We
12544 + * call cleanup routines, drop module references and restore process fs and
12545 + * cpus allowed masks, together with the global block_dump variable's value.
12546 + **/
12547 +void toi_finish_anything(int hibernate_or_resume)
12548 +{
12549 +       toi_cleanup_modules(hibernate_or_resume);
12550 +       toi_put_modules();
12551 +       if (hibernate_or_resume) {
12552 +               block_dump = block_dump_save;
12553 +               set_cpus_allowed_ptr(current, cpu_all_mask);
12554 +               toi_alloc_print_debug_stats();
12555 +               atomic_inc(&snapshot_device_available);
12556 +               mutex_unlock(&pm_mutex);
12557 +       }
12558 +
12559 +       set_fs(oldfs);
12560 +       mutex_unlock(&tuxonice_in_use);
12561 +}
12562 +
12563 +/**
12564 + * toi_start_anything - basic initialisation for TuxOnIce
12565 + * @hibernate_or_resume:       Whether starting a cycle or attempting to resume.
12566 + *
12567 + * Our basic initialisation routine. Take references on modules, use the
12568 + * kernel segment, recheck resume= if no active allocator is set, initialise
12569 + * modules, save and reset block_dump and ensure we're running on CPU0.
12570 + **/
12571 +int toi_start_anything(int hibernate_or_resume)
12572 +{
12573 +       mutex_lock(&tuxonice_in_use);
12574 +
12575 +       oldfs = get_fs();
12576 +       set_fs(KERNEL_DS);
12577 +
12578 +       if (hibernate_or_resume) {
12579 +               mutex_lock(&pm_mutex);
12580 +
12581 +               if (!atomic_add_unless(&snapshot_device_available, -1, 0))
12582 +                       goto snapshotdevice_unavailable;
12583 +       }
12584 +
12585 +       if (hibernate_or_resume == SYSFS_HIBERNATE)
12586 +               toi_print_modules();
12587 +
12588 +       if (toi_get_modules()) {
12589 +               printk(KERN_INFO "TuxOnIce: Get modules failed!\n");
12590 +               goto prehibernate_err;
12591 +       }
12592 +
12593 +       if (hibernate_or_resume) {
12594 +               block_dump_save = block_dump;
12595 +               block_dump = 0;
12596 +               set_cpus_allowed_ptr(current,
12597 +                               &cpumask_of_cpu(first_cpu(cpu_online_map)));
12598 +       }
12599 +
12600 +       if (toi_initialise_modules_early(hibernate_or_resume))
12601 +               goto early_init_err;
12602 +
12603 +       if (!toiActiveAllocator)
12604 +               toi_attempt_to_parse_resume_device(!hibernate_or_resume);
12605 +
12606 +       if (!toi_initialise_modules_late(hibernate_or_resume))
12607 +               return 0;
12608 +
12609 +       toi_cleanup_modules(hibernate_or_resume);
12610 +early_init_err:
12611 +       if (hibernate_or_resume) {
12612 +               block_dump = block_dump_save;
12613 +               set_cpus_allowed_ptr(current, cpu_all_mask);
12614 +       }
12615 +       toi_put_modules();
12616 +prehibernate_err:
12617 +       if (hibernate_or_resume)
12618 +               atomic_inc(&snapshot_device_available);
12619 +snapshotdevice_unavailable:
12620 +       if (hibernate_or_resume)
12621 +               mutex_unlock(&pm_mutex);
12622 +       set_fs(oldfs);
12623 +       mutex_unlock(&tuxonice_in_use);
12624 +       return -EBUSY;
12625 +}
12626 +
12627 +/*
12628 + * Nosave page tracking.
12629 + *
12630 + * Here rather than in prepare_image because we want to do it once only at the
12631 + * start of a cycle.
12632 + */
12633 +
12634 +/**
12635 + * mark_nosave_pages - set up our Nosave bitmap
12636 + *
12637 + * Build a bitmap of Nosave pages from the list. The bitmap allows faster
12638 + * use when preparing the image.
12639 + **/
12640 +static void mark_nosave_pages(void)
12641 +{
12642 +       struct nosave_region *region;
12643 +
12644 +       list_for_each_entry(region, &nosave_regions, list) {
12645 +               unsigned long pfn;
12646 +
12647 +               for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
12648 +                       if (pfn_valid(pfn))
12649 +                               SetPageNosave(pfn_to_page(pfn));
12650 +       }
12651 +}
12652 +
12653 +static int alloc_a_bitmap(struct memory_bitmap **bm)
12654 +{
12655 +       int result = 0;
12656 +
12657 +       *bm = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
12658 +       if (!*bm) {
12659 +               printk(KERN_ERR "Failed to kzalloc memory for a bitmap.\n");
12660 +               return -ENOMEM;
12661 +       }
12662 +
12663 +       result = memory_bm_create(*bm, GFP_KERNEL, 0);
12664 +
12665 +       if (result) {
12666 +               printk(KERN_ERR "Failed to create a bitmap.\n");
12667 +               kfree(*bm);
12668 +       }
12669 +
12670 +       return result;
12671 +}
12672 +
12673 +/**
12674 + * allocate_bitmaps - allocate bitmaps used to record page states
12675 + *
12676 + * Allocate the bitmaps we use to record the various TuxOnIce related
12677 + * page states.
12678 + **/
12679 +static int allocate_bitmaps(void)
12680 +{
12681 +       if (alloc_a_bitmap(&pageset1_map) ||
12682 +           alloc_a_bitmap(&pageset1_copy_map) ||
12683 +           alloc_a_bitmap(&pageset2_map) ||
12684 +           alloc_a_bitmap(&io_map) ||
12685 +           alloc_a_bitmap(&nosave_map) ||
12686 +           alloc_a_bitmap(&free_map) ||
12687 +           alloc_a_bitmap(&page_resave_map))
12688 +               return 1;
12689 +
12690 +       return 0;
12691 +}
12692 +
12693 +static void free_a_bitmap(struct memory_bitmap **bm)
12694 +{
12695 +       if (!*bm)
12696 +               return;
12697 +
12698 +       memory_bm_free(*bm, 0);
12699 +       kfree(*bm);
12700 +       *bm = NULL;
12701 +}
12702 +
12703 +/**
12704 + * free_bitmaps - free the bitmaps used to record page states
12705 + *
12706 + * Free the bitmaps allocated above. It is not an error to free a bitmap
12707 + * that isn't currently allocated: free_a_bitmap checks for NULL first.
12708 + **/
12709 +static void free_bitmaps(void)
12710 +{
12711 +       free_a_bitmap(&pageset1_map);
12712 +       free_a_bitmap(&pageset1_copy_map);
12713 +       free_a_bitmap(&pageset2_map);
12714 +       free_a_bitmap(&io_map);
12715 +       free_a_bitmap(&nosave_map);
12716 +       free_a_bitmap(&free_map);
12717 +       free_a_bitmap(&page_resave_map);
12718 +}
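
allocate_bitmaps() stops at the first failed allocation and reports failure;
recovery works because free_a_bitmap() treats a NULL slot as a no-op, so the
caller can always free the whole set regardless of how far allocation got. A
small userspace sketch of this all-or-nothing idiom (names hypothetical):

    #include <stdlib.h>

    #define NMAPS 7

    static void *maps[NMAPS];

    static int alloc_all(size_t size)
    {
        int i;

        for (i = 0; i < NMAPS; i++) {
            maps[i] = calloc(1, size);
            if (!maps[i])
                return 1;       /* caller frees whatever we got */
        }
        return 0;
    }

    static void free_all(void)
    {
        int i;

        /* As with free_a_bitmap(), freeing a NULL slot is a no-op. */
        for (i = 0; i < NMAPS; i++) {
            free(maps[i]);
            maps[i] = NULL;
        }
    }

    int main(void)
    {
        int failed = alloc_all(4096);

        free_all();
        return failed;
    }
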
12719 +
12720 +/**
12721 + * io_MB_per_second - return the number of MB/s read or written
12722 + * @write:     Whether to return the speed at which we wrote.
12723 + *
12724 + * Calculate the number of megabytes per second that were read or written.
12725 + **/
12726 +static int io_MB_per_second(int write)
12727 +{
12728 +       return (toi_bkd.toi_io_time[write][1]) ?
12729 +               MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ /
12730 +               toi_bkd.toi_io_time[write][1] : 0;
12731 +}
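
io_MB_per_second() divides the amount transferred by the elapsed jiffies and
multiplies by HZ to convert to per-second units, returning 0 when no time was
recorded. For example, with HZ=250, 100 MB written in 500 jiffies (two
seconds) gives 100 * 250 / 500 = 50 MB/s. A standalone check; the HZ value
and the page-to-megabyte conversion below are assumptions for illustration:

    #include <stdio.h>

    #define HZ 250                     /* assumed tick rate */
    #define MB(pages) ((pages) >> 8)   /* 4 KB pages -> megabytes */

    static long mb_per_second(long pages, long jiffies)
    {
        return jiffies ? MB(pages) * HZ / jiffies : 0;
    }

    int main(void)
    {
        /* 25600 pages = 100 MB in 500 jiffies = 2 s -> 50 MB/s */
        printf("%ld MB/s\n", mb_per_second(25600, 500));
        return 0;
    }
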
12732 +
12733 +#define SNPRINTF(a...)         do { len += scnprintf(((char *) buffer) + len, \
12734 +               count - len - 1, ## a); } while (0)
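
SNPRINTF appends to a shared buffer by offsetting by the running length;
scnprintf() returns the number of characters actually written (never more
than the size it was given), so len can never pass count - 1. A userspace
equivalent of the idiom (scnprintf is kernel-only, so this sketch clamps
snprintf's return value to behave the same way):

    #include <stdio.h>

    /* Emulate scnprintf() on top of snprintf(): snprintf returns what
     * it *would* have written, scnprintf what it actually stored. */
    static int append(char *buf, int count, int len, const char *msg)
    {
        int size = count - len - 1;    /* the slack SNPRINTF passes */
        int n;

        if (size <= 0)
            return len;                /* buffer already full */
        n = snprintf(buf + len, (size_t)size, "%s", msg);
        if (n >= size)                 /* snprintf reports truncation */
            n = size - 1;              /* bytes actually stored */
        return len + n;
    }

    int main(void)
    {
        char buf[32];
        int len = 0;

        len = append(buf, sizeof(buf), len, "TuxOnIce ");
        len = append(buf, sizeof(buf), len, "debugging info\n");
        fputs(buf, stdout);
        return len ? 0 : 1;
    }
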
12735 +
12736 +/**
12737 + * get_toi_debug_info - fill a buffer with debugging information
12738 + * @buffer:    The buffer to be filled.
12739 + * @count:     The size of the buffer, in bytes.
12740 + *
12741 + * Fill a (usually PAGE_SIZE-sized) buffer with the debugging info that we
12742 + * will either printk or return via sysfs.
12743 + **/
12744 +static int get_toi_debug_info(const char *buffer, int count)
12745 +{
12746 +       int len = 0, i, first_result = 1;
12747 +
12748 +       SNPRINTF("TuxOnIce debugging info:\n");
12749 +       SNPRINTF("- TuxOnIce core  : " TOI_CORE_VERSION "\n");
12750 +       SNPRINTF("- Kernel Version : " UTS_RELEASE "\n");
12751 +       SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__);
12752 +       SNPRINTF("- Attempt number : %d\n", nr_hibernates);
12753 +       SNPRINTF("- Parameters     : %ld %ld %ld %d %ld %ld\n",
12754 +                       toi_result,
12755 +                       toi_bkd.toi_action,
12756 +                       toi_bkd.toi_debug_state,
12757 +                       toi_bkd.toi_default_console_level,
12758 +                       image_size_limit,
12759 +                       toi_poweroff_method);
12760 +       SNPRINTF("- Overall expected compression percentage: %d.\n",
12761 +                       100 - toi_expected_compression_ratio());
12762 +       len += toi_print_module_debug_info(((char *) buffer) + len,
12763 +                       count - len - 1);
12764 +       if (toi_bkd.toi_io_time[0][1]) {
12765 +               if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) {
12766 +                       SNPRINTF("- I/O speed: Write %ld KB/s",
12767 +                         (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
12768 +                         toi_bkd.toi_io_time[0][1]));
12769 +                       if (toi_bkd.toi_io_time[1][1])
12770 +                               SNPRINTF(", Read %ld KB/s",
12771 +                                 (KB((unsigned long)
12772 +                                     toi_bkd.toi_io_time[1][0]) * HZ /
12773 +                                 toi_bkd.toi_io_time[1][1]));
12774 +               } else {
12775 +                       SNPRINTF("- I/O speed: Write %ld MB/s",
12776 +                        (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
12777 +                         toi_bkd.toi_io_time[0][1]));
12778 +                       if (toi_bkd.toi_io_time[1][1])
12779 +                               SNPRINTF(", Read %ld MB/s",
12780 +                                (MB((unsigned long)
12781 +                                    toi_bkd.toi_io_time[1][0]) * HZ /
12782 +                                 toi_bkd.toi_io_time[1][1]));
12783 +               }
12784 +               SNPRINTF(".\n");
12785 +       } else
12786 +               SNPRINTF("- No I/O speed stats available.\n");
12787 +       SNPRINTF("- Extra pages    : %lu used/%lu.\n",
12788 +                       extra_pd1_pages_used, extra_pd1_pages_allowance);
12789 +
12790 +       for (i = 0; i < TOI_NUM_RESULT_STATES; i++)
12791 +               if (test_result_state(i)) {
12792 +                       SNPRINTF("%s: %s.\n", first_result ?
12793 +                                       "- Result         " :
12794 +                                       "                 ",
12795 +                                       result_strings[i]);
12796 +                       first_result = 0;
12797 +               }
12798 +       if (first_result)
12799 +               SNPRINTF("- Result         : %s.\n", nr_hibernates ?
12800 +                       "Succeeded" :
12801 +                       "No hibernation attempts so far");
12802 +       return len;
12803 +}
12804 +
12805 +/**
12806 + * do_cleanup - cleanup after attempting to hibernate or resume
12807 + * @get_debug_info:    Whether to allocate and return debugging info.
12808 + * @restarting:                Whether we will restart the cycle (in which case the
12809 + *                     console and storage are left prepared).
12810 + *
12811 + * Cleanup after attempting to hibernate or resume, possibly getting
12812 + * debugging info as we do so.
12811 + **/
12812 +static void do_cleanup(int get_debug_info, int restarting)
12813 +{
12814 +       int i = 0;
12815 +       char *buffer = NULL;
12816 +
12817 +       trap_non_toi_io = 0;
12818 +
12819 +       if (get_debug_info)
12820 +               toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up...");
12821 +
12822 +       free_checksum_pages();
12823 +
12824 +       if (get_debug_info)
12825 +               buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP);
12826 +
12827 +       if (buffer)
12828 +               i = get_toi_debug_info(buffer, PAGE_SIZE);
12829 +
12830 +       toi_free_extra_pagedir_memory();
12831 +
12832 +       pagedir1.size = 0;
12833 +       pagedir2.size = 0;
12834 +       set_highmem_size(pagedir1, 0);
12835 +       set_highmem_size(pagedir2, 0);
12836 +
12837 +       if (boot_kernel_data_buffer) {
12838 +               if (!test_toi_state(TOI_BOOT_KERNEL))
12839 +                       toi_free_page(37, boot_kernel_data_buffer);
12840 +               boot_kernel_data_buffer = 0;
12841 +       }
12842 +
12843 +       clear_toi_state(TOI_BOOT_KERNEL);
12844 +       thaw_processes();
12845 +
12846 +       if (test_action_state(TOI_KEEP_IMAGE) &&
12847 +           !test_result_state(TOI_ABORTED)) {
12848 +               toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
12849 +                       "TuxOnIce: Not invalidating the image due "
12850 +                       "to Keep Image being enabled.");
12851 +               set_result_state(TOI_KEPT_IMAGE);
12852 +       } else
12853 +               if (toiActiveAllocator)
12854 +                       toiActiveAllocator->remove_image();
12855 +
12856 +       free_bitmaps();
12857 +       usermodehelper_enable();
12858 +
12859 +       if (test_toi_state(TOI_NOTIFIERS_PREPARE)) {
12860 +               pm_notifier_call_chain(PM_POST_HIBERNATION);
12861 +               clear_toi_state(TOI_NOTIFIERS_PREPARE);
12862 +       }
12863 +
12864 +       if (buffer && i) {
12865 +               /* Printk can only handle 1023 bytes, including
12866 +                * its level mangling. */
12867 +               for (i = 0; i < 3; i++)
12868 +                       printk(KERN_ERR "%s", buffer + (1023 * i));
12869 +               toi_free_page(20, (unsigned long) buffer);
12870 +       }
12871 +
12872 +       if (!test_action_state(TOI_LATE_CPU_HOTPLUG))
12873 +               enable_nonboot_cpus();
12874 +
12875 +       if (!restarting)
12876 +               toi_cleanup_console();
12877 +
12878 +       free_attention_list();
12879 +
12880 +       if (!restarting)
12881 +               toi_deactivate_storage(0);
12882 +
12883 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
12884 +       clear_toi_state(TOI_TRYING_TO_RESUME);
12885 +       clear_toi_state(TOI_NOW_RESUMING);
12886 +}
12887 +
12888 +/**
12889 + * check_still_keeping_image - we kept an image; check whether to reuse it.
12890 + *
12891 + * We enter this routine when we have kept an image. If the user has said they
12892 + * still want to keep it, all we need to do is power down. If powering down
12893 + * means hibernating to RAM and the power doesn't run out, we'll return 1.
12894 + * If we do power off properly or the battery runs out, we'll resume via the
12895 + * normal paths.
12896 + *
12897 + * If the user has said they want to remove the previously kept image, we
12898 + * remove it, and return 0. We'll then store a new image.
12899 + **/
12900 +static int check_still_keeping_image(void)
12901 +{
12902 +       if (test_action_state(TOI_KEEP_IMAGE)) {
12903 +               printk(KERN_INFO "Image already stored: powering down "
12904 +                               "immediately.\n");
12905 +               do_toi_step(STEP_HIBERNATE_POWERDOWN);
12906 +               return 1;       /* Just in case we're using S3 */
12907 +       }
12908 +
12909 +       printk(KERN_INFO "Invalidating previous image.\n");
12910 +       toiActiveAllocator->remove_image();
12911 +
12912 +       return 0;
12913 +}
12914 +
12915 +/**
12916 + * toi_init - prepare to hibernate to disk
12917 + *
12918 + * Initialise variables & data structures, in preparation for
12919 + * hibernating to disk.
12920 + **/
12921 +static int toi_init(int restarting)
12922 +{
12923 +       int result, i, j;
12924 +
12925 +       toi_result = 0;
12926 +
12927 +       printk(KERN_INFO "Initiating a hibernation cycle.\n");
12928 +
12929 +       nr_hibernates++;
12930 +
12931 +       for (i = 0; i < 2; i++)
12932 +               for (j = 0; j < 2; j++)
12933 +                       toi_bkd.toi_io_time[i][j] = 0;
12934 +
12935 +       if (!test_toi_state(TOI_CAN_HIBERNATE) ||
12936 +           allocate_bitmaps())
12937 +               return 1;
12938 +
12939 +       mark_nosave_pages();
12940 +
12941 +       if (!restarting)
12942 +               toi_prepare_console();
12943 +
12944 +       result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
12945 +       if (result) {
12946 +               set_result_state(TOI_NOTIFIERS_PREPARE_FAILED);
12947 +               return 1;
12948 +       }
12949 +       set_toi_state(TOI_NOTIFIERS_PREPARE);
12950 +
12951 +       result = usermodehelper_disable();
12952 +       if (result) {
12953 +               printk(KERN_ERR "TuxOnIce: Failed to disable usermode "
12954 +                               "helpers\n");
12955 +               set_result_state(TOI_USERMODE_HELPERS_ERR);
12956 +               return 1;
12957 +       }
12958 +
12959 +       boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP);
12960 +       if (!boot_kernel_data_buffer) {
12961 +               printk(KERN_ERR "TuxOnIce: Failed to allocate "
12962 +                               "boot_kernel_data_buffer.\n");
12963 +               set_result_state(TOI_OUT_OF_MEMORY);
12964 +               return 1;
12965 +       }
12966 +
12967 +       if (test_action_state(TOI_LATE_CPU_HOTPLUG) ||
12968 +                       !disable_nonboot_cpus())
12969 +               return 1;
12970 +
12971 +       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
12972 +       return 0;
12973 +}
12974 +
12975 +/**
12976 + * can_hibernate - perform basic 'Can we hibernate?' tests
12977 + *
12978 + * Perform basic tests that must pass if we're going to be able to hibernate:
12979 + * Can we get the pm_mutex? Is resume= valid (we need to know where to write
12980 + * the image header).
12981 + **/
12982 +static int can_hibernate(void)
12983 +{
12984 +       if (!test_toi_state(TOI_CAN_HIBERNATE))
12985 +               toi_attempt_to_parse_resume_device(0);
12986 +
12987 +       if (!test_toi_state(TOI_CAN_HIBERNATE)) {
12988 +               printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n"
12989 +                       "This may be because you haven't put something along "
12990 +                       "the lines of\n\nresume=swap:/dev/hda1\n\n"
12991 +                       "in lilo.conf or equivalent. (Where /dev/hda1 is your "
12992 +                       "swap partition).\n");
12993 +               set_abort_result(TOI_CANT_SUSPEND);
12994 +               return 0;
12995 +       }
12996 +
12997 +       if (strlen(alt_resume_param)) {
12998 +               attempt_to_parse_alt_resume_param();
12999 +
13000 +               if (!strlen(alt_resume_param)) {
13001 +                       printk(KERN_INFO "Alternate resume parameter now "
13002 +                                       "invalid. Aborting.\n");
13003 +                       set_abort_result(TOI_CANT_USE_ALT_RESUME);
13004 +                       return 0;
13005 +               }
13006 +       }
13007 +
13008 +       return 1;
13009 +}
13010 +
13011 +/**
13012 + * do_post_image_write - having written an image, figure out what to do next
13013 + *
13014 + * After writing an image, we might load an alternate image or power down.
13015 + * Powering down might involve hibernating to ram, in which case we also
13016 + * need to handle reloading pageset2.
13017 + **/
13018 +static int do_post_image_write(void)
13019 +{
13020 +       /* If switching images fails, do normal powerdown */
13021 +       if (alt_resume_param[0])
13022 +               do_toi_step(STEP_RESUME_ALT_IMAGE);
13023 +
13024 +       toi_power_down();
13025 +
13026 +       barrier();
13027 +       mb();
13028 +       return 0;
13029 +}
13030 +
13031 +/**
13032 + * __save_image - do the hard work of saving the image
13033 + *
13034 + * High level routine for getting the image saved. The key assumptions made
13035 + * are that processes have been frozen and sufficient memory is available.
13036 + *
13037 + * We also exit through here at resume time, coming back from toi_hibernate
13038 + * after the atomic restore. This is the reason for the toi_in_hibernate
13039 + * test.
13040 + **/
13041 +static int __save_image(void)
13042 +{
13043 +       int temp_result, did_copy = 0;
13044 +
13045 +       toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image..");
13046 +
13047 +       toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
13048 +               " - Final values: %d and %d.",
13049 +               pagedir1.size, pagedir2.size);
13050 +
13051 +       toi_cond_pause(1, "About to write pagedir2.");
13052 +
13053 +       temp_result = write_pageset(&pagedir2);
13054 +
13055 +       if (temp_result == -1 || test_result_state(TOI_ABORTED))
13056 +               return 1;
13057 +
13058 +       toi_cond_pause(1, "About to copy pageset 1.");
13059 +
13060 +       if (test_result_state(TOI_ABORTED))
13061 +               return 1;
13062 +
13063 +       toi_deactivate_storage(1);
13064 +
13065 +       toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
13066 +
13067 +       toi_in_hibernate = 1;
13068 +
13069 +       if (toi_go_atomic(PMSG_FREEZE, 1))
13070 +               goto Failed;
13071 +
13072 +       temp_result = toi_hibernate();
13073 +       if (!temp_result)
13074 +               did_copy = 1;
13075 +
13076 +       /* We return here at resume time too! */
13077 +       toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result);
13078 +
13079 +Failed:
13080 +       if (toi_activate_storage(1))
13081 +               panic("Failed to reactivate our storage.");
13082 +
13083 +       /* Resume time? */
13084 +       if (!toi_in_hibernate) {
13085 +               copyback_post();
13086 +               return 0;
13087 +       }
13088 +
13089 +       /* Nope. Hibernating. So, see if we can save the image... */
13090 +
13091 +       if (temp_result || test_result_state(TOI_ABORTED)) {
13092 +               if (did_copy)
13093 +                       goto abort_reloading_pagedir_two;
13094 +               else
13095 +                       return 1;
13096 +       }
13097 +
13098 +       toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size,
13099 +                       NULL);
13100 +
13101 +       if (test_result_state(TOI_ABORTED))
13102 +               goto abort_reloading_pagedir_two;
13103 +
13104 +       toi_cond_pause(1, "About to write pageset1.");
13105 +
13106 +       toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1");
13107 +
13108 +       temp_result = write_pageset(&pagedir1);
13109 +
13110 +       /* We didn't overwrite any memory, so no reread needs to be done. */
13111 +       if (test_action_state(TOI_TEST_FILTER_SPEED))
13112 +               return 1;
13113 +
13114 +       if (temp_result == 1 || test_result_state(TOI_ABORTED))
13115 +               goto abort_reloading_pagedir_two;
13116 +
13117 +       toi_cond_pause(1, "About to write header.");
13118 +
13119 +       if (test_result_state(TOI_ABORTED))
13120 +               goto abort_reloading_pagedir_two;
13121 +
13122 +       temp_result = write_image_header();
13123 +
13124 +       if (test_action_state(TOI_TEST_BIO))
13125 +               return 1;
13126 +
13127 +       if (!temp_result && !test_result_state(TOI_ABORTED))
13128 +               return 0;
13129 +
13130 +abort_reloading_pagedir_two:
13131 +       temp_result = read_pageset2(1);
13132 +
13133 +       /* If that failed, we're sunk. Panic! */
13134 +       if (temp_result)
13135 +               panic("Attempt to reload pagedir 2 while aborting "
13136 +                               "a hibernate failed.");
13137 +
13138 +       return 1;
13139 +}
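
Once the atomic copy has been made, pageset2's pages have been handed over
for I/O, so every abort path after that point must reread pageset2 before
the function may return (and the real code panics if even that fails). A
compressed sketch of the control flow above, with stubs standing in for the
real routines:

    #include <stdio.h>

    static int write_pageset2(void)  { return 0; }
    static int atomic_copy(void)     { return 0; }
    static int write_pageset1(void)  { return 0; }
    static int write_header(void)    { return 0; }
    static int reload_pageset2(void) { return 0; }

    /* Mirrors __save_image(): after a successful atomic copy, every
     * failure path must reread pageset2 before giving up. */
    static int save_image(void)
    {
        int did_copy = 0;

        if (write_pageset2())
            return 1;
        if (atomic_copy())
            goto abort;
        did_copy = 1;

        if (write_pageset1() || write_header())
            goto abort;
        return 0;

    abort:
        if (did_copy && reload_pageset2())
            puts("unrecoverable: the real code panics here");
        return 1;
    }

    int main(void)
    {
        return save_image();
    }
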
13140 +
13141 +static void map_ps2_pages(int enable)
13142 +{
13143 +       unsigned long pfn = 0;
13144 +
13145 +       pfn = memory_bm_next_pfn(pageset2_map);
13146 +
13147 +       while (pfn != BM_END_OF_MAP) {
13148 +               struct page *page = pfn_to_page(pfn);
13149 +               kernel_map_pages(page, 1, enable);
13150 +               pfn = memory_bm_next_pfn(pageset2_map);
13151 +       }
13152 +}
13153 +
13154 +/**
13155 + * do_save_image - save the image and handle the result
13156 + *
13157 + * Save the prepared image. If we fail or we're in the path returning
13158 + * from the atomic restore, cleanup.
13159 + **/
13160 +static int do_save_image(void)
13161 +{
13162 +       int result;
13163 +       map_ps2_pages(0);
13164 +       result = __save_image();
13165 +       map_ps2_pages(1);
13166 +       return result;
13167 +}
13168 +
13169 +/**
13170 + * do_prepare_image - try to prepare an image
13171 + *
13172 + * Seek to initialise and prepare an image to be saved. On failure,
13173 + * cleanup.
13174 + **/
13175 +static int do_prepare_image(void)
13176 +{
13177 +       int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
13178 +
13179 +       if (!restarting && toi_activate_storage(0))
13180 +               return 1;
13181 +
13182 +       /*
13183 +        * If we kept an image, still keep it and hibernate to RAM, we will
13184 +        * return 1 after hibernating and resuming (provided the power doesn't
13185 +        * run out). In that case, we skip directly to cleaning up and exiting.
13186 +        */
13187 +
13188 +       if (!can_hibernate() ||
13189 +           (test_result_state(TOI_KEPT_IMAGE) &&
13190 +            check_still_keeping_image()))
13191 +               return 1;
13192 +
13193 +       if (toi_init(restarting) && !toi_prepare_image() &&
13194 +                       !test_result_state(TOI_ABORTED))
13195 +               return 0;
13196 +
13197 +       trap_non_toi_io = 1;
13198 +
13199 +       return 1;
13200 +}
13201 +
13202 +/**
13203 + * do_check_can_resume - find out whether an image has been stored
13204 + *
13205 + * Read whether an image exists, asking the active allocator's
13206 + * image_exists routine (the same check that backs the image_exists
13207 + * sysfs entry), once the resume device has been validated.
13208 + **/
13209 +int do_check_can_resume(void)
13210 +{
13211 +       int result = -1;
13212 +
13213 +       if (toi_activate_storage(0))
13214 +               return -1;
13215 +
13216 +       if (!test_toi_state(TOI_RESUME_DEVICE_OK))
13217 +               toi_attempt_to_parse_resume_device(1);
13218 +
13219 +       if (toiActiveAllocator)
13220 +               result = toiActiveAllocator->image_exists(1);
13221 +
13222 +       toi_deactivate_storage(0);
13223 +       return result;
13224 +}
13225 +EXPORT_SYMBOL_GPL(do_check_can_resume);
13226 +
13227 +/**
13228 + * do_load_atomic_copy - load the first part of an image, if it exists
13229 + *
13230 + * Check whether we have an image. If one exists, do sanity checking
13231 + * (possibly invalidating the image or even rebooting if the user
13232 + * requests that) before loading it into memory in preparation for the
13233 + * atomic restore.
13234 + *
13235 + * If and only if we have an image loaded and ready to restore, we return 1.
13236 + **/
13237 +static int do_load_atomic_copy(void)
13238 +{
13239 +       int read_image_result = 0;
13240 +
13241 +       if (sizeof(swp_entry_t) != sizeof(long)) {
13242 +               printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size"
13243 +                       " of long. Please report this!\n");
13244 +               return 1;
13245 +       }
13246 +
13247 +       if (!resume_file[0])
13248 +               printk(KERN_WARNING "TuxOnIce: "
13249 +                       "You need to use a resume= command line parameter to "
13250 +                       "tell TuxOnIce where to look for an image.\n");
13251 +
13252 +       toi_activate_storage(0);
13253 +
13254 +       if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) &&
13255 +               !toi_attempt_to_parse_resume_device(0)) {
13256 +               /*
13257 +                * Without a usable storage device we can do nothing -
13258 +                * even if noresume is given
13259 +                */
13260 +
13261 +               if (!toiNumAllocators)
13262 +                       printk(KERN_ALERT "TuxOnIce: "
13263 +                         "No storage allocators have been registered.\n");
13264 +               else
13265 +                       printk(KERN_ALERT "TuxOnIce: "
13266 +                               "Missing or invalid storage location "
13267 +                               "(resume= parameter). Please correct and "
13268 +                               "rerun lilo (or equivalent) before "
13269 +                               "hibernating.\n");
13270 +               toi_deactivate_storage(0);
13271 +               return 1;
13272 +       }
13273 +
13274 +       if (allocate_bitmaps())
13275 +               return 1;
13276 +
13277 +       read_image_result = read_pageset1(); /* non fatal error ignored */
13278 +
13279 +       if (test_toi_state(TOI_NORESUME_SPECIFIED))
13280 +               clear_toi_state(TOI_NORESUME_SPECIFIED);
13281 +
13282 +       toi_deactivate_storage(0);
13283 +
13284 +       if (read_image_result)
13285 +               return 1;
13286 +
13287 +       return 0;
13288 +}
13289 +
13290 +/**
13291 + * prepare_restore_load_alt_image - save & restore alt image variables
13292 + *
13293 + * Save and restore the pageset1 maps, when loading an alternate image.
13294 + **/
13295 +static void prepare_restore_load_alt_image(int prepare)
13296 +{
13297 +       static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save;
13298 +
13299 +       if (prepare) {
13300 +               pageset1_map_save = pageset1_map;
13301 +               pageset1_map = NULL;
13302 +               pageset1_copy_map_save = pageset1_copy_map;
13303 +               pageset1_copy_map = NULL;
13304 +               set_toi_state(TOI_LOADING_ALT_IMAGE);
13305 +               toi_reset_alt_image_pageset2_pfn();
13306 +       } else {
13307 +               memory_bm_free(pageset1_map, 0);
13308 +               pageset1_map = pageset1_map_save;
13309 +               memory_bm_free(pageset1_copy_map, 0);
13310 +               pageset1_copy_map = pageset1_copy_map_save;
13311 +               clear_toi_state(TOI_NOW_RESUMING);
13312 +               clear_toi_state(TOI_LOADING_ALT_IMAGE);
13313 +       }
13314 +}
13315 +
13316 +/**
13317 + * do_toi_step - perform a step in hibernating or resuming
13318 + *
13319 + * Perform a step in hibernating or resuming an image. This abstraction
13320 + * is in preparation for implementing cluster support, and perhaps replacing
13321 + * uswsusp too (we haven't yet looked into whether that's possible).
13322 + **/
13323 +int do_toi_step(int step)
13324 +{
13325 +       switch (step) {
13326 +       case STEP_HIBERNATE_PREPARE_IMAGE:
13327 +               return do_prepare_image();
13328 +       case STEP_HIBERNATE_SAVE_IMAGE:
13329 +               return do_save_image();
13330 +       case STEP_HIBERNATE_POWERDOWN:
13331 +               return do_post_image_write();
13332 +       case STEP_RESUME_CAN_RESUME:
13333 +               return do_check_can_resume();
13334 +       case STEP_RESUME_LOAD_PS1:
13335 +               return do_load_atomic_copy();
13336 +       case STEP_RESUME_DO_RESTORE:
13337 +               /*
13338 +                * If we succeed, this doesn't return.
13339 +                * Instead, we return from do_save_image() in the
13340 +                * hibernated kernel.
13341 +                */
13342 +               return toi_atomic_restore();
13343 +       case STEP_RESUME_ALT_IMAGE:
13344 +               printk(KERN_INFO "Trying to resume alternate image.\n");
13345 +               toi_in_hibernate = 0;
13346 +               save_restore_alt_param(SAVE, NOQUIET);
13347 +               prepare_restore_load_alt_image(1);
13348 +               if (!do_check_can_resume()) {
13349 +                       printk(KERN_INFO "Nothing to resume from.\n");
13350 +                       goto out;
13351 +               }
13352 +               if (!do_load_atomic_copy())
13353 +                       toi_atomic_restore();
13354 +
13355 +               printk(KERN_INFO "Failed to load image.\n");
13356 +out:
13357 +               prepare_restore_load_alt_image(0);
13358 +               save_restore_alt_param(RESTORE, NOQUIET);
13359 +               break;
13360 +       case STEP_CLEANUP:
13361 +               do_cleanup(1, 0);
13362 +               break;
13363 +       case STEP_QUIET_CLEANUP:
13364 +               do_cleanup(0, 0);
13365 +               break;
13366 +       }
13367 +
13368 +       return 0;
13369 +}
13370 +EXPORT_SYMBOL_GPL(do_toi_step);
13371 +
13372 +/* -- Functions for kickstarting a hibernate or resume --- */
13373 +
13374 +/**
13375 + * toi_try_resume - try to do the steps in resuming
13376 + *
13377 + * Check if we have an image and if so try to resume. Clear the status
13378 + * flags too.
13379 + **/
13380 +void toi_try_resume(void)
13381 +{
13382 +       set_toi_state(TOI_TRYING_TO_RESUME);
13383 +       resume_attempted = 1;
13384 +
13385 +       current->flags |= PF_MEMALLOC;
13386 +
13387 +       if (do_toi_step(STEP_RESUME_CAN_RESUME) &&
13388 +                       !do_toi_step(STEP_RESUME_LOAD_PS1))
13389 +               do_toi_step(STEP_RESUME_DO_RESTORE);
13390 +
13391 +       do_cleanup(0, 0);
13392 +
13393 +       current->flags &= ~PF_MEMALLOC;
13394 +
13395 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
13396 +       clear_toi_state(TOI_TRYING_TO_RESUME);
13397 +       clear_toi_state(TOI_NOW_RESUMING);
13398 +}
13399 +
13400 +/**
13401 + * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume
13402 + *
13403 + * Wrapper for when toi_try_resume is called from the swsusp resume path,
13404 + * rather than from echo > /sys/power/tuxonice/do_resume.
13405 + **/
13406 +static void toi_sys_power_disk_try_resume(void)
13407 +{
13408 +       resume_attempted = 1;
13409 +
13410 +       /*
13411 +        * There's a comment in kernel/power/disk.c that indicates
13412 +        * we should be able to use mutex_lock_nested below. That
13413 +        * doesn't seem to cut it, though, so let's just turn lockdep
13414 +        * off for now.
13415 +        */
13416 +       lockdep_off();
13417 +
13418 +       if (toi_start_anything(SYSFS_RESUMING))
13419 +               goto out;
13420 +
13421 +       toi_try_resume();
13422 +
13423 +       /*
13424 +        * For initramfs, we have to clear the boot time
13425 +        * flag after trying to resume
13426 +        */
13427 +       clear_toi_state(TOI_BOOT_TIME);
13428 +
13429 +       toi_finish_anything(SYSFS_RESUMING);
13430 +out:
13431 +       lockdep_on();
13432 +}
13433 +
13434 +/**
13435 + * toi_try_hibernate - try to start a hibernation cycle
13436 + *
13437 + * Start a hibernation cycle, coming in from either
13438 + * echo > /sys/power/tuxonice/do_suspend
13439 + *
13440 + * or
13441 + *
13442 + * echo disk > /sys/power/state
13443 + *
13444 + * In the latter case, we come in without pm_mutex taken; in the
13445 + * former, it has been taken.
13446 + **/
13447 +int toi_try_hibernate(void)
13448 +{
13449 +       int result = 0, sys_power_disk = 0, retries = 0;
13450 +
13451 +       if (!mutex_is_locked(&tuxonice_in_use)) {
13452 +               /* Came in via /sys/power/disk */
13453 +               if (toi_start_anything(SYSFS_HIBERNATING))
13454 +                       return -EBUSY;
13455 +               sys_power_disk = 1;
13456 +       }
13457 +
13458 +       current->flags |= PF_MEMALLOC;
13459 +
13460 +       if (test_toi_state(TOI_CLUSTER_MODE)) {
13461 +               toi_initiate_cluster_hibernate();
13462 +               goto out;
13463 +       }
13464 +
13465 +prepare:
13466 +       result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
13467 +
13468 +       if (result || test_action_state(TOI_FREEZER_TEST))
13469 +               goto out;
13470 +
13471 +       result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
13472 +
13473 +       if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) {
13474 +               if (retries < 2) {
13475 +                       do_cleanup(0, 1);
13476 +                       retries++;
13477 +                       clear_result_state(TOI_ABORTED);
13478 +                       extra_pd1_pages_allowance = extra_pd1_pages_used + 500;
13479 +                       printk(KERN_INFO "Automatically adjusting the extra"
13480 +                               " pages allowance to %ld and restarting.\n",
13481 +                               extra_pd1_pages_allowance);
13482 +                       goto prepare;
13483 +               }
13484 +
13485 +               printk(KERN_INFO "Adjusted extra pages allowance twice and "
13486 +                       "still couldn't hibernate successfully. Giving up.\n");
13487 +       }
13488 +
13489 +       /* This code runs at resume time too! */
13490 +       if (!result && toi_in_hibernate)
13491 +               result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
13492 +out:
13493 +       do_cleanup(1, 0);
13494 +       current->flags &= ~PF_MEMALLOC;
13495 +
13496 +       if (sys_power_disk)
13497 +               toi_finish_anything(SYSFS_HIBERNATING);
13498 +
13499 +       return result;
13500 +}
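
The restart path above widens extra_pd1_pages_allowance to what the failed
attempt actually consumed plus a margin, and retries at most twice before
giving up. A standalone model of that loop (the numbers are illustrative
only, not taken from the patch):

    #include <stdio.h>

    static long allowance = 500;
    static long used;

    /* Pretend preparation fails while the allowance is too small. */
    static int try_save(void)
    {
        used = 1200;                 /* what the attempt consumed */
        return allowance < used;     /* nonzero == failed */
    }

    int main(void)
    {
        int retries = 0;

        while (try_save()) {
            if (retries++ >= 2) {
                puts("Giving up.");
                return 1;
            }
            allowance = used + 500;  /* as in toi_try_hibernate() */
            printf("Adjusting allowance to %ld and restarting.\n",
                   allowance);
        }
        puts("Saved.");
        return 0;
    }
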
13501 +
13502 +/*
13503 + * channel_no: If !0, -c <channel_no> is added to args (userui).
13504 + */
13505 +int toi_launch_userspace_program(char *command, int channel_no,
13506 +               enum umh_wait wait, int debug)
13507 +{
13508 +       int retval;
13509 +       static char *envp[] = {
13510 +                       "HOME=/",
13511 +                       "TERM=linux",
13512 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
13513 +                       NULL };
13514 +       static char *argv[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
13515 +               };
13516 +       char *channel = NULL;
13517 +       int arg = 0, size;
13518 +       char test_read[255];
13519 +       char *orig_posn = command;
13520 +
13521 +       if (!strlen(orig_posn))
13522 +               return 1;
13523 +
13524 +       if (channel_no) {
13525 +               channel = toi_kzalloc(4, 6, GFP_KERNEL);
13526 +               if (!channel) {
13527 +                       printk(KERN_INFO "Failed to allocate memory in "
13528 +                               "preparing to launch userspace program.\n");
13529 +                       return 1;
13530 +               }
13531 +       }
13532 +
13533 +       /* Up to 6 args supported */
13534 +       while (arg < 6) {
13535 +               sscanf(orig_posn, "%s", test_read);
13536 +               size = strlen(test_read);
13537 +               if (!size)
13538 +                       break;
13539 +               argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP);
13540 +               strcpy(argv[arg], test_read);
13541 +               orig_posn += size + 1;
13542 +               *test_read = 0;
13543 +               arg++;
13544 +       }
13545 +
13546 +       if (channel_no) {
13547 +               sprintf(channel, "-c%d", channel_no);
13548 +               argv[arg] = channel;
13549 +       } else
13550 +               arg--;
13551 +
13552 +       if (debug) {
13553 +               argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP);
13554 +               strcpy(argv[arg], "--debug");
13555 +       }
13556 +
13557 +       retval = call_usermodehelper(argv[0], argv, envp, wait);
13558 +
13559 +       /*
13560 +        * If the program reports an error, retval = 256. Don't complain
13561 +        * about that here.
13562 +        */
13563 +       if (retval && retval != 256)
13564 +               printk(KERN_ERR "Failed to launch userspace program '%s': "
13565 +                               "Error %d\n", command, retval);
13566 +
13567 +       {
13568 +               int i;
13569 +               for (i = 0; i < arg; i++)
13570 +                       if (argv[i] && argv[i] != channel)
13571 +                               toi_kfree(5, argv[i], sizeof(*argv[i]));
13572 +       }
13573 +
13574 +       toi_kfree(4, channel, sizeof(*channel));
13575 +
13576 +       return retval;
13577 +}
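
The tokeniser above reads each argument with a bare sscanf("%s") into a
255-byte buffer, which does not bound the token length, and the
"orig_posn += size + 1" step assumes exactly one separator character. A
bounded userspace sketch using a width specifier plus %n so that runs of
whitespace are skipped correctly:

    #include <stdio.h>

    #define MAX_ARGS 6

    int main(void)
    {
        const char *command = "tuxoniceui_text  --debug -c1";
        char token[255];
        int consumed, arg = 0;

        /* "%254s" bounds the token; "%n" records how many input
         * characters were consumed, including leading whitespace. */
        while (arg < MAX_ARGS &&
               sscanf(command, "%254s%n", token, &consumed) == 1) {
            printf("argv[%d] = %s\n", arg++, token);
            command += consumed;
        }
        return arg ? 0 : 1;
    }
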
13578 +
13579 +/*
13580 + * This array contains entries that are automatically registered at
13581 + * boot. Modules and the console code register their own entries separately.
13582 + */
13583 +static struct toi_sysfs_data sysfs_params[] = {
13584 +       SYSFS_INT("freezer_sync", SYSFS_RW, &freezer_sync, 0, 1, 0, NULL),
13585 +       SYSFS_LONG("extra_pages_allowance", SYSFS_RW,
13586 +                       &extra_pd1_pages_allowance, 0, LONG_MAX, 0),
13587 +       SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read,
13588 +                       image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL),
13589 +       SYSFS_STRING("resume", SYSFS_RW, resume_file, 255,
13590 +                       SYSFS_NEEDS_SM_FOR_WRITE,
13591 +                       attempt_to_parse_resume_device2),
13592 +       SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255,
13593 +                       SYSFS_NEEDS_SM_FOR_WRITE,
13594 +                       attempt_to_parse_alt_resume_param),
13595 +       SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0,
13596 +                       NULL),
13597 +       SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action,
13598 +                       TOI_IGNORE_ROOTFS, 0),
13599 +       SYSFS_LONG("image_size_limit", SYSFS_RW, &image_size_limit, -2,
13600 +                       INT_MAX, 0),
13601 +       SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0),
13602 +       SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action,
13603 +                       TOI_NO_MULTITHREADED_IO, 0),
13604 +       SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action,
13605 +                       TOI_NO_FLUSHER_THREAD, 0),
13606 +       SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action,
13607 +                       TOI_PAGESET2_FULL, 0),
13608 +       SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0),
13609 +       SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action,
13610 +                       TOI_REPLACE_SWSUSP, 0),
13611 +       SYSFS_STRING("resume_commandline", SYSFS_RW,
13612 +                       toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0,
13613 +                       NULL),
13614 +       SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL),
13615 +       SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action,
13616 +                       TOI_FREEZER_TEST, 0),
13617 +       SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0),
13618 +       SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action,
13619 +                       TOI_TEST_FILTER_SPEED, 0),
13620 +       SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action,
13621 +                       TOI_NO_PAGESET2, 0),
13622 +       SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action,
13623 +                       TOI_NO_PS2_IF_UNNEEDED, 0),
13624 +       SYSFS_BIT("late_cpu_hotplug", SYSFS_RW, &toi_bkd.toi_action,
13625 +                       TOI_LATE_CPU_HOTPLUG, 0),
13626 +       SYSFS_STRING("binary_signature", SYSFS_READONLY,
13627 +                       tuxonice_signature, 9, 0, NULL),
13628 +       SYSFS_INT("max_workers", SYSFS_RW, &toi_max_workers, 0, NR_CPUS, 0,
13629 +                       NULL),
13630 +#ifdef CONFIG_TOI_KEEP_IMAGE
13631 +       SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE,
13632 +                       0),
13633 +#endif
13634 +};
13635 +
13636 +static struct toi_core_fns my_fns = {
13637 +       .get_nonconflicting_page = __toi_get_nonconflicting_page,
13638 +       .post_context_save = __toi_post_context_save,
13639 +       .try_hibernate = toi_try_hibernate,
13640 +       .try_resume = toi_sys_power_disk_try_resume,
13641 +};
13642 +
13643 +/**
13644 + * core_load - initialisation of TuxOnIce core
13645 + *
13646 + * Initialise the core, beginning with sysfs. Checksum and so on are part of
13647 + * the core, but have their own initialisation routines because they either
13648 + * aren't compiled in all the time or have their own subdirectories.
13649 + **/
13650 +static __init int core_load(void)
13651 +{
13652 +       int i,
13653 +           numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
13654 +
13655 +       printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION
13656 +                       " (http://tuxonice.net)\n");
13657 +
13658 +       if (toi_sysfs_init())
13659 +               return 1;
13660 +
13661 +       for (i = 0; i < numfiles; i++)
13662 +               toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
13663 +
13664 +       toi_core_fns = &my_fns;
13665 +
13666 +       if (toi_alloc_init())
13667 +               return 1;
13668 +       if (toi_checksum_init())
13669 +               return 1;
13670 +       if (toi_usm_init())
13671 +               return 1;
13672 +       if (toi_ui_init())
13673 +               return 1;
13674 +       if (toi_poweroff_init())
13675 +               return 1;
13676 +       if (toi_cluster_init())
13677 +               return 1;
13678 +
13679 +       return 0;
13680 +}
13681 +
13682 +#ifdef MODULE
13683 +/**
13684 + * core_unload: Prepare to unload the core code.
13685 + **/
13686 +static __exit void core_unload(void)
13687 +{
13688 +       int i,
13689 +           numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
13690 +
13691 +       toi_alloc_exit();
13692 +       toi_checksum_exit();
13693 +       toi_poweroff_exit();
13694 +       toi_ui_exit();
13695 +       toi_usm_exit();
13696 +       toi_cluster_exit();
13697 +
13698 +       for (i = 0; i < numfiles; i++)
13699 +               toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
13700 +
13701 +       toi_core_fns = NULL;
13702 +
13703 +       toi_sysfs_exit();
13704 +}
13705 +MODULE_LICENSE("GPL");
13706 +module_init(core_load);
13707 +module_exit(core_unload);
13708 +#else
13709 +late_initcall(core_load);
13710 +#endif
13711 diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c
13712 new file mode 100644
13713 index 0000000..6030dc6
13714 --- /dev/null
13715 +++ b/kernel/power/tuxonice_io.c
13716 @@ -0,0 +1,1836 @@
13717 +/*
13718 + * kernel/power/tuxonice_io.c
13719 + *
13720 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
13721 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
13722 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
13723 + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net)
13724 + *
13725 + * This file is released under the GPLv2.
13726 + *
13727 + * It contains high level IO routines for hibernating.
13728 + *
13729 + */
13730 +
13731 +#include <linux/suspend.h>
13732 +#include <linux/version.h>
13733 +#include <linux/utsname.h>
13734 +#include <linux/mount.h>
13735 +#include <linux/highmem.h>
13736 +#include <linux/kthread.h>
13737 +#include <linux/cpu.h>
13738 +#include <linux/fs_struct.h>
13739 +#include <linux/bio.h>
13740 +#include <linux/fs_uuid.h>
13741 +#include <asm/tlbflush.h>
13742 +
13743 +#include "tuxonice.h"
13744 +#include "tuxonice_modules.h"
13745 +#include "tuxonice_pageflags.h"
13746 +#include "tuxonice_io.h"
13747 +#include "tuxonice_ui.h"
13748 +#include "tuxonice_storage.h"
13749 +#include "tuxonice_prepare_image.h"
13750 +#include "tuxonice_extent.h"
13751 +#include "tuxonice_sysfs.h"
13752 +#include "tuxonice_builtin.h"
13753 +#include "tuxonice_checksum.h"
13754 +#include "tuxonice_alloc.h"
13755 +char alt_resume_param[256];
13756 +
13757 +/* Version read from image header at resume */
13758 +static int toi_image_header_version;
13759 +
13760 +#define read_if_version(VERS, VAR, DESC, ERR_ACT) do {                        \
13761 +       if (likely(toi_image_header_version >= VERS))                           \
13762 +               if (toiActiveAllocator->rw_header_chunk(READ, NULL,             \
13763 +                                       (char *) &VAR, sizeof(VAR))) {          \
13764 +                       abort_hibernate(TOI_FAILED_IO, "Failed to read DESC."); \
13765 +                       ERR_ACT;                                                \
13766 +               }                                                               \
13767 +} while (0)
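
Note that the DESC parameter appears inside a string literal above, so the
log message is always the literal text "Failed to read DESC."; macro
parameters are not expanded inside quotes. If interpolation were intended,
the preprocessor's stringising operator would be needed, as in this minimal
illustration (not a change to the macro itself):

    #include <stdio.h>

    /* The # operator turns a macro argument into a string literal;
     * without it, "DESC" inside quotes stays the word DESC. */
    #define READ_FAIL_MSG(DESC) "Failed to read " #DESC "."

    int main(void)
    {
        /* Prints: Failed to read header version. */
        puts(READ_FAIL_MSG(header version));
        return 0;
    }
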
13768 +
13769 +/* Variables shared between threads and updated under the mutex */
13770 +static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
13771 +static int io_index, io_nextupdate, io_pc, io_pc_step;
13772 +static DEFINE_MUTEX(io_mutex);
13773 +static DEFINE_PER_CPU(struct page *, last_sought);
13774 +static DEFINE_PER_CPU(struct page *, last_high_page);
13775 +static DEFINE_PER_CPU(char *, checksum_locn);
13776 +static DEFINE_PER_CPU(struct pbe *, last_low_page);
13777 +static atomic_t io_count;
13778 +atomic_t toi_io_workers;
13779 +EXPORT_SYMBOL_GPL(toi_io_workers);
13780 +
13781 +DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
13782 +EXPORT_SYMBOL_GPL(toi_io_queue_flusher);
13783 +
13784 +int toi_bio_queue_flusher_should_finish;
13785 +EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish);
13786 +
13787 +/* Indicates that this thread should be used for checking throughput */
13788 +#define MONITOR ((void *) 1)
13789 +
13790 +int toi_max_workers;
13791 +
13792 +static char *image_version_error = "The image header version is newer than "
13793 +       "this kernel supports.";
13794 +
13795 +/**
13796 + * toi_attempt_to_parse_resume_device - determine if we can hibernate
13797 + *
13798 + * Can we hibernate, using the current resume= parameter?
13799 + **/
13800 +int toi_attempt_to_parse_resume_device(int quiet)
13801 +{
13802 +       struct list_head *Allocator;
13803 +       struct toi_module_ops *thisAllocator;
13804 +       int result, returning = 0;
13805 +
13806 +       if (toi_activate_storage(0))
13807 +               return 0;
13808 +
13809 +       toiActiveAllocator = NULL;
13810 +       clear_toi_state(TOI_RESUME_DEVICE_OK);
13811 +       clear_toi_state(TOI_CAN_RESUME);
13812 +       clear_result_state(TOI_ABORTED);
13813 +
13814 +       if (!toiNumAllocators) {
13815 +               if (!quiet)
13816 +                       printk(KERN_INFO "TuxOnIce: No storage allocators have "
13817 +                               "been registered. Hibernating will be "
13818 +                               "disabled.\n");
13819 +               goto cleanup;
13820 +       }
13821 +
13822 +       list_for_each(Allocator, &toiAllocators) {
13823 +               thisAllocator = list_entry(Allocator, struct toi_module_ops,
13824 +                                                               type_list);
13825 +
13826 +               /*
13827 +                * Not sure why you'd want to disable an allocator, but
13828 +                * we should honour the flag if we're providing it
13829 +                */
13830 +               if (!thisAllocator->enabled)
13831 +                       continue;
13832 +
13833 +               result = thisAllocator->parse_sig_location(
13834 +                               resume_file, (toiNumAllocators == 1),
13835 +                               quiet);
13836 +
13837 +               switch (result) {
13838 +               case -EINVAL:
13839 +                       /* For this allocator, but not a valid
13840 +                        * configuration. Error already printed. */
13841 +                       goto cleanup;
13842 +
13843 +               case 0:
13844 +                       /* For this allocator and valid. */
13845 +                       toiActiveAllocator = thisAllocator;
13846 +
13847 +                       set_toi_state(TOI_RESUME_DEVICE_OK);
13848 +                       set_toi_state(TOI_CAN_RESUME);
13849 +                       returning = 1;
13850 +                       goto cleanup;
13851 +               }
13852 +       }
13853 +       if (!quiet)
13854 +               printk(KERN_INFO "TuxOnIce: No matching enabled allocator "
13855 +                               "found. Resuming disabled.\n");
13856 +cleanup:
13857 +       toi_deactivate_storage(0);
13858 +       return returning;
13859 +}
13860 +EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device);
13861 +
13862 +void attempt_to_parse_resume_device2(void)
13863 +{
13864 +       toi_prepare_usm();
13865 +       toi_attempt_to_parse_resume_device(0);
13866 +       toi_cleanup_usm();
13867 +}
13868 +EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2);
13869 +
13870 +void save_restore_alt_param(int replace, int quiet)
13871 +{
13872 +       static char resume_param_save[255];
13873 +       static unsigned long toi_state_save;
13874 +
13875 +       if (replace) {
13876 +               toi_state_save = toi_state;
13877 +               strcpy(resume_param_save, resume_file);
13878 +               strcpy(resume_file, alt_resume_param);
13879 +       } else {
13880 +               strcpy(resume_file, resume_param_save);
13881 +               toi_state = toi_state_save;
13882 +       }
13883 +       toi_attempt_to_parse_resume_device(quiet);
13884 +}
13885 +
13886 +void attempt_to_parse_alt_resume_param(void)
13887 +{
13888 +       int ok = 0;
13889 +
13890 +       /* Temporarily set resume_param to the poweroff value */
13891 +       if (!strlen(alt_resume_param))
13892 +               return;
13893 +
13894 +       printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n");
13895 +       save_restore_alt_param(SAVE, NOQUIET);
13896 +       if (test_toi_state(TOI_CAN_RESUME))
13897 +               ok = 1;
13898 +
13899 +       printk(KERN_INFO "=== Done ===\n");
13900 +       save_restore_alt_param(RESTORE, QUIET);
13901 +
13902 +       /* If not ok, clear the string */
13903 +       if (ok)
13904 +               return;
13905 +
13906 +       printk(KERN_INFO "Can't resume from that location; clearing "
13907 +                       "alt_resume_param.\n");
13908 +       alt_resume_param[0] = '\0';
13909 +}
13910 +
13911 +/**
13912 + * noresume_reset_modules - reset data structures when not resuming
13913 + *
13914 + * When we read the start of an image, modules (and especially the
13915 + * active allocator) might need to reset data structures if we
13916 + * decide to remove the image rather than resuming from it.
13917 + **/
13918 +static void noresume_reset_modules(void)
13919 +{
13920 +       struct toi_module_ops *this_filter;
13921 +
13922 +       list_for_each_entry(this_filter, &toi_filters, type_list)
13923 +               if (this_filter->noresume_reset)
13924 +                       this_filter->noresume_reset();
13925 +
13926 +       if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
13927 +               toiActiveAllocator->noresume_reset();
13928 +}
13929 +
13930 +/**
13931 + * fill_toi_header - fill the hibernate header structure
13932 + * @struct toi_header: Header data structure to be filled.
13933 + **/
13934 +static int fill_toi_header(struct toi_header *sh)
13935 +{
13936 +       int i, error;
13937 +
13938 +       error = init_header((struct swsusp_info *) sh);
13939 +       if (error)
13940 +               return error;
13941 +
13942 +       sh->pagedir = pagedir1;
13943 +       sh->pageset_2_size = pagedir2.size;
13944 +       sh->param0 = toi_result;
13945 +       sh->param1 = toi_bkd.toi_action;
13946 +       sh->param2 = toi_bkd.toi_debug_state;
13947 +       sh->param3 = toi_bkd.toi_default_console_level;
13948 +       sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
13949 +       for (i = 0; i < 4; i++)
13950 +               sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2];
13951 +       sh->bkd = boot_kernel_data_buffer;
13952 +       return 0;
13953 +}
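
The copy loop above flattens the 2x2 io_time matrix into a single index:
for i = 0..3, the pair (i/2, i%2) visits (0,0), (0,1), (1,0), (1,1), i.e.
row-major order. A quick standalone check:

    #include <stdio.h>

    int main(void)
    {
        int i;

        /* (i/2, i%2) enumerates the 2x2 matrix row by row. */
        for (i = 0; i < 4; i++)
            printf("i=%d -> [%d][%d]\n", i, i / 2, i % 2);
        return 0;
    }
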
13954 +
13955 +/**
13956 + * rw_init_modules - initialise modules
13957 + * @rw:                Whether we are reading or writing an image.
13958 + * @which:     Section of the image being processed.
13959 + *
13960 + * Iterate over modules, preparing the ones that will be used to read or write
13961 + * data.
13962 + **/
13963 +static int rw_init_modules(int rw, int which)
13964 +{
13965 +       struct toi_module_ops *this_module;
13966 +       /* Initialise page transformers */
13967 +       list_for_each_entry(this_module, &toi_filters, type_list) {
13968 +               if (!this_module->enabled)
13969 +                       continue;
13970 +               if (this_module->rw_init && this_module->rw_init(rw, which)) {
13971 +                       abort_hibernate(TOI_FAILED_MODULE_INIT,
13972 +                               "Failed to initialize the %s filter.",
13973 +                               this_module->name);
13974 +                       return 1;
13975 +               }
13976 +       }
13977 +
13978 +       /* Initialise allocator */
13979 +       if (toiActiveAllocator->rw_init(rw, which)) {
13980 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
13981 +                               "Failed to initialise the allocator.");
13982 +               return 1;
13983 +       }
13984 +
13985 +       /* Initialise other modules */
13986 +       list_for_each_entry(this_module, &toi_modules, module_list) {
13987 +               if (!this_module->enabled ||
13988 +                   this_module->type == FILTER_MODULE ||
13989 +                   this_module->type == WRITER_MODULE)
13990 +                       continue;
13991 +               if (this_module->rw_init && this_module->rw_init(rw, which)) {
13992 +                       set_abort_result(TOI_FAILED_MODULE_INIT);
13993 +                       printk(KERN_INFO "Setting aborted flag due to module "
13994 +                                       "init failure.\n");
13995 +                       return 1;
13996 +               }
13997 +       }
13998 +
13999 +       return 0;
14000 +}
14001 +
14002 +/**
14003 + * rw_cleanup_modules - cleanup modules
14004 + * @rw:        Whether we are reading or writing an image.
14005 + *
14006 + * Cleanup components after reading or writing a set of pages.
14007 + * Only the allocator may fail.
14008 + **/
14009 +static int rw_cleanup_modules(int rw)
14010 +{
14011 +       struct toi_module_ops *this_module;
14012 +       int result = 0;
14013 +
14014 +       /* Cleanup other modules */
14015 +       list_for_each_entry(this_module, &toi_modules, module_list) {
14016 +               if (!this_module->enabled ||
14017 +                   this_module->type == FILTER_MODULE ||
14018 +                   this_module->type == WRITER_MODULE)
14019 +                       continue;
14020 +               if (this_module->rw_cleanup)
14021 +                       result |= this_module->rw_cleanup(rw);
14022 +       }
14023 +
14024 +       /* Flush data and cleanup */
14025 +       list_for_each_entry(this_module, &toi_filters, type_list) {
14026 +               if (!this_module->enabled)
14027 +                       continue;
14028 +               if (this_module->rw_cleanup)
14029 +                       result |= this_module->rw_cleanup(rw);
14030 +       }
14031 +
14032 +       result |= toiActiveAllocator->rw_cleanup(rw);
14033 +
14034 +       return result;
14035 +}
14036 +
14037 +static struct page *copy_page_from_orig_page(struct page *orig_page)
14038 +{
14039 +       int is_high = PageHighMem(orig_page), index, min, max;
14040 +       struct page *high_page = NULL,
14041 +                   **my_last_high_page = &__get_cpu_var(last_high_page),
14042 +                   **my_last_sought = &__get_cpu_var(last_sought);
14043 +       struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page);
14044 +       void *compare;
14045 +
14046 +       if (is_high) {
14047 +               if (*my_last_sought && *my_last_high_page &&
14048 +                               *my_last_sought < orig_page)
14049 +                       high_page = *my_last_high_page;
14050 +               else
14051 +                       high_page = (struct page *) restore_highmem_pblist;
14052 +               this = (struct pbe *) kmap(high_page);
14053 +               compare = orig_page;
14054 +       } else {
14055 +               if (*my_last_sought && *my_last_low_page &&
14056 +                               *my_last_sought < orig_page)
14057 +                       this = *my_last_low_page;
14058 +               else
14059 +                       this = restore_pblist;
14060 +               compare = page_address(orig_page);
14061 +       }
14062 +
14063 +       *my_last_sought = orig_page;
14064 +
14065 +       /* Locate page containing pbe */
14066 +       while (this[PBES_PER_PAGE - 1].next &&
14067 +                       this[PBES_PER_PAGE - 1].orig_address < compare) {
14068 +               if (is_high) {
14069 +                       struct page *next_high_page = (struct page *)
14070 +                               this[PBES_PER_PAGE - 1].next;
14071 +                       kunmap(high_page);
14072 +                       this = kmap(next_high_page);
14073 +                       high_page = next_high_page;
14074 +               } else
14075 +                       this = this[PBES_PER_PAGE - 1].next;
14076 +       }
14077 +
14078 +       /* Do a binary search within the page */
14079 +       min = 0;
14080 +       max = PBES_PER_PAGE;
14081 +       index = PBES_PER_PAGE / 2;
14082 +       while (max - min) {
14083 +               if (!this[index].orig_address ||
14084 +                   this[index].orig_address > compare)
14085 +                       max = index;
14086 +               else if (this[index].orig_address == compare) {
14087 +                       if (is_high) {
14088 +                               struct page *page = this[index].address;
14089 +                               *my_last_high_page = high_page;
14090 +                               kunmap(high_page);
14091 +                               return page;
14092 +                       }
14093 +                       *my_last_low_page = this;
14094 +                       return virt_to_page(this[index].address);
14095 +               } else
14096 +                       min = index;
14097 +               index = ((max + min) / 2);
14098 +       }
14099 +
14100 +       if (is_high)
14101 +               kunmap(high_page);
14102 +
14103 +       abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for"
14104 +               " orig page %p. this[index].orig_address=%p.\n", orig_page,
14105 +               this[index].orig_address);
14106 +       return NULL;
14107 +}
14108 +
14109 +/**
14110 + * write_next_page - write the next page in a pageset
14111 + * @data_pfn: The pfn where the next data to write is located.
14112 + * @my_io_index: The index of the page in the pageset.
14113 + * @write_pfn: The pfn number to write in the image (where the data belongs).
14114 + * @first_filter: Where to send the page (optimisation).
14115 + *
14116 + * Get the pfn of the next page to write, map the page if necessary and do the
14117 + * write.
14118 + **/
14119 +static int write_next_page(unsigned long *data_pfn, int *my_io_index,
14120 +               unsigned long *write_pfn, struct toi_module_ops *first_filter)
14121 +{
14122 +       struct page *page;
14123 +       char **my_checksum_locn = &__get_cpu_var(checksum_locn);
14124 +       int result = 0, was_present;
14125 +
14126 +       *data_pfn = memory_bm_next_pfn(io_map);
14127 +
14128 +       /* Another thread could have beaten us to it. */
14129 +       if (*data_pfn == BM_END_OF_MAP) {
14130 +               if (atomic_read(&io_count)) {
14131 +                       printk(KERN_INFO "Ran out of pfns but io_count is "
14132 +                                       "still %d.\n", atomic_read(&io_count));
14133 +                       BUG();
14134 +               }
14135 +               mutex_unlock(&io_mutex);
14136 +               return -ENODATA;
14137 +       }
14138 +
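      +       /* Claim our slot; io_count counts down from io_finish_at. */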
14139 +       *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
14140 +
14141 +       memory_bm_clear_bit(io_map, *data_pfn);
14142 +       page = pfn_to_page(*data_pfn);
14143 +
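      +       /*
      +        * With CONFIG_DEBUG_PAGEALLOC, free pages may be unmapped from
      +        * the kernel page tables; map this one temporarily if needed.
      +        */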
14144 +       was_present = kernel_page_present(page);
14145 +       if (!was_present)
14146 +               kernel_map_pages(page, 1, 1);
14147 +
14148 +       if (io_pageset == 1)
14149 +               *write_pfn = memory_bm_next_pfn(pageset1_map);
14150 +       else {
14151 +               *write_pfn = *data_pfn;
14152 +               *my_checksum_locn = tuxonice_get_next_checksum();
14153 +       }
14154 +
14155 +       mutex_unlock(&io_mutex);
14156 +
14157 +       if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn))
14158 +               return 1;
14159 +
14160 +       result = first_filter->write_page(*write_pfn, page, PAGE_SIZE);
14161 +
14162 +       if (!was_present)
14163 +               kernel_map_pages(page, 1, 0);
14164 +
14165 +       return result;
14166 +}
14167 +
14168 +/**
14169 + * read_next_page - read the next page in a pageset
14170 + * @my_io_index: The index of the page in the pageset.
14171 + * @write_pfn: The pfn where the data belongs.
      + * @buffer: The buffer into which the page's data is read.
      + * @first_filter: The first filter to request the page from (optimisation).
14172 + *
14173 + * Read a page of the image into our buffer. It can happen (here and in the
14174 + * write routine) that threads don't get run until after other CPUs have done
14175 + * all the work. This was the cause of the long-standing issue with
14176 + * occasionally getting -ENODATA errors at the end of reading the image. We
14177 + * therefore need to check there's actually a page to read before trying to
14178 + * retrieve one.
14179 + **/
14180 +
14181 +static int read_next_page(int *my_io_index, unsigned long *write_pfn,
14182 +               struct page *buffer, struct toi_module_ops *first_filter)
14183 +{
14184 +       unsigned int buf_size = PAGE_SIZE;
14185 +       unsigned long left = atomic_read(&io_count);
14186 +
14187 +       if (left)
14188 +               *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
14189 +
14190 +       mutex_unlock(&io_mutex);
14191 +
14192 +       /*
14193 +        * Are we aborting? If so, don't submit any more I/O as
14194 +        * resetting the resume_attempted flag (from ui.c) will
14195 +        * clear the bdev flags, making this thread oops.
14196 +        */
14197 +       if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
14198 +               atomic_dec(&toi_io_workers);
14199 +               if (!atomic_read(&toi_io_workers)) {
14200 +                       /*
14201 +                        * So we can be sure we'll have memory for
14202 +                        * marking that we haven't resumed.
14203 +                        */
14204 +                       rw_cleanup_modules(READ);
14205 +                       set_toi_state(TOI_IO_STOPPED);
14206 +               }
14207 +               while (1)
14208 +                       schedule();
14209 +       }
14210 +
14211 +       if (!left)
14212 +               return -ENODATA;
14213 +
14214 +       /*
14215 +        * See toi_bio_read_page in tuxonice_bio.c:
14216 +        * read the next page in the image.
14217 +        */
14218 +       return first_filter->read_page(write_pfn, buffer, &buf_size);
14219 +}
14220 +
14221 +static void use_read_page(unsigned long write_pfn, struct page *buffer)
14222 +{
14223 +       struct page *final_page = pfn_to_page(write_pfn),
14224 +                   *copy_page = final_page;
14225 +       char *virt, *buffer_virt;
14226 +
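      +       /*
      +        * Pageset1 data can't be written over the still-running kernel,
      +        * so redirect it to the copy page recorded in the pbe lists; the
      +        * atomic restore copies it back later.
      +        */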
14227 +       if (io_pageset == 1 && !PagePageset1Copy(final_page)) {
14228 +               copy_page = copy_page_from_orig_page(final_page);
14229 +               BUG_ON(!copy_page);
14230 +       }
14231 +
14232 +       if (memory_bm_test_bit(io_map, write_pfn)) {
14233 +               int was_present;
14234 +
14235 +               virt = kmap(copy_page);
14236 +               buffer_virt = kmap(buffer);
14237 +               was_present = kernel_page_present(copy_page);
14238 +               if (!was_present)
14239 +                       kernel_map_pages(copy_page, 1, 1);
14240 +               memcpy(virt, buffer_virt, PAGE_SIZE);
14241 +               if (!was_present)
14242 +                       kernel_map_pages(copy_page, 1, 0);
14243 +               kunmap(copy_page);
14244 +               kunmap(buffer);
14245 +               memory_bm_clear_bit(io_map, write_pfn);
14246 +       } else {
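      +               /*
      +                * The bit was already clear, so this read didn't satisfy
      +                * an outstanding page; credit io_count back for one that
      +                * still needs to be read.
      +                */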
14247 +               mutex_lock(&io_mutex);
14248 +               atomic_inc(&io_count);
14249 +               mutex_unlock(&io_mutex);
14250 +       }
14251 +}
14252 +
14253 +static unsigned long status_update(int writing, unsigned long done,
14254 +               unsigned long ticks)
14255 +{
14256 +       int cs_index = writing ? 0 : 1;
14257 +       unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks;
14258 +       unsigned long msec = jiffies_to_msecs(abs(ticks_so_far));
14259 +       unsigned long pgs_per_s, estimate = 0, pages_left;
14260 +
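      +       /* Rate is pages per second; estimate is seconds remaining. */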
14261 +       if (msec) {
14262 +               pages_left = io_barmax - done;
14263 +               pgs_per_s = 1000 * done / msec;
14264 +               if (pgs_per_s)
14265 +                       estimate = pages_left / pgs_per_s;
14266 +       }
14267 +
14268 +       if (estimate && ticks > HZ / 2)
14269 +               return toi_update_status(done, io_barmax,
14270 +                       " %d/%d MB (%lu sec left)",
14271 +                       MB(done+1), MB(io_barmax), estimate);
14272 +
14273 +       return toi_update_status(done, io_barmax, " %d/%d MB",
14274 +               MB(done+1), MB(io_barmax));
14275 +}
14276 +
14277 +/**
14278 + * worker_rw_loop - main loop to read/write pages
14279 + *
14280 + * The main I/O loop for reading or writing pages. The io_map bitmap is used to
14281 + * track the pages to read/write.
14282 + * If we are reading, the pages are loaded to their final (mapped) pfn.
14283 + **/
14284 +static int worker_rw_loop(void *data)
14285 +{
14286 +       unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4,
14287 +                     jif_index = 1, start_time = jiffies;
14288 +       int result = 0, my_io_index = 0, last_worker;
14289 +       struct toi_module_ops *first_filter = toi_get_next_filter(NULL);
14290 +       struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
14291 +
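      +       /* I/O threads run while the rest of the system is frozen. */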
14292 +       current->flags |= PF_NOFREEZE;
14293 +
14294 +       mutex_lock(&io_mutex);
14295 +
14296 +       do {
14297 +               if (data && jiffies > next_jiffies) {
14298 +                       next_jiffies += HZ / 4;
14299 +                       if (toiActiveAllocator->update_throughput_throttle)
14300 +                               toiActiveAllocator->update_throughput_throttle(
14301 +                                               jif_index);
14302 +                       jif_index++;
14303 +               }
14304 +
14305 +               /*
14306 +                * What page to use? If reading, don't know yet which page's
14307 +                * data will be read, so always use the buffer. If writing,
14308 +                * use the copy (Pageset1) or original page (Pageset2), but
14309 +                * always write the pfn of the original page.
14310 +                */
14311 +               if (io_write)
14312 +                       result = write_next_page(&data_pfn, &my_io_index,
14313 +                                       &write_pfn, first_filter);
14314 +               else /* Reading */
14315 +                       result = read_next_page(&my_io_index, &write_pfn,
14316 +                                       buffer, first_filter);
14317 +
14318 +               if (result) {
14319 +                       mutex_lock(&io_mutex);
14320 +                       /* Nothing to do? */
14321 +                       if (result == -ENODATA)
14322 +                               break;
14323 +
14324 +                       io_result = result;
14325 +
14326 +                       if (io_write) {
14327 +                               printk(KERN_INFO "Write chunk returned %d.\n",
14328 +                                               result);
14329 +                               abort_hibernate(TOI_FAILED_IO,
14330 +                                       "Failed to write a chunk of the "
14331 +                                       "image.");
14332 +                               break;
14333 +                       }
14334 +
14335 +                       if (io_pageset == 1) {
14336 +                               printk(KERN_ERR "\nBreaking out of I/O loop "
14337 +                                       "because of result code %d.\n", result);
14338 +                               break;
14339 +                       }
14340 +                       panic("Read chunk returned (%d)", result);
14341 +               }
14342 +
14343 +               /*
14344 +                * Discard reads of resaved pages while reading ps2
14345 +                * and unwanted pages while rereading ps2 when aborting.
14346 +                */
14347 +               if (!io_write && !PageResave(pfn_to_page(write_pfn)))
14348 +                       use_read_page(write_pfn, buffer);
14349 +
14350 +               if (my_io_index + io_base == io_nextupdate)
14351 +                       io_nextupdate = status_update(io_write, my_io_index +
14352 +                                       io_base, jiffies - start_time);
14353 +
14354 +               if (my_io_index == io_pc) {
14355 +                       printk(KERN_CONT "...%d%%", 20 * io_pc_step);
14356 +                       io_pc_step++;
14357 +                       io_pc = io_finish_at * io_pc_step / 5;
14358 +               }
14359 +
14360 +               toi_cond_pause(0, NULL);
14361 +
14362 +               /*
14363 +                * Subtle: If there's less I/O still to be done than threads
14364 +                * running, quit. This stops us doing I/O beyond the end of
14365 +                * the image when reading.
14366 +                *
14367 +                * Possible race condition. Two threads could do the test at
14368 +                * the same time; one should exit and one should continue.
14369 +                * Therefore we take the mutex before comparing and exiting.
14370 +                */
14371 +
14372 +               mutex_lock(&io_mutex);
14373 +
14374 +       } while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
14375 +               !(io_write && test_result_state(TOI_ABORTED)));
14376 +
14377 +       last_worker = atomic_dec_and_test(&toi_io_workers);
14378 +       mutex_unlock(&io_mutex);
14379 +
14380 +       if (last_worker) {
14381 +               toi_bio_queue_flusher_should_finish = 1;
14382 +               wake_up(&toi_io_queue_flusher);
14383 +               result = toiActiveAllocator->finish_all_io();
14384 +               printk(KERN_CONT "\n");
14385 +       }
14386 +
14387 +       toi__free_page(28, buffer);
14388 +
14389 +       return result;
14390 +}
14391 +
14392 +static int start_other_threads(void)
14393 +{
14394 +       int cpu, num_started = 0;
14395 +       struct task_struct *p;
14396 +       int to_start = (toi_max_workers ? toi_max_workers : num_online_cpus()) - 1;
14397 +
14398 +       atomic_set(&toi_io_workers, to_start);
14399 +
14400 +       for_each_online_cpu(cpu) {
14401 +               if (num_started == to_start)
14402 +                       break;
14403 +
14404 +               if (cpu == smp_processor_id())
14405 +                       continue;
14406 +
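      +               /*
      +                * Only the first thread started gets a non-NULL argument
      +                * (MONITOR), making it responsible for updating the
      +                * throughput throttle.
      +                */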
14407 +               p = kthread_create(worker_rw_loop, num_started ? NULL : MONITOR,
14408 +                               "ktoi_io/%d", cpu);
14409 +               if (IS_ERR(p)) {
14410 +                       printk(KERN_ERR "ktoi_io for %i failed\n", cpu);
14411 +                       atomic_dec(&toi_io_workers);
14412 +                       continue;
14413 +               }
14414 +               kthread_bind(p, cpu);
14415 +               p->flags |= PF_MEMALLOC;
14416 +               wake_up_process(p);
14417 +               num_started++;
14418 +       }
14419 +
14420 +       return num_started;
14421 +}
14422 +
14423 +/**
14424 + * do_rw_loop - main highlevel function for reading or writing pages
14425 + *
14426 + * Create the io_map bitmap and call worker_rw_loop to perform I/O operations.
14427 + **/
14428 +static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags,
14429 +               int base, int barmax, int pageset)
14430 +{
14431 +       int index = 0, cpu, num_other_threads = 0, result = 0;
14432 +       unsigned long pfn;
14433 +
14434 +       if (!finish_at)
14435 +               return 0;
14436 +
14437 +       io_write = write;
14438 +       io_finish_at = finish_at;
14439 +       io_base = base;
14440 +       io_barmax = barmax;
14441 +       io_pageset = pageset;
14442 +       io_index = 0;
14443 +       io_pc = io_finish_at / 5;
14444 +       io_pc_step = 1;
14445 +       io_result = 0;
14446 +       io_nextupdate = base + 1;
14447 +       toi_bio_queue_flusher_should_finish = 0;
14448 +
14449 +       for_each_online_cpu(cpu) {
14450 +               per_cpu(last_sought, cpu) = NULL;
14451 +               per_cpu(last_low_page, cpu) = NULL;
14452 +               per_cpu(last_high_page, cpu) = NULL;
14453 +       }
14454 +
14455 +       /* Ensure all bits clear */
14456 +       memory_bm_clear(io_map);
14457 +
14458 +       /* Set the bits for the pages to write */
14459 +       memory_bm_position_reset(pageflags);
14460 +
14461 +       pfn = memory_bm_next_pfn(pageflags);
14462 +
14463 +       while (pfn != BM_END_OF_MAP && index < finish_at) {
14464 +               memory_bm_set_bit(io_map, pfn);
14465 +               pfn = memory_bm_next_pfn(pageflags);
14466 +               index++;
14467 +       }
14468 +
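      +       /* The source bitmap must supply at least finish_at pages. */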
14469 +       BUG_ON(index < finish_at);
14470 +
14471 +       atomic_set(&io_count, finish_at);
14472 +
14473 +       memory_bm_position_reset(pageset1_map);
14474 +
14475 +       clear_toi_state(TOI_IO_STOPPED);
14476 +       memory_bm_position_reset(io_map);
14477 +
14478 +       if (!test_action_state(TOI_NO_MULTITHREADED_IO) &&
14479 +               (write || !toi_force_no_multithreaded))
14480 +               num_other_threads = start_other_threads();
14481 +
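      +       /*
      +        * Either join in as a worker ourselves or, if the allocator
      +        * provides an io_flusher and one is wanted, run that instead.
      +        */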
14482 +       if (!num_other_threads || !toiActiveAllocator->io_flusher ||
14483 +               test_action_state(TOI_NO_FLUSHER_THREAD)) {
14484 +               atomic_inc(&toi_io_workers);
14485 +               worker_rw_loop(num_other_threads ? NULL : MONITOR);
14486 +       } else
14487 +               result = toiActiveAllocator->io_flusher(write);
14488 +
14489 +       while (atomic_read(&toi_io_workers))
14490 +               schedule();
14491 +
14492 +       if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
14493 +               if (!atomic_read(&toi_io_workers)) {
14494 +                       rw_cleanup_modules(READ);
14495 +                       set_toi_state(TOI_IO_STOPPED);
14496 +               }
14497 +               while (1)
14498 +                       schedule();
14499 +       }
14500 +       set_toi_state(TOI_IO_STOPPED);
14501 +
14502 +       if (!io_result && !result && !test_result_state(TOI_ABORTED)) {
14503 +               unsigned long next;
14504 +
14505 +               toi_update_status(io_base + io_finish_at, io_barmax,
14506 +                               " %d/%d MB ",
14507 +                               MB(io_base + io_finish_at), MB(io_barmax));
14508 +
14509 +               memory_bm_position_reset(io_map);
14510 +               next = memory_bm_next_pfn(io_map);
14511 +               if (next != BM_END_OF_MAP) {
14512 +                       printk(KERN_INFO "Finished I/O loop but still work to "
14513 +                                       "do?\nFinish at = %d. io_count = %d.\n",
14514 +                                       finish_at, atomic_read(&io_count));
14515 +                       printk(KERN_INFO "I/O bitmap still records work to do:"
14516 +                                       " %ld.\n", next);
14517 +                       do {
14518 +                               cpu_relax();
14519 +                       } while (0);
14520 +               }
14521 +       }
14522 +
14523 +       return io_result ? io_result : result;
14524 +}
14525 +
14526 +/**
14527 + * write_pageset - write a pageset to disk.
14528 + * @pagedir:   Which pagedir to write.
14529 + *
14530 + * Returns:
14531 + *     Zero on success or -1 on failure.
14532 + **/
14533 +int write_pageset(struct pagedir *pagedir)
14534 +{
14535 +       int finish_at, base = 0;
14536 +       int barmax = pagedir1.size + pagedir2.size;
14537 +       long error = 0;
14538 +       struct memory_bitmap *pageflags;
14539 +       unsigned long start_time, end_time;
14540 +
14541 +       /*
14542 +        * Even if there is nothing to read or write, the allocator
14543 +        * may need the init/cleanup for its housekeeping (e.g.
14544 +        * Pageset1 may start where pageset2 ends when writing).
14545 +        */
14546 +       finish_at = pagedir->size;
14547 +
14548 +       if (pagedir->id == 1) {
14549 +               toi_prepare_status(DONT_CLEAR_BAR,
14550 +                               "Writing kernel & process data...");
14551 +               base = pagedir2.size;
14552 +               if (test_action_state(TOI_TEST_FILTER_SPEED) ||
14553 +                   test_action_state(TOI_TEST_BIO))
14554 +                       pageflags = pageset1_map;
14555 +               else
14556 +                       pageflags = pageset1_copy_map;
14557 +       } else {
14558 +               toi_prepare_status(DONT_CLEAR_BAR, "Writing caches...");
14559 +               pageflags = pageset2_map;
14560 +       }
14561 +
14562 +       start_time = jiffies;
14563 +
14564 +       if (rw_init_modules(1, pagedir->id)) {
14565 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
14566 +                               "Failed to initialise modules for writing.");
14567 +               error = 1;
14568 +       }
14569 +
14570 +       if (!error)
14571 +               error = do_rw_loop(1, finish_at, pageflags, base, barmax,
14572 +                               pagedir->id);
14573 +
14574 +       if (rw_cleanup_modules(WRITE) && !error) {
14575 +               abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
14576 +                               "Failed to cleanup after writing.");
14577 +               error = 1;
14578 +       }
14579 +
14580 +       end_time = jiffies;
14581 +
14582 +       if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
14583 +               toi_bkd.toi_io_time[0][0] += finish_at;
14584 +               toi_bkd.toi_io_time[0][1] += (end_time - start_time);
14585 +       }
14586 +
14587 +       return error;
14588 +}
14589 +
14590 +/**
14591 + * read_pageset - highlevel function to read a pageset from disk
14592 + * @pagedir:                   pageset to read
14593 + * @overwrittenpagesonly:      Whether to read the whole pageset or
14594 + *                             only part of it.
14595 + *
14596 + * Returns:
14597 + *     Zero on success or -1 on failure.
14598 + **/
14599 +static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
14600 +{
14601 +       int result = 0, base = 0;
14602 +       int finish_at = pagedir->size;
14603 +       int barmax = pagedir1.size + pagedir2.size;
14604 +       struct memory_bitmap *pageflags;
14605 +       unsigned long start_time, end_time;
14606 +
14607 +       if (pagedir->id == 1) {
14608 +               toi_prepare_status(DONT_CLEAR_BAR,
14609 +                               "Reading kernel & process data...");
14610 +               pageflags = pageset1_map;
14611 +       } else {
14612 +               toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
14613 +               if (overwrittenpagesonly) {
14614 +                       barmax = min(pagedir1.size, pagedir2.size);
14615 +                       finish_at = min(pagedir1.size, pagedir2.size);
14616 +               } else
14617 +                       base = pagedir1.size;
14618 +               pageflags = pageset2_map;
14619 +       }
14620 +
14621 +       start_time = jiffies;
14622 +
14623 +       if (rw_init_modules(0, pagedir->id)) {
14624 +               toiActiveAllocator->remove_image();
14625 +               result = 1;
14626 +       } else
14627 +               result = do_rw_loop(0, finish_at, pageflags, base, barmax,
14628 +                               pagedir->id);
14629 +
14630 +       if (rw_cleanup_modules(READ) && !result) {
14631 +               abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
14632 +                               "Failed to cleanup after reading.");
14633 +               result = 1;
14634 +       }
14635 +
14636 +       /* Statistics */
14637 +       end_time = jiffies;
14638 +
14639 +       if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
14640 +               toi_bkd.toi_io_time[1][0] += finish_at;
14641 +               toi_bkd.toi_io_time[1][1] += (end_time - start_time);
14642 +       }
14643 +
14644 +       return result;
14645 +}
14646 +
14647 +/**
14648 + * write_module_configs - store the modules configuration
14649 + *
14650 + * The configuration for each module is stored in the image header.
14651 + * Returns: Int
14652 + *     Zero on success, Error value otherwise.
14653 + **/
14654 +static int write_module_configs(void)
14655 +{
14656 +       struct toi_module_ops *this_module;
14657 +       char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
14658 +       int len, index = 1;
14659 +       struct toi_module_header toi_module_header;
14660 +
14661 +       if (!buffer) {
14662 +               printk(KERN_INFO "Failed to allocate a buffer for saving "
14663 +                               "module configuration info.\n");
14664 +               return -ENOMEM;
14665 +       }
14666 +
14667 +       /*
14668 +        * We have to know which data goes with which module, so we at
14669 +        * least write a length of zero for a module. Note that we are
14670 +        * also assuming every module's config data takes <= PAGE_SIZE.
14671 +        */
14672 +
14673 +       /* For each module (in registration order) */
14674 +       list_for_each_entry(this_module, &toi_modules, module_list) {
14675 +               if (!this_module->enabled || !this_module->storage_needed ||
14676 +                   (this_module->type == WRITER_MODULE &&
14677 +                    toiActiveAllocator != this_module))
14678 +                       continue;
14679 +
14680 +               /* Get the data from the module */
14681 +               len = 0;
14682 +               if (this_module->save_config_info)
14683 +                       len = this_module->save_config_info(buffer);
14684 +
14685 +               /* Save the details of the module */
14686 +               toi_module_header.enabled = this_module->enabled;
14687 +               toi_module_header.type = this_module->type;
14688 +               toi_module_header.index = index++;
14689 +               strncpy(toi_module_header.name, this_module->name,
14690 +                                       sizeof(toi_module_header.name));
14691 +               toiActiveAllocator->rw_header_chunk(WRITE,
14692 +                               this_module,
14693 +                               (char *) &toi_module_header,
14694 +                               sizeof(toi_module_header));
14695 +
14696 +               /* Save the size of the data and any data returned */
14697 +               toiActiveAllocator->rw_header_chunk(WRITE,
14698 +                               this_module,
14699 +                               (char *) &len, sizeof(int));
14700 +               if (len)
14701 +                       toiActiveAllocator->rw_header_chunk(
14702 +                               WRITE, this_module, buffer, len);
14703 +       }
14704 +
14705 +       /* Write a blank header to terminate the list */
14706 +       toi_module_header.name[0] = '\0';
14707 +       toiActiveAllocator->rw_header_chunk(WRITE, NULL,
14708 +                       (char *) &toi_module_header, sizeof(toi_module_header));
14709 +
14710 +       toi_free_page(22, (unsigned long) buffer);
14711 +       return 0;
14712 +}
14713 +
14714 +/**
14715 + * read_one_module_config - read and configure one module
14716 + *
14717 + * Read the configuration for one module, and configure the module
14718 + * to match if it is loaded.
14719 + *
14720 + * Returns: Int
14721 + *     Zero on success, Error value otherwise.
14722 + **/
14723 +static int read_one_module_config(struct toi_module_header *header)
14724 +{
14725 +       struct toi_module_ops *this_module;
14726 +       int result, len;
14727 +       char *buffer;
14728 +
14729 +       /* Find the module */
14730 +       this_module = toi_find_module_given_name(header->name);
14731 +
14732 +       if (!this_module) {
14733 +               if (header->enabled) {
14734 +                       toi_early_boot_message(1, TOI_CONTINUE_REQ,
14735 +                               "It looks like we need module %s for reading "
14736 +                               "the image but it hasn't been registered.\n",
14737 +                               header->name);
14738 +                       if (!(test_toi_state(TOI_CONTINUE_REQ)))
14739 +                               return -EINVAL;
14740 +               } else
14741 +                       printk(KERN_INFO "Module %s configuration data found, "
14742 +                               "but the module hasn't registered. Looks like "
14743 +                               "it was disabled, so we're ignoring its data.\n",
14744 +                               header->name);
14745 +       }
14746 +
14747 +       /* Get the length of the data (if any) */
14748 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len,
14749 +                       sizeof(int));
14750 +       if (result) {
14751 +               printk(KERN_ERR "Failed to read the length of the module %s's"
14752 +                               " configuration data.\n",
14753 +                               header->name);
14754 +               return -EINVAL;
14755 +       }
14756 +
14757 +       /* Read any data and pass to the module (if we found one) */
14758 +       if (!len)
14759 +               return 0;
14760 +
14761 +       buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
14762 +
14763 +       if (!buffer) {
14764 +               printk(KERN_ERR "Failed to allocate a buffer for reloading "
14765 +                               "module configuration info.\n");
14766 +               return -ENOMEM;
14767 +       }
14768 +
14769 +       toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len);
14770 +
14771 +       if (!this_module)
14772 +               goto out;
14773 +
14774 +       if (!this_module->load_config_info)
14775 +               printk(KERN_ERR "Huh? Module %s appears to have a "
14776 +                               "save_config_info, but not a load_config_info "
14777 +                               "function!\n", this_module->name);
14778 +       else
14779 +               this_module->load_config_info(buffer, len);
14780 +
14781 +       /*
14782 +        * Now move this module to the tail of its lists. This will put it in
14783 +        * order. Any new modules will end up at the top of the lists. They
14784 +        * should have been set to disabled when loaded (people will
14785 +        * normally not edit an initrd to load a new module and then hibernate
14786 +        * without using it!).
14787 +        */
14788 +
14789 +       toi_move_module_tail(this_module);
14790 +
14791 +       this_module->enabled = header->enabled;
14792 +
14793 +out:
14794 +       toi_free_page(23, (unsigned long) buffer);
14795 +       return 0;
14796 +}
14797 +
14798 +/**
14799 + * read_module_configs - reload module configurations from the image header.
14800 + *
14801 + * Returns: Int
14802 + *     Zero on success or an error code.
14803 + **/
14804 +static int read_module_configs(void)
14805 +{
14806 +       int result = 0;
14807 +       struct toi_module_header toi_module_header;
14808 +       struct toi_module_ops *this_module;
14809 +
14810 +       /* All modules are initially disabled. That way, if we have a module
14811 +        * loaded now that wasn't loaded when we hibernated, it won't be used
14812 +        * in trying to read the data.
14813 +        */
14814 +       list_for_each_entry(this_module, &toi_modules, module_list)
14815 +               this_module->enabled = 0;
14816 +
14817 +       /* Get the first module header */
14818 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL,
14819 +                       (char *) &toi_module_header,
14820 +                       sizeof(toi_module_header));
14821 +       if (result) {
14822 +               printk(KERN_ERR "Failed to read the next module header.\n");
14823 +               return -EINVAL;
14824 +       }
14825 +
14826 +       /* For each module (in registration order) */
14827 +       while (toi_module_header.name[0]) {
14828 +               result = read_one_module_config(&toi_module_header);
14829 +
14830 +               if (result)
14831 +                       return -EINVAL;
14832 +
14833 +               /* Get the next module header */
14834 +               result = toiActiveAllocator->rw_header_chunk(READ, NULL,
14835 +                               (char *) &toi_module_header,
14836 +                               sizeof(toi_module_header));
14837 +
14838 +               if (result) {
14839 +                       printk(KERN_ERR "Failed to read the next module "
14840 +                                       "header.\n");
14841 +                       return -EINVAL;
14842 +               }
14843 +       }
14844 +
14845 +       return 0;
14846 +}
14847 +
14848 +static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev)
14849 +{
14850 +       return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 0 : 1;
14851 +}
14852 +
14853 +int fs_info_space_needed(void)
14854 +{
14855 +       const struct super_block *sb;
14856 +       int result = sizeof(int);
14857 +
14858 +       list_for_each_entry(sb, &super_blocks, s_list) {
14859 +               struct fs_info *fs;
14860 +
14861 +               if (!sb->s_bdev)
14862 +                       continue;
14863 +
14864 +               fs = fs_info_from_block_dev(sb->s_bdev);
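      +               /* 16 uuid bytes + dev_t + length int + mount data. */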
14865 +               if (save_fs_info(fs, sb->s_bdev))
14866 +                       result += 16 + sizeof(dev_t) + sizeof(int) +
14867 +                               fs->last_mount_size;
14868 +               free_fs_info(fs);
14869 +       }
14870 +       return result;
14871 +}
14872 +
14873 +static int fs_info_num_to_save(void)
14874 +{
14875 +       const struct super_block *sb;
14876 +       int to_save = 0;
14877 +
14878 +       list_for_each_entry(sb, &super_blocks, s_list) {
14879 +               struct fs_info *fs;
14880 +
14881 +               if (!sb->s_bdev)
14882 +                       continue;
14883 +
14884 +               fs = fs_info_from_block_dev(sb->s_bdev);
14885 +               if (save_fs_info(fs, sb->s_bdev))
14886 +                       to_save++;
14887 +               free_fs_info(fs);
14888 +       }
14889 +
14890 +       return to_save;
14891 +}
14892 +
14893 +static int fs_info_save(void)
14894 +{
14895 +       const struct super_block *sb;
14896 +       int to_save = fs_info_num_to_save();
14897 +
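      +       /*
      +        * Header format: the record count, then for each filesystem its
      +        * 16 byte uuid, dev_t, last-mount data length and the last-mount
      +        * data itself.
      +        */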
14898 +       if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *) &to_save,
14899 +                               sizeof(int))) {
14900 +               abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info"
14901 +                               " to save.");
14902 +               return -EIO;
14903 +       }
14904 +
14905 +       list_for_each_entry(sb, &super_blocks, s_list) {
14906 +               struct fs_info *fs;
14907 +
14908 +               if (!sb->s_bdev)
14909 +                       continue;
14910 +
14911 +               fs = fs_info_from_block_dev(sb->s_bdev);
14912 +               if (save_fs_info(fs, sb->s_bdev)) {
14913 +                       if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
14914 +                                       &fs->uuid[0], 16)) {
14915 +                               abort_hibernate(TOI_FAILED_IO, "Failed to "
14916 +                                               "write uuid.");
14917 +                               return -EIO;
14918 +                       }
14919 +                       if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
14920 +                                       (char *) &fs->dev_t, sizeof(dev_t))) {
14921 +                               abort_hibernate(TOI_FAILED_IO, "Failed to "
14922 +                                               "write dev_t.");
14923 +                               return -EIO;
14924 +                       }
14925 +                       if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
14926 +                                       (char *) &fs->last_mount_size, sizeof(int))) {
14927 +                               abort_hibernate(TOI_FAILED_IO, "Failed to "
14928 +                                               "write last mount length.");
14929 +                               return -EIO;
14930 +                       }
14931 +                       if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
14932 +                                       fs->last_mount, fs->last_mount_size)) {
14933 +                               abort_hibernate(TOI_FAILED_IO, "Failed to "
14934 +                                               "write last mount data.");
14935 +                               return -EIO;
14936 +                       }
14937 +               }
14938 +               free_fs_info(fs);
14939 +       }
14940 +       return 0;
14941 +}
14942 +
14943 +static int fs_info_load_and_check_one(void)
14944 +{
14945 +       char uuid[16], *last_mount;
14946 +       int result = 0, ln;
14947 +       dev_t dev_no;
14948 +       struct block_device *dev;
14949 +       struct fs_info *fs_info, seek;
14950 +
14951 +       if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) {
14952 +               abort_hibernate(TOI_FAILED_IO, "Failed to read uuid.");
14953 +               return -EIO;
14954 +       }
14955 +
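      +       /* Only image header versions >= 3 carry the dev_t field. */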
14956 +       read_if_version(3, dev_no, "uuid dev_t field", return -EIO);
14957 +
14958 +       if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &ln,
14959 +                               sizeof(int))) {
14960 +               abort_hibernate(TOI_FAILED_IO,
14961 +                               "Failed to read last mount size.");
14962 +               return -EIO;
14963 +       }
14964 +
14965 +       last_mount = kzalloc(ln, GFP_KERNEL);
14966 +
14967 +       if (!last_mount)
14968 +               return -ENOMEM;
14969 +
14970 +       if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount, ln)) {
14971 +               abort_hibernate(TOI_FAILED_IO,
14972 +                               "Failed to read last mount timestamp.");
14973 +               result = -EIO;
14974 +               goto out_lmt;
14975 +       }
14976 +
14977 +       memcpy(seek.uuid, uuid, 16);
14978 +       seek.dev_t = dev_no;
14979 +       seek.last_mount_size = ln;
14980 +       seek.last_mount = last_mount;
14981 +       dev_no = blk_lookup_fs_info(&seek);
14982 +       if (!dev_no)
14983 +               goto out_lmt;
14984 +
14985 +       dev = toi_open_by_devnum(dev_no);
14986 +
14987 +       fs_info = fs_info_from_block_dev(dev);
14988 +       if (fs_info && !IS_ERR(fs_info)) {
14989 +               if (ln != fs_info->last_mount_size) {
14990 +                       printk(KERN_EMERG "Found matching uuid but last mount "
14991 +                                       "time lengths differ?! "
14992 +                                       "(%d vs %d).\n", ln,
14993 +                                       fs_info->last_mount_size);
14994 +                       result = -EINVAL;
14995 +               } else {
14996 +                       char buf[BDEVNAME_SIZE];
14997 +                       result = !!memcmp(fs_info->last_mount, last_mount, ln);
14998 +                       if (result)
14999 +                               printk(KERN_EMERG "Last mount time for %s has "
15000 +                                       "changed!\n", bdevname(dev, buf));
15001 +               }
15002 +       }
15003 +       toi_close_bdev(dev);
15004 +       free_fs_info(fs_info);
15005 +out_lmt:
15006 +       kfree(last_mount);
15007 +       return result;
15008 +}
15009 +
15010 +static int fs_info_load_and_check(void)
15011 +{
15012 +       int to_do, result = 0;
15013 +
15014 +       if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &to_do,
15015 +                               sizeof(int))) {
15016 +               abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info "
15017 +                               "to load.");
15018 +               return -EIO;
15019 +       }
15020 +
15021 +       while (to_do--)
15022 +               result |= fs_info_load_and_check_one();
15023 +
15024 +       return result;
15025 +}
15026 +
15027 +/**
15028 + * write_image_header - write the image header after writing the image proper
15029 + *
15030 + * Returns: Int
15031 + *     Zero on success, error value otherwise.
15032 + **/
15033 +int write_image_header(void)
15034 +{
15035 +       int ret;
15036 +       int total = pagedir1.size + pagedir2.size + 2;
15037 +       char *header_buffer = NULL;
15038 +
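      +       /*
      +        * Everything written below must be read back in the same order
      +        * by __read_pageset1() at resume time.
      +        */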
15039 +       /* Now prepare to write the header */
15040 +       ret = toiActiveAllocator->write_header_init();
15041 +       if (ret) {
15042 +               abort_hibernate(TOI_FAILED_MODULE_INIT,
15043 +                               "Active allocator's write_header_init"
15044 +                               " function failed.");
15045 +               goto write_image_header_abort;
15046 +       }
15047 +
15048 +       /* Get a buffer */
15049 +       header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
15050 +       if (!header_buffer) {
15051 +               abort_hibernate(TOI_OUT_OF_MEMORY,
15052 +                       "Out of memory when trying to get page for header!");
15053 +               goto write_image_header_abort;
15054 +       }
15055 +
15056 +       /* Write hibernate header */
15057 +       if (fill_toi_header((struct toi_header *) header_buffer)) {
15058 +               abort_hibernate(TOI_OUT_OF_MEMORY,
15059 +                       "Failure to fill header information!");
15060 +               goto write_image_header_abort;
15061 +       }
15062 +
15063 +       if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
15064 +                       header_buffer, sizeof(struct toi_header))) {
15065 +               abort_hibernate(TOI_OUT_OF_MEMORY,
15066 +                       "Failure to write header info.");
15067 +               goto write_image_header_abort;
15068 +       }
15069 +
15070 +       if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
15071 +                       (char *) &toi_max_workers, sizeof(toi_max_workers))) {
15072 +               abort_hibernate(TOI_OUT_OF_MEMORY,
15073 +                       "Failure to write the number of workers to use.");
15074 +               goto write_image_header_abort;
15075 +       }
15076 +
15077 +       /* Write filesystem info */
15078 +       if (fs_info_save())
15079 +               goto write_image_header_abort;
15080 +
15081 +       /* Write module configurations */
15082 +       ret = write_module_configs();
15083 +       if (ret) {
15084 +               abort_hibernate(TOI_FAILED_IO,
15085 +                               "Failed to write module configs.");
15086 +               goto write_image_header_abort;
15087 +       }
15088 +
15089 +       if (memory_bm_write(pageset1_map,
15090 +                               toiActiveAllocator->rw_header_chunk)) {
15091 +               abort_hibernate(TOI_FAILED_IO,
15092 +                               "Failed to write bitmaps.");
15093 +               goto write_image_header_abort;
15094 +       }
15095 +
15096 +       /* Flush data and let allocator cleanup */
15097 +       if (toiActiveAllocator->write_header_cleanup()) {
15098 +               abort_hibernate(TOI_FAILED_IO,
15099 +                               "Failed to cleanup writing header.");
15100 +               goto write_image_header_abort_no_cleanup;
15101 +       }
15102 +
15103 +       if (test_result_state(TOI_ABORTED))
15104 +               goto write_image_header_abort_no_cleanup;
15105 +
15106 +       toi_update_status(total, total, NULL);
15107 +
15108 +out:
15109 +       if (header_buffer)
15110 +               toi_free_page(24, (unsigned long) header_buffer);
15111 +       return ret;
15112 +
15113 +write_image_header_abort:
15114 +       toiActiveAllocator->write_header_cleanup();
15115 +write_image_header_abort_no_cleanup:
15116 +       ret = -1;
15117 +       goto out;
15118 +}
15119 +
15120 +/**
15121 + * sanity_check - check the header
15122 + * @sh:        the header which was saved at hibernate time.
15123 + *
15124 + * Perform a few checks, seeking to ensure that the kernel being
15125 + * booted matches the one hibernated. They need to match so we can
15126 + * be _sure_ things will work. It is not absolutely impossible for
15127 + * resuming from a different kernel to work, just not assured.
15128 + **/
15129 +static char *sanity_check(struct toi_header *sh)
15130 +{
15131 +       char *reason = check_image_kernel((struct swsusp_info *) sh);
15132 +
15133 +       if (reason)
15134 +               return reason;
15135 +
15136 +       if (!test_action_state(TOI_IGNORE_ROOTFS)) {
15137 +               const struct super_block *sb;
15138 +               list_for_each_entry(sb, &super_blocks, s_list) {
15139 +                       if ((!(sb->s_flags & MS_RDONLY)) &&
15140 +                           (sb->s_type->fs_flags & FS_REQUIRES_DEV))
15141 +                               return "Device backed fs has been mounted "
15142 +                                       "rw prior to resume or initrd/ramfs "
15143 +                                       "is mounted rw.";
15144 +               }
15145 +       }
15146 +
15147 +       return NULL;
15148 +}
15149 +
15150 +static DECLARE_WAIT_QUEUE_HEAD(freeze_wait);
15151 +
15152 +#define FREEZE_IN_PROGRESS (~0)
15153 +
15154 +static int freeze_result;
15155 +
15156 +static void do_freeze(struct work_struct *dummy)
15157 +{
15158 +       freeze_result = freeze_processes();
15159 +       wake_up(&freeze_wait);
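      +       /* From here on, I/O not issued by TuxOnIce is suspect. */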
15160 +       trap_non_toi_io = 1;
15161 +}
15162 +
15163 +static DECLARE_WORK(freeze_work, do_freeze);
15164 +
15165 +/**
15166 + * __read_pageset1 - test for the existence of an image and attempt to load it
15167 + *
15168 + * Returns:    Int
15169 + *     Zero if image found and pageset1 successfully loaded.
15170 + *     Error if no image found or loaded.
15171 + **/
15172 +static int __read_pageset1(void)
15173 +{
15174 +       int i, result = 0;
15175 +       char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP),
15176 +            *sanity_error = NULL;
15177 +       struct toi_header *toi_header;
15178 +
15179 +       if (!header_buffer) {
15180 +               printk(KERN_INFO "Unable to allocate a page for reading the "
15181 +                               "signature.\n");
15182 +               return -ENOMEM;
15183 +       }
15184 +
15185 +       /* Check for an image */
15186 +       result = toiActiveAllocator->image_exists(1);
15187 +       if (result == 3) {
15188 +               result = -ENODATA;
15189 +               toi_early_boot_message(1, 0, "The signature from an older "
15190 +                               "version of TuxOnIce has been detected.");
15191 +               goto out_remove_image;
15192 +       }
15193 +
15194 +       if (result != 1) {
15195 +               result = -ENODATA;
15196 +               noresume_reset_modules();
15197 +               printk(KERN_INFO "TuxOnIce: No image found.\n");
15198 +               goto out;
15199 +       }
15200 +
15201 +       /*
15202 +        * Prepare the active allocator for reading the image header. The
15203 +        * active allocator might read its own configuration.
15204 +        *
15205 +        * NB: This call may never return because there might be a signature
15206 +        * for a different image such that we warn the user and they choose
15207 +        * to reboot (e.g. if the device ids look erroneous (2.4 vs 2.6),
15208 +        * or the image location is unavailable because it was stored over
15209 +        * a network connection).
15210 +        */
15211 +
15212 +       result = toiActiveAllocator->read_header_init();
15213 +       if (result) {
15214 +               printk(KERN_INFO "TuxOnIce: Failed to initialise reading "
15215 +                               "the image header.\n");
15216 +               goto out_remove_image;
15217 +       }
15218 +
15219 +       /* Check for noresume command line option */
15220 +       if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
15221 +               printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed "
15222 +                               "image.\n");
15223 +               goto out_remove_image;
15224 +       }
15225 +
15226 +       /* Check whether we've resumed before */
15227 +       if (test_toi_state(TOI_RESUMED_BEFORE)) {
15228 +               toi_early_boot_message(1, 0, NULL);
15229 +               if (!(test_toi_state(TOI_CONTINUE_REQ))) {
15230 +                       printk(KERN_INFO "TuxOnIce: Tried to resume before: "
15231 +                                       "Invalidated image.\n");
15232 +                       goto out_remove_image;
15233 +               }
15234 +       }
15235 +
15236 +       clear_toi_state(TOI_CONTINUE_REQ);
15237 +
15238 +       toi_image_header_version = toiActiveAllocator->get_header_version();
15239 +
15240 +       if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) {
15241 +               toi_early_boot_message(1, 0, image_version_error);
15242 +               if (!(test_toi_state(TOI_CONTINUE_REQ))) {
15243 +                       printk(KERN_INFO "TuxOnIce: Header version too new: "
15244 +                                       "Invalidated image.\n");
15245 +                       goto out_remove_image;
15246 +               }
15247 +       }
15248 +
15249 +       /* Read hibernate header */
15250 +       result = toiActiveAllocator->rw_header_chunk(READ, NULL,
15251 +                       header_buffer, sizeof(struct toi_header));
15252 +       if (result < 0) {
15253 +               printk(KERN_ERR "TuxOnIce: Failed to read the image "
15254 +                               "signature.\n");
15255 +               goto out_remove_image;
15256 +       }
15257 +
15258 +       toi_header = (struct toi_header *) header_buffer;
15259 +
15260 +       /*
15261 +        * NB: This call may also result in a reboot rather than returning.
15262 +        */
15263 +
15264 +       sanity_error = sanity_check(toi_header);
15265 +       if (sanity_error) {
15266 +               toi_early_boot_message(1, TOI_CONTINUE_REQ,
15267 +                               sanity_error);
15268 +               printk(KERN_INFO "TuxOnIce: Sanity check failed.\n");
15269 +               goto out_remove_image;
15270 +       }
15271 +
15272 +       /*
15273 +        * We have an image and it looks like it will load okay.
15274 +        *
15275 +        * Get metadata from header. Don't override commandline parameters.
15276 +        *
15277 +        * We don't need to save the image size limit because it's not used
15278 +        * during resume and will be restored with the image anyway.
15279 +        */
15280 +
15281 +       memcpy((char *) &pagedir1,
15282 +               (char *) &toi_header->pagedir, sizeof(pagedir1));
15283 +       toi_result = toi_header->param0;
15284 +       if (!toi_bkd.toi_debug_state) {
15285 +               toi_bkd.toi_action = toi_header->param1;
15286 +               toi_bkd.toi_debug_state = toi_header->param2;
15287 +               toi_bkd.toi_default_console_level = toi_header->param3;
15288 +       }
15289 +       clear_toi_state(TOI_IGNORE_LOGLEVEL);
15290 +       pagedir2.size = toi_header->pageset_2_size;
15291 +       for (i = 0; i < 4; i++)
15292 +               toi_bkd.toi_io_time[i/2][i%2] =
15293 +                       toi_header->io_time[i/2][i%2];
15294 +
15295 +       set_toi_state(TOI_BOOT_KERNEL);
15296 +       boot_kernel_data_buffer = toi_header->bkd;
15297 +
15298 +       read_if_version(1, toi_max_workers, "TuxOnIce max workers",
15299 +                       goto out_remove_image);
15300 +
15301 +       /* Read filesystem info */
15302 +       if (fs_info_load_and_check()) {
15303 +               printk(KERN_EMERG "TuxOnIce: File system mount time checks "
15304 +                       "failed. Refusing to corrupt your filesystems!\n");
15305 +               goto out_remove_image;
15306 +       }
15307 +
15308 +       /* Read module configurations */
15309 +       result = read_module_configs();
15310 +       if (result) {
15311 +               pagedir1.size = 0;
15312 +               pagedir2.size = 0;
15313 +               printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module "
15314 +                               "configurations.\n");
15315 +               clear_action_state(TOI_KEEP_IMAGE);
15316 +               goto out_remove_image;
15317 +       }
15318 +
15319 +       toi_prepare_console();
15320 +
15321 +       set_toi_state(TOI_NOW_RESUMING);
15322 +
15323 +       if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) {
15324 +               toi_prepare_status(DONT_CLEAR_BAR, "Disable nonboot cpus.");
15325 +               if (disable_nonboot_cpus()) {
15326 +                       set_abort_result(TOI_CPU_HOTPLUG_FAILED);
15327 +                       goto out_reset_console;
15328 +               }
15329 +       }
15330 +
15331 +       if (usermodehelper_disable())
15332 +               goto out_enable_nonboot_cpus;
15333 +
15334 +       current->flags |= PF_NOFREEZE;
15335 +       freeze_result = FREEZE_IN_PROGRESS;
15336 +
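      +       /*
      +        * Freeze processes on the first online CPU while this thread
      +        * carries on reading the image, overlapping the two operations.
      +        */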
15337 +       schedule_work_on(first_cpu(cpu_online_map), &freeze_work);
15338 +
15339 +       toi_cond_pause(1, "About to read original pageset1 locations.");
15340 +
15341 +       /*
15342 +        * See _toi_rw_header_chunk in tuxonice_bio.c:
15343 +        * Initialize pageset1_map by reading the map from the image.
15344 +        */
15345 +       if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk))
15346 +               goto out_thaw;
15347 +
15348 +       /*
15349 +        * See toi_rw_cleanup in tuxonice_bio.c:
15350 +        * Clean up after reading the header.
15351 +        */
15352 +       result = toiActiveAllocator->read_header_cleanup();
15353 +       if (result) {
15354 +               printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the "
15355 +                               "image header.\n");
15356 +               goto out_thaw;
15357 +       }
15358 +
15359 +       toi_cond_pause(1, "About to read pagedir.");
15360 +
15361 +       /*
15362 +        * Get the addresses of pages into which we will load the kernel to
15363 +        * be copied back and check if they conflict with the ones we are using.
15364 +        */
15365 +       if (toi_get_pageset1_load_addresses()) {
15366 +               printk(KERN_INFO "TuxOnIce: Failed to get load addresses for "
15367 +                               "pageset1.\n");
15368 +               goto out_thaw;
15369 +       }
15370 +
15371 +       /* Read the original kernel back */
15372 +       toi_cond_pause(1, "About to read pageset 1.");
15373 +
15374 +       /* Given the pagemap, read back the data from disk */
15375 +       if (read_pageset(&pagedir1, 0)) {
15376 +               toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1.");
15377 +               result = -EIO;
15378 +               goto out_thaw;
15379 +       }
15380 +
15381 +       toi_cond_pause(1, "About to restore original kernel.");
15382 +       result = 0;
15383 +
15384 +       if (!test_action_state(TOI_KEEP_IMAGE) &&
15385 +           toiActiveAllocator->mark_resume_attempted)
15386 +               toiActiveAllocator->mark_resume_attempted(1);
15387 +
15388 +       wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
15389 +out:
15390 +       current->flags &= ~PF_NOFREEZE;
15391 +       toi_free_page(25, (unsigned long) header_buffer);
15392 +       return result;
15393 +
15394 +out_thaw:
15395 +       wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
15396 +       trap_non_toi_io = 0;
15397 +       thaw_processes();
15398 +       usermodehelper_enable();
15399 +out_enable_nonboot_cpus:
15400 +       enable_nonboot_cpus();
15401 +out_reset_console:
15402 +       toi_cleanup_console();
15403 +out_remove_image:
15404 +       result = -EINVAL;
15405 +       if (!test_action_state(TOI_KEEP_IMAGE))
15406 +               toiActiveAllocator->remove_image();
15407 +       toiActiveAllocator->read_header_cleanup();
15408 +       noresume_reset_modules();
15409 +       goto out;
15410 +}
15411 +
15412 +/**
15413 + * read_pageset1 - highlevel function to read the saved pages
15414 + *
15415 + * Attempt to read the header and pageset1 of a hibernate image.
15416 + * Handle the outcome, complaining where appropriate.
15417 + **/
15418 +int read_pageset1(void)
15419 +{
15420 +       int error;
15421 +
15422 +       error = __read_pageset1();
15423 +
15424 +       if (error && error != -ENODATA && error != -EINVAL &&
15425 +                                       !test_result_state(TOI_ABORTED))
15426 +               abort_hibernate(TOI_IMAGE_ERROR,
15427 +                       "TuxOnIce: Error %d resuming\n", error);
15428 +
15429 +       return error;
15430 +}
15431 +
15432 +/**
15433 + * get_have_image_data - check the image header
15434 + **/
15435 +static char *get_have_image_data(void)
15436 +{
15437 +       char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
15438 +       struct toi_header *toi_header;
15439 +
15440 +       if (!output_buffer) {
15441 +               printk(KERN_INFO "Output buffer null.\n");
15442 +               return NULL;
15443 +       }
15444 +
15445 +       /* Check for an image */
15446 +       if (!toiActiveAllocator->image_exists(1) ||
15447 +           toiActiveAllocator->read_header_init() ||
15448 +           toiActiveAllocator->rw_header_chunk(READ, NULL,
15449 +                       output_buffer, sizeof(struct toi_header))) {
15450 +               sprintf(output_buffer, "0\n");
15451 +               /*
15452 +                * From an initrd/ramfs, catting have_image and
15453 +                * getting a result of 0 is sufficient.
15454 +                */
15455 +               clear_toi_state(TOI_BOOT_TIME);
15456 +               goto out;
15457 +       }
15458 +
15459 +       toi_header = (struct toi_header *) output_buffer;
15460 +
15461 +       sprintf(output_buffer, "1\n%s\n%s\n",
15462 +                       toi_header->uts.machine,
15463 +                       toi_header->uts.version);
15464 +
15465 +       /* Check whether we've resumed before */
15466 +       if (test_toi_state(TOI_RESUMED_BEFORE))
15467 +               strcat(output_buffer, "Resumed before.\n");
15468 +
15469 +out:
15470 +       noresume_reset_modules();
15471 +       return output_buffer;
15472 +}
15473 +
15474 +/**
15475 + * read_pageset2 - read second part of the image
15476 + * @overwrittenpagesonly:      Read only pages which would have been
15477 + *                             overwritten by pageset1?
15478 + *
15479 + * Read in part or all of pageset2 of an image, depending upon
15480 + * whether we are hibernating and have only overwritten a portion
15481 + * with pageset1 pages, or are resuming and need to read them
15482 + * all.
15483 + *
15484 + * Returns:
15485 + *     Zero if no error, otherwise the error value.
15486 + **/
15487 +int read_pageset2(int overwrittenpagesonly)
15488 +{
15489 +       int result = 0;
15490 +
15491 +       if (!pagedir2.size)
15492 +               return 0;
15493 +
15494 +       result = read_pageset(&pagedir2, overwrittenpagesonly);
15495 +
15496 +       toi_cond_pause(1, "Pagedir 2 read.");
15497 +
15498 +       return result;
15499 +}
15500 +
15501 +/**
15502 + * image_exists_read - has an image been found?
15503 + * @page:      Output buffer
15504 + *
15505 + * Store -1 (no active allocator), 0 (no image) or 1 followed by the
15506 + * machine and kernel version of the image in page. The incoming
15507 + * buffer is PAGE_SIZE and the result is guaranteed to be far less
15508 + * than that, so we don't worry about overflow.
15509 + **/
15510 +int image_exists_read(const char *page, int count)
15511 +{
15512 +       int len = 0;
15513 +       char *result;
15514 +
15515 +       if (toi_activate_storage(0))
15516 +               return count;
15517 +
15518 +       if (!test_toi_state(TOI_RESUME_DEVICE_OK))
15519 +               toi_attempt_to_parse_resume_device(0);
15520 +
15521 +       if (!toiActiveAllocator) {
15522 +               len = sprintf((char *) page, "-1\n");
15523 +       } else {
15524 +               result = get_have_image_data();
15525 +               if (result) {
15526 +                       len = sprintf((char *) page, "%s",  result);
15527 +                       toi_free_page(26, (unsigned long) result);
15528 +               }
15529 +       }
15530 +
15531 +       toi_deactivate_storage(0);
15532 +
15533 +       return len;
15534 +}
15535 +
15536 +/**
15537 + * image_exists_write - invalidate an image if one exists
15538 + **/
15539 +int image_exists_write(const char *buffer, int count)
15540 +{
15541 +       if (toi_activate_storage(0))
15542 +               return count;
15543 +
15544 +       if (toiActiveAllocator && toiActiveAllocator->image_exists(1))
15545 +               toiActiveAllocator->remove_image();
15546 +
15547 +       toi_deactivate_storage(0);
15548 +
15549 +       clear_result_state(TOI_KEPT_IMAGE);
15550 +
15551 +       return count;
15552 +}
15553 diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h
15554 new file mode 100644
15555 index 0000000..fe37713
15556 --- /dev/null
15557 +++ b/kernel/power/tuxonice_io.h
15558 @@ -0,0 +1,74 @@
15559 +/*
15560 + * kernel/power/tuxonice_io.h
15561 + *
15562 + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net)
15563 + *
15564 + * This file is released under the GPLv2.
15565 + *
15566 + * It contains high level IO routines for hibernating.
15567 + *
15568 + */
15569 +
15570 +#include <linux/utsname.h>
15571 +#include "tuxonice_pagedir.h"
15572 +
15573 +/* Non-module data saved in our image header */
15574 +struct toi_header {
15575 +       /*
15576 +        * Mirror struct swsusp_info, but without
15577 +        * the page aligned attribute
15578 +        */
15579 +       struct new_utsname uts;
15580 +       u32 version_code;
15581 +       unsigned long num_physpages;
15582 +       int cpus;
15583 +       unsigned long image_pages;
15584 +       unsigned long pages;
15585 +       unsigned long size;
15586 +
15587 +       /* Our own data */
15588 +       unsigned long orig_mem_free;
15589 +       int page_size;
15590 +       int pageset_2_size;
15591 +       int param0;
15592 +       int param1;
15593 +       int param2;
15594 +       int param3;
15595 +       int progress0;
15596 +       int progress1;
15597 +       int progress2;
15598 +       int progress3;
15599 +       int io_time[2][2];
15600 +       struct pagedir pagedir;
15601 +       dev_t root_fs;
15602 +       unsigned long bkd; /* Boot kernel data locn */
15603 +};
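+
+/*
+ * get_have_image_data() in tuxonice_io.c reads sizeof(struct toi_header)
+ * bytes as the first chunk of the image header and then picks the uts
+ * fields out of this structure, so this layout is what a boot-time
+ * helper sees when checking for an image.
+ */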
15604 +
15605 +extern int write_pageset(struct pagedir *pagedir);
15606 +extern int write_image_header(void);
15607 +extern int read_pageset1(void);
15608 +extern int read_pageset2(int overwrittenpagesonly);
15609 +
15610 +extern int toi_attempt_to_parse_resume_device(int quiet);
15611 +extern void attempt_to_parse_resume_device2(void);
15612 +extern void attempt_to_parse_alt_resume_param(void);
15613 +int image_exists_read(const char *page, int count);
15614 +int image_exists_write(const char *buffer, int count);
15615 +extern void save_restore_alt_param(int replace, int quiet);
15616 +extern atomic_t toi_io_workers;
15617 +
15618 +/* Args to save_restore_alt_param */
15619 +#define RESTORE 0
15620 +#define SAVE 1
15621 +
15622 +#define NOQUIET 0
15623 +#define QUIET 1
15624 +
15625 +extern dev_t name_to_dev_t(char *line);
15626 +
15627 +extern wait_queue_head_t toi_io_queue_flusher;
15628 +extern int toi_bio_queue_flusher_should_finish;
15629 +
15630 +int fs_info_space_needed(void);
15631 +
15632 +extern int toi_max_workers;
15633 diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c
15634 new file mode 100644
15635 index 0000000..4cc24a9
15636 --- /dev/null
15637 +++ b/kernel/power/tuxonice_modules.c
15638 @@ -0,0 +1,522 @@
15639 +/*
15640 + * kernel/power/tuxonice_modules.c
15641 + *
15642 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
15643 + *
15644 + */
15645 +
15646 +#include <linux/suspend.h>
15647 +#include "tuxonice.h"
15648 +#include "tuxonice_modules.h"
15649 +#include "tuxonice_sysfs.h"
15650 +#include "tuxonice_ui.h"
15651 +
15652 +LIST_HEAD(toi_filters);
15653 +LIST_HEAD(toiAllocators);
15654 +
15655 +LIST_HEAD(toi_modules);
15656 +EXPORT_SYMBOL_GPL(toi_modules);
15657 +
15658 +struct toi_module_ops *toiActiveAllocator;
15659 +EXPORT_SYMBOL_GPL(toiActiveAllocator);
15660 +
15661 +static int toi_num_filters;
15662 +int toiNumAllocators, toi_num_modules;
15663 +
15664 +/*
15665 + * toi_header_storage_for_modules
15666 + *
15667 + * Returns the amount of space needed to store configuration
15668 + * data needed by the modules prior to copying back the original
15669 + * kernel. We can exclude data for pageset2 because it will be
15670 + * available anyway once the kernel is copied back.
15671 + */
15672 +long toi_header_storage_for_modules(void)
15673 +{
15674 +       struct toi_module_ops *this_module;
15675 +       int bytes = 0;
15676 +
15677 +       list_for_each_entry(this_module, &toi_modules, module_list) {
15678 +               if (!this_module->enabled ||
15679 +                   (this_module->type == WRITER_MODULE &&
15680 +                    toiActiveAllocator != this_module))
15681 +                       continue;
15682 +               if (this_module->storage_needed) {
15683 +                       int this = this_module->storage_needed() +
15684 +                               sizeof(struct toi_module_header) +
15685 +                               sizeof(int);
15686 +                       this_module->header_requested = this;
15687 +                       bytes += this;
15688 +               }
15689 +       }
15690 +
15691 +       /* One more for the empty terminator */
15692 +       return bytes + sizeof(struct toi_module_header);
15693 +}
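+
+/*
+ * Worked example: with two enabled modules whose storage_needed()
+ * returns 100 and 200 bytes, and writing H for
+ * sizeof(struct toi_module_header), the total comes to
+ * (100 + H + sizeof(int)) + (200 + H + sizeof(int)) + H, the final
+ * H being the empty terminator mentioned above.
+ */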
15694 +
15695 +void print_toi_header_storage_for_modules(void)
15696 +{
15697 +       struct toi_module_ops *this_module;
15698 +       int bytes = 0;
15699 +
15700 +       printk(KERN_DEBUG "Header storage:\n");
15701 +       list_for_each_entry(this_module, &toi_modules, module_list) {
15702 +               if (!this_module->enabled ||
15703 +                   (this_module->type == WRITER_MODULE &&
15704 +                    toiActiveAllocator != this_module))
15705 +                       continue;
15706 +               if (this_module->storage_needed) {
15707 +                       int this = this_module->storage_needed() +
15708 +                               sizeof(struct toi_module_header) +
15709 +                               sizeof(int);
15710 +                       this_module->header_requested = this;
15711 +                       bytes += this;
15712 +                       printk(KERN_DEBUG "+ %16s : %-4d/%d.\n",
15713 +                                       this_module->name,
15714 +                                       this_module->header_used, this);
15715 +               }
15716 +       }
15717 +
15718 +       printk(KERN_DEBUG "+ empty terminator : %zu.\n",
15719 +                       sizeof(struct toi_module_header));
15720 +       printk(KERN_DEBUG "                     ====\n");
15721 +       printk(KERN_DEBUG "                     %zu\n",
15722 +                       bytes + sizeof(struct toi_module_header));
15723 +}
15724 +EXPORT_SYMBOL_GPL(print_toi_header_storage_for_modules);
15725 +
15726 +/*
15727 + * toi_memory_for_modules
15728 + *
15729 + * Returns the amount of memory requested by modules for
15730 + * doing their work during the cycle.
15731 + */
15732 +
15733 +long toi_memory_for_modules(int print_parts)
15734 +{
15735 +       long bytes = 0, result;
15736 +       struct toi_module_ops *this_module;
15737 +
15738 +       if (print_parts)
15739 +               printk(KERN_INFO "Memory for modules:\n===================\n");
15740 +       list_for_each_entry(this_module, &toi_modules, module_list) {
15741 +               int this;
15742 +               if (!this_module->enabled)
15743 +                       continue;
15744 +               if (this_module->memory_needed) {
15745 +                       this = this_module->memory_needed();
15746 +                       if (print_parts)
15747 +                               printk(KERN_INFO "%10d bytes (%5ld pages) for "
15748 +                                               "module '%s'.\n", this,
15749 +                                               DIV_ROUND_UP(this, PAGE_SIZE),
15750 +                                               this_module->name);
15751 +                       bytes += this;
15752 +               }
15753 +       }
15754 +
15755 +       result = DIV_ROUND_UP(bytes, PAGE_SIZE);
15756 +       if (print_parts)
15757 +               printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result);
15758 +
15759 +       return result;
15760 +}
15761 +
15762 +/*
15763 + * toi_expected_compression_ratio
15764 + *
15765 + * Returns the compression ratio expected when saving the image.
15766 + */
15767 +
15768 +int toi_expected_compression_ratio(void)
15769 +{
15770 +       int ratio = 100;
15771 +       struct toi_module_ops *this_module;
15772 +
15773 +       list_for_each_entry(this_module, &toi_modules, module_list) {
15774 +               if (!this_module->enabled)
15775 +                       continue;
15776 +               if (this_module->expected_compression)
15777 +                       ratio = ratio * this_module->expected_compression()
15778 +                               / 100;
15779 +       }
15780 +
15781 +       return ratio;
15782 +}
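+
+/*
+ * Ratios compose multiplicatively: if one enabled module expects 60
+ * and another 50, the result is 100 * 60 / 100 * 50 / 100 = 30, i.e.
+ * the image is expected to shrink to 30% of its original size.
+ * Integer division rounds down at each step.
+ */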
15783 +
15784 +/* toi_find_module_given_dir
15785 + * Functionality :     Return a module (if found), given a pointer
15786 + *                     to its directory name
15787 + */
15788 +
15789 +static struct toi_module_ops *toi_find_module_given_dir(char *name)
15790 +{
15791 +       struct toi_module_ops *this_module, *found_module = NULL;
15792 +
15793 +       list_for_each_entry(this_module, &toi_modules, module_list) {
15794 +               if (!strcmp(name, this_module->directory)) {
15795 +                       found_module = this_module;
15796 +                       break;
15797 +               }
15798 +       }
15799 +
15800 +       return found_module;
15801 +}
15802 +
15803 +/* toi_find_module_given_name
15804 + * Functionality :     Return a module (if found), given a pointer
15805 + *                     to its name
15806 + */
15807 +
15808 +struct toi_module_ops *toi_find_module_given_name(char *name)
15809 +{
15810 +       struct toi_module_ops *this_module, *found_module = NULL;
15811 +
15812 +       list_for_each_entry(this_module, &toi_modules, module_list) {
15813 +               if (!strcmp(name, this_module->name)) {
15814 +                       found_module = this_module;
15815 +                       break;
15816 +               }
15817 +       }
15818 +
15819 +       return found_module;
15820 +}
15821 +
15822 +/*
15823 + * toi_print_module_debug_info
15824 + * Functionality   : Get debugging info from modules into a buffer.
15825 + */
15826 +int toi_print_module_debug_info(char *buffer, int buffer_size)
15827 +{
15828 +       struct toi_module_ops *this_module;
15829 +       int len = 0;
15830 +
15831 +       list_for_each_entry(this_module, &toi_modules, module_list) {
15832 +               if (!this_module->enabled)
15833 +                       continue;
15834 +               if (this_module->print_debug_info) {
15835 +                       int result;
15836 +                       result = this_module->print_debug_info(buffer + len,
15837 +                                       buffer_size - len);
15838 +                       len += result;
15839 +               }
15840 +       }
15841 +
15842 +       /* Ensure null terminated without writing past the buffer */
15843 +       buffer[min(len, buffer_size - 1)] = '\0';
15844 +
15845 +       return len;
15846 +}
15847 +
15848 +/*
15849 + * toi_register_module
15850 + *
15851 + * Register a module.
15852 + */
15853 +int toi_register_module(struct toi_module_ops *module)
15854 +{
15855 +       int i;
15856 +       struct kobject *kobj;
15857 +
15858 +       module->enabled = 1;
15859 +
15860 +       if (toi_find_module_given_name(module->name)) {
15861 +               printk(KERN_INFO "TuxOnIce: Trying to load module %s,"
15862 +                               " which is already registered.\n",
15863 +                               module->name);
15864 +               return -EBUSY;
15865 +       }
15866 +
15867 +       switch (module->type) {
15868 +       case FILTER_MODULE:
15869 +               list_add_tail(&module->type_list, &toi_filters);
15870 +               toi_num_filters++;
15871 +               break;
15872 +       case WRITER_MODULE:
15873 +               list_add_tail(&module->type_list, &toiAllocators);
15874 +               toiNumAllocators++;
15875 +               break;
15876 +       case MISC_MODULE:
15877 +       case MISC_HIDDEN_MODULE:
15878 +       case BIO_ALLOCATOR_MODULE:
15879 +               break;
15880 +       default:
15881 +               printk(KERN_ERR "Hmmm. Module '%s' has an invalid type."
15882 +                       " It has been ignored.\n", module->name);
15883 +               return -EINVAL;
15884 +       }
15885 +       list_add_tail(&module->module_list, &toi_modules);
15886 +       toi_num_modules++;
15887 +
15888 +       if ((!module->directory && !module->shared_directory) ||
15889 +                       !module->sysfs_data || !module->num_sysfs_entries)
15890 +               return 0;
15891 +
15892 +       /*
15893 +        * Modules may share a directory, but those with shared_directory
15894 +        * set must be loaded (via symbol dependencies) after their
15895 +        * parents and unloaded before them.
15896 +        */
15897 +       if (module->shared_directory) {
15898 +               struct toi_module_ops *shared =
15899 +                       toi_find_module_given_dir(module->shared_directory);
15900 +               if (!shared) {
15901 +                       printk(KERN_ERR "TuxOnIce: Module %s wants to share "
15902 +                                       "%s's directory but %s isn't loaded.\n",
15903 +                                       module->name, module->shared_directory,
15904 +                                       module->shared_directory);
15905 +                       toi_unregister_module(module);
15906 +                       return -ENODEV;
15907 +               }
15908 +               kobj = shared->dir_kobj;
15909 +       } else {
15910 +               if (!strncmp(module->directory, "[ROOT]", 6))
15911 +                       kobj = tuxonice_kobj;
15912 +               else
15913 +                       kobj = make_toi_sysdir(module->directory);
15914 +       }
15915 +       module->dir_kobj = kobj;
15916 +       for (i = 0; i < module->num_sysfs_entries; i++) {
15917 +               int result = toi_register_sysfs_file(kobj,
15918 +                               &module->sysfs_data[i]);
15919 +               if (result)
15920 +                       return result;
15921 +       }
15922 +       return 0;
15923 +}
15924 +EXPORT_SYMBOL_GPL(toi_register_module);
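+
+/*
+ * Minimal registration sketch (hypothetical module; a module with no
+ * directory or sysfs entries returns from toi_register_module() as
+ * soon as it has been added to the lists):
+ *
+ *     static struct toi_module_ops toi_example_ops = {
+ *             .type = MISC_MODULE,
+ *             .name = "example",
+ *             .module = THIS_MODULE,
+ *     };
+ *
+ *     static int __init toi_example_load(void)
+ *     {
+ *             return toi_register_module(&toi_example_ops);
+ *     }
+ */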
15925 +
15926 +/*
15927 + * toi_unregister_module
15928 + *
15929 + * Remove a module.
15930 + */
15931 +void toi_unregister_module(struct toi_module_ops *module)
15932 +{
15933 +       int i;
15934 +
15935 +       if (module->dir_kobj)
15936 +               for (i = 0; i < module->num_sysfs_entries; i++)
15937 +                       toi_unregister_sysfs_file(module->dir_kobj,
15938 +                                       &module->sysfs_data[i]);
15939 +
15940 +       if (!module->shared_directory && module->directory &&
15941 +                       strncmp(module->directory, "[ROOT]", 6))
15942 +               remove_toi_sysdir(module->dir_kobj);
15943 +
15944 +       switch (module->type) {
15945 +       case FILTER_MODULE:
15946 +               list_del(&module->type_list);
15947 +               toi_num_filters--;
15948 +               break;
15949 +       case WRITER_MODULE:
15950 +               list_del(&module->type_list);
15951 +               toiNumAllocators--;
15952 +               if (toiActiveAllocator == module) {
15953 +                       toiActiveAllocator = NULL;
15954 +                       clear_toi_state(TOI_CAN_RESUME);
15955 +                       clear_toi_state(TOI_CAN_HIBERNATE);
15956 +               }
15957 +               break;
15958 +       case MISC_MODULE:
15959 +       case MISC_HIDDEN_MODULE:
15960 +       case BIO_ALLOCATOR_MODULE:
15961 +               break;
15962 +       default:
15963 +               printk(KERN_ERR "Module '%s' has an invalid type."
15964 +                       " It has been ignored.\n", module->name);
15965 +               return;
15966 +       }
15967 +       list_del(&module->module_list);
15968 +       toi_num_modules--;
15969 +}
15970 +EXPORT_SYMBOL_GPL(toi_unregister_module);
15971 +
15972 +/*
15973 + * toi_move_module_tail
15974 + *
15975 + * Rearrange modules when reloading the config.
15976 + */
15977 +void toi_move_module_tail(struct toi_module_ops *module)
15978 +{
15979 +       switch (module->type) {
15980 +       case FILTER_MODULE:
15981 +               if (toi_num_filters > 1)
15982 +                       list_move_tail(&module->type_list, &toi_filters);
15983 +               break;
15984 +       case WRITER_MODULE:
15985 +               if (toiNumAllocators > 1)
15986 +                       list_move_tail(&module->type_list, &toiAllocators);
15987 +               break;
15988 +       case MISC_MODULE:
15989 +       case MISC_HIDDEN_MODULE:
15990 +       case BIO_ALLOCATOR_MODULE:
15991 +               break;
15992 +       default:
15993 +               printk(KERN_ERR "Module '%s' has an invalid type."
15994 +                       " It has been ignored.\n", module->name);
15995 +               return;
15996 +       }
15997 +       if ((toi_num_filters + toiNumAllocators) > 1)
15998 +               list_move_tail(&module->module_list, &toi_modules);
15999 +}
16000 +
16001 +/*
16002 + * toi_initialise_modules
16003 + *
16004 + * Get ready to do some work!
16005 + */
16006 +int toi_initialise_modules(int starting_cycle, int early)
16007 +{
16008 +       struct toi_module_ops *this_module;
16009 +       int result;
16010 +
16011 +       list_for_each_entry(this_module, &toi_modules, module_list) {
16012 +               this_module->header_requested = 0;
16013 +               this_module->header_used = 0;
16014 +               if (!this_module->enabled)
16015 +                       continue;
16016 +               if (this_module->early != early)
16017 +                       continue;
16018 +               if (this_module->initialise) {
16019 +                       result = this_module->initialise(starting_cycle);
16020 +                       if (result) {
16021 +                               toi_cleanup_modules(starting_cycle);
16022 +                               return result;
16023 +                       }
16024 +                       this_module->initialised = 1;
16025 +               }
16026 +       }
16027 +
16028 +       return 0;
16029 +}
16030 +
16031 +/*
16032 + * toi_cleanup_modules
16033 + *
16034 + * Tell modules the work is done.
16035 + */
16036 +void toi_cleanup_modules(int finishing_cycle)
16037 +{
16038 +       struct toi_module_ops *this_module;
16039 +
16040 +       list_for_each_entry(this_module, &toi_modules, module_list) {
16041 +               if (!this_module->enabled || !this_module->initialised)
16042 +                       continue;
16043 +               if (this_module->cleanup)
16044 +                       this_module->cleanup(finishing_cycle);
16045 +               this_module->initialised = 0;
16046 +       }
16047 +}
16048 +
16049 +/*
16050 + * toi_pre_atomic_restore_modules
16051 + *
16052 + * Give modules a chance to do any work needed before the atomic restore.
16053 + */
16054 +void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
16055 +{
16056 +       struct toi_module_ops *this_module;
16057 +
16058 +       list_for_each_entry(this_module, &toi_modules, module_list) {
16059 +               if (this_module->enabled && this_module->pre_atomic_restore)
16060 +                       this_module->pre_atomic_restore(bkd);
16061 +       }
16062 +}
16063 +
16064 +/*
16065 + * toi_post_atomic_restore_modules
16066 + *
16067 + * Let modules do any work needed after the atomic restore has happened.
16068 + */
16069 +void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
16070 +{
16071 +       struct toi_module_ops *this_module;
16072 +
16073 +       list_for_each_entry(this_module, &toi_modules, module_list) {
16074 +               if (this_module->enabled && this_module->post_atomic_restore)
16075 +                       this_module->post_atomic_restore(bkd);
16076 +       }
16077 +}
16078 +
16079 +/*
16080 + * toi_get_next_filter
16081 + *
16082 + * Get the next filter in the pipeline.
16083 + */
16084 +struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought)
16085 +{
16086 +       struct toi_module_ops *last_filter = NULL, *this_filter = NULL;
16087 +
16088 +       list_for_each_entry(this_filter, &toi_filters, type_list) {
16089 +               if (!this_filter->enabled)
16090 +                       continue;
16091 +               if ((last_filter == filter_sought) || (!filter_sought))
16092 +                       return this_filter;
16093 +               last_filter = this_filter;
16094 +       }
16095 +
16096 +       return toiActiveAllocator;
16097 +}
16098 +EXPORT_SYMBOL_GPL(toi_get_next_filter);
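+
+/*
+ * Traversal works as follows: toi_get_next_filter(NULL) yields the
+ * first enabled filter, toi_get_next_filter(f) yields the first
+ * enabled filter after f, and once the list is exhausted (or empty)
+ * the active allocator is returned, so the allocator always
+ * terminates the chain.
+ */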
16099 +
16100 +/**
16101 + * toi_print_modules - printk what support is loaded.
16102 + */
16103 +void toi_print_modules(void)
16104 +{
16105 +       struct toi_module_ops *this_module;
16106 +       int prev = 0;
16107 +
16108 +       printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for");
16109 +
16110 +       list_for_each_entry(this_module, &toi_modules, module_list) {
16111 +               if (this_module->type == MISC_HIDDEN_MODULE)
16112 +                       continue;
16113 +               printk("%s %s%s%s", prev ? "," : "",
16114 +                               this_module->enabled ? "" : "[",
16115 +                               this_module->name,
16116 +                               this_module->enabled ? "" : "]");
16117 +               prev = 1;
16118 +       }
16119 +
16120 +       printk(".\n");
16121 +}
16122 +
16123 +/* toi_get_modules
16124 + *
16125 + * Take a reference to modules so they can't go away under us.
16126 + */
16127 +
16128 +int toi_get_modules(void)
16129 +{
16130 +       struct toi_module_ops *this_module;
16131 +
16132 +       list_for_each_entry(this_module, &toi_modules, module_list) {
16133 +               struct toi_module_ops *this_module2;
16134 +
16135 +               if (try_module_get(this_module->module))
16136 +                       continue;
16137 +
16138 +               /* Failed! Reverse gets and return error */
16139 +               list_for_each_entry(this_module2, &toi_modules,
16140 +                               module_list) {
16141 +                       if (this_module == this_module2)
16142 +                               return -EINVAL;
16143 +                       module_put(this_module2->module);
16144 +               }
16145 +       }
16146 +       return 0;
16147 +}
16148 +
16149 +/* toi_put_modules
16150 + *
16151 + * Release our references to modules we used.
16152 + */
16153 +
16154 +void toi_put_modules(void)
16155 +{
16156 +       struct toi_module_ops *this_module;
16157 +
16158 +       list_for_each_entry(this_module, &toi_modules, module_list)
16159 +               module_put(this_module->module);
16160 +}
16161 diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h
16162 new file mode 100644
16163 index 0000000..9e198c4
16164 --- /dev/null
16165 +++ b/kernel/power/tuxonice_modules.h
16166 @@ -0,0 +1,197 @@
16167 +/*
16168 + * kernel/power/tuxonice_modules.h
16169 + *
16170 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
16171 + *
16172 + * This file is released under the GPLv2.
16173 + *
16174 + * It contains declarations for modules. Modules are additions to
16175 + * TuxOnIce that provide facilities such as image compression or
16176 + * encryption, backends for storage of the image and user interfaces.
16177 + *
16178 + */
16179 +
16180 +#ifndef TOI_MODULES_H
16181 +#define TOI_MODULES_H
16182 +
16183 +/* This is the maximum size we store in the image header for a module name */
16184 +#define TOI_MAX_MODULE_NAME_LENGTH 30
16185 +
16186 +struct toi_boot_kernel_data;
16187 +
16188 +/* Per-module metadata */
16189 +struct toi_module_header {
16190 +       char name[TOI_MAX_MODULE_NAME_LENGTH];
16191 +       int enabled;
16192 +       int type;
16193 +       int index;
16194 +       int data_length;
16195 +       unsigned long signature;
16196 +};
16197 +
16198 +enum {
16199 +       FILTER_MODULE,
16200 +       WRITER_MODULE,
16201 +       BIO_ALLOCATOR_MODULE,
16202 +       MISC_MODULE,
16203 +       MISC_HIDDEN_MODULE,
16204 +};
16205 +
16206 +enum {
16207 +       TOI_ASYNC,
16208 +       TOI_SYNC
16209 +};
16210 +
16211 +struct toi_module_ops {
16212 +       /* Functions common to all modules */
16213 +       int type;
16214 +       char *name;
16215 +       char *directory;
16216 +       char *shared_directory;
16217 +       struct kobject *dir_kobj;
16218 +       struct module *module;
16219 +       int enabled, early, initialised;
16220 +       struct list_head module_list;
16221 +
16222 +       /* List of filters or allocators */
16223 +       struct list_head list, type_list;
16224 +
16225 +       /*
16226 +        * Requirements for memory during the cycle and for storage
16227 +        * in the image header.
16228 +        */
16229 +       int (*memory_needed) (void);
16230 +       int (*storage_needed) (void);
16231 +
16232 +       int header_requested, header_used;
16233 +
16234 +       int (*expected_compression) (void);
16235 +
16236 +       /*
16237 +        * Debug info
16238 +        */
16239 +       int (*print_debug_info) (char *buffer, int size);
16240 +       int (*save_config_info) (char *buffer);
16241 +       void (*load_config_info) (char *buffer, int len);
16242 +
16243 +       /*
16244 +        * Initialise & cleanup - general routines called
16245 +        * at the start and end of a cycle.
16246 +        */
16247 +       int (*initialise) (int starting_cycle);
16248 +       void (*cleanup) (int finishing_cycle);
16249 +
16250 +       void (*pre_atomic_restore) (struct toi_boot_kernel_data *bkd);
16251 +       void (*post_atomic_restore) (struct toi_boot_kernel_data *bkd);
16252 +
16253 +       /*
16254 +        * Calls for allocating storage (allocators only).
16255 +        *
16256 +        * Header space is requested separately and cannot fail, but the
16257 +        * reservation is only applied when main storage is allocated.
16258 +        * The header space reservation is thus always set prior to
16259 +        * requesting the allocation of storage - and prior to querying
16260 +        * how much storage is available.
16261 +        */
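+       /*
+        * A typical call sequence under those rules might be (sketch;
+        * local names invented, register_storage() omitted):
+        *
+        *      ops->reserve_header_space(header_bytes);
+        *      avail = ops->storage_available();
+        *      ops->allocate_storage(main_bytes);
+        *      allocated = ops->storage_allocated();
+        */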
16262 +
16263 +       unsigned long (*storage_available) (void);
16264 +       void (*reserve_header_space) (unsigned long space_requested);
16265 +       int (*register_storage) (void);
16266 +       int (*allocate_storage) (unsigned long space_requested);
16267 +       unsigned long (*storage_allocated) (void);
16268 +
16269 +       /*
16270 +        * Routines used in image I/O.
16271 +        */
16272 +       int (*rw_init) (int rw, int stream_number);
16273 +       int (*rw_cleanup) (int rw);
16274 +       int (*write_page) (unsigned long index, struct page *buffer_page,
16275 +                       unsigned int buf_size);
16276 +       int (*read_page) (unsigned long *index, struct page *buffer_page,
16277 +                       unsigned int *buf_size);
16278 +       int (*io_flusher) (int rw);
16279 +
16280 +       /* Reset module if image exists but reading aborted */
16281 +       void (*noresume_reset) (void);
16282 +
16283 +       /* Read and write the metadata */
16284 +       int (*write_header_init) (void);
16285 +       int (*write_header_cleanup) (void);
16286 +
16287 +       int (*read_header_init) (void);
16288 +       int (*read_header_cleanup) (void);
16289 +
16290 +       /* To be called after read_header_init */
16291 +       int (*get_header_version) (void);
16292 +
16293 +       int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
16294 +                       char *buffer_start, int buffer_size);
16295 +
16296 +       int (*rw_header_chunk_noreadahead) (int rw,
16297 +                       struct toi_module_ops *owner, char *buffer_start,
16298 +                       int buffer_size);
16299 +
16300 +       /* Attempt to parse an image location */
16301 +       int (*parse_sig_location) (char *buffer, int only_writer, int quiet);
16302 +
16303 +       /* Throttle I/O according to throughput */
16304 +       void (*update_throughput_throttle) (int jif_index);
16305 +
16306 +       /* Flush outstanding I/O */
16307 +       int (*finish_all_io) (void);
16308 +
16309 +       /* Determine whether image exists that we can restore */
16310 +       int (*image_exists) (int quiet);
16311 +
16312 +       /* Mark the image as having tried to resume */
16313 +       int (*mark_resume_attempted) (int);
16314 +
16315 +       /* Destroy image if one exists */
16316 +       int (*remove_image) (void);
16317 +
16318 +       /* Sysfs Data */
16319 +       struct toi_sysfs_data *sysfs_data;
16320 +       int num_sysfs_entries;
16321 +
16322 +       /* Block I/O allocator */
16323 +       struct toi_bio_allocator_ops *bio_allocator_ops;
16324 +};
16325 +
16326 +extern int toi_num_modules, toiNumAllocators;
16327 +
16328 +extern struct toi_module_ops *toiActiveAllocator;
16329 +extern struct list_head toi_filters, toiAllocators, toi_modules;
16330 +
16331 +extern void toi_prepare_console_modules(void);
16332 +extern void toi_cleanup_console_modules(void);
16333 +
16334 +extern struct toi_module_ops *toi_find_module_given_name(char *name);
16335 +extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *);
16336 +
16337 +extern int toi_register_module(struct toi_module_ops *module);
16338 +extern void toi_move_module_tail(struct toi_module_ops *module);
16339 +
16340 +extern long toi_header_storage_for_modules(void);
16341 +extern long toi_memory_for_modules(int print_parts);
16342 +extern void print_toi_header_storage_for_modules(void);
16343 +extern int toi_expected_compression_ratio(void);
16344 +
16345 +extern int toi_print_module_debug_info(char *buffer, int buffer_size);
16346 +extern int toi_register_module(struct toi_module_ops *module);
16347 +extern void toi_unregister_module(struct toi_module_ops *module);
16348 +
16349 +extern int toi_initialise_modules(int starting_cycle, int early);
16350 +#define toi_initialise_modules_early(starting) \
16351 +       toi_initialise_modules(starting, 1)
16352 +#define toi_initialise_modules_late(starting) \
16353 +       toi_initialise_modules(starting, 0)
16354 +extern void toi_cleanup_modules(int finishing_cycle);
16355 +
16356 +extern void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
16357 +extern void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
16358 +
16359 +extern void toi_print_modules(void);
16360 +
16361 +int toi_get_modules(void);
16362 +void toi_put_modules(void);
16363 +#endif
16364 diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c
16365 new file mode 100644
16366 index 0000000..c5208ee
16367 --- /dev/null
16368 +++ b/kernel/power/tuxonice_netlink.c
16369 @@ -0,0 +1,345 @@
16370 +/*
16371 + * kernel/power/tuxonice_netlink.c
16372 + *
16373 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
16374 + *
16375 + * This file is released under the GPLv2.
16376 + *
16377 + * Functions for communicating with a userspace helper via netlink.
16378 + */
16379 +
16380 +
16381 +#include <linux/suspend.h>
16382 +#include <linux/sched.h>
16383 +#include "tuxonice_netlink.h"
16384 +#include "tuxonice.h"
16385 +#include "tuxonice_modules.h"
16386 +#include "tuxonice_alloc.h"
16387 +#include "tuxonice_builtin.h"
16388 +
16389 +static struct user_helper_data *uhd_list;
16390 +
16391 +/*
16392 + * Refill our pool of SKBs for use in emergencies (e.g. when eating
16393 + * memory and none can otherwise be allocated).
16394 + */
16395 +static void toi_fill_skb_pool(struct user_helper_data *uhd)
16396 +{
16397 +       while (uhd->pool_level < uhd->pool_limit) {
16398 +               struct sk_buff *new_skb =
16399 +                       alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
16400 +
16401 +               if (!new_skb)
16402 +                       break;
16403 +
16404 +               new_skb->next = uhd->emerg_skbs;
16405 +               uhd->emerg_skbs = new_skb;
16406 +               uhd->pool_level++;
16407 +       }
16408 +}
16409 +
16410 +/*
16411 + * Try to allocate a single skb. If we can't get one, try to use one from
16412 + * our pool.
16413 + */
16414 +static struct sk_buff *toi_get_skb(struct user_helper_data *uhd)
16415 +{
16416 +       struct sk_buff *skb =
16417 +               alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
16418 +
16419 +       if (skb)
16420 +               return skb;
16421 +
16422 +       skb = uhd->emerg_skbs;
16423 +       if (skb) {
16424 +               uhd->pool_level--;
16425 +               uhd->emerg_skbs = skb->next;
16426 +               skb->next = NULL;
16427 +       }
16428 +
16429 +       return skb;
16430 +}
16431 +
16432 +static void put_skb(struct user_helper_data *uhd, struct sk_buff *skb)
16433 +{
16434 +       if (uhd->pool_level < uhd->pool_limit) {
16435 +               skb->next = uhd->emerg_skbs;
16436 +               uhd->emerg_skbs = skb;
16437 +       } else
16438 +               kfree_skb(skb);
16439 +}
16440 +
16441 +void toi_send_netlink_message(struct user_helper_data *uhd,
16442 +               int type, void *params, size_t len)
16443 +{
16444 +       struct sk_buff *skb;
16445 +       struct nlmsghdr *nlh;
16446 +       void *dest;
16447 +       struct task_struct *t;
16448 +
16449 +       if (uhd->pid == -1)
16450 +               return;
16451 +
16452 +       if (uhd->debug)
16453 +               printk(KERN_ERR "toi_send_netlink_message: Send "
16454 +                               "message type %d.\n", type);
16455 +
16456 +       skb = toi_get_skb(uhd);
16457 +       if (!skb) {
16458 +               printk(KERN_INFO "toi_netlink: Can't allocate skb!\n");
16459 +               return;
16460 +       }
16461 +
16462 +       /* NLMSG_PUT contains a hidden goto nlmsg_failure */
16463 +       nlh = NLMSG_PUT(skb, 0, uhd->sock_seq, type, len);
16464 +       uhd->sock_seq++;
16465 +
16466 +       dest = NLMSG_DATA(nlh);
16467 +       if (params && len > 0)
16468 +               memcpy(dest, params, len);
16469 +
16470 +       netlink_unicast(uhd->nl, skb, uhd->pid, 0);
16471 +
16472 +       toi_read_lock_tasklist();
16473 +       t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
16474 +       if (!t) {
16475 +               toi_read_unlock_tasklist();
16476 +               if (uhd->pid > -1)
16477 +                       printk(KERN_INFO "Hmm. Can't find the userspace task"
16478 +                               " %d.\n", uhd->pid);
16479 +               return;
16480 +       }
16481 +       wake_up_process(t);
16482 +       toi_read_unlock_tasklist();
16483 +
16484 +       yield();
16485 +
16486 +       return;
16487 +
16488 +nlmsg_failure:
16489 +       if (skb)
16490 +               put_skb(uhd, skb);
16491 +
16492 +       if (uhd->debug)
16493 +               printk(KERN_ERR "toi_send_netlink_message: Failed to send "
16494 +                               "message type %d.\n", type);
16495 +}
16496 +EXPORT_SYMBOL_GPL(toi_send_netlink_message);
16497 +
16498 +static void send_whether_debugging(struct user_helper_data *uhd)
16499 +{
16500 +       static u8 is_debugging = 1;
16501 +
16502 +       toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING,
16503 +                       &is_debugging, sizeof(u8));
16504 +}
16505 +
16506 +/*
16507 + * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we
16508 + * are hibernating.
16509 + */
16510 +static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid)
16511 +{
16512 +       struct task_struct *t;
16513 +
16514 +       if (uhd->debug)
16515 +               printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid);
16516 +
16517 +       toi_read_lock_tasklist();
16518 +       t = find_task_by_pid_ns(pid, &init_pid_ns);
16519 +       if (!t) {
16520 +               toi_read_unlock_tasklist();
16521 +               printk(KERN_INFO "Strange. Can't find the userspace task %d.\n",
16522 +                               pid);
16523 +               return -EINVAL;
16524 +       }
16525 +
16526 +       t->flags |= PF_NOFREEZE;
16527 +
16528 +       toi_read_unlock_tasklist();
16529 +       uhd->pid = pid;
16530 +
16531 +       toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0);
16532 +
16533 +       return 0;
16534 +}
16535 +
16536 +/*
16537 + * Called when the userspace process has informed us that it's ready to roll.
16538 + */
16539 +static int nl_ready(struct user_helper_data *uhd, u32 version)
16540 +{
16541 +       if (version != uhd->interface_version) {
16542 +               printk(KERN_INFO "%s userspace process using invalid interface"
16543 +                               " version (%d - kernel wants %d). Trying to "
16544 +                               "continue without it.\n",
16545 +                               uhd->name, version, uhd->interface_version);
16546 +               if (uhd->not_ready)
16547 +                       uhd->not_ready();
16548 +               return -EINVAL;
16549 +       }
16550 +
16551 +       complete(&uhd->wait_for_process);
16552 +
16553 +       return 0;
16554 +}
16555 +
16556 +void toi_netlink_close_complete(struct user_helper_data *uhd)
16557 +{
16558 +       if (uhd->nl) {
16559 +               netlink_kernel_release(uhd->nl);
16560 +               uhd->nl = NULL;
16561 +       }
16562 +
16563 +       while (uhd->emerg_skbs) {
16564 +               struct sk_buff *next = uhd->emerg_skbs->next;
16565 +               kfree_skb(uhd->emerg_skbs);
16566 +               uhd->emerg_skbs = next;
16567 +       }
16568 +
16569 +       uhd->pid = -1;
16570 +}
16571 +EXPORT_SYMBOL_GPL(toi_netlink_close_complete);
16572 +
16573 +static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd,
16574 +               struct sk_buff *skb, struct nlmsghdr *nlh)
16575 +{
16576 +       int type = nlh->nlmsg_type;
16577 +       int *data;
16578 +       int err;
16579 +
16580 +       if (uhd->debug)
16581 +               printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n",
16582 +                               type);
16583 +
16584 +       /* Let the more specific handler go first. It returns
16585 +        * 1 for valid messages that it doesn't handle. */
16586 +       err = uhd->rcv_msg(skb, nlh);
16587 +       if (err != 1)
16588 +               return err;
16589 +
16590 +       /* Only allow one task to receive NOFREEZE privileges */
16591 +       if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) {
16592 +               printk(KERN_INFO "Received extra nofreeze me requests.\n");
16593 +               return -EBUSY;
16594 +       }
16595 +
16596 +       data = NLMSG_DATA(nlh);
16597 +
16598 +       switch (type) {
16599 +       case NETLINK_MSG_NOFREEZE_ME:
16600 +               return nl_set_nofreeze(uhd, nlh->nlmsg_pid);
16601 +       case NETLINK_MSG_GET_DEBUGGING:
16602 +               send_whether_debugging(uhd);
16603 +               return 0;
16604 +       case NETLINK_MSG_READY:
16605 +               if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) {
16606 +                       printk(KERN_INFO "Invalid ready message.\n");
16607 +                       if (uhd->not_ready)
16608 +                               uhd->not_ready();
16609 +                       return -EINVAL;
16610 +               }
16611 +               return nl_ready(uhd, (u32) *data);
16612 +       case NETLINK_MSG_CLEANUP:
16613 +               toi_netlink_close_complete(uhd);
16614 +               return 0;
16615 +       }
16616 +
16617 +       return -EINVAL;
16618 +}
16619 +
16620 +static void toi_user_rcv_skb(struct sk_buff *skb)
16621 +{
16622 +       int err;
16623 +       struct nlmsghdr *nlh;
16624 +       struct user_helper_data *uhd = uhd_list;
16625 +
16626 +       while (uhd && uhd->netlink_id != skb->sk->sk_protocol)
16627 +               uhd = uhd->next;
16628 +
16629 +       if (!uhd)
16630 +               return;
16631 +
16632 +       while (skb->len >= NLMSG_SPACE(0)) {
16633 +               u32 rlen;
16634 +
16635 +               nlh = (struct nlmsghdr *) skb->data;
16636 +               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
16637 +                       return;
16638 +
16639 +               rlen = NLMSG_ALIGN(nlh->nlmsg_len);
16640 +               if (rlen > skb->len)
16641 +                       rlen = skb->len;
16642 +
16643 +               err = toi_nl_gen_rcv_msg(uhd, skb, nlh);
16644 +               if (err)
16645 +                       netlink_ack(skb, nlh, err);
16646 +               else if (nlh->nlmsg_flags & NLM_F_ACK)
16647 +                       netlink_ack(skb, nlh, 0);
16648 +               skb_pull(skb, rlen);
16649 +       }
16650 +}
16651 +
16652 +static int netlink_prepare(struct user_helper_data *uhd)
16653 +{
16654 +       uhd->next = uhd_list;
16655 +       uhd_list = uhd;
16656 +
16657 +       uhd->sock_seq = 0x42c0ffee;
16658 +       uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, 0,
16659 +                       toi_user_rcv_skb, NULL, THIS_MODULE);
16660 +       if (!uhd->nl) {
16661 +               printk(KERN_INFO "Failed to allocate netlink socket for %s.\n",
16662 +                               uhd->name);
16663 +               return -ENOMEM;
16664 +       }
16665 +
16666 +       toi_fill_skb_pool(uhd);
16667 +
16668 +       return 0;
16669 +}
16670 +
16671 +void toi_netlink_close(struct user_helper_data *uhd)
16672 +{
16673 +       struct task_struct *t;
16674 +
16675 +       toi_read_lock_tasklist();
16676 +       t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
16677 +       if (t)
16678 +               t->flags &= ~PF_NOFREEZE;
16679 +       toi_read_unlock_tasklist();
16680 +
16681 +       toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0);
16682 +}
16683 +EXPORT_SYMBOL_GPL(toi_netlink_close);
16684 +
16685 +int toi_netlink_setup(struct user_helper_data *uhd)
16686 +{
16687 +       /* In case userui didn't cleanup properly on us */
16688 +       toi_netlink_close_complete(uhd);
16689 +
16690 +       if (netlink_prepare(uhd) < 0) {
16691 +               printk(KERN_INFO "Netlink prepare failed.\n");
16692 +               return 1;
16693 +       }
16694 +
16695 +       if (toi_launch_userspace_program(uhd->program, uhd->netlink_id,
16696 +                               UMH_WAIT_EXEC, uhd->debug) < 0) {
16697 +               printk(KERN_INFO "Launch userspace program failed.\n");
16698 +               toi_netlink_close_complete(uhd);
16699 +               return 1;
16700 +       }
16701 +
16702 +       /* Wait 2 seconds for the userspace process to make contact */
16703 +       wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ);
16704 +
16705 +       if (uhd->pid == -1) {
16706 +               printk(KERN_INFO "%s: Failed to contact userspace process.\n",
16707 +                               uhd->name);
16708 +               toi_netlink_close_complete(uhd);
16709 +               return 1;
16710 +       }
16711 +
16712 +       return 0;
16713 +}
16714 +EXPORT_SYMBOL_GPL(toi_netlink_setup);
16715 diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h
16716 new file mode 100644
16717 index 0000000..b8ef06e
16718 --- /dev/null
16719 +++ b/kernel/power/tuxonice_netlink.h
16720 @@ -0,0 +1,62 @@
16721 +/*
16722 + * kernel/power/tuxonice_netlink.h
16723 + *
16724 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
16725 + *
16726 + * This file is released under the GPLv2.
16727 + *
16728 + * Declarations for functions for communicating with a userspace helper
16729 + * via netlink.
16730 + */
16731 +
16732 +#include <linux/netlink.h>
16733 +#include <net/sock.h>
16734 +
16735 +#define NETLINK_MSG_BASE 0x10
16736 +
16737 +#define NETLINK_MSG_READY 0x10
16738 +#define        NETLINK_MSG_NOFREEZE_ME 0x16
16738 +#define NETLINK_MSG_NOFREEZE_ME 0x16
16740 +#define NETLINK_MSG_CLEANUP 0x24
16741 +#define NETLINK_MSG_NOFREEZE_ACK 0x27
16742 +#define NETLINK_MSG_IS_DEBUGGING 0x28
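+
+/*
+ * Message flow as implemented in tuxonice_netlink.c: the helper sends
+ * NETLINK_MSG_READY with its interface version (checked in nl_ready());
+ * the first process to send NETLINK_MSG_NOFREEZE_ME is given
+ * PF_NOFREEZE and answered with NETLINK_MSG_NOFREEZE_ACK;
+ * NETLINK_MSG_GET_DEBUGGING is answered with NETLINK_MSG_IS_DEBUGGING;
+ * and NETLINK_MSG_CLEANUP releases the kernel side of the socket.
+ */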
16743 +
16744 +struct user_helper_data {
16745 +       int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh);
16746 +       void (*not_ready) (void);
16747 +       struct sock *nl;
16748 +       u32 sock_seq;
16749 +       pid_t pid;
16750 +       char *comm;
16751 +       char program[256];
16752 +       int pool_level;
16753 +       int pool_limit;
16754 +       struct sk_buff *emerg_skbs;
16755 +       int skb_size;
16756 +       int netlink_id;
16757 +       char *name;
16758 +       struct user_helper_data *next;
16759 +       struct completion wait_for_process;
16760 +       u32 interface_version;
16761 +       int must_init;
16762 +       int debug;
16763 +};
16764 +
16765 +#ifdef CONFIG_NET
16766 +int toi_netlink_setup(struct user_helper_data *uhd);
16767 +void toi_netlink_close(struct user_helper_data *uhd);
16768 +void toi_send_netlink_message(struct user_helper_data *uhd,
16769 +               int type, void *params, size_t len);
16770 +void toi_netlink_close_complete(struct user_helper_data *uhd);
16771 +#else
16772 +static inline int toi_netlink_setup(struct user_helper_data *uhd)
16773 +{
16774 +       return 0;
16775 +}
16776 +
16777 +static inline void toi_netlink_close(struct user_helper_data *uhd) { }
16778 +static inline void toi_send_netlink_message(struct user_helper_data *uhd,
16779 +               int type, void *params, size_t len) { }
16780 +static inline void toi_netlink_close_complete(struct user_helper_data *uhd)
16781 +       { }
16782 +#endif
16783 diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c
16784 new file mode 100644
16785 index 0000000..091c9e3
16786 --- /dev/null
16787 +++ b/kernel/power/tuxonice_pagedir.c
16788 @@ -0,0 +1,339 @@
16789 +/*
16790 + * kernel/power/tuxonice_pagedir.c
16791 + *
16792 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
16793 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
16794 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
16795 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
16796 + *
16797 + * This file is released under the GPLv2.
16798 + *
16799 + * Routines for handling pagesets.
16800 + * Note that pbes aren't actually stored as such. They're stored as
16801 + * bitmaps and extents.
16802 + */
16803 +
16804 +#include <linux/suspend.h>
16805 +#include <linux/highmem.h>
16806 +#include <linux/bootmem.h>
16807 +#include <linux/hardirq.h>
16808 +#include <linux/sched.h>
16809 +#include <linux/cpu.h>
16810 +#include <asm/tlbflush.h>
16811 +
16812 +#include "tuxonice_pageflags.h"
16813 +#include "tuxonice_ui.h"
16814 +#include "tuxonice_pagedir.h"
16815 +#include "tuxonice_prepare_image.h"
16816 +#include "tuxonice.h"
16817 +#include "tuxonice_builtin.h"
16818 +#include "tuxonice_alloc.h"
16819 +
16820 +static int ptoi_pfn;
16821 +static struct pbe *this_low_pbe;
16822 +static struct pbe **last_low_pbe_ptr;
16823 +static struct memory_bitmap dup_map1, dup_map2;
16824 +
16825 +void toi_reset_alt_image_pageset2_pfn(void)
16826 +{
16827 +       memory_bm_position_reset(pageset2_map);
16828 +}
16829 +
16830 +static struct page *first_conflicting_page;
16831 +
16832 +/*
16833 + * free_conflicting_pages - free the chain of pages, each of which
16834 + * stores a pointer to the next page in its own contents.
16835 + */
16836 +static void free_conflicting_pages(void)
16837 +{
16838 +       while (first_conflicting_page) {
16839 +               struct page *next =
16840 +                       *((struct page **) kmap(first_conflicting_page));
16841 +               kunmap(first_conflicting_page);
16842 +               toi__free_page(29, first_conflicting_page);
16843 +               first_conflicting_page = next;
16844 +       }
16845 +}
16846 +
16847 +/* __toi_get_nonconflicting_page
16848 + *
16849 + * Description: Gets order zero pages that won't be overwritten
16850 + *             while copying the original pages.
16851 + */
16852 +
16853 +struct page *___toi_get_nonconflicting_page(int can_be_highmem)
16854 +{
16855 +       struct page *page;
16856 +       gfp_t flags = TOI_ATOMIC_GFP;
16857 +       if (can_be_highmem)
16858 +               flags |= __GFP_HIGHMEM;
16859 +
16860 +
16861 +       if (test_toi_state(TOI_LOADING_ALT_IMAGE) &&
16862 +                       pageset2_map &&
16863 +                       (ptoi_pfn != BM_END_OF_MAP)) {
16864 +               do {
16865 +                       ptoi_pfn = memory_bm_next_pfn(pageset2_map);
16866 +                       if (ptoi_pfn != BM_END_OF_MAP) {
16867 +                               page = pfn_to_page(ptoi_pfn);
16868 +                               if (!PagePageset1(page) &&
16869 +                                   (can_be_highmem || !PageHighMem(page)))
16870 +                                       return page;
16871 +                       }
16872 +               } while (ptoi_pfn != BM_END_OF_MAP);
16873 +       }
16874 +
16875 +       do {
16876 +               page = toi_alloc_page(29, flags);
16877 +               if (!page) {
16878 +                       printk(KERN_INFO "Failed to get nonconflicting "
16879 +                                       "page.\n");
16880 +                       return NULL;
16881 +               }
16882 +               if (PagePageset1(page)) {
16883 +                       struct page **next = (struct page **) kmap(page);
16884 +                       *next = first_conflicting_page;
16885 +                       first_conflicting_page = page;
16886 +                       kunmap(page);
16887 +               }
16888 +       } while (PagePageset1(page));
16889 +
16890 +       return page;
16891 +}
16892 +
16893 +unsigned long __toi_get_nonconflicting_page(void)
16894 +{
16895 +       struct page *page = ___toi_get_nonconflicting_page(0);
16896 +       return page ? (unsigned long) page_address(page) : 0;
16897 +}
16898 +
16899 +static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe,
16900 +               int highmem)
16901 +{
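+       /*
+        * this_pbe + 1 only fits in the current page if its end, i.e.
+        * (offset of this_pbe within the page) + 2 * sizeof(struct pbe),
+        * is still within PAGE_SIZE; otherwise start a fresh,
+        * nonconflicting, zeroed page and hand back its first pbe.
+        */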
16902 +       if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1))
16903 +                    + 2 * sizeof(struct pbe)) > PAGE_SIZE) {
16904 +               struct page *new_page =
16905 +                       ___toi_get_nonconflicting_page(highmem);
16906 +               if (!new_page)
16907 +                       return ERR_PTR(-ENOMEM);
16908 +               this_pbe = (struct pbe *) kmap(new_page);
16909 +               memset(this_pbe, 0, PAGE_SIZE);
16910 +               *page_ptr = new_page;
16911 +       } else
16912 +               this_pbe++;
16913 +
16914 +       return this_pbe;
16915 +}
16916 +
16917 +/**
16918 + * get_pageset1_load_addresses - generate pbes for conflicting pages
16919 + *
16920 + * We check here that pagedir & pages it points to won't collide
16921 + * with pages where we're going to restore from the loaded pages
16922 + * later.
16923 + *
16924 + * Returns:
16925 + *     Zero on success, one if couldn't find enough pages (shouldn't
16926 + *     happen).
16927 + **/
16928 +int toi_get_pageset1_load_addresses(void)
16929 +{
16930 +       int pfn, highallocd = 0, lowallocd = 0;
16931 +       int low_needed = pagedir1.size - get_highmem_size(pagedir1);
16932 +       int high_needed = get_highmem_size(pagedir1);
16933 +       int low_pages_for_highmem = 0;
16934 +       gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM;
16935 +       struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL,
16936 +                   *low_pbe_page;
16937 +       struct pbe **last_high_pbe_ptr = &restore_highmem_pblist,
16938 +                  *this_high_pbe = NULL;
16939 +       int orig_low_pfn, orig_high_pfn;
16940 +       int high_pbes_done = 0, low_pbes_done = 0;
16941 +       int low_direct = 0, high_direct = 0, result = 0, i;
16942 +
16943 +       /*
16944 +        * We need to duplicate pageset1's map because memory_bm_next_pfn's
16945 +        * state gets stomped on by the PagePageset1() test in setup_pbes.
16946 +        */
16947 +       memory_bm_create(&dup_map1, GFP_ATOMIC, 0);
16948 +       memory_bm_dup(pageset1_map, &dup_map1);
16949 +
16950 +       memory_bm_create(&dup_map2, GFP_ATOMIC, 0);
16951 +       memory_bm_dup(pageset1_map, &dup_map2);
16952 +
16953 +       memory_bm_position_reset(pageset1_map);
16954 +       memory_bm_position_reset(&dup_map1);
16955 +       memory_bm_position_reset(&dup_map2);
16956 +
16957 +       last_low_pbe_ptr = &restore_pblist;
16958 +
16959 +       /* First, allocate pages for the start of our pbe lists. */
16960 +       if (high_needed) {
16961 +               high_pbe_page = ___toi_get_nonconflicting_page(1);
16962 +               if (!high_pbe_page) {
16963 +                       result = -ENOMEM;
16964 +                       goto out;
16965 +               }
16966 +               this_high_pbe = (struct pbe *) kmap(high_pbe_page);
16967 +               memset(this_high_pbe, 0, PAGE_SIZE);
16968 +       }
16969 +
16970 +       low_pbe_page = ___toi_get_nonconflicting_page(0);
16971 +       if (!low_pbe_page) {
16972 +               result = -ENOMEM;
16973 +               goto out;
16974 +       }
16975 +       this_low_pbe = (struct pbe *) page_address(low_pbe_page);
16976 +
16977 +       /*
16978 +        * Next, allocate the number of pages we need.
16979 +        */
16980 +
16981 +       i = low_needed + high_needed;
16982 +
16983 +       do {
16984 +               int is_high;
16985 +
16986 +               if (i == low_needed)
16987 +                       flags &= ~__GFP_HIGHMEM;
16988 +
16989 +               page = toi_alloc_page(30, flags);
16990 +               BUG_ON(!page);
16991 +
16992 +               SetPagePageset1Copy(page);
16993 +               is_high = PageHighMem(page);
16994 +
16995 +               if (PagePageset1(page)) {
16996 +                       if (is_high)
16997 +                               high_direct++;
16998 +                       else
16999 +                               low_direct++;
17000 +               } else {
17001 +                       if (is_high)
17002 +                               highallocd++;
17003 +                       else
17004 +                               lowallocd++;
17005 +               }
17006 +       } while (--i);
17007 +
17008 +       high_needed -= high_direct;
17009 +       low_needed -= low_direct;
17010 +
17011 +       /*
17012 +        * Do we need to use some lowmem pages for the copies of highmem
17013 +        * pages?
17014 +        */
17015 +       if (high_needed > highallocd) {
17016 +               low_pages_for_highmem = high_needed - highallocd;
17017 +               high_needed -= low_pages_for_highmem;
17018 +               low_needed += low_pages_for_highmem;
17019 +       }
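+       /*
+        * Worked example (not from the original sources): if 100 highmem
+        * copies are still needed but only 80 highmem pages were
+        * allocated, 20 lowmem pages are earmarked for highmem data;
+        * high_needed drops to 80 and low_needed grows by 20.
+        */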
17020 +
17021 +       /*
17022 +        * Now generate our pbes (which will be used for the atomic restore),
17023 +        * and free unneeded pages.
17024 +        */
17025 +       memory_bm_position_reset(pageset1_copy_map);
17026 +       for (pfn = memory_bm_next_pfn(pageset1_copy_map); pfn != BM_END_OF_MAP;
17027 +                       pfn = memory_bm_next_pfn(pageset1_copy_map)) {
17028 +               int is_high;
17029 +               page = pfn_to_page(pfn);
17030 +               is_high = PageHighMem(page);
17031 +
17032 +               if (PagePageset1(page))
17033 +                       continue;
17034 +
17035 +               /* Nope. We're going to use this page. Add a pbe. */
17036 +               if (is_high || low_pages_for_highmem) {
17037 +                       struct page *orig_page;
17038 +                       high_pbes_done++;
17039 +                       if (!is_high)
17040 +                               low_pages_for_highmem--;
17041 +                       do {
17042 +                               orig_high_pfn = memory_bm_next_pfn(&dup_map1);
17043 +                               BUG_ON(orig_high_pfn == BM_END_OF_MAP);
17044 +                               orig_page = pfn_to_page(orig_high_pfn);
17045 +                       } while (!PageHighMem(orig_page) ||
17046 +                                       PagePageset1Copy(orig_page));
17047 +
17048 +                       this_high_pbe->orig_address = orig_page;
17049 +                       this_high_pbe->address = page;
17050 +                       this_high_pbe->next = NULL;
17051 +                       if (last_high_pbe_page != high_pbe_page) {
17052 +                               *last_high_pbe_ptr =
17053 +                                       (struct pbe *) high_pbe_page;
17054 +                               if (!last_high_pbe_page)
17055 +                                       last_high_pbe_page = high_pbe_page;
17056 +                       } else
17057 +                               *last_high_pbe_ptr = this_high_pbe;
17058 +                       last_high_pbe_ptr = &this_high_pbe->next;
17059 +                       if (last_high_pbe_page != high_pbe_page) {
17060 +                               kunmap(last_high_pbe_page);
17061 +                               last_high_pbe_page = high_pbe_page;
17062 +                       }
17063 +                       this_high_pbe = get_next_pbe(&high_pbe_page,
17064 +                                       this_high_pbe, 1);
17065 +                       if (IS_ERR(this_high_pbe)) {
17066 +                               printk(KERN_INFO "This high pbe is an error.\n");
17067 +                               result = -ENOMEM;
17068 +                               goto out;
17069 +                       }
17070 +               } else {
17071 +                       struct page *orig_page;
17072 +                       low_pbes_done++;
17073 +                       do {
17074 +                               orig_low_pfn = memory_bm_next_pfn(&dup_map2);
17075 +                               BUG_ON(orig_low_pfn == BM_END_OF_MAP);
17076 +                               orig_page = pfn_to_page(orig_low_pfn);
17077 +                       } while (PageHighMem(orig_page) ||
17078 +                                       PagePageset1Copy(orig_page));
17079 +
17080 +                       this_low_pbe->orig_address = page_address(orig_page);
17081 +                       this_low_pbe->address = page_address(page);
17082 +                       this_low_pbe->next = NULL;
17083 +                       *last_low_pbe_ptr = this_low_pbe;
17084 +                       last_low_pbe_ptr = &this_low_pbe->next;
17085 +                       this_low_pbe = get_next_pbe(&low_pbe_page,
17086 +                                       this_low_pbe, 0);
17087 +                       if (IS_ERR(this_low_pbe)) {
17088 +                               printk(KERN_INFO "this_low_pbe is an error.\n");
17089 +                               result = -ENOMEM; goto out;
17090 +                       }
17091 +               }
17092 +       }
17093 +
17094 +       if (high_pbe_page)
17095 +               kunmap(high_pbe_page);
17096 +
17097 +       if (last_high_pbe_page != high_pbe_page) {
17098 +               if (last_high_pbe_page)
17099 +                       kunmap(last_high_pbe_page);
17100 +               toi__free_page(29, high_pbe_page);
17101 +       }
17102 +
17103 +       free_conflicting_pages();
17104 +
17105 +out:
17106 +       memory_bm_free(&dup_map1, 0);
17107 +       memory_bm_free(&dup_map2, 0);
17108 +
17109 +       return result;
17110 +}
17111 +
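+/**
+ * add_boot_kernel_data_pbe - append a pbe for the boot kernel data
+ *
+ * toi_bkd must survive the atomic restore, so copy it into a
+ * nonconflicting page now and link that page onto the end of the lowmem
+ * pbe list, to be restored over boot_kernel_data_buffer with everything
+ * else.
+ */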
17112 +int add_boot_kernel_data_pbe(void)
17113 +{
17114 +       this_low_pbe->address = (char *) __toi_get_nonconflicting_page();
17115 +       if (!this_low_pbe->address) {
17116 +               printk(KERN_INFO "Failed to get bkd atomic restore buffer.\n");
17117 +               return -ENOMEM;
17118 +       }
17119 +
17120 +       toi_bkd.size = sizeof(toi_bkd);
17121 +       memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd));
17122 +
17123 +       *last_low_pbe_ptr = this_low_pbe;
17124 +       this_low_pbe->orig_address = (char *) boot_kernel_data_buffer;
17125 +       this_low_pbe->next = NULL;
17126 +       return 0;
17127 +}
17128 diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h
17129 new file mode 100644
17130 index 0000000..d08e4b1
17131 --- /dev/null
17132 +++ b/kernel/power/tuxonice_pagedir.h
17133 @@ -0,0 +1,50 @@
17134 +/*
17135 + * kernel/power/tuxonice_pagedir.h
17136 + *
17137 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
17138 + *
17139 + * This file is released under the GPLv2.
17140 + *
17141 + * Declarations for routines for handling pagesets.
17142 + */
17143 +
17144 +#ifndef KERNEL_POWER_PAGEDIR_H
17145 +#define KERNEL_POWER_PAGEDIR_H
17146 +
17147 +/* Pagedir
17148 + *
17149 + * Contains the metadata for a set of pages saved in the image.
17150 + */
17151 +
17152 +struct pagedir {
17153 +       int id;
17154 +       unsigned long size;
17155 +#ifdef CONFIG_HIGHMEM
17156 +       unsigned long size_high;
17157 +#endif
17158 +};
17159 +
17160 +#ifdef CONFIG_HIGHMEM
17161 +#define get_highmem_size(pagedir) (pagedir.size_high)
17162 +#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0)
17163 +#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0)
17164 +#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high)
17165 +#else
17166 +#define get_highmem_size(pagedir) (0)
17167 +#define set_highmem_size(pagedir, sz) do { } while (0)
17168 +#define inc_highmem_size(pagedir) do { } while (0)
17169 +#define get_lowmem_size(pagedir) (pagedir.size)
17170 +#endif
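+/*
+ * Example: get_lowmem_size(pagedir1) is the number of lowmem pages in
+ * pageset1, i.e. pagedir1.size minus its highmem component when
+ * CONFIG_HIGHMEM is set, or simply pagedir1.size otherwise.
+ */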
17171 +
17172 +extern struct pagedir pagedir1, pagedir2;
17173 +
17174 +extern void toi_copy_pageset1(void);
17175 +
17176 +extern int toi_get_pageset1_load_addresses(void);
17177 +
17178 +extern unsigned long __toi_get_nonconflicting_page(void);
17179 +struct page *___toi_get_nonconflicting_page(int can_be_highmem);
17180 +
17181 +extern void toi_reset_alt_image_pageset2_pfn(void);
17182 +extern int add_boot_kernel_data_pbe(void);
17183 +#endif
17184 diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c
17185 new file mode 100644
17186 index 0000000..e9ec5b5
17187 --- /dev/null
17188 +++ b/kernel/power/tuxonice_pageflags.c
17189 @@ -0,0 +1,28 @@
17190 +/*
17191 + * kernel/power/tuxonice_pageflags.c
17192 + *
17193 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
17194 + *
17195 + * This file is released under the GPLv2.
17196 + *
17197 + * Routines for serialising and relocating pageflags in which we
17198 + * store our image metadata.
17199 + */
17200 +
17201 +#include <linux/list.h>
17202 +#include "tuxonice_pageflags.h"
17203 +#include "power.h"
17204 +
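+/*
+ * The serialised pageflags stream sized below consists of an unsigned
+ * int block count followed, for each bm_block, by two unsigned longs
+ * (presumably the block's pfn range) and one page of bitmap data; the
+ * calculation only relies on these totals.
+ */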
17205 +int toi_pageflags_space_needed(void)
17206 +{
17207 +       int total = 0;
17208 +       struct bm_block *bb;
17209 +
17210 +       total = sizeof(unsigned int);
17211 +
17212 +       list_for_each_entry(bb, &pageset1_map->blocks, hook)
17213 +               total += 2 * sizeof(unsigned long) + PAGE_SIZE;
17214 +
17215 +       return total;
17216 +}
17217 +EXPORT_SYMBOL_GPL(toi_pageflags_space_needed);
17218 diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h
17219 new file mode 100644
17220 index 0000000..d5aa7b1
17221 --- /dev/null
17222 +++ b/kernel/power/tuxonice_pageflags.h
17223 @@ -0,0 +1,72 @@
17224 +/*
17225 + * kernel/power/tuxonice_pageflags.h
17226 + *
17227 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
17228 + *
17229 + * This file is released under the GPLv2.
17230 + */
17231 +
17232 +#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H
17233 +#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H
17234 +
17235 +extern struct memory_bitmap *pageset1_map;
17236 +extern struct memory_bitmap *pageset1_copy_map;
17237 +extern struct memory_bitmap *pageset2_map;
17238 +extern struct memory_bitmap *page_resave_map;
17239 +extern struct memory_bitmap *io_map;
17240 +extern struct memory_bitmap *nosave_map;
17241 +extern struct memory_bitmap *free_map;
17242 +
17243 +#define PagePageset1(page) \
17244 +       (memory_bm_test_bit(pageset1_map, page_to_pfn(page)))
17245 +#define SetPagePageset1(page) \
17246 +       (memory_bm_set_bit(pageset1_map, page_to_pfn(page)))
17247 +#define ClearPagePageset1(page) \
17248 +       (memory_bm_clear_bit(pageset1_map, page_to_pfn(page)))
17249 +
17250 +#define PagePageset1Copy(page) \
17251 +       (memory_bm_test_bit(pageset1_copy_map, page_to_pfn(page)))
17252 +#define SetPagePageset1Copy(page) \
17253 +       (memory_bm_set_bit(pageset1_copy_map, page_to_pfn(page)))
17254 +#define ClearPagePageset1Copy(page) \
17255 +       (memory_bm_clear_bit(pageset1_copy_map, page_to_pfn(page)))
17256 +
17257 +#define PagePageset2(page) \
17258 +       (memory_bm_test_bit(pageset2_map, page_to_pfn(page)))
17259 +#define SetPagePageset2(page) \
17260 +       (memory_bm_set_bit(pageset2_map, page_to_pfn(page)))
17261 +#define ClearPagePageset2(page) \
17262 +       (memory_bm_clear_bit(pageset2_map, page_to_pfn(page)))
17263 +
17264 +#define PageWasRW(page) \
17265 +       (memory_bm_test_bit(pageset2_map, page_to_pfn(page)))
17266 +#define SetPageWasRW(page) \
17267 +       (memory_bm_set_bit(pageset2_map, page_to_pfn(page)))
17268 +#define ClearPageWasRW(page) \
17269 +       (memory_bm_clear_bit(pageset2_map, page_to_pfn(page)))
17270 +
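+/*
+ * Note that the WasRW macros above operate on pageset2_map as well, so
+ * the WasRW and Pageset2 roles of that bitmap presumably are not active
+ * at the same time.
+ */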
17271 +#define PageResave(page) (page_resave_map ? \
17272 +       memory_bm_test_bit(page_resave_map, page_to_pfn(page)) : 0)
17273 +#define SetPageResave(page) \
17274 +       (memory_bm_set_bit(page_resave_map, page_to_pfn(page)))
17275 +#define ClearPageResave(page) \
17276 +       (memory_bm_clear_bit(page_resave_map, page_to_pfn(page)))
17277 +
17278 +#define PageNosave(page) (nosave_map ? \
17279 +               memory_bm_test_bit(nosave_map, page_to_pfn(page)) : 0)
17280 +#define SetPageNosave(page) \
17281 +       (memory_bm_set_bit(nosave_map, page_to_pfn(page)))
17282 +#define ClearPageNosave(page) \
17283 +       (memory_bm_clear_bit(nosave_map, page_to_pfn(page)))
17284 +
17285 +#define PageNosaveFree(page) (free_map ? \
17286 +               memory_bm_test_bit(free_map, page_to_pfn(page)) : 0)
17287 +#define SetPageNosaveFree(page) \
17288 +       (memory_bm_set_bit(free_map, page_to_pfn(page)))
17289 +#define ClearPageNosaveFree(page) \
17290 +       (memory_bm_clear_bit(free_map, page_to_pfn(page)))
17291 +
17292 +extern void save_pageflags(struct memory_bitmap *pagemap);
17293 +extern int load_pageflags(struct memory_bitmap *pagemap);
17294 +extern int toi_pageflags_space_needed(void);
17295 +#endif
17296 diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c
17297 new file mode 100644
17298 index 0000000..07e39c0
17299 --- /dev/null
17300 +++ b/kernel/power/tuxonice_power_off.c
17301 @@ -0,0 +1,285 @@
17302 +/*
17303 + * kernel/power/tuxonice_power_off.c
17304 + *
17305 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
17306 + *
17307 + * This file is released under the GPLv2.
17308 + *
17309 + * Support for powering down.
17310 + */
17311 +
17312 +#include <linux/device.h>
17313 +#include <linux/suspend.h>
17314 +#include <linux/mm.h>
17315 +#include <linux/pm.h>
17316 +#include <linux/reboot.h>
17317 +#include <linux/cpu.h>
17318 +#include <linux/console.h>
17319 +#include <linux/fs.h>
17320 +#include "tuxonice.h"
17321 +#include "tuxonice_ui.h"
17322 +#include "tuxonice_power_off.h"
17323 +#include "tuxonice_sysfs.h"
17324 +#include "tuxonice_modules.h"
17325 +#include "tuxonice_io.h"
17326 +
17327 +unsigned long toi_poweroff_method; /* 0 - Kernel power off */
17328 +EXPORT_SYMBOL_GPL(toi_poweroff_method);
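+/*
+ * Methods understood by __toi_power_down() below: 0 = plain
+ * kernel_power_off(); 3 = suspend to RAM first ("suspend to both");
+ * 4 = hibernation_platform_enter(); 5 = historic entry, now a no-op
+ * before the normal power off. Any other value falls through to the
+ * plain power off.
+ */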
17329 +
17330 +static int wake_delay;
17331 +static char lid_state_file[256], wake_alarm_dir[256];
17332 +static struct file *lid_file, *alarm_file, *epoch_file;
17333 +static int post_wake_state = -1;
17334 +
17335 +static int did_suspend_to_both;
17336 +
17337 +/*
17338 + * __toi_power_down
17339 + * Functionality   : Powers down or reboots the computer once the image
17340 + *                   has been written to disk.
17341 + * Key Assumptions : We can reboot or power down via the code called, or
17342 + *                   the warning emitted if the calls fail will be visible
17343 + *                   to the user (i.e. printk resumes devices).
17344 + */
17345 +
17346 +static void __toi_power_down(int method)
17347 +{
17348 +       int error;
17349 +
17350 +       toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." :
17351 +                       "Powering down.");
17352 +
17353 +       if (test_result_state(TOI_ABORTED))
17354 +               goto out;
17355 +
17356 +       if (test_action_state(TOI_REBOOT))
17357 +               kernel_restart(NULL);
17358 +
17359 +       switch (method) {
17360 +       case 0:
17361 +               break;
17362 +       case 3:
17363 +               /*
17364 +                * Re-read the overwritten part of pageset2 to make post-resume
17365 +                * faster.
17366 +                */
17367 +               if (read_pageset2(1))
17368 +                       panic("Attempt to reload pagedir 2 failed. "
17369 +                                       "Try rebooting.");
17370 +
17371 +               pm_prepare_console();
17372 +
17373 +               error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
17374 +               if (!error) {
17375 +                       error = suspend_devices_and_enter(PM_SUSPEND_MEM);
17376 +                       if (!error)
17377 +                               did_suspend_to_both = 1;
17378 +               }
17379 +               pm_notifier_call_chain(PM_POST_SUSPEND);
17380 +               pm_restore_console();
17381 +
17382 +               /* Success - we're now post-resume-from-ram */
17383 +               if (did_suspend_to_both)
17384 +                       return;
17385 +
17386 +               /* Failed to suspend to ram - do normal power off */
17387 +               break;
17388 +       case 4:
17389 +               /*
17390 +                * If this succeeds, it doesn't return. If it fails, fall
17391 +                * through to a simple power off.
17392 +                */
17393 +               hibernation_platform_enter();
17394 +               break;
17395 +       case 5:
17396 +               /* Historic entry only now */
17397 +               break;
17398 +       }
17399 +
17400 +       if (method && method != 5)
17401 +               toi_cond_pause(1,
17402 +                       "Falling back to alternate power off method.");
17403 +
17404 +       if (test_result_state(TOI_ABORTED))
17405 +               goto out;
17406 +
17407 +       kernel_power_off();
17408 +       kernel_halt();
17409 +       toi_cond_pause(1, "Powerdown failed.");
17410 +       while (1)
17411 +               cpu_relax();
17412 +
17413 +out:
17414 +       if (read_pageset2(1))
17415 +               panic("Attempt to reload pagedir 2 failed. Try rebooting.");
17416 +       return;
17417 +}
17418 +
17419 +#define CLOSE_FILE(file) do { \
17420 +       if (file) { \
17421 +               filp_close(file, NULL); file = NULL; \
17422 +       } } while (0)
17423 +
17424 +static void powerdown_cleanup(int toi_or_resume)
17425 +{
17426 +       if (!toi_or_resume)
17427 +               return;
17428 +
17429 +       CLOSE_FILE(lid_file);
17430 +       CLOSE_FILE(alarm_file);
17431 +       CLOSE_FILE(epoch_file);
17432 +}
17433 +
17434 +static void open_file(char *format, char *arg, struct file **var, int mode,
17435 +               char *desc)
17436 +{
17437 +       char buf[256];
17438 +
17439 +       if (strlen(arg)) {
17440 +               snprintf(buf, sizeof(buf), format, arg);
17441 +               *var = filp_open(buf, mode, 0);
17442 +               if (IS_ERR(*var) || !*var) {
17443 +                       printk(KERN_INFO "Failed to open %s file '%s' (%p).\n",
17444 +                               desc, buf, *var);
17445 +                       *var = NULL;
17446 +               }
17447 +       }
17448 +}
17449 +
17450 +static int powerdown_init(int toi_or_resume)
17451 +{
17452 +       if (!toi_or_resume)
17453 +               return 0;
17454 +
17455 +       did_suspend_to_both = 0;
17456 +
17457 +       open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file,
17458 +                       O_RDONLY, "lid");
17459 +
17460 +       if (strlen(wake_alarm_dir)) {
17461 +               open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir,
17462 +                               &alarm_file, O_WRONLY, "alarm");
17463 +
17464 +               open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir,
17465 +                               &epoch_file, O_RDONLY, "epoch");
17466 +       }
17467 +
17468 +       return 0;
17469 +}
17470 +
17471 +static int lid_closed(void)
17472 +{
17473 +       char array[25];
17474 +       ssize_t size;
17475 +       loff_t pos = 0;
17476 +
17477 +       if (!lid_file)
17478 +               return 0;
17479 +
17480 +       size = vfs_read(lid_file, (char __user *) array, 24, &pos);
17481 +       if (size < 1) {
17482 +               printk(KERN_INFO "Failed to read lid state file (%d).\n",
17483 +                       (int) size);
17484 +               return 0;
17485 +       }
17486 +       array[size] = 0; /* vfs_read doesn't NUL-terminate */
17487 +       if (!strcmp(array, "state:      closed\n"))
17488 +               return 1;
17489 +
17490 +       return 0;
17491 +}
17492 +
17493 +static void write_alarm_file(int value)
17494 +{
17495 +       ssize_t size;
17496 +       char buf[40];
17497 +       loff_t pos = 0;
17498 +
17499 +       if (!alarm_file)
17500 +               return;
17501 +
17502 +       sprintf(buf, "%d\n", value);
17503 +
17504 +       size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos);
17505 +
17506 +       if (size < 0)
17507 +               printk(KERN_INFO "Error %d writing alarm value %s.\n",
17508 +                               (int) size, buf);
17509 +}
17510 +
17511 +/**
17512 + * toi_check_resleep: See whether to power down again after waking.
17513 + *
17514 + * After waking, check whether we should power down again in a (usually
17515 + * different) way. We only do this if the lid switch is still closed.
17516 + */
17517 +void toi_check_resleep(void)
17518 +{
17519 +       /* We get here only after suspending to RAM and waking again. */
17520 +       if (lid_closed() && post_wake_state >= 0)
17521 +               __toi_power_down(post_wake_state);
17522 +}
17523 +
17524 +void toi_power_down(void)
17525 +{
17526 +       if (alarm_file && wake_delay) {
17527 +               char array[25];
17528 +               loff_t pos = 0;
17529 +               ssize_t size = vfs_read(epoch_file, (char __user *) array,
17530 +                               24, &pos);
17531 +               if (size < 1)
17532 +                       printk(KERN_INFO "Failed to read epoch file (%d).\n",
17533 +                                       (int) size);
17534 +               else {
17535 +                       unsigned long since_epoch;
17536 +                       array[size] = 0; /* NUL-terminate before parsing */
17537 +                       if (!strict_strtoul(array, 0, &since_epoch)) {
17538 +                               /* Clear any wakeup time. */
17539 +                               write_alarm_file(0);
17540 +
17541 +                               /* Set new wakeup time. */
17542 +                               write_alarm_file(since_epoch + wake_delay);
17543 +                       }
17544 +               }
17545 +       }
17546 +
17547 +       __toi_power_down(toi_poweroff_method);
17548 +
17549 +       toi_check_resleep();
17550 +}
17551 +EXPORT_SYMBOL_GPL(toi_power_down);
17552 +
17553 +static struct toi_sysfs_data sysfs_params[] = {
17554 +#if defined(CONFIG_ACPI)
17555 +       SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL),
17556 +       SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL),
17557 +       SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL),
17558 +       SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0,
17559 +                       NULL),
17560 +       SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0),
17561 +       SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both,
17562 +               0, 0, 0, NULL)
17563 +#endif
17564 +};
17565 +
17566 +static struct toi_module_ops powerdown_ops = {
17567 +       .type                           = MISC_HIDDEN_MODULE,
17568 +       .name                           = "poweroff",
17569 +       .initialise                     = powerdown_init,
17570 +       .cleanup                        = powerdown_cleanup,
17571 +       .directory                      = "[ROOT]",
17572 +       .module                         = THIS_MODULE,
17573 +       .sysfs_data                     = sysfs_params,
17574 +       .num_sysfs_entries              = sizeof(sysfs_params) /
17575 +               sizeof(struct toi_sysfs_data),
17576 +};
17577 +
17578 +int toi_poweroff_init(void)
17579 +{
17580 +       return toi_register_module(&powerdown_ops);
17581 +}
17582 +
17583 +void toi_poweroff_exit(void)
17584 +{
17585 +       toi_unregister_module(&powerdown_ops);
17586 +}
17587 diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h
17588 new file mode 100644
17589 index 0000000..9aa0ea8
17590 --- /dev/null
17591 +++ b/kernel/power/tuxonice_power_off.h
17592 @@ -0,0 +1,24 @@
17593 +/*
17594 + * kernel/power/tuxonice_power_off.h
17595 + *
17596 + * Copyright (C) 2006-2010 Nigel Cunningham (nigel at tuxonice net)
17597 + *
17598 + * This file is released under the GPLv2.
17599 + *
17600 + * Support for powering down.
17601 + */
17602 +
17603 +int toi_pm_state_finish(void);
17604 +void toi_power_down(void);
17605 +extern unsigned long toi_poweroff_method;
17606 +int toi_poweroff_init(void);
17607 +void toi_poweroff_exit(void);
17608 +void toi_check_resleep(void);
17609 +
17610 +extern int platform_begin(int platform_mode);
17611 +extern int platform_pre_snapshot(int platform_mode);
17612 +extern void platform_leave(int platform_mode);
17613 +extern void platform_end(int platform_mode);
17614 +extern void platform_finish(int platform_mode);
17615 +extern int platform_pre_restore(int platform_mode);
17616 +extern void platform_restore_cleanup(int platform_mode);
17617 diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c
17618 new file mode 100644
17619 index 0000000..9f74df0
17620 --- /dev/null
17621 +++ b/kernel/power/tuxonice_prepare_image.c
17622 @@ -0,0 +1,1107 @@
17623 +/*
17624 + * kernel/power/tuxonice_prepare_image.c
17625 + *
17626 + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net)
17627 + *
17628 + * This file is released under the GPLv2.
17629 + *
17630 + * We need to eat memory until we can:
17631 + * 1. Perform the save without changing anything (RAM_NEEDED < #pages)
17632 + * 2. Fit it all in available space (toiActiveAllocator->available_space() >=
17633 + *    main_storage_needed())
17634 + * 3. Reload the pagedir and pageset1 to places that don't collide with their
17635 + *    final destinations, not knowing to what extent the resumed kernel will
17636 + *    overlap with the one loaded at boot time. I think the resumed kernel
17637 + *    should overlap completely, but I don't want to rely on this as it is
17638 + *    an unproven assumption. We therefore assume there will be no overlap at
17639 + *    all (worst case).
17640 + * 4. Meet the user's requested limit (if any) on the size of the image.
17641 + *    The limit is in MB, so pages/256 (assuming 4K pages).
17642 + *
17643 + */
17644 +
17645 +#include <linux/highmem.h>
17646 +#include <linux/freezer.h>
17647 +#include <linux/hardirq.h>
17648 +#include <linux/mmzone.h>
17649 +#include <linux/console.h>
17650 +
17651 +#include "tuxonice_pageflags.h"
17652 +#include "tuxonice_modules.h"
17653 +#include "tuxonice_io.h"
17654 +#include "tuxonice_ui.h"
17655 +#include "tuxonice_prepare_image.h"
17656 +#include "tuxonice.h"
17657 +#include "tuxonice_extent.h"
17658 +#include "tuxonice_checksum.h"
17659 +#include "tuxonice_sysfs.h"
17660 +#include "tuxonice_alloc.h"
17661 +#include "tuxonice_atomic_copy.h"
17662 +#include "tuxonice_builtin.h"
17663 +
17664 +static unsigned long num_nosave, main_storage_allocated, storage_limit,
17665 +           header_storage_needed;
17666 +unsigned long extra_pd1_pages_allowance =
17667 +       CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE;
17668 +long image_size_limit;
17669 +static int no_ps2_needed;
17670 +
17671 +struct attention_list {
17672 +       struct task_struct *task;
17673 +       struct attention_list *next;
17674 +};
17675 +
17676 +static struct attention_list *attention_list;
17677 +
17678 +#define PAGESET1 0
17679 +#define PAGESET2 1
17680 +
17681 +void free_attention_list(void)
17682 +{
17683 +       struct attention_list *last = NULL;
17684 +
17685 +       while (attention_list) {
17686 +               last = attention_list;
17687 +               attention_list = attention_list->next;
17688 +               toi_kfree(6, last, sizeof(*last));
17689 +       }
17690 +}
17691 +
17692 +static int build_attention_list(void)
17693 +{
17694 +       int i, task_count = 0;
17695 +       struct task_struct *p;
17696 +       struct attention_list *next;
17697 +
17698 +       /*
17699 +        * Count all processes marked PF_NOFREEZE, plus the current task.
17700 +        */
17701 +       toi_read_lock_tasklist();
17702 +       for_each_process(p)
17703 +               if ((p->flags & PF_NOFREEZE) || p == current)
17704 +                       task_count++;
17705 +       toi_read_unlock_tasklist();
17706 +
17707 +       /*
17708 +        * Allocate attention list structs.
17709 +        */
17710 +       for (i = 0; i < task_count; i++) {
17711 +               struct attention_list *this =
17712 +                       toi_kzalloc(6, sizeof(struct attention_list),
17713 +                                       TOI_WAIT_GFP);
17714 +               if (!this) {
17715 +                       printk(KERN_INFO "Failed to allocate slab for "
17716 +                                       "attention list.\n");
17717 +                       free_attention_list();
17718 +                       return 1;
17719 +               }
17720 +               this->next = NULL;
17721 +               if (attention_list)
17722 +                       this->next = attention_list;
17723 +               attention_list = this;
17724 +       }
17725 +
17726 +       next = attention_list;
17727 +       toi_read_lock_tasklist();
17728 +       for_each_process(p)
17729 +               if ((p->flags & PF_NOFREEZE) || p == current) {
17730 +                       next->task = p;
17731 +                       next = next->next;
17732 +               }
17733 +       toi_read_unlock_tasklist();
17734 +       return 0;
17735 +}
17736 +
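+/*
+ * pageset2_full - mark every LRU page as pageset2
+ *
+ * Used when TOI_PAGESET2_FULL is set: every page on the zones' LRU
+ * lists is placed in pageset2, except pages whose mapping's host inode
+ * is flagged S_ATOMIC_COPY.
+ */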
17737 +static void pageset2_full(void)
17738 +{
17739 +       struct zone *zone;
17740 +       struct page *page;
17741 +       unsigned long flags;
17742 +       int i;
17743 +
17744 +       for_each_populated_zone(zone) {
17745 +               spin_lock_irqsave(&zone->lru_lock, flags);
17746 +               for_each_lru(i) {
17747 +                       if (!zone_page_state(zone, NR_LRU_BASE + i))
17748 +                               continue;
17749 +
17750 +                       list_for_each_entry(page, &zone->lru[i].list, lru) {
17751 +                               struct address_space *mapping;
17752 +
17753 +                               mapping = page_mapping(page);
17754 +                               if (!mapping || !mapping->host ||
17755 +                                   !(mapping->host->i_flags & S_ATOMIC_COPY))
17756 +                                       SetPagePageset2(page);
17757 +                       }
17758 +               }
17759 +               spin_unlock_irqrestore(&zone->lru_lock, flags);
17760 +       }
17761 +}
17762 +
17763 +/*
17764 + * toi_mark_task_as_pageset
17765 + * Functionality   : Marks all the saveable pages belonging to a given process
17766 + *                  as belonging to a particular pageset.
17767 + */
17768 +
17769 +static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2)
17770 +{
17771 +       struct vm_area_struct *vma;
17772 +       struct mm_struct *mm;
17773 +
17774 +       mm = t->active_mm;
17775 +
17776 +       if (!mm || !mm->mmap)
17777 +               return;
17778 +
17779 +       if (!irqs_disabled())
17780 +               down_read(&mm->mmap_sem);
17781 +
17782 +       for (vma = mm->mmap; vma; vma = vma->vm_next) {
17783 +               unsigned long posn;
17784 +
17785 +               if (!vma->vm_start ||
17786 +                   vma->vm_flags & (VM_IO | VM_RESERVED | VM_PFNMAP))
17787 +                       continue;
17788 +
17789 +               for (posn = vma->vm_start; posn < vma->vm_end;
17790 +                               posn += PAGE_SIZE) {
17791 +                       struct page *page = follow_page(vma, posn, 0);
17792 +                       struct address_space *mapping;
17793 +
17794 +                       if (!page || !pfn_valid(page_to_pfn(page)))
17795 +                               continue;
17796 +
17797 +                       mapping = page_mapping(page);
17798 +                       if (mapping && mapping->host &&
17799 +                           mapping->host->i_flags & S_ATOMIC_COPY)
17800 +                               continue;
17801 +
17802 +                       if (pageset2)
17803 +                               SetPagePageset2(page);
17804 +                       else {
17805 +                               ClearPagePageset2(page);
17806 +                               SetPagePageset1(page);
17807 +                       }
17808 +               }
17809 +       }
17810 +
17811 +       if (!irqs_disabled())
17812 +               up_read(&mm->mmap_sem);
17813 +}
17814 +
17815 +static void mark_tasks(int pageset)
17816 +{
17817 +       struct task_struct *p;
17818 +
17819 +       toi_read_lock_tasklist();
17820 +       for_each_process(p) {
17821 +               if (!p->mm)
17822 +                       continue;
17823 +
17824 +               if (p->flags & PF_KTHREAD)
17825 +                       continue;
17826 +
17827 +               toi_mark_task_as_pageset(p, pageset);
17828 +       }
17829 +       toi_read_unlock_tasklist();
17830 +
17831 +}
17832 +
17833 +/* toi_mark_pages_for_pageset2
17834 + *
17835 + * Description:        Mark unshared pages in processes not needed for hibernating
17836 + *             as able to be written out in a separate pagedir.
17837 + *             HighMem pages are simply marked as pageset2. They won't be
17838 + *             needed during hibernation.
17839 + */
17840 +
17841 +static void toi_mark_pages_for_pageset2(void)
17842 +{
17843 +       struct attention_list *this = attention_list;
17844 +
17845 +       memory_bm_clear(pageset2_map);
17846 +
17847 +       if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed)
17848 +               return;
17849 +
17850 +       if (test_action_state(TOI_PAGESET2_FULL))
17851 +               pageset2_full();
17852 +       else
17853 +               mark_tasks(PAGESET2);
17854 +
17855 +       /*
17856 +        * Because the tasks in attention_list are ones related to hibernating,
17857 +        * we know that they won't go away under us.
17858 +        */
17859 +
17860 +       while (this) {
17861 +               if (!test_result_state(TOI_ABORTED))
17862 +                       toi_mark_task_as_pageset(this->task, PAGESET1);
17863 +               this = this->next;
17864 +       }
17865 +}
17866 +
17867 +/*
17868 + * The atomic copy of pageset1 is stored in pageset2 pages.
17869 + * But if pageset1 is larger (normally only just after boot),
17870 + * we need to allocate extra pages to store the atomic copy.
17871 + * The following data struct and functions are used to handle
17872 + * the allocation and freeing of that memory.
17873 + */
17874 +
17875 +static unsigned long extra_pages_allocated;
17876 +
17877 +struct extras {
17878 +       struct page *page;
17879 +       int order;
17880 +       struct extras *next;
17881 +};
17882 +
17883 +static struct extras *extras_list;
17884 +
17885 +/* toi_free_extra_pagedir_memory
17886 + *
17887 + * Description:        Free previously allocated extra pagedir memory.
17888 + */
17889 +void toi_free_extra_pagedir_memory(void)
17890 +{
17891 +       /* Free allocated pages */
17892 +       while (extras_list) {
17893 +               struct extras *this = extras_list;
17894 +               int i;
17895 +
17896 +               extras_list = this->next;
17897 +
17898 +               for (i = 0; i < (1 << this->order); i++)
17899 +                       ClearPageNosave(this->page + i);
17900 +
17901 +               toi_free_pages(9, this->page, this->order);
17902 +               toi_kfree(7, this, sizeof(*this));
17903 +       }
17904 +
17905 +       extra_pages_allocated = 0;
17906 +}
17907 +
17908 +/* toi_allocate_extra_pagedir_memory
17909 + *
17910 + * Description:        Allocate memory for making the atomic copy of pagedir1 in the
17911 + *             case where it is bigger than pagedir2.
17912 + * Arguments:  int     extra_pages_needed: Total number of extra pages required.
17913 + * Result:     int.    Number of extra pages we now have allocated.
17914 + */
17915 +static int toi_allocate_extra_pagedir_memory(int extra_pages_needed)
17916 +{
17917 +       int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated;
17918 +       gfp_t flags = TOI_ATOMIC_GFP;
17919 +
17920 +       if (num_to_alloc < 1)
17921 +               return 0;
17922 +
17923 +       order = fls(num_to_alloc);
17924 +       if (order >= MAX_ORDER)
17925 +               order = MAX_ORDER - 1;
17926 +
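+       /*
+        * Illustrative walk-through: with 13 pages outstanding, the loop
+        * below tries order 3 (8 pages), then order 2 (4), then order 0
+        * (1), lowering the order whenever 1 << order exceeds what is
+        * still needed or an allocation fails.
+        */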
17927 +       while (num_to_alloc) {
17928 +               struct page *newpage;
17929 +               unsigned long virt;
17930 +               struct extras *extras_entry;
17931 +
17932 +               while ((1 << order) > num_to_alloc)
17933 +                       order--;
17934 +
17935 +               extras_entry = (struct extras *) toi_kzalloc(7,
17936 +                       sizeof(struct extras), TOI_ATOMIC_GFP);
17937 +
17938 +               if (!extras_entry)
17939 +                       return extra_pages_allocated;
17940 +
17941 +               virt = toi_get_free_pages(9, flags, order);
17942 +               while (!virt && order) {
17943 +                       order--;
17944 +                       virt = toi_get_free_pages(9, flags, order);
17945 +               }
17946 +
17947 +               if (!virt) {
17948 +                       toi_kfree(7, extras_entry, sizeof(*extras_entry));
17949 +                       return extra_pages_allocated;
17950 +               }
17951 +
17952 +               newpage = virt_to_page(virt);
17953 +
17954 +               extras_entry->page = newpage;
17955 +               extras_entry->order = order;
17956 +               extras_entry->next = NULL;
17957 +
17958 +               if (extras_list)
17959 +                       extras_entry->next = extras_list;
17960 +
17961 +               extras_list = extras_entry;
17962 +
17963 +               for (j = 0; j < (1 << order); j++) {
17964 +                       SetPageNosave(newpage + j);
17965 +                       SetPagePageset1Copy(newpage + j);
17966 +               }
17967 +
17968 +               extra_pages_allocated += (1 << order);
17969 +               num_to_alloc -= (1 << order);
17970 +       }
17971 +
17972 +       return extra_pages_allocated;
17973 +}
17974 +
17975 +/*
17976 + * real_nr_free_pages: Count free pages (including pcp lists) in the
17977 + * zones selected by zone_idx_mask (a mask of 1 << zone_idx() bits).
17978 + */
17979 +unsigned long real_nr_free_pages(unsigned long zone_idx_mask)
17980 +{
17981 +       struct zone *zone;
17982 +       int result = 0, cpu;
17983 +
17984 +       /* PCP lists */
17985 +       for_each_populated_zone(zone) {
17986 +               if (!(zone_idx_mask & (1 << zone_idx(zone))))
17987 +                       continue;
17988 +
17989 +               for_each_online_cpu(cpu) {
17990 +                       struct per_cpu_pageset *pset =
17991 +                               per_cpu_ptr(zone->pageset, cpu);
17992 +                       struct per_cpu_pages *pcp = &pset->pcp;
17993 +                       result += pcp->count;
17994 +               }
17995 +
17996 +               result += zone_page_state(zone, NR_FREE_PAGES);
17997 +       }
17998 +       return result;
17999 +}
18000 +EXPORT_SYMBOL_GPL(real_nr_free_pages);
18001 +
18002 +/*
18003 + * Discover how much extra memory will be required by the drivers
18004 + * when they're asked to hibernate. We can then ensure that amount
18005 + * of memory is available when we really want it.
18006 + */
18007 +static void get_extra_pd1_allowance(void)
18008 +{
18009 +       unsigned long orig_num_free = real_nr_free_pages(all_zones_mask), final;
18010 +
18011 +       toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
18012 +
18013 +       if (toi_go_atomic(PMSG_FREEZE, 1))
18014 +               return;
18015 +
18016 +       final = real_nr_free_pages(all_zones_mask);
18017 +       toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0);
18018 +
18019 +       extra_pd1_pages_allowance = (orig_num_free > final) ?
18020 +               orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE :
18021 +               MIN_EXTRA_PAGES_ALLOWANCE;
18022 +}
18023 +
18024 +/*
18025 + * Amount of storage needed, possibly taking into account the
18026 + * expected compression ratio and possibly also ignoring our
18027 + * allowance for extra pages.
18028 + */
18029 +static unsigned long main_storage_needed(int use_ecr,
18030 +               int ignore_extra_pd1_allow)
18031 +{
18032 +       return (pagedir1.size + pagedir2.size +
18033 +         (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) *
18034 +        (use_ecr ? toi_expected_compression_ratio() : 100) / 100;
18035 +}
18036 +
18037 +/*
18038 + * Storage needed for the image header, computed in bytes but returned in pages.
18039 + */
18040 +unsigned long get_header_storage_needed(void)
18041 +{
18042 +       unsigned long bytes = sizeof(struct toi_header) +
18043 +                       toi_header_storage_for_modules() +
18044 +                       toi_pageflags_space_needed() +
18045 +                       fs_info_space_needed();
18046 +
18047 +       return DIV_ROUND_UP(bytes, PAGE_SIZE);
18048 +}
18049 +EXPORT_SYMBOL_GPL(get_header_storage_needed);
18050 +
18051 +/*
18052 + * When freeing memory, pages from either pageset might be freed.
18053 + *
18054 + * When seeking to free memory to be able to hibernate, for every ps1 page
18055 + * freed, we need 2 less pages for the atomic copy because there is one less
18056 + * page to copy and one more page into which data can be copied.
18057 + *
18058 + * Freeing ps2 pages saves us nothing directly. No more memory is available
18059 + * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but
18060 + * that's too much work to figure out.
18061 + *
18062 + * => ps1_to_free functions
18063 + *
18064 + * Of course if we just want to reduce the image size, because of storage
18065 + * limitations or an image size limit, either ps will do.
18066 + *
18067 + * => any_to_free function
18068 + */
18069 +
18070 +static unsigned long lowpages_usable_for_highmem_copy(void)
18071 +{
18072 +       unsigned long needed = get_lowmem_size(pagedir1) +
18073 +                       extra_pd1_pages_allowance + MIN_FREE_RAM +
18074 +                       toi_memory_for_modules(0),
18075 +               available = get_lowmem_size(pagedir2) +
18076 +                        real_nr_free_low_pages() + extra_pages_allocated;
18077 +
18078 +       return available > needed ? available - needed : 0;
18079 +}
18080 +
18081 +static unsigned long highpages_ps1_to_free(void)
18082 +{
18083 +       unsigned long need = get_highmem_size(pagedir1),
18084 +                     available = get_highmem_size(pagedir2) +
18085 +                             real_nr_free_high_pages() +
18086 +                             lowpages_usable_for_highmem_copy();
18087 +
18088 +       return need > available ? DIV_ROUND_UP(need - available, 2) : 0;
18089 +}
18090 +
18091 +static unsigned long lowpages_ps1_to_free(void)
18092 +{
18093 +       unsigned long needed = get_lowmem_size(pagedir1) +
18094 +                       extra_pd1_pages_allowance + MIN_FREE_RAM +
18095 +                       toi_memory_for_modules(0),
18096 +               available = get_lowmem_size(pagedir2) +
18097 +                        real_nr_free_low_pages() + extra_pages_allocated;
18098 +
18099 +       return needed > available ? DIV_ROUND_UP(needed - available, 2) : 0;
18100 +}
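+/*
+ * The DIV_ROUND_UP(..., 2) in the two *_ps1_to_free() helpers above
+ * applies the rule stated earlier: each ps1 page freed shrinks the
+ * shortfall by two, being one page fewer to copy and one more page
+ * usable as a copy destination.
+ */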
18101 +
18102 +static unsigned long current_image_size(void)
18103 +{
18104 +       return pagedir1.size + pagedir2.size + header_storage_needed;
18105 +}
18106 +
18107 +static unsigned long storage_still_required(void)
18108 +{
18109 +       unsigned long needed = main_storage_needed(1, 1);
18110 +       return needed > storage_limit ? needed - storage_limit : 0;
18111 +}
18112 +
18113 +static unsigned long ram_still_required(void)
18114 +{
18115 +       unsigned long needed = MIN_FREE_RAM + toi_memory_for_modules(0) +
18116 +               2 * extra_pd1_pages_allowance,
18117 +                 available = real_nr_free_low_pages() + extra_pages_allocated;
18118 +       return needed > available ? needed - available : 0;
18119 +}
18120 +
18121 +static unsigned long any_to_free(int use_image_size_limit)
18122 +{
18123 +       int use_soft_limit = use_image_size_limit && image_size_limit > 0;
18124 +       unsigned long current_size = current_image_size(),
18125 +                     soft_limit = use_soft_limit ? (image_size_limit << 8) : 0,
18126 +                     to_free = use_soft_limit ? (current_size > soft_limit ?
18127 +                                     current_size - soft_limit : 0) : 0,
18128 +                     storage_limit = storage_still_required(),
18129 +                     ram_limit = ram_still_required(),
18130 +                     first_max = max(to_free, storage_limit);
18131 +
18132 +       return max(first_max, ram_limit);
18133 +}
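+/*
+ * Note: image_size_limit is in MB, so the << 8 above converts it to 4K
+ * pages (1 MB = 256 pages), matching the "pages/256" rule in the header
+ * comment.
+ */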
18134 +
18135 +static int need_pageset2(void)
18136 +{
18137 +       return (real_nr_free_low_pages() + extra_pages_allocated -
18138 +               2 * extra_pd1_pages_allowance - MIN_FREE_RAM -
18139 +                toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size;
18140 +}
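+/*
+ * Reading of the test above: take the free low pages plus the extras
+ * already allocated, reserve twice the extra pd1 allowance,
+ * MIN_FREE_RAM, module memory and room for pageset1; pageset2 is needed
+ * if what remains cannot hold it.
+ */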
18141 +
18142 +/* amount_needed
18143 + *
18144 + * Calculates the amount by which the image size needs to be reduced to meet
18145 + * our constraints.
18146 + */
18147 +static unsigned long amount_needed(int use_image_size_limit)
18148 +{
18149 +       return max(highpages_ps1_to_free() + lowpages_ps1_to_free(),
18150 +                       any_to_free(use_image_size_limit));
18151 +}
18152 +
18153 +static int image_not_ready(int use_image_size_limit)
18154 +{
18155 +       toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
18156 +               "Amount still needed (%lu) > 0:%u,"
18157 +               " Storage allocd: %lu < %lu: %u.\n",
18158 +                       amount_needed(use_image_size_limit),
18159 +                       (amount_needed(use_image_size_limit) > 0),
18160 +                       main_storage_allocated,
18161 +                       main_storage_needed(1, 1),
18162 +                       main_storage_allocated < main_storage_needed(1, 1));
18163 +
18164 +       toi_cond_pause(0, NULL);
18165 +
18166 +       return (amount_needed(use_image_size_limit) > 0) ||
18167 +                main_storage_allocated < main_storage_needed(1, 1);
18168 +}
18169 +
18170 +static void display_failure_reason(int tries_exceeded)
18171 +{
18172 +       unsigned long storage_required = storage_still_required(),
18173 +           ram_required = ram_still_required(),
18174 +           high_ps1 = highpages_ps1_to_free(),
18175 +           low_ps1 = lowpages_ps1_to_free();
18176 +
18177 +       printk(KERN_INFO "Failed to prepare the image because...\n");
18178 +
18179 +       if (!storage_limit) {
18180 +               printk(KERN_INFO "- You need some storage available to be "
18181 +                               "able to hibernate.\n");
18182 +               return;
18183 +       }
18184 +
18185 +       if (tries_exceeded)
18186 +               printk(KERN_INFO "- The maximum number of iterations was "
18187 +                               "reached without successfully preparing the "
18188 +                               "image.\n");
18189 +
18190 +       if (storage_required) {
18191 +               printk(KERN_INFO " - We need at least %lu pages of storage "
18192 +                               "(ignoring the header), but only have %lu.\n",
18193 +                               main_storage_needed(1, 1),
18194 +                               main_storage_allocated);
18195 +               set_abort_result(TOI_INSUFFICIENT_STORAGE);
18196 +       }
18197 +
18198 +       if (ram_required) {
18199 +               printk(KERN_INFO " - We need %lu more free pages of low "
18200 +                               "memory.\n", ram_required);
18201 +               printk(KERN_INFO "     Minimum free     : %8d\n", MIN_FREE_RAM);
18202 +               printk(KERN_INFO "   + Reqd. by modules : %8lu\n",
18203 +                               toi_memory_for_modules(0));
18204 +               printk(KERN_INFO "   + 2 * extra allow  : %8lu\n",
18205 +                               2 * extra_pd1_pages_allowance);
18206 +               printk(KERN_INFO "   - Currently free   : %8lu\n",
18207 +                               real_nr_free_low_pages());
18208 +               printk(KERN_INFO "   - Pages allocd     : %8lu\n",
18209 +                               extra_pages_allocated);
18210 +               printk(KERN_INFO "                      : ========\n");
18211 +               printk(KERN_INFO "     Still needed     : %8lu\n",
18212 +                               ram_required);
18213 +
18214 +               /* Print breakdown of memory needed for modules */
18215 +               toi_memory_for_modules(1);
18216 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
18217 +       }
18218 +
18219 +       if (high_ps1) {
18220 +               printk(KERN_INFO " - We need to free %lu highmem pageset 1 "
18221 +                               "pages.\n", high_ps1);
18222 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
18223 +       }
18224 +
18225 +       if (low_ps1) {
18226 +               printk(KERN_INFO " - We need to free %lu lowmem pageset 1 "
18227 +                               "pages.\n", low_ps1);
18228 +               set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
18229 +       }
18230 +}
18231 +
18232 +static void display_stats(int always, int sub_extra_pd1_allow)
18233 +{
18234 +       char buffer[255];
18235 +       snprintf(buffer, 254,
18236 +               "Free:%lu(%lu). Sets:%lu(%lu),%lu(%lu). "
18237 +               "Nosave:%lu-%lu=%lu. Storage:%lu/%lu(%lu=>%lu). "
18238 +               "Needed:%lu,%lu,%lu(%u,%lu,%lu,%ld) (PS2:%s)\n",
18239 +
18240 +               /* Free */
18241 +               real_nr_free_pages(all_zones_mask),
18242 +               real_nr_free_low_pages(),
18243 +
18244 +               /* Sets */
18245 +               pagedir1.size, pagedir1.size - get_highmem_size(pagedir1),
18246 +               pagedir2.size, pagedir2.size - get_highmem_size(pagedir2),
18247 +
18248 +               /* Nosave */
18249 +               num_nosave, extra_pages_allocated,
18250 +               num_nosave - extra_pages_allocated,
18251 +
18252 +               /* Storage */
18253 +               main_storage_allocated,
18254 +               storage_limit,
18255 +               main_storage_needed(1, sub_extra_pd1_allow),
18256 +               main_storage_needed(1, 1),
18257 +
18258 +               /* Needed */
18259 +               lowpages_ps1_to_free(), highpages_ps1_to_free(),
18260 +               any_to_free(1),
18261 +               MIN_FREE_RAM, toi_memory_for_modules(0),
18262 +               extra_pd1_pages_allowance,
18263 +               image_size_limit,
18264 +
18265 +               need_pageset2() ? "yes" : "no");
18266 +
18267 +       if (always)
18268 +               printk("%s", buffer);
18269 +       else
18270 +               toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, "%s", buffer);
18271 +}
18272 +
18273 +/* generate_free_page_map
18274 + *
18275 + * Description:        This routine generates a bitmap of free pages from the
18276 + *             lists used by the memory manager. We then use the bitmap
18277 + *             to quickly calculate which pages to save and in which
18278 + *             pagesets.
18279 + */
18280 +static void generate_free_page_map(void)
18281 +{
18282 +       int order, cpu, t;
18283 +       unsigned long flags, i;
18284 +       struct zone *zone;
18285 +       struct list_head *curr;
18286 +       unsigned long pfn;
18287 +       struct page *page;
18288 +
18289 +       for_each_populated_zone(zone) {
18290 +
18291 +               if (!zone->spanned_pages)
18292 +                       continue;
18293 +
18294 +               spin_lock_irqsave(&zone->lock, flags);
18295 +
18296 +               for (i = 0; i < zone->spanned_pages; i++) {
18297 +                       pfn = ZONE_START(zone) + i;
18298 +
18299 +                       if (!pfn_valid(pfn))
18300 +                               continue;
18301 +
18302 +                       page = pfn_to_page(pfn);
18303 +
18304 +                       ClearPageNosaveFree(page);
18305 +               }
18306 +
18307 +               for_each_migratetype_order(order, t) {
18308 +                       list_for_each(curr,
18309 +                                       &zone->free_area[order].free_list[t]) {
18310 +                               unsigned long j;
18311 +
18312 +                               pfn = page_to_pfn(list_entry(curr, struct page,
18313 +                                                       lru));
18314 +                               for (j = 0; j < (1UL << order); j++)
18315 +                                       SetPageNosaveFree(pfn_to_page(pfn + j));
18316 +                       }
18317 +               }
18318 +
18319 +               for_each_online_cpu(cpu) {
18320 +                       struct per_cpu_pageset *pset =
18321 +                               per_cpu_ptr(zone->pageset, cpu);
18322 +                       struct per_cpu_pages *pcp = &pset->pcp;
18323 +                       struct page *page;
18324 +                       int t;
18325 +
18326 +                       for (t = 0; t < MIGRATE_PCPTYPES; t++)
18327 +                               list_for_each_entry(page, &pcp->lists[t], lru)
18328 +                                       SetPageNosaveFree(page);
18329 +               }
18330 +
18331 +               spin_unlock_irqrestore(&zone->lock, flags);
18332 +       }
18333 +}
18334 +
18335 +/* size_of_free_region
18336 + *
18337 + * Description:        Return the number of pages that are free, beginning with and
18338 + *             including this one.
18339 + */
18340 +static int size_of_free_region(struct zone *zone, unsigned long start_pfn)
18341 +{
18342 +       unsigned long this_pfn = start_pfn,
18343 +                     end_pfn = ZONE_START(zone) + zone->spanned_pages - 1;
18344 +
18345 +       while (this_pfn <= end_pfn && PageNosaveFree(pfn_to_page(this_pfn)))
18346 +               this_pfn++;
18347 +
18348 +       return this_pfn - start_pfn;
18349 +}
18350 +
18351 +/* flag_image_pages
18352 + *
18353 + * This routine generates our lists of pages to be stored in each
18354 + * pageset. Since we store the data using extents, and adding new
18355 + * extents might allocate a new extent page, this routine may well
18356 + * be called more than once.
18357 + */
18358 +static void flag_image_pages(int atomic_copy)
18359 +{
18360 +       int num_free = 0;
18361 +       unsigned long loop;
18362 +       struct zone *zone;
18363 +
18364 +       pagedir1.size = 0;
18365 +       pagedir2.size = 0;
18366 +
18367 +       set_highmem_size(pagedir1, 0);
18368 +       set_highmem_size(pagedir2, 0);
18369 +
18370 +       num_nosave = 0;
18371 +
18372 +       memory_bm_clear(pageset1_map);
18373 +
18374 +       generate_free_page_map();
18375 +
18376 +       /*
18377 +        * Pages not to be saved are marked Nosave irrespective of being
18378 +        * reserved.
18379 +        */
18380 +       for_each_populated_zone(zone) {
18381 +               int highmem = is_highmem(zone);
18382 +
18383 +               for (loop = 0; loop < zone->spanned_pages; loop++) {
18384 +                       unsigned long pfn = ZONE_START(zone) + loop;
18385 +                       struct page *page;
18386 +                       int chunk_size;
18387 +
18388 +                       if (!pfn_valid(pfn))
18389 +                               continue;
18390 +
18391 +                       chunk_size = size_of_free_region(zone, pfn);
18392 +                       if (chunk_size) {
18393 +                               num_free += chunk_size;
18394 +                               loop += chunk_size - 1;
18395 +                               continue;
18396 +                       }
18397 +
18398 +                       page = pfn_to_page(pfn);
18399 +
18400 +                       if (PageNosave(page)) {
18401 +                               num_nosave++;
18402 +                               continue;
18403 +                       }
18404 +
18405 +                       page = highmem ? saveable_highmem_page(zone, pfn) :
18406 +                               saveable_page(zone, pfn);
18407 +
18408 +                       if (!page) {
18409 +                               num_nosave++;
18410 +                               continue;
18411 +                       }
18412 +
18413 +                       if (PagePageset2(page)) {
18414 +                               pagedir2.size++;
18415 +                               if (PageHighMem(page))
18416 +                                       inc_highmem_size(pagedir2);
18417 +                               else
18418 +                                       SetPagePageset1Copy(page);
18419 +                               if (PageResave(page)) {
18420 +                                       SetPagePageset1(page);
18421 +                                       ClearPagePageset1Copy(page);
18422 +                                       pagedir1.size++;
18423 +                                       if (PageHighMem(page))
18424 +                                               inc_highmem_size(pagedir1);
18425 +                               }
18426 +                       } else {
18427 +                               pagedir1.size++;
18428 +                               SetPagePageset1(page);
18429 +                               if (PageHighMem(page))
18430 +                                       inc_highmem_size(pagedir1);
18431 +                       }
18432 +               }
18433 +       }
18434 +
18435 +       if (!atomic_copy)
18436 +               toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0,
18437 +                       "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)"
18438 +                                               " + NumFree (%d) = %d.\n",
18439 +                       pagedir1.size, pagedir2.size, num_nosave, num_free,
18440 +                       pagedir1.size + pagedir2.size + num_nosave + num_free);
18441 +}
18442 +
18443 +void toi_recalculate_image_contents(int atomic_copy)
18444 +{
18445 +       memory_bm_clear(pageset1_map);
18446 +       if (!atomic_copy) {
18447 +               unsigned long pfn;
18448 +               memory_bm_position_reset(pageset2_map);
18449 +               for (pfn = memory_bm_next_pfn(pageset2_map);
18450 +                               pfn != BM_END_OF_MAP;
18451 +                               pfn = memory_bm_next_pfn(pageset2_map))
18452 +                       ClearPagePageset1Copy(pfn_to_page(pfn));
18453 +               /* Need to call this before getting pageset1_size! */
18454 +               toi_mark_pages_for_pageset2();
18455 +       }
18456 +       flag_image_pages(atomic_copy);
18457 +
18458 +       if (!atomic_copy) {
18459 +               storage_limit = toiActiveAllocator->storage_available();
18460 +               display_stats(0, 0);
18461 +       }
18462 +}
18463 +
18464 +/* update_image
18465 + *
18466 + * Allocate [more] memory and storage for the image.
18467 + */
18468 +static void update_image(int ps2_recalc)
18469 +{
18470 +       int old_header_req;
18471 +       unsigned long seek, wanted, got;
18472 +
18473 +       /* Include allowance for growth in pagedir1 while writing pagedir 2 */
18474 +       wanted = pagedir1.size + extra_pd1_pages_allowance -
18475 +               get_lowmem_size(pagedir2);
18476 +       if (wanted > extra_pages_allocated) {
18477 +               got = toi_allocate_extra_pagedir_memory(wanted);
18478 +               if (got < wanted) {
18479 +                       toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
18480 +                               "Want %lu extra pages for pageset1, got %lu.\n",
18481 +                               wanted, got);
18482 +                       return;
18483 +               }
18484 +       }
18485 +
18486 +       if (ps2_recalc)
18487 +               goto recalc;
18488 +
18489 +       thaw_kernel_threads();
18490 +
18491 +       /*
18492 +        * Allocate remaining storage space, if possible, up to the
18493 +        * maximum we know we'll need. It's okay to allocate the
18494 +        * maximum if the writer is the swapwriter, but
18495 +        * we don't want to grab all available space on an NFS share.
18496 +        * We therefore ignore the expected compression ratio here,
18497 +        * thereby trying to allocate the maximum image size we could
18498 +        * need (assuming compression doesn't expand the image), but
18499 +        * don't complain if we can't get the full amount we're after.
18500 +        */
18501 +
18502 +       do {
18503 +               int result;
18504 +
18505 +               old_header_req = header_storage_needed;
18506 +               toiActiveAllocator->reserve_header_space(header_storage_needed);
18507 +
18508 +               /* How much storage is free with the reservation applied? */
18509 +               storage_limit = toiActiveAllocator->storage_available();
18510 +               seek = min(storage_limit, main_storage_needed(0, 0));
18511 +
18512 +               result = toiActiveAllocator->allocate_storage(seek);
18513 +               if (result)
18514 +                       printk(KERN_ERR "Failed to allocate storage (%d).\n", result);
18515 +
18516 +               main_storage_allocated =
18517 +                       toiActiveAllocator->storage_allocated();
18518 +
18519 +               /* Need more header because more storage allocated? */
18520 +               header_storage_needed = get_header_storage_needed();
18521 +
18522 +       } while (header_storage_needed > old_header_req);
18523 +
18524 +       if (freeze_processes())
18525 +               set_abort_result(TOI_FREEZING_FAILED);
18526 +
18527 +recalc:
18528 +       toi_recalculate_image_contents(0);
18529 +}
18530 +
18531 +/* attempt_to_freeze
18532 + *
18533 + * Try to freeze processes.
18534 + */
18535 +
18536 +static int attempt_to_freeze(void)
18537 +{
18538 +       int result;
18539 +
18540 +       /* Thaw first so that refreezing starts from a clean state */
18541 +       thaw_processes();
18542 +       toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing "
18543 +                       "filesystems.");
18544 +       result = freeze_processes();
18545 +
18546 +       if (result)
18547 +               set_abort_result(TOI_FREEZING_FAILED);
18548 +
18549 +       return result;
18550 +}
18551 +
18552 +/* eat_memory
18553 + *
18554 + * Try to free some memory, either to meet hard or soft constraints on the image
18555 + * characteristics.
18556 + *
18557 + * Hard constraints:
18558 + * - Pageset1 must be < half of memory;
18559 + * - We must have enough memory free at resume time to have pageset1
18560 + *   be able to be loaded in pages that don't conflict with where it has to
18561 + *   be restored.
18562 + * Soft constraints:
18563 + * - User specified image size limit.
18564 + */
18565 +static void eat_memory(void)
18566 +{
18567 +       unsigned long amount_wanted = 0;
18568 +       int did_eat_memory = 0;
18569 +
18570 +       /*
18571 +        * Note that if we have enough storage space and enough free memory, we
18572 +        * may exit without eating anything. We give up when the last 10
18573 +        * iterations ate no extra pages because we're not going to get much
18574 +        * more anyway, but the few pages we get will take a lot of time.
18575 +        *
18576 +        * We freeze processes before beginning, and then unfreeze them if we
18577 +        * need to eat memory until we think we have enough. If our attempts
18578 +        * to freeze fail, we give up and abort.
18579 +        */
18580 +
18581 +       amount_wanted = amount_needed(1);
18582 +
18583 +       switch (image_size_limit) {
18584 +       case -1: /* Don't eat any memory */
18585 +               if (amount_wanted > 0) {
18586 +                       set_abort_result(TOI_WOULD_EAT_MEMORY);
18587 +                       return;
18588 +               }
18589 +               break;
18590 +       case -2:  /* Free caches only */
18591 +               drop_pagecache();
18592 +               toi_recalculate_image_contents(0);
18593 +               amount_wanted = amount_needed(1);
18594 +               break;
18595 +       default:
18596 +               break;
18597 +       }
18598 +
18599 +       if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) &&
18600 +                       image_size_limit != -1) {
18601 +               unsigned long request = amount_wanted;
18602 +               unsigned long high_req = max(highpages_ps1_to_free(),
18603 +                               any_to_free(1));
18604 +               unsigned long low_req = lowpages_ps1_to_free();
18605 +               unsigned long got = 0;
18606 +
18607 +               toi_prepare_status(CLEAR_BAR,
18608 +                               "Seeking to free %ldMB of memory.",
18609 +                               MB(amount_wanted));
18610 +
18611 +               thaw_kernel_threads();
18612 +
18613 +               /*
18614 +                * Ask for too many because shrink_all_memory doesn't
18615 +                * currently return enough most of the time.
18616 +                */
18617 +
18618 +               if (low_req)
18619 +                       got = shrink_memory_mask(low_req, GFP_KERNEL);
18620 +               if (high_req)
18621 +                       shrink_memory_mask(high_req - got, GFP_HIGHUSER);
18622 +
18623 +               did_eat_memory = 1;
18624 +
18625 +               toi_recalculate_image_contents(0);
18626 +
18627 +               amount_wanted = amount_needed(1);
18628 +
18629 +               printk(KERN_DEBUG "Asked shrink_all_memory for %ld low pages &"
18630 +                               " %ld pages from anywhere, got %ld.\n",
18631 +                               low_req, high_req,
18632 +                               request - amount_wanted);
18633 +
18634 +               toi_cond_pause(0, NULL);
18635 +
18636 +               if (freeze_processes())
18637 +                       set_abort_result(TOI_FREEZING_FAILED);
18638 +       }
18639 +
18640 +       if (did_eat_memory)
18641 +               toi_recalculate_image_contents(0);
18642 +}
18643 +
18644 +/* toi_prepare_image
18645 + *
18646 + * Entry point to the whole image preparation section.
18647 + *
18648 + * We do four things:
18649 + * - Freeze processes;
18650 + * - Ensure image size constraints are met;
18651 + * - Complete all the preparation for saving the image,
18652 + *   including allocation of storage. The only memory
18653 + *   that should be needed when we're finished is that
18654 + *   for actually storing the image (and we know how
18655 + *   much is needed for that because the modules tell
18656 + *   us).
18657 + * - Make sure that all dirty buffers are written out.
18658 + */
18659 +#define MAX_TRIES 2
18660 +int toi_prepare_image(void)
18661 +{
18662 +       int result = 1, tries = 1;
18663 +
18664 +       main_storage_allocated = 0;
18665 +       no_ps2_needed = 0;
18666 +
18667 +       if (attempt_to_freeze())
18668 +               return 1;
18669 +
18670 +       if (!extra_pd1_pages_allowance)
18671 +               get_extra_pd1_allowance();
18672 +
18673 +       storage_limit = toiActiveAllocator->storage_available();
18674 +
18675 +       if (!storage_limit) {
18676 +               printk(KERN_INFO "No storage available. Didn't try to prepare "
18677 +                               "an image.\n");
18678 +               display_failure_reason(0);
18679 +               set_abort_result(TOI_NOSTORAGE_AVAILABLE);
18680 +               return 1;
18681 +       }
18682 +
18683 +       if (build_attention_list()) {
18684 +               abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
18685 +                               "Unable to successfully prepare the image.\n");
18686 +               return 1;
18687 +       }
18688 +
18689 +       toi_recalculate_image_contents(0);
18690 +
18691 +       do {
18692 +               toi_prepare_status(CLEAR_BAR,
18693 +                               "Preparing Image. Try %d.", tries);
18694 +
18695 +               eat_memory();
18696 +
18697 +               if (test_result_state(TOI_ABORTED))
18698 +                       break;
18699 +
18700 +               update_image(0);
18701 +
18702 +               tries++;
18703 +
18704 +       } while (image_not_ready(1) && tries <= MAX_TRIES &&
18705 +                       !test_result_state(TOI_ABORTED));
18706 +
18707 +       result = image_not_ready(0);
18708 +
18709 +       if (!test_result_state(TOI_ABORTED)) {
18710 +               if (result) {
18711 +                       display_stats(1, 0);
18712 +                       display_failure_reason(tries > MAX_TRIES);
18713 +                       abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
18714 +                               "Unable to successfully prepare the image.\n");
18715 +               } else {
18716 +                       /* Pageset 2 needed? */
18717 +                       if (!need_pageset2() &&
18718 +                                 test_action_state(TOI_NO_PS2_IF_UNNEEDED)) {
18719 +                               no_ps2_needed = 1;
18720 +                               toi_recalculate_image_contents(0);
18721 +                               update_image(1);
18722 +                       }
18723 +
18724 +                       toi_cond_pause(1, "Image preparation complete.");
18725 +               }
18726 +       }
18727 +
18728 +       return result ? result : allocate_checksum_pages();
18729 +}
18730 diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h
18731 new file mode 100644
18732 index 0000000..7b52e9e
18733 --- /dev/null
18734 +++ b/kernel/power/tuxonice_prepare_image.h
18735 @@ -0,0 +1,36 @@
18736 +/*
18737 + * kernel/power/tuxonice_prepare_image.h
18738 + *
18739 + * Copyright (C) 2003-2010 Nigel Cunningham (nigel at tuxonice net)
18740 + *
18741 + * This file is released under the GPLv2.
18742 + *
18743 + */
18744 +
18745 +#include <asm/sections.h>
18746 +
18747 +extern int toi_prepare_image(void);
18748 +extern void toi_recalculate_image_contents(int atomic_copy);
18749 +extern unsigned long real_nr_free_pages(unsigned long zone_idx_mask);
18750 +extern long image_size_limit;
18751 +extern void toi_free_extra_pagedir_memory(void);
18752 +extern unsigned long extra_pd1_pages_allowance;
18753 +extern void free_attention_list(void);
18754 +
18755 +#define MIN_FREE_RAM 100
18756 +#define MIN_EXTRA_PAGES_ALLOWANCE 500
18757 +
18758 +#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))
18759 +#ifdef CONFIG_HIGHMEM
18760 +#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM))
18761 +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \
18762 +                                               (1 << ZONE_HIGHMEM)))
18763 +#else
18764 +#define real_nr_free_high_pages() (0)
18765 +#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask))
18766 +
18767 +/* For eat_memory function */
18768 +#define ZONE_HIGHMEM (MAX_NR_ZONES + 1)
18769 +#endif
18770 +
18771 +unsigned long get_header_storage_needed(void);
18772 diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c
18773 new file mode 100644
18774 index 0000000..be962ee
18775 --- /dev/null
18776 +++ b/kernel/power/tuxonice_storage.c
18777 @@ -0,0 +1,282 @@
18778 +/*
18779 + * kernel/power/tuxonice_storage.c
18780 + *
18781 + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net)
18782 + *
18783 + * This file is released under the GPLv2.
18784 + *
18785 + * Routines for talking to a userspace program that manages storage.
18786 + *
18787 + * The kernel side:
18788 + * - starts the userspace program;
18789 + * - sends messages telling it when to open and close the connection;
18790 + * - tells it when to quit;
18791 + *
18792 + * The user space side:
18793 + * - passes messages regarding status;
18794 + *
18795 + */
18796 +
18797 +#include <linux/suspend.h>
18798 +#include <linux/freezer.h>
18799 +
18800 +#include "tuxonice_sysfs.h"
18801 +#include "tuxonice_modules.h"
18802 +#include "tuxonice_netlink.h"
18803 +#include "tuxonice_storage.h"
18804 +#include "tuxonice_ui.h"
18805 +
18806 +static struct user_helper_data usm_helper_data;
18807 +static struct toi_module_ops usm_ops;
18808 +static int message_received, usm_prepare_count;
18809 +static int storage_manager_last_action, storage_manager_action;
18810 +
18811 +static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
18812 +{
18813 +       int type;
18814 +       int *data;
18815 +
18816 +       type = nlh->nlmsg_type;
18817 +
18818 +       /* Control messages: ignore them */
18819 +       if (type < NETLINK_MSG_BASE)
18820 +               return 0;
18821 +
18822 +       /* Unknown message: reply with EINVAL */
18823 +       if (type >= USM_MSG_MAX)
18824 +               return -EINVAL;
18825 +
18826 +       /* All operations require privileges, even GET */
18827 +       if (security_netlink_recv(skb, CAP_NET_ADMIN))
18828 +               return -EPERM;
18829 +
18830 +       /* Only allow one task to receive NOFREEZE privileges */
18831 +       if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1)
18832 +               return -EBUSY;
18833 +
18834 +       data = (int *) NLMSG_DATA(nlh);
18835 +
18836 +       switch (type) {
18837 +       case USM_MSG_SUCCESS:
18838 +       case USM_MSG_FAILED:
18839 +               message_received = type;
18840 +               complete(&usm_helper_data.wait_for_process);
18841 +               break;
18842 +       default:
18843 +               printk(KERN_INFO "Storage manager doesn't recognise "
18844 +                               "message %d.\n", type);
18845 +       }
18846 +
18847 +       return 1;
18848 +}
18849 +
18850 +#ifdef CONFIG_NET
18851 +static int activations;
18852 +
18853 +int toi_activate_storage(int force)
18854 +{
18855 +       int tries = 1;
18856 +
18857 +       if (usm_helper_data.pid == -1 || !usm_ops.enabled)
18858 +               return 0;
18859 +
18860 +       message_received = 0;
18861 +       activations++;
18862 +
18863 +       if (activations > 1 && !force)
18864 +               return 0;
18865 +
18866 +       while ((!message_received || message_received == USM_MSG_FAILED) &&
18867 +                       tries < 2) {
18868 +               toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt "
18869 +                               "%d.\n", tries);
18870 +
18871 +               init_completion(&usm_helper_data.wait_for_process);
18872 +
18873 +               toi_send_netlink_message(&usm_helper_data,
18874 +                       USM_MSG_CONNECT,
18875 +                       NULL, 0);
18876 +
18877 +               /* Wait 2 seconds for the userspace process to make contact */
18878 +               wait_for_completion_timeout(&usm_helper_data.wait_for_process,
18879 +                               2*HZ);
18880 +
18881 +               tries++;
18882 +       }
18883 +
18884 +       return 0;
18885 +}
18886 +
18887 +int toi_deactivate_storage(int force)
18888 +{
18889 +       if (usm_helper_data.pid == -1 || !usm_ops.enabled)
18890 +               return 0;
18891 +
18892 +       message_received = 0;
18893 +       activations--;
18894 +
18895 +       if (activations && !force)
18896 +               return 0;
18897 +
18898 +       init_completion(&usm_helper_data.wait_for_process);
18899 +
18900 +       toi_send_netlink_message(&usm_helper_data,
18901 +                       USM_MSG_DISCONNECT,
18902 +                       NULL, 0);
18903 +
18904 +       wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ);
18905 +
18906 +       if (!message_received || message_received == USM_MSG_FAILED) {
18907 +               printk(KERN_INFO "Failed to disconnect storage.\n");
18908 +               return 1;
18909 +       }
18910 +
18911 +       return 0;
18912 +}
18913 +#endif
18914 +
18915 +static void storage_manager_simulate(void)
18916 +{
18917 +       printk(KERN_INFO "--- Storage manager simulate ---\n");
18918 +       toi_prepare_usm();
18919 +       schedule();
18920 +       printk(KERN_INFO "--- Activate storage 1 ---\n");
18921 +       toi_activate_storage(1);
18922 +       schedule();
18923 +       printk(KERN_INFO "--- Deactivate storage 1 ---\n");
18924 +       toi_deactivate_storage(1);
18925 +       schedule();
18926 +       printk(KERN_INFO "--- Cleanup usm ---\n");
18927 +       toi_cleanup_usm();
18928 +       schedule();
18929 +       printk(KERN_INFO "--- Storage manager simulate ends ---\n");
18930 +}
18931 +
18932 +static int usm_storage_needed(void)
18933 +{
18934 +       return strlen(usm_helper_data.program);
18935 +}
18936 +
18937 +static int usm_save_config_info(char *buf)
18938 +{
18939 +       int len = strlen(usm_helper_data.program);
18940 +       memcpy(buf, usm_helper_data.program, len);
18941 +       return len;
18942 +}
18943 +
18944 +static void usm_load_config_info(char *buf, int size)
18945 +{
18946 +       /* Don't load the saved path if one has already been set */
18947 +       if (usm_helper_data.program[0])
18948 +               return;
18949 +
18950 +       memcpy(usm_helper_data.program, buf, size);
18951 +}
18952 +
18953 +static int usm_memory_needed(void)
18954 +{
18955 +       /* ballpark figure of 32 pages */
18956 +       return 32 * PAGE_SIZE;
18957 +}
18958 +
18959 +/* toi_prepare_usm
18960 + */
18961 +int toi_prepare_usm(void)
18962 +{
18963 +       usm_prepare_count++;
18964 +
18965 +       if (usm_prepare_count > 1 || !usm_ops.enabled)
18966 +               return 0;
18967 +
18968 +       usm_helper_data.pid = -1;
18969 +
18970 +       if (!*usm_helper_data.program)
18971 +               return 0;
18972 +
18973 +       toi_netlink_setup(&usm_helper_data);
18974 +
18975 +       if (usm_helper_data.pid == -1)
18976 +               printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't"
18977 +                               " start it.\n");
18978 +
18979 +       toi_activate_storage(0);
18980 +
18981 +       return usm_helper_data.pid != -1;
18982 +}
18983 +
18984 +void toi_cleanup_usm(void)
18985 +{
18986 +       usm_prepare_count--;
18987 +
18988 +       if (usm_helper_data.pid > -1 && !usm_prepare_count) {
18989 +               toi_deactivate_storage(0);
18990 +               toi_netlink_close(&usm_helper_data);
18991 +       }
18992 +}
18993 +
18994 +static void storage_manager_activate(void)
18995 +{
18996 +       if (storage_manager_action == storage_manager_last_action)
18997 +               return;
18998 +
18999 +       if (storage_manager_action)
19000 +               toi_prepare_usm();
19001 +       else
19002 +               toi_cleanup_usm();
19003 +
19004 +       storage_manager_last_action = storage_manager_action;
19005 +}
19006 +
19007 +/*
19008 + * Storage manager specific /sys/power/tuxonice entries.
19009 + */
19010 +
19011 +static struct toi_sysfs_data sysfs_params[] = {
19012 +       SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate),
19013 +       SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL),
19014 +       SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0,
19015 +               NULL),
19016 +       SYSFS_INT("activate_storage", SYSFS_RW, &storage_manager_action, 0, 1,
19017 +                       0, storage_manager_activate)
19018 +};
19019 +
19020 +static struct toi_module_ops usm_ops = {
19021 +       .type                           = MISC_MODULE,
19022 +       .name                           = "usm",
19023 +       .directory                      = "storage_manager",
19024 +       .module                         = THIS_MODULE,
19025 +       .storage_needed                 = usm_storage_needed,
19026 +       .save_config_info               = usm_save_config_info,
19027 +       .load_config_info               = usm_load_config_info,
19028 +       .memory_needed                  = usm_memory_needed,
19029 +
19030 +       .sysfs_data                     = sysfs_params,
19031 +       .num_sysfs_entries              = sizeof(sysfs_params) /
19032 +               sizeof(struct toi_sysfs_data),
19033 +};
19034 +
19035 +/* toi_usm_init
19036 + * Description: Boot time initialisation for the userspace storage manager.
19037 + */
19038 +int toi_usm_init(void)
19039 +{
19040 +       usm_helper_data.nl = NULL;
19041 +       usm_helper_data.program[0] = '\0';
19042 +       usm_helper_data.pid = -1;
19043 +       usm_helper_data.skb_size = 0;
19044 +       usm_helper_data.pool_limit = 6;
19045 +       usm_helper_data.netlink_id = NETLINK_TOI_USM;
19046 +       usm_helper_data.name = "userspace storage manager";
19047 +       usm_helper_data.rcv_msg = usm_user_rcv_msg;
19048 +       usm_helper_data.interface_version = 2;
19049 +       usm_helper_data.must_init = 0;
19050 +       init_completion(&usm_helper_data.wait_for_process);
19051 +
19052 +       return toi_register_module(&usm_ops);
19053 +}
19054 +
19055 +void toi_usm_exit(void)
19056 +{
19057 +       toi_netlink_close_complete(&usm_helper_data);
19058 +       toi_unregister_module(&usm_ops);
19059 +}
19060 diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h
19061 new file mode 100644
19062 index 0000000..8c6b5a7
19063 --- /dev/null
19064 +++ b/kernel/power/tuxonice_storage.h
19065 @@ -0,0 +1,45 @@
19066 +/*
19067 + * kernel/power/tuxonice_storage.h
19068 + *
19069 + * Copyright (C) 2005-2010 Nigel Cunningham (nigel at tuxonice net)
19070 + *
19071 + * This file is released under the GPLv2.
19072 + */
19073 +
19074 +#ifdef CONFIG_NET
19075 +int toi_prepare_usm(void);
19076 +void toi_cleanup_usm(void);
19077 +
19078 +int toi_activate_storage(int force);
19079 +int toi_deactivate_storage(int force);
19080 +extern int toi_usm_init(void);
19081 +extern void toi_usm_exit(void);
19082 +#else
19083 +static inline int toi_usm_init(void) { return 0; }
19084 +static inline void toi_usm_exit(void) { }
19085 +
19086 +static inline int toi_activate_storage(int force)
19087 +{
19088 +       return 0;
19089 +}
19090 +
19091 +static inline int toi_deactivate_storage(int force)
19092 +{
19093 +       return 0;
19094 +}
19095 +
19096 +static inline int toi_prepare_usm(void) { return 0; }
19097 +static inline void toi_cleanup_usm(void) { }
19098 +#endif
19099 +
19100 +enum {
19101 +       USM_MSG_BASE = 0x10,
19102 +
19103 +       /* Kernel -> Userspace */
19104 +       USM_MSG_CONNECT = 0x30,
19105 +       USM_MSG_DISCONNECT = 0x31,
19106 +       USM_MSG_SUCCESS = 0x40,
19107 +       USM_MSG_FAILED = 0x41,
19108 +
19109 +       USM_MSG_MAX,
19110 +};
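+
+/*
+ * Example: a minimal userspace storage manager, sketched against the
+ * message values above. It answers CONNECT/DISCONNECT with SUCCESS once
+ * its storage work is done. The NETLINK_TOI_USM protocol number and the
+ * wider handshake (interface version, NETLINK_MSG_NOFREEZE_ME and
+ * friends) live in the TuxOnIce netlink layer, not in this header, so
+ * they are assumptions here.
+ *
+ * #include <sys/socket.h>
+ * #include <linux/netlink.h>
+ *
+ * #define NETLINK_TOI_USM 28          // assumed; see tuxonice_netlink.h
+ *
+ * int main(void)
+ * {
+ *     struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
+ *     char buf[256];
+ *     int sk = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_TOI_USM);
+ *
+ *     if (sk < 0 || bind(sk, (struct sockaddr *)&addr, sizeof(addr)))
+ *             return 1;
+ *
+ *     for (;;) {
+ *             struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
+ *             struct nlmsghdr reply = { .nlmsg_len = NLMSG_LENGTH(0) };
+ *
+ *             if (recv(sk, buf, sizeof(buf), 0) < (ssize_t)sizeof(*nlh))
+ *                     continue;
+ *
+ *             if (nlh->nlmsg_type == USM_MSG_CONNECT)
+ *                     ;       // open/assemble the backing storage here
+ *             else if (nlh->nlmsg_type == USM_MSG_DISCONNECT)
+ *                     ;       // tear the storage down again
+ *             else
+ *                     continue;
+ *
+ *             reply.nlmsg_type = USM_MSG_SUCCESS;
+ *             send(sk, &reply, sizeof(reply), 0);
+ *     }
+ * }
+ */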
19111 diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c
19112 new file mode 100644
19113 index 0000000..a4dbceb
19114 --- /dev/null
19115 +++ b/kernel/power/tuxonice_swap.c
19116 @@ -0,0 +1,487 @@
19117 +/*
19118 + * kernel/power/tuxonice_swap.c
19119 + *
19120 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
19121 + *
19122 + * Distributed under GPLv2.
19123 + *
19124 + * This file encapsulates functions for usage of swap space as a
19125 + * backing store.
19126 + */
19127 +
19128 +#include <linux/suspend.h>
19129 +#include <linux/blkdev.h>
19130 +#include <linux/swapops.h>
19131 +#include <linux/swap.h>
19132 +#include <linux/syscalls.h>
19133 +#include <linux/fs_uuid.h>
19134 +
19135 +#include "tuxonice.h"
19136 +#include "tuxonice_sysfs.h"
19137 +#include "tuxonice_modules.h"
19138 +#include "tuxonice_io.h"
19139 +#include "tuxonice_ui.h"
19140 +#include "tuxonice_extent.h"
19141 +#include "tuxonice_bio.h"
19142 +#include "tuxonice_alloc.h"
19143 +#include "tuxonice_builtin.h"
19144 +
19145 +static struct toi_module_ops toi_swapops;
19146 +
19147 +/* For a swapfile that is automatically swapon'd/swapoff'd. */
19148 +static char swapfilename[255] = "";
19149 +static int toi_swapon_status;
19150 +
19151 +/* Swap Pages */
19152 +static unsigned long swap_allocated;
19153 +
19154 +static struct sysinfo swapinfo;
19155 +
19156 +/**
19157 + * enable_swapfile: Swapon the user specified swapfile prior to hibernating.
19158 + *
19159 + * Activate the given swapfile if it wasn't already enabled. Remember whether
19160 + * we really did swapon it for swapoffing later.
19161 + */
19162 +static void enable_swapfile(void)
19163 +{
19164 +       int activateswapresult = -EINVAL;
19165 +
19166 +       if (swapfilename[0]) {
19167 +               /* Attempt to swap on with maximum priority */
19168 +               activateswapresult = sys_swapon(swapfilename, 0xFFFF);
19169 +               if (activateswapresult && activateswapresult != -EBUSY)
19170 +                       printk(KERN_ERR "TuxOnIce: The swapfile/partition "
19171 +                               "specified by /sys/power/tuxonice/swap/swapfile"
19172 +                               " (%s) could not be turned on (error %d). "
19173 +                               "Attempting to continue.\n",
19174 +                               swapfilename, activateswapresult);
19175 +               if (!activateswapresult)
19176 +                       toi_swapon_status = 1;
19177 +       }
19178 +}
19179 +
19180 +/**
19181 + * disable_swapfile: Swapoff any file we swapon'd at the start of the cycle.
19182 + *
19183 + * If we did successfully swapon a file at the start of the cycle, swapoff
19184 + * it now (finishing up).
19185 + */
19186 +static void disable_swapfile(void)
19187 +{
19188 +       if (!toi_swapon_status)
19189 +               return;
19190 +
19191 +       sys_swapoff(swapfilename);
19192 +       toi_swapon_status = 0;
19193 +}
19194 +
19195 +static int add_blocks_to_extent_chain(struct toi_bdev_info *chain,
19196 +               unsigned long start, unsigned long end)
19197 +{
19198 +       if (test_action_state(TOI_TEST_BIO))
19199 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %lu-%lu to "
19200 +                               "chain %p.", start << chain->bmap_shift,
19201 +                               end << chain->bmap_shift, chain);
19202 +
19203 +       return toi_add_to_extent_chain(&chain->blocks, start, end);
19204 +}
19205 +
19206 +
19207 +static int get_main_pool_phys_params(struct toi_bdev_info *chain)
19208 +{
19209 +       struct hibernate_extent *extentpointer = NULL;
19210 +       unsigned long address, extent_min = 0, extent_max = 0;
19211 +       int empty = 1;
19212 +
19213 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "get main pool phys params for "
19214 +                       "chain %d.", chain->allocator_index);
19215 +
19216 +       if (!chain->allocations.first)
19217 +               return 0;
19218 +
19219 +       if (chain->blocks.first)
19220 +               toi_put_extent_chain(&chain->blocks);
19221 +
19222 +       toi_extent_for_each(&chain->allocations, extentpointer, address) {
19223 +               swp_entry_t swap_address = (swp_entry_t) { address };
19224 +               struct block_device *bdev;
19225 +               sector_t new_sector = map_swap_entry(swap_address, &bdev);
19226 +
19227 +               if (empty) {
19228 +                       empty = 0;
19229 +                       extent_min = extent_max = new_sector;
19230 +                       continue;
19231 +               }
19232 +
19233 +               if (new_sector == extent_max + 1) {
19234 +                       extent_max++;
19235 +                       continue;
19236 +               }
19237 +
19238 +               if (add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
19239 +                       printk(KERN_ERR "Out of memory while making block "
19240 +                                       "chains.\n");
19241 +                       return -ENOMEM;
19242 +               }
19243 +
19244 +               extent_min = new_sector;
19245 +               extent_max = new_sector;
19246 +       }
19247 +
19248 +       if (!empty &&
19249 +           add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
19250 +               printk(KERN_ERR "Out of memory while making block chains.\n");
19251 +               return -ENOMEM;
19252 +       }
19253 +
19254 +       return 0;
19255 +}
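+
+/*
+ * Both get_main_pool_phys_params() above and toi_swap_allocate_storage()
+ * below lean on the same run-length idea: consecutive sector/entry values
+ * are folded into a [min, max] extent, and a chain entry is only emitted
+ * when the run breaks. A self-contained userspace sketch of the pattern
+ * (plain C; it does not use the toi_extent API):
+ *
+ * #include <stdio.h>
+ *
+ * int main(void)
+ * {
+ *     unsigned long blocks[] = { 8, 9, 10, 11, 40, 41, 100 };
+ *     unsigned long min = 0, max = 0;
+ *     size_t i;
+ *     int started = 0;
+ *
+ *     for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
+ *             if (!started) {
+ *                     started = 1;
+ *                     min = max = blocks[i];
+ *             } else if (blocks[i] == max + 1) {
+ *                     max++;                  // extend the current run
+ *             } else {
+ *                     printf("extent %lu-%lu\n", min, max);
+ *                     min = max = blocks[i];  // start a new run
+ *             }
+ *     }
+ *     if (started)
+ *             printf("extent %lu-%lu\n", min, max);   // flush final run
+ *     return 0;               // prints 8-11, 40-41, 100-100
+ * }
+ */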
19256 +
19257 +/*
19258 + * Like si_swapinfo, except that we don't include ram backed swap (compcache!)
19259 + * and don't need to use the spinlocks (userspace is stopped when this
19260 + * function is called).
19261 + */
19262 +void si_swapinfo_no_compcache(void)
19263 +{
19264 +       unsigned int i;
19265 +
19266 +       si_swapinfo(&swapinfo);
19267 +       swapinfo.freeswap = 0;
19268 +       swapinfo.totalswap = 0;
19269 +
19270 +       for (i = 0; i < MAX_SWAPFILES; i++) {
19271 +               struct swap_info_struct *si = get_swap_info_struct(i);
19272 +               if (si && (si->flags & SWP_WRITEOK) &&
19273 +                   (strncmp(si->bdev->bd_disk->disk_name, "ram", 3))) {
19274 +                       swapinfo.totalswap += si->inuse_pages;
19275 +                       swapinfo.freeswap += si->pages - si->inuse_pages;
19276 +               }
19277 +       }
19278 +}
19279 +/*
19280 + * We can't just remember the value from allocation time, because other
19281 + * processes might have allocated swap in the mean time.
19282 + */
19283 +static unsigned long toi_swap_storage_available(void)
19284 +{
19285 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "In toi_swap_storage_available.");
19286 +       si_swapinfo_no_compcache();
19287 +       return swapinfo.freeswap + swap_allocated;
19288 +}
19289 +
19290 +static int toi_swap_initialise(int starting_cycle)
19291 +{
19292 +       if (!starting_cycle)
19293 +               return 0;
19294 +
19295 +       enable_swapfile();
19296 +       return 0;
19297 +}
19298 +
19299 +static void toi_swap_cleanup(int ending_cycle)
19300 +{
19301 +       if (ending_cycle)
19302 +               disable_swapfile();
19303 +}
19304 +
19305 +static void toi_swap_free_storage(struct toi_bdev_info *chain)
19306 +{
19307 +       /* Free swap entries */
19308 +       struct hibernate_extent *extentpointer;
19309 +       unsigned long extentvalue;
19310 +
19311 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing storage for chain %p.",
19312 +                       chain);
19313 +
19314 +       swap_allocated -= chain->allocations.size;
19315 +       toi_extent_for_each(&chain->allocations, extentpointer, extentvalue)
19316 +               swap_free((swp_entry_t) { extentvalue });
19317 +
19318 +       toi_put_extent_chain(&chain->allocations);
19319 +}
19320 +
19321 +static void free_swap_range(unsigned long min, unsigned long max)
19322 +{
19323 +       int j;
19324 +
19325 +       for (j = min; j <= max; j++)
19326 +               swap_free((swp_entry_t) { j });
19327 +       swap_allocated -= (max - min + 1);
19328 +}
19329 +
19330 +/*
19331 + * Allocation of a single swap type. Swap priorities are handled at the higher
19332 + * level.
19333 + */
19334 +static int toi_swap_allocate_storage(struct toi_bdev_info *chain,
19335 +               unsigned long request)
19336 +{
19337 +       int to_add = 0;
19338 +       unsigned long gotten = 0;
19339 +       unsigned long extent_min = 0, extent_max = 0;
19340 +
19341 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "  Swap allocate storage: Asked to"
19342 +                       " allocate %lu pages from device %d.", request,
19343 +                       chain->allocator_index);
19344 +
19345 +       while (gotten < request) {
19346 +               swp_entry_t entry;
19347 +               unsigned long new_value;
19348 +
19349 +               entry = get_swap_page_of_type(chain->allocator_index);
19350 +               if (!entry.val)
19351 +                       break;
19352 +
19353 +               swap_allocated++;
19354 +               new_value = entry.val;
19355 +               gotten++;
19356 +
19357 +               if (!to_add) {
19358 +                       to_add = 1;
19359 +                       extent_min = new_value;
19360 +                       extent_max = new_value;
19361 +                       continue;
19362 +               }
19363 +
19364 +               if (new_value == extent_max + 1) {
19365 +                       extent_max++;
19366 +                       continue;
19367 +               }
19368 +
19369 +               if (toi_add_to_extent_chain(&chain->allocations, extent_min,
19370 +                                       extent_max)) {
19371 +                       printk(KERN_INFO "Failed to allocate extent for "
19372 +                                       "%lu-%lu.\n", extent_min, extent_max);
19373 +                       free_swap_range(extent_min, extent_max);
19374 +                       swap_free(entry);
19375 +                       gotten -= (extent_max - extent_min);
19376 +                       /* Don't try to add again below */
19377 +                       to_add = 0;
19378 +                       break;
19379 +               }
19380 +
19381 +               extent_min = new_value;
19382 +               extent_max = new_value;
19383 +       }
19384 +
19385 +       if (to_add) {
19386 +               int this_result = toi_add_to_extent_chain(&chain->allocations,
19387 +                               extent_min, extent_max);
19388 +
19389 +               if (this_result) {
19390 +                       free_swap_range(extent_min, extent_max);
19391 +                       gotten -= (extent_max - extent_min + 1);
19392 +               }
19393 +       }
19394 +
19395 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "  Allocated %lu pages.", gotten);
19396 +       return gotten;
19397 +}
19398 +
19399 +static int toi_swap_register_storage(void)
19400 +{
19401 +       int i, result = 0;
19402 +
19403 +       toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_swap_register_storage.");
19404 +       for (i = 0; i < MAX_SWAPFILES; i++) {
19405 +               struct swap_info_struct *si = get_swap_info_struct(i);
19406 +               struct toi_bdev_info *devinfo;
19407 +               unsigned char *p;
19408 +               unsigned char buf[256];
19409 +               struct fs_info *fs_info;
19410 +
19411 +               if (!si || !(si->flags & SWP_WRITEOK) ||
19412 +                   !strncmp(si->bdev->bd_disk->disk_name, "ram", 3))
19413 +                       continue;
19414 +
19415 +               devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info),
19416 +                               GFP_ATOMIC);
19417 +               if (!devinfo) {
19418 +                       printk(KERN_ERR "Failed to allocate devinfo struct"
19419 +                                       " for swap device %d.\n", i);
19420 +                       return -ENOMEM;
19421 +               }
19422 +
19423 +               devinfo->bdev = si->bdev;
19424 +               devinfo->allocator = &toi_swapops;
19425 +               devinfo->allocator_index = i;
19426 +
19427 +               fs_info = fs_info_from_block_dev(si->bdev);
19428 +               if (fs_info && !IS_ERR(fs_info)) {
19429 +                       memcpy(devinfo->uuid, &fs_info->uuid, 16);
19430 +                       free_fs_info(fs_info);
19431 +               } else
19432 +                       result = (int) PTR_ERR(fs_info);
19433 +
19434 +               if (!fs_info || IS_ERR(fs_info))
19435 +                       printk(KERN_DEBUG "fs_info from block dev returned %d.\n", result);
19436 +               devinfo->dev_t = si->bdev->bd_dev;
19437 +               devinfo->prio = si->prio;
19438 +               devinfo->bmap_shift = 3;
19439 +               devinfo->blocks_per_page = 1;
19440 +
19441 +               p = d_path(&si->swap_file->f_path, buf, sizeof(buf));
19442 +               sprintf(devinfo->name, "swap on %s", p);
19443 +
19444 +               toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering swap storage:"
19445 +                               " Device %d (%lx), prio %d.", i,
19446 +                               (unsigned long) devinfo->dev_t, devinfo->prio);
19447 +               toi_bio_ops.register_storage(devinfo);
19448 +       }
19449 +
19450 +       return 0;
19451 +}
19452 +
19453 +/*
19454 + * toi_swap_memory_needed
19455 + *
19456 + * Description:
19457 + * Returns the number of bytes of RAM needed for this
19458 + * code to do its work. (Used when calculating whether
19459 + * we have enough memory to be able to hibernate & resume).
19460 + *
19461 + */
19462 +static int toi_swap_memory_needed(void)
19463 +{
19464 +       return 1;
19465 +}
19466 +
19467 +/*
19468 + * toi_swap_print_debug_stats
19469 + *
19470 + * Description: Print the swap allocator's status into the given buffer.
19471 + */
19472 +static int toi_swap_print_debug_stats(char *buffer, int size)
19473 +{
19474 +       int len = 0;
19475 +
19476 +       len = scnprintf(buffer, size, "- Swap Allocator enabled.\n");
19477 +       if (swapfilename[0])
19478 +               len += scnprintf(buffer+len, size-len,
19479 +                       "  Attempting to automatically swapon: %s.\n",
19480 +                       swapfilename);
19481 +
19482 +       si_swapinfo_no_compcache();
19483 +
19484 +       len += scnprintf(buffer+len, size-len,
19485 +                       "  Swap available for image: %lu pages.\n",
19486 +                       swapinfo.freeswap + swap_allocated);
19487 +
19488 +       return len;
19489 +}
19490 +
19491 +static int header_locations_read_sysfs(const char *page, int count)
19492 +{
19493 +       int i, printedpartitionsmessage = 0, len = 0, haveswap = 0;
19494 +       struct inode *swapf = NULL;
19495 +       int zone;
19496 +       char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL);
19497 +       char *path, *output = (char *) page;
19498 +       int path_len;
19499 +
19500 +       if (!page)
19501 +               return 0;
19502 +
19503 +       for (i = 0; i < MAX_SWAPFILES; i++) {
19504 +               struct swap_info_struct *si =  get_swap_info_struct(i);
19505 +
19506 +               if (!si || !(si->flags & SWP_WRITEOK))
19507 +                       continue;
19508 +
19509 +               if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) {
19510 +                       haveswap = 1;
19511 +                       if (!printedpartitionsmessage) {
19512 +                               len += sprintf(output + len,
19513 +                                       "For swap partitions, simply use the "
19514 +                                       "format: resume=swap:/dev/hda1.\n");
19515 +                               printedpartitionsmessage = 1;
19516 +                       }
19517 +               } else {
19518 +                       path_len = 0;
19519 +
19520 +                       path = d_path(&si->swap_file->f_path, path_page,
19521 +                                       PAGE_SIZE);
19522 +                       path_len = snprintf(path_page, PAGE_SIZE, "%s", path);
19523 +
19524 +                       haveswap = 1;
19525 +                       swapf = si->swap_file->f_mapping->host;
19526 +                       zone = bmap(swapf, 0);
19527 +                       if (!zone) {
19528 +                               len += sprintf(output + len,
19529 +                                       "Swapfile %s has been corrupted. Reuse"
19530 +                                       " mkswap on it and try again.\n",
19531 +                                       path_page);
19532 +                       } else {
19533 +                               char name_buffer[BDEVNAME_SIZE];
19534 +                               len += sprintf(output + len,
19535 +                                       "For swapfile `%s`,"
19536 +                                       " use resume=swap:/dev/%s:0x%x.\n",
19537 +                                       path_page,
19538 +                                       bdevname(si->bdev, name_buffer),
19539 +                                       zone << (swapf->i_blkbits - 9));
19540 +                       }
19541 +               }
19542 +       }
19543 +
19544 +       if (!haveswap)
19545 +               len = sprintf(output, "You need to turn on swap partitions "
19546 +                               "before examining this file.\n");
19547 +
19548 +       toi_free_page(10, (unsigned long) path_page);
19549 +       return len;
19550 +}
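+
+/*
+ * The offset printed above is just the swapfile's first filesystem block
+ * converted to 512-byte sectors. A worked example, assuming a hypothetical
+ * swapfile whose first block (bmap(swapf, 0)) is 1000 on a filesystem with
+ * 4096-byte blocks (i_blkbits == 12):
+ *
+ * #include <stdio.h>
+ *
+ * int main(void)
+ * {
+ *     unsigned int first_block = 1000;    // hypothetical bmap() result
+ *     unsigned int i_blkbits = 12;        // 4096-byte filesystem blocks
+ *
+ *     // Same shift as header_locations_read_sysfs(): blocks -> sectors.
+ *     printf("resume=swap:/dev/sda1:0x%x\n",
+ *            first_block << (i_blkbits - 9));    // prints ...:0x1f40
+ *     return 0;
+ * }
+ */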
19551 +
19552 +static struct toi_sysfs_data sysfs_params[] = {
19553 +       SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL),
19554 +       SYSFS_CUSTOM("headerlocations", SYSFS_READONLY,
19555 +                       header_locations_read_sysfs, NULL, 0, NULL),
19556 +       SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0,
19557 +                       attempt_to_parse_resume_device2),
19558 +};
19559 +
19560 +static struct toi_bio_allocator_ops toi_bio_swapops = {
19561 +       .register_storage                       = toi_swap_register_storage,
19562 +       .storage_available                      = toi_swap_storage_available,
19563 +       .allocate_storage                       = toi_swap_allocate_storage,
19564 +       .bmap                                   = get_main_pool_phys_params,
19565 +       .free_storage                           = toi_swap_free_storage,
19566 +};
19567 +
19568 +static struct toi_module_ops toi_swapops = {
19569 +       .type                                   = BIO_ALLOCATOR_MODULE,
19570 +       .name                                   = "swap storage",
19571 +       .directory                              = "swap",
19572 +       .module                                 = THIS_MODULE,
19573 +       .memory_needed                          = toi_swap_memory_needed,
19574 +       .print_debug_info                       = toi_swap_print_debug_stats,
19575 +       .initialise                             = toi_swap_initialise,
19576 +       .cleanup                                = toi_swap_cleanup,
19577 +       .bio_allocator_ops                      = &toi_bio_swapops,
19578 +
19579 +       .sysfs_data             = sysfs_params,
19580 +       .num_sysfs_entries      = sizeof(sysfs_params) /
19581 +               sizeof(struct toi_sysfs_data),
19582 +};
19583 +
19584 +/* ---- Registration ---- */
19585 +static __init int toi_swap_load(void)
19586 +{
19587 +       return toi_register_module(&toi_swapops);
19588 +}
19589 +
19590 +#ifdef MODULE
19591 +static __exit void toi_swap_unload(void)
19592 +{
19593 +       toi_unregister_module(&toi_swapops);
19594 +}
19595 +
19596 +module_init(toi_swap_load);
19597 +module_exit(toi_swap_unload);
19598 +MODULE_LICENSE("GPL");
19599 +MODULE_AUTHOR("Nigel Cunningham");
19600 +MODULE_DESCRIPTION("TuxOnIce SwapAllocator");
19601 +#else
19602 +late_initcall(toi_swap_load);
19603 +#endif
19604 diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c
19605 new file mode 100644
19606 index 0000000..0088409
19607 --- /dev/null
19608 +++ b/kernel/power/tuxonice_sysfs.c
19609 @@ -0,0 +1,335 @@
19610 +/*
19611 + * kernel/power/tuxonice_sysfs.c
19612 + *
19613 + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net)
19614 + *
19615 + * This file is released under the GPLv2.
19616 + *
19617 + * This file contains support for sysfs entries for tuning TuxOnIce.
19618 + *
19619 + * We have a generic handler that deals with the most common cases, and
19620 + * hooks for special handlers to use.
19621 + */
19622 +
19623 +#include <linux/suspend.h>
19624 +
19625 +#include "tuxonice_sysfs.h"
19626 +#include "tuxonice.h"
19627 +#include "tuxonice_storage.h"
19628 +#include "tuxonice_alloc.h"
19629 +
19630 +static int toi_sysfs_initialised;
19631 +
19632 +static void toi_initialise_sysfs(void);
19633 +
19634 +static struct toi_sysfs_data sysfs_params[];
19635 +
19636 +#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr)
19637 +
19638 +static void toi_main_wrapper(void)
19639 +{
19640 +       toi_try_hibernate();
19641 +}
19642 +
19643 +static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr,
19644 +                             char *page)
19645 +{
19646 +       struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
19647 +       int len = 0;
19648 +       int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ;
19649 +
19650 +       if (full_prep && toi_start_anything(0))
19651 +               return -EBUSY;
19652 +
19653 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
19654 +               toi_prepare_usm();
19655 +
19656 +       switch (sysfs_data->type) {
19657 +       case TOI_SYSFS_DATA_CUSTOM:
19658 +               len = (sysfs_data->data.special.read_sysfs) ?
19659 +                       (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE)
19660 +                       : 0;
19661 +               break;
19662 +       case TOI_SYSFS_DATA_BIT:
19663 +               len = sprintf(page, "%d\n",
19664 +                       -test_bit(sysfs_data->data.bit.bit,
19665 +                               sysfs_data->data.bit.bit_vector));
19666 +               break;
19667 +       case TOI_SYSFS_DATA_INTEGER:
19668 +               len = sprintf(page, "%d\n",
19669 +                       *(sysfs_data->data.integer.variable));
19670 +               break;
19671 +       case TOI_SYSFS_DATA_LONG:
19672 +               len = sprintf(page, "%ld\n",
19673 +                       *(sysfs_data->data.a_long.variable));
19674 +               break;
19675 +       case TOI_SYSFS_DATA_UL:
19676 +               len = sprintf(page, "%lu\n",
19677 +                       *(sysfs_data->data.ul.variable));
19678 +               break;
19679 +       case TOI_SYSFS_DATA_STRING:
19680 +               len = sprintf(page, "%s\n",
19681 +                       sysfs_data->data.string.variable);
19682 +               break;
19683 +       }
19684 +
19685 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
19686 +               toi_cleanup_usm();
19687 +
19688 +       if (full_prep)
19689 +               toi_finish_anything(0);
19690 +
19691 +       return len;
19692 +}
19693 +
19694 +#define BOUND(_variable, _type) do { \
19695 +       if (*_variable < sysfs_data->data._type.minimum) \
19696 +               *_variable = sysfs_data->data._type.minimum; \
19697 +       else if (*_variable > sysfs_data->data._type.maximum) \
19698 +               *_variable = sysfs_data->data._type.maximum; \
19699 +} while (0)
19700 +
19701 +static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr,
19702 +               const char *my_buf, size_t count)
19703 +{
19704 +       int assigned_temp_buffer = 0, result = count;
19705 +       struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
19706 +
19707 +       if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME)))
19708 +               return -EBUSY;
19709 +
19710 +       ((char *) my_buf)[count] = 0;
19711 +
19712 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
19713 +               toi_prepare_usm();
19714 +
19715 +       switch (sysfs_data->type) {
19716 +       case TOI_SYSFS_DATA_CUSTOM:
19717 +               if (sysfs_data->data.special.write_sysfs)
19718 +                       result = (sysfs_data->data.special.write_sysfs)(my_buf,
19719 +                                       count);
19720 +               break;
19721 +       case TOI_SYSFS_DATA_BIT:
19722 +               {
19723 +               unsigned long value;
19724 +               result = strict_strtoul(my_buf, 0, &value);
19725 +               if (result)
19726 +                       break;
19727 +               if (value)
19728 +                       set_bit(sysfs_data->data.bit.bit,
19729 +                               (sysfs_data->data.bit.bit_vector));
19730 +               else
19731 +                       clear_bit(sysfs_data->data.bit.bit,
19732 +                               (sysfs_data->data.bit.bit_vector));
19733 +               }
19734 +               break;
19735 +       case TOI_SYSFS_DATA_INTEGER:
19736 +               {
19737 +                       long temp;
19738 +                       result = strict_strtol(my_buf, 0, &temp);
19739 +                       if (result)
19740 +                               break;
19741 +                       *(sysfs_data->data.integer.variable) = (int) temp;
19742 +                       BOUND(sysfs_data->data.integer.variable, integer);
19743 +                       break;
19744 +               }
19745 +       case TOI_SYSFS_DATA_LONG:
19746 +               {
19747 +                       long *variable =
19748 +                               sysfs_data->data.a_long.variable;
19749 +                       result = strict_strtol(my_buf, 0, variable);
19750 +                       if (result)
19751 +                               break;
19752 +                       BOUND(variable, a_long);
19753 +                       break;
19754 +               }
19755 +       case TOI_SYSFS_DATA_UL:
19756 +               {
19757 +                       unsigned long *variable =
19758 +                               sysfs_data->data.ul.variable;
19759 +                       result = strict_strtoul(my_buf, 0, variable);
19760 +                       if (result)
19761 +                               break;
19762 +                       BOUND(variable, ul);
19763 +                       break;
19764 +               }
19765 +               break;
19766 +       case TOI_SYSFS_DATA_STRING:
19767 +               {
19768 +                       int copy_len = count;
19769 +                       char *variable =
19770 +                               sysfs_data->data.string.variable;
19771 +
19772 +                       if (sysfs_data->data.string.max_length &&
19773 +                           (copy_len > sysfs_data->data.string.max_length))
19774 +                               copy_len = sysfs_data->data.string.max_length;
19775 +
19776 +                       if (!variable) {
19777 +                               variable = (char *) toi_get_zeroed_page(31,
19778 +                                               TOI_ATOMIC_GFP);
19779 +                               sysfs_data->data.string.variable = variable;
19780 +                               assigned_temp_buffer = 1;
19781 +                       }
19782 +                       strncpy(variable, my_buf, copy_len);
19783 +                       if (copy_len && my_buf[copy_len - 1] == '\n')
19784 +                               variable[copy_len - 1] = 0;
19785 +                       variable[copy_len] = 0;
19786 +               }
19787 +               break;
19788 +       }
19789 +
19790 +       if (!result)
19791 +               result = count;
19792 +
19793 +       /* Side effect routine? */
19794 +       if (result == count && sysfs_data->write_side_effect)
19795 +               sysfs_data->write_side_effect();
19796 +
19797 +       /* Free temporary buffers */
19798 +       if (assigned_temp_buffer) {
19799 +               toi_free_page(31,
19800 +                       (unsigned long) sysfs_data->data.string.variable);
19801 +               sysfs_data->data.string.variable = NULL;
19802 +       }
19803 +
19804 +       if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
19805 +               toi_cleanup_usm();
19806 +
19807 +       toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME);
19808 +
19809 +       return result;
19810 +}
19811 +
19812 +static struct sysfs_ops toi_sysfs_ops = {
19813 +       .show   = &toi_attr_show,
19814 +       .store  = &toi_attr_store,
19815 +};
19816 +
19817 +static struct kobj_type toi_ktype = {
19818 +       .sysfs_ops      = &toi_sysfs_ops,
19819 +};
19820 +
19821 +struct kobject *tuxonice_kobj;
19822 +
19823 +/* Non-module sysfs entries.
19824 + *
19825 + * This array contains entries that are automatically registered at
19826 + * boot. Modules and the console code register their own entries separately.
19827 + */
19828 +
19829 +static struct toi_sysfs_data sysfs_params[] = {
19830 +       SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL,
19831 +               SYSFS_HIBERNATING, toi_main_wrapper),
19832 +       SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL,
19833 +               SYSFS_RESUMING, toi_try_resume)
19834 +};
19835 +
19836 +void remove_toi_sysdir(struct kobject *kobj)
19837 +{
19838 +       if (!kobj)
19839 +               return;
19840 +
19841 +       kobject_put(kobj);
19842 +}
19843 +
19844 +struct kobject *make_toi_sysdir(char *name)
19845 +{
19846 +       struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj);
19847 +
19848 +       if (!kobj) {
19849 +               printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs "
19850 +                               "dir!\n");
19851 +               return NULL;
19852 +       }
19853 +
19854 +       kobj->ktype = &toi_ktype;
19855 +
19856 +       return kobj;
19857 +}
19858 +
19859 +/* toi_register_sysfs_file
19860 + *
19861 + * Helper for registering a new /sys/power/tuxonice entry.
19862 + */
19863 +
19864 +int toi_register_sysfs_file(
19865 +               struct kobject *kobj,
19866 +               struct toi_sysfs_data *toi_sysfs_data)
19867 +{
19868 +       int result;
19869 +
19870 +       if (!toi_sysfs_initialised)
19871 +               toi_initialise_sysfs();
19872 +
19873 +       result = sysfs_create_file(kobj, &toi_sysfs_data->attr);
19874 +       if (result)
19875 +               printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s "
19876 +                       "returned %d.\n",
19877 +                       toi_sysfs_data->attr.name, result);
19878 +       kobj->ktype = &toi_ktype;
19879 +
19880 +       return result;
19881 +}
19882 +EXPORT_SYMBOL_GPL(toi_register_sysfs_file);
19883 +
19884 +/* toi_unregister_sysfs_file
19885 + *
19886 + * Helper for removing unwanted /sys/power/tuxonice entries.
19887 + *
19888 + */
19889 +void toi_unregister_sysfs_file(struct kobject *kobj,
19890 +               struct toi_sysfs_data *toi_sysfs_data)
19891 +{
19892 +       sysfs_remove_file(kobj, &toi_sysfs_data->attr);
19893 +}
19894 +EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file);
19895 +
19896 +void toi_cleanup_sysfs(void)
19897 +{
19898 +       int i,
19899 +           numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
19900 +
19901 +       if (!toi_sysfs_initialised)
19902 +               return;
19903 +
19904 +       for (i = 0; i < numfiles; i++)
19905 +               toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
19906 +
19907 +       kobject_put(tuxonice_kobj);
19908 +       toi_sysfs_initialised = 0;
19909 +}
19910 +
19911 +/* toi_initialise_sysfs
19912 + *
19913 + * Initialise the /sys/power/tuxonice directory.
19914 + */
19915 +
19916 +static void toi_initialise_sysfs(void)
19917 +{
19918 +       int i;
19919 +       int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
19920 +
19921 +       if (toi_sysfs_initialised)
19922 +               return;
19923 +
19924 +       /* Make our TuxOnIce directory a child of /sys/power */
19925 +       tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj);
19926 +       if (!tuxonice_kobj)
19927 +               return;
19928 +
19929 +       toi_sysfs_initialised = 1;
19930 +
19931 +       for (i = 0; i < numfiles; i++)
19932 +               toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
19933 +}
19934 +
19935 +int toi_sysfs_init(void)
19936 +{
19937 +       toi_initialise_sysfs();
19938 +       return 0;
19939 +}
19940 +
19941 +void toi_sysfs_exit(void)
19942 +{
19943 +       toi_cleanup_sysfs();
19944 +}
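
The two custom entries registered in sysfs_params[] above are write-only triggers: toi_attr_store() runs their write_side_effect once the write succeeds. A minimal userspace sketch of driving them (path as created by toi_initialise_sysfs(); error handling kept terse):

#include <fcntl.h>
#include <unistd.h>

/* Sketch: start a hibernation cycle by poking the write-only
 * /sys/power/tuxonice/do_hibernate entry registered above. */
int main(void)
{
	int fd = open("/sys/power/tuxonice/do_hibernate", O_WRONLY);

	if (fd < 0)
		return 1;

	/* Any write reaches toi_attr_store(), which then invokes the
	 * entry's write_side_effect (toi_main_wrapper). */
	if (write(fd, "1", 1) != 1) {
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}
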
19945 diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h
19946 new file mode 100644
19947 index 0000000..4185c6d
19948 --- /dev/null
19949 +++ b/kernel/power/tuxonice_sysfs.h
19950 @@ -0,0 +1,137 @@
19951 +/*
19952 + * kernel/power/tuxonice_sysfs.h
19953 + *
19954 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
19955 + *
19956 + * This file is released under the GPLv2.
19957 + */
19958 +
19959 +#include <linux/sysfs.h>
19960 +
19961 +struct toi_sysfs_data {
19962 +       struct attribute attr;
19963 +       int type;
19964 +       int flags;
19965 +       union {
19966 +               struct {
19967 +                       unsigned long *bit_vector;
19968 +                       int bit;
19969 +               } bit;
19970 +               struct {
19971 +                       int *variable;
19972 +                       int minimum;
19973 +                       int maximum;
19974 +               } integer;
19975 +               struct {
19976 +                       long *variable;
19977 +                       long minimum;
19978 +                       long maximum;
19979 +               } a_long;
19980 +               struct {
19981 +                       unsigned long *variable;
19982 +                       unsigned long minimum;
19983 +                       unsigned long maximum;
19984 +               } ul;
19985 +               struct {
19986 +                       char *variable;
19987 +                       int max_length;
19988 +               } string;
19989 +               struct {
19990 +                       int (*read_sysfs) (const char *buffer, int count);
19991 +                       int (*write_sysfs) (const char *buffer, int count);
19992 +                       void *data;
19993 +               } special;
19994 +       } data;
19995 +
19996 +       /* Side effects routine. Used, e.g., for reparsing the
19997 +        * resume= entry when it changes */
19998 +       void (*write_side_effect) (void);
19999 +       struct list_head sysfs_data_list;
20000 +};
20001 +
20002 +enum {
20003 +       TOI_SYSFS_DATA_NONE = 1,
20004 +       TOI_SYSFS_DATA_CUSTOM,
20005 +       TOI_SYSFS_DATA_BIT,
20006 +       TOI_SYSFS_DATA_INTEGER,
20007 +       TOI_SYSFS_DATA_UL,
20008 +       TOI_SYSFS_DATA_LONG,
20009 +       TOI_SYSFS_DATA_STRING
20010 +};
20011 +
20012 +#define SYSFS_WRITEONLY 0200
20013 +#define SYSFS_READONLY 0444
20014 +#define SYSFS_RW 0644
20015 +
20016 +#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \
20017 +       .attr = {.name  = _name , .mode   = _mode }, \
20018 +       .type = TOI_SYSFS_DATA_BIT, \
20019 +       .flags = _flags, \
20020 +       .data = { .bit = { .bit_vector = _ul, .bit = _bit } } }
20021 +
20022 +#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \
20023 +       .attr = {.name  = _name , .mode   = _mode }, \
20024 +       .type = TOI_SYSFS_DATA_INTEGER, \
20025 +       .flags = _flags, \
20026 +       .data = { .integer = { .variable = _int, .minimum = _min, \
20027 +                       .maximum = _max } }, \
20028 +       .write_side_effect = _wse }
20029 +
20030 +#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \
20031 +       .attr = {.name  = _name , .mode   = _mode }, \
20032 +       .type = TOI_SYSFS_DATA_UL, \
20033 +       .flags = _flags, \
20034 +       .data = { .ul = { .variable = _ul, .minimum = _min, \
20035 +                       .maximum = _max } } }
20036 +
20037 +#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \
20038 +       .attr = {.name  = _name , .mode   = _mode }, \
20039 +       .type = TOI_SYSFS_DATA_LONG, \
20040 +       .flags = _flags, \
20041 +       .data = { .a_long = { .variable = _long, .minimum = _min, \
20042 +                       .maximum = _max } } }
20043 +
20044 +#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \
20045 +       .attr = {.name  = _name , .mode   = _mode }, \
20046 +       .type = TOI_SYSFS_DATA_STRING, \
20047 +       .flags = _flags, \
20048 +       .data = { .string = { .variable = _string, .max_length = _max_len } }, \
20049 +       .write_side_effect = _wse }
20050 +
20051 +#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \
20052 +       .attr = {.name  = _name , .mode   = _mode }, \
20053 +       .type = TOI_SYSFS_DATA_CUSTOM, \
20054 +       .flags = _flags, \
20055 +       .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \
20056 +       .write_side_effect = _wse }
20057 +
20058 +#define SYSFS_NONE(_name, _wse) { \
20059 +       .attr = {.name  = _name , .mode   = SYSFS_WRITEONLY }, \
20060 +       .type = TOI_SYSFS_DATA_NONE, \
20061 +       .write_side_effect = _wse, \
20062 +}
20063 +
20064 +/* Flags */
20065 +#define SYSFS_NEEDS_SM_FOR_READ 1
20066 +#define SYSFS_NEEDS_SM_FOR_WRITE 2
20067 +#define SYSFS_HIBERNATE 4
20068 +#define SYSFS_RESUME 8
20069 +#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME)
20070 +#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE)
20071 +#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE)
20072 +#define SYSFS_NEEDS_SM_FOR_BOTH \
20073 + (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE)
20074 +
20075 +int toi_register_sysfs_file(struct kobject *kobj,
20076 +               struct toi_sysfs_data *toi_sysfs_data);
20077 +void toi_unregister_sysfs_file(struct kobject *kobj,
20078 +               struct toi_sysfs_data *toi_sysfs_data);
20079 +
20080 +extern struct kobject *tuxonice_kobj;
20081 +
20082 +struct kobject *make_toi_sysdir(char *name);
20083 +void remove_toi_sysdir(struct kobject *obj);
20084 +extern void toi_cleanup_sysfs(void);
20085 +
20086 +extern int toi_sysfs_init(void);
20087 +extern void toi_sysfs_exit(void);
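
As a usage sketch of the declaration macros and registration helpers above (the names my_enabled, my_params, my_dir and the "my_module" directory are hypothetical; toi_bkd and TOI_LOGALL are the real symbols used elsewhere in this patch):

/* Hypothetical client module declaring two tunables. */
static int my_enabled = 1;

static struct toi_sysfs_data my_params[] = {
	SYSFS_INT("enabled", SYSFS_RW, &my_enabled, 0, 1, 0, NULL),
	SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action,
			TOI_LOGALL, 0),
};

static struct kobject *my_dir;

static int __init my_module_init(void)
{
	int i, result = 0;

	/* Creates /sys/power/tuxonice/my_module */
	my_dir = make_toi_sysdir("my_module");
	if (!my_dir)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(my_params) && !result; i++)
		result = toi_register_sysfs_file(my_dir, &my_params[i]);

	return result;
}
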
20088 diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c
20089 new file mode 100644
20090 index 0000000..b0b3b40
20091 --- /dev/null
20092 +++ b/kernel/power/tuxonice_ui.c
20093 @@ -0,0 +1,250 @@
20094 +/*
20095 + * kernel/power/tuxonice_ui.c
20096 + *
20097 + * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
20098 + * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
20099 + * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
20100 + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net)
20101 + *
20102 + * This file is released under the GPLv2.
20103 + *
20104 + * Routines for TuxOnIce's user interface.
20105 + *
20106 + * The user interface code talks to a userspace program via a
20107 + * netlink socket.
20108 + *
20109 + * The kernel side:
20110 + * - starts the userui program;
20111 + * - sends text messages and progress bar status;
20112 + *
20113 + * The user space side:
20114 + * - passes messages regarding user requests (abort, toggle reboot etc)
20115 + *
20116 + */
20117 +
20118 +#define __KERNEL_SYSCALLS__
20119 +
20120 +#include <linux/reboot.h>
20121 +
20122 +#include "tuxonice_sysfs.h"
20123 +#include "tuxonice_modules.h"
20124 +#include "tuxonice.h"
20125 +#include "tuxonice_ui.h"
20126 +#include "tuxonice_netlink.h"
20127 +#include "tuxonice_power_off.h"
20128 +#include "tuxonice_builtin.h"
20129 +
20130 +static char local_printf_buf[1024];    /* Same as printk - should be safe */
20131 +struct ui_ops *toi_current_ui;
20132 +EXPORT_SYMBOL_GPL(toi_current_ui);
20133 +
20134 +/**
20135 + * toi_wait_for_keypress - Wait for keypress via userui or /dev/console.
20136 + *
20137 + * @timeout: Maximum time to wait.
20138 + *
20139 + * Wait for a keypress, either from userui or /dev/console if userui isn't
20140 + * available. The non-userui path is used mainly at boot time, prior
20141 + * to userui being started, when we have an important warning to give to
20142 + * the user.
20143 + */
20144 +static char toi_wait_for_keypress(int timeout)
20145 +{
20146 +       if (toi_current_ui && toi_current_ui->wait_for_key(timeout))
20147 +               return ' ';
20148 +
20149 +       return toi_wait_for_keypress_dev_console(timeout);
20150 +}
20151 +
20152 +/* toi_early_boot_message()
20153 + * Description:        Handle errors early in the process of booting.
20154 + *             The user may press C to continue booting, perhaps
20155 + *             invalidating the image, or space to reboot.
20156 + *             This works from either the serial console or a normally
20157 + *             attached keyboard.
20158 + *
20159 + *             Note that we come in here from init, while the kernel is
20160 + *             locked. If we want to get events from the serial console,
20161 + *             we need to temporarily unlock the kernel.
20162 + *
20163 + *             toi_early_boot_message may also be called post-boot.
20164 + *             In this case, it simply printks the message and returns.
20165 + *
20166 + * Arguments:  int     message_detail: whether we can erase the image.
20167 + *             int     default_answer. What to do when we time out. This
20168 + *                     will normally be continue, but the user might
20169 + *                     provide command line options (__setup) to override
20170 + *                     particular cases.
20171 + *             char *. Pointer to a string explaining why we're moaning.
20172 + */
20173 +
20174 +#define say(message, a...) printk(KERN_EMERG message, ##a)
20175 +
20176 +void toi_early_boot_message(int message_detail, int default_answer,
20177 +       char *warning_reason, ...)
20178 +{
20179 +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
20180 +       unsigned long orig_loglevel = console_loglevel;
20181 +       int can_ask = 1;
20182 +#else
20183 +       int can_ask = 0;
20184 +#endif
20185 +       unsigned long orig_state = get_toi_state(), continue_req = 0;
20186 +
20187 +       va_list args;
20188 +       int printed_len;
20189 +
20190 +       if (!toi_wait) {
20191 +               set_toi_state(TOI_CONTINUE_REQ);
20192 +               can_ask = 0;
20193 +       }
20194 +
20195 +       if (warning_reason) {
20196 +               va_start(args, warning_reason);
20197 +               printed_len = vsnprintf(local_printf_buf,
20198 +                               sizeof(local_printf_buf),
20199 +                               warning_reason,
20200 +                               args);
20201 +               va_end(args);
20202 +       }
20203 +
20204 +       if (!test_toi_state(TOI_BOOT_TIME)) {
20205 +               printk("TuxOnIce: %s\n", local_printf_buf);
20206 +               return;
20207 +       }
20208 +
20209 +       if (!can_ask) {
20210 +               continue_req = !!default_answer;
20211 +               goto post_ask;
20212 +       }
20213 +
20214 +#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
20215 +       console_loglevel = 7;
20216 +
20217 +       say("=== TuxOnIce ===\n\n");
20218 +       if (warning_reason) {
20219 +               say("BIG FAT WARNING!! %s\n\n", local_printf_buf);
20220 +               switch (message_detail) {
20221 +               case 0:
20222 +                       say("If you continue booting, note that any image WILL "
20223 +                               "NOT BE REMOVED.\nTuxOnIce is unable to do so "
20224 +                               "because the appropriate modules aren't\n"
20225 +                               "loaded. You should manually remove the image "
20226 +                               "to avoid any\npossibility of corrupting your "
20227 +                               "filesystem(s) later.\n");
20228 +                       break;
20229 +               case 1:
20230 +                       say("If you want to use the current TuxOnIce image, "
20231 +                               "reboot and try\nagain with the same kernel "
20232 +                               "that you hibernated from. If you want\n"
20233 +                               "to forget that image, continue and the image "
20234 +                               "will be erased.\n");
20235 +                       break;
20236 +               }
20237 +               say("Press SPACE to reboot or C to continue booting with "
20238 +                       "this kernel\n\n");
20239 +               if (toi_wait > 0)
20240 +                       say("Default action if you don't select one in %d "
20241 +                               "seconds is: %s.\n",
20242 +                               toi_wait,
20243 +                               default_answer == TOI_CONTINUE_REQ ?
20244 +                               "continue booting" : "reboot");
20245 +       } else {
20246 +               say("BIG FAT WARNING!!\n\n"
20247 +                       "You have tried to resume from this image before.\n"
20248 +                       "If it failed once, it may well fail again.\n"
20249 +                       "Would you like to remove the image and boot "
20250 +                       "normally?\nThis will be equivalent to entering "
20251 +                       "noresume on the\nkernel command line.\n\n"
20252 +                       "Press SPACE to remove the image or C to continue "
20253 +                       "resuming.\n\n");
20254 +               if (toi_wait > 0)
20255 +                       say("Default action if you don't select one in %d "
20256 +                               "seconds is: %s.\n", toi_wait,
20257 +                               !!default_answer ?
20258 +                               "continue resuming" : "remove the image");
20259 +       }
20260 +       console_loglevel = orig_loglevel;
20261 +
20262 +       set_toi_state(TOI_SANITY_CHECK_PROMPT);
20263 +       clear_toi_state(TOI_CONTINUE_REQ);
20264 +
20265 +       if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */
20266 +               continue_req = !!default_answer;
20267 +       else
20268 +               continue_req = test_toi_state(TOI_CONTINUE_REQ);
20269 +
20270 +#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */
20271 +
20272 +post_ask:
20273 +       if ((warning_reason) && (!continue_req))
20274 +               machine_restart(NULL);
20275 +
20276 +       restore_toi_state(orig_state);
20277 +       if (continue_req)
20278 +               set_toi_state(TOI_CONTINUE_REQ);
20279 +}
20280 +EXPORT_SYMBOL_GPL(toi_early_boot_message);
20281 +#undef say
20282 +
20283 +/*
20284 + * User interface specific /sys/power/tuxonice entries.
20285 + */
20286 +
20287 +static struct toi_sysfs_data sysfs_params[] = {
20288 +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
20289 +       SYSFS_INT("default_console_level", SYSFS_RW,
20290 +                       &toi_bkd.toi_default_console_level, 0, 7, 0, NULL),
20291 +       SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0,
20292 +                       1 << 30, 0),
20293 +       SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL,
20294 +                       0)
20295 +#endif
20296 +};
20297 +
20298 +static struct toi_module_ops userui_ops = {
20299 +       .type                           = MISC_HIDDEN_MODULE,
20300 +       .name                           = "printk ui",
20301 +       .directory                      = "user_interface",
20302 +       .module                         = THIS_MODULE,
20303 +       .sysfs_data                     = sysfs_params,
20304 +       .num_sysfs_entries              = sizeof(sysfs_params) /
20305 +               sizeof(struct toi_sysfs_data),
20306 +};
20307 +
20308 +int toi_register_ui_ops(struct ui_ops *this_ui)
20309 +{
20310 +       if (toi_current_ui) {
20311 +               printk(KERN_INFO "Only one TuxOnIce user interface module can "
20312 +                               "be loaded at a time.\n");
20313 +               return -EBUSY;
20314 +       }
20315 +
20316 +       toi_current_ui = this_ui;
20317 +
20318 +       return 0;
20319 +}
20320 +EXPORT_SYMBOL_GPL(toi_register_ui_ops);
20321 +
20322 +void toi_remove_ui_ops(struct ui_ops *this_ui)
20323 +{
20324 +       if (toi_current_ui != this_ui)
20325 +               return;
20326 +
20327 +       toi_current_ui = NULL;
20328 +}
20329 +EXPORT_SYMBOL_GPL(toi_remove_ui_ops);
20330 +
20331 +/* toi_ui_init
20332 + * Description: Boot time initialisation for user interface.
20333 + */
20334 +
20335 +int toi_ui_init(void)
20336 +{
20337 +       return toi_register_module(&userui_ops);
20338 +}
20339 +
20340 +void toi_ui_exit(void)
20341 +{
20342 +       toi_unregister_module(&userui_ops);
20343 +}
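
For reference, a sketch of how boot-time code might invoke toi_early_boot_message() (the reason string is illustrative; message_detail 1 selects the "reboot with the same kernel" explanation printed above, and TOI_CONTINUE_REQ makes continuing the default on timeout):

/* Hypothetical caller: image header doesn't match this kernel. */
toi_early_boot_message(1, TOI_CONTINUE_REQ,
	"The image was written by a different kernel version.");
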
20344 diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h
20345 new file mode 100644
20346 index 0000000..85fb7cb
20347 --- /dev/null
20348 +++ b/kernel/power/tuxonice_ui.h
20349 @@ -0,0 +1,97 @@
20350 +/*
20351 + * kernel/power/tuxonice_ui.h
20352 + *
20353 + * Copyright (C) 2004-2010 Nigel Cunningham (nigel at tuxonice net)
20354 + */
20355 +
20356 +enum {
20357 +       DONT_CLEAR_BAR,
20358 +       CLEAR_BAR
20359 +};
20360 +
20361 +enum {
20362 +       /* Userspace -> Kernel */
20363 +       USERUI_MSG_ABORT = 0x11,
20364 +       USERUI_MSG_SET_STATE = 0x12,
20365 +       USERUI_MSG_GET_STATE = 0x13,
20366 +       USERUI_MSG_GET_DEBUG_STATE = 0x14,
20367 +       USERUI_MSG_SET_DEBUG_STATE = 0x15,
20368 +       USERUI_MSG_SPACE = 0x18,
20369 +       USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A,
20370 +       USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B,
20371 +       USERUI_MSG_GET_LOGLEVEL = 0x1C,
20372 +       USERUI_MSG_SET_LOGLEVEL = 0x1D,
20373 +       USERUI_MSG_PRINTK = 0x1E,
20374 +
20375 +       /* Kernel -> Userspace */
20376 +       USERUI_MSG_MESSAGE = 0x21,
20377 +       USERUI_MSG_PROGRESS = 0x22,
20378 +       USERUI_MSG_POST_ATOMIC_RESTORE = 0x25,
20379 +
20380 +       USERUI_MSG_MAX,
20381 +};
20382 +
20383 +struct userui_msg_params {
20384 +       u32 a, b, c, d;
20385 +       char text[255];
20386 +};
20387 +
20388 +struct ui_ops {
20389 +       char (*wait_for_key) (int timeout);
20390 +       u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...);
20391 +       void (*prepare_status) (int clearbar, const char *fmt, ...);
20392 +       void (*cond_pause) (int pause, char *message);
20393 +       void (*abort)(int result_code, const char *fmt, ...);
20394 +       void (*prepare)(void);
20395 +       void (*cleanup)(void);
20396 +       void (*message)(u32 section, u32 level, u32 normally_logged,
20397 +                       const char *fmt, ...);
20398 +};
20399 +
20400 +extern struct ui_ops *toi_current_ui;
20401 +
20402 +#define toi_update_status(val, max, fmt, args...) \
20403 + (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \
20404 +       max)
20405 +
20406 +#define toi_prepare_console() \
20407 +       do { if (toi_current_ui) \
20408 +               (toi_current_ui->prepare)(); \
20409 +       } while (0)
20410 +
20411 +#define toi_cleanup_console() \
20412 +       do { if (toi_current_ui) \
20413 +               (toi_current_ui->cleanup)(); \
20414 +       } while (0)
20415 +
20416 +#define abort_hibernate(result, fmt, args...) \
20417 +       do { if (toi_current_ui) \
20418 +               (toi_current_ui->abort)(result, fmt, ##args); \
20419 +            else { \
20420 +               set_abort_result(result); \
20421 +            } \
20422 +       } while (0)
20423 +
20424 +#define toi_cond_pause(pause, message) \
20425 +       do { if (toi_current_ui) \
20426 +               (toi_current_ui->cond_pause)(pause, message); \
20427 +       } while (0)
20428 +
20429 +#define toi_prepare_status(clear, fmt, args...) \
20430 +       do { if (toi_current_ui) \
20431 +               (toi_current_ui->prepare_status)(clear, fmt, ##args); \
20432 +            else \
20433 +               printk(KERN_ERR fmt "%s", ##args, "\n"); \
20434 +       } while (0)
20435 +
20436 +#define toi_message(sn, lev, log, fmt, a...) \
20437 +do { \
20438 +       if (toi_current_ui && (!sn || test_debug_state(sn))) \
20439 +               toi_current_ui->message(sn, lev, log, fmt, ##a); \
20440 +} while (0)
20441 +
20442 +__exit void toi_ui_cleanup(void);
20443 +extern int toi_ui_init(void);
20444 +extern void toi_ui_exit(void);
20445 +extern int toi_register_ui_ops(struct ui_ops *this_ui);
20446 +extern void toi_remove_ui_ops(struct ui_ops *this_ui);
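
A sketch of how core code is expected to drive these wrappers (write_one_chunk() is a hypothetical producer). The key point is that toi_update_status() returns the next numerator at which a redraw is actually due, so tight loops can skip redundant calls:

/* Hypothetical write loop reporting progress through the UI macros. */
static void example_write_loop(u32 total)
{
	u32 done = 0, next_update = 0;

	toi_prepare_status(CLEAR_BAR, "Writing kernel & process data...");

	while (done < total) {
		done += write_one_chunk();	/* hypothetical */
		if (done >= next_update)
			next_update = toi_update_status(done, total,
					" %u/%u ", done, total);
	}
}
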
20447 diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c
20448 new file mode 100644
20449 index 0000000..625d863
20450 --- /dev/null
20451 +++ b/kernel/power/tuxonice_userui.c
20452 @@ -0,0 +1,668 @@
20453 +/*
20454 + * kernel/power/tuxonice_userui.c
20455 + *
20456 + * Copyright (C) 2005-2007 Bernard Blackham
20457 + * Copyright (C) 2002-2010 Nigel Cunningham (nigel at tuxonice net)
20458 + *
20459 + * This file is released under the GPLv2.
20460 + *
20461 + * Routines for TuxOnIce's user interface.
20462 + *
20463 + * The user interface code talks to a userspace program via a
20464 + * netlink socket.
20465 + *
20466 + * The kernel side:
20467 + * - starts the userui program;
20468 + * - sends text messages and progress bar status;
20469 + *
20470 + * The user space side:
20471 + * - passes messages regarding user requests (abort, toggle reboot etc)
20472 + *
20473 + */
20474 +
20475 +#define __KERNEL_SYSCALLS__
20476 +
20477 +#include <linux/suspend.h>
20478 +#include <linux/freezer.h>
20479 +#include <linux/console.h>
20480 +#include <linux/ctype.h>
20481 +#include <linux/tty.h>
20482 +#include <linux/vt_kern.h>
20483 +#include <linux/reboot.h>
20484 +#include <linux/kmod.h>
20485 +#include <linux/security.h>
20486 +#include <linux/syscalls.h>
20487 +#include <linux/vt.h>
20488 +
20489 +#include "tuxonice_sysfs.h"
20490 +#include "tuxonice_modules.h"
20491 +#include "tuxonice.h"
20492 +#include "tuxonice_ui.h"
20493 +#include "tuxonice_netlink.h"
20494 +#include "tuxonice_power_off.h"
20495 +
20496 +static char local_printf_buf[1024];    /* Same as printk - should be safe */
20497 +
20498 +static struct user_helper_data ui_helper_data;
20499 +static struct toi_module_ops userui_ops;
20500 +static int orig_kmsg;
20501 +
20502 +static char lastheader[512];
20503 +static int lastheader_message_len;
20504 +static int ui_helper_changed; /* Used at resume time so we don't overwrite
20505 +                               a value set from the initrd/ramfs. */
20506 +
20507 +/* Number of distinct progress amounts that userspace can display */
20508 +static int progress_granularity = 30;
20509 +
20510 +static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key);
20511 +
20512 +/**
20513 + * ui_nl_set_state - Update toi_action based on a message from userui.
20514 + *
20515 + * @n: The new action-flag values requested by userspace.
20516 + */
20517 +static void ui_nl_set_state(int n)
20518 +{
20519 +       /* Only let them change certain settings */
20520 +       static const u32 toi_action_mask =
20521 +               (1 << TOI_REBOOT) | (1 << TOI_PAUSE) |
20522 +               (1 << TOI_LOGALL) |
20523 +               (1 << TOI_SINGLESTEP) |
20524 +               (1 << TOI_PAUSE_NEAR_PAGESET_END);
20525 +       unsigned long new_action;
20526 +
20527 +       new_action = (toi_bkd.toi_action & (~toi_action_mask)) |
20528 +               (n & toi_action_mask);
20529 +
20530 +       printk(KERN_DEBUG "n is %x. Action flags being changed from %lx "
20531 +                       "to %lx.\n", n, toi_bkd.toi_action, new_action);
20532 +       toi_bkd.toi_action = new_action;
20533 +
20534 +       if (!test_action_state(TOI_PAUSE) &&
20535 +                       !test_action_state(TOI_SINGLESTEP))
20536 +               wake_up_interruptible(&userui_wait_for_key);
20537 +}
20538 +
20539 +/**
20540 + * userui_post_atomic_restore - Tell userui that atomic restore just happened.
20541 + *
20542 + * Tell userui that atomic restore just occurred, so that it can do things
20543 + * redrawing the screen, re-getting settings and so on.
20544 + */
20545 +static void userui_post_atomic_restore(struct toi_boot_kernel_data *bkd)
20546 +{
20547 +       toi_send_netlink_message(&ui_helper_data,
20548 +                       USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0);
20549 +}
20550 +
20551 +/**
20552 + * userui_storage_needed - Report how much memory in image header is needed.
20553 + */
20554 +static int userui_storage_needed(void)
20555 +{
20556 +       return sizeof(ui_helper_data.program) + 1 + sizeof(int);
20557 +}
20558 +
20559 +/**
20560 + * userui_save_config_info - Fill buffer with config info for image header.
20561 + *
20562 + * @buf: Buffer into which to put the config info we want to save.
20563 + */
20564 +static int userui_save_config_info(char *buf)
20565 +{
20566 +       *((int *) buf) = progress_granularity;
20567 +       memcpy(buf + sizeof(int), ui_helper_data.program,
20568 +                       sizeof(ui_helper_data.program));
20569 +       return sizeof(ui_helper_data.program) + sizeof(int) + 1;
20570 +}
20571 +
20572 +/**
20573 + * userui_load_config_info - Restore config info from buffer.
20574 + *
20575 + * @buf: Buffer containing header info loaded.
20576 + * @size: Size of data loaded for this module.
20577 + */
20578 +static void userui_load_config_info(char *buf, int size)
20579 +{
20580 +       progress_granularity = *((int *) buf);
20581 +       size -= sizeof(int);
20582 +
20583 +       /* Don't load the saved path if one has already been set */
20584 +       if (ui_helper_changed)
20585 +               return;
20586 +
20587 +       if (size > sizeof(ui_helper_data.program))
20588 +               size = sizeof(ui_helper_data.program);
20589 +
20590 +       memcpy(ui_helper_data.program, buf + sizeof(int), size);
20591 +       ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0';
20592 +}
20593 +
20594 +/**
20595 + * set_ui_program_set: Record that userui program was changed.
20596 + *
20597 + * Side effect routine for when the userui program is set. In an initrd or
20598 + * ramfs, the user may set a location for the userui program. If this happens,
20599 + * we don't want to reload the value that was saved in the image header. This
20600 + * routine allows us to flag that we shouldn't restore the program name from
20601 + * the image header.
20602 + */
20603 +static void set_ui_program_set(void)
20604 +{
20605 +       ui_helper_changed = 1;
20606 +}
20607 +
20608 +/**
20609 + * userui_memory_needed - Tell core how much memory to reserve for us.
20610 + */
20611 +static int userui_memory_needed(void)
20612 +{
20613 +       /* ballpark figure of 128 pages */
20614 +       return 128 * PAGE_SIZE;
20615 +}
20616 +
20617 +/**
20618 + * userui_update_status - Update the progress bar and (if on) in-bar message.
20619 + *
20620 + * @value: Current progress percentage numerator.
20621 + * @maximum: Current progress percentage denominator.
20622 + * @fmt: Message to be displayed in the middle of the progress bar.
20623 + *
20624 + * Note that a NULL message does not mean that any previous message is erased!
20625 + * For that, you need toi_prepare_status with clearbar on.
20626 + *
20627 + * Returns a u32, being the next numerator (as determined by the
20628 + * maximum and progress granularity) where status needs to be updated.
20629 + * This is to reduce unnecessary calls to update_status.
20630 + */
20631 +static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...)
20632 +{
20633 +       static u32 last_step = 9999;
20634 +       struct userui_msg_params msg;
20635 +       u32 this_step, next_update;
20636 +       int bitshift;
20637 +
20638 +       if (ui_helper_data.pid == -1)
20639 +               return 0;
20640 +
20641 +       if ((!maximum) || (!progress_granularity))
20642 +               return maximum;
20643 +
20644 +       /* value and maximum are u32, so value cannot be negative;
20645 +        * only the upper bound needs clamping. */
20646 +
20647 +       if (value > maximum)
20648 +               value = maximum;
20649 +
20650 +       /* Try to avoid math problems - we can't do 64 bit math here
20651 +        * (and shouldn't need it - anyone got screen resolution
20652 +        * of 65536 pixels or more?) */
20653 +       bitshift = fls(maximum) - 16;
20654 +       if (bitshift > 0) {
20655 +               u32 temp_maximum = maximum >> bitshift;
20656 +               u32 temp_value = value >> bitshift;
20657 +               this_step = (u32)
20658 +                       (temp_value * progress_granularity / temp_maximum);
20659 +               next_update = (((this_step + 1) * temp_maximum /
20660 +                                       progress_granularity) + 1) << bitshift;
20661 +       } else {
20662 +               this_step = (u32) (value * progress_granularity / maximum);
20663 +               next_update = ((this_step + 1) * maximum /
20664 +                               progress_granularity) + 1;
20665 +       }
20666 +
20667 +       if (this_step == last_step)
20668 +               return next_update;
20669 +
20670 +       memset(&msg, 0, sizeof(msg));
20671 +
20672 +       msg.a = this_step;
20673 +       msg.b = progress_granularity;
20674 +
20675 +       if (fmt) {
20676 +               va_list args;
20677 +               va_start(args, fmt);
20678 +               vsnprintf(msg.text, sizeof(msg.text), fmt, args);
20679 +               va_end(args);
20680 +               msg.text[sizeof(msg.text)-1] = '\0';
20681 +       }
20682 +
20683 +       toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS,
20684 +                       &msg, sizeof(msg));
20685 +       last_step = this_step;
20686 +
20687 +       return next_update;
20688 +}
20689 +
20690 +/**
20691 + * userui_message - Display a message without necessarily logging it.
20692 + *
20693 + * @section: Type of message. Messages can be filtered by type.
20694 + * @level: Degree of importance of the message. Lower values = higher priority.
20695 + * @normally_logged: Whether logged even if log_everything is off.
20696 + * @fmt: Message (and parameters).
20697 + *
20698 + * This function is intended to do the same job as printk, but without normally
20699 + * logging what is printed. The point is to be able to get debugging info on
20700 + * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M"
20701 + *
20702 + * It may be called from an interrupt context - can't sleep!
20703 + */
20704 +static void userui_message(u32 section, u32 level, u32 normally_logged,
20705 +               const char *fmt, ...)
20706 +{
20707 +       struct userui_msg_params msg;
20708 +
20709 +       if ((level) && (level > console_loglevel))
20710 +               return;
20711 +
20712 +       memset(&msg, 0, sizeof(msg));
20713 +
20714 +       msg.a = section;
20715 +       msg.b = level;
20716 +       msg.c = normally_logged;
20717 +
20718 +       if (fmt) {
20719 +               va_list args;
20720 +               va_start(args, fmt);
20721 +               vsnprintf(msg.text, sizeof(msg.text), fmt, args);
20722 +               va_end(args);
20723 +               msg.text[sizeof(msg.text)-1] = '\0';
20724 +       }
20725 +
20726 +       if (test_action_state(TOI_LOGALL))
20727 +               printk(KERN_INFO "%s\n", msg.text);
20728 +
20729 +       toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE,
20730 +                       &msg, sizeof(msg));
20731 +}
20732 +
20733 +/**
20734 + * wait_for_key_via_userui - Wait for userui to receive a keypress.
20735 + */
20736 +static void wait_for_key_via_userui(void)
20737 +{
20738 +       DECLARE_WAITQUEUE(wait, current);
20739 +
20740 +       add_wait_queue(&userui_wait_for_key, &wait);
20741 +       set_current_state(TASK_INTERRUPTIBLE);
20742 +
20743 +       interruptible_sleep_on(&userui_wait_for_key);
20744 +
20745 +       set_current_state(TASK_RUNNING);
20746 +       remove_wait_queue(&userui_wait_for_key, &wait);
20747 +}
20748 +
20749 +/**
20750 + * userui_prepare_status - Display high level messages.
20751 + *
20752 + * @clearbar: Whether to clear the progress bar.
20753 + * @fmt...: New message for the title.
20754 + *
20755 + * Prepare the 'nice display', drawing the header and version, along with the
20756 + * current action and perhaps also resetting the progress bar.
20757 + */
20758 +static void userui_prepare_status(int clearbar, const char *fmt, ...)
20759 +{
20760 +       va_list args;
20761 +
20762 +       if (fmt) {
20763 +               va_start(args, fmt);
20764 +               lastheader_message_len = vsnprintf(lastheader, 512, fmt, args);
20765 +               va_end(args);
20766 +       }
20767 +
20768 +       if (clearbar)
20769 +               toi_update_status(0, 1, NULL);
20770 +
20771 +       if (ui_helper_data.pid == -1)
20772 +               printk(KERN_EMERG "%s\n", lastheader);
20773 +       else
20774 +               toi_message(0, TOI_STATUS, 1, "%s", lastheader);
20775 +}
20776 +
20777 +/**
20778 + * userui_wait_for_keypress - Wait for keypress via userui.
20779 + *
20780 + * @timeout: Maximum time to wait.
20781 + *
20782 + * Wait for a keypress from userui.
20783 + *
20784 + * FIXME: Implement timeout?
20785 + */
20786 +static char userui_wait_for_keypress(int timeout)
20787 +{
20788 +       char key = '\0';
20789 +
20790 +       if (ui_helper_data.pid != -1) {
20791 +               wait_for_key_via_userui();
20792 +               key = ' ';
20793 +       }
20794 +
20795 +       return key;
20796 +}
20797 +
20798 +/**
20799 + * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it.
20800 + *
20801 + * @result_code: Reason why we're aborting (1 << bit).
20802 + * @fmt: Message to display if telling the user what's going on.
20803 + *
20804 + * Abort a cycle. If this wasn't at the user's request (and we're displaying
20805 + * output), tell the user why and wait for them to acknowledge the message.
20806 + */
20807 +static void userui_abort_hibernate(int result_code, const char *fmt, ...)
20808 +{
20809 +       va_list args;
20810 +       int printed_len = 0;
20811 +
20812 +       set_result_state(result_code);
20813 +
20814 +       if (test_result_state(TOI_ABORTED))
20815 +               return;
20816 +
20817 +       set_result_state(TOI_ABORTED);
20818 +
20819 +       if (test_result_state(TOI_ABORT_REQUESTED))
20820 +               return;
20821 +
20822 +       va_start(args, fmt);
20823 +       printed_len = vscnprintf(local_printf_buf, sizeof(local_printf_buf),
20824 +                       fmt, args);
20825 +       va_end(args);
20826 +       if (ui_helper_data.pid != -1)
20827 +               scnprintf(local_printf_buf + printed_len, sizeof(local_printf_buf)
20828 +                               - printed_len, " (Press SPACE to continue)");
20829 +
20830 +       toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf);
20831 +
20832 +       if (ui_helper_data.pid != -1)
20833 +               userui_wait_for_keypress(0);
20834 +}
20835 +
20836 +/**
20837 + * request_abort_hibernate - Abort hibernating or resuming at user request.
20838 + *
20839 + * Handle the user requesting the cancellation of a hibernation or resume by
20840 + * pressing escape.
20841 + */
20842 +static void request_abort_hibernate(void)
20843 +{
20844 +       if (test_result_state(TOI_ABORT_REQUESTED) ||
20845 +          !test_action_state(TOI_CAN_CANCEL))
20846 +               return;
20847 +
20848 +       if (test_toi_state(TOI_NOW_RESUMING)) {
20849 +               toi_prepare_status(CLEAR_BAR, "Escape pressed. "
20850 +                                       "Powering down again.");
20851 +               set_toi_state(TOI_STOP_RESUME);
20852 +               while (!test_toi_state(TOI_IO_STOPPED))
20853 +                       schedule();
20854 +               if (toiActiveAllocator->mark_resume_attempted)
20855 +                       toiActiveAllocator->mark_resume_attempted(0);
20856 +               toi_power_down();
20857 +       }
20858 +
20859 +       toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED: "
20860 +                                       "ABORTING HIBERNATION ---");
20861 +       set_abort_result(TOI_ABORT_REQUESTED);
20862 +       wake_up_interruptible(&userui_wait_for_key);
20863 +}
20864 +
20865 +/**
20866 + * userui_user_rcv_msg - Receive a netlink message from userui.
20867 + *
20868 + * @skb: skb received.
20869 + * @nlh: Netlink header received.
20870 + */
20871 +static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
20872 +{
20873 +       int type;
20874 +       int *data;
20875 +
20876 +       type = nlh->nlmsg_type;
20877 +
20878 +       /* A control message: ignore them */
20879 +       if (type < NETLINK_MSG_BASE)
20880 +               return 0;
20881 +
20882 +       /* Unknown message: reply with EINVAL */
20883 +       if (type >= USERUI_MSG_MAX)
20884 +               return -EINVAL;
20885 +
20886 +       /* All operations require privileges, even GET */
20887 +       if (security_netlink_recv(skb, CAP_NET_ADMIN))
20888 +               return -EPERM;
20889 +
20890 +       /* Only allow one task to receive NOFREEZE privileges */
20891 +       if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) {
20892 +               printk(KERN_INFO "Got NOFREEZE_ME request when "
20893 +                       "ui_helper_data.pid is %d.\n", ui_helper_data.pid);
20894 +               return -EBUSY;
20895 +       }
20896 +
20897 +       data = (int *) NLMSG_DATA(nlh);
20898 +
20899 +       switch (type) {
20900 +       case USERUI_MSG_ABORT:
20901 +               request_abort_hibernate();
20902 +               return 0;
20903 +       case USERUI_MSG_GET_STATE:
20904 +               toi_send_netlink_message(&ui_helper_data,
20905 +                               USERUI_MSG_GET_STATE, &toi_bkd.toi_action,
20906 +                               sizeof(toi_bkd.toi_action));
20907 +               return 0;
20908 +       case USERUI_MSG_GET_DEBUG_STATE:
20909 +               toi_send_netlink_message(&ui_helper_data,
20910 +                               USERUI_MSG_GET_DEBUG_STATE,
20911 +                               &toi_bkd.toi_debug_state,
20912 +                               sizeof(toi_bkd.toi_debug_state));
20913 +               return 0;
20914 +       case USERUI_MSG_SET_STATE:
20915 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
20916 +                       return -EINVAL;
20917 +               ui_nl_set_state(*data);
20918 +               return 0;
20919 +       case USERUI_MSG_SET_DEBUG_STATE:
20920 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
20921 +                       return -EINVAL;
20922 +               toi_bkd.toi_debug_state = (*data);
20923 +               return 0;
20924 +       case USERUI_MSG_SPACE:
20925 +               wake_up_interruptible(&userui_wait_for_key);
20926 +               return 0;
20927 +       case USERUI_MSG_GET_POWERDOWN_METHOD:
20928 +               toi_send_netlink_message(&ui_helper_data,
20929 +                               USERUI_MSG_GET_POWERDOWN_METHOD,
20930 +                               &toi_poweroff_method,
20931 +                               sizeof(toi_poweroff_method));
20932 +               return 0;
20933 +       case USERUI_MSG_SET_POWERDOWN_METHOD:
20934 +               if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char)))
20935 +                       return -EINVAL;
20936 +               toi_poweroff_method = (unsigned long)(*data);
20937 +               return 0;
20938 +       case USERUI_MSG_GET_LOGLEVEL:
20939 +               toi_send_netlink_message(&ui_helper_data,
20940 +                               USERUI_MSG_GET_LOGLEVEL,
20941 +                               &toi_bkd.toi_default_console_level,
20942 +                               sizeof(toi_bkd.toi_default_console_level));
20943 +               return 0;
20944 +       case USERUI_MSG_SET_LOGLEVEL:
20945 +               if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
20946 +                       return -EINVAL;
20947 +               toi_bkd.toi_default_console_level = (*data);
20948 +               return 0;
20949 +       case USERUI_MSG_PRINTK:
20950 +               printk(KERN_INFO "%s", (char *) data);
20951 +               return 0;
20952 +       }
20953 +
20954 +       /* Unhandled here */
20955 +       return 1;
20956 +}
20957 +
20958 +/**
20959 + * userui_cond_pause - Possibly pause at user request.
20960 + *
20961 + * @pause: Whether to pause or just display the message.
20962 + * @message: Message to display at the start of pausing.
20963 + *
20964 + * Potentially pause and wait for the user to tell us to continue. We normally
20965 + * only pause when @pause is set. While paused, the user can do things like
20966 + * changing the loglevel, toggling the display of debugging sections and such
20967 + * like.
20968 + */
20969 +static void userui_cond_pause(int pause, char *message)
20970 +{
20971 +       int displayed_message = 0, last_key = 0;
20972 +
20973 +       while (last_key != 32 &&
20974 +               ui_helper_data.pid != -1 &&
20975 +               ((test_action_state(TOI_PAUSE) && pause) ||
20976 +                (test_action_state(TOI_SINGLESTEP)))) {
20977 +               if (!displayed_message) {
20978 +                       toi_prepare_status(DONT_CLEAR_BAR,
20979 +                          "%s Press SPACE to continue.%s",
20980 +                          message ? message : "",
20981 +                          (test_action_state(TOI_SINGLESTEP)) ?
20982 +                          " Single step on." : "");
20983 +                       displayed_message = 1;
20984 +               }
20985 +               last_key = userui_wait_for_keypress(0);
20986 +       }
20987 +       schedule();
20988 +}
20989 +
20990 +/**
20991 + * userui_prepare_console - Prepare the console for use.
20992 + *
20993 + * Prepare a console for use, saving current kmsg settings and attempting to
20994 + * start userui. Console loglevel changes are handled by userui.
20995 + */
20996 +static void userui_prepare_console(void)
20997 +{
20998 +       orig_kmsg = vt_kmsg_redirect(fg_console + 1);
20999 +
21000 +       ui_helper_data.pid = -1;
21001 +
21002 +       if (!userui_ops.enabled) {
21003 +               printk(KERN_INFO "TuxOnIce: Userui disabled.\n");
21004 +               return;
21005 +       }
21006 +
21007 +       if (*ui_helper_data.program)
21008 +               toi_netlink_setup(&ui_helper_data);
21009 +       else
21010 +               printk(KERN_INFO "TuxOnIce: Userui program not configured.\n");
21011 +}
21012 +
21013 +/**
21014 + * userui_cleanup_console - Cleanup after a cycle.
21015 + *
21016 + * Tell userui to cleanup, and restore kmsg_redirect to its original value.
21017 + */
21018 +
21019 +static void userui_cleanup_console(void)
21020 +{
21021 +       if (ui_helper_data.pid > -1)
21022 +               toi_netlink_close(&ui_helper_data);
21023 +
21024 +       vt_kmsg_redirect(orig_kmsg);
21025 +}
21026 +
21027 +/*
21028 + * User interface specific /sys/power/tuxonice entries.
21029 + */
21030 +
21031 +static struct toi_sysfs_data sysfs_params[] = {
21032 +#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
21033 +       SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action,
21034 +                       TOI_CAN_CANCEL, 0),
21035 +       SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action,
21036 +                       TOI_PAUSE, 0),
21037 +       SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL),
21038 +       SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
21039 +                       2048, 0, NULL),
21040 +       SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0,
21041 +                       set_ui_program_set),
21042 +       SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL)
21043 +#endif
21044 +};
21045 +
21046 +static struct toi_module_ops userui_ops = {
21047 +       .type                           = MISC_MODULE,
21048 +       .name                           = "userui",
21049 +       .shared_directory               = "user_interface",
21050 +       .module                         = THIS_MODULE,
21051 +       .storage_needed                 = userui_storage_needed,
21052 +       .save_config_info               = userui_save_config_info,
21053 +       .load_config_info               = userui_load_config_info,
21054 +       .memory_needed                  = userui_memory_needed,
21055 +       .post_atomic_restore            = userui_post_atomic_restore,
21056 +       .sysfs_data                     = sysfs_params,
21057 +       .num_sysfs_entries              = sizeof(sysfs_params) /
21058 +               sizeof(struct toi_sysfs_data),
21059 +};
21060 +
21061 +static struct ui_ops my_ui_ops = {
21062 +       .update_status                  = userui_update_status,
21063 +       .message                        = userui_message,
21064 +       .prepare_status                 = userui_prepare_status,
21065 +       .abort                          = userui_abort_hibernate,
21066 +       .cond_pause                     = userui_cond_pause,
21067 +       .prepare                        = userui_prepare_console,
21068 +       .cleanup                        = userui_cleanup_console,
21069 +       .wait_for_key                   = userui_wait_for_keypress,
21070 +};
21071 +
21072 +/**
21073 + * toi_user_ui_init - Boot time initialisation for user interface.
21074 + *
21075 + * Invoked as a late initcall at boot, or on module load.
21076 + */
21077 +static __init int toi_user_ui_init(void)
21078 +{
21079 +       int result;
21080 +
21081 +       ui_helper_data.nl = NULL;
21082 +       strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255);
21083 +       ui_helper_data.pid = -1;
21084 +       ui_helper_data.skb_size = sizeof(struct userui_msg_params);
21085 +       ui_helper_data.pool_limit = 6;
21086 +       ui_helper_data.netlink_id = NETLINK_TOI_USERUI;
21087 +       ui_helper_data.name = "userspace ui";
21088 +       ui_helper_data.rcv_msg = userui_user_rcv_msg;
21089 +       ui_helper_data.interface_version = 8;
21090 +       ui_helper_data.must_init = 0;
21091 +       ui_helper_data.not_ready = userui_cleanup_console;
21092 +       init_completion(&ui_helper_data.wait_for_process);
21093 +       result = toi_register_module(&userui_ops);
21094 +       if (!result)
21095 +               result = toi_register_ui_ops(&my_ui_ops);
21096 +       if (result)
21097 +               toi_unregister_module(&userui_ops);
21098 +
21099 +       return result;
21100 +}
21101 +
21102 +#ifdef MODULE
21103 +/**
21104 + * toi_user_ui_exit - Cleanup code for when the module is unloaded.
21105 + */
21106 +static __exit void toi_user_ui_exit(void)
21107 +{
21108 +       toi_netlink_close_complete(&ui_helper_data);
21109 +       toi_remove_ui_ops(&my_ui_ops);
21110 +       toi_unregister_module(&userui_ops);
21111 +}
21112 +
21113 +module_init(toi_user_ui_init);
21114 +module_exit(toi_user_ui_exit);
21115 +MODULE_AUTHOR("Nigel Cunningham");
21116 +MODULE_DESCRIPTION("TuxOnIce Userui Support");
21117 +MODULE_LICENSE("GPL");
21118 +#else
21119 +late_initcall(toi_user_ui_init);
21120 +#endif
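
The scaled arithmetic in userui_update_status() can be sanity-checked outside the kernel. A userspace rerun of the same computation (fls() emulated with __builtin_clz; the sample numbers are arbitrary):

#include <stdio.h>

/* Userspace model of the progress scaling in userui_update_status(). */
static int fls_demo(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	unsigned int value = 1572864, maximum = 3145728, granularity = 30;
	int bitshift = fls_demo(maximum) - 16;	/* 6 for this maximum */
	unsigned int tm = maximum >> bitshift, tv = value >> bitshift;
	unsigned int step = tv * granularity / tm;
	unsigned int next = (((step + 1) * tm / granularity) + 1) << bitshift;

	/* Prints "step 15/30, next update at numerator 1677760". */
	printf("step %u/%u, next update at numerator %u\n",
			step, granularity, next);
	return 0;
}
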
21121 diff --git a/kernel/power/user.c b/kernel/power/user.c
21122 index e819e17..193abc1 100644
21123 --- a/kernel/power/user.c
21124 +++ b/kernel/power/user.c
21125 @@ -64,6 +64,7 @@ static struct snapshot_data {
21126  } snapshot_state;
21127  
21128  atomic_t snapshot_device_available = ATOMIC_INIT(1);
21129 +EXPORT_SYMBOL_GPL(snapshot_device_available);
21130  
21131  static int snapshot_open(struct inode *inode, struct file *filp)
21132  {
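
Exporting snapshot_device_available lets TuxOnIce claim the same exclusion token that guards the swsusp /dev/snapshot interface, so the two hibernation paths cannot run at once. A sketch of the claim/release pattern, mirroring what snapshot_open() does:

/* Sketch: take the /dev/snapshot exclusion token around a cycle. */
if (!atomic_add_unless(&snapshot_device_available, -1, 0))
	return -EBUSY;		/* userspace snapshot interface is busy */

/* ... run the TuxOnIce hibernation cycle ... */

atomic_inc(&snapshot_device_available);
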
21133 diff --git a/kernel/printk.c b/kernel/printk.c
21134 index 444b770..49ddbab 100644
21135 --- a/kernel/printk.c
21136 +++ b/kernel/printk.c
21137 @@ -32,6 +32,7 @@
21138  #include <linux/security.h>
21139  #include <linux/bootmem.h>
21140  #include <linux/syscalls.h>
21141 +#include <linux/suspend.h>
21142  #include <linux/kexec.h>
21143  #include <linux/kdb.h>
21144  #include <linux/ratelimit.h>
21145 @@ -70,6 +71,7 @@ int console_printk[4] = {
21146         MINIMUM_CONSOLE_LOGLEVEL,       /* minimum_console_loglevel */
21147         DEFAULT_CONSOLE_LOGLEVEL,       /* default_console_loglevel */
21148  };
21149 +EXPORT_SYMBOL_GPL(console_printk);
21150  
21151  /*
21152   * Low level drivers may need that to know if they can schedule in
21153 @@ -974,6 +976,7 @@ void suspend_console(void)
21154         console_suspended = 1;
21155         up(&console_sem);
21156  }
21157 +EXPORT_SYMBOL_GPL(suspend_console);
21158  
21159  void resume_console(void)
21160  {
21161 @@ -983,6 +986,7 @@ void resume_console(void)
21162         console_suspended = 0;
21163         release_console_sem();
21164  }
21165 +EXPORT_SYMBOL_GPL(resume_console);
21166  
21167  /**
21168   * acquire_console_sem - lock the console system for exclusive use.
21169 diff --git a/mm/bootmem.c b/mm/bootmem.c
21170 index 58c66cc..f79d461 100644
21171 --- a/mm/bootmem.c
21172 +++ b/mm/bootmem.c
21173 @@ -25,6 +25,7 @@
21174  unsigned long max_low_pfn;
21175  unsigned long min_low_pfn;
21176  unsigned long max_pfn;
21177 +EXPORT_SYMBOL_GPL(max_pfn);
21178  
21179  #ifdef CONFIG_CRASH_DUMP
21180  /*
21181 diff --git a/mm/highmem.c b/mm/highmem.c
21182 index 66baa20..2dd71c1 100644
21183 --- a/mm/highmem.c
21184 +++ b/mm/highmem.c
21185 @@ -57,6 +57,7 @@ unsigned int nr_free_highpages (void)
21186  
21187         return pages;
21188  }
21189 +EXPORT_SYMBOL_GPL(nr_free_highpages);
21190  
21191  static int pkmap_count[LAST_PKMAP];
21192  static unsigned int last_pkmap_nr;
21193 diff --git a/mm/memory.c b/mm/memory.c
21194 index 119b7cc..ffd5f08 100644
21195 --- a/mm/memory.c
21196 +++ b/mm/memory.c
21197 @@ -1340,6 +1340,7 @@ no_page_table:
21198                 return ERR_PTR(-EFAULT);
21199         return page;
21200  }
21201 +EXPORT_SYMBOL_GPL(follow_page);
21202  
21203  int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
21204                      unsigned long start, int nr_pages, unsigned int gup_flags,
21205 diff --git a/mm/mmzone.c b/mm/mmzone.c
21206 index f5b7d17..72a6770 100644
21207 --- a/mm/mmzone.c
21208 +++ b/mm/mmzone.c
21209 @@ -14,6 +14,7 @@ struct pglist_data *first_online_pgdat(void)
21210  {
21211         return NODE_DATA(first_online_node);
21212  }
21213 +EXPORT_SYMBOL_GPL(first_online_pgdat);
21214  
21215  struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
21216  {
21217 @@ -23,6 +24,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
21218                 return NULL;
21219         return NODE_DATA(nid);
21220  }
21221 +EXPORT_SYMBOL_GPL(next_online_pgdat);
21222  
21223  /*
21224   * next_zone - helper magic for for_each_zone()
21225 @@ -42,6 +44,7 @@ struct zone *next_zone(struct zone *zone)
21226         }
21227         return zone;
21228  }
21229 +EXPORT_SYMBOL_GPL(next_zone);
21230  
21231  static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes)
21232  {
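
first_online_pgdat(), next_online_pgdat() and next_zone() are the helpers that the for_each_zone() iterator expands to, so exporting them allows modular code to walk every zone, e.g. (sketch):

/* Sketch: sum free pages across all populated zones from a module. */
static unsigned long count_free_pages(void)
{
	struct zone *zone;
	unsigned long sum = 0;

	for_each_zone(zone)
		if (populated_zone(zone))
			sum += zone_page_state(zone, NR_FREE_PAGES);

	return sum;
}
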
21233 diff --git a/mm/page-writeback.c b/mm/page-writeback.c
21234 index bbd396a..39e7638 100644
21235 --- a/mm/page-writeback.c
21236 +++ b/mm/page-writeback.c
21237 @@ -99,6 +99,7 @@ unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */
21238   * Flag that makes the machine dump writes/reads and block dirtyings.
21239   */
21240  int block_dump;
21241 +EXPORT_SYMBOL_GPL(block_dump);
21242  
21243  /*
21244   * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
21245 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
21246 index 431214b..1226024 100644
21247 --- a/mm/page_alloc.c
21248 +++ b/mm/page_alloc.c
21249 @@ -2273,6 +2273,26 @@ static unsigned int nr_free_zone_pages(int offset)
21250         return sum;
21251  }
21252  
21253 +static unsigned int nr_unallocated_zone_pages(int offset)
21254 +{
21255 +       struct zoneref *z;
21256 +       struct zone *zone;
21257 +
21258 +       /* Just pick one node, since fallback list is circular */
21259 +       unsigned int sum = 0;
21260 +
21261 +       struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
21262 +
21263 +       for_each_zone_zonelist(zone, z, zonelist, offset) {
21264 +               unsigned long high = high_wmark_pages(zone);
21265 +               unsigned long left = zone_page_state(zone, NR_FREE_PAGES);
21266 +               if (left > high)
21267 +                       sum += left - high;
21268 +       }
21269 +
21270 +       return sum;
21271 +}
21272 +
21273  /*
21274   * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
21275   */
21276 @@ -2283,6 +2303,15 @@ unsigned int nr_free_buffer_pages(void)
21277  EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
21278  
21279  /*
21281 + * Amount of RAM allocatable within ZONE_DMA and ZONE_NORMAL without reclaim
21281 + */
21282 +unsigned int nr_unallocated_buffer_pages(void)
21283 +{
21284 +       return nr_unallocated_zone_pages(gfp_zone(GFP_USER));
21285 +}
21286 +EXPORT_SYMBOL_GPL(nr_unallocated_buffer_pages);
21287 +
21288 +/*
21289   * Amount of free RAM allocatable within all zones
21290   */
21291  unsigned int nr_free_pagecache_pages(void)
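
Unlike nr_free_buffer_pages(), the new helper counts only the pages above each zone's high watermark, i.e. what can be allocated without pushing any zone into reclaim. A sketch of how hibernation code might apply it (the sizing policy shown is hypothetical):

/* Sketch: cap an allocation at what the zones can give up without
 * dropping below their high watermarks. */
unsigned int safe_now = nr_unallocated_buffer_pages();

if (pages_wanted > safe_now)
	pages_wanted = safe_now;
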
21292 diff --git a/mm/shmem.c b/mm/shmem.c
21293 index f65f840..3024d35 100644
21294 --- a/mm/shmem.c
21295 +++ b/mm/shmem.c
21296 @@ -1568,6 +1568,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
21297                 memset(info, 0, (char *)inode - (char *)info);
21298                 spin_lock_init(&info->lock);
21299                 info->flags = flags & VM_NORESERVE;
21300 +               if (flags & VM_ATOMIC_COPY)
21301 +                       inode->i_flags |= S_ATOMIC_COPY;
21302                 INIT_LIST_HEAD(&info->swaplist);
21303                 cache_no_acl(inode);
21304  
21305 diff --git a/mm/swap_state.c b/mm/swap_state.c
21306 index e10f583..86bc26a 100644
21307 --- a/mm/swap_state.c
21308 +++ b/mm/swap_state.c
21309 @@ -47,6 +47,7 @@ struct address_space swapper_space = {
21310         .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
21311         .backing_dev_info = &swap_backing_dev_info,
21312  };
21313 +EXPORT_SYMBOL_GPL(swapper_space);
21314  
21315  #define INC_CACHE_INFO(x)      do { swap_cache_info.x++; } while (0)
21316  
21317 diff --git a/mm/swapfile.c b/mm/swapfile.c
21318 index 03aa2d5..55176da 100644
21319 --- a/mm/swapfile.c
21320 +++ b/mm/swapfile.c
21321 @@ -39,7 +39,6 @@
21322  static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
21323                                  unsigned char);
21324  static void free_swap_count_continuations(struct swap_info_struct *);
21325 -static sector_t map_swap_entry(swp_entry_t, struct block_device**);
21326  
21327  static DEFINE_SPINLOCK(swap_lock);
21328  static unsigned int nr_swapfiles;
21329 @@ -480,6 +479,7 @@ noswap:
21330         spin_unlock(&swap_lock);
21331         return (swp_entry_t) {0};
21332  }
21333 +EXPORT_SYMBOL_GPL(get_swap_page);
21334  
21335  /* The only caller of this function is now the suspend routine */
21336  swp_entry_t get_swap_page_of_type(int type)
21337 @@ -502,6 +502,7 @@ swp_entry_t get_swap_page_of_type(int type)
21338         spin_unlock(&swap_lock);
21339         return (swp_entry_t) {0};
21340  }
21341 +EXPORT_SYMBOL_GPL(get_swap_page_of_type);
21342  
21343  static struct swap_info_struct *swap_info_get(swp_entry_t entry)
21344  {
21345 @@ -626,6 +627,7 @@ void swapcache_free(swp_entry_t entry, struct page *page)
21346                 spin_unlock(&swap_lock);
21347         }
21348  }
21349 +EXPORT_SYMBOL_GPL(swap_free);
21350  
21351  /*
21352   * How many references to page are currently swapped out?
21353 @@ -1302,7 +1304,7 @@ static void drain_mmlist(void)
21354   * Note that the type of this function is sector_t, but it returns page offset
21355   * into the bdev, not sector offset.
21356   */
21357 -static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
21358 +sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
21359  {
21360         struct swap_info_struct *sis;
21361         struct swap_extent *start_se;
21362 @@ -1329,6 +1331,7 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
21363                 BUG_ON(se == start_se);         /* It *must* be present */
21364         }
21365  }
21366 +EXPORT_SYMBOL_GPL(map_swap_entry);
21367  
21368  /*
21369   * Returns the page offset into bdev for the specified page's swap entry.
21370 @@ -1671,6 +1674,7 @@ out_dput:
21371  out:
21372         return err;
21373  }
21374 +EXPORT_SYMBOL_GPL(sys_swapoff);
21375  
21376  #ifdef CONFIG_PROC_FS
21377  /* iterator */
21378 @@ -2100,6 +2104,7 @@ out:
21379         }
21380         return error;
21381  }
21382 +EXPORT_SYMBOL_GPL(sys_swapon);
21383  
21384  void si_swapinfo(struct sysinfo *val)
21385  {
21386 @@ -2117,6 +2122,7 @@ void si_swapinfo(struct sysinfo *val)
21387         val->totalswap = total_swap_pages + nr_to_be_unused;
21388         spin_unlock(&swap_lock);
21389  }
21390 +EXPORT_SYMBOL_GPL(si_swapinfo);
21391  
21392  /*
21393   * Verify that a swap entry is valid and increment its swap map count.
21394 @@ -2228,6 +2234,13 @@ int swapcache_prepare(swp_entry_t entry)
21395         return __swap_duplicate(entry, SWAP_HAS_CACHE);
21396  }
21397  
21398 +
21399 +struct swap_info_struct *get_swap_info_struct(unsigned type)
21400 +{
21401 +       return swap_info[type];
21402 +}
21403 +EXPORT_SYMBOL_GPL(get_swap_info_struct);
21404 +
21405  /*
21406   * swap_lock prevents swap_map being freed. Don't grab an extra
21407   * reference on the swaphandle, it doesn't matter if it becomes unused.
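Taken together, the swapfile.c changes give a module everything needed to reserve swap space and locate it on disk: get_swap_page() and get_swap_page_of_type() allocate an entry, and the now non-static map_swap_entry() translates it to a page offset into the backing block device (a page offset, not a sector, as the comment above its definition notes). A hedged sketch; toi_reserve_swap_page is illustrative, not part of the patch:

    #include <linux/swap.h>
    #include <linux/blkdev.h>
    #include <linux/errno.h>

    /*
     * Illustrative only: grab a swap entry and find where it lives
     * on disk. map_swap_entry() returns a page offset into *bdev.
     */
    static int toi_reserve_swap_page(struct block_device **bdev,
                                     sector_t *page_off)
    {
            swp_entry_t entry = get_swap_page();

            if (!entry.val)
                    return -ENOSPC;         /* all swap devices full */

            *page_off = map_swap_entry(entry, bdev);
            return 0;
    }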
21408 diff --git a/mm/vmscan.c b/mm/vmscan.c
21409 index 9c7e57c..cd5995c 100644
21410 --- a/mm/vmscan.c
21411 +++ b/mm/vmscan.c
21412 @@ -2335,6 +2335,9 @@ void wakeup_kswapd(struct zone *zone, int order)
21413         if (!populated_zone(zone))
21414                 return;
21415  
21416 +       if (freezer_is_on())
21417 +               return;
21418 +
21419         pgdat = zone->zone_pgdat;
21420         if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
21421                 return;
21422 @@ -2391,11 +2394,11 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
21423   * LRU order by reclaiming preferentially
21424   * inactive > active > active referenced > active mapped
21425   */
21426 -unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
21427 +unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask)
21428  {
21429         struct reclaim_state reclaim_state;
21430         struct scan_control sc = {
21431 -               .gfp_mask = GFP_HIGHUSER_MOVABLE,
21432 +               .gfp_mask = mask,
21433                 .may_swap = 1,
21434                 .may_unmap = 1,
21435                 .may_writepage = 1,
21436 @@ -2421,6 +2424,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
21437  
21438         return nr_reclaimed;
21439  }
21440 +EXPORT_SYMBOL_GPL(shrink_memory_mask);
21441 +
21442 +unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
21443 +{
21444 +       return shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE);
21445 +}
21446 +EXPORT_SYMBOL_GPL(shrink_all_memory);
21447  #endif /* CONFIG_HIBERNATION */
21448  
21449  /* It's optimal to keep kswapds on the same CPUs as their memory, but
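With this split, shrink_all_memory() keeps its old behaviour as a thin wrapper, while shrink_memory_mask() lets the caller bias reclaim through the gfp mask. A hedged sketch of how TuxOnIce-style code might use it to free low memory ahead of image allocation; the helper is illustrative, and the GFP_KERNEL choice (which keeps the scan away from highmem zones) is an assumption for the example:

    #include <linux/swap.h>
    #include <linux/gfp.h>

    /*
     * Illustrative only: reclaim up to 'pages' pages, restricting the
     * scan to lowmem by passing GFP_KERNEL rather than the old
     * hard-wired GFP_HIGHUSER_MOVABLE. Returns the number reclaimed.
     */
    static unsigned long toi_free_lowmem(unsigned long pages)
    {
            return shrink_memory_mask(pages, GFP_KERNEL);
    }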