]>
Commit | Line | Data |
---|---|---|
6c68d08a | 1 | diff -raN -U 5 linux-2.4.20/CREDITS linux-2.4.20-BadRAM/CREDITS |
2 | --- linux-2.4.20/CREDITS Thu Nov 28 15:53:08 2002 | |
3 | +++ linux-2.4.20-BadRAM/CREDITS Tue Mar 25 19:38:24 2003 | |
4 | @@ -2473,10 +2473,18 @@ | |
5 | D: Author: fetchmail, Emacs VC mode, Emacs GUD mode | |
6 | S: 6 Karen Drive | |
7 | S: Malvern, Pennsylvania 19355 | |
8 | S: USA | |
9 | ||
10 | +N: Rick van Rein | |
11 | +E: vanrein@cs.utwente.nl | |
12 | +W: http://www.cs.utwente.nl/~vanrein | |
13 | +D: Memory, the BadRAM subsystem dealing with statically challenged RAM modules. | |
14 | +S: Binnenes 67 | |
15 | +S: 9407 CX Assen | |
16 | +S: The Netherlands | |
17 | + | |
18 | N: Stefan Reinauer | |
19 | E: stepan@linux.de | |
20 | W: http://www.freiburg.linux.de/~stepan/ | |
21 | D: Modularization of some filesystems | |
22 | D: /proc/sound, minor fixes | |
23 | @@ -2671,10 +2679,17 @@ | |
24 | S: Buckenhof, Germany | |
25 | ||
26 | N: Michael Schmitz | |
27 | E: | |
28 | D: Macintosh IDE Driver | |
29 | + | |
30 | +N: Nico Schmoigl | |
31 | +E: nico@writemail.com | |
32 | +W: http://webrum.uni-mannheim.de/math/schmoigl/linux/ | |
33 | +D: Migration of BadRAM patch to 2.4.4 series (with Rick van Rein) | |
34 | +S: Mannheim, BW, Germany | |
35 | +P: 2047/38FC9E03 5D DB 09 E4 3F F3 CD 09 75 59 - 11 17 9C 03 46 E3 38 FC 9E 03 | |
36 | ||
37 | N: Peter De Schrijver | |
38 | E: stud11@cc4.kuleuven.ac.be | |
39 | D: Mitsumi CD-ROM driver patches March version | |
40 | S: Molenbaan 29 | |
41 | diff -raN -U 5 linux-2.4.20/Documentation/Configure.help linux-2.4.20-BadRAM/Documentation/Configure.help | |
42 | --- linux-2.4.20/Documentation/Configure.help Sat Dec 14 16:50:40 2002 | |
43 | +++ linux-2.4.20-BadRAM/Documentation/Configure.help Tue Mar 25 19:38:24 2003 | |
44 | @@ -23381,10 +23381,25 @@ | |
45 | Disable data cache | |
46 | CONFIG_DCACHE_DISABLE | |
47 | This option allows you to run the kernel with data cache disabled. | |
48 | Say Y if you experience CPM lock-ups. | |
49 | ||
50 | +Work around bad spots in RAM | |
51 | +CONFIG_BADRAM | |
52 | + This small kernel extension makes it possible to use memory chips | |
53 | + which are not entirely correct. It works by never allocating the | |
54 | + places that are wrong. Those places are specified with the badram | |
55 | + boot option to LILO. Read /usr/src/linux/Documentation/badram.txt | |
56 | + and/or visit http://home.zonnet.nl/vanrein/badram for information. | |
57 | + | |
58 | + This option co-operates well with a second boot option from LILO | |
59 | + that starts memtest86, which is able to automatically produce the | |
60 | + patterns for the commandline in case of memory trouble. | |
61 | + | |
62 | + It is safe to say 'Y' here, and it is advised because there is no | |
63 | + performance impact. | |
64 | + | |
65 | # | |
66 | # m68k-specific kernel options | |
67 | # Documented by Chris Lawrence <mailto:quango@themall.net> et al. | |
68 | # | |
69 | Amiga support | |
70 | @@ -28566,11 +28581,11 @@ | |
71 | # LocalWords: tdfxfb TNTx HGA hgafb VERBOSEDEBUG SunTrunking SunSoft XIRTULIP | |
72 | # LocalWords: ethercards PNIC Macronix MXIC ASIX xircom Mustek MDC gphoto mdc | |
73 | # LocalWords: CramFs Cramfs uid cramfs AVM's kernelcapi PCIV cdrdao Cdparanoia | |
74 | # LocalWords: DMX Domex dmx wellington ftdi sio Accton Billington Corega FEter | |
75 | # LocalWords: MELCO LUA PNA Linksys SNC chkdsk AWACS Webcam RAMFS Ramfs ramfs | |
76 | -# LocalWords: ramfiles MAKEDEV pty WDTPCI APA apa | |
77 | +# LocalWords: ramfiles MAKEDEV pty WDTPCI APA apa BadRAM badram vanrein zonnet | |
78 | # | |
79 | # The following sets edit modes for GNU EMACS | |
80 | # Local Variables: | |
81 | # case-fold-search:nil | |
82 | # fill-prefix:" " | |
83 | diff -raN -U 5 linux-2.4.20/Documentation/badram.txt linux-2.4.20-BadRAM/Documentation/badram.txt | |
84 | --- linux-2.4.20/Documentation/badram.txt Wed Dec 31 16:00:00 1969 | |
85 | +++ linux-2.4.20-BadRAM/Documentation/badram.txt Tue Mar 25 19:38:24 2003 | |
86 | @@ -0,0 +1,266 @@ | |
87 | +INFORMATION ON USING BAD RAM MODULES | |
88 | +==================================== | |
89 | + | |
90 | +Introduction | |
91 | + RAM is getting smaller and smaller, and as a result, also more and more | |
92 | + vulnerable. This makes the manufacturing of hardware more expensive, | |
93 | + since an excessive amount of RAM chips must be discarded on account of | |
94 | + a single cell that is wrong. Similarly, static discharge may damage a | |
95 | + RAM module forever, which is usually remedied by replacing it | |
96 | + entirely. | |
97 | + | |
98 | + This is not necessary, as the BadRAM code shows: By informing the Linux | |
99 | + kernel which addresses in a RAM are damaged, the kernel simply avoids | |
100 | + ever allocating such addresses but makes all the rest available. | |
101 | + | |
102 | +Reasons for this feature | |
103 | + There are many reasons why this kernel feature is useful: | |
104 | + - Chip manufacture is resource intensive; waste less and sleep better | |
105 | + - It's another chance to promote Linux as "the flexible OS" | |
106 | + - Some laptops have their RAM soldered in... and then it fails! | |
107 | + - It's plain cool ;-) | |
108 | + | |
109 | +Running example | |
110 | + To run this project, I was given two DIMMs, 32 MB each. One, that we | |
111 | + shall use as a running example in this text, contained 512 faulty bits, | |
112 | + spread over 1/4 of the address range in a regular pattern. Some tricks | |
113 | + with a RAM tester and a few binary calculations were sufficient to | |
114 | + write these faults down in 2 longword numbers. | |
115 | + | |
116 | + The kernel recognised the correct number of pages with faults and did | |
117 | + not give them out for allocation. The allocation routines could | |
118 | + therefore proceed as normal, without any adaptation. | |
119 | + So, I gained 30 MB of DIMM which would otherwise have been thrown | |
120 | + away. After booting the kernel, the kernel behaved exactly as it | |
121 | + always had. | |
122 | + | |
123 | +Initial checks | |
124 | + If you experience RAM trouble, first read /usr/src/linux/Documentation/memory.txt | |
125 | + and try out the mem=4M trick to see if at least some initial parts | |
126 | + of your RAM work well. The BadRAM routines halt the kernel in panic | |
127 | + if the reserved area of memory (containing kernel stuff) contains | |
128 | + a faulty address. | |
129 | + | |
130 | +Running a RAM checker | |
131 | + The memory checker is not built into the kernel, to avoid delays at | |
132 | + runtime. If you experience problems that may be caused by RAM, run | |
133 | + a good RAM checker, such as | |
134 | + http://reality.sgi.com/cbrady_denver/memtest86 | |
135 | + The output of a RAM checker provides addresses that went wrong. In | |
136 | + the 32 MB chip with 512 faulty bits mentioned above, the errors were | |
137 | + found in the 8MB-16MB range (the DIMM was in slot #0) at addresses | |
138 | + xxx42f4 | |
139 | + xxx62f4 | |
140 | + xxxc2f4 | |
141 | + xxxe2f4 | |
142 | + and the error was a "sticky 1 bit", a memory bit that stayed "1" no | |
143 | + matter what was written to it. The regularity of this pattern | |
144 | + suggests the death of a buffer at the output stages of a row on one of | |
145 | + the chips. I expect such regularity to be commonplace. Finding this | |
146 | + regularity currently is human effort, but it should not be hard to | |
147 | + alter a RAM checker to capture it in some sort of pattern, possibly | |
148 | + the BadRAM patterns described below. | |
149 | + | |
150 | + By the way, if you manage to get hold of memtest86 version 2.3 or | |
151 | + beyond, you can configure the printing mode to produce BadRAM patterns, | |
152 | + which tell you exactly what you must enter on the LILO: commandline, | |
153 | + except that you shouldn't mention the added spacing. That means that | |
154 | + you can skip the following step, which saves you a *lot* of work. | |
155 | + | |
156 | + Also by the way, if your machine has an ISA memory gap in the 15M-16M | |
157 | + range that cannot be disabled, Linux can get in trouble. One way of handling that | |
158 | + situation is by specifying the total memory size to Linux with a boot | |
159 | + parameter mem=... and then to tell it to treat the 15M-16M range as | |
160 | + faulty with an additional boot parameter, for instance: | |
161 | + mem=24M badram=0x00f00000,0xfff00000 | |
162 | + if you installed 24MB of RAM in total. | |
163 | + | |
164 | +Capturing errors in a pattern | |
165 | + Instead of manually providing all 512 errors to the kernel, it's nicer | |
166 | + to generate a pattern. Since the regularity is based on address decoding | |
167 | + software, which generally takes certain bits into account and ignores | |
168 | + others, we shall provide a faulty address F, together with a bit mask M | |
169 | + that specifies which bits must be equal to F. In C code, an address A | |
170 | + is faulty if and only if | |
171 | + (F & M) == (A & M) | |
172 | + or alternately (closer to a hardware implementation): | |
173 | + !((F ^ A) & M) | |
174 | + In the example 32 MB chip, we had the faulty addresses in 8MB-16MB: | |
175 | + xxx42f4 ....0100.... | |
176 | + xxx62f4 ....0110.... | |
177 | + xxxc2f4 ....1100.... | |
178 | + xxxe2f4 ....1110.... | |
179 | + The second column represents the alternating hex digit in binary form. | |
180 | + Apparently, the first and last-but-one binary digit can be anything, | |
181 | + so the binary mask for that part is 0101. The mask for the part after | |
182 | + this is 0xfff, and the part before should select anything in the range | |
183 | + 8MB-16MB, or 0x00800000-0x01000000; this is done with a bitmask | |
184 | + 0xff80xxxx. Combining these partial masks, we get: | |
185 | + F=0x008042f4 M=0xff805fff | |
186 | + That covers everything for this DIMM; for more complicated failing | |
187 | + DIMMs, or for a combination of multiple failing DIMMs, it can be | |
188 | + necessary to set up a number of such F/M pairs. | |
189 | + | |
190 | +Rebooting Linux | |
191 | + Now that these patterns are known (and double-checked, the calculations | |
192 | + are highly error-prone... it would be neat to test them in the RAM | |
193 | + checker...) we simply restart Linux with these F/M pairs as a parameter. | |
194 | + If you normally boot as follows: | |
195 | + LILO: linux | |
196 | + you should now boot with | |
197 | + LILO: linux badram=0x008042f4,0xff805fff | |
198 | + or perhaps by mentioning more F/M pairs in an order F0,M0,F1,M1,... | |
199 | + When you provide an odd number of arguments to badram, the default mask | |
200 | + 0xffffffff (only one address matched) is applied to the pattern. | |
201 | + | |
202 | + Beware of the commandline length. At least up to LILO version 0.21, | |
203 | + the commandline is cut off after the 78th character; later versions | |
204 | + may go as far as the kernel goes, namely 255 characters. In no way is | |
205 | + it possible to enter more than 10 numbers to the badram boot option. | |
206 | + | |
207 | + When the kernel now boots, it should not give any trouble with RAM. | |
208 | + Mind you, this is under the assumption that the kernel and its data | |
209 | + storage do not overlap an erroneous part. If this happens, and the | |
210 | + kernel does not choke on it right away, it will stop with a panic. | |
211 | + You will need to provide a RAM where the initial, say 2MB, is faultless. | |
212 | + | |
213 | + Now look up your memory status with | |
214 | + dmesg | grep ^Memory: | |
215 | + which prints a single line with information like | |
216 | + Memory: 158524k/163840k available | |
217 | + (940k kernel code, | |
218 | + 412k reserved, | |
219 | + 1856k data, | |
220 | + 60k init, | |
221 | + 0k highmem, | |
222 | + 2048k BadRAM) | |
223 | + The latter entry, the badram, is 2048k to represent the loss of 2MB | |
224 | + of general purpose RAM due to the errors. Or, positively rephrased, | |
225 | + instead of throwing out 32MB as useless, you only throw out 2MB. | |
226 | + | |
227 | + If the system is stable (try compiling a few kernels, and do a few | |
228 | + finds in / or so) you may add the boot parameter to /etc/lilo.conf | |
229 | + as a line to _all_ the kernels that handle this trouble with a line | |
230 | + append="badram=0x008042f4,0xff805fff" | |
231 | + after which you run "lilo". | |
232 | + Warning: Don't experiment with these settings on your only boot image. | |
233 | + If the BadRAM overlays kernel code, data, init, or other reserved | |
234 | + memory, the kernel will halt in panic. Try settings on a test boot | |
235 | + image first, and if you get a panic you should change the order of | |
236 | + your DIMMs [which may involve buying a new one just to be able to | |
237 | + change the order]. | |
238 | + | |
239 | + You are allowed to enter any number of BadRAM patterns in all the | |
240 | + places documented in this file. They will all apply. It is even | |
241 | + possible to mention several BadRAM patterns in a single place. The | |
242 | + completion of an odd number of arguments with the default mask is | |
243 | + done separately for each badram=... option. | |
244 | + | |
245 | +Kernel Customisation | |
246 | + Some people prefer to enter their badram patterns in the kernel, and | |
247 | + this is also possible. In mm/page_alloc.c there is an array of unsigned | |
248 | + long integers into which the parameters can be entered, prefixed with | |
249 | + the number of integers (twice the number of patterns). The array is | |
250 | + named badram_custom and it will be added to the BadRAM list whenever an | |
251 | + option 'badram' is provided on the commandline when booting, either | |
252 | + with or without additional patterns. | |
253 | + | |
254 | + For the previous example, the code would become | |
255 | + | |
256 | + static unsigned long __init badram_custom[] = { | |
257 | + 2, // Number of longwords that follow, as F/M pairs | |
258 | + 0x008042f4L, 0xff805fffL, | |
259 | + }; | |
260 | + | |
261 | + Even on this place you may assume the default mask to be filled in | |
262 | + when you enter an odd number of longwords. Specify the number of | |
263 | + longwords to be 0 to avoid influence of this custom BadRAM list. | |
264 | + | |
265 | +BadRAM classification | |
266 | + This technique may start a lively market for "dead" RAM. It is important | |
267 | + to realise that some RAMs are more dead than others. So, instead of | |
268 | + just providing a RAM size, it is also important to know the BadRAM | |
269 | + class, which is defined as follows: | |
270 | + | |
271 | + A BadRAM class N means that at most 2^N bytes have a problem, | |
272 | + and that all problems with the RAMs are persistent: They | |
273 | + are predictable and always show up. | |
274 | + | |
275 | + The DIMM that serves as an example here was of class 9, since 512=2^9 | |
276 | + errors were found. Higher classes are worse, "correct" RAM is of class | |
277 | + -1 (or even less, at your choice). | |
278 | + Class N also means that the bitmask for your chip (if there's just one, | |
279 | + that is) counts N bits "0" and it means that (if no faults fall in the | |
280 | + same page) an amount of 2^N*PAGESIZE memory is lost, in the example on | |
281 | + an i386 architecture that would be 2^9*4k=2MB, which accounts for the | |
282 | + initial claim of 30MB RAM gained with this DIMM. | |
283 | + | |
284 | + Note that this scheme has deliberately been defined to be independent | |
285 | + of memory technology and of computer architecture. | |
286 | + | |
287 | +Known Bugs | |
288 | + LILO is known to cut off commandlines which are too long. For the | |
289 | + lilo-0.21 distribution, a commandline may not exceed 78 characters, | |
290 | + while actually, 255 would be possible [on i386, kernel 2.2.16]. | |
291 | + LILO does _not_ report too-long commandlines, but the error will | |
292 | + show up as either a panic at boot time, stating | |
293 | + panic: BadRAM page in initial area | |
294 | + or the dmesg line starting with Memory: will mention an unpredicted | |
295 | + number of kilobytes. (Note that the latter number only includes | |
296 | + errors in accessed memory.) | |
297 | + | |
298 | +Future Possibilities | |
299 | + It would be possible to use even more of the faulty RAMs by employing | |
300 | + them for slabs. The smaller allocation granularity of slabs makes it | |
301 | + possible to throw out just, say, 32 bytes surrounding an error. This | |
302 | + would mean that the example DIMM only loses 16kB instead of 2MB. | |
303 | + It might even be possible to allocate the slabs in such a way that, | |
304 | + where possible, the remaining bytes in a slab structure are allocated | |
305 | + around the error, reducing the RAM loss to 0 in the optimal situation! | |
306 | + | |
307 | + However, this yield is somewhat faked: It is possible to provide 512 | |
308 | + pages of 32-byte slabs, but it is not certain that anyone would use | |
309 | + that many 32-byte slabs at any time. | |
310 | + | |
311 | + A better solution might be to alter the page allocation for a slab to | |
312 | + have a preference for BadRAM pages, and give those special treatment. | |
313 | + This way, the BadRAM would be spread over all the slabs, which seems | |
314 | + more likely to be a `true' pay-off. This would yield more overhead at | |
315 | + slab allocation time, but on the other hand, by the nature of slabs, | |
316 | + such allocations are made as rare as possible, so it might not matter | |
317 | + that much. I am uncertain where to go. | |
318 | + | |
319 | + Many suggestions have been made to insert a RAM checker at boot time; | |
320 | + since this would leave the time to do only very meager checking, it | |
321 | + is not a reasonable option; we already have a BIOS doing that in most | |
322 | + systems! | |
323 | + | |
324 | + It would be interesting to integrate this functionality with the | |
325 | + self-verifying nature of ECC RAM. These memories can even distinguish | |
326 | + between recoverable and unrecoverable errors! Such memory has been | |
327 | + handled in older operating systems by `testing' once-failed memory | |
328 | + blocks for a while, by placing only (reloadable) program code in it. | |
329 | + Unfortunately, I possess no faulty ECC modules to work this out. | |
330 | + | |
331 | +Names and Places | |
332 | + The home page of this project is on | |
333 | + http://rick.vanrein.org/linux/badram | |
334 | + This page also links to Nico Schmoigl's experimental extensions to | |
335 | + this patch (with debugging and a few other fancy things). | |
336 | + | |
337 | + In case you have experiences with the BadRAM software which differ from | |
338 | + the test reportings on that site, I hope you will mail me with that | |
339 | + new information. | |
340 | + | |
341 | + The BadRAM project is an idea and implementation by | |
342 | + Rick van Rein | |
343 | + Binnenes 67 | |
344 | + 9407 CX Assen | |
345 | + The Netherlands | |
346 | + vanrein@cs.utwente.nl | |
347 | + If you like it, a postcard would be much appreciated ;-) | |
348 | + | |
349 | + | |
350 | + Enjoy, | |
351 | + -Rick. | |
352 | + | |
353 | diff -raN -U 5 linux-2.4.20/Documentation/kernel-parameters.txt linux-2.4.20-BadRAM/Documentation/kernel-parameters.txt | |
354 | --- linux-2.4.20/Documentation/kernel-parameters.txt Thu Nov 28 15:53:08 2002 | |
355 | +++ linux-2.4.20-BadRAM/Documentation/kernel-parameters.txt Tue Mar 25 19:38:24 2003 | |
356 | @@ -12,10 +12,11 @@ | |
357 | ||
358 | ACPI ACPI support is enabled. | |
359 | APIC APIC support is enabled. | |
360 | APM Advanced Power Management support is enabled. | |
361 | AX25 Appropriate AX.25 support is enabled. | |
362 | + BADRAM Support for faulty RAM chips is enabled. | |
363 | CD Appropriate CD support is enabled. | |
364 | DEVFS devfs support is enabled. | |
365 | DRM Direct Rendering Management support is enabled. | |
366 | EFI EFI Partitioning (GPT) is enabled | |
367 | EIDE EIDE/ATAPI support is enabled. | |
368 | @@ -107,10 +108,13 @@ | |
369 | atascsi= [HW,SCSI] Atari SCSI. | |
370 | ||
371 | awe= [HW,SOUND] | |
372 | ||
373 | aztcd= [HW,CD] Aztec CD driver. | |
374 | + | |
375 | + badram= [BADRAM] Avoid allocating faulty RAM addresses. | |
376 | + | |
377 | ||
378 | baycom_epp= [HW,AX25] | |
379 | ||
380 | baycom_par= [HW,AX25] BayCom Parallel Port AX.25 Modem. | |
381 | ||
382 | diff -raN -U 5 linux-2.4.20/Documentation/memory.txt linux-2.4.20-BadRAM/Documentation/memory.txt | |
383 | --- linux-2.4.20/Documentation/memory.txt Fri Nov 9 13:58:02 2001 | |
384 | +++ linux-2.4.20-BadRAM/Documentation/memory.txt Tue Mar 25 19:38:24 2003 | |
385 | @@ -16,10 +16,18 @@ | |
386 | a certain quantity of memory. If you have one of these | |
387 | motherboards, your system will be SLOWER, not faster | |
388 | as you add more memory. Consider exchanging your | |
389 | motherboard. | |
390 | ||
391 | + 4) A static discharge or production fault causes a RAM module | |
392 | + to have (predictable) errors, usually meaning that certain | |
393 | + bits cannot be set or reset. Instead of throwing away your | |
394 | + RAM module, you may read /usr/src/linux/Documentation/badram.txt | |
395 | + to learn how to detect, locate and circumvent such errors | |
396 | + in your RAM module. | |
397 | + | |
398 | + | |
399 | All of these problems can be addressed with the "mem=XXXM" boot option | |
400 | (where XXX is the size of RAM to use in megabytes). | |
401 | It can also tell Linux to use less memory than is actually installed. | |
402 | ||
403 | See the documentation of your boot loader (LILO, loadlin, etc.) about | |
404 | @@ -43,10 +51,12 @@ | |
405 | ||
406 | * Disabling the cache from the BIOS. | |
407 | ||
408 | * Try passing the "mem=4M" option to the kernel to limit | |
409 | Linux to using a very small amount of memory. | |
410 | + If this helps, read /usr/src/linux/Documentation/badram.txt | |
411 | + to learn how to find and circumvent memory errors. | |
412 | ||
413 | ||
414 | Other tricks: | |
415 | ||
416 | * Try passing the "no-387" option to the kernel to ignore | |
417 | diff -raN -U 5 linux-2.4.20/arch/i386/config.in linux-2.4.20-BadRAM/arch/i386/config.in | |
418 | --- linux-2.4.20/arch/i386/config.in Fri Nov 29 20:49:25 2002 | |
419 | +++ linux-2.4.20-BadRAM/arch/i386/config.in Tue Mar 25 19:38:24 2003 | |
420 | @@ -315,10 +315,12 @@ | |
421 | bool ' RTC stores time in GMT' CONFIG_APM_RTC_IS_GMT | |
422 | bool ' Allow interrupts during APM BIOS calls' CONFIG_APM_ALLOW_INTS | |
423 | bool ' Use real mode APM BIOS call to power off' CONFIG_APM_REAL_MODE_POWER_OFF | |
424 | fi | |
425 | ||
426 | +bool 'Work around bad spots in RAM' CONFIG_BADRAM | |
427 | + | |
428 | endmenu | |
429 | ||
430 | source abi/Config.in | |
431 | ||
432 | source drivers/mtd/Config.in | |
433 | diff -raN -U 5 linux-2.4.20/arch/i386/defconfig linux-2.4.20-BadRAM/arch/i386/defconfig | |
434 | --- linux-2.4.20/arch/i386/defconfig Thu Nov 28 15:53:09 2002 | |
435 | +++ linux-2.4.20-BadRAM/arch/i386/defconfig Tue Mar 25 19:38:24 2003 | |
436 | @@ -79,10 +79,11 @@ | |
437 | CONFIG_PCI_DIRECT=y | |
438 | CONFIG_PCI_NAMES=y | |
439 | # CONFIG_EISA is not set | |
440 | # CONFIG_MCA is not set | |
441 | CONFIG_HOTPLUG=y | |
442 | +CONFIG_BADRAM=y | |
443 | ||
444 | # | |
445 | # PCMCIA/CardBus support | |
446 | # | |
447 | CONFIG_PCMCIA=y | |
448 | diff -raN -U 5 linux-2.4.20/arch/i386/mm/init.c linux-2.4.20-BadRAM/arch/i386/mm/init.c | |
449 | --- linux-2.4.20/arch/i386/mm/init.c Thu Nov 28 15:53:09 2002 | |
450 | +++ linux-2.4.20-BadRAM/arch/i386/mm/init.c Thu Mar 27 20:45:43 2003 | |
451 | @@ -90,11 +90,11 @@ | |
452 | } | |
453 | #endif /* CONFIG_HIGHMEM */ | |
454 | ||
455 | void show_mem(void) | |
456 | { | |
457 | - int i, total = 0, reserved = 0; | |
458 | + int i, total = 0, reserved = 0, badram = 0; | |
459 | int shared = 0, cached = 0; | |
460 | int highmem = 0; | |
461 | ||
462 | printk("Mem-info:\n"); | |
463 | show_free_areas(); | |
464 | @@ -104,18 +104,25 @@ | |
465 | total++; | |
466 | if (PageHighMem(mem_map+i)) | |
467 | highmem++; | |
468 | if (PageReserved(mem_map+i)) | |
469 | reserved++; | |
470 | +#ifdef CONFIG_BADRAM | |
471 | + if (PageBad(mem_map+1)) | |
472 | + badram++; | |
473 | +#endif | |
474 | else if (PageSwapCache(mem_map+i)) | |
475 | cached++; | |
476 | else if (page_count(mem_map+i)) | |
477 | shared += page_count(mem_map+i) - 1; | |
478 | } | |
479 | printk("%d pages of RAM\n", total); | |
480 | printk("%d pages of HIGHMEM\n",highmem); | |
481 | printk("%d reserved pages\n",reserved); | |
482 | +#ifdef CONFIG_BADRAM | |
483 | + printk("%d pages of BadRAM\n",badram); | |
484 | +#endif | |
485 | printk("%d pages shared\n",shared); | |
486 | printk("%d pages swap cached\n",cached); | |
487 | printk("%ld pages in page table cache\n",pgtable_cache_size); | |
488 | show_buffers(); | |
489 | } | |
490 | @@ -445,12 +452,17 @@ | |
491 | return 1; | |
492 | return 0; | |
493 | } | |
494 | ||
495 | #ifdef CONFIG_HIGHMEM | |
496 | -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) | |
497 | +/** | |
498 | + * @param bad set on return to whether the page is bad RAM | |
499 | + */ | |
500 | +void __init one_highpage_init(struct page *page, int pfn, int bad_ppro, | |
501 | + _Bool *bad) | |
502 | { | |
503 | + *bad = 0; | |
504 | if (!page_is_ram(pfn)) { | |
505 | SetPageReserved(page); | |
506 | return; | |
507 | } | |
508 | ||
509 | @@ -460,11 +472,17 @@ | |
510 | } | |
511 | ||
512 | ClearPageReserved(page); | |
513 | set_bit(PG_highmem, &page->flags); | |
514 | atomic_set(&page->count, 1); | |
515 | - __free_page(page); | |
516 | +#ifdef CONFIG_BADRAM | |
517 | + if (PageBad(page)) | |
518 | + *bad = 1; | |
519 | + else | |
520 | +#else | |
521 | + __free_page(page); | |
522 | +#endif | |
523 | totalhigh_pages++; | |
524 | } | |
525 | #endif /* CONFIG_HIGHMEM */ | |
526 | ||
527 | static void __init set_max_mapnr_init(void) | |
528 | @@ -476,39 +494,53 @@ | |
529 | #else | |
530 | max_mapnr = num_mappedpages = num_physpages = max_low_pfn; | |
531 | #endif | |
532 | } | |
533 | ||
534 | -static int __init free_pages_init(void) | |
535 | + | |
536 | +/** structure for returning multiple values from free_pages_init() */ | |
537 | +typedef struct { int reserved, bad; } PageCount; | |
538 | + | |
539 | +static PageCount __init free_pages_init(void) | |
540 | { | |
541 | extern int ppro_with_ram_bug(void); | |
542 | int bad_ppro, reservedpages, pfn; | |
543 | + PageCount pages = {0,0}; | |
544 | ||
545 | bad_ppro = ppro_with_ram_bug(); | |
546 | ||
547 | /* this will put all low memory onto the freelists */ | |
548 | totalram_pages += free_all_bootmem(); | |
549 | ||
550 | reservedpages = 0; | |
551 | for (pfn = 0; pfn < max_low_pfn; pfn++) { | |
552 | /* | |
553 | - * Only count reserved RAM pages | |
554 | + * Only count reserved and bad RAM pages | |
555 | */ | |
556 | if (page_is_ram(pfn) && PageReserved(mem_map+pfn)) | |
557 | reservedpages++; | |
558 | +#ifdef CONFIG_BADRAM | |
559 | + if (page_is_ram(pfn) && PageBad(mem_map+pfn)) | |
560 | + pages.bad++; | |
561 | +#endif | |
562 | } | |
563 | #ifdef CONFIG_HIGHMEM | |
564 | - for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) | |
565 | - one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); | |
566 | + for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) { | |
567 | + _Bool bad; | |
568 | + one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro, &bad); | |
569 | + if (bad) pages.bad++; | |
570 | + } | |
571 | totalram_pages += totalhigh_pages; | |
572 | #endif | |
573 | - return reservedpages; | |
574 | + pages.reserved = reservedpages; | |
575 | + return pages; | |
576 | } | |
577 | ||
578 | void __init mem_init(void) | |
579 | { | |
580 | int codesize, reservedpages, datasize, initsize; | |
581 | + PageCount pages; | |
582 | ||
583 | if (!mem_map) | |
584 | BUG(); | |
585 | ||
586 | set_max_mapnr_init(); | |
587 | @@ -516,25 +548,38 @@ | |
588 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | |
589 | ||
590 | /* clear the zero-page */ | |
591 | memset(empty_zero_page, 0, PAGE_SIZE); | |
592 | ||
593 | - reservedpages = free_pages_init(); | |
594 | + reservedpages = (pages = free_pages_init()).reserved; | |
595 | ||
596 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | |
597 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | |
598 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | |
599 | ||
600 | +#ifdef CONFIG_BADRAM | |
601 | + printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem, %ldk BadRAM)\n", | |
602 | + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | |
603 | + max_mapnr << (PAGE_SHIFT-10), | |
604 | + codesize >> 10, | |
605 | + reservedpages << (PAGE_SHIFT-10), | |
606 | + datasize >> 10, | |
607 | + initsize >> 10, | |
608 | + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)), | |
609 | + pages.bad << (PAGE_SHIFT-10) | |
610 | + ); | |
611 | +#else | |
612 | printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", | |
613 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | |
614 | max_mapnr << (PAGE_SHIFT-10), | |
615 | codesize >> 10, | |
616 | reservedpages << (PAGE_SHIFT-10), | |
617 | datasize >> 10, | |
618 | initsize >> 10, | |
619 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) | |
620 | ); | |
621 | +#endif | |
622 | ||
623 | #if CONFIG_X86_PAE | |
624 | if (!cpu_has_pae) | |
625 | panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); | |
626 | #endif | |
627 | diff -raN -U 5 linux-2.4.20/include/asm-i386/page.h linux-2.4.20-BadRAM/include/asm-i386/page.h | |
628 | --- linux-2.4.20/include/asm-i386/page.h Fri Aug 2 17:39:45 2002 | |
629 | +++ linux-2.4.20-BadRAM/include/asm-i386/page.h Tue Mar 25 19:38:24 2003 | |
630 | @@ -130,10 +130,11 @@ | |
631 | #define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) | |
632 | #define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) | |
633 | #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) | |
634 | #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) | |
635 | #define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) | |
636 | +#define phys_to_page(x) (mem_map + ((unsigned long)(x) >> PAGE_SHIFT)) | |
637 | #define VALID_PAGE(page) ((page - mem_map) < max_mapnr) | |
638 | ||
639 | #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ | |
640 | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) | |
641 | ||
642 | diff -raN -U 5 linux-2.4.20/include/linux/mm.h linux-2.4.20-BadRAM/include/linux/mm.h | |
643 | --- linux-2.4.20/include/linux/mm.h Fri Aug 2 17:39:45 2002 | |
644 | +++ linux-2.4.20-BadRAM/include/linux/mm.h Tue Mar 25 19:38:24 2003 | |
645 | @@ -313,10 +313,11 @@ | |
646 | #define PG_highmem 11 | |
647 | #define PG_checked 12 /* kill me in 2.5.<early>. */ | |
648 | #define PG_arch_1 13 | |
649 | #define PG_reserved 14 | |
650 | #define PG_launder 15 /* written out by VM pressure.. */ | |
651 | +#define PG_badram 16 | |
652 | ||
653 | /* Make it prettier to test the above... */ | |
654 | #define UnlockPage(page) unlock_page(page) | |
655 | #define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) | |
656 | #define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) | |
657 | @@ -403,10 +404,13 @@ | |
658 | #define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) | |
659 | #define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) | |
660 | #define PageSlab(page) test_bit(PG_slab, &(page)->flags) | |
661 | #define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) | |
662 | #define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) | |
663 | +#define PageBad(page) test_bit(PG_badram, &(page)->flags) /* is PG_badram set on this page? */ | |
664 | +#define PageSetBad(page) set_bit(PG_badram, &(page)->flags) /* set PG_badram on this page */ | |
665 | +#define PageTestandSetBad(page) test_and_set_bit(PG_badram, &(page)->flags) /* set PG_badram, yielding test_and_set_bit's result */ | |
666 | #define PageReserved(page) test_bit(PG_reserved, &(page)->flags) | |
667 | ||
668 | #define PageActive(page) test_bit(PG_active, &(page)->flags) | |
669 | #define SetPageActive(page) set_bit(PG_active, &(page)->flags) | |
670 | #define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) | |
671 | diff -raN -U 5 linux-2.4.20/mm/bootmem.c linux-2.4.20-BadRAM/mm/bootmem.c | |
672 | --- linux-2.4.20/mm/bootmem.c Thu Nov 28 15:53:15 2002 | |
673 | +++ linux-2.4.20-BadRAM/mm/bootmem.c Tue Mar 25 19:38:24 2003 | |
674 | @@ -255,12 +255,19 @@ | |
675 | idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); | |
676 | for (i = 0; i < idx; i++, page++) { | |
677 | if (!test_bit(i, bdata->node_bootmem_map)) { | |
678 | count++; | |
679 | ClearPageReserved(page); | |
680 | +#ifdef CONFIG_BADRAM | |
681 | + if (!PageBad(page)) { | |
682 | + set_page_count(page, 1); | |
683 | + __free_page(page); | |
684 | + } | |
685 | +#else | |
686 | set_page_count(page, 1); | |
687 | __free_page(page); | |
688 | +#endif | |
689 | } | |
690 | } | |
691 | total += count; | |
692 | ||
693 | /* | |
694 | @@ -270,12 +277,19 @@ | |
695 | page = virt_to_page(bdata->node_bootmem_map); | |
696 | count = 0; | |
697 | for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { | |
698 | count++; | |
699 | ClearPageReserved(page); | |
700 | +#ifdef CONFIG_BADRAM | |
701 | + if (!PageBad(page)) { | |
702 | + set_page_count(page, 1); | |
703 | + __free_page(page); | |
704 | + } | |
705 | +#else | |
706 | set_page_count(page, 1); | |
707 | __free_page(page); | |
708 | +#endif | |
709 | } | |
710 | total += count; | |
711 | bdata->node_bootmem_map = NULL; | |
712 | ||
713 | return total; | |
714 | diff -raN -U 5 linux-2.4.20/mm/page_alloc.c linux-2.4.20-BadRAM/mm/page_alloc.c | |
715 | --- linux-2.4.20/mm/page_alloc.c Thu Nov 28 15:53:15 2002 | |
716 | +++ linux-2.4.20-BadRAM/mm/page_alloc.c Tue Mar 25 19:38:24 2003 | |
717 | @@ -8,10 +8,11 @@ | |
718 | * Swap reorganised 29.12.95, Stephen Tweedie | |
719 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 | |
720 | * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999 | |
721 | * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 | |
722 | * Zone balancing, Kanoj Sarcar, SGI, Jan 2000 | |
723 | + * BadRAM handling, Rick van Rein, Feb 2001 | |
724 | */ | |
725 | ||
726 | #include <linux/config.h> | |
727 | #include <linux/mm.h> | |
728 | #include <linux/swap.h> | |
729 | @@ -883,5 +884,98 @@ | |
730 | printk("\n"); | |
731 | return 1; | |
732 | } | |
733 | ||
734 | __setup("memfrac=", setup_mem_frac); | |
735 | + | |
736 | + | |
737 | +#ifdef CONFIG_BADRAM | |
738 | + | |
739 | +/* Advance *addrp to the next higher address that keeps every bit set in | |
740 | + * mask unchanged. Return 1 after storing it, or 0 (leaving *addrp as is) if there is none. | |
741 | + */ | |
742 | +static int __init next_masked_address (unsigned long *addrp, unsigned long mask) | |
743 | +{ | |
744 | + unsigned long inc=1; /* single-bit increment, doubled to walk upwards */ | |
745 | + unsigned long newval = *addrp; | |
746 | + while (inc & mask) /* start at the lowest bit that is clear in mask */ | |
747 | + inc += inc; | |
748 | + while (inc != 0) { /* inc becomes 0 once it is doubled past the top bit */ | |
749 | + newval += inc; | |
750 | + newval &= ~mask; /* drop whatever the add did to the masked bits... */ | |
751 | + newval |= ((*addrp) & mask); /* ...and restore them from the original address */ | |
752 | + if (newval > *addrp) { | |
753 | + *addrp = newval; | |
754 | + return 1; | |
755 | + } | |
756 | + do { /* no larger value yet: double inc past the bits clear in mask... */ | |
757 | + inc += inc; | |
758 | + } while (inc & ~mask); | |
759 | + while (inc & mask) /* ...and then past the bits set in mask */ | |
760 | + inc += inc; | |
761 | + } | |
762 | + return 0; | |
763 | +} | |
764 | +/* Set PG_badram on every page matched by the address/mask pairs in argv[0..argc-1]; a trailing lone address gets mask ~0L. */ | |
765 | +/* NOTE(review): reserve_bootmem runs only when PageTestandSetBad is non-zero; test_and_set_bit presumably returns the OLD bit -- confirm intent. */ | |
766 | +void __init badram_markpages (int argc, unsigned long *argv) { | |
767 | + unsigned long addr, mask; | |
768 | + while (argc-- > 0) { | |
769 | + addr = *argv++; | |
770 | + mask = (argc-- > 0) ? *argv++ : ~0L; | |
771 | + mask |= ~PAGE_MASK; // Optimisation: also fix the ~PAGE_MASK bits (presumably the in-page offset) so they are never iterated | |
772 | + addr &= mask; // Normalisation: clear the bits that mask leaves free, so iteration starts with them at zero | |
773 | + do { | |
774 | + struct page *pg = phys_to_page(addr); | |
775 | +printk ("%05lx ", __pa(__va(addr)) >> PAGE_SHIFT); | |
776 | +printk ("=%05lx/%05lx ", pg-mem_map, max_mapnr); | |
777 | + // if (VALID_PAGE(pg)) {   NOTE(review): range check is disabled, so pg is not validated against max_mapnr | |
778 | + if (PageTestandSetBad (pg)) { | |
779 | + reserve_bootmem (addr, PAGE_SIZE); | |
780 | +printk ("BAD "); | |
781 | + } | |
782 | +else printk ("BFR "); | |
783 | + // } | |
784 | +// else printk ("INV "); | |
785 | + } while (next_masked_address (&addr,mask)); /* step to the next address matching the pattern, if any */ | |
786 | + } | |
787 | +} | |
788 | + | |
789 | + | |
790 | + | |
791 | +/*********** CONFIG_BADRAM: CUSTOMISABLE SECTION STARTS HERE ******************/ | |
792 | + | |
793 | + | |
794 | +// Enter your custom BadRAM patterns here as pairs of unsigned long integers. | |
795 | +// For more information on these F/M pairs, refer to Documentation/badram.txt | |
796 | +// Element 0 is the number of longwords that follow it; the rest are the F/M pairs. | |
797 | +// Tagged __initdata rather than __init: this is a variable, not a function. | |
798 | +static unsigned long badram_custom[] __initdata = { | |
799 | + 0, // Number of longwords that follow, as F/M pairs | |
800 | +}; | |
801 | + | |
802 | + | |
803 | +/*********** CONFIG_BADRAM: CUSTOMISABLE SECTION ENDS HERE ********************/ | |
804 | + | |
805 | +/* Handler for the "badram" boot option: marks the pages described by the option string, then those listed in badram_custom. */ | |
806 | +/* str: option text handed in by the __setup machinery; patterns are parsed only if it begins with '='. Always returns 0. */ | |
807 | +static int __init badram_setup (char *str) | |
808 | +{ | |
809 | + unsigned long opts[3]; /* opts[0] is used as the value count, opts[1] and opts[2] as address and mask */ | |
810 | + if (!mem_map) BUG(); /* marking goes through phys_to_page, which indexes mem_map */ | |
811 | +printk ("PAGE_OFFSET=0x%08lx\n", PAGE_OFFSET); | |
812 | +printk ("BadRAM option is %s\n", str); | |
813 | + if (*str++ == '=') | |
814 | + while (str=get_options (str, 3, (int *) opts), *opts) { /* NOTE(review): unsigned long[] passed as int* -- only lines up where int and long have the same size */ | |
815 | +printk (" --> marking 0x%08lx, 0x%08lx [%ld]\n", opts[1], opts[2], opts[0]); | |
816 | + badram_markpages (*opts, opts+1); | |
817 | + if (*opts==1) /* a lone address without a mask ends the parsing loop */ | |
818 | + break; | |
819 | + }; | |
820 | + badram_markpages (*badram_custom, badram_custom+1); /* built-in patterns: first element is the count */ | |
821 | + return 0; | |
822 | +} | |
823 | +/* Attach badram_setup to the "badram" keyword via __setup. */ | |
824 | +__setup("badram", badram_setup); | |
825 | + | |
826 | +#endif /* CONFIG_BADRAM */ | |
827 | + |