]> git.pld-linux.org Git - packages/coreutils.git/blame - coreutils-fmt-wchars.patch
- add runuser program (see man 1 runuser)
[packages/coreutils.git] / coreutils-fmt-wchars.patch
CommitLineData
29623d34 1--- coreutils-6.7/src/fmt.c.orig 2006-10-22 18:54:15.000000000 +0200
eb0f91a0 2+++ coreutils-6.7/src/fmt.c 2007-02-13 17:20:22.000000000 +0100
29623d34 3@@ -18,6 +18,8 @@
4 /* Written by Ross Paterson <rap@doc.ic.ac.uk>. */
5
6 #include <config.h>
7+#include <wchar.h>
+#include <wctype.h>
8 #include <stdio.h>
9 #include <sys/types.h>
10 #include <getopt.h>
11@@ -39,7 +40,7 @@
12 /* The following parameters represent the program's idea of what is
13 "best". Adjust to taste, subject to the caveats given. */
14
15-/* Default longest permitted line length (max_width). */
16+/* Default longest permitted line width (max_width). */
17 #define WIDTH 75
18
19 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
20@@ -51,7 +52,7 @@
21 #define DEF_INDENT 3
22
23 /* Costs and bonuses are expressed as the equivalent departure from the
24- optimal line length, multiplied by 10. e.g. assigning something a
25+ optimal line width, multiplied by 10. e.g. assigning something a
26 cost of 50 means that it is as bad as a line 5 characters too short
27 or too long. The definition of SHORT_COST(n) should not be changed.
28 However, EQUIV(n) may need tuning. */
29@@ -78,11 +79,11 @@
30 #define LINE_COST EQUIV (70)
31
32 /* Cost of breaking a line after the first word of a sentence, where
33- the length of the word is N. */
34+ the width of the word is N. */
35 #define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
36
37 /* Cost of breaking a line before the last word of a sentence, where
38- the length of the word is N. */
39+ the width of the word is N. */
40 #define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
41
42 /* Bonus for breaking a line at the end of a sentence. */
43@@ -114,11 +115,30 @@
44 #define MAXWORDS 1000
45 #define MAXCHARS 5000
46
47+/* Wide character support */
48+
49+static wint_t
50+xgetwc (FILE *stream)
51+{
52+ wint_t c = getwc (stream);
53+ if (c == WEOF && ferror (stream))
54+ error (EXIT_FAILURE, errno, _("read error"));
55+ return c;
56+}
57+
58+static inline int
59+xwcwidth (wchar_t wc)
60+{
61+ int w = wcwidth (wc);
62+ return w < 0 ? 0 : w;
63+}
64+
65 /* Extra ctype(3)-style macros. */
66
67-#define isopen(c) (strchr ("([`'\"", c) != NULL)
68-#define isclose(c) (strchr (")]'\"", c) != NULL)
69-#define isperiod(c) (strchr (".?!", c) != NULL)
70+#define isopen(c) \
71+ (wcschr (L"([`'\"\u2018\u201A\u201B\u201C\u201E\u201F", c) != NULL)
72+#define isclose(c) (wcschr (L")]'\"\u2018\u2019\u201C\u201D", c) != NULL)
73+#define isperiod(c) (wcschr (L".?!", c) != NULL)
74
75 /* Size of a tab stop, for expansion on input and re-introduction on
76 output. */
77@@ -133,8 +153,9 @@
78
79 /* Static attributes determined during input. */
80
81- const char *text; /* the text of the word */
82- int length; /* length of this word */
83+ const wchar_t *text; /* the text of the word */
84+ int length; /* length of this word, in characters */
85+ int width; /* width of this word, in columns */
86 int space; /* the size of the following space */
87 unsigned int paren:1; /* starts with open paren */
88 unsigned int period:1; /* ends in [.?!])* */
89@@ -143,7 +164,7 @@
90
91 /* The remaining fields are computed during the optimization. */
92
93- int line_length; /* length of the best line starting here */
94+ int line_width; /* width of the best line starting here */
95 COST best_cost; /* cost of best paragraph starting here */
96 WORD *next_break; /* break which achieves best_cost */
97 };
98@@ -153,16 +174,16 @@
99 static void set_prefix (char *p);
100 static void fmt (FILE *f);
101 static bool get_paragraph (FILE *f);
102-static int get_line (FILE *f, int c);
103-static int get_prefix (FILE *f);
104-static int get_space (FILE *f, int c);
105-static int copy_rest (FILE *f, int c);
106-static bool same_para (int c);
107+static wint_t get_line (FILE *f, wint_t c);
108+static wint_t get_prefix (FILE *f);
109+static wint_t get_space (FILE *f, wint_t c);
110+static wint_t copy_rest (FILE *f, wint_t c);
111+static bool same_para (wint_t c);
112 static void flush_paragraph (void);
113 static void fmt_paragraph (void);
114 static void check_punctuation (WORD *w);
115 static COST base_cost (WORD *this);
116-static COST line_cost (WORD *next, int len);
117+static COST line_cost (WORD *next, int wid);
118 static void put_paragraph (WORD *finish);
119 static void put_line (WORD *w, int indent);
120 static void put_word (WORD *w);
121@@ -185,8 +206,11 @@
122 /* If true, don't preserve inter-word spacing (default false). */
123 static bool uniform;
124
125+/* How many spaces to put after a sentence (1 or 2). */
126+static int sentence_space;
127+
128 /* Prefix minus leading and trailing spaces (default ""). */
129-static const char *prefix;
130+static wchar_t *prefix;
131
132 /* User-supplied maximum line width (default WIDTH). The only output
133 lines longer than this will each comprise a single word. */
134@@ -194,14 +218,14 @@
135
136 /* Values derived from the option values. */
137
138-/* The length of prefix minus leading space. */
139-static int prefix_full_length;
140+/* The width of prefix minus leading space. */
141+static int prefix_full_width;
142
143-/* The length of the leading space trimmed from the prefix. */
144+/* The width of the leading space trimmed from the prefix. */
145 static int prefix_lead_space;
146
147-/* The length of prefix minus leading and trailing space. */
148-static int prefix_length;
149+/* The width of prefix minus leading and trailing space. */
150+static int prefix_width;
151
152 /* The preferred width of text lines, set to LEEWAY % less than max_width. */
153 static int best_width;
154@@ -216,10 +240,10 @@
155
156 /* Space for the paragraph text -- longer paragraphs are handled neatly
157 (cf. flush_paragraph()). */
158-static char parabuf[MAXCHARS];
159+static wchar_t parabuf[MAXCHARS];
160
161 /* A pointer into parabuf, indicating the first unused character position. */
162-static char *wptr;
163+static wchar_t *wptr;
164
165 /* The words of a paragraph -- longer paragraphs are handled neatly
166 (cf. flush_paragraph()). */
167@@ -251,16 +275,16 @@
168 prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */
169
170 /* The last character read from the input file. */
171-static int next_char;
172+static wint_t next_char;
173
174 /* The space before the trimmed prefix (or part of it) on the next line
175 after the current paragraph. */
176 static int next_prefix_indent;
177
178-/* If nonzero, the length of the last line output in the current
179+/* If nonzero, the width of the last line output in the current
180 paragraph, used to charge for raggedness at the split point for long
181 paragraphs chosen by fmt_paragraph(). */
182-static int last_line_length;
183+static int last_line_width;
184
185 void
186 usage (int status)
eb0f91a0 187@@ -288,7 +312,8 @@
188 stdout);
29623d34 189 fputs (_("\
190 -t, --tagged-paragraph indentation of first line different from second\n\
eb0f91a0 191- -u, --uniform-spacing one space between words, two after sentences\n\
192+ -u, --uniform-spacing one space between words, two between sentences\n\
193+ -n, --single-spaces single spaces between sentences\n\
29623d34 194 -w, --width=WIDTH maximum line width (default of 75 columns)\n\
195 "), stdout);
196 fputs (HELP_OPTION_DESCRIPTION, stdout);
197@@ -311,6 +336,7 @@
198 {"split-only", no_argument, NULL, 's'},
199 {"tagged-paragraph", no_argument, NULL, 't'},
200 {"uniform-spacing", no_argument, NULL, 'u'},
eb0f91a0 201+ {"single-spaces", no_argument, NULL, 'n'},
29623d34 202 {"width", required_argument, NULL, 'w'},
203 {GETOPT_HELP_OPTION_DECL},
204 {GETOPT_VERSION_OPTION_DECL},
eb0f91a0 205@@ -333,9 +359,10 @@
206 atexit (close_stdout);
29623d34 207
208 crown = tagged = split = uniform = false;
eb0f91a0 209+ sentence_space = 2;
29623d34 210 max_width = WIDTH;
211- prefix = "";
212- prefix_length = prefix_lead_space = prefix_full_length = 0;
213+ prefix = L"";
214+ prefix_width = prefix_lead_space = prefix_full_width = 0;
215
216 if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
217 {
eb0f91a0 218@@ -348,7 +375,7 @@
29623d34 219 argc--;
220 }
221
222- while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
223+ while ((optchar = getopt_long (argc, argv, "0123456789cstunw:p:",
224 long_options, NULL))
225 != -1)
226 switch (optchar)
eb0f91a0 227@@ -376,6 +403,10 @@
29623d34 228 uniform = true;
29623d34 229 break;
230
eb0f91a0 231+ case 'n':
232+ sentence_space = 1;
233+ break;
234+
29623d34 235 case 'w':
eb0f91a0 236 max_width_option = optarg;
237 break;
238@@ -440,26 +471,32 @@
29623d34 239 }
240
241 /* Trim space from the front and back of the string P, yielding the prefix,
242- and record the lengths of the prefix and the space trimmed. */
243+ and record the widths of the prefix and the space trimmed. */
244
245 static void
246 set_prefix (char *p)
247 {
248- char *s;
249+ size_t len;
250+ wchar_t *s;
251
252 prefix_lead_space = 0;
253- while (*p == ' ')
254+ while (*p == L' ')
255 {
256 prefix_lead_space++;
257 p++;
258 }
259- prefix = p;
260- prefix_full_length = strlen (p);
261- s = p + prefix_full_length;
262- while (s > p && s[-1] == ' ')
263- s--;
264- *s = '\0';
265- prefix_length = s - p;
266+ len = mbsrtowcs (NULL, (const char **) &p, 0, NULL);
267+ if (len == (size_t) -1) /* invalid multibyte sequence in the prefix */
268+ error (EXIT_FAILURE, errno, "%s", p);
269+ prefix = xmalloc ((len + 1) * sizeof (wchar_t)); /* +1 for L'\0' */
270+ mbsrtowcs (prefix, (const char **) &p, len + 1, NULL);
271+ prefix_full_width = 0; /* -p may be given more than once */
272+ for (s = prefix; *s; s++)
273+ prefix_full_width += xwcwidth (*s);
274+ prefix_width = prefix_full_width;
275+ for (; s > prefix && s[-1] == L' '; s--)
276+ prefix_width--;
277+ *s = L'\0';
278 }
279
280 /* read file F and send formatted output to stdout. */
eb0f91a0 281@@ -528,24 +565,24 @@
29623d34 282 static bool
283 get_paragraph (FILE *f)
284 {
285- int c;
286+ wint_t c;
287
288- last_line_length = 0;
289+ last_line_width = 0;
290 c = next_char;
291
292 /* Scan (and copy) blank lines, and lines not introduced by the prefix. */
293
294- while (c == '\n' || c == EOF
295+ while (c == L'\n' || c == WEOF
296 || next_prefix_indent < prefix_lead_space
297- || in_column < next_prefix_indent + prefix_full_length)
298+ || in_column < next_prefix_indent + prefix_full_width)
299 {
300 c = copy_rest (f, c);
301- if (c == EOF)
302+ if (c == WEOF)
303 {
304- next_char = EOF;
305+ next_char = WEOF;
306 return false;
307 }
308- putchar ('\n');
309+ putwchar (L'\n');
310 c = get_prefix (f);
311 }
312
956567ad 313@@ -601,26 +638,26 @@
29623d34 314 that failed to match the prefix. In the latter, C is \n or EOF.
315 Return the character (\n or EOF) ending the line. */
316
317-static int
318-copy_rest (FILE *f, int c)
319+static wint_t
320+copy_rest (FILE *f, wint_t c)
321 {
322- const char *s;
323+ const wchar_t *s;
324
325 out_column = 0;
956567ad
JR
326- if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
327+ if (in_column > next_prefix_indent || (c != L'\n' && c != WEOF))
29623d34 328 {
329 put_space (next_prefix_indent);
330 for (s = prefix; out_column != in_column && *s; out_column++)
331- putchar (*s++);
956567ad 332- if (c != EOF && c != '\n')
29623d34 333+ putwchar (*s++);
956567ad
JR
334+ if (c != WEOF && c != L'\n')
335 put_space (in_column - out_column);
336- if (c == EOF && in_column >= next_prefix_indent + prefix_length)
337- putchar ('\n');
338+ if (c == WEOF && in_column >= next_prefix_indent + prefix_width)
339+ putwchar (L'\n');
29623d34 340 }
341- while (c != '\n' && c != EOF)
342+ while (c != L'\n' && c != WEOF)
343 {
344- putchar (c);
345- c = getc (f);
346+ putwchar (c);
347+ c = xgetwc (f);
348 }
349 return c;
350 }
eb0f91a0 351@@ -627,11 +664,11 @@
29623d34 352 otherwise false. */
353
354 static bool
355-same_para (int c)
356+same_para (wint_t c)
357 {
358 return (next_prefix_indent == prefix_indent
359- && in_column >= next_prefix_indent + prefix_full_length
360- && c != '\n' && c != EOF);
361+ && in_column >= next_prefix_indent + prefix_full_width
362+ && c != L'\n' && c != WEOF);
363 }
364
365 /* Read a line from input file F, given first non-blank character C
eb0f91a0 366@@ -642,11 +679,11 @@
29623d34 367
368 Return the first non-blank character of the next line. */
369
370-static int
371-get_line (FILE *f, int c)
372+static wint_t
373+get_line (FILE *f, wint_t c)
374 {
375 int start;
376- char *end_of_parabuf;
377+ wchar_t *end_of_parabuf;
378 WORD *end_of_word;
379
380 end_of_parabuf = &parabuf[MAXCHARS];
eb0f91a0 381@@ -658,6 +695,7 @@
29623d34 382 /* Scan word. */
383
384 word_limit->text = wptr;
385+ word_limit->width = 0;
386 do
387 {
388 if (wptr == end_of_parabuf)
eb0f91a0 389@@ -666,10 +704,12 @@
29623d34 390 flush_paragraph ();
391 }
392 *wptr++ = c;
393- c = getc (f);
394+ word_limit->width += xwcwidth (c);
395+ c = xgetwc (f);
396 }
397- while (c != EOF && !isspace (c));
398- in_column += word_limit->length = wptr - word_limit->text;
399+ while (c != WEOF && !iswspace (c));
400+ word_limit->length = wptr - word_limit->text;
401+ in_column += word_limit->width;
402 check_punctuation (word_limit);
403
404 /* Scan inter-word space. */
956567ad 405@@ -677,46 +717,46 @@
29623d34 406 start = in_column;
407 c = get_space (f, c);
408 word_limit->space = in_column - start;
409- word_limit->final = (c == EOF
410+ word_limit->final = (c == WEOF
411 || (word_limit->period
412- && (c == '\n' || word_limit->space > 1)));
413- if (c == '\n' || c == EOF || uniform)
414- word_limit->space = word_limit->final ? 2 : 1;
415+ && (c == L'\n' || word_limit->space > 1)));
416+ if (c == L'\n' || c == WEOF || uniform)
417+ word_limit->space = word_limit->final ? sentence_space : 1;
418 if (word_limit == end_of_word)
419 {
420 set_other_indent (true);
421 flush_paragraph ();
422 }
423 word_limit++;
29623d34 424 }
956567ad
JR
425- while (c != '\n' && c != EOF);
426+ while (c != L'\n' && c != WEOF);
29623d34 427 return get_prefix (f);
428 }
429
430 /* Read a prefix from input file F. Return either first non-matching
431 character, or first non-blank character after the prefix. */
432
433-static int
434+static wint_t
435 get_prefix (FILE *f)
436 {
437- int c;
438+ wint_t c;
439
440 in_column = 0;
441- c = get_space (f, getc (f));
442- if (prefix_length == 0)
443+ c = get_space (f, xgetwc (f));
444+ if (prefix_width == 0)
445 next_prefix_indent = prefix_lead_space < in_column ?
446 prefix_lead_space : in_column;
447 else
448 {
449- const char *p;
450+ const wchar_t *p;
451 next_prefix_indent = in_column;
452- for (p = prefix; *p != '\0'; p++)
453+ for (p = prefix; *p != L'\0'; p++)
454 {
455- unsigned char pc = *p;
456+ wchar_t pc = *p;
457 if (c != pc)
458 return c;
459 in_column++;
460- c = getc (f);
461+ c = xgetwc (f);
462 }
463 c = get_space (f, c);
464 }
eb0f91a0 465@@ -728,21 +768,21 @@
29623d34 466 /* Read blank characters from input file F, starting with C, and keeping
467 in_column up-to-date. Return first non-blank character. */
468
469-static int
470-get_space (FILE *f, int c)
471+static wint_t
472+get_space (FILE *f, wint_t c)
473 {
474 for (;;)
475 {
476- if (c == ' ')
477+ if (c == L' ')
478 in_column++;
479- else if (c == '\t')
480+ else if (c == L'\t')
481 {
482 tabs = true;
483 in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
484 }
485 else
486 return c;
487- c = getc (f);
488+ c = xgetwc (f);
489 }
490 }
491
eb0f91a0 492@@ -751,9 +791,9 @@
29623d34 493 static void
494 check_punctuation (WORD *w)
495 {
496- char const *start = w->text;
497- char const *finish = start + (w->length - 1);
498- unsigned char fin = *finish;
499+ wchar_t const *start = w->text;
500+ wchar_t const *finish = start + (w->length - 1);
501+ wchar_t fin = *finish;
502
503 w->paren = isopen (*start);
504- w->punct = !! ispunct (fin);
+ w->punct = !! iswpunct (fin);
eb0f91a0 505@@ -777,7 +817,9 @@
29623d34 506
507 if (word_limit == word)
508 {
509- fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
510+ wchar_t *outptr;
511+ for (outptr = parabuf; outptr < wptr; outptr++)
512+ putwchar (*outptr);
513 wptr = parabuf;
514 return;
515 }
eb0f91a0 516@@ -809,7 +851,8 @@
29623d34 517 /* Copy text of words down to start of parabuf -- we use memmove because
518 the source and target may overlap. */
519
520- memmove (parabuf, split_point->text, wptr - split_point->text);
521+ memmove (parabuf, split_point->text,
522+ (wptr - split_point->text) * sizeof (wchar_t));
523 shift = split_point->text - parabuf;
524 wptr -= shift;
525
eb0f91a0 526@@ -833,53 +876,53 @@
29623d34 527 fmt_paragraph (void)
528 {
529 WORD *start, *w;
530- int len;
531+ int wid;
532 COST wcost, best;
533- int saved_length;
534+ int saved_width;
535
536 word_limit->best_cost = 0;
537- saved_length = word_limit->length;
538- word_limit->length = max_width; /* sentinel */
539+ saved_width = word_limit->width;
540+ word_limit->width = max_width; /* sentinel */
541
542 for (start = word_limit - 1; start >= word; start--)
543 {
544 best = MAXCOST;
545- len = start == word ? first_indent : other_indent;
546+ wid = start == word ? first_indent : other_indent;
547
548 /* At least one word, however long, in the line. */
549
550 w = start;
551- len += w->length;
552+ wid += w->width;
553 do
554 {
555 w++;
556
557 /* Consider breaking before w. */
558
559- wcost = line_cost (w, len) + w->best_cost;
560- if (start == word && last_line_length > 0)
561- wcost += RAGGED_COST (len - last_line_length);
562+ wcost = line_cost (w, wid) + w->best_cost;
563+ if (start == word && last_line_width > 0)
564+ wcost += RAGGED_COST (wid - last_line_width);
565 if (wcost < best)
566 {
567 best = wcost;
568 start->next_break = w;
569- start->line_length = len;
570+ start->line_width = wid;
571 }
572
573- /* This is a kludge to keep us from computing `len' as the
574- sum of the sentinel length and some non-zero number.
575- Since the sentinel w->length may be INT_MAX, adding
576+ /* This is a kludge to keep us from computing `wid' as the
577+ sum of the sentinel width and some non-zero number.
578+ Since the sentinel w->width may be INT_MAX, adding
579 to that would give a negative result. */
580 if (w == word_limit)
581 break;
582
583- len += (w - 1)->space + w->length; /* w > start >= word */
584+ wid += (w - 1)->space + w->width; /* w > start >= word */
585 }
586- while (len < max_width);
587+ while (wid < max_width);
588 start->best_cost = best + base_cost (start);
589 }
590
591- word_limit->length = saved_length;
592+ word_limit->width = saved_width;
593 }
594
595 /* Return the constant component of the cost of breaking before the
eb0f91a0 596@@ -904,33 +947,33 @@
29623d34 597 else if ((this - 1)->punct)
598 cost -= PUNCT_BONUS;
599 else if (this > word + 1 && (this - 2)->final)
600- cost += WIDOW_COST ((this - 1)->length);
601+ cost += WIDOW_COST ((this - 1)->width);
602 }
603
604 if (this->paren)
605 cost -= PAREN_BONUS;
606 else if (this->final)
607- cost += ORPHAN_COST (this->length);
608+ cost += ORPHAN_COST (this->width);
609
610 return cost;
611 }
612
613 /* Return the component of the cost of breaking before word NEXT that
614- depends on LEN, the length of the line beginning there. */
615+ depends on WID, the width of the line beginning there. */
616
617 static COST
618-line_cost (WORD *next, int len)
619+line_cost (WORD *next, int wid)
620 {
621 int n;
622 COST cost;
623
624 if (next == word_limit)
625 return 0;
626- n = best_width - len;
627+ n = best_width - wid;
628 cost = SHORT_COST (n);
629 if (next->next_break != word_limit)
630 {
631- n = len - next->line_length;
632+ n = wid - next->line_width;
633 cost += RAGGED_COST (n);
634 }
635 return cost;
eb0f91a0 636@@ -959,8 +1002,8 @@
29623d34 637
638 out_column = 0;
639 put_space (prefix_indent);
640- fputs (prefix, stdout);
641- out_column += prefix_length;
642+ fputws (prefix, stdout);
643+ out_column += prefix_width;
644 put_space (indent - out_column);
645
646 endline = w->next_break - 1;
eb0f91a0 647@@ -970,8 +1013,8 @@
29623d34 648 put_space (w->space);
649 }
650 put_word (w);
651- last_line_length = out_column;
652- putchar ('\n');
653+ last_line_width = out_column;
654+ putwchar (L'\n');
655 }
656
657 /* Output to stdout the word W. */
eb0f91a0 658@@ -979,13 +1022,13 @@
29623d34 659 static void
660 put_word (WORD *w)
661 {
662- const char *s;
663+ const wchar_t *s;
664 int n;
665
666 s = w->text;
667 for (n = w->length; n != 0; n--)
668- putchar (*s++);
669- out_column += w->length;
670+ putwchar (*s++);
671+ out_column += w->width;
672 }
673
674 /* Output to stdout SPACE spaces, or equivalent tabs. */
eb0f91a0 675@@ -1002,13 +1045,13 @@
29623d34 676 if (out_column + 1 < tab_target)
677 while (out_column < tab_target)
678 {
679- putchar ('\t');
680+ putwchar (L'\t');
681 out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
682 }
683 }
684 while (out_column < space_target)
685 {
686- putchar (' ');
687+ putwchar (L' ');
688 out_column++;
689 }
690 }
eb0f91a0 691--- coreutils-6.7/po/pl.po~ 2007-02-13 17:23:15.000000000 +0100
692+++ coreutils-6.7/po/pl.po 2007-02-13 17:32:43.000000000 +0100
693@@ -3788,12 +3788,14 @@
694 #: src/fmt.c:289
695 msgid ""
696 " -t, --tagged-paragraph indentation of first line different from second\n"
697-" -u, --uniform-spacing one space between words, two after sentences\n"
698+" -u, --uniform-spacing one space between words, two between sentences\n"
699+" -n, --single-spaces single spaces between sentences\n"
700 " -w, --width=WIDTH maximum line width (default of 75 columns)\n"
701 msgstr ""
702