1 --- coreutils-6.7/src/fmt.c.orig 2006-10-22 18:54:15.000000000 +0200
2 +++ coreutils-6.7/src/fmt.c 2007-02-13 17:20:22.000000000 +0100
4 /* Written by Ross Paterson <rap@doc.ic.ac.uk>. */
12 /* The following parameters represent the program's idea of what is
13 "best". Adjust to taste, subject to the caveats given. */
15 -/* Default longest permitted line length (max_width). */
16 +/* Default longest permitted line width (max_width). */
19 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
23 /* Costs and bonuses are expressed as the equivalent departure from the
24 - optimal line length, multiplied by 10. e.g. assigning something a
25 + optimal line width, multiplied by 10. e.g. assigning something a
26 cost of 50 means that it is as bad as a line 5 characters too short
27 or too long. The definition of SHORT_COST(n) should not be changed.
28 However, EQUIV(n) may need tuning. */
30 #define LINE_COST EQUIV (70)
32 /* Cost of breaking a line after the first word of a sentence, where
33 - the length of the word is N. */
34 + the width of the word is N. */
35 #define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
37 /* Cost of breaking a line before the last word of a sentence, where
38 - the length of the word is N. */
39 + the width of the word is N. */
40 #define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
42 /* Bonus for breaking a line at the end of a sentence. */
47 +/* Wide character support */
50 +xgetwc (FILE *stream)
52 + wint_t c = getwc (stream);
53 + if (c == WEOF && ferror (stream))
54 + error (EXIT_FAILURE, errno, _("read error"));
59 +xwcwidth (wchar_t wc)
61 + int w = wcwidth (wc);
62 + return w < 0 ? 0 : w;
65 /* Extra ctype(3)-style macros. */
67 -#define isopen(c) (strchr ("([`'\"", c) != NULL)
68 -#define isclose(c) (strchr (")]'\"", c) != NULL)
69 -#define isperiod(c) (strchr (".?!", c) != NULL)
71 + (wcschr (L"([`'\"\u2018\u201A\u201B\u201C\u201E\u201F", c) != NULL)
72 +#define isclose(c) (wcschr (L")]'\"\u2018\u2019\u201C\u201D", c) != NULL)
73 +#define isperiod(c) (wcschr (L".?!", c) != NULL)
75 /* Size of a tab stop, for expansion on input and re-introduction on
79 /* Static attributes determined during input. */
81 - const char *text; /* the text of the word */
82 - int length; /* length of this word */
83 + const wchar_t *text; /* the text of the word */
84 + int length; /* length of this word, in characters */
85 + int width; /* width of this word, in columns */
86 int space; /* the size of the following space */
87 unsigned int paren:1; /* starts with open paren */
88 unsigned int period:1; /* ends in [.?!])* */
91 /* The remaining fields are computed during the optimization. */
93 - int line_length; /* length of the best line starting here */
94 + int line_width; /* width of the best line starting here */
95 COST best_cost; /* cost of best paragraph starting here */
96 WORD *next_break; /* break which achieves best_cost */
99 static void set_prefix (char *p);
100 static void fmt (FILE *f);
101 static bool get_paragraph (FILE *f);
102 -static int get_line (FILE *f, int c);
103 -static int get_prefix (FILE *f);
104 -static int get_space (FILE *f, int c);
105 -static int copy_rest (FILE *f, int c);
106 -static bool same_para (int c);
107 +static wint_t get_line (FILE *f, wint_t c);
108 +static wint_t get_prefix (FILE *f);
109 +static wint_t get_space (FILE *f, wint_t c);
110 +static wint_t copy_rest (FILE *f, wint_t c);
111 +static bool same_para (wint_t c);
112 static void flush_paragraph (void);
113 static void fmt_paragraph (void);
114 static void check_punctuation (WORD *w);
115 static COST base_cost (WORD *this);
116 -static COST line_cost (WORD *next, int len);
117 +static COST line_cost (WORD *next, int wid);
118 static void put_paragraph (WORD *finish);
119 static void put_line (WORD *w, int indent);
120 static void put_word (WORD *w);
122 /* If true, don't preserve inter-word spacing (default false). */
125 +/* How many spaces to put after a sentence (1 or 2). */
126 +static int sentence_space;
128 /* Prefix minus leading and trailing spaces (default ""). */
129 -static const char *prefix;
130 +static wchar_t *prefix;
132 /* User-supplied maximum line width (default WIDTH). The only output
133 lines longer than this will each comprise a single word. */
134 @@ -194,14 +218,14 @@
136 /* Values derived from the option values. */
138 -/* The length of prefix minus leading space. */
139 -static int prefix_full_length;
140 +/* The width of prefix minus leading space. */
141 +static int prefix_full_width;
143 -/* The length of the leading space trimmed from the prefix. */
144 +/* The width of the leading space trimmed from the prefix. */
145 static int prefix_lead_space;
147 -/* The length of prefix minus leading and trailing space. */
148 -static int prefix_length;
149 +/* The width of prefix minus leading and trailing space. */
150 +static int prefix_width;
152 /* The preferred width of text lines, set to LEEWAY % less than max_width. */
153 static int best_width;
154 @@ -216,10 +240,10 @@
156 /* Space for the paragraph text -- longer paragraphs are handled neatly
157 (cf. flush_paragraph()). */
158 -static char parabuf[MAXCHARS];
159 +static wchar_t parabuf[MAXCHARS];
161 /* A pointer into parabuf, indicating the first unused character position. */
163 +static wchar_t *wptr;
165 /* The words of a paragraph -- longer paragraphs are handled neatly
166 (cf. flush_paragraph()). */
167 @@ -251,16 +275,16 @@
168 prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */
170 /* The last character read from the input file. */
171 -static int next_char;
172 +static wint_t next_char;
174 /* The space before the trimmed prefix (or part of it) on the next line
175 after the current paragraph. */
176 static int next_prefix_indent;
178 -/* If nonzero, the length of the last line output in the current
179 +/* If nonzero, the width of the last line output in the current
180 paragraph, used to charge for raggedness at the split point for long
181 paragraphs chosen by fmt_paragraph(). */
182 -static int last_line_length;
183 +static int last_line_width;
190 -t, --tagged-paragraph indentation of first line different from second\n\
191 - -u, --uniform-spacing one space between words, two after sentences\n\
192 + -u, --uniform-spacing one space between words, two between sentences\n\
193 + -n, --single-spaces single spaces between sentences\n\
194 -w, --width=WIDTH maximum line width (default of 75 columns)\n\
196 fputs (HELP_OPTION_DESCRIPTION, stdout);
198 {"split-only", no_argument, NULL, 's'},
199 {"tagged-paragraph", no_argument, NULL, 't'},
200 {"uniform-spacing", no_argument, NULL, 'u'},
201 + {"single-spaces", no_argument, NULL, 'n'},
202 {"width", required_argument, NULL, 'w'},
203 {GETOPT_HELP_OPTION_DECL},
204 {GETOPT_VERSION_OPTION_DECL},
206 atexit (close_stdout);
208 crown = tagged = split = uniform = false;
209 + sentence_space = 2;
212 - prefix_length = prefix_lead_space = prefix_full_length = 0;
214 + prefix_width = prefix_lead_space = prefix_full_width = 0;
216 if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
222 - while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
223 + while ((optchar = getopt_long (argc, argv, "0123456789cstunw:p:",
232 + sentence_space = 1;
236 max_width_option = optarg;
238 @@ -440,26 +471,32 @@
241 /* Trim space from the front and back of the string P, yielding the prefix,
242 - and record the lengths of the prefix and the space trimmed. */
243 + and record the widths of the prefix and the space trimmed. */
252 prefix_lead_space = 0;
260 - prefix_full_length = strlen (p);
261 - s = p + prefix_full_length;
262 - while (s > p && s[-1] == ' ')
265 - prefix_length = s - p;
266 + len = mbsrtowcs (NULL, (const char **) &p, 0, NULL); /* count excludes L'\0' */
267 + prefix = xmalloc ((len + 1) * sizeof (wchar_t)); /* + 1: room for L'\0' */
268 + mbsrtowcs (prefix, (const char **) &p, len + 1, NULL); /* stores L'\0'; P becomes NULL */
269 + for (s = prefix; *s; s++)
270 + prefix_full_width += xwcwidth (*s);
271 + prefix_width = prefix_full_width;
272 + while (s > prefix && s[-1] == L' ')
280 /* read file F and send formatted output to stdout. */
281 @@ -528,24 +565,24 @@
283 get_paragraph (FILE *f)
288 - last_line_length = 0;
289 + last_line_width = 0;
292 /* Scan (and copy) blank lines, and lines not introduced by the prefix. */
294 - while (c == '\n' || c == EOF
295 + while (c == L'\n' || c == WEOF
296 || next_prefix_indent < prefix_lead_space
297 - || in_column < next_prefix_indent + prefix_full_length)
298 + || in_column < next_prefix_indent + prefix_full_width)
300 c = copy_rest (f, c);
313 @@ -601,26 +638,26 @@
314 that failed to match the prefix. In the latter, C is \n or EOF.
315 Return the character (\n or EOF) ending the line. */
318 -copy_rest (FILE *f, int c)
320 +copy_rest (FILE *f, wint_t c)
326 - if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
327 + if (in_column > next_prefix_indent || (c != L'\n' && c != WEOF))
329 put_space (next_prefix_indent);
330 for (s = prefix; out_column != in_column && *s; out_column++)
332 - if (c != EOF && c != '\n')
334 + if (c != WEOF && c != L'\n')
335 put_space (in_column - out_column);
336 - if (c == EOF && in_column >= next_prefix_indent + prefix_length)
338 + if (c == WEOF && in_column >= next_prefix_indent + prefix_width)
341 - while (c != '\n' && c != EOF)
342 + while (c != L'\n' && c != WEOF)
351 @@ -627,11 +664,11 @@
356 +same_para (wint_t c)
358 return (next_prefix_indent == prefix_indent
359 - && in_column >= next_prefix_indent + prefix_full_length
360 - && c != '\n' && c != EOF);
361 + && in_column >= next_prefix_indent + prefix_full_width
362 + && c != L'\n' && c != WEOF);
365 /* Read a line from input file F, given first non-blank character C
366 @@ -642,11 +679,11 @@
368 Return the first non-blank character of the next line. */
371 -get_line (FILE *f, int c)
373 +get_line (FILE *f, wint_t c)
376 - char *end_of_parabuf;
377 + wchar_t *end_of_parabuf;
380 end_of_parabuf = &parabuf[MAXCHARS];
384 word_limit->text = wptr;
385 + word_limit->width = 0;
388 if (wptr == end_of_parabuf)
389 @@ -666,10 +704,12 @@
394 + word_limit->width += xwcwidth (c);
397 - while (c != EOF && !isspace (c));
398 - in_column += word_limit->length = wptr - word_limit->text;
399 + while (c != WEOF && !isspace (c));
400 + word_limit->length = wptr - word_limit->text;
401 + in_column += word_limit->width;
402 check_punctuation (word_limit);
404 /* Scan inter-word space. */
405 @@ -677,46 +717,46 @@
407 c = get_space (f, c);
408 word_limit->space = in_column - start;
409 - word_limit->final = (c == EOF
410 + word_limit->final = (c == WEOF
411 || (word_limit->period
412 - && (c == '\n' || word_limit->space > 1)));
413 - if (c == '\n' || c == EOF || uniform)
414 - word_limit->space = word_limit->final ? 2 : 1;
415 + && (c == L'\n' || word_limit->space > 1)));
416 + if (c == L'\n' || c == WEOF || uniform)
417 + word_limit->space = word_limit->final ? sentence_space : 1;
418 if (word_limit == end_of_word)
420 set_other_indent (true);
425 - while (c != '\n' && c != EOF);
426 + while (c != L'\n' && c != WEOF);
427 return get_prefix (f);
430 /* Read a prefix from input file F. Return either first non-matching
431 character, or first non-blank character after the prefix. */
441 - c = get_space (f, getc (f));
442 - if (prefix_length == 0)
443 + c = get_space (f, xgetwc (f));
444 + if (prefix_width == 0)
445 next_prefix_indent = prefix_lead_space < in_column ?
446 prefix_lead_space : in_column;
451 next_prefix_indent = in_column;
452 - for (p = prefix; *p != '\0'; p++)
453 + for (p = prefix; *p != L'\0'; p++)
455 - unsigned char pc = *p;
463 c = get_space (f, c);
465 @@ -728,21 +768,21 @@
466 /* Read blank characters from input file F, starting with C, and keeping
467 in_column up-to-date. Return first non-blank character. */
470 -get_space (FILE *f, int c)
472 +get_space (FILE *f, wint_t c)
479 - else if (c == '\t')
480 + else if (c == L'\t')
483 in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
494 check_punctuation (WORD *w)
496 - char const *start = w->text;
497 - char const *finish = start + (w->length - 1);
498 - unsigned char fin = *finish;
499 + wchar_t const *start = w->text;
500 + wchar_t const *finish = start + (w->length - 1);
501 + wchar_t fin = *finish;
503 w->paren = isopen (*start);
504 w->punct = !! ispunct (fin);
507 if (word_limit == word)
509 - fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
511 + for (outptr = parabuf; outptr < wptr; outptr++)
512 + putwchar (*outptr);
517 /* Copy text of words down to start of parabuf -- we use memmove because
518 the source and target may overlap. */
520 - memmove (parabuf, split_point->text, wptr - split_point->text);
521 + memmove (parabuf, split_point->text,
522 + (wptr - split_point->text) * sizeof (wchar_t));
523 shift = split_point->text - parabuf;
526 @@ -833,53 +876,53 @@
536 word_limit->best_cost = 0;
537 - saved_length = word_limit->length;
538 - word_limit->length = max_width; /* sentinel */
539 + saved_width = word_limit->width;
540 + word_limit->width = max_width; /* sentinel */
542 for (start = word_limit - 1; start >= word; start--)
545 - len = start == word ? first_indent : other_indent;
546 + wid = start == word ? first_indent : other_indent;
548 /* At least one word, however long, in the line. */
557 /* Consider breaking before w. */
559 - wcost = line_cost (w, len) + w->best_cost;
560 - if (start == word && last_line_length > 0)
561 - wcost += RAGGED_COST (len - last_line_length);
562 + wcost = line_cost (w, wid) + w->best_cost;
563 + if (start == word && last_line_width > 0)
564 + wcost += RAGGED_COST (wid - last_line_width);
568 start->next_break = w;
569 - start->line_length = len;
570 + start->line_width = wid;
573 - /* This is a kludge to keep us from computing `len' as the
574 - sum of the sentinel length and some non-zero number.
575 - Since the sentinel w->length may be INT_MAX, adding
576 + /* This is a kludge to keep us from computing `wid' as the
577 + sum of the sentinel width and some non-zero number.
578 + Since the sentinel w->width may be INT_MAX, adding
579 to that would give a negative result. */
583 - len += (w - 1)->space + w->length; /* w > start >= word */
584 + wid += (w - 1)->space + w->width; /* w > start >= word */
586 - while (len < max_width);
587 + while (wid < max_width);
588 start->best_cost = best + base_cost (start);
591 - word_limit->length = saved_length;
592 + word_limit->width = saved_width;
595 /* Return the constant component of the cost of breaking before the
596 @@ -904,33 +947,33 @@
597 else if ((this - 1)->punct)
599 else if (this > word + 1 && (this - 2)->final)
600 - cost += WIDOW_COST ((this - 1)->length);
601 + cost += WIDOW_COST ((this - 1)->width);
606 else if (this->final)
607 - cost += ORPHAN_COST (this->length);
608 + cost += ORPHAN_COST (this->width);
613 /* Return the component of the cost of breaking before word NEXT that
614 - depends on LEN, the length of the line beginning there. */
615 + depends on WID, the width of the line beginning there. */
618 -line_cost (WORD *next, int len)
619 +line_cost (WORD *next, int wid)
624 if (next == word_limit)
626 - n = best_width - len;
627 + n = best_width - wid;
628 cost = SHORT_COST (n);
629 if (next->next_break != word_limit)
631 - n = len - next->line_length;
632 + n = wid - next->line_width;
633 cost += RAGGED_COST (n);
639 put_space (prefix_indent);
640 - fputs (prefix, stdout);
641 - out_column += prefix_length;
642 + fputws (prefix, stdout);
643 + out_column += prefix_width;
644 put_space (indent - out_column);
646 endline = w->next_break - 1;
648 put_space (w->space);
651 - last_line_length = out_column;
653 + last_line_width = out_column;
657 /* Output to stdout the word W. */
658 @@ -979,13 +1022,13 @@
667 for (n = w->length; n != 0; n--)
669 - out_column += w->length;
671 + out_column += w->width;
674 /* Output to stdout SPACE spaces, or equivalent tabs. */
675 @@ -1002,13 +1045,13 @@
676 if (out_column + 1 < tab_target)
677 while (out_column < tab_target)
681 out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
684 while (out_column < space_target)
691 --- coreutils-6.7/po/pl.po~ 2007-02-13 17:23:15.000000000 +0100
692 +++ coreutils-6.7/po/pl.po 2007-02-13 17:32:43.000000000 +0100
693 @@ -3788,12 +3788,14 @@
696 " -t, --tagged-paragraph indentation of first line different from second\n"
697 -" -u, --uniform-spacing one space between words, two after sentences\n"
698 +" -u, --uniform-spacing one space between words, two between sentences\n"
699 +" -n, --single-spaces single spaces between sentences\n"
700 " -w, --width=WIDTH maximum line width (default of 75 columns)\n"
702 " -t, --tagged-paragraph wcięcie pierwszej linii inne niż drugiej\n"
703 " -u, --uniform-spacing jedna spacja między słowami, dwie między "
705 +" -n, --single-spaces pojedyncze spacje między zdaniami\n"
706 " -w, --width=ILE maksymalna szerokość linii (domyślnie 75 "