]> git.pld-linux.org Git - packages/coreutils.git/blame - coreutils-fmt-wchars.patch
outdated
[packages/coreutils.git] / coreutils-fmt-wchars.patch
CommitLineData
29623d34 1--- coreutils-6.7/src/fmt.c.orig 2006-10-22 18:54:15.000000000 +0200
eb0f91a0 2+++ coreutils-6.7/src/fmt.c 2007-02-13 17:20:22.000000000 +0100
29623d34 3@@ -18,6 +18,7 @@
4 /* Written by Ross Paterson <rap@doc.ic.ac.uk>. */
5
6 #include <config.h>
7+#include <wchar.h>
8 #include <stdio.h>
9 #include <sys/types.h>
10 #include <getopt.h>
11@@ -39,7 +40,7 @@
12 /* The following parameters represent the program's idea of what is
13 "best". Adjust to taste, subject to the caveats given. */
14
15-/* Default longest permitted line length (max_width). */
16+/* Default longest permitted line width (max_width). */
17 #define WIDTH 75
18
19 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
20@@ -51,7 +52,7 @@
21 #define DEF_INDENT 3
22
23 /* Costs and bonuses are expressed as the equivalent departure from the
24- optimal line length, multiplied by 10. e.g. assigning something a
25+ optimal line width, multiplied by 10. e.g. assigning something a
26 cost of 50 means that it is as bad as a line 5 characters too short
27 or too long. The definition of SHORT_COST(n) should not be changed.
28 However, EQUIV(n) may need tuning. */
29@@ -78,11 +79,11 @@
30 #define LINE_COST EQUIV (70)
31
32 /* Cost of breaking a line after the first word of a sentence, where
33- the length of the word is N. */
34+ the width of the word is N. */
35 #define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
36
37 /* Cost of breaking a line before the last word of a sentence, where
38- the length of the word is N. */
39+ the width of the word is N. */
40 #define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
41
42 /* Bonus for breaking a line at the end of a sentence. */
43@@ -114,11 +115,30 @@
44 #define MAXWORDS 1000
45 #define MAXCHARS 5000
46
47+/* Wide character support */
48+
49+static wint_t
50+xgetwc (FILE *stream)
51+{
52+ wint_t c = getwc (stream);
53+ if (c == WEOF && ferror (stream))
54+ error (EXIT_FAILURE, errno, _("read error"));
55+ return c;
56+}
57+
58+static inline int
59+xwcwidth (wchar_t wc)
60+{
61+ int w = wcwidth (wc);
62+ return w < 0 ? 0 : w;
63+}
64+
65 /* Extra ctype(3)-style macros. */
66
67-#define isopen(c) (strchr ("([`'\"", c) != NULL)
68-#define isclose(c) (strchr (")]'\"", c) != NULL)
69-#define isperiod(c) (strchr (".?!", c) != NULL)
70+#define isopen(c) \
71+ (wcschr (L"([`'\"\u2018\u201A\u201B\u201C\u201E\u201F", c) != NULL)
72+#define isclose(c) (wcschr (L")]'\"\u2018\u2019\u201C\u201D", c) != NULL)
73+#define isperiod(c) (wcschr (L".?!", c) != NULL)
74
75 /* Size of a tab stop, for expansion on input and re-introduction on
76 output. */
77@@ -133,8 +153,9 @@
78
79 /* Static attributes determined during input. */
80
81- const char *text; /* the text of the word */
82- int length; /* length of this word */
83+ const wchar_t *text; /* the text of the word */
84+ int length; /* length of this word, in characters */
85+ int width; /* width of this word, in columns */
86 int space; /* the size of the following space */
87 unsigned int paren:1; /* starts with open paren */
88 unsigned int period:1; /* ends in [.?!])* */
89@@ -143,7 +164,7 @@
90
91 /* The remaining fields are computed during the optimization. */
92
93- int line_length; /* length of the best line starting here */
94+ int line_width; /* width of the best line starting here */
95 COST best_cost; /* cost of best paragraph starting here */
96 WORD *next_break; /* break which achieves best_cost */
97 };
98@@ -153,16 +174,16 @@
99 static void set_prefix (char *p);
100 static void fmt (FILE *f);
101 static bool get_paragraph (FILE *f);
102-static int get_line (FILE *f, int c);
103-static int get_prefix (FILE *f);
104-static int get_space (FILE *f, int c);
105-static int copy_rest (FILE *f, int c);
106-static bool same_para (int c);
107+static wint_t get_line (FILE *f, wint_t c);
108+static wint_t get_prefix (FILE *f);
109+static wint_t get_space (FILE *f, wint_t c);
110+static wint_t copy_rest (FILE *f, wint_t c);
111+static bool same_para (wint_t c);
112 static void flush_paragraph (void);
113 static void fmt_paragraph (void);
114 static void check_punctuation (WORD *w);
115 static COST base_cost (WORD *this);
116-static COST line_cost (WORD *next, int len);
117+static COST line_cost (WORD *next, int wid);
118 static void put_paragraph (WORD *finish);
119 static void put_line (WORD *w, int indent);
120 static void put_word (WORD *w);
121@@ -185,8 +206,11 @@
122 /* If true, don't preserve inter-word spacing (default false). */
123 static bool uniform;
124
125+/* How many spaces to put after a sentence (1 or 2). */
126+static int sentence_space;
127+
128 /* Prefix minus leading and trailing spaces (default ""). */
129-static const char *prefix;
130+static wchar_t *prefix;
131
132 /* User-supplied maximum line width (default WIDTH). The only output
133 lines longer than this will each comprise a single word. */
134@@ -194,14 +218,14 @@
135
136 /* Values derived from the option values. */
137
138-/* The length of prefix minus leading space. */
139-static int prefix_full_length;
140+/* The width of prefix minus leading space. */
141+static int prefix_full_width;
142
143-/* The length of the leading space trimmed from the prefix. */
144+/* The width of the leading space trimmed from the prefix. */
145 static int prefix_lead_space;
146
147-/* The length of prefix minus leading and trailing space. */
148-static int prefix_length;
149+/* The width of prefix minus leading and trailing space. */
150+static int prefix_width;
151
152 /* The preferred width of text lines, set to LEEWAY % less than max_width. */
153 static int best_width;
154@@ -216,10 +240,10 @@
155
156 /* Space for the paragraph text -- longer paragraphs are handled neatly
157 (cf. flush_paragraph()). */
158-static char parabuf[MAXCHARS];
159+static wchar_t parabuf[MAXCHARS];
160
161 /* A pointer into parabuf, indicating the first unused character position. */
162-static char *wptr;
163+static wchar_t *wptr;
164
165 /* The words of a paragraph -- longer paragraphs are handled neatly
166 (cf. flush_paragraph()). */
167@@ -251,16 +275,16 @@
168 prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */
169
170 /* The last character read from the input file. */
171-static int next_char;
172+static wint_t next_char;
173
174 /* The space before the trimmed prefix (or part of it) on the next line
175 after the current paragraph. */
176 static int next_prefix_indent;
177
178-/* If nonzero, the length of the last line output in the current
179+/* If nonzero, the width of the last line output in the current
180 paragraph, used to charge for raggedness at the split point for long
181 paragraphs chosen by fmt_paragraph(). */
182-static int last_line_length;
183+static int last_line_width;
184
185 void
186 usage (int status)
eb0f91a0 187@@ -288,7 +312,8 @@
188 stdout);
29623d34 189 fputs (_("\
190 -t, --tagged-paragraph indentation of first line different from second\n\
eb0f91a0 191- -u, --uniform-spacing one space between words, two after sentences\n\
192+ -u, --uniform-spacing one space between words, two between sentences\n\
193+ -n, --single-spaces single spaces between sentences\n\
29623d34 194 -w, --width=WIDTH maximum line width (default of 75 columns)\n\
195 "), stdout);
196 fputs (HELP_OPTION_DESCRIPTION, stdout);
197@@ -311,6 +336,7 @@
198 {"split-only", no_argument, NULL, 's'},
199 {"tagged-paragraph", no_argument, NULL, 't'},
200 {"uniform-spacing", no_argument, NULL, 'u'},
eb0f91a0 201+ {"single-spaces", no_argument, NULL, 'n'},
29623d34 202 {"width", required_argument, NULL, 'w'},
203 {GETOPT_HELP_OPTION_DECL},
204 {GETOPT_VERSION_OPTION_DECL},
eb0f91a0 205@@ -333,9 +359,10 @@
206 atexit (close_stdout);
29623d34 207
208 crown = tagged = split = uniform = false;
eb0f91a0 209+ sentence_space = 2;
29623d34 210 max_width = WIDTH;
211- prefix = "";
212- prefix_length = prefix_lead_space = prefix_full_length = 0;
213+ prefix = L"";
214+ prefix_width = prefix_lead_space = prefix_full_width = 0;
215
216 if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
217 {
eb0f91a0 218@@ -348,7 +375,7 @@
29623d34 219 argc--;
220 }
221
222- while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
223+ while ((optchar = getopt_long (argc, argv, "0123456789cstunw:p:",
224 long_options, NULL))
225 != -1)
226 switch (optchar)
eb0f91a0 227@@ -376,6 +403,10 @@
29623d34 228 uniform = true;
29623d34 229 break;
230
eb0f91a0 231+ case 'n':
232+ sentence_space = 1;
233+ break;
234+
29623d34 235 case 'w':
eb0f91a0 236 max_width_option = optarg;
237 break;
238@@ -440,26 +471,32 @@
29623d34 239 }
240
241 /* Trim space from the front and back of the string P, yielding the prefix,
242- and record the lengths of the prefix and the space trimmed. */
243+ and record the widths of the prefix and the space trimmed. */
244
245 static void
246 set_prefix (char *p)
247 {
248- char *s;
249+ size_t len;
250+ wchar_t *s;
251
252 prefix_lead_space = 0;
253- while (*p == ' ')
254+ while (*p == L' ')
255 {
256 prefix_lead_space++;
257 p++;
258 }
259- prefix = p;
260- prefix_full_length = strlen (p);
261- s = p + prefix_full_length;
262- while (s > p && s[-1] == ' ')
263- s--;
264- *s = '\0';
265- prefix_length = s - p;
266+ /* Convert P to a freshly allocated wide string; LEN excludes L'\0'. */
267+ len = mbsrtowcs (NULL, (const char **) &p, 0, NULL);
268+ if (len == (size_t) -1)
269+ error (EXIT_FAILURE, errno, "%s", p);
270+ prefix = xmalloc ((len + 1) * sizeof (wchar_t));
271+ mbsrtowcs (prefix, (const char **) &p, len + 1, NULL);
272+ /* Measure in columns, restarting from zero so repeated calls don't add up. */
273+ for (prefix_full_width = 0, s = prefix; *s; s++)
274+ prefix_full_width += xwcwidth (*s);
275+ for (prefix_width = prefix_full_width; s > prefix && s[-1] == L' '; s--)
276+ prefix_width--;
277+ *s = L'\0';
278 }
279
280 /* read file F and send formatted output to stdout. */
eb0f91a0 281@@ -528,24 +565,24 @@
29623d34 282 static bool
283 get_paragraph (FILE *f)
284 {
285- int c;
286+ wint_t c;
287
288- last_line_length = 0;
289+ last_line_width = 0;
290 c = next_char;
291
292 /* Scan (and copy) blank lines, and lines not introduced by the prefix. */
293
294- while (c == '\n' || c == EOF
295+ while (c == L'\n' || c == WEOF
296 || next_prefix_indent < prefix_lead_space
297- || in_column < next_prefix_indent + prefix_full_length)
298+ || in_column < next_prefix_indent + prefix_full_width)
299 {
300 c = copy_rest (f, c);
301- if (c == EOF)
302+ if (c == WEOF)
303 {
304- next_char = EOF;
305+ next_char = WEOF;
306 return false;
307 }
308- putchar ('\n');
309+ putwchar (L'\n');
310 c = get_prefix (f);
311 }
312
eb0f91a0 313@@ -601,23 +638,23 @@
29623d34 314 that failed to match the prefix. In the latter, C is \n or EOF.
315 Return the character (\n or EOF) ending the line. */
316
317-static int
318-copy_rest (FILE *f, int c)
319+static wint_t
320+copy_rest (FILE *f, wint_t c)
321 {
322- const char *s;
323+ const wchar_t *s;
324
325 out_column = 0;
326- if (in_column > next_prefix_indent && c != '\n' && c != EOF)
327+ if (in_column > next_prefix_indent && c != L'\n' && c != WEOF)
328 {
329 put_space (next_prefix_indent);
330 for (s = prefix; out_column != in_column && *s; out_column++)
331- putchar (*s++);
332+ putwchar (*s++);
333 put_space (in_column - out_column);
334 }
335- while (c != '\n' && c != EOF)
336+ while (c != L'\n' && c != WEOF)
337 {
338- putchar (c);
339- c = getc (f);
340+ putwchar (c);
341+ c = xgetwc (f);
342 }
343 return c;
344 }
eb0f91a0 345@@ -627,11 +664,11 @@
29623d34 346 otherwise false. */
347
348 static bool
349-same_para (int c)
350+same_para (wint_t c)
351 {
352 return (next_prefix_indent == prefix_indent
353- && in_column >= next_prefix_indent + prefix_full_length
354- && c != '\n' && c != EOF);
355+ && in_column >= next_prefix_indent + prefix_full_width
356+ && c != L'\n' && c != WEOF);
357 }
358
359 /* Read a line from input file F, given first non-blank character C
eb0f91a0 360@@ -642,11 +679,11 @@
29623d34 361
362 Return the first non-blank character of the next line. */
363
364-static int
365-get_line (FILE *f, int c)
366+static wint_t
367+get_line (FILE *f, wint_t c)
368 {
369 int start;
370- char *end_of_parabuf;
371+ wchar_t *end_of_parabuf;
372 WORD *end_of_word;
373
374 end_of_parabuf = &parabuf[MAXCHARS];
eb0f91a0 375@@ -658,6 +695,7 @@
29623d34 376 /* Scan word. */
377
378 word_limit->text = wptr;
379+ word_limit->width = 0;
380 do
381 {
382 if (wptr == end_of_parabuf)
eb0f91a0 383@@ -666,10 +704,12 @@
29623d34 384 flush_paragraph ();
385 }
386 *wptr++ = c;
387- c = getc (f);
388+ word_limit->width += xwcwidth (c);
389+ c = xgetwc (f);
390 }
391- while (c != EOF && !isspace (c));
392- in_column += word_limit->length = wptr - word_limit->text;
393+ while (c != WEOF && !isspace (c));
394+ word_limit->length = wptr - word_limit->text;
395+ in_column += word_limit->width;
396 check_punctuation (word_limit);
397
398 /* Scan inter-word space. */
eb0f91a0 399@@ -677,48 +717,48 @@
29623d34 400 start = in_column;
401 c = get_space (f, c);
402 word_limit->space = in_column - start;
403- word_limit->final = (c == EOF
404+ word_limit->final = (c == WEOF
405 || (word_limit->period
406- && (c == '\n' || word_limit->space > 1)));
407- if (c == '\n' || c == EOF || uniform)
408- word_limit->space = word_limit->final ? 2 : 1;
409+ && (c == L'\n' || word_limit->space > 1)));
410+ if (c == L'\n' || c == WEOF || uniform)
411+ word_limit->space = word_limit->final ? sentence_space : 1;
412 if (word_limit == end_of_word)
413 {
414 set_other_indent (true);
415 flush_paragraph ();
416 }
417 word_limit++;
418- if (c == EOF)
419- return EOF;
420+ if (c == WEOF)
421+ return WEOF;
422 }
423- while (c != '\n');
424+ while (c != L'\n');
425 return get_prefix (f);
426 }
427
428 /* Read a prefix from input file F. Return either first non-matching
429 character, or first non-blank character after the prefix. */
430
431-static int
432+static wint_t
433 get_prefix (FILE *f)
434 {
435- int c;
436+ wint_t c;
437
438 in_column = 0;
439- c = get_space (f, getc (f));
440- if (prefix_length == 0)
441+ c = get_space (f, xgetwc (f));
442+ if (prefix_width == 0)
443 next_prefix_indent = prefix_lead_space < in_column ?
444 prefix_lead_space : in_column;
445 else
446 {
447- const char *p;
448+ const wchar_t *p;
449 next_prefix_indent = in_column;
450- for (p = prefix; *p != '\0'; p++)
451+ for (p = prefix; *p != L'\0'; p++)
452 {
453- unsigned char pc = *p;
454+ wchar_t pc = *p;
455 if (c != pc)
456 return c;
457 in_column++;
458- c = getc (f);
459+ c = xgetwc (f);
460 }
461 c = get_space (f, c);
462 }
eb0f91a0 463@@ -728,21 +768,21 @@
29623d34 464 /* Read blank characters from input file F, starting with C, and keeping
465 in_column up-to-date. Return first non-blank character. */
466
467-static int
468-get_space (FILE *f, int c)
469+static wint_t
470+get_space (FILE *f, wint_t c)
471 {
472 for (;;)
473 {
474- if (c == ' ')
475+ if (c == L' ')
476 in_column++;
477- else if (c == '\t')
478+ else if (c == L'\t')
479 {
480 tabs = true;
481 in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
482 }
483 else
484 return c;
485- c = getc (f);
486+ c = xgetwc (f);
487 }
488 }
489
eb0f91a0 490@@ -751,9 +791,9 @@
29623d34 491 static void
492 check_punctuation (WORD *w)
493 {
494- char const *start = w->text;
495- char const *finish = start + (w->length - 1);
496- unsigned char fin = *finish;
497+ wchar_t const *start = w->text;
498+ wchar_t const *finish = start + (w->length - 1);
499+ wchar_t fin = *finish;
500
501 w->paren = isopen (*start);
502 w->punct = !! ispunct (fin);
eb0f91a0 503@@ -777,7 +817,9 @@
29623d34 504
505 if (word_limit == word)
506 {
507- fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
508+ wchar_t *outptr;
509+ for (outptr = parabuf; outptr < wptr; outptr++)
510+ putwchar (*outptr);
511 wptr = parabuf;
512 return;
513 }
eb0f91a0 514@@ -809,7 +851,8 @@
29623d34 515 /* Copy text of words down to start of parabuf -- we use memmove because
516 the source and target may overlap. */
517
518- memmove (parabuf, split_point->text, wptr - split_point->text);
519+ memmove (parabuf, split_point->text,
520+ (wptr - split_point->text) * sizeof (wchar_t));
521 shift = split_point->text - parabuf;
522 wptr -= shift;
523
eb0f91a0 524@@ -833,53 +876,53 @@
29623d34 525 fmt_paragraph (void)
526 {
527 WORD *start, *w;
528- int len;
529+ int wid;
530 COST wcost, best;
531- int saved_length;
532+ int saved_width;
533
534 word_limit->best_cost = 0;
535- saved_length = word_limit->length;
536- word_limit->length = max_width; /* sentinel */
537+ saved_width = word_limit->width;
538+ word_limit->width = max_width; /* sentinel */
539
540 for (start = word_limit - 1; start >= word; start--)
541 {
542 best = MAXCOST;
543- len = start == word ? first_indent : other_indent;
544+ wid = start == word ? first_indent : other_indent;
545
546 /* At least one word, however long, in the line. */
547
548 w = start;
549- len += w->length;
550+ wid += w->width;
551 do
552 {
553 w++;
554
555 /* Consider breaking before w. */
556
557- wcost = line_cost (w, len) + w->best_cost;
558- if (start == word && last_line_length > 0)
559- wcost += RAGGED_COST (len - last_line_length);
560+ wcost = line_cost (w, wid) + w->best_cost;
561+ if (start == word && last_line_width > 0)
562+ wcost += RAGGED_COST (wid - last_line_width);
563 if (wcost < best)
564 {
565 best = wcost;
566 start->next_break = w;
567- start->line_length = len;
568+ start->line_width = wid;
569 }
570
571- /* This is a kludge to keep us from computing `len' as the
572- sum of the sentinel length and some non-zero number.
573- Since the sentinel w->length may be INT_MAX, adding
574+ /* This is a kludge to keep us from computing `wid' as the
575+ sum of the sentinel width and some non-zero number.
576+ Since the sentinel w->width may be INT_MAX, adding
577 to that would give a negative result. */
578 if (w == word_limit)
579 break;
580
581- len += (w - 1)->space + w->length; /* w > start >= word */
582+ wid += (w - 1)->space + w->width; /* w > start >= word */
583 }
584- while (len < max_width);
585+ while (wid < max_width);
586 start->best_cost = best + base_cost (start);
587 }
588
589- word_limit->length = saved_length;
590+ word_limit->width = saved_width;
591 }
592
593 /* Return the constant component of the cost of breaking before the
eb0f91a0 594@@ -904,33 +947,33 @@
29623d34 595 else if ((this - 1)->punct)
596 cost -= PUNCT_BONUS;
597 else if (this > word + 1 && (this - 2)->final)
598- cost += WIDOW_COST ((this - 1)->length);
599+ cost += WIDOW_COST ((this - 1)->width);
600 }
601
602 if (this->paren)
603 cost -= PAREN_BONUS;
604 else if (this->final)
605- cost += ORPHAN_COST (this->length);
606+ cost += ORPHAN_COST (this->width);
607
608 return cost;
609 }
610
611 /* Return the component of the cost of breaking before word NEXT that
612- depends on LEN, the length of the line beginning there. */
613+ depends on WID, the width of the line beginning there. */
614
615 static COST
616-line_cost (WORD *next, int len)
617+line_cost (WORD *next, int wid)
618 {
619 int n;
620 COST cost;
621
622 if (next == word_limit)
623 return 0;
624- n = best_width - len;
625+ n = best_width - wid;
626 cost = SHORT_COST (n);
627 if (next->next_break != word_limit)
628 {
629- n = len - next->line_length;
630+ n = wid - next->line_width;
631 cost += RAGGED_COST (n);
632 }
633 return cost;
eb0f91a0 634@@ -959,8 +1002,8 @@
29623d34 635
636 out_column = 0;
637 put_space (prefix_indent);
638- fputs (prefix, stdout);
639- out_column += prefix_length;
640+ fputws (prefix, stdout);
641+ out_column += prefix_width;
642 put_space (indent - out_column);
643
644 endline = w->next_break - 1;
eb0f91a0 645@@ -970,8 +1013,8 @@
29623d34 646 put_space (w->space);
647 }
648 put_word (w);
649- last_line_length = out_column;
650- putchar ('\n');
651+ last_line_width = out_column;
652+ putwchar (L'\n');
653 }
654
655 /* Output to stdout the word W. */
eb0f91a0 656@@ -979,13 +1022,13 @@
29623d34 657 static void
658 put_word (WORD *w)
659 {
660- const char *s;
661+ const wchar_t *s;
662 int n;
663
664 s = w->text;
665 for (n = w->length; n != 0; n--)
666- putchar (*s++);
667- out_column += w->length;
668+ putwchar (*s++);
669+ out_column += w->width;
670 }
671
672 /* Output to stdout SPACE spaces, or equivalent tabs. */
eb0f91a0 673@@ -1002,13 +1045,13 @@
29623d34 674 if (out_column + 1 < tab_target)
675 while (out_column < tab_target)
676 {
677- putchar ('\t');
678+ putwchar (L'\t');
679 out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
680 }
681 }
682 while (out_column < space_target)
683 {
684- putchar (' ');
685+ putwchar (L' ');
686 out_column++;
687 }
688 }
eb0f91a0 689--- coreutils-6.7/po/pl.po~ 2007-02-13 17:23:15.000000000 +0100
690+++ coreutils-6.7/po/pl.po 2007-02-13 17:32:43.000000000 +0100
691@@ -3788,12 +3788,14 @@
692 #: src/fmt.c:289
693 msgid ""
694 " -t, --tagged-paragraph indentation of first line different from second\n"
695-" -u, --uniform-spacing one space between words, two after sentences\n"
696+" -u, --uniform-spacing one space between words, two between sentences\n"
697+" -n, --single-spaces single spaces between sentences\n"
698 " -w, --width=WIDTH maximum line width (default of 75 columns)\n"
699 msgstr ""
700