--- coreutils-8.19/po/pl.po.orig 2012-10-14 10:58:23.839244005 +0200 +++ coreutils-8.19/po/pl.po 2012-10-14 11:00:38.809238341 +0200 @@ -4497,13 +4497,15 @@ #, no-c-format msgid "" " -t, --tagged-paragraph indentation of first line different from second\n" -" -u, --uniform-spacing one space between words, two after sentences\n" +" -u, --uniform-spacing one space between words, two between sentences\n" +" -n, --single-spaces single spaces between sentences\n" " -w, --width=WIDTH maximum line width (default of 75 columns)\n" " -g, --goal=WIDTH goal width (default of 93% of width)\n" msgstr "" " -t, --tagged-paragraph wcięcie pierwszej linii inne niż drugiej\n" " -u, --uniform-spacing jedna spacja między słowami, dwie między " "zdaniami\n" +" -n, --single-spaces pojedyncze spacje między zdaniami\n" " -w, --width=SZEROKOŚĆ maksymalna SZEROKOŚĆ linii (domyślnie 75 " "kolumn)\n" " -g, --goal=SZEROKOŚĆ docelowa SZEROKOŚĆ (domyślnie 93% of szerokości\n" --- coreutils-8.19/src/fmt.c.orig 2012-07-21 16:54:31.000000000 +0200 +++ coreutils-8.19/src/fmt.c 2012-10-14 11:02:27.109233796 +0200 @@ -17,6 +17,7 @@ /* Written by Ross Paterson . */ #include +#include #include #include #include @@ -40,7 +41,7 @@ /* The following parameters represent the program's idea of what is "best". Adjust to taste, subject to the caveats given. */ -/* Default longest permitted line length (max_width). */ +/* Default longest permitted line width (max_width). */ #define WIDTH 75 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving @@ -52,7 +53,7 @@ #define DEF_INDENT 3 /* Costs and bonuses are expressed as the equivalent departure from the - optimal line length, multiplied by 10. e.g. assigning something a + optimal line width, multiplied by 10. e.g. assigning something a cost of 50 means that it is as bad as a line 5 characters too short or too long. The definition of SHORT_COST(n) should not be changed. However, EQUIV(n) may need tuning. 
*/ @@ -79,11 +80,11 @@ #define LINE_COST EQUIV (70) /* Cost of breaking a line after the first word of a sentence, where - the length of the word is N. */ + the width of the word is N. */ #define WIDOW_COST(n) (EQUIV (200) / ((n) + 2)) /* Cost of breaking a line before the last word of a sentence, where - the length of the word is N. */ + the width of the word is N. */ #define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2)) /* Bonus for breaking a line at the end of a sentence. */ @@ -115,11 +116,30 @@ #define MAXWORDS 1000 #define MAXCHARS 5000 +/* Wide character support */ + +static wint_t +xgetwc (FILE *stream) +{ + wint_t c = getwc (stream); + if (c == WEOF && ferror (stream)) + error (EXIT_FAILURE, errno, _("read error")); + return c; +} + +static inline int +xwcwidth (wchar_t wc) +{ + int w = wcwidth (wc); + return w < 0 ? 0 : w; +} + /* Extra ctype(3)-style macros. */ -#define isopen(c) (strchr ("(['`\"", c) != nullptr) -#define isclose(c) (strchr (")]'\"", c) != nullptr) -#define isperiod(c) (strchr (".?!", c) != nullptr) +#define isopen(c) \ + (wcschr (L"(['`\"\u2018\u201A\u201B\u201C\u201E\u201F", c) != nullptr) +#define isclose(c) (wcschr (L")]'\"\u2018\u2019\u201C\u201D", c) != nullptr) +#define isperiod(c) (wcschr (L".?!", c) != nullptr) /* Size of a tab stop, for expansion on input and re-introduction on output. */ @@ -134,8 +154,9 @@ /* Static attributes determined during input. */ - char const *text; /* the text of the word */ - int length; /* length of this word */ + wchar_t const *text; /* the text of the word */ + int length; /* length of this word, in characters */ + int width; /* width of this word, in columns */ int space; /* the size of the following space */ unsigned int paren:1; /* starts with open paren */ unsigned int period:1; /* ends in [.?!])* */ @@ -144,7 +165,7 @@ /* The remaining fields are computed during the optimization. 
*/ - int line_length; /* length of the best line starting here */ + int line_width; /* width of the best line starting here */ COST best_cost; /* cost of best paragraph starting here */ WORD *next_break; /* break which achieves best_cost */ }; @@ -154,16 +175,16 @@ static void set_prefix (char *p); static bool fmt (FILE *f, char const *); static bool get_paragraph (FILE *f); -static int get_line (FILE *f, int c); -static int get_prefix (FILE *f); -static int get_space (FILE *f, int c); -static int copy_rest (FILE *f, int c); -static bool same_para (int c); +static wint_t get_line (FILE *f, wint_t c); +static wint_t get_prefix (FILE *f); +static wint_t get_space (FILE *f, wint_t c); +static wint_t copy_rest (FILE *f, wint_t c); +static bool same_para (wint_t c); static void flush_paragraph (void); static void fmt_paragraph (void); static void check_punctuation (WORD *w); static COST base_cost (WORD *this); -static COST line_cost (WORD *next, int len); +static COST line_cost (WORD *next, int wid); static void put_paragraph (WORD *finish); static void put_line (WORD *w, int indent); static void put_word (WORD *w); @@ -183,8 +204,11 @@ /* If true, don't preserve inter-word spacing (default false). */ static bool uniform; +/* How many spaces to put after a sentence (1 or 2). */ +static int sentence_space; + /* Prefix minus leading and trailing spaces (default ""). */ -static char const *prefix; +static wchar_t *prefix; /* User-supplied maximum line width (default WIDTH). The only output lines longer than this will each comprise a single word. */ @@ -192,14 +216,14 @@ /* Values derived from the option values. */ -/* The length of prefix minus leading space. */ -static int prefix_full_length; +/* The width of prefix minus leading space. */ +static int prefix_full_width; -/* The length of the leading space trimmed from the prefix. */ +/* The width of the leading space trimmed from the prefix. 
*/ static int prefix_lead_space; -/* The length of prefix minus leading and trailing space. */ -static int prefix_length; +/* The width of prefix minus leading and trailing space. */ +static int prefix_width; /* The preferred width of text lines, set to LEEWAY % less than max_width. */ static int goal_width; @@ -214,10 +238,10 @@ /* Space for the paragraph text -- longer paragraphs are handled neatly (cf. flush_paragraph()). */ -static char parabuf[MAXCHARS]; +static wchar_t parabuf[MAXCHARS]; /* A pointer into parabuf, indicating the first unused character position. */ -static char *wptr; +static wchar_t *wptr; /* The words of a paragraph -- longer paragraphs are handled neatly (cf. flush_paragraph()). */ @@ -249,16 +273,16 @@ prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */ /* The last character read from the input file. */ -static int next_char; +static wint_t next_char; /* The space before the trimmed prefix (or part of it) on the next line after the current paragraph. */ static int next_prefix_indent; -/* If nonzero, the length of the last line output in the current +/* If nonzero, the width of the last line output in the current paragraph, used to charge for raggedness at the split point for long paragraphs chosen by fmt_paragraph(). 
*/ -static int last_line_length; +static int last_line_width; void usage (int status) @@ -287,7 +311,8 @@ format string: xgettext:no-c-format */ fputs (_("\ -t, --tagged-paragraph indentation of first line different from second\n\ - -u, --uniform-spacing one space between words, two after sentences\n\ + -u, --uniform-spacing one space between words, two between sentences\n\ + -n, --single-spaces single spaces between sentences\n\ -w, --width=WIDTH maximum line width (default of 75 columns)\n\ -g, --goal=WIDTH goal width (default of 93% of width)\n\ "), stdout); @@ -311,6 +336,7 @@ {"split-only", no_argument, nullptr, 's'}, {"tagged-paragraph", no_argument, nullptr, 't'}, {"uniform-spacing", no_argument, nullptr, 'u'}, + {"single-spaces", no_argument, nullptr, 'n'}, {"width", required_argument, nullptr, 'w'}, {"goal", required_argument, nullptr, 'g'}, {GETOPT_HELP_OPTION_DECL}, @@ -335,9 +361,10 @@ atexit (close_stdout); crown = tagged = split = uniform = false; + sentence_space = 2; max_width = WIDTH; - prefix = ""; - prefix_length = prefix_lead_space = prefix_full_length = 0; + prefix = L""; + prefix_width = prefix_lead_space = prefix_full_width = 0; if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1])) { @@ -350,7 +377,7 @@ argc--; } - while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:g:", + while ((optchar = getopt_long (argc, argv, "0123456789cstunw:p:g:", long_options, nullptr)) != -1) switch (optchar) @@ -378,6 +405,10 @@ uniform = true; break; + case 'n': + sentence_space = 1; + break; + case 'w': max_width_option = optarg; break; @@ -461,26 +492,32 @@ } /* Trim space from the front and back of the string P, yielding the prefix, - and record the lengths of the prefix and the space trimmed. */ + and record the widths of the prefix and the space trimmed. 
*/ static void set_prefix (char *p) { - char *s; + size_t len; + wchar_t *s; prefix_lead_space = 0; - while (*p == ' ') + while (*p == L' ') { prefix_lead_space++; p++; } - prefix = p; - prefix_full_length = strlen (p); - s = p + prefix_full_length; - while (s > p && s[-1] == ' ') - s--; - *s = '\0'; - prefix_length = s - p; + len = mbsrtowcs (NULL, (const char **) &p, 0, NULL); + if (len == (size_t) -1) + error (EXIT_FAILURE, errno, _("invalid prefix")); + prefix = xmalloc ((len + 1) * sizeof (wchar_t)); + mbsrtowcs (prefix, (const char **) &p, len + 1, NULL); + prefix_full_width = 0; + for (s = prefix; *s; s++) + prefix_full_width += xwcwidth (*s); + prefix_width = prefix_full_width; + for (; s > prefix && s[-1] == L' '; s--) + prefix_width--; + *s = L'\0'; } /* Read F and send formatted output to stdout. @@ -550,24 +587,24 @@ static bool get_paragraph (FILE *f) { - int c; + wint_t c; - last_line_length = 0; + last_line_width = 0; c = next_char; /* Scan (and copy) blank lines, and lines not introduced by the prefix. */ - while (c == '\n' || c == EOF + while (c == L'\n' || c == WEOF || next_prefix_indent < prefix_lead_space - || in_column < next_prefix_indent + prefix_full_length) + || in_column < next_prefix_indent + prefix_full_width) { c = copy_rest (f, c); - if (c == EOF) + if (c == WEOF) { - next_char = EOF; + next_char = WEOF; return false; } - putchar ('\n'); + putwchar (L'\n'); c = get_prefix (f); } @@ -628,26 +665,26 @@ that failed to match the prefix. In the latter, C is \n or EOF. Return the character (\n or EOF) ending the line.
*/ -static int -copy_rest (FILE *f, int c) +static wint_t +copy_rest (FILE *f, wint_t c) { - char const *s; + wchar_t const *s; out_column = 0; - if (in_column > next_prefix_indent || (c != '\n' && c != EOF)) + if (in_column > next_prefix_indent || (c != L'\n' && c != WEOF)) { put_space (next_prefix_indent); for (s = prefix; out_column != in_column && *s; out_column++) - putchar (*s++); - if (c != EOF && c != '\n') + putwchar (*s++); + if (c != WEOF && c != L'\n') put_space (in_column - out_column); - if (c == EOF && in_column >= next_prefix_indent + prefix_length) - putchar ('\n'); + if (c == WEOF && in_column >= next_prefix_indent + prefix_width) + putwchar (L'\n'); } - while (c != '\n' && c != EOF) + while (c != L'\n' && c != WEOF) { - putchar (c); - c = getc (f); + putwchar (c); + c = xgetwc (f); } return c; } @@ -657,11 +694,11 @@ otherwise false. */ static bool -same_para (int c) +same_para (wint_t c) { return (next_prefix_indent == prefix_indent - && in_column >= next_prefix_indent + prefix_full_length - && c != '\n' && c != EOF); + && in_column >= next_prefix_indent + prefix_full_width + && c != L'\n' && c != WEOF); } /* Read a line from input file F, given first non-blank character C @@ -672,11 +709,11 @@ Return the first non-blank character of the next line. */ -static int -get_line (FILE *f, int c) +static wint_t +get_line (FILE *f, wint_t c) { int start; - char *end_of_parabuf; + wchar_t *end_of_parabuf; WORD *end_of_word; end_of_parabuf = &parabuf[MAXCHARS]; @@ -688,6 +725,7 @@ /* Scan word.
*/ word_limit->text = wptr; + word_limit->width = 0; do { if (wptr == end_of_parabuf) @@ -696,10 +734,12 @@ flush_paragraph (); } *wptr++ = c; - c = getc (f); + word_limit->width += xwcwidth (c); + c = xgetwc (f); } - while (c != EOF && !c_isspace (c)); - in_column += word_limit->length = wptr - word_limit->text; + while (c != WEOF && !c_isspace (c)); + word_limit->length = wptr - word_limit->text; + in_column += word_limit->width; check_punctuation (word_limit); /* Scan inter-word space. */ @@ -707,11 +747,11 @@ start = in_column; c = get_space (f, c); word_limit->space = in_column - start; - word_limit->final = (c == EOF + word_limit->final = (c == WEOF || (word_limit->period - && (c == '\n' || word_limit->space > 1))); - if (c == '\n' || c == EOF || uniform) - word_limit->space = word_limit->final ? 2 : 1; + && (c == L'\n' || word_limit->space > 1))); + if (c == L'\n' || c == WEOF || uniform) + word_limit->space = word_limit->final ? sentence_space : 1; if (word_limit == end_of_word) { set_other_indent (true); @@ -719,34 +759,34 @@ } word_limit++; } - while (c != '\n' && c != EOF); + while (c != L'\n' && c != WEOF); return get_prefix (f); } /* Read a prefix from input file F. Return either first non-matching character, or first non-blank character after the prefix. */ -static int +static wint_t get_prefix (FILE *f) { - int c; + wint_t c; in_column = 0; - c = get_space (f, getc (f)); - if (prefix_length == 0) + c = get_space (f, xgetwc (f)); + if (prefix_width == 0) next_prefix_indent = prefix_lead_space < in_column ? prefix_lead_space : in_column; else { - char const *p; + wchar_t const *p; next_prefix_indent = in_column; - for (p = prefix; *p != '\0'; p++) + for (p = prefix; *p != L'\0'; p++) { - unsigned char pc = *p; + wchar_t pc = *p; if (c != pc) return c; in_column++; - c = getc (f); + c = xgetwc (f); } c = get_space (f, c); } @@ -756,21 +796,21 @@ /* Read blank characters from input file F, starting with C, and keeping in_column up-to-date.
Return first non-blank character. */ -static int -get_space (FILE *f, int c) +static wint_t +get_space (FILE *f, wint_t c) { while (true) { - if (c == ' ') + if (c == L' ') in_column++; - else if (c == '\t') + else if (c == L'\t') { tabs = true; in_column = (in_column / TABWIDTH + 1) * TABWIDTH; } else return c; - c = getc (f); + c = xgetwc (f); } } @@ -779,9 +819,9 @@ static void check_punctuation (WORD *w) { - char const *start = w->text; - char const *finish = start + (w->length - 1); - unsigned char fin = *finish; + wchar_t const *start = w->text; + wchar_t const *finish = start + (w->length - 1); + wchar_t fin = *finish; w->paren = isopen (*start); - w->punct = !! ispunct (fin); + w->punct = !! c_ispunct (fin); @@ -805,7 +845,9 @@ if (word_limit == word) { - fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout); + wchar_t *outptr; + for (outptr = parabuf; outptr < wptr; outptr++) + putwchar (*outptr); wptr = parabuf; return; } @@ -837,7 +879,8 @@ /* Copy text of words down to start of parabuf -- we use memmove because the source and target may overlap. */ - memmove (parabuf, split_point->text, wptr - split_point->text); + memmove (parabuf, split_point->text, + (wptr - split_point->text) * sizeof (wchar_t)); shift = split_point->text - parabuf; wptr -= shift; @@ -861,53 +904,53 @@ fmt_paragraph (void) { WORD *start, *w; - int len; + int wid; COST wcost, best; - int saved_length; + int saved_width; word_limit->best_cost = 0; - saved_length = word_limit->length; - word_limit->length = max_width; /* sentinel */ + saved_width = word_limit->width; + word_limit->width = max_width; /* sentinel */ for (start = word_limit - 1; start >= word; start--) { best = MAXCOST; - len = start == word ? first_indent : other_indent; + wid = start == word ? first_indent : other_indent; /* At least one word, however long, in the line. */ w = start; - len += w->length; + wid += w->width; do { w++; /* Consider breaking before w.
*/ - wcost = line_cost (w, len) + w->best_cost; - if (start == word && last_line_length > 0) - wcost += RAGGED_COST (len - last_line_length); + wcost = line_cost (w, wid) + w->best_cost; + if (start == word && last_line_width > 0) + wcost += RAGGED_COST (wid - last_line_width); if (wcost < best) { best = wcost; start->next_break = w; - start->line_length = len; + start->line_width = wid; } - /* This is a kludge to keep us from computing 'len' as the - sum of the sentinel length and some non-zero number. - Since the sentinel w->length may be INT_MAX, adding + /* This is a kludge to keep us from computing 'wid' as the + sum of the sentinel width and some non-zero number. + Since the sentinel w->width may be INT_MAX, adding to that would give a negative result. */ if (w == word_limit) break; - len += (w - 1)->space + w->length; /* w > start >= word */ + wid += (w - 1)->space + w->width; /* w > start >= word */ } - while (len < max_width); + while (wid < max_width); start->best_cost = best + base_cost (start); } - word_limit->length = saved_length; + word_limit->width = saved_width; } /* Work around . */ @@ -932,33 +975,33 @@ else if ((this - 1)->punct) cost -= PUNCT_BONUS; else if (this > word + 1 && (this - 2)->final) - cost += WIDOW_COST ((this - 1)->length); + cost += WIDOW_COST ((this - 1)->width); } if (this->paren) cost -= PAREN_BONUS; else if (this->final) - cost += ORPHAN_COST (this->length); + cost += ORPHAN_COST (this->width); return cost; } /* Return the component of the cost of breaking before word NEXT that - depends on LEN, the length of the line beginning there. */ + depends on WID, the width of the line beginning there. 
*/ static COST -line_cost (WORD *next, int len) +line_cost (WORD *next, int wid) { int n; COST cost; if (next == word_limit) return 0; - n = goal_width - len; + n = goal_width - wid; cost = SHORT_COST (n); if (next->next_break != word_limit) { - n = len - next->line_length; + n = wid - next->line_width; cost += RAGGED_COST (n); } return cost; @@ -987,8 +1030,8 @@ out_column = 0; put_space (prefix_indent); - fputs (prefix, stdout); - out_column += prefix_length; + fputws (prefix, stdout); + out_column += prefix_width; put_space (indent - out_column); endline = w->next_break - 1; @@ -998,8 +1041,8 @@ put_space (w->space); } put_word (w); - last_line_length = out_column; - putchar ('\n'); + last_line_width = out_column; + putwchar (L'\n'); } /* Output to stdout the word W. */ @@ -1007,13 +1050,13 @@ static void put_word (WORD *w) { - char const *s; + wchar_t const *s; int n; s = w->text; for (n = w->length; n != 0; n--) - putchar (*s++); - out_column += w->length; + putwchar (*s++); + out_column += w->width; } /* Output to stdout SPACE spaces, or equivalent tabs. */ @@ -1030,13 +1073,13 @@ if (out_column + 1 < tab_target) while (out_column < tab_target) { - putchar ('\t'); + putwchar (L'\t'); out_column = (out_column / TABWIDTH + 1) * TABWIDTH; } } while (out_column < space_target) { - putchar (' '); + putwchar (L' '); out_column++; } }