From: qrczak Date: Tue, 13 Feb 2007 15:59:04 +0000 (+0000) Subject: - coreutils-fmt-wchars.patch: Added support for multibyte encodings X-Git-Tag: auto/th/coreutils-6_7-3~1 X-Git-Url: http://git.pld-linux.org/?p=packages%2Fcoreutils.git;a=commitdiff_plain;h=29623d34a4e5e3f9dd93e610a2bc8162685e52f4 - coreutils-fmt-wchars.patch: Added support for multibyte encodings and wcwidth. Added -n / --single-spacing option, which is like -u but uses a single space between sentences. - Release 2 Changed files: coreutils-fmt-wchars.patch -> 1.1 coreutils.spec -> 1.119 --- diff --git a/coreutils-fmt-wchars.patch b/coreutils-fmt-wchars.patch new file mode 100644 index 0000000..099e387 --- /dev/null +++ b/coreutils-fmt-wchars.patch @@ -0,0 +1,686 @@ +--- coreutils-6.7/src/fmt.c.orig 2006-10-22 18:54:15.000000000 +0200 ++++ coreutils-6.7/src/fmt.c 2007-02-13 16:51:44.000000000 +0100 +@@ -18,6 +18,7 @@ + /* Written by Ross Paterson . */ + + #include ++#include + #include + #include + #include +@@ -39,7 +40,7 @@ + /* The following parameters represent the program's idea of what is + "best". Adjust to taste, subject to the caveats given. */ + +-/* Default longest permitted line length (max_width). */ ++/* Default longest permitted line width (max_width). */ + #define WIDTH 75 + + /* Prefer lines to be LEEWAY % shorter than the maximum width, giving +@@ -51,7 +52,7 @@ + #define DEF_INDENT 3 + + /* Costs and bonuses are expressed as the equivalent departure from the +- optimal line length, multiplied by 10. e.g. assigning something a ++ optimal line width, multiplied by 10. e.g. assigning something a + cost of 50 means that it is as bad as a line 5 characters too short + or too long. The definition of SHORT_COST(n) should not be changed. + However, EQUIV(n) may need tuning. */ +@@ -78,11 +79,11 @@ + #define LINE_COST EQUIV (70) + + /* Cost of breaking a line after the first word of a sentence, where +- the length of the word is N. */ ++ the width of the word is N. 
*/ + #define WIDOW_COST(n) (EQUIV (200) / ((n) + 2)) + + /* Cost of breaking a line before the last word of a sentence, where +- the length of the word is N. */ ++ the width of the word is N. */ + #define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2)) + + /* Bonus for breaking a line at the end of a sentence. */ +@@ -114,11 +115,30 @@ + #define MAXWORDS 1000 + #define MAXCHARS 5000 + ++/* Wide character support */ ++ ++static wint_t ++xgetwc (FILE *stream) ++{ ++ wint_t c = getwc (stream); ++ if (c == WEOF && ferror (stream)) ++ error (EXIT_FAILURE, errno, _("read error")); ++ return c; ++} ++ ++static inline int ++xwcwidth (wchar_t wc) ++{ ++ int w = wcwidth (wc); ++ return w < 0 ? 0 : w; ++} ++ + /* Extra ctype(3)-style macros. */ + +-#define isopen(c) (strchr ("([`'\"", c) != NULL) +-#define isclose(c) (strchr (")]'\"", c) != NULL) +-#define isperiod(c) (strchr (".?!", c) != NULL) ++#define isopen(c) \ ++ (wcschr (L"([`'\"\u2018\u201A\u201B\u201C\u201E\u201F", c) != NULL) ++#define isclose(c) (wcschr (L")]'\"\u2018\u2019\u201C\u201D", c) != NULL) ++#define isperiod(c) (wcschr (L".?!", c) != NULL) + + /* Size of a tab stop, for expansion on input and re-introduction on + output. */ +@@ -133,8 +153,9 @@ + + /* Static attributes determined during input. */ + +- const char *text; /* the text of the word */ +- int length; /* length of this word */ ++ const wchar_t *text; /* the text of the word */ ++ int length; /* length of this word, in characters */ ++ int width; /* width of this word, in columns */ + int space; /* the size of the following space */ + unsigned int paren:1; /* starts with open paren */ + unsigned int period:1; /* ends in [.?!])* */ +@@ -143,7 +164,7 @@ + + /* The remaining fields are computed during the optimization. 
*/ + +- int line_length; /* length of the best line starting here */ ++ int line_width; /* width of the best line starting here */ + COST best_cost; /* cost of best paragraph starting here */ + WORD *next_break; /* break which achieves best_cost */ + }; +@@ -153,16 +174,16 @@ + static void set_prefix (char *p); + static void fmt (FILE *f); + static bool get_paragraph (FILE *f); +-static int get_line (FILE *f, int c); +-static int get_prefix (FILE *f); +-static int get_space (FILE *f, int c); +-static int copy_rest (FILE *f, int c); +-static bool same_para (int c); ++static wint_t get_line (FILE *f, wint_t c); ++static wint_t get_prefix (FILE *f); ++static wint_t get_space (FILE *f, wint_t c); ++static wint_t copy_rest (FILE *f, wint_t c); ++static bool same_para (wint_t c); + static void flush_paragraph (void); + static void fmt_paragraph (void); + static void check_punctuation (WORD *w); + static COST base_cost (WORD *this); +-static COST line_cost (WORD *next, int len); ++static COST line_cost (WORD *next, int wid); + static void put_paragraph (WORD *finish); + static void put_line (WORD *w, int indent); + static void put_word (WORD *w); +@@ -185,8 +206,11 @@ + /* If true, don't preserve inter-word spacing (default false). */ + static bool uniform; + ++/* How many spaces to put after a sentence (1 or 2). */ ++static int sentence_space; ++ + /* Prefix minus leading and trailing spaces (default ""). */ +-static const char *prefix; ++static wchar_t *prefix; + + /* User-supplied maximum line width (default WIDTH). The only output + lines longer than this will each comprise a single word. */ +@@ -194,14 +218,14 @@ + + /* Values derived from the option values. */ + +-/* The length of prefix minus leading space. */ +-static int prefix_full_length; ++/* The width of prefix minus leading space. */ ++static int prefix_full_width; + +-/* The length of the leading space trimmed from the prefix. */ ++/* The width of the leading space trimmed from the prefix. 
*/ + static int prefix_lead_space; + +-/* The length of prefix minus leading and trailing space. */ +-static int prefix_length; ++/* The width of prefix minus leading and trailing space. */ ++static int prefix_width; + + /* The preferred width of text lines, set to LEEWAY % less than max_width. */ + static int best_width; +@@ -216,10 +240,10 @@ + + /* Space for the paragraph text -- longer paragraphs are handled neatly + (cf. flush_paragraph()). */ +-static char parabuf[MAXCHARS]; ++static wchar_t parabuf[MAXCHARS]; + + /* A pointer into parabuf, indicating the first unused character position. */ +-static char *wptr; ++static wchar_t *wptr; + + /* The words of a paragraph -- longer paragraphs are handled neatly + (cf. flush_paragraph()). */ +@@ -251,16 +275,16 @@ + prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */ + + /* The last character read from the input file. */ +-static int next_char; ++static wint_t next_char; + + /* The space before the trimmed prefix (or part of it) on the next line + after the current paragraph. */ + static int next_prefix_indent; + +-/* If nonzero, the length of the last line output in the current ++/* If nonzero, the width of the last line output in the current + paragraph, used to charge for raggedness at the split point for long + paragraphs chosen by fmt_paragraph(). 
*/ +-static int last_line_length; ++static int last_line_width; + + void + usage (int status) +@@ -289,6 +313,7 @@ + fputs (_("\ + -t, --tagged-paragraph indentation of first line different from second\n\ + -u, --uniform-spacing one space between words, two after sentences\n\ ++ -n, --single-spacing one space between words and after sentences\n\ + -w, --width=WIDTH maximum line width (default of 75 columns)\n\ + "), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); +@@ -311,6 +336,7 @@ + {"split-only", no_argument, NULL, 's'}, + {"tagged-paragraph", no_argument, NULL, 't'}, + {"uniform-spacing", no_argument, NULL, 'u'}, ++ {"single-spacing", no_argument, NULL, 'n'}, + {"width", required_argument, NULL, 'w'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, +@@ -334,8 +360,8 @@ + + crown = tagged = split = uniform = false; + max_width = WIDTH; +- prefix = ""; +- prefix_length = prefix_lead_space = prefix_full_length = 0; ++ prefix = L""; ++ prefix_width = prefix_lead_space = prefix_full_width = 0; + + if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1])) + { +@@ -348,7 +374,7 @@ + argc--; + } + +- while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:", ++ while ((optchar = getopt_long (argc, argv, "0123456789cstunw:p:", + long_options, NULL)) + != -1) + switch (optchar) +@@ -374,6 +400,12 @@ + + case 'u': + uniform = true; ++ sentence_space = 2; ++ break; ++ ++ case 'n': ++ uniform = true; ++ sentence_space = 1; + break; + + case 'w': +@@ -440,26 +472,32 @@ + } + + /* Trim space from the front and back of the string P, yielding the prefix, +- and record the lengths of the prefix and the space trimmed. */ ++ and record the widths of the prefix and the space trimmed. 
*/ + + static void + set_prefix (char *p) + { +- char *s; ++ size_t len; ++ wchar_t *s; + + prefix_lead_space = 0; +- while (*p == ' ') ++ while (*p == L' ') + { + prefix_lead_space++; + p++; + } +- prefix = p; +- prefix_full_length = strlen (p); +- s = p + prefix_full_length; +- while (s > p && s[-1] == ' ') +- s--; +- *s = '\0'; +- prefix_length = s - p; ++ len = mbsrtowcs (NULL, (const char **) &p, 0, NULL); ++ prefix = xmalloc (len * sizeof (wchar_t)); ++ mbsrtowcs (prefix, (const char **) &p, len, NULL); ++ for (s = prefix; *s; s++) ++ prefix_full_width += xwcwidth (*s); ++ prefix_width = prefix_full_width; ++ while (s > prefix && s[-1] == L' ') ++ { ++ s--; ++ prefix_width--; ++ } ++ *s = L'\0'; + } + + /* read file F and send formatted output to stdout. */ +@@ -528,24 +566,24 @@ + static bool + get_paragraph (FILE *f) + { +- int c; ++ wint_t c; + +- last_line_length = 0; ++ last_line_width = 0; + c = next_char; + + /* Scan (and copy) blank lines, and lines not introduced by the prefix. */ + +- while (c == '\n' || c == EOF ++ while (c == L'\n' || c == WEOF + || next_prefix_indent < prefix_lead_space +- || in_column < next_prefix_indent + prefix_full_length) ++ || in_column < next_prefix_indent + prefix_full_width) + { + c = copy_rest (f, c); +- if (c == EOF) ++ if (c == WEOF) + { +- next_char = EOF; ++ next_char = WEOF; + return false; + } +- putchar ('\n'); ++ putwchar (L'\n'); + c = get_prefix (f); + } + +@@ -601,23 +639,23 @@ + that failed to match the prefix. In the latter, C is \n or EOF. + Return the character (\n or EOF) ending the line. 
*/ + +-static int +-copy_rest (FILE *f, int c) ++static wint_t ++copy_rest (FILE *f, wint_t c) + { +- const char *s; ++ const wchar_t *s; + + out_column = 0; +- if (in_column > next_prefix_indent && c != '\n' && c != EOF) ++ if (in_column > next_prefix_indent && c != L'\n' && c != WEOF) + { + put_space (next_prefix_indent); + for (s = prefix; out_column != in_column && *s; out_column++) +- putchar (*s++); ++ putwchar (*s++); + put_space (in_column - out_column); + } +- while (c != '\n' && c != EOF) ++ while (c != L'\n' && c != WEOF) + { +- putchar (c); +- c = getc (f); ++ putwchar (c); ++ c = xgetwc (f); + } + return c; + } +@@ -627,11 +665,11 @@ + otherwise false. */ + + static bool +-same_para (int c) ++same_para (wint_t c) + { + return (next_prefix_indent == prefix_indent +- && in_column >= next_prefix_indent + prefix_full_length +- && c != '\n' && c != EOF); ++ && in_column >= next_prefix_indent + prefix_full_width ++ && c != L'\n' && c != WEOF); + } + + /* Read a line from input file F, given first non-blank character C +@@ -642,11 +680,11 @@ + + Return the first non-blank character of the next line. */ + +-static int +-get_line (FILE *f, int c) ++static wint_t ++get_line (FILE *f, wint_t c) + { + int start; +- char *end_of_parabuf; ++ wchar_t *end_of_parabuf; + WORD *end_of_word; + + end_of_parabuf = &parabuf[MAXCHARS]; +@@ -658,6 +696,7 @@ + /* Scan word. */ + + word_limit->text = wptr; ++ word_limit->width = 0; + do + { + if (wptr == end_of_parabuf) +@@ -666,10 +705,12 @@ + flush_paragraph (); + } + *wptr++ = c; +- c = getc (f); ++ word_limit->width += xwcwidth (c); ++ c = xgetwc (f); + } +- while (c != EOF && !isspace (c)); +- in_column += word_limit->length = wptr - word_limit->text; ++ while (c != WEOF && !isspace (c)); ++ word_limit->length = wptr - word_limit->text; ++ in_column += word_limit->width; + check_punctuation (word_limit); + + /* Scan inter-word space. 
*/ +@@ -677,48 +718,48 @@ + start = in_column; + c = get_space (f, c); + word_limit->space = in_column - start; +- word_limit->final = (c == EOF ++ word_limit->final = (c == WEOF + || (word_limit->period +- && (c == '\n' || word_limit->space > 1))); +- if (c == '\n' || c == EOF || uniform) +- word_limit->space = word_limit->final ? 2 : 1; ++ && (c == L'\n' || word_limit->space > 1))); ++ if (c == L'\n' || c == WEOF || uniform) ++ word_limit->space = word_limit->final ? sentence_space : 1; + if (word_limit == end_of_word) + { + set_other_indent (true); + flush_paragraph (); + } + word_limit++; +- if (c == EOF) +- return EOF; ++ if (c == WEOF) ++ return WEOF; + } +- while (c != '\n'); ++ while (c != L'\n'); + return get_prefix (f); + } + + /* Read a prefix from input file F. Return either first non-matching + character, or first non-blank character after the prefix. */ + +-static int ++static wint_t + get_prefix (FILE *f) + { +- int c; ++ wint_t c; + + in_column = 0; +- c = get_space (f, getc (f)); +- if (prefix_length == 0) ++ c = get_space (f, xgetwc (f)); ++ if (prefix_width == 0) + next_prefix_indent = prefix_lead_space < in_column ? + prefix_lead_space : in_column; + else + { +- const char *p; ++ const wchar_t *p; + next_prefix_indent = in_column; +- for (p = prefix; *p != '\0'; p++) ++ for (p = prefix; *p != L'\0'; p++) + { +- unsigned char pc = *p; ++ wchar_t pc = *p; + if (c != pc) + return c; + in_column++; +- c = getc (f); ++ c = xgetwc (f); + } + c = get_space (f, c); + } +@@ -728,21 +769,21 @@ + /* Read blank characters from input file F, starting with C, and keeping + in_column up-to-date. Return first non-blank character. 
*/ + +-static int +-get_space (FILE *f, int c) ++static wint_t ++get_space (FILE *f, wint_t c) + { + for (;;) + { +- if (c == ' ') ++ if (c == L' ') + in_column++; +- else if (c == '\t') ++ else if (c == L'\t') + { + tabs = true; + in_column = (in_column / TABWIDTH + 1) * TABWIDTH; + } + else + return c; +- c = getc (f); ++ c = xgetwc (f); + } + } + +@@ -751,9 +792,9 @@ + static void + check_punctuation (WORD *w) + { +- char const *start = w->text; +- char const *finish = start + (w->length - 1); +- unsigned char fin = *finish; ++ wchar_t const *start = w->text; ++ wchar_t const *finish = start + (w->length - 1); ++ wchar_t fin = *finish; + + w->paren = isopen (*start); + w->punct = !! ispunct (fin); +@@ -777,7 +818,9 @@ + + if (word_limit == word) + { +- fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout); ++ wchar_t *outptr; ++ for (outptr = parabuf; outptr < wptr; outptr++) ++ putwchar (*outptr); + wptr = parabuf; + return; + } +@@ -809,7 +852,8 @@ + /* Copy text of words down to start of parabuf -- we use memmove because + the source and target may overlap. */ + +- memmove (parabuf, split_point->text, wptr - split_point->text); ++ memmove (parabuf, split_point->text, ++ (wptr - split_point->text) * sizeof (wchar_t)); + shift = split_point->text - parabuf; + wptr -= shift; + +@@ -833,53 +877,53 @@ + fmt_paragraph (void) + { + WORD *start, *w; +- int len; ++ int wid; + COST wcost, best; +- int saved_length; ++ int saved_width; + + word_limit->best_cost = 0; +- saved_length = word_limit->length; +- word_limit->length = max_width; /* sentinel */ ++ saved_width = word_limit->width; ++ word_limit->width = max_width; /* sentinel */ + + for (start = word_limit - 1; start >= word; start--) + { + best = MAXCOST; +- len = start == word ? first_indent : other_indent; ++ wid = start == word ? first_indent : other_indent; + + /* At least one word, however long, in the line. 
*/ + + w = start; +- len += w->length; ++ wid += w->width; + do + { + w++; + + /* Consider breaking before w. */ + +- wcost = line_cost (w, len) + w->best_cost; +- if (start == word && last_line_length > 0) +- wcost += RAGGED_COST (len - last_line_length); ++ wcost = line_cost (w, wid) + w->best_cost; ++ if (start == word && last_line_width > 0) ++ wcost += RAGGED_COST (wid - last_line_width); + if (wcost < best) + { + best = wcost; + start->next_break = w; +- start->line_length = len; ++ start->line_width = wid; + } + +- /* This is a kludge to keep us from computing `len' as the +- sum of the sentinel length and some non-zero number. +- Since the sentinel w->length may be INT_MAX, adding ++ /* This is a kludge to keep us from computing `wid' as the ++ sum of the sentinel width and some non-zero number. ++ Since the sentinel w->width may be INT_MAX, adding + to that would give a negative result. */ + if (w == word_limit) + break; + +- len += (w - 1)->space + w->length; /* w > start >= word */ ++ wid += (w - 1)->space + w->width; /* w > start >= word */ + } +- while (len < max_width); ++ while (wid < max_width); + start->best_cost = best + base_cost (start); + } + +- word_limit->length = saved_length; ++ word_limit->width = saved_width; + } + + /* Return the constant component of the cost of breaking before the +@@ -904,33 +948,33 @@ + else if ((this - 1)->punct) + cost -= PUNCT_BONUS; + else if (this > word + 1 && (this - 2)->final) +- cost += WIDOW_COST ((this - 1)->length); ++ cost += WIDOW_COST ((this - 1)->width); + } + + if (this->paren) + cost -= PAREN_BONUS; + else if (this->final) +- cost += ORPHAN_COST (this->length); ++ cost += ORPHAN_COST (this->width); + + return cost; + } + + /* Return the component of the cost of breaking before word NEXT that +- depends on LEN, the length of the line beginning there. */ ++ depends on WID, the width of the line beginning there. 
*/ + + static COST +-line_cost (WORD *next, int len) ++line_cost (WORD *next, int wid) + { + int n; + COST cost; + + if (next == word_limit) + return 0; +- n = best_width - len; ++ n = best_width - wid; + cost = SHORT_COST (n); + if (next->next_break != word_limit) + { +- n = len - next->line_length; ++ n = wid - next->line_width; + cost += RAGGED_COST (n); + } + return cost; +@@ -959,8 +1003,8 @@ + + out_column = 0; + put_space (prefix_indent); +- fputs (prefix, stdout); +- out_column += prefix_length; ++ fputws (prefix, stdout); ++ out_column += prefix_width; + put_space (indent - out_column); + + endline = w->next_break - 1; +@@ -970,8 +1014,8 @@ + put_space (w->space); + } + put_word (w); +- last_line_length = out_column; +- putchar ('\n'); ++ last_line_width = out_column; ++ putwchar (L'\n'); + } + + /* Output to stdout the word W. */ +@@ -979,13 +1023,13 @@ + static void + put_word (WORD *w) + { +- const char *s; ++ const wchar_t *s; + int n; + + s = w->text; + for (n = w->length; n != 0; n--) +- putchar (*s++); +- out_column += w->length; ++ putwchar (*s++); ++ out_column += w->width; + } + + /* Output to stdout SPACE spaces, or equivalent tabs. 
*/ +@@ -1002,13 +1046,13 @@ + if (out_column + 1 < tab_target) + while (out_column < tab_target) + { +- putchar ('\t'); ++ putwchar (L'\t'); + out_column = (out_column / TABWIDTH + 1) * TABWIDTH; + } + } + while (out_column < space_target) + { +- putchar (' '); ++ putwchar (L' '); + out_column++; + } + } diff --git a/coreutils.spec b/coreutils.spec index fd0a08a..cf2f6c5 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -6,7 +6,7 @@ Summary: GNU Core-utils - basic command line utilities Summary(pl.UTF-8): GNU Core-utils - podstawowe narzędzia działające z linii poleceń Name: coreutils Version: 6.7 -Release: 1 +Release: 2 License: GPL Group: Applications/System Source0: ftp://ftp.gnu.org/gnu/coreutils/%{name}-%{version}.tar.bz2 @@ -30,6 +30,7 @@ Patch9: %{name}-po.patch Patch10: %{name}-no-nb.patch Patch11: %{name}-selinux.patch Patch12: %{name}-system-openat.patch +Patch13: %{name}-fmt-wchars.patch URL: http://www.gnu.org/software/coreutils/ BuildRequires: acl-devel BuildRequires: autoconf >= 2.60 @@ -105,6 +106,7 @@ Programy zawarte w tym pakiecie to: %patch10 -p1 %{?with_selinux:%patch11 -p1} %patch12 -p1 +%patch13 -p1 %{__perl} -pi -e 's@GNU/Linux@PLD Linux@' m4/host-os.m4