[packages/coreutils.git] / coreutils-fmt-wchars.patch

--- coreutils-6.7/src/fmt.c.orig	2006-10-22 18:54:15.000000000 +0200
+++ coreutils-6.7/src/fmt.c	2007-02-13 16:51:44.000000000 +0100
@@ -18,6 +18,8 @@
 /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
 
 #include <config.h>
+#include <wchar.h>
+#include <wctype.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <getopt.h>
@@ -39,7 +40,7 @@
 /* The following parameters represent the program's idea of what is
    "best".  Adjust to taste, subject to the caveats given.  */
 
-/* Default longest permitted line length (max_width).  */
+/* Default longest permitted line width (max_width).  */
 #define WIDTH	75
 
 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
@@ -51,7 +52,7 @@
 #define DEF_INDENT 3
 
 /* Costs and bonuses are expressed as the equivalent departure from the
-   optimal line length, multiplied by 10.  e.g. assigning something a
+   optimal line width, multiplied by 10.  e.g. assigning something a
    cost of 50 means that it is as bad as a line 5 characters too short
    or too long.  The definition of SHORT_COST(n) should not be changed.
    However, EQUIV(n) may need tuning.  */
@@ -78,11 +79,11 @@
 #define LINE_COST	EQUIV (70)
 
 /* Cost of breaking a line after the first word of a sentence, where
-   the length of the word is N.  */
+   the width of the word is N.  */
 #define WIDOW_COST(n)	(EQUIV (200) / ((n) + 2))
 
 /* Cost of breaking a line before the last word of a sentence, where
-   the length of the word is N.  */
+   the width of the word is N.  */
 #define ORPHAN_COST(n)	(EQUIV (150) / ((n) + 2))
 
 /* Bonus for breaking a line at the end of a sentence.  */
@@ -114,11 +115,30 @@
 #define MAXWORDS	1000
 #define MAXCHARS	5000
 
+/* Wide character support.  xgetwc returns the next wide character read
+   from STREAM; if getwc yields WEOF with ferror set, exit "read error".  */
+static wint_t
+xgetwc (FILE *stream)
+{
+  wint_t c = getwc (stream);
+  if (c == WEOF && ferror (stream))
+    error (EXIT_FAILURE, errno, _("read error"));
+  return c;
+}
+
+static inline int
+xwcwidth (wchar_t wc)
+{
+  int w = wcwidth (wc);
+  return w < 0 ? 0 : w;	/* a negative wcwidth result counts as 0 columns */
+}
+
 /* Extra ctype(3)-style macros.  */
 
-#define isopen(c)	(strchr ("([`'\"", c) != NULL)
-#define isclose(c)	(strchr (")]'\"", c) != NULL)
-#define isperiod(c)	(strchr (".?!", c) != NULL)
+#define isopen(c)	\
+  (wcschr (L"([`'\"\u2018\u201A\u201B\u201C\u201E\u201F", c) != NULL)
+#define isclose(c)	(wcschr (L")]'\"\u2018\u2019\u201C\u201D", c) != NULL)
+#define isperiod(c)	(wcschr (L".?!", c) != NULL)
 
 /* Size of a tab stop, for expansion on input and re-introduction on
    output.  */
@@ -133,8 +153,9 @@
 
     /* Static attributes determined during input.  */
 
-    const char *text;		/* the text of the word */
-    int length;			/* length of this word */
+    const wchar_t *text;	/* the text of the word */
+    int length;			/* length of this word, in characters */
+    int width;			/* width of this word, in columns */
     int space;			/* the size of the following space */
     unsigned int paren:1;	/* starts with open paren */
     unsigned int period:1;	/* ends in [.?!])* */
@@ -143,7 +164,7 @@
 
     /* The remaining fields are computed during the optimization.  */
 
-    int line_length;		/* length of the best line starting here */
+    int line_width;		/* width of the best line starting here */
     COST best_cost;		/* cost of best paragraph starting here */
     WORD *next_break;		/* break which achieves best_cost */
   };
@@ -153,16 +174,16 @@
 static void set_prefix (char *p);
 static void fmt (FILE *f);
 static bool get_paragraph (FILE *f);
-static int get_line (FILE *f, int c);
-static int get_prefix (FILE *f);
-static int get_space (FILE *f, int c);
-static int copy_rest (FILE *f, int c);
-static bool same_para (int c);
+static wint_t get_line (FILE *f, wint_t c);
+static wint_t get_prefix (FILE *f);
+static wint_t get_space (FILE *f, wint_t c);
+static wint_t copy_rest (FILE *f, wint_t c);
+static bool same_para (wint_t c);
 static void flush_paragraph (void);
 static void fmt_paragraph (void);
 static void check_punctuation (WORD *w);
 static COST base_cost (WORD *this);
-static COST line_cost (WORD *next, int len);
+static COST line_cost (WORD *next, int wid);
 static void put_paragraph (WORD *finish);
 static void put_line (WORD *w, int indent);
 static void put_word (WORD *w);
@@ -185,8 +206,11 @@
 /* If true, don't preserve inter-word spacing (default false).  */
 static bool uniform;
 
+/* Spaces to put after a sentence: 2 by default and with -u, 1 with -n.  */
+static int sentence_space = 2;
+
 /* Prefix minus leading and trailing spaces (default "").  */
-static const char *prefix;
+static wchar_t *prefix;
 
 /* User-supplied maximum line width (default WIDTH).  The only output
    lines longer than this will each comprise a single word.  */
@@ -194,14 +218,14 @@
 
 /* Values derived from the option values.  */
 
-/* The length of prefix minus leading space.  */
-static int prefix_full_length;
+/* The width of prefix minus leading space.  */
+static int prefix_full_width;
 
-/* The length of the leading space trimmed from the prefix.  */
+/* The width of the leading space trimmed from the prefix.  */
 static int prefix_lead_space;
 
-/* The length of prefix minus leading and trailing space.  */
-static int prefix_length;
+/* The width of prefix minus leading and trailing space.  */
+static int prefix_width;
 
 /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
 static int best_width;
@@ -216,10 +240,10 @@
 
 /* Space for the paragraph text -- longer paragraphs are handled neatly
    (cf. flush_paragraph()).  */
-static char parabuf[MAXCHARS];
+static wchar_t parabuf[MAXCHARS];
 
 /* A pointer into parabuf, indicating the first unused character position.  */
-static char *wptr;
+static wchar_t *wptr;
 
 /* The words of a paragraph -- longer paragraphs are handled neatly
    (cf. flush_paragraph()).  */
@@ -251,16 +275,16 @@
    prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
 
 /* The last character read from the input file.  */
-static int next_char;
+static wint_t next_char;
 
 /* The space before the trimmed prefix (or part of it) on the next line
    after the current paragraph.  */
 static int next_prefix_indent;
 
-/* If nonzero, the length of the last line output in the current
+/* If nonzero, the width of the last line output in the current
    paragraph, used to charge for raggedness at the split point for long
    paragraphs chosen by fmt_paragraph().  */
-static int last_line_length;
+static int last_line_width;
 
 void
 usage (int status)
@@ -289,6 +313,7 @@
       fputs (_("\
   -t, --tagged-paragraph    indentation of first line different from second\n\
   -u, --uniform-spacing     one space between words, two after sentences\n\
+  -n, --single-spacing      one space between words and after sentences\n\
   -w, --width=WIDTH         maximum line width (default of 75 columns)\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -311,6 +336,7 @@
   {"split-only", no_argument, NULL, 's'},
   {"tagged-paragraph", no_argument, NULL, 't'},
   {"uniform-spacing", no_argument, NULL, 'u'},
+  {"single-spacing", no_argument, NULL, 'n'},
   {"width", required_argument, NULL, 'w'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
@@ -334,8 +360,8 @@
 
   crown = tagged = split = uniform = false;
   max_width = WIDTH;
-  prefix = "";
-  prefix_length = prefix_lead_space = prefix_full_length = 0;
+  prefix = L"";
+  prefix_width = prefix_lead_space = prefix_full_width = 0;
 
   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
     {
@@ -348,7 +374,7 @@
       argc--;
     }
 
-  while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
+  while ((optchar = getopt_long (argc, argv, "0123456789cstunw:p:",
 				 long_options, NULL))
 	 != -1)
     switch (optchar)
@@ -374,6 +400,12 @@
 
       case 'u':
 	uniform = true;
+        sentence_space = 2;
+	break;
+
+      case 'n':
+	uniform = true;
+        sentence_space = 1;
 	break;
 
       case 'w':
@@ -440,26 +472,43 @@
 }
 
 /* Trim space from the front and back of the string P, yielding the prefix,
-   and record the lengths of the prefix and the space trimmed.  */
+   and record the widths of the prefix and the space trimmed.  P is
+   converted to wide characters according to the current locale; an
+   invalid multibyte sequence in P is a fatal error.  */
 
 static void
 set_prefix (char *p)
 {
-  char *s;
+  const char *src;
+  size_t len;
+  wchar_t *s;
 
   prefix_lead_space = 0;
   while (*p == ' ')
     {
       prefix_lead_space++;
       p++;
     }
-  prefix = p;
-  prefix_full_length = strlen (p);
-  s = p + prefix_full_length;
-  while (s > p && s[-1] == ' ')
-    s--;
-  *s = '\0';
-  prefix_length = s - p;
+  src = p;
+  len = mbsrtowcs (NULL, &src, 0, NULL);
+  if (len == (size_t) -1)
+    error (EXIT_FAILURE, errno, _("invalid prefix"));
+  /* LEN excludes the terminating null wide character: allocate and
+     convert one extra element so that PREFIX is null-terminated.  */
+  prefix = xmalloc ((len + 1) * sizeof *prefix);
+  mbsrtowcs (prefix, &src, len + 1, NULL);
+  /* Start from zero so that a second call does not accumulate widths.  */
+  prefix_full_width = 0;
+  for (s = prefix; *s; s++)
+    prefix_full_width += xwcwidth (*s);
+  prefix_width = prefix_full_width;
+  /* Each trimmed trailing space is one column wide.  */
+  while (s > prefix && s[-1] == L' ')
+    {
+      s--;
+      prefix_width--;
+    }
+  *s = L'\0';
 }
 
 /* read file F and send formatted output to stdout.  */
@@ -528,24 +566,24 @@
 static bool
 get_paragraph (FILE *f)
 {
-  int c;
+  wint_t c;
 
-  last_line_length = 0;
+  last_line_width = 0;
   c = next_char;
 
   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
 
-  while (c == '\n' || c == EOF
+  while (c == L'\n' || c == WEOF
 	 || next_prefix_indent < prefix_lead_space
-	 || in_column < next_prefix_indent + prefix_full_length)
+	 || in_column < next_prefix_indent + prefix_full_width)
     {
       c = copy_rest (f, c);
-      if (c == EOF)
+      if (c == WEOF)
 	{
-	  next_char = EOF;
+	  next_char = WEOF;
 	  return false;
 	}
-      putchar ('\n');
+      putwchar (L'\n');
       c = get_prefix (f);
     }
 
@@ -601,23 +639,23 @@
    that failed to match the prefix.  In the latter, C is \n or EOF.
    Return the character (\n or EOF) ending the line.  */
 
-static int
-copy_rest (FILE *f, int c)
+static wint_t
+copy_rest (FILE *f, wint_t c)
 {
-  const char *s;
+  const wchar_t *s;
 
   out_column = 0;
-  if (in_column > next_prefix_indent && c != '\n' && c != EOF)
+  if (in_column > next_prefix_indent && c != L'\n' && c != WEOF)
     {
       put_space (next_prefix_indent);
       for (s = prefix; out_column != in_column && *s; out_column++)
-	putchar (*s++);
+	putwchar (*s++);
       put_space (in_column - out_column);
     }
-  while (c != '\n' && c != EOF)
+  while (c != L'\n' && c != WEOF)
     {
-      putchar (c);
-      c = getc (f);
+      putwchar (c);
+      c = xgetwc (f);
     }
   return c;
 }
@@ -627,11 +665,11 @@
    otherwise false.  */
 
 static bool
-same_para (int c)
+same_para (wint_t c)
 {
   return (next_prefix_indent == prefix_indent
-	  && in_column >= next_prefix_indent + prefix_full_length
-	  && c != '\n' && c != EOF);
+	  && in_column >= next_prefix_indent + prefix_full_width
+	  && c != L'\n' && c != WEOF);
 }
 
 /* Read a line from input file F, given first non-blank character C
@@ -642,11 +680,11 @@
 
    Return the first non-blank character of the next line.  */
 
-static int
-get_line (FILE *f, int c)
+static wint_t
+get_line (FILE *f, wint_t c)
 {
   int start;
-  char *end_of_parabuf;
+  wchar_t *end_of_parabuf;
   WORD *end_of_word;
 
   end_of_parabuf = &parabuf[MAXCHARS];
@@ -658,6 +696,7 @@
       /* Scan word.  */
 
       word_limit->text = wptr;
+      word_limit->width = 0;
       do
 	{
 	  if (wptr == end_of_parabuf)
@@ -666,10 +705,13 @@
 	      flush_paragraph ();
 	    }
 	  *wptr++ = c;
-	  c = getc (f);
+          word_limit->width += xwcwidth (c);
+	  c = xgetwc (f);
 	}
-      while (c != EOF && !isspace (c));
-      in_column += word_limit->length = wptr - word_limit->text;
+      /* C is a wide character, so it needs the <wctype.h> classifier.  */
+      while (c != WEOF && !iswspace (c));
+      word_limit->length = wptr - word_limit->text;
+      in_column += word_limit->width;
       check_punctuation (word_limit);
 
       /* Scan inter-word space.  */
@@ -677,48 +718,48 @@
       start = in_column;
       c = get_space (f, c);
       word_limit->space = in_column - start;
-      word_limit->final = (c == EOF
+      word_limit->final = (c == WEOF
 			   || (word_limit->period
-			       && (c == '\n' || word_limit->space > 1)));
-      if (c == '\n' || c == EOF || uniform)
-	word_limit->space = word_limit->final ? 2 : 1;
+			       && (c == L'\n' || word_limit->space > 1)));
+      if (c == L'\n' || c == WEOF || uniform)
+	word_limit->space = word_limit->final ? sentence_space : 1;
       if (word_limit == end_of_word)
 	{
 	  set_other_indent (true);
 	  flush_paragraph ();
 	}
       word_limit++;
-      if (c == EOF)
-	return EOF;
+      if (c == WEOF)
+	return WEOF;
     }
-  while (c != '\n');
+  while (c != L'\n');
   return get_prefix (f);
 }
 
 /* Read a prefix from input file F.  Return either first non-matching
    character, or first non-blank character after the prefix.  */
 
-static int
+static wint_t
 get_prefix (FILE *f)
 {
-  int c;
+  wint_t c;
 
   in_column = 0;
-  c = get_space (f, getc (f));
-  if (prefix_length == 0)
+  c = get_space (f, xgetwc (f));
+  if (prefix_width == 0)
     next_prefix_indent = prefix_lead_space < in_column ?
       prefix_lead_space : in_column;
   else
     {
-      const char *p;
+      const wchar_t *p;
       next_prefix_indent = in_column;
-      for (p = prefix; *p != '\0'; p++)
+      for (p = prefix; *p != L'\0'; p++)
 	{
-	  unsigned char pc = *p;
+	  wchar_t pc = *p;
 	  if (c != pc)
 	    return c;
 	  in_column++;
-	  c = getc (f);
+	  c = xgetwc (f);
 	}
       c = get_space (f, c);
     }
@@ -728,21 +769,21 @@
 /* Read blank characters from input file F, starting with C, and keeping
    in_column up-to-date.  Return first non-blank character.  */
 
-static int
-get_space (FILE *f, int c)
+static wint_t
+get_space (FILE *f, wint_t c)
 {
   for (;;)
     {
-      if (c == ' ')
+      if (c == L' ')
 	in_column++;
-      else if (c == '\t')
+      else if (c == L'\t')
 	{
 	  tabs = true;
 	  in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
 	}
       else
 	return c;
-      c = getc (f);
+      c = xgetwc (f);
     }
 }
 
@@ -751,9 +792,9 @@
 static void
 check_punctuation (WORD *w)
 {
-  char const *start = w->text;
-  char const *finish = start + (w->length - 1);
-  unsigned char fin = *finish;
+  wchar_t const *start = w->text;
+  wchar_t const *finish = start + (w->length - 1);
+  wchar_t fin = *finish;	/* NOTE(review): ispunct (fin) below needs iswpunct */
 
   w->paren = isopen (*start);
   w->punct = !! ispunct (fin);
@@ -777,7 +818,9 @@
 
   if (word_limit == word)
     {
-      fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
+      wchar_t *outptr;
+      for (outptr = parabuf; outptr < wptr; outptr++)
+        putwchar (*outptr);
       wptr = parabuf;
       return;
     }
@@ -809,7 +852,8 @@
   /* Copy text of words down to start of parabuf -- we use memmove because
      the source and target may overlap.  */
 
-  memmove (parabuf, split_point->text, wptr - split_point->text);
+  memmove (parabuf, split_point->text,
+           (wptr - split_point->text) * sizeof (wchar_t));
   shift = split_point->text - parabuf;
   wptr -= shift;
 
@@ -833,53 +877,53 @@
 fmt_paragraph (void)
 {
   WORD *start, *w;
-  int len;
+  int wid;
   COST wcost, best;
-  int saved_length;
+  int saved_width;
 
   word_limit->best_cost = 0;
-  saved_length = word_limit->length;
-  word_limit->length = max_width;	/* sentinel */
+  saved_width = word_limit->width;
+  word_limit->width = max_width;	/* sentinel */
 
   for (start = word_limit - 1; start >= word; start--)
     {
       best = MAXCOST;
-      len = start == word ? first_indent : other_indent;
+      wid = start == word ? first_indent : other_indent;
 
       /* At least one word, however long, in the line.  */
 
       w = start;
-      len += w->length;
+      wid += w->width;
       do
 	{
 	  w++;
 
 	  /* Consider breaking before w.  */
 
-	  wcost = line_cost (w, len) + w->best_cost;
-	  if (start == word && last_line_length > 0)
-	    wcost += RAGGED_COST (len - last_line_length);
+	  wcost = line_cost (w, wid) + w->best_cost;
+	  if (start == word && last_line_width > 0)
+	    wcost += RAGGED_COST (wid - last_line_width);
 	  if (wcost < best)
 	    {
 	      best = wcost;
 	      start->next_break = w;
-	      start->line_length = len;
+	      start->line_width = wid;
 	    }
 
-	  /* This is a kludge to keep us from computing `len' as the
-	     sum of the sentinel length and some non-zero number.
-	     Since the sentinel w->length may be INT_MAX, adding
+	  /* This is a kludge to keep us from computing `wid' as the
+	     sum of the sentinel width and some non-zero number.
+	     Since the sentinel w->width may be INT_MAX, adding
 	     to that would give a negative result.  */
 	  if (w == word_limit)
 	    break;
 
-	  len += (w - 1)->space + w->length;	/* w > start >= word */
+	  wid += (w - 1)->space + w->width;	/* w > start >= word */
 	}
-      while (len < max_width);
+      while (wid < max_width);
       start->best_cost = best + base_cost (start);
     }
 
-  word_limit->length = saved_length;
+  word_limit->width = saved_width;
 }
 
 /* Return the constant component of the cost of breaking before the
@@ -904,33 +948,33 @@
       else if ((this - 1)->punct)
 	cost -= PUNCT_BONUS;
       else if (this > word + 1 && (this - 2)->final)
-	cost += WIDOW_COST ((this - 1)->length);
+	cost += WIDOW_COST ((this - 1)->width);
     }
 
   if (this->paren)
     cost -= PAREN_BONUS;
   else if (this->final)
-    cost += ORPHAN_COST (this->length);
+    cost += ORPHAN_COST (this->width);
 
   return cost;
 }
 
 /* Return the component of the cost of breaking before word NEXT that
-   depends on LEN, the length of the line beginning there.  */
+   depends on WID, the width of the line beginning there.  */
 
 static COST
-line_cost (WORD *next, int len)
+line_cost (WORD *next, int wid)
 {
   int n;
   COST cost;
 
   if (next == word_limit)
     return 0;
-  n = best_width - len;
+  n = best_width - wid;
   cost = SHORT_COST (n);
   if (next->next_break != word_limit)
     {
-      n = len - next->line_length;
+      n = wid - next->line_width;
       cost += RAGGED_COST (n);
     }
   return cost;
@@ -959,8 +1003,8 @@
 
   out_column = 0;
   put_space (prefix_indent);
-  fputs (prefix, stdout);
-  out_column += prefix_length;
+  fputws (prefix, stdout);
+  out_column += prefix_width;
   put_space (indent - out_column);
 
   endline = w->next_break - 1;
@@ -970,8 +1014,8 @@
       put_space (w->space);
     }
   put_word (w);
-  last_line_length = out_column;
-  putchar ('\n');
+  last_line_width = out_column;
+  putwchar (L'\n');
 }
 
 /* Output to stdout the word W.  */
@@ -979,13 +1023,13 @@
 static void
 put_word (WORD *w)
 {
-  const char *s;
+  const wchar_t *s;
   int n;
 
   s = w->text;
   for (n = w->length; n != 0; n--)
-    putchar (*s++);
-  out_column += w->length;
+    putwchar (*s++);
+  out_column += w->width;
 }
 
 /* Output to stdout SPACE spaces, or equivalent tabs.  */
@@ -1002,13 +1046,13 @@
       if (out_column + 1 < tab_target)
 	while (out_column < tab_target)
 	  {
-	    putchar ('\t');
+	    putwchar (L'\t');
 	    out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
 	  }
     }
   while (out_column < space_target)
     {
-      putchar (' ');
+      putwchar (L' ');
       out_column++;
     }
 }
Commit	Line	Data
29623d34	1	--- coreutils-6.7/src/fmt.c.orig 2006-10-22 18:54:15.000000000 +0200
	2	+++ coreutils-6.7/src/fmt.c 2007-02-13 16:51:44.000000000 +0100
	3	@@ -18,6 +18,7 @@
	4	/* Written by Ross Paterson <rap@doc.ic.ac.uk>. */
	5
	6	#include <config.h>
	7	+#include <wchar.h>
	8	#include <stdio.h>
	9	#include <sys/types.h>
	10	#include <getopt.h>
	11	@@ -39,7 +40,7 @@
	12	/* The following parameters represent the program's idea of what is
	13	"best". Adjust to taste, subject to the caveats given. */
	14
	15	-/* Default longest permitted line length (max_width). */
	16	+/* Default longest permitted line width (max_width). */
	17	#define WIDTH 75
	18
	19	/* Prefer lines to be LEEWAY % shorter than the maximum width, giving
	20	@@ -51,7 +52,7 @@
	21	#define DEF_INDENT 3
	22
	23	/* Costs and bonuses are expressed as the equivalent departure from the
	24	- optimal line length, multiplied by 10. e.g. assigning something a
	25	+ optimal line width, multiplied by 10. e.g. assigning something a
	26	cost of 50 means that it is as bad as a line 5 characters too short
	27	or too long. The definition of SHORT_COST(n) should not be changed.
	28	However, EQUIV(n) may need tuning. */
	29	@@ -78,11 +79,11 @@
	30	#define LINE_COST EQUIV (70)
	31
	32	/* Cost of breaking a line after the first word of a sentence, where
	33	- the length of the word is N. */
	34	+ the width of the word is N. */
	35	#define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
	36
	37	/* Cost of breaking a line before the last word of a sentence, where
	38	- the length of the word is N. */
	39	+ the width of the word is N. */
	40	#define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
	41
	42	/* Bonus for breaking a line at the end of a sentence. */
	43	@@ -114,11 +115,30 @@
	44	#define MAXWORDS 1000
	45	#define MAXCHARS 5000
	46
	47	+/* Wide character support */
	48	+
	49	+static wint_t
	50	+xgetwc (FILE *stream)
	51	+{
	52	+ wint_t c = getwc (stream);
	53	+ if (c == WEOF && ferror (stream))
	54	+ error (EXIT_FAILURE, errno, _("read error"));
	55	+ return c;
	56	+}
	57	+
	58	+static inline int
	59	+xwcwidth (wchar_t wc)
	60	+{
	61	+ int w = wcwidth (wc);
	62	+ return w < 0 ? 0 : w;
	63	+}
	64	+
65	/* Extra ctype(3)-style macros. */
66
67	-#define isopen(c) (strchr ("([`'\"", c) != NULL)
68	-#define isclose(c) (strchr (")]'\"", c) != NULL)
69	-#define isperiod(c) (strchr (".?!", c) != NULL)
70	+#define isopen(c) \
71	+ (wcschr (L"([`'\"\u2018\u201A\u201B\u201C\u201E\u201F", c) != NULL)
72	+#define isclose(c) (wcschr (L")]'\"\u2018\u2019\u201C\u201D", c) != NULL)
73	+#define isperiod(c) (wcschr (L".?!", c) != NULL)
74
75	/* Size of a tab stop, for expansion on input and re-introduction on
76	output. */
77	@@ -133,8 +153,9 @@
78
79	/* Static attributes determined during input. */
80
81	- const char text; / the text of the word */
82	- int length; /* length of this word */
83	+ const wchar_t text; / the text of the word */
84	+ int length; /* length of this word, in characters */
85	+ int width; /* width of this word, in columns */
86	int space; /* the size of the following space */
87	unsigned int paren:1; /* starts with open paren */
88	unsigned int period:1; /* ends in [.?!])* */
89	@@ -143,7 +164,7 @@
90
91	/* The remaining fields are computed during the optimization. */
92
93	- int line_length; /* length of the best line starting here */
94	+ int line_width; /* width of the best line starting here */
95	COST best_cost; /* cost of best paragraph starting here */
96	WORD next_break; / break which achieves best_cost */
97	};
98	@@ -153,16 +174,16 @@
99	static void set_prefix (char *p);
100	static void fmt (FILE *f);
101	static bool get_paragraph (FILE *f);
102	-static int get_line (FILE *f, int c);
103	-static int get_prefix (FILE *f);
104	-static int get_space (FILE *f, int c);
105	-static int copy_rest (FILE *f, int c);
106	-static bool same_para (int c);
107	+static wint_t get_line (FILE *f, wint_t c);
108	+static wint_t get_prefix (FILE *f);
109	+static wint_t get_space (FILE *f, wint_t c);
110	+static wint_t copy_rest (FILE *f, wint_t c);
111	+static bool same_para (wint_t c);
112	static void flush_paragraph (void);
113	static void fmt_paragraph (void);
114	static void check_punctuation (WORD *w);
115	static COST base_cost (WORD *this);
116	-static COST line_cost (WORD *next, int len);
117	+static COST line_cost (WORD *next, int wid);
118	static void put_paragraph (WORD *finish);
119	static void put_line (WORD *w, int indent);
120	static void put_word (WORD *w);
121	@@ -185,8 +206,11 @@
122	/* If true, don't preserve inter-word spacing (default false). */
123	static bool uniform;
124
125	+/* How many spaces to put after a sentence (1 or 2). */
126	+static int sentence_space;
127	+
128	/* Prefix minus leading and trailing spaces (default ""). */
129	-static const char *prefix;
130	+static wchar_t *prefix;
131
132	/* User-supplied maximum line width (default WIDTH). The only output
133	lines longer than this will each comprise a single word. */
134	@@ -194,14 +218,14 @@
135
136	/* Values derived from the option values. */
137
138	-/* The length of prefix minus leading space. */
139	-static int prefix_full_length;
140	+/* The width of prefix minus leading space. */
141	+static int prefix_full_width;
142
143	-/* The length of the leading space trimmed from the prefix. */
144	+/* The width of the leading space trimmed from the prefix. */
145	static int prefix_lead_space;
146
147	-/* The length of prefix minus leading and trailing space. */
148	-static int prefix_length;
149	+/* The width of prefix minus leading and trailing space. */
150	+static int prefix_width;
151
152	/* The preferred width of text lines, set to LEEWAY % less than max_width. */
153	static int best_width;
154	@@ -216,10 +240,10 @@
155
156	/* Space for the paragraph text -- longer paragraphs are handled neatly
157	(cf. flush_paragraph()). */
158	-static char parabuf[MAXCHARS];
159	+static wchar_t parabuf[MAXCHARS];
160
161	/* A pointer into parabuf, indicating the first unused character position. */
162	-static char *wptr;
163	+static wchar_t *wptr;
164
165	/* The words of a paragraph -- longer paragraphs are handled neatly
166	(cf. flush_paragraph()). */
167	@@ -251,16 +275,16 @@
168	prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */
169
170	/* The last character read from the input file. */
171	-static int next_char;
172	+static wint_t next_char;
173
174	/* The space before the trimmed prefix (or part of it) on the next line
175	after the current paragraph. */
176	static int next_prefix_indent;
177
178	-/* If nonzero, the length of the last line output in the current
179	+/* If nonzero, the width of the last line output in the current
180	paragraph, used to charge for raggedness at the split point for long
181	paragraphs chosen by fmt_paragraph(). */
182	-static int last_line_length;
183	+static int last_line_width;
184
185	void
186	usage (int status)
187	@@ -289,6 +313,7 @@
188	fputs (_("\
189	-t, --tagged-paragraph indentation of first line different from second\n\
190	-u, --uniform-spacing one space between words, two after sentences\n\
191	+ -n, --single-spacing one space between words and after sentences\n\
192	-w, --width=WIDTH maximum line width (default of 75 columns)\n\
193	"), stdout);
194	fputs (HELP_OPTION_DESCRIPTION, stdout);
195	@@ -311,6 +336,7 @@
196	{"split-only", no_argument, NULL, 's'},
197	{"tagged-paragraph", no_argument, NULL, 't'},
198	{"uniform-spacing", no_argument, NULL, 'u'},
199	+ {"single-spacing", no_argument, NULL, 'n'},
200	{"width", required_argument, NULL, 'w'},
201	{GETOPT_HELP_OPTION_DECL},
202	{GETOPT_VERSION_OPTION_DECL},
203	@@ -334,8 +360,8 @@
204
205	crown = tagged = split = uniform = false;
206	max_width = WIDTH;
207	- prefix = "";
208	- prefix_length = prefix_lead_space = prefix_full_length = 0;
209	+ prefix = L"";
210	+ prefix_width = prefix_lead_space = prefix_full_width = 0;
211
212	if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
213	{
214	@@ -348,7 +374,7 @@
215	argc--;
216	}
217
218	- while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
219	+ while ((optchar = getopt_long (argc, argv, "0123456789cstunw:p:",
220	long_options, NULL))
221	!= -1)
222	switch (optchar)
223	@@ -374,6 +400,12 @@
224
225	case 'u':
226	uniform = true;
227	+ sentence_space = 2;
228	+ break;
229	+
230	+ case 'n':
231	+ uniform = true;
232	+ sentence_space = 1;
233	break;
234
235	case 'w':
236	@@ -440,26 +472,32 @@
237	}
238
239	/* Trim space from the front and back of the string P, yielding the prefix,
240	- and record the lengths of the prefix and the space trimmed. */
241	+ and record the widths of the prefix and the space trimmed. */
242
243	static void
244	set_prefix (char *p)
245	{
246	- char *s;
247	+ size_t len;
248	+ wchar_t *s;
249
250	prefix_lead_space = 0;
251	- while (*p == ' ')
252	+ while (*p == L' ')
253	{
254	prefix_lead_space++;
255	p++;
256	}
257	- prefix = p;
258	- prefix_full_length = strlen (p);
259	- s = p + prefix_full_length;
260	- while (s > p && s[-1] == ' ')
261	- s--;
262	- *s = '\0';
263	- prefix_length = s - p;
264	+ len = mbsrtowcs (NULL, (const char **) &p, 0, NULL);
265	+ prefix = xmalloc (len * sizeof (wchar_t));
266	+ mbsrtowcs (prefix, (const char **) &p, len, NULL);
267	+ for (s = prefix; *s; s++)
268	+ prefix_full_width += xwcwidth (*s);
269	+ prefix_width = prefix_full_width;
270	+ while (s > prefix && s[-1] == L' ')
271	+ {
272	+ s--;
273	+ prefix_width--;
274	+ }
275	+ *s = L'\0';
276	}
277
278	/* read file F and send formatted output to stdout. */
279	@@ -528,24 +566,24 @@
280	static bool
281	get_paragraph (FILE *f)
282	{
283	- int c;
284	+ wint_t c;
285
286	- last_line_length = 0;
287	+ last_line_width = 0;
288	c = next_char;
289
290	/* Scan (and copy) blank lines, and lines not introduced by the prefix. */
291
292	- while (c == '\n' \|\| c == EOF
293	+ while (c == L'\n' \|\| c == WEOF
294	\|\| next_prefix_indent < prefix_lead_space
295	- \|\| in_column < next_prefix_indent + prefix_full_length)
296	+ \|\| in_column < next_prefix_indent + prefix_full_width)
297	{
298	c = copy_rest (f, c);
299	- if (c == EOF)
300	+ if (c == WEOF)
301	{
302	- next_char = EOF;
303	+ next_char = WEOF;
304	return false;
305	}
306	- putchar ('\n');
307	+ putwchar (L'\n');
308	c = get_prefix (f);
309	}
310
311	@@ -601,23 +639,23 @@
312	that failed to match the prefix. In the latter, C is \n or EOF.
313	Return the character (\n or EOF) ending the line. */
314
315	-static int
316	-copy_rest (FILE *f, int c)
317	+static wint_t
318	+copy_rest (FILE *f, wint_t c)
319	{
320	- const char *s;
321	+ const wchar_t *s;
322
323	out_column = 0;
324	- if (in_column > next_prefix_indent && c != '\n' && c != EOF)
325	+ if (in_column > next_prefix_indent && c != L'\n' && c != WEOF)
326	{
327	put_space (next_prefix_indent);
328	for (s = prefix; out_column != in_column && *s; out_column++)
329	- putchar (*s++);
330	+ putwchar (*s++);
331	put_space (in_column - out_column);
332	}
333	- while (c != '\n' && c != EOF)
334	+ while (c != L'\n' && c != WEOF)
335	{
336	- putchar (c);
337	- c = getc (f);
338	+ putwchar (c);
339	+ c = xgetwc (f);
340	}
341	return c;
342	}
343	@@ -627,11 +665,11 @@
344	otherwise false. */
345
346	static bool
347	-same_para (int c)
348	+same_para (wint_t c)
349	{
350	return (next_prefix_indent == prefix_indent
351	- && in_column >= next_prefix_indent + prefix_full_length
352	- && c != '\n' && c != EOF);
353	+ && in_column >= next_prefix_indent + prefix_full_width
354	+ && c != L'\n' && c != WEOF);
355	}
356
357	/* Read a line from input file F, given first non-blank character C
358	@@ -642,11 +680,11 @@
359
360	Return the first non-blank character of the next line. */
361
362	-static int
363	-get_line (FILE *f, int c)
364	+static wint_t
365	+get_line (FILE *f, wint_t c)
366	{
367	int start;
368	- char *end_of_parabuf;
369	+ wchar_t *end_of_parabuf;
370	WORD *end_of_word;
371
372	end_of_parabuf = &parabuf[MAXCHARS];
373	@@ -658,6 +696,7 @@
374	/* Scan word. */
375
376	word_limit->text = wptr;
377	+ word_limit->width = 0;
378	do
379	{
380	if (wptr == end_of_parabuf)
381	@@ -666,10 +705,12 @@
382	flush_paragraph ();
383	}
384	*wptr++ = c;
385	- c = getc (f);
386	+ word_limit->width += xwcwidth (c);
387	+ c = xgetwc (f);
388	}
389	- while (c != EOF && !isspace (c));
390	- in_column += word_limit->length = wptr - word_limit->text;
391	+ while (c != WEOF && !isspace (c));
392	+ word_limit->length = wptr - word_limit->text;
393	+ in_column += word_limit->width;
394	check_punctuation (word_limit);
395
396	/* Scan inter-word space. */
397	@@ -677,48 +718,48 @@
398	start = in_column;
399	c = get_space (f, c);
400	word_limit->space = in_column - start;
401	- word_limit->final = (c == EOF
402	+ word_limit->final = (c == WEOF
403	|| (word_limit->period
404	- && (c == '\n' || word_limit->space > 1)));
405	- if (c == '\n' || c == EOF || uniform)
406	- word_limit->space = word_limit->final ? 2 : 1;
407	+ && (c == L'\n' || word_limit->space > 1)));
408	+ if (c == L'\n' || c == WEOF || uniform)
409	+ word_limit->space = word_limit->final ? sentence_space : 1;
410	if (word_limit == end_of_word)
411	{
412	set_other_indent (true);
413	flush_paragraph ();
414	}
415	word_limit++;
416	- if (c == EOF)
417	- return EOF;
418	+ if (c == WEOF)
419	+ return WEOF;
420	}
421	- while (c != '\n');
422	+ while (c != L'\n');
423	return get_prefix (f);
424	}
425
426	/* Read a prefix from input file F. Return either first non-matching
427	character, or first non-blank character after the prefix. */
428
429	-static int
430	+static wint_t
431	get_prefix (FILE *f)
432	{
433	- int c;
434	+ wint_t c;
435
436	in_column = 0;
437	- c = get_space (f, getc (f));
438	- if (prefix_length == 0)
439	+ c = get_space (f, xgetwc (f));
440	+ if (prefix_width == 0)
441	next_prefix_indent = prefix_lead_space < in_column ?
442	prefix_lead_space : in_column;
443	else
444	{
445	- const char *p;
446	+ const wchar_t *p;
447	next_prefix_indent = in_column;
448	- for (p = prefix; *p != '\0'; p++)
449	+ for (p = prefix; *p != L'\0'; p++)
450	{
451	- unsigned char pc = *p;
452	+ wchar_t pc = *p;
453	if (c != pc)
454	return c;
455	in_column++;
456	- c = getc (f);
457	+ c = xgetwc (f);
458	}
459	c = get_space (f, c);
460	}
461	@@ -728,21 +769,21 @@
462	/* Read blank characters from input file F, starting with C, and keeping
463	in_column up-to-date. Return first non-blank character. */
464
465	-static int
466	-get_space (FILE *f, int c)
467	+static wint_t
468	+get_space (FILE *f, wint_t c)
469	{
470	for (;;)
471	{
472	- if (c == ' ')
473	+ if (c == L' ')
474	in_column++;
475	- else if (c == '\t')
476	+ else if (c == L'\t')
477	{
478	tabs = true;
479	in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
480	}
481	else
482	return c;
483	- c = getc (f);
484	+ c = xgetwc (f);
485	}
486	}
487
488	@@ -751,9 +792,9 @@
489	static void
490	check_punctuation (WORD *w)
491	{
492	- char const *start = w->text;
493	- char const *finish = start + (w->length - 1);
494	- unsigned char fin = *finish;
495	+ wchar_t const *start = w->text;
496	+ wchar_t const *finish = start + (w->length - 1);
497	+ wchar_t fin = *finish;
498
499	w->paren = isopen (*start);
500	w->punct = !! ispunct (fin);
501	@@ -777,7 +818,9 @@
502
503	if (word_limit == word)
504	{
505	- fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
506	+ wchar_t *outptr;
507	+ for (outptr = parabuf; outptr < wptr; outptr++)
508	+ putwchar (*outptr);
509	wptr = parabuf;
510	return;
511	}
512	@@ -809,7 +852,8 @@
513	/* Copy text of words down to start of parabuf -- we use memmove because
514	the source and target may overlap. */
515
516	- memmove (parabuf, split_point->text, wptr - split_point->text);
517	+ memmove (parabuf, split_point->text,
518	+ (wptr - split_point->text) * sizeof (wchar_t));
519	shift = split_point->text - parabuf;
520	wptr -= shift;
521
522	@@ -833,53 +877,53 @@
523	fmt_paragraph (void)
524	{
525	WORD start, w;
526	- int len;
527	+ int wid;
528	COST wcost, best;
529	- int saved_length;
530	+ int saved_width;
531
532	word_limit->best_cost = 0;
533	- saved_length = word_limit->length;
534	- word_limit->length = max_width; /* sentinel */
535	+ saved_width = word_limit->width;
536	+ word_limit->width = max_width; /* sentinel */
537
538	for (start = word_limit - 1; start >= word; start--)
539	{
540	best = MAXCOST;
541	- len = start == word ? first_indent : other_indent;
542	+ wid = start == word ? first_indent : other_indent;
543
544	/* At least one word, however long, in the line. */
545
546	w = start;
547	- len += w->length;
548	+ wid += w->width;
549	do
550	{
551	w++;
552
553	/* Consider breaking before w. */
554
555	- wcost = line_cost (w, len) + w->best_cost;
556	- if (start == word && last_line_length > 0)
557	- wcost += RAGGED_COST (len - last_line_length);
558	+ wcost = line_cost (w, wid) + w->best_cost;
559	+ if (start == word && last_line_width > 0)
560	+ wcost += RAGGED_COST (wid - last_line_width);
561	if (wcost < best)
562	{
563	best = wcost;
564	start->next_break = w;
565	- start->line_length = len;
566	+ start->line_width = wid;
567	}
568
569	- /* This is a kludge to keep us from computing `len' as the
570	- sum of the sentinel length and some non-zero number.
571	- Since the sentinel w->length may be INT_MAX, adding
572	+ /* This is a kludge to keep us from computing `wid' as the
573	+ sum of the sentinel width and some non-zero number.
574	+ Since the sentinel w->width may be INT_MAX, adding
575	to that would give a negative result. */
576	if (w == word_limit)
577	break;
578
579	- len += (w - 1)->space + w->length; /* w > start >= word */
580	+ wid += (w - 1)->space + w->width; /* w > start >= word */
581	}
582	- while (len < max_width);
583	+ while (wid < max_width);
584	start->best_cost = best + base_cost (start);
585	}
586
587	- word_limit->length = saved_length;
588	+ word_limit->width = saved_width;
589	}
590
591	/* Return the constant component of the cost of breaking before the
592	@@ -904,33 +948,33 @@
593	else if ((this - 1)->punct)
594	cost -= PUNCT_BONUS;
595	else if (this > word + 1 && (this - 2)->final)
596	- cost += WIDOW_COST ((this - 1)->length);
597	+ cost += WIDOW_COST ((this - 1)->width);
598	}
599
600	if (this->paren)
601	cost -= PAREN_BONUS;
602	else if (this->final)
603	- cost += ORPHAN_COST (this->length);
604	+ cost += ORPHAN_COST (this->width);
605
606	return cost;
607	}
608
609	/* Return the component of the cost of breaking before word NEXT that
610	- depends on LEN, the length of the line beginning there. */
611	+ depends on WID, the width of the line beginning there. */
612
613	static COST
614	-line_cost (WORD *next, int len)
615	+line_cost (WORD *next, int wid)
616	{
617	int n;
618	COST cost;
619
620	if (next == word_limit)
621	return 0;
622	- n = best_width - len;
623	+ n = best_width - wid;
624	cost = SHORT_COST (n);
625	if (next->next_break != word_limit)
626	{
627	- n = len - next->line_length;
628	+ n = wid - next->line_width;
629	cost += RAGGED_COST (n);
630	}
631	return cost;
632	@@ -959,8 +1003,8 @@
633
634	out_column = 0;
635	put_space (prefix_indent);
636	- fputs (prefix, stdout);
637	- out_column += prefix_length;
638	+ fputws (prefix, stdout);
639	+ out_column += prefix_width;
640	put_space (indent - out_column);
641
642	endline = w->next_break - 1;
643	@@ -970,8 +1014,8 @@
644	put_space (w->space);
645	}
646	put_word (w);
647	- last_line_length = out_column;
648	- putchar ('\n');
649	+ last_line_width = out_column;
650	+ putwchar (L'\n');
651	}
652
653	/* Output to stdout the word W. */
654	@@ -979,13 +1023,13 @@
655	static void
656	put_word (WORD *w)
657	{
658	- const char *s;
659	+ const wchar_t *s;
660	int n;
661
662	s = w->text;
663	for (n = w->length; n != 0; n--)
664	- putchar (*s++);
665	- out_column += w->length;
666	+ putwchar (*s++);
667	+ out_column += w->width;
668	}
669
670	/* Output to stdout SPACE spaces, or equivalent tabs. */
671	@@ -1002,13 +1046,13 @@
672	if (out_column + 1 < tab_target)
673	while (out_column < tab_target)
674	{
675	- putchar ('\t');
676	+ putwchar (L'\t');
677	out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
678	}
679	}
680	while (out_column < space_target)
681	{
682	- putchar (' ');
683	+ putwchar (L' ');
684	out_column++;
685	}
686	}