diff -urN pine4.61.orig/doc/iconv.txt pine4.61/doc/iconv.txt --- pine4.61.orig/doc/iconv.txt 1970-01-01 01:00:00.000000000 +0100 +++ pine4.61/doc/iconv.txt 2004-07-21 01:53:21.386532584 +0200 @@ -0,0 +1,41 @@ +Release Notes for pine iconv/UTF-8 patch version 9d - Feb 04 2004 +----------------------------------------------------------------- + +Currently Known Bugs: + +- The message header editor wraps long lines into multiple lines. + FormatLines is the function which does this and it's not aware + of UTF-8 yet, so it might cut lines in the middle of a UTF-8 + character sequence, thereby making the sequence invalid. + +Note: + +- The special iso-2022-jp-conversions feature is not directly supported + in this patch, but you can mimic it by changing the character-set + variable to shift-jis or euc-jp, depending on your need. + +If you find more bugs or have an improvement for the patch, send me +the message which triggers the bug together with a description of how +to reproduce it, what to expect and, if you send a patch, how it fixes it. + +Bernhard Kaindl +http://www.suse.de/~bk/pine/iconv/ + +PS: + +Example charset aliases (recommended): + +charset-aliases=iso-8859-12:windows-1250, + iso-8859-2:windows-1252, + iso-8859-11:windows-874, + iso-8859-8-i:iso-8859-8, + tis-620:windows-874, + gb2312:gb18030, + gbk:gb18030, + euc-cn:gb18030, + ks_c_5601-1987:x-windows-949, + 5601:x-windows-949 + +iconv-aliases=x-windows-949:mscp949, + euc-kr:mscp949 + diff -urN pine4.61.orig/doc/tech-notes/config.html pine4.61/doc/tech-notes/config.html --- pine4.61.orig/doc/tech-notes/config.html 2004-07-15 19:38:11.000000000 +0200 +++ pine4.61/doc/tech-notes/config.html 2004-07-21 01:53:21.408529240 +0200 @@ -285,12 +285,14 @@
character-set -
This sets the character set used by the terminal. Currently -appropriate values are US-ASCII, -ISO-8859-1 through ISO-8859-9 and -ISO-2022-JP. See the section on -International Character Sets for more -details. The default is US-ASCII.

+

This sets the character set used by the terminal, which is also used +to tag messages which are sent. Example values are US-ASCII, +ISO-8859-1 through ISO-8859-9 and ISO-2022-JP. +There is no default, but an unset charset behaves more or less like +US-ASCII. The correct value should be set, for example, when +composing mail in pine that contains more than just US-ASCII +characters. For a complete list and more details see the section on +International Character Sets.

color-style diff -urN pine4.61.orig/doc/tech-notes/low-level.html pine4.61/doc/tech-notes/low-level.html --- pine4.61.orig/doc/tech-notes/low-level.html 2002-11-01 20:35:48.000000000 +0100 +++ pine4.61/doc/tech-notes/low-level.html 2004-07-21 01:53:21.472519512 +0200 @@ -708,14 +708,23 @@ changed in the personal or system-wide configuration file with the variable character-set.

-When reading incoming email, Pine allows all character sets to pass -through. Pine doesn't actually display the characters but -simply passes them through; -it is up to the actual display device to show the characters correctly. -When composing email, Pine will accept input in any language -and tag the message according to the character-set variable. -Again, it is up to the input device to generate the correct sequences for -the character set being used.

+When reading incoming email, Pine can convert many character sets +to the current character-set, if the variable is set. +If it is not set, it simply passes them through and displays a notice about +the charset of the message. It then depends on the charset of the actual +display device whether the characters are displayed correctly. + +If the display device supports UTF-8 as its character set and your system +is able to provide the iconv() library call, much more can be done; see the +section on UTF-8 UTF-8 for details. + +When composing email, Pine tags the message according to the +character-set variable. If pine supports the iconv() library +call, the variable send-charset is available and pine will try to +convert the message text to this character-set on message compose, reply +and forward before sending and saving the message to the Fcc folder and +before saving to the postponed-messages folder or other storage when +message send is aborted or postponed. With the exception of UNICODE-1-1-UTF-7, the outgoing message is checked to see if it is all US-ASCII text (and contains no escape characters). In @@ -766,15 +775,12 @@

-Earlier versions of Pine made use of the character set tags associated -with text in MIME to decide if the text should be displayed or not. -Depending on the character set tag and the character-set variable -in Pine, the text was either displayed as is, displayed with some -characters filtered out, or not displayed at all. The current version -uses a much simpler algorithm in order to maximize the chance that useful -contents are readable by the user. It simply displays -all messages of type text and makes no attempt to filter -out characters that may be in the wrong character set. If the text is +If your system supports iconv() and pine is compiled with support for +iconv(), pine supports the character sets provided by iconv on your +system. This will likely be a much larger list and you can likely +check it with the command iconv -l.

+ +If the text is tagged as something other than US-ASCII and the tag does not match the character set that the character-set variable is set to, then an attempt is made to convert the incoming characters into the character @@ -797,23 +803,43 @@ The feature disable-2022-jp-conversions may be used to turn off this automatic conversion. +If pine is compiled with support for iconv(), the above special-case code +is disabled and this variable is ignored. Instead, set the variable +character-set to either +Shift-JIS or ISO-2022-JP, depending on your display device. In a future +version of the iconv patch, a backwards compatibility mode to mimic this +special-case code could be provided.

-There used to be a facility in PC-Pine to map between various -DOS Code Pages and standard character sets. -That facility, including the CP_TO_ISO and ISO_TO_CP hooks, was broken -for several versions of Pine and is now entirely removed. -We hope that the character set conversion that happens automatically -before displaying a message will handle most of what the Code Page code used -to do. -For example, a Russian user would probably set the -character-set variable in the PINERC file to -the value KOI8-R. -That would cause outgoing messages to be labeled as KOI8-R. -If an incoming message was labeled with the Windows-1251 character set, -Pine should be able to convert the Windows-1251 characters to KOI8-R before -sending them to the display. -

+


+ +

Using UTF-8 as character-set for the display device

+ +Using the Unicode encoding UTF-8 as character-set for the display device +makes it possible to display characters from almost all encodings on all display +devices which support UTF-8 as encoding for Unicode.

+ +If pine is compiled with support for iconv(), pine can convert mail +from all encodings supported by the iconv() implementation on your system +to UTF-8, which your display will be able to show if the font used on +your display device provides glyphs for these Unicode code points.

+ +The Unix terminal display code provides support for using UTF-8 as +character-set, so if you +are able to use a Unix terminal like the xterm provided by recent versions +of XFree86 and it has an appropriate Unicode font available on its display, +changing your locale to a locale which supports UTF-8, like de_DE.UTF-8, +and running the xterm for pine with this charset should enable its UTF-8 +mode. If you are able to use mlterm instead of xterm, you'll +even be able to see Hebrew in right-to-left display mode. + +Recent Linux distributions provide all the above out of the box: you can +select UTF-8 for your Locale configuration during installation and pine +will automatically compile with iconv() enabled.

+ +More on UTF-8 and iconv() can possibly be found at: + +http://www.suse.de/~bk/pine/iconv/.


diff -urN pine4.61.orig/pico/composer.c pine4.61/pico/composer.c --- pine4.61.orig/pico/composer.c 2004-07-21 01:52:33.000000000 +0200 +++ pine4.61/pico/composer.c 2004-07-21 01:53:21.486517384 +0200 @@ -344,7 +344,188 @@ return(TRUE); } +/* + * check_utf8 - check for UTF-8 bytes + * Takes two arguments: + * char *c - a byte of the stream + * char *utf_seq - a status array holding the function's state + * utf_seq must be provided by the caller this way: + * (static) char utf_seq[7] = ""; (content must be retained over calls) + * and must be initialized at start using: utf_seq[0] = 0; + * + * Returns NULL if an UTF-8 sequence has been started and is not completed. + * If an UTF-8 sequence is complete, it returns a pointer to a static string + * which is valid until the next use of the function. + * If the character is a double width character, a space(' ') is prepended + * to the returned string. + * If a character < 128 is passed, the UTF-8 state in utf_seq[] is cleared, + * because a valid UTF-8 sequence only consists of bytes >= 0x80. The pointer + * returned points to the address of the passed character to indicate this. + * Bugs: Only supports UTF-8 seqencies up to 4 bytes. + * Todo: Instead of passing a pointer to the char and comparing the returned + * pointer to this address afterwards, the Interface could be changed + * to just pass the character as simple char(thus not requesting the + * address of a variable which might be declared as register) and replace + * the check of the return value with a check of (c & 0x80) and if this + * is not the case, assuming that (utf_seq[0] == 0) means that this last + * non-ASCII byte completed the UTF-8 sequence, while having + * utf_seq[0] != 0 means having an incomplete UTF-8 sequence. 
+ */ +char * +check_utf8(c, utf_seq) + char *c; + char *utf_seq; +{ + static char char_string[8]; /* (6 * UTF-8 + ' ' * '\0') */ + int ix; + unsigned char dbl_wide[7][2][4] = {0xe1,0x84,0x80,0x00, 0xe1,0x85,0x9F,0x00, + 0xe2,0x8c,0xa9,0x00, 0xe2,0x8c,0xaa,0x00, + 0xe2,0xba,0x80,0x00, 0xed,0x9e,0xa3,0x00, + 0xef,0xa4,0x80,0x00, 0xef,0xa9,0xaa,0x00, + 0xef,0xb8,0xb0,0x00, 0xef,0xb9,0xa8,0x00, + 0xef,0xbc,0x81,0x00, 0xef,0xbd,0xad,0x00, + 0xef,0xbf,0xa0,0x00, 0xef,0xbf,0xa6,0x00 + }; + if (*c & 0x80) { + char_string[0] = *c; + char_string[1] = 0; + if (strlen(utf_seq) == sizeof(utf_seq) - 1) + utf_seq[0] = 0; /* no overflow on long UTF-8 sequence */ + if ((*c & 0xF0) >= 0xC0) { + strncpy(utf_seq, char_string, sizeof(utf_seq)); + return NULL; /* maybe UTF-8 sequence, need next byte */ + } else if (utf_seq[0]) { + strncat(utf_seq, char_string, sizeof(utf_seq)); //add on to string + switch (utf_seq[0] & 0xF0) { + case 0xC0 : + case 0xD0 : + strncpy(char_string, utf_seq, sizeof(char_string)); + utf_seq[0] = 0; // this sequence is over, clear for restart + return char_string; // process this UTF-8 char... + case 0xE0 : + if (strlen(utf_seq) < 3) + return NULL; // 3-byte UTF-8, need next byte + char_string[0] = '\0'; // init + for (ix = 0; ix < 7; ix++) + if (strcmp(utf_seq, &dbl_wide[ix][0][0]) >= 0 + && strcmp(utf_seq, &dbl_wide[ix][1][0]) <= 0) { + char_string[0] = ' '; // dbl width UTF-8 char + break; + } + strncat(char_string, utf_seq, sizeof(char_string)); + utf_seq[0] = 0; // this sequence is over, clear for restart + return char_string; // process this UTF-8 char... + case 0xF0 : + if (strlen(utf_seq) < 4) + return NULL; // 4-byte UTF-8, need next byte + char_string[0] = '\0'; // init + if ((utf_seq[1] & 0xF0) == 0xA0) + char_string[0] = ' '; // double width UTF-8 char + strncat(char_string, utf_seq, sizeof(char_string)); + utf_seq[0] = 0; // this sequence is over, clear for restart + return char_string; // process this UTF-8 char... 
+ } + } + } + utf_seq[0] = 0; // init in case there was an invalid UTF-8 sequence + return c; // single-byte NON-UTF-8 char, process it as usual. +} + +/* + * wrapper of check_utf8 for pico, if not in UTF-8 mode, do not check UTF-8 + */ +char * +pico_check_utf8(c, utf_seq) + char *c; + char *utf_seq; +{ + if(!(Pmaster->pine_flags & P_UTF8)) + return c; + return check_utf8(c, utf_seq); +} + +/* + * Get the number of columns which are filled by the text in the current + * line of LineEdit(from the start of the line to the current position) + */ +static int +count_screencols(void) +{ + char utf_seq[7] = "", *cp, *r; + int seq = 0, w = 0; + + for(cp = ods.cur_l->text; *cp && cp < ods.cur_l->text + ods.p_off; + cp++) { + if (!(r = pico_check_utf8(cp, utf_seq))) { + seq = 1; + continue; + } + if (seq) + w++; + seq = 0; + if (r == cp) + w++; + else if (*r == ' ') + w++; + } + return w; +} +/* + * Get the offset in screen positions which must be subsctracted from the + * byte count in the LineEdit line in order to reach the line position on + * screen(because of double wide characters and multible UTF-8 bytes) + */ +static int +offset_on_screen(void) +{ + return ods.p_off - count_screencols(); +} + +/* + * Move current position in LineEdit one character left, return the number + * of byte positons which were neccesary to jump left in order to + * arrive at the start of the previous multibyte character(UTF-8). 
+ */ +static int +LineEditCharLeft() +{ + int col_right = ods.p_off, cols = count_screencols(); + + do + if (--ods.p_off < 0) + break; + while (count_screencols() - cols == -1); + + ods.p_off++; + + if (col_right - ods.p_off > 0) + return col_right - ods.p_off; + + do + if (--ods.p_off < 0) + break; + while (count_screencols() - cols == -2); + + ods.p_off++; + + return col_right - ods.p_off; +} + +/* + * Move current position in LineEdit one character right, if UTF-8 + * mode is active, the ods.p_off is assumed to be at the start of + * a UTF-8 sequence or at a normal ASCII character. It is moved to + * the next character, jumping past the end of the current UTF-8 + * sequence, if UTF8 mode is active. + */ +static void +LineEditCharRight() +{ + char utf_seq[7] = ""; + while(ods.p_off < ods.p_len && ods.cur_l->text[ods.p_off] && + !pico_check_utf8(ods.cur_l->text + ods.p_off++, utf_seq)); +} /* * ResizeHeader - Handle resizing display when SIGWINCH received. @@ -397,7 +578,7 @@ PaintBody(0); if(ComposerEditing) - movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen); + HeaderPaintCursor(); (*term.t_flush)(); return(TRUE); @@ -1584,6 +1765,7 @@ int skipmove = 0; char *strng; int last_key; /* last keystroke */ + unsigned char utf_seq[7] = ""; strng = ods.cur_l->text; /* initialize offsets */ ods.p_len = strlen(strng); @@ -1666,7 +1848,7 @@ } clearcursor(); - movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen); + HeaderPaintCursor(); if(ch == NODATA) /* GetKey timed out */ continue; @@ -1676,7 +1858,7 @@ if(mpresf){ /* blast old messages */ if(mpresf++ > NMMESSDELAY){ /* every few keystrokes */ mlerase(); - movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen); + HeaderPaintCursor(); } } @@ -1722,12 +1904,38 @@ /* * then find out where things fit... + * + * For UTF-8, the < LINELEN check should need to do it's + * calculation based on count_screencols() plus the width + * of the new char as provided by pico_check_utf8. 
+ * The buffer size may need to be increased for this. */ if(ods.p_len < LINELEN()){ CELL c; - c.c = ch; c.a = 0; + if(Pmaster->pine_flags & P_UTF8) { + char * chp = pico_check_utf8(&ch, utf_seq); + if (chp == NULL) + continue; /* on to the next! */ + if (chp != (char *)&ch && *chp == ' ') + chp++; + if (*chp & 0x80) { + while (*chp && ods.p_len < LINELEN()) { + c.c = *chp++; + pinsert(c); /* add char to str */ + } + /* to update the display line */ + PaintHeader(ods.p_line, FALSE); + /* If end char was inserted, set physical .. */ + if (ods.p_off == ods.p_len) + /* cursor pos on next movecursor_offset: */ + movecursor_offset(-1, 0, 0); + continue; /* on to the next! */ + } + } + + c.c = ch; if(pinsert(c)){ /* add char to str */ skipmove++; /* must'a been optimal */ continue; /* on to the next! */ @@ -1764,6 +1972,7 @@ } } else { /* interpret ch as a command */ + utf_seq[0] = '\0'; switch (ch = normalize_cmd(ch, ckm, 2)) { case (CTRL|'\\') : if (ch = GetAccent()) @@ -1856,9 +2065,7 @@ case (CTRL|'F') : case KEY_RIGHT: /* move character right */ if(ods.p_off < ods.p_len){ - pputc(pscr(ods.p_line, - (ods.p_off++)+headents[ods.cur_e].prlen)->c,0); - skipmove++; + LineEditCharRight(); continue; } else if(gmode & MDHDRONLY) @@ -1870,7 +2077,7 @@ case (CTRL|'B') : case KEY_LEFT : /* move character left */ if(ods.p_off > 0){ - ods.p_off--; + LineEditCharLeft(); continue; } if(ods.p_line != COMPOSER_TOP_LINE) @@ -1905,7 +2112,8 @@ continue; } - pputc(strng[ods.p_off++], 0); /* drop through and rubout */ + LineEditCharRight(); /* jump to next char */ + /* and fall thru */ case DEL : /* blast previous char */ case (CTRL|'H') : @@ -1919,20 +2127,27 @@ continue; } - if(ods.p_off > 0){ /* just shift left one char */ - ods.p_len--; + if(ods.p_off > 0){ /* shift left one char */ + int todelete = LineEditCharLeft(); + + ods.p_len -= todelete; + headents[ods.cur_e].dirty = 1; if(ods.p_len == 0) headents[ods.cur_e].sticky = 0; else headents[ods.cur_e].sticky = 1; - tbufp = 
&strng[--ods.p_off]; - while(*tbufp++ != '\0') - tbufp[-1] = *tbufp; tbufp = &strng[ods.p_off]; + + while(*tbufp++ != '\0') + tbufp[-1] = tbufp[todelete-1]; + if(pdel()) /* physical screen delete */ skipmove++; /* must'a been optimal */ + + /* needed if pine bgcolor != terminal background color */ + PaintHeader(ods.p_line, TRUE); } else{ /* may have work to do */ if(ods.cur_l->prev == NULL){ @@ -1943,18 +2158,16 @@ ods.p_line--; ods.cur_l = ods.cur_l->prev; strng = ods.cur_l->text; - if((i=strlen(strng)) > 0){ - strng[i-1] = '\0'; /* erase the character */ - ods.p_off = i-1; + if((ods.p_off=strlen(strng)) > 0){ + ods.p_off -= LineEditCharLeft() - 1; + strng[ods.p_off] = '\0'; /* erase the character */ } - else{ + else headents[ods.cur_e].sticky = 0; - ods.p_off = 0; - } - - tbufp = &strng[ods.p_off]; } + tbufp = &strng[ods.p_off]; + if((status = FormatLines(ods.cur_l, "", LINELEN(), headents[ods.cur_e].break_on_comma,0))==-1){ (*term.t_beep)(); @@ -1979,7 +2192,7 @@ PaintBody(1); } - movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen); + HeaderPaintCursor(); if(skipmove) continue; @@ -2004,7 +2217,8 @@ void HeaderPaintCursor() { - movecursor(ods.p_line, ods.p_off+headents[ods.cur_e].prlen); + movecursor_offset(ods.p_line, ods.p_off + headents[ods.cur_e].prlen, + offset_on_screen()); } diff -urN pine4.61.orig/pico/display.c pine4.61/pico/display.c --- pine4.61.orig/pico/display.c 2004-07-21 01:52:33.000000000 +0200 +++ pine4.61/pico/display.c 2004-07-21 01:53:21.490516776 +0200 @@ -1089,7 +1089,22 @@ } } +void +movecursor_offset(row, col, offs) +int row, col, offs; +{ + static int force_next = 0; + if(row == -1) { + force_next = row; + return; + } + if(row!=ttrow || col!=ttcol || force_next) { + (*term.t_move)(row, col - offs); + ttrow = row; + ttcol = col; + } +} /* * Send a command to the terminal to move the hardware cursor to row "row" diff -urN pine4.61.orig/pico/efunc.h pine4.61/pico/efunc.h --- pine4.61.orig/pico/efunc.h 2004-07-21 01:52:33.000000000 
+0200 +++ pine4.61/pico/efunc.h 2004-07-21 01:53:21.493516320 +0200 @@ -118,6 +118,7 @@ extern VARS_TO_SAVE *save_pico_state PROTO((void)); extern void restore_pico_state PROTO((VARS_TO_SAVE *)); extern void free_pico_state PROTO((VARS_TO_SAVE *)); +extern char *check_utf8 PROTO((char *, char *)); extern void HeaderPaintCursor PROTO((void)); extern void PaintBody PROTO((int)); diff -urN pine4.61.orig/pico/pico.h pine4.61/pico/pico.h --- pine4.61.orig/pico/pico.h 2004-07-21 01:52:33.000000000 +0200 +++ pine4.61/pico/pico.h 2004-07-21 01:53:21.495516016 +0200 @@ -373,7 +373,7 @@ #define P_HICTRL 0x80000000 /* overwrite mode */ #define P_CHKPTNOW 0x40000000 /* do the checkpoint on entry */ #define P_DELRUBS 0x20000000 /* map ^H to forwdel */ -#define P_LOCALLF 0x10000000 /* use local vs. NVT EOL */ +#define P_UTF8 0x10000000 /* UTF-8 mode */ #define P_BODY 0x08000000 /* start composer in body */ #define P_HEADEND 0x04000000 /* start composer at end of header */ #define P_VIEW MDVIEW /* read-only */ diff -urN pine4.61.orig/pine/filter.c pine4.61/pine/filter.c --- pine4.61.orig/pine/filter.c 2004-07-21 01:52:33.000000000 +0200 +++ pine4.61/pine/filter.c 2004-07-21 03:05:27.500863240 +0200 @@ -65,6 +65,9 @@ #include "headers.h" +#ifdef HAVE_ICONV +#include +#endif /* @@ -708,7 +711,7 @@ #define FL_STF 16 #define FL_SIG 17 #define STOP_DECODING 18 - +#define UTF8 19 /* @@ -738,6 +741,13 @@ unsigned char *GF_IP_INIT(FO); \ unsigned char *GF_EIB_INIT(FO); +#ifdef HAVE_ICONV +#define GF_ICINIT(I, O) unsigned char *GF_OP_INIT(I); \ + unsigned char *GF_EOB_INIT(I); \ + unsigned char *GF_IP_INIT(O); \ + unsigned char *GF_EIB_INIT(O); +#endif + #define GF_CH_RESET(F) (op = eob = GF_QUE_START(F), \ (F)->queueout = (F)->queuein = 0) @@ -2274,6 +2284,110 @@ } } +#ifdef HAVE_ICONV +/* + * This filter converts the input buffer in the MIME charset of + * a message, for example) to another (the user's display charset) + * using iconv(3), POSIX/Single Unix Standard API. 
+ */ +void +gf_convert_utf8_charset(f, flg) + FILTER_S *f; + int flg; +{ + static iconv_t iconv_desc; + static int einval_inbytesleft; + GF_ICINIT(f, f->next); + + switch (flg) { + case GF_DATA: { + size_t conv, inbytesleft = eob - op, outbytesleft = eib - ip; + /* + * If einval_inbytesleft is set, iconv() encountered an incomplete + * multibyte sequence and we asked for more input. In case the number + * of chars left to convert did not change, we should be at the end + * of input and we have an incomplete multibyte sequence at the end + * end of input. We only mark this and ignore the incomplete data. + */ + if (inbytesleft == einval_inbytesleft) { + char *einval_error = "[invalid multibyte seq at end of input]"; + dprint(8,(debugfile, "inval multibyte seq at end of input\n")); + for (;*einval_error;einval_error++) + GF_PUTC(f->next, *einval_error); + GF_FLUSH(f->next); + op = eob; /* throw the remaing unusable bytes away */ + GF_CH_RESET(f); + break; + } + while (1) { + if (!outbytesleft || !inbytesleft) { + GF_FLUSH(f->next); + outbytesleft = eib - ip; + } + if (!inbytesleft) { + GF_CH_RESET(f); + break; + } + einval_inbytesleft = -1; + conv = iconv(iconv_desc, (char **)&op, &inbytesleft, + (char **)&ip, &outbytesleft); + if (conv != (size_t) (-1)) { /* iconv succeeded */ + dprint(9,(debugfile, "irres. conv. count: %d, il: %d, ol: %d\n", + conv, inbytesleft, outbytesleft)); + /* iconv failed. 
check errno */ + } else if (errno == E2BIG){ + dprint(9,(debugfile, "e2big: outbytesleft=%d\n", outbytesleft)); + outbytesleft = 0; + } else if (errno == EILSEQ){ + char hexout[3]; + dprint(9,(debugfile, "eilseq: ill.octet=0x%02x, il=%d, ol=%d\n", + *op, inbytesleft, outbytesleft)); + sprintf(hexout, "%2x", *op++); + inbytesleft--; + GF_PUTC(f->next, '['); + GF_PUTC(f->next, hexout[0]); + GF_PUTC(f->next, hexout[1]); + GF_PUTC(f->next, ']'); + outbytesleft = eib - ip; + iconv(iconv_desc, NULL, NULL, NULL, NULL); + } else if (errno == EINVAL){ + /* + * We have to return from this function now because our input + * buffer contains an incomplete multibyte character which we + * can't complete without the next bytes of input. + */ + dprint(9,(debugfile, + "einval: %d, ol: %d, incomplete input: 0x%02x\n", + inbytesleft, outbytesleft, (unsigned char) *op)); + /* + * Before we abort here, we need to flush already converted + * output to the filter chain, otherwise we may loose this + * already converted content. + */ + GF_FLUSH(f->next); + /* + * In case we are at the end of all input, and we have + * an incomplete multibyte sequence left, we must find + * a way to not fall into a loop, remember the bytes left: + */ + einval_inbytesleft = inbytesleft; + break; /* Take the straigt way out now */ + } /* errno check */ + } /* while (1) */ + GF_END(f, f->next); + break; + } /* GF_DATA */ + case GF_RESET: + iconv_desc = (iconv_t)(f->opt); + iconv(iconv_desc, NULL, NULL, NULL, NULL); + einval_inbytesleft = -1; + break; + case GF_EOD: + GF_FLUSH(f->next); + (*f->next->f)(f->next, GF_EOD); + } /* switch (flg) */ +} +#else /* * This filter converts characters in UTF-8 to an 8-bit or 16-bit charset. @@ -2377,6 +2491,7 @@ f->n = 0L; } } +#endif /* @@ -2904,6 +3019,7 @@ unsigned strip:1; /* Hilite TAGs allowed */ unsigned handles_loc:1; /* Local handles requested? 
*/ unsigned outputted:1; /* any */ + unsigned nowrap:1; /* Do not wrap, leave it to others */ } HTML_OPT_S; @@ -2915,6 +3031,7 @@ #define HTML_WROTE(X) (HD(X)->wrote) #define HTML_BASE(X) ((X)->opt ? ((HTML_OPT_S *)(X)->opt)->base : NULL) #define STRIP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->strip) +#define NOWRAP(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->nowrap) #define HANDLESP(X) (((HTML_OPT_S *)(X)->opt)->handlesp) #define DO_HANDLES(X) ((X)->opt && HANDLESP(X)) #define HANDLES_LOC(X) ((X)->opt && ((HTML_OPT_S *)(X)->opt)->handles_loc) @@ -4642,8 +4759,6 @@ CENTER_BIT(hd->html_data) = 0; hd->y = html_indent(hd->html_data, 0, HTML_ID_SET); - hd->z = HD(hd->html_data)->wrapcol; - HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8; html_blank(hd->html_data, 1); } else if(cmd == GF_EOD){ @@ -4656,7 +4771,6 @@ html_indent(hd->html_data, hd->y, HTML_ID_SET); html_blank(hd->html_data, 1); CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0; - HD(hd->html_data)->wrapcol = hd->z; } return(1); /* get linked */ @@ -4692,8 +4806,6 @@ hd->x |= HTML_HX_ULINE; CENTER_BIT(hd->html_data) = 0; hd->y = html_indent(hd->html_data, 2, HTML_ID_SET); - hd->z = HD(hd->html_data)->wrapcol; - HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8; html_blank(hd->html_data, 1); } else if(cmd == GF_EOD){ @@ -4706,7 +4818,6 @@ html_indent(hd->html_data, hd->y, HTML_ID_SET); html_blank(hd->html_data, 1); CENTER_BIT(hd->html_data) = (hd->x & HTML_HX_CENTER) != 0; - HD(hd->html_data)->wrapcol = hd->z; } return(1); /* get linked */ @@ -4732,8 +4843,6 @@ hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */ CENTER_BIT(hd->html_data) = 0; hd->y = html_indent(hd->html_data, 4, HTML_ID_SET); - hd->z = HD(hd->html_data)->wrapcol; - HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8; html_blank(hd->html_data, 1); } else if(cmd == GF_EOD){ @@ -4743,7 +4852,6 @@ html_indent(hd->html_data, (int) hd->y, HTML_ID_SET); html_blank(hd->html_data, 1); 
CENTER_BIT(hd->html_data) = hd->x; - HD(hd->html_data)->wrapcol = hd->z; } return(1); /* get linked */ @@ -4769,8 +4877,6 @@ hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */ CENTER_BIT(hd->html_data) = 0; hd->y = html_indent(hd->html_data, 6, HTML_ID_SET); - hd->z = HD(hd->html_data)->wrapcol; - HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8; html_blank(hd->html_data, 1); } else if(cmd == GF_EOD){ @@ -4806,8 +4912,6 @@ hd->x = CENTER_BIT(hd->html_data); /* stop centering for now */ CENTER_BIT(hd->html_data) = 0; hd->y = html_indent(hd->html_data, 8, HTML_ID_SET); - hd->z = HD(hd->html_data)->wrapcol; - HD(hd->html_data)->wrapcol = WRAP_COLS(hd->html_data) - 8; html_blank(hd->html_data, 1); } else if(cmd == GF_EOD){ @@ -4817,7 +4921,6 @@ html_indent(hd->html_data, (int) hd->y, HTML_ID_SET); html_blank(hd->html_data, 1); CENTER_BIT(hd->html_data) = hd->x; - HD(hd->html_data)->wrapcol = hd->z; } return(1); /* get linked */ @@ -5769,7 +5872,7 @@ f->data = (HTML_DATA_S *) fs_get(sizeof(HTML_DATA_S)); memset(f->data, 0, sizeof(HTML_DATA_S)); HD(f)->wrapstate = 1; /* start with flowing text */ - HD(f)->wrapcol = WRAP_COLS(f) - 8; + HD(f)->wrapcol = NOWRAP(f) ? 
8000 : WRAP_COLS(f); f->f1 = DFL; /* state */ f->f2 = 0; /* chars in wrap buffer */ f->n = 0L; /* chars on line so far */ @@ -5929,7 +6032,7 @@ if(HD(f)->prefix) html_a_prefix(f); - if(++f->f2 >= WRAP_COLS(f)){ + if(++f->f2 >= HD(f)->wrapcol){ HTML_LINEP_PUTC(f, ch & 0xff); HTML_FLUSH(f); html_newline(f); @@ -6172,7 +6275,7 @@ /* ch is start of next word */ HD(f)->centered->space = 0; - if(HD(f)->centered->word.width >= WRAP_COLS(f)) + if(HD(f)->centered->word.width >= HD(f)->wrapcol) html_centered_flush(f); html_centered_putc(&HD(f)->centered->word, ch); @@ -6561,6 +6664,7 @@ op->columns = columns - (margin_l + margin_r); op->strip = ((flags & GFHP_STRIPPED) == GFHP_STRIPPED); op->handlesp = handlesp; + op->nowrap = ((flags & GFHP_NOWRAP) == GFHP_NOWRAP); op->handles_loc = ((flags & GFHP_LOCAL_HANDLES) == GFHP_LOCAL_HANDLES); return((void *) op); } @@ -6768,9 +6872,11 @@ state, wrap_col, wrap_max, + offset, margin_l, margin_r, indent; + char utf_seq[8]; char special[256]; long curlinenum; /* current line number */ int curqstrpos; /* current position in quote string */ @@ -6781,8 +6887,9 @@ #define WRAP_MARG_L(F) (((WRAP_S *)(F)->opt)->margin_l) #define WRAP_MARG_R(F) (((WRAP_S *)(F)->opt)->margin_r) -#define WRAP_COL(F) (((WRAP_S *)(F)->opt)->wrap_col - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 1 : 0)) -#define WRAP_MAX_COL(F) (((WRAP_S *)(F)->opt)->wrap_max - WRAP_MARG_R(F) - ((((WRAP_S *)(F)->opt)->leave_flowed) ? 
1 : 0)) +#define WRAP_UTF_SEQ(F) (((WRAP_S *)(F)->opt)->utf_seq) +#define WRAP_COL(F) (((WRAP_S *)(F)->opt)->wrap_col - WRAP_INDENT(F)) +#define WRAP_MAX_COL(F) (((WRAP_S *)(F)->opt)->wrap_max - WRAP_INDENT(F)) #define WRAP_INDENT(F) (((WRAP_S *)(F)->opt)->indent) #define WRAP_DO_IND(F) (((WRAP_S *)(F)->opt)->do_indent) #define WRAP_COMMA(F) (((WRAP_S *)(F)->opt)->on_comma) @@ -6874,7 +6981,8 @@ GF_INIT(f, f->next); if(flg == GF_DATA){ - register unsigned char c; + char *chp, *chp2; + unsigned char c; register int state = f->f1; register int x; @@ -7309,8 +7417,42 @@ break; + case UTF8 : + if(!(chp = chp2 = pine_check_utf8(&c, WRAP_UTF_SEQ(f)))) + break; /* sequence not complete, need next byte */ + state = DFL; /* end of sequence, leave the UTF-8 mode */ + if(chp != (char *)&c) { /* seq. complete, wrap and write it */ + if(f->n + f->f2 + WRAP_SPC_LEN(f) + > WRAP_COL(f) - (*chp == ' '?2:1)) { + dprint(8, (debugfile, "UTF8: newline\n")); + wrap_flush(f, &ip, &eib, &op, &eob); /* make sure we write everything we've */ + wrap_eol(f, 1, &ip, &eib, &op, &eob); /* end of line, double-with doesn't fit */ + wrap_bol(f, 1, 1, &ip, &eib, &op, &eob); /* write any prefix */ + } + f->n++; + if(*chp == ' ') { /* double-wide UTF-8 char, check space */ + chp++; /* ' ' was just a flag, skip over it */ + f->n++; + } + dprint(9, (debugfile, "UTF8: free room: %02d char: '%s'\n", + (WRAP_COL(f) - f->n - f->f2 - WRAP_SPC_LEN(f)), chp)); + f->n -= strlen(chp); + WRAP_PUTC(f, *chp++, 1); + while(*chp) + WRAP_PUTC(f, *chp++, 1); + if(*chp2 == ' ') + wrap_flush(f, &ip, &eib, &op, &eob); + break; + } + WRAP_PUTC(f, '?', 1); /* in place of invalid sequence */ + /* fall thru to process new char */ + wrap_flush(f, &ip, &eib, &op, &eob); case_dfl : case DFL : + if (!pine_check_utf8(&c, WRAP_UTF_SEQ(f))) { + state = UTF8; /* Change to UTF-8 mode */ + break; /* Process next char in UTF-8 mode */ + } if(WRAP_SPEC(f, c)){ switch(c){ default : @@ -7541,9 +7683,10 @@ while(WRAP_INDENT(f) >= 
WRAP_MAX_COL(f)) WRAP_INDENT(f) /= 2; - f->line = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char)); + WRAP_UTF_SEQ(f)[0] = '\0'; + f->line = (char *) fs_get(WRAP_MAX_COL(f) * sizeof(char) * 6); f->linep = f->line; - WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f) - 1]; + WRAP_LASTC(f) = &f->line[WRAP_MAX_COL(f)*6 - 1]; for(i = 0; i < 256; i++) ((WRAP_S *) f->opt)->special[i] = ((i == '\"' && WRAP_COMMA(f)) diff -urN pine4.61.orig/pine/init.c pine4.61/pine/init.c --- pine4.61.orig/pine/init.c 2004-07-21 01:52:33.000000000 +0200 +++ pine4.61/pine/init.c 2004-07-21 01:53:21.555506896 +0200 @@ -253,6 +253,16 @@ CONF_TXT_T cf_text_character_set[] = "Reflects capabilities of the display you have. Default: US-ASCII.\n# Typical alternatives include ISO-8859-x, (x is a number between 1 and 9)."; +CONF_TXT_T cf_text_assumed_charset[] = "When MIME charset information is missing in Content-Type header field.\n# Message is assumed to be in this charset. Default: US-ASCII. Typical values\n# include ISO-8859-x, ISO-2022-JP, EUC-KR, GB2312, and Big5. The value of\n# header fields which are not encoded per RFC 2047\n# is also assumed to be\n# in this charset."; + +CONF_TXT_T cf_text_charset_aliases[] = "List of charset aliases. Each alias is a pair of charsets delimetered by a\n# single colon, the first one being an alias to the second one. The latter is\n# usually standard/prefered MIME name while the former is non-standard name used\n# by some email clients. For instance, you may have 'x-big5:big5,euc-cn:gb2312'"; + +#ifdef HAVE_ICONV +CONF_TXT_T cf_text_iconv_aliases[] = "List of charset aliases to use with iconv(). Each alias is a pair of\n# charsets delimetered by a single colon, the first one being an alias to the\n# second one. The former is usually standard/prefered MIME name while the\n# latter is non-standard name used by iconv(3) on your system.\n#For example,\n# your iconv may use non-standard 'UTF8' for the standard 'UTF-8'. 
In that\n# case, you can put 'UTF-8:UTF8' here."; + +CONF_TXT_T cf_text_send_charset[] = "Specifies the MIME charset that a message will be sent in. If not set,\n# the value of character set will be used."; +#endif + CONF_TXT_T cf_text_editor[] = "Specifies the program invoked by ^_ in the Composer,\n# or the \"enable-alternate-editor-implicitly\" feature."; CONF_TXT_T cf_text_speller[] = "Specifies the program invoked by ^T in the Composer."; @@ -599,6 +609,18 @@ cf_text_startup_rules}, {"character-set", 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, cf_text_character_set}, +#ifdef HAVE_ICONV +{"send-charset", 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, + cf_text_send_charset}, +#endif +{"assumed-charset", 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, + cf_text_assumed_charset}, +{"charset-aliases", 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, + cf_text_charset_aliases}, +#ifdef HAVE_ICONV +{"iconv-aliases", 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, + cf_text_iconv_aliases}, +#endif {"editor", 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, cf_text_editor}, {"speller", 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, @@ -2193,6 +2215,9 @@ set_current_val(&vars[V_OLD_STYLE_REPLY], TRUE, TRUE); obs_old_style_reply = !strucmp(VAR_OLD_STYLE_REPLY, "yes"); + /* needed in process_feature_list */ + set_current_val(&vars[V_CHAR_SET], TRUE, TRUE); + set_feature_list_current_val(&vars[V_FEATURE_LIST]); process_feature_list(ps, VAR_FEATURE_LIST, (obs_feature_level == Seasoned) ? 
1 : 0, @@ -2200,7 +2225,12 @@ set_current_val(&vars[V_SIGNATURE_FILE], TRUE, TRUE); set_current_val(&vars[V_LITERAL_SIG], TRUE, TRUE); - set_current_val(&vars[V_CHAR_SET], TRUE, TRUE); + set_current_val(&vars[V_ASSUMED_CHAR_SET], TRUE, TRUE); + set_current_val(&vars[V_CHAR_SET_ALIASES], TRUE, TRUE); +#ifdef HAVE_ICONV + set_current_val(&vars[V_ICONV_ALIASES], TRUE, TRUE); + set_current_val(&vars[V_SEND_CHAR_SET], TRUE, TRUE); +#endif set_current_val(&vars[V_GLOB_ADDRBOOK], TRUE, TRUE); set_current_val(&vars[V_ADDRESSBOOK], TRUE, TRUE); set_current_val(&vars[V_FORCED_ABOOK_ENTRY], TRUE, TRUE); @@ -3230,6 +3260,9 @@ #else ps->pass_ctrl_chars = F_ON(F_PASS_CONTROL_CHARS,ps_global) ? 1 : 0; ps->pass_c1_ctrl_chars = F_ON(F_PASS_C1_CONTROL_CHARS,ps_global) ? 1 : 0; + if(ps_global->VAR_CHAR_SET + && !strucmp(ps_global->VAR_CHAR_SET, "UTF-8")) + ps->pass_c1_ctrl_chars = 1; if(F_ON(F_QUELL_BEZERK_TIMEZONE,ps_global)) mail_parameters(NULL, SET_NOTIMEZONES, (void *) 1); diff -urN pine4.61.orig/pine/mailindx.c pine4.61/pine/mailindx.c --- pine4.61.orig/pine/mailindx.c 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/mailindx.c 2004-07-21 02:48:43.996419104 +0200 @@ -2257,10 +2257,11 @@ drew_X++; } - if(pcol >= 0 && pcol < cols){ - save_pchar = draw[pcol]; - draw[pcol] = h->plus; - } + /* + * We used to write to a fixed byte position here, but + * of course this cannot work with UTF-8. So we + * now use MoveCursor after we have painted the line. 
+ */ if(h->offs[0].offset < 0 || h->offs[0].offset >= cols){ /* no special color, draw from 0 to end */ @@ -2367,6 +2368,10 @@ EndInverse(); } } + if(pcol >= 0 && pcol < cols){ + MoveCursor(HEADER_ROWS(ps_global) + line, pcol); + Writechar(h->plus, 0); + } done_drawing: if(drew_X) @@ -2378,8 +2383,6 @@ if(cur) EndInverse(); - if(pcol >= 0 && pcol < cols) - draw[pcol] = save_pchar; } if(base_color && base_color != lastc && base_color != &h->linecolor) @@ -4469,6 +4472,58 @@ return(doy); } +static char * +rfc_1522_check_charset(chp) + char *chp; +{ + static char *subj_cs = NULL; + char *cs, *enc; + + while(chp && (chp = strstr(chp, "=?"))) + if(rfc1522_valid(chp++, 1, &cs, &enc, NULL, NULL)){ + int cs_len = enc - cs - 1; + + if(subj_cs) + fs_give((void **)&subj_cs); + + strncpy(subj_cs = fs_get(cs_len + 1), cs, cs_len); + subj_cs[cs_len] = 0; + + return subj_cs; + } + return NULL; +} + +static void +rfc1522_decode_width(dest, source, width, idata) + char *dest; + char *source; + int width; + INDEXDATA_S *idata; +{ + char *subj_cs, *assumed_save = NULL, *dummy = NULL, *tmp; + + if(idata && (subj_cs = rfc_1522_check_charset(fetch_subject(idata)))){ + assumed_save = ps_global->VAR_ASSUMED_CHAR_SET; + ps_global->VAR_ASSUMED_CHAR_SET = subj_cs; + } + + tmp = (char *) rfc1522_decode((unsigned char *) tmp_20k_buf, + SIZEOF_20KBUF, source, &dummy); + if(idata){ + if(tmp == source) + strncpy(tmp = tmp_20k_buf, source, SIZEOF_20KBUF); + + removing_leading_and_trailing_white_space(tmp); + + if(subj_cs) + ps_global->VAR_ASSUMED_CHAR_SET = assumed_save; + } + charset_istrncpy(dest, tmp, width, 0); + + if(dummy) + fs_give((void **)&dummy); +} /*---------------------------------------------------------------------- @@ -4485,7 +4540,8 @@ format_index_index_line(idata) INDEXDATA_S *idata; { - char str_buf[MAXIFLDS][MAX_SCREEN_COLS+1], to_us, status, *field, +#define STRLEN MAX_SCREEN_COLS*6 + char str_buf[MAXIFLDS][STRLEN+1], to_us, status, *field, *buffer, *s_tmp, *p, *str, 
*newsgroups; int width, offsets_set = 0, i, j, smallest, which_array = 0; int plus_off = -1, imp_off = -1, del_off = -1, ans_off = -1, @@ -4794,7 +4850,7 @@ case iFrom: case iAddress: case iMailbox: - from_str(cdesc->ctype, idata, width, str); + from_str(cdesc->ctype, idata, min(width*6,STRLEN), str); break; case iTo: @@ -5111,11 +5167,11 @@ break; case iSubject: - subj_str(idata, width, str, 0); + subj_str(idata, min(width*6,STRLEN), str, 0); break; case iSubjKey: - subj_str(idata, width, str, 1); + subj_str(idata, min(width*6,STRLEN), str, 1); break; case iNews: @@ -5248,7 +5304,6 @@ cdesc->ctype != iNothing && which_array < MAXIFLDS; cdesc++) if(width = cdesc->width){ - char *q; str = str_buf[which_array++]; @@ -5259,34 +5314,11 @@ } if(cdesc->adjustment == Left) - sprintf(p, "%-*.*s", width, width, str); + charset_istrncpy(p, str, width, 1); else sprintf(p, "%*.*s", width, width, str); - /* - * Make sure there are no nulls in the part we were supposed to - * have just written. This may happen if sprintf returns an - * error, but we don't want to check for that because some - * sprintfs don't return anything. If there are nulls, rewrite it. - */ - for(q = p; q < p+width; q++) - if(*q == '\0') - break; - - if(q < p+width){ - strncpy(p, repeat_char(width, ' '), width); - p[width] = '\0'; - /* throw a ? 
in there too */ - if(width > 4){ - p[(width-1)/2 - 1] = '?'; - p[(width-1)/2 ] = '?'; - p[(width-1)/2 + 1] = '?'; - } - else if(width > 2) - p[(width-1)/2] = '?'; - } - - p += width; + p += strlen(p); } for(i = 0; i < OFFS; i++) @@ -5388,7 +5420,7 @@ } /* Truncate it to be sure not too wide */ - buffer[min(ps_global->ttyo->screen_cols, i_cache_width())] = '\0'; + buffer[i_cache_width()] = '\0'; hline->id = line_hash(buffer); dprint(9, (debugfile, "INDEX(%p) -->%s<-- (%d), 0x%lx>\n", hline, @@ -6236,21 +6268,12 @@ if(addr && !addr->next /* only one address */ && addr->host /* not group syntax */ && addr->personal && addr->personal[0]){ /* there is a personal name */ - char *dummy = NULL; - char buftmp[MAILTMPLEN]; int l; if(l = prefix ? strlen(prefix) : 0) strcpy(s, prefix); - sprintf(buftmp, "%.75s", addr->personal); - p = (char *) rfc1522_decode((unsigned char *) tmp_20k_buf, - SIZEOF_20KBUF, buftmp, &dummy); - removing_leading_and_trailing_white_space(p); - istrncpy(s + l, p, width - l); - s[width] = '\0'; - if(dummy) - fs_give((void **)&dummy); + rfc1522_decode_width(s + l, addr->personal, width - l, idata); if(*(s+l)) return(TRUE); @@ -6271,8 +6294,13 @@ if(l = prefix ? 
strlen(prefix) : 0) strcpy(s, prefix); - istrncpy(s + l, a_string, width - l); - s[width] = '\0'; + if (p = rfc_1522_check_charset(fetch_subject(idata))) { + char *dest = s + l; + conv_sstrncpy(p, NULL, &dest, a_string, width); + } else { + istrncpy(s + l, a_string, width - l); + s[width] = '\0'; + } fs_give((void **)&a_string); return(TRUE); @@ -6854,7 +6882,6 @@ char *subject, *origsubj, *sptr = NULL; char *p, *border, *q = NULL, *free_subj = NULL; unsigned char *tmp; - size_t len; int depth = 0, mult = 2, collapsed, we_clear = 0; PINETHRD_S *thd, *thdorig; HLINE_S *hline; @@ -7068,24 +7095,14 @@ if(do_subj){ width = (str + width) - sptr; - len = strlen(subject)+1; - tmp = fs_get(len * sizeof(unsigned char)); - istrncpy(sptr, (char *) rfc1522_decode(tmp, len, - subject, NULL), - width); - fs_give((void **) &tmp); + rfc1522_decode_width(sptr, subject, width, NULL); } else if(ps_global->thread_disp_style == THREAD_MUTTLIKE) sptr[0] = '>'; } } else{ - len = strlen(subject)+1; - tmp = fs_get(len * sizeof(unsigned char)); - istrncpy(str, - (char *) rfc1522_decode(tmp, len, subject, NULL), - width); - fs_give((void **) &tmp); + rfc1522_decode_width(str, subject, width, NULL); } if(free_subj) @@ -9924,7 +9941,7 @@ { long j; size_t newsize = sizeof(HLINE_S) - + ((max(ps_global->ttyo->screen_cols, 80)+1) * sizeof(char)); + + ((max(ps_global->ttyo->screen_cols, 80)+1)*6*sizeof(char)); if(j = (newsize % sizeof(long))) /* alignment hack */ newsize += (sizeof(long) - (size_t)j); @@ -10002,7 +10019,7 @@ dprint(2, (debugfile, "Called get_index_cache with msgno=%ld\n", msgno)); - big_enough = sizeof(HLINE_S) + (MAX_SCREEN_COLS * sizeof(char)) + big_enough = sizeof(HLINE_S) + (MAX_SCREEN_COLS * sizeof(char) * 6) + sizeof(long); if(!dummy_to_protect_ourselves) dummy_to_protect_ourselves = (HLINE_S *) fs_get(big_enough); diff -urN pine4.61.orig/pine/mailpart.c pine4.61/pine/mailpart.c --- pine4.61.orig/pine/mailpart.c 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/mailpart.c 
2004-07-21 01:53:21.602499752 +0200 @@ -4127,7 +4127,8 @@ fs_give((void **) &p); } else - passed = !strucmp(test + 9, "us-ascii"); + passed = !strucmp(test + 9, + ps_global->VAR_ASSUMED_CHAR_SET ? ps_global->VAR_ASSUMED_CHAR_SET : "us-ascii"); } else dprint(1, (debugfile, diff -urN pine4.61.orig/pine/mailview.c pine4.61/pine/mailview.c --- pine4.61.orig/pine/mailview.c 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/mailview.c 2004-07-21 01:53:21.625496256 +0200 @@ -396,6 +396,7 @@ int url_bogus PROTO((char *, char *)); long doubleclick_handle PROTO((SCROLL_S *, HANDLE_S *, int *, int *)); + #ifdef _WINDOWS int format_message_popup PROTO((SCROLL_S *, int)); int simple_text_popup PROTO((SCROLL_S *, int)); @@ -6100,7 +6101,7 @@ int flags; { FILTLIST_S filters[13]; - char *err, *charset; + char *err, *charset, *dcs = ps_global->VAR_CHAR_SET; int filtcnt = 0, error_found = 0, column, wrapit; int is_in_sig = OUT_SIG_BLOCK; int is_flowed_msg = 0; @@ -6136,15 +6137,24 @@ * as ascii or nothing. */ if(F_OFF(F_DISABLE_2022_JP_CONVERSIONS, ps_global) + && dcs && !strucmp(dcs, "ISO-2022-JP") && (!charset || !strucmp(charset, "US-ASCII") || !strucmp(charset, "ISO-2022-JP"))) +#ifdef HAVE_ICONV +#ifdef _WINDOWS + dcs = "shift-jis"; +#else + dcs = "euc-jp"; +#endif +#else filters[filtcnt++].filter = gf_2022_jp_to_euc; +#endif if(charset){ if(F_OFF(F_DISABLE_CHARSET_CONVERSIONS, ps_global)){ - ct = conversion_table(charset, ps_global->VAR_CHAR_SET); + ct = conversion_table(charset, dcs); if(ct && ct->convert && ct->table){ filters[filtcnt].filter = ct->convert; /* we could call an _opt routine here, but why bother? 
*/ @@ -6243,16 +6253,12 @@ else if(!strucmp(att->body->subtype, "html") && ps_global->full_header < 2){ /*BUG: sniff the params for "version=2.0" ala draft-ietf-html-spec-01 */ - int opts = 0; + /* html-internal wrap isn't aware of UTF-8, let gf_wrap do wrapping */ + int opts = GFHP_NOWRAP; - if(flags & FM_DISPLAY){ - if(handlesp) /* pass on handles awareness */ - opts |= GFHP_HANDLES; - } - else + if(!(flags & FM_DISPLAY)) opts |= GFHP_STRIPPED; /* don't embed anything! */ - wrapit = 0; /* wrap already handled! */ filters[filtcnt].filter = gf_html2plain; filters[filtcnt++].data = gf_html2plain_opt(NULL, column, format_view_margin(), diff -urN pine4.61.orig/pine/osdep/os-lnx.h pine4.61/pine/osdep/os-lnx.h --- pine4.61.orig/pine/osdep/os-lnx.h 2004-07-21 01:52:33.000000000 +0200 +++ pine4.61/pine/osdep/os-lnx.h 2004-07-21 01:53:21.627495952 +0200 @@ -214,6 +214,9 @@ ----*/ #define DF_DEFAULT_PRINTER ANSI_PRINTER +/* all recent Linux distributions come with glibc 2.x. with an excellent + * iconv implemenation */ +#define HAVE_ICONV /*----- The usual sendmail configuration for sending mail on Unix ------*/ diff -urN pine4.61.orig/pine/osdep/termout.unx pine4.61/pine/osdep/termout.unx --- pine4.61.orig/pine/osdep/termout.unx 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/osdep/termout.unx 2004-07-21 01:53:21.629495648 +0200 @@ -743,7 +743,17 @@ register unsigned int ch; int new_esc_len; { - static int esc_len = 0; + static int esc_len = 0, seq = 0; + static unsigned char utf_seq[7] = ""; + int ix; + unsigned char dbl_wide[7][2][4] = {0xe1,0x84,0x80,0x00, 0xe1,0x85,0x9F,0x00, + 0xe2,0x8c,0xa9,0x00, 0xe2,0x8c,0xaa,0x00, + 0xe2,0xba,0x80,0x00, 0xed,0x9e,0xa3,0x00, + 0xef,0xa4,0x80,0x00, 0xef,0xa9,0xaa,0x00, + 0xef,0xb8,0xb0,0x00, 0xef,0xb9,0xa8,0x00, + 0xef,0xbc,0x81,0x00, 0xef,0xbd,0xad,0x00, + 0xef,0xbf,0xa0,0x00, 0xef,0xbf,0xa6,0x00 + }; if(ps_global->in_init_seq /* silent */ || (F_ON(F_BLANK_KEYMENU, ps_global) /* or bottom, */ @@ -752,6 +762,33 @@ && _col + 1 
== ps_global->ttyo->screen_cols)) return; + /* Treat UTF-8 sequences if we are not in a special escape sequence */ + if(esc_len <= 0) { + unsigned char *chp; + if ((chp = pine_check_utf8(&ch, utf_seq)) == NULL) { + seq = 1; /* flag that we are in a open UTF-8 sequence */ + return; /* UTF-8 sequence not complete, need next char */ + } + if (chp != &ch) { + seq = 0; /* flag that we are not in a open UTF-8 sequence */ + _col++; + if (*chp == ' ') { + if(++_col > ps_global->ttyo->screen_cols) { + printf("\342\200\246"); /* UTF-8 points... */ + goto wrap; + } + chp++; + } + while(*chp) + putchar(*chp++); + return; + } + if (seq) { /* incomplete UTF-8 sequence */ + seq = 0; /* flag that we are not in a open UTF-8 sequence */ + putchar('?'); /* print question mark at place of sequence */ + } + } + if(ch == LINE_FEED || ch == RETURN || ch == BACKSPACE || ch == BELL || ch == TAB || ch == ESCAPE){ switch(ch){ @@ -829,7 +866,9 @@ like case 1. A little expensive but worth it to avoid problems with terminals configured so they don't match termcap */ - if(_col == ps_global->ttyo->screen_cols) { + if(_col >= ps_global->ttyo->screen_cols) { +wrap: + dprint(3, (debugfile, "%d,%02d, wrap(%x)\n",_line,_col,ch)); _col = 0; if(_line + 1 < ps_global->ttyo->screen_rows) _line++; diff -urN pine4.61.orig/pine/other.c pine4.61/pine/other.c --- pine4.61.orig/pine/other.c 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/other.c 2004-07-21 01:53:21.665490176 +0200 @@ -7446,6 +7446,16 @@ return(h_config_startup_rules); case V_CHAR_SET : return(h_config_char_set); + case V_ASSUMED_CHAR_SET : + return(h_config_assumed_charset); + case V_CHAR_SET_ALIASES : + return(h_config_charset_aliases); +#ifdef HAVE_ICONV + case V_ICONV_ALIASES : + return(h_config_iconv_aliases); + case V_SEND_CHAR_SET : + return(h_config_send_char_set); +#endif case V_EDITOR : return(h_config_editor); case V_SPELLER : @@ -11974,6 +11984,9 @@ case F_PASS_C1_CONTROL_CHARS : ps->pass_c1_ctrl_chars = 
F_ON(F_PASS_C1_CONTROL_CHARS,ps_global) ? 1 : 0; + if(ps_global->VAR_CHAR_SET + && !strucmp(ps_global->VAR_CHAR_SET, "UTF-8")) + ps->pass_c1_ctrl_chars = 1; break; #endif #ifdef MOUSE diff -urN pine4.61.orig/pine/pine.h pine4.61/pine/pine.h --- pine4.61.orig/pine/pine.h 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/pine.h 2004-07-21 01:53:21.685487136 +0200 @@ -68,6 +68,7 @@ #define PHONE_HOME_HOST "docserver.cac.washington.edu" #define UNKNOWN_CHARSET "X-UNKNOWN" +#define US_ASCII_CHARSET "US-ASCII" #define OUR_HDRS_LIST "X-Our-Headers" @@ -175,7 +176,7 @@ #define GER_ALLPARTS 0x04 /* AllParts toggle is on */ #define GFHP_STRIPPED 0x01 -#define GFHP_HANDLES 0x02 +#define GFHP_NOWRAP 0x02 #define GFHP_LOCAL_HANDLES 0x04 #define GFW_HANDLES 0x01 @@ -647,6 +648,14 @@ , V_SORT_RULES , V_STARTUP_RULES , V_CHAR_SET +#ifdef HAVE_ICONV + , V_SEND_CHAR_SET +#endif + , V_ASSUMED_CHAR_SET + , V_CHAR_SET_ALIASES +#ifdef HAVE_ICONV + , V_ICONV_ALIASES +#endif , V_EDITOR , V_SPELLER , V_FILLCOL @@ -934,6 +943,12 @@ #define USR_STARTUP_RULES vars[V_STARTUP_RULES].user_val.l #define VAR_CHAR_SET vars[V_CHAR_SET].current_val.p #define GLO_CHAR_SET vars[V_CHAR_SET].global_val.p +#define VAR_ASSUMED_CHAR_SET vars[V_ASSUMED_CHAR_SET].current_val.p +#define VAR_CHAR_SET_ALIASES vars[V_CHAR_SET_ALIASES].current_val.l +#ifdef HAVE_ICONV +#define VAR_ICONV_ALIASES vars[V_ICONV_ALIASES].current_val.l +#define VAR_SEND_CHAR_SET vars[V_SEND_CHAR_SET].current_val.p +#endif #define VAR_EDITOR vars[V_EDITOR].current_val.l #define GLO_EDITOR vars[V_EDITOR].global_val.l #define VAR_SPELLER vars[V_SPELLER].current_val.p @@ -3587,6 +3602,7 @@ } data; } REPLY_S; +#define pico(F) call_pico(F) #define REPLY_PSEUDO 1 #define REPLY_FORW 2 /* very similar to REPLY_PSEUDO */ #define REPLY_MSGNO 3 @@ -4068,7 +4084,6 @@ } ATABLE_S; -#define TAG_EMBED '\377' /* Announces embedded data in text string */ #define TAG_INVON '\001' /* Supported character attributes */ #define TAG_INVOFF '\002' #define 
TAG_BOLDON '\003' @@ -4078,6 +4093,7 @@ #define TAG_FGCOLOR '\010' /* Change to this foreground color */ #define TAG_BGCOLOR '\011' /* Change to this background color */ #define TAG_HANDLE '\020' /* indicate's a handle to an action */ +#define TAG_EMBED '\021' /* Announces embedded data in text string */ #define TAG_HANDLEOFF '\030' /* indicate's end of handle text */ @@ -5494,6 +5510,9 @@ int rfc2369_parse_fields PROTO((char *, RFC2369_S *)); unsigned char *trans_euc_to_2022_jp PROTO((unsigned char *)); unsigned char *trans_2022_jp_to_euc PROTO((unsigned char *)); +unsigned char* resolve_charset_alias PROTO((char *, char **)); +char *pine_check_utf8 PROTO((char *, char *)); +unsigned char *trans_with_iconv PROTO((unsigned char *, char *, char *)); /*-- takeaddr.c --*/ diff -urN pine4.61.orig/pine/pine.hlp pine4.61/pine/pine.hlp --- pine4.61.orig/pine/pine.hlp 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/pine.hlp 2004-07-21 02:23:49.198662904 +0200 @@ -227,6 +227,17 @@
  • The command to Replicate a rule did not copy the "not" parts of the rule correctly +This version includes the charset translation patch 9d by Bernhard Kaindl. + +

    +You need to enable the options +Disable-2022-JP-Conversions and +Pass-C1-Control-Characters-as-is +for this patch to work (will be fixed). + +For more information, see the file doc/iconv.txt provided by the patch and +http://www.suse.de/~bk/pine/iconv/ +

    Version 4.60 @@ -20735,6 +20746,110 @@ <End of help on this topic> +====== h_config_charset_aliases ===== + + +OPTION: Charset-Aliases + + +

    OPTION: Charset-Aliases

    + +List of charset aliases. + +

    +Each alias is a pair of charsets delimited by a single colon, +the first one being an alias to the second one. + +

    +The latter is usually the standard/preferred MIME name while the former +is a non-standard name used by some email clients. + +

    +For instance, you may set it to: 'x-big5:big5,euc-cn:gb2312' + +

    +

    +<End of help on this topic> + + +====== h_config_iconv_aliases ===== + + +OPTION: Iconv-Aliases + + +

    OPTION: Iconv-Aliases

    + +List of charset aliases to use with iconv(). + +

    +Each alias is a pair of charsets delimited by a single colon, +the first one being an alias to the second one. + +

    +The former is usually the standard/preferred MIME name while the latter +is a non-standard name used by iconv(3) on your system. + +

    +For example, your iconv may use non-standard 'UTF8' for the standard +'UTF-8'. In that case, you can put 'UTF-8:UTF8' here. + +

    +

    +<End of help on this topic> + + +====== h_config_assumed_charset ===== + + +OPTION: Assumed-Charset + + +

    OPTION: Assumed-Charset

    + +When MIME charset information is missing in the Content-Type header field, +the message is assumed to be in this charset. Default: US-ASCII. +Typical values include ISO-8859-x, ISO-2022-JP, EUC-KR, GB2312, and Big5. +Header fields which are not encoded per RFC 2047 are also assumed to be +in this charset. + +

    +

    +<End of help on this topic> + + +====== h_config_send_char_set ===== + + +OPTION: Send-Charset + + +

    OPTION: Send-Charset

    + +If it's set, the headers and the body of an outgoing message are converted +from the value of character-set (display/terminal charset) to the value +of this option. You have to set this option if your terminal/display charset +(say, UTF-8) is different from the charset you want your outgoing messages +to be in (say, ISO-8859-1, EUC-KR, Big5, GB2312) because your correspondents +can't handle emails in UTF-8. + +

    +

    +<End of help on this topic> + + ====== h_config_editor ===== diff -urN pine4.61.orig/pine/reply.c pine4.61/pine/reply.c --- pine4.61.orig/pine/reply.c 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/reply.c 2004-07-21 02:25:37.184246600 +0200 @@ -1615,18 +1615,30 @@ && (decoded[0] == 'R' || decoded[0] == 'r') && (decoded[1] == 'E' || decoded[1] == 'e')){ - if(decoded[2] == ':') - sprintf(buf, "%.*s", buflen-1, subject); + if(decoded[2] == ':'){ + strncpy(buf, subject,l); + buf[l]='\0'; + } else if((decoded[2] == '[') && (p = strchr(decoded, ']'))){ p++; while(*p && isspace((unsigned char)*p)) p++; - if(p[0] == ':') - sprintf(buf, "%.*s", buflen-1, subject); + if(p[0] == ':'){ + strncpy(buf, subject,l); + buf[l]='\0'; + } } } - if(!buf[0]) - sprintf(buf, "Re: %.*s", buflen-1, - (subject && *subject) ? subject : "your mail"); + if(!buf[0]) { + /* + * Used to be sprintf(). + * Some implementations of sprintf() are locale-dependent and + * don't pass through an invalid sequence of bytes blindly. + * Use strncpy() instead: + */ + strcpy(buf,"Re: "); + strncpy(buf+4, (subject && *subject) ? subject : "your mail",l); + buf[l+4]='\0'; + } fs_give((void **) &tmp); return(buf); @@ -4825,6 +4837,7 @@ ENVELOPE *outgoing; BODY *body = NULL; MESSAGECACHE *mc; + char *temp_send_cset = NULL; outgoing = mail_newenvelope(); outgoing->message_id = generate_message_id(); @@ -4908,6 +4921,18 @@ gf_clear_so_writec((STORE_S *) msgtext); +#ifdef HAVE_ICONV + /* + * reset VAR_SEND_CHAR_SET to '' temporarily NOT to + * apply the charset conversion to a bounced message. + */ + if (ps_global->VAR_SEND_CHAR_SET && *(ps_global->VAR_SEND_CHAR_SET)){ + temp_send_cset = (char *)fs_get(strlen(ps_global->VAR_SEND_CHAR_SET)+1); + strcpy(temp_send_cset, ps_global->VAR_SEND_CHAR_SET); + (ps_global->VAR_SEND_CHAR_SET)[0] = '\0'; + } +#endif + if(pine_simple_send(outgoing, &body, role, pmt_who, pmt_cnf, to, !(to && *to) ? SS_PROMPTFORTO : 0) < 0){ errstr = ""; /* p_s_s() better have explained! 
*/ @@ -4918,6 +4943,12 @@ mail_flag(stream, long2string(rawno), "\\SEEN", 0); } +#ifdef HAVE_ICONV + if (temp_send_cset){ + strcpy(ps_global->VAR_SEND_CHAR_SET, temp_send_cset); + fs_give((void **)&temp_send_cset); + } +#endif /* Just for good measure... */ mail_free_envelope(&outgoing); pine_free_body(&body); @@ -6590,6 +6621,9 @@ && ps_global->VAR_EDITOR[0] && ps_global->VAR_EDITOR[0][0])) ? P_ADVANCED : 0L) + | ((ps_global->VAR_CHAR_SET + && !strucmp(ps_global->VAR_CHAR_SET, "UTF-8")) + ? P_UTF8 : 0L) | ((!ps_global->VAR_CHAR_SET || !strucmp(ps_global->VAR_CHAR_SET, "US-ASCII")) ? P_HIBITIGN : 0L)); diff -urN pine4.61.orig/pine/send.c pine4.61/pine/send.c --- pine4.61.orig/pine/send.c 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/send.c 2004-07-21 01:53:21.829465248 +0200 @@ -95,6 +95,9 @@ CustomType set_default_hdrval PROTO((PINEFIELD *, PINEFIELD *)); int filter_message_text PROTO((char *, ENVELOPE *, BODY *, STORE_S **, METAENV *)); +#ifdef HAVE_ICONV +void filter_msgtxt_to_send_charset PROTO((BODY *)); +#endif void post_compose_filters PROTO((BODY *)); void pine_send_newsgroup_name PROTO((char *, char*, size_t)); long message_format_for_pico PROTO((long, int (*)(int))); @@ -6574,6 +6577,54 @@ } } +#ifdef HAVE_ICONV +/* + * Take the PicoText pointed to and replace it with PicoText which has been + * filtered to change the 'character-set' (display/terminal-charset) to + * 'send-charset'. (based on filter_msgtxt_euc_to_2022_jp, above) + */ +void +filter_msgtxt_to_send_charset(body) + BODY *body; +{ + STORE_S **so = (STORE_S **)((body->type == TYPEMULTIPART) + ? 
&body->nested.part->body.contents.text.data + : &body->contents.text.data); + STORE_S *filtered_so = NULL; + gf_io_t pc, gc; + char *errstr; + CONV_TABLE *ct; + char * assumed_save = ps_global->VAR_ASSUMED_CHAR_SET; + + ps_global->VAR_ASSUMED_CHAR_SET = NULL; + ct = conversion_table(ps_global->VAR_CHAR_SET, ps_global->VAR_SEND_CHAR_SET); + ps_global->VAR_ASSUMED_CHAR_SET = assumed_save; + + if(ct->table && (filtered_so = so_get(PicoText, NULL, EDIT_ACCESS))){ + so_seek(*so, 0L, 0); + gf_filter_init(); + gf_link_filter(ct->convert, ct->table); + gf_set_so_readc(&gc, *so); + gf_set_so_writec(&pc, filtered_so); + if(errstr = gf_pipe(gc, pc)){ + so_give(&filtered_so); + dprint(1, (debugfile, + "Error with converting to send-charset %s:%s\n", + ps_global->VAR_SEND_CHAR_SET, errstr)); + return; + } + + gf_clear_so_readc(*so); + gf_clear_so_writec(filtered_so); + + so_give(so); + *so = filtered_so; + } + dprint(5, (debugfile, + "Succeeded in converting %s to %s for outgoing email\n", + ps_global->VAR_CHAR_SET, ps_global->VAR_SEND_CHAR_SET)); +} +#endif /*---------------------------------------------------------------------- Pass the first text segment of the message thru the "send filter" @@ -7843,7 +7894,8 @@ src = pf->scratch ? pf->scratch : (*pf->text) ? 
*pf->text : ""; - len = strlen(src)+1; + /* multiplyer 5 should be enough for EUC-JP -> ISO-2022-JP */ + len = strlen(src)*5+1; p = (char *)fs_get(len * sizeof(char)); if(rfc1522_decode((unsigned char *)p, len, src, &charset) == (unsigned char *) p){ @@ -7975,10 +8027,18 @@ fs_give((void **)pf->text); if(*pf->scratch){ +#ifndef HAVE_ICONV if(ps_global->VAR_CHAR_SET && !strucmp(ps_global->VAR_CHAR_SET, "iso-2022-jp")) *pf->text = (char *) trans_euc_to_2022_jp((unsigned char *) (pf->scratch)); +#else + if(ps_global->VAR_CHAR_SET && ps_global->VAR_SEND_CHAR_SET && + !strucmp(ps_global->VAR_CHAR_SET, ps_global->VAR_SEND_CHAR_SET)) + *pf->text = + (char *) trans_with_iconv((unsigned char *) (pf->scratch), + ps_global->VAR_CHAR_SET, ps_global->VAR_SEND_CHAR_SET); +#endif else *pf->text = cpystr(pf->scratch); } @@ -7993,6 +8053,35 @@ if(bod && *bod) post_compose_filters(*bod); +#ifdef ICONV + /* + * Convert the message body in display charset('characer set') to + * 'send-charset' + */ + if(bod && *bod && ps_global->VAR_CHAR_SET && ps_global->VAR_SEND_CHAR_SET + && strucmp(ps_global->VAR_CHAR_SET, ps_global->VAR_SEND_CHAR_SET)) + filter_msgtxt_to_send_charset(*bod); + /* + * If we've the global pine config option "character-set" set, + * we work in the respective encoding and in case of reply/forward, + * we converted the original/quoted mailtext to "character-set" + * and we also have the assume that the user used the same enconding + * for editing text, so to ensure we will get the new charset set, + * we've to forget the old charset of the edited message text here: + */ + if(bod && *bod && ps_global->VAR_CHAR_SET) { + PARAMETER *pm; + BODY *b = ((*bod)->type == TYPEMULTIPART) ? 
/* get message body */ + &(*bod)->nested.part->body : *bod; + for(pm = b->parameter; /* check of message body parameters */ + pm && strucmp(pm->attribute, "charset") != 0; + pm = pm->next) + ; /* search for old charset parameter */ + if(pm /* found one */ && pm->value /* and has value */) + fs_give((void **)&pm->value); /* then forget old charset */ + } +#endif + create_message_body(bod, attach, charset, flow_it); pine_encode_body(*bod); @@ -8028,15 +8117,25 @@ p = (char *) rfc1522_decode((unsigned char *)tmp_20k_buf, SIZEOF_20KBUF, buftmp, &charset); +#ifndef HAVE_ICONV q = (char *) trans_euc_to_2022_jp((unsigned char *)(a->personal)); +#else + q = (char *) trans_with_iconv((unsigned char *)(a->personal), + ps_global->VAR_CHAR_SET, ps_global->VAR_SEND_CHAR_SET); +#endif if(p == tmp_20k_buf /* personal was decoded */ && !strcmp(q, p)){ /* still matches what it was */ fs_give((void **)&a->personal); a->personal = cpystr(old->personal); } +#ifndef HAVE_ICONV else if(ps_global->VAR_CHAR_SET && !strucmp(ps_global->VAR_CHAR_SET, "iso-2022-jp")){ +#else + else if(ps_global->VAR_CHAR_SET && ps_global->VAR_SEND_CHAR_SET + && strucmp(ps_global->VAR_CHAR_SET, ps_global->VAR_SEND_CHAR_SET)){ +#endif /* * Convert EUC (unix Pine) or Shift-JIS (PC-Pine) into * ISO-2022-JP. @@ -8153,7 +8252,13 @@ rfc1522_encode(tmp_20k_buf, SIZEOF_20KBUF, (unsigned char *) pa->description, +#ifndef HAVE_ICONV ps_global->VAR_CHAR_SET)); +#else + ps_global->VAR_SEND_CHAR_SET ? + ps_global->VAR_SEND_CHAR_SET : + ps_global->VAR_CHAR_SET)); +#endif } if(charset) fs_give((void **)&charset); @@ -8217,7 +8322,13 @@ p->body.description = cpystr(rfc1522_encode(tmp_20k_buf, SIZEOF_20KBUF, (unsigned char *) pa->description, +#ifndef HAVE_ICONV ps_global->VAR_CHAR_SET)); +#else + ps_global->VAR_SEND_CHAR_SET ? 
+ ps_global->VAR_SEND_CHAR_SET : + ps_global->VAR_CHAR_SET)); +#endif /* Add name attribute for backward compatibility */ for(parmp = &p->body.parameter; *parmp; ) @@ -8627,6 +8738,22 @@ if(new_encoding != ENCBINARY) new_encoding = ENC8BIT; /* short lines, < 30% 8 bit chars */ } + else if(max_line < 300L || (eight_bit_chars * 100L)/len < 80L){ + /* + * The previous test misses East Asian, Greek and Russian text + * in ISO-8859-7, KOI8-R, EUC-KR, Big5, and GB2312 + * with a lot higher percentage of 8bit chars than Western European text + * in ISO-8859-x. For them, use a relaxed condition for the + * percentage of 8bit chars along with a more strict condition + * on the maximum line length. + */ + can_be_ascii--; + if(body->type == TYPEOTHER) + body->type = TYPETEXT; + + if(new_encoding != ENCBINARY) + new_encoding = ENC8BIT; /* short lines, < 30% 8 bit chars */ + } else{ can_be_ascii--; if(body->type == TYPEOTHER){ @@ -8690,7 +8817,7 @@ else set_mime_charset(pm, can_be_ascii > 0, - charset ? charset : ps_global->VAR_CHAR_SET); + charset); } if(body->encoding == ENCOTHER) @@ -8775,7 +8902,7 @@ set_mime_charset(pm, can_be_ascii > 0, - charset ? charset : ps_global->VAR_CHAR_SET); + charset); if(we_cancel) cancel_busy_alarm(-1); @@ -8806,6 +8933,13 @@ if(pm->value && (!*pm->value || strucmp(pm->value, us_ascii) == 0)) fs_give((void **)&pm->value); +#ifndef HAVE_ICONV + cs = cs ? cs : ps_global->VAR_CHAR_SET; +#else + cs = cs ? cs : ps_global->VAR_SEND_CHAR_SET ? 
+ ps_global->VAR_SEND_CHAR_SET : ps_global->VAR_CHAR_SET; +#endif + /* see if cs is a special non_ascii charset */ for(excl = non_ascii; cs && *excl && strucmp(*excl, cs); excl++) ; @@ -8890,9 +9024,19 @@ char *value, *folded = NULL; +#ifdef HAVE_ICONV + text = (char *) trans_with_iconv(text, ps_global->VAR_CHAR_SET, + ps_global->VAR_SEND_CHAR_SET); +#endif value = encode_header_value(tmp_20k_buf, SIZEOF_20KBUF, (unsigned char *) text, +#ifndef HAVE_ICONV ps_global->VAR_CHAR_SET, +#else + ps_global->VAR_SEND_CHAR_SET ? + ps_global->VAR_SEND_CHAR_SET : + ps_global->VAR_CHAR_SET, +#endif encode_whole_header(field, header)); if(value && value == text){ /* no encoding was done, have to fold */ @@ -8956,6 +9100,11 @@ fs_give((void **)&folded); } +#ifdef HAVE_ICONV + if (text) + fs_give((void **)&text); +#endif + return(ret); } @@ -11033,3 +11182,16 @@ { return(0L); } + +int +call_pico (ps) + struct pico_struct * ps; +{ + int ret; + char * assumed_save = ps_global->VAR_ASSUMED_CHAR_SET; + ps_global->VAR_ASSUMED_CHAR_SET = NULL; +#undef pico + ret = pico(ps); + ps_global->VAR_ASSUMED_CHAR_SET = assumed_save; + return ret; +} diff -urN pine4.61.orig/pine/strings.c pine4.61/pine/strings.c --- pine4.61.orig/pine/strings.c 2004-07-21 01:52:34.000000000 +0200 +++ pine4.61/pine/strings.c 2004-07-21 01:53:21.856461144 +0200 @@ -82,6 +82,9 @@ #include "headers.h" #include "../c-client/utf8.h" +#ifdef HAVE_ICONV +#include +#endif typedef struct role_args { char *ourcharset; @@ -668,6 +671,150 @@ (*d)++; } +/* ------------------------- UTF-8 functions -------------------------- */ + +char * +pine_check_utf8(c, utf_seq) + char *c; + char *utf_seq; +{ + if(!ps_global->VAR_CHAR_SET + || strucmp(ps_global->VAR_CHAR_SET, "UTF-8")) + return c; + return check_utf8(c, utf_seq); +} + +/* + * Like istrncpy but since it's used in the mail index, it also converts + * line feed and tab to space to prevent odd effects in mail index paint. 
+ * + * If charset is UTF-8, do not count bytes for the string width but real + * screen widths. The control char and escape sequence filter is also not + * active inside UTF-8 sequences there because UTF-8 requires bytes in + * the range from 0x80 to 0x9f to be processed. If a series of not recognized + * characters in the range of 0x80 to 0xff is encountered, '?' is copied. + */ +void +charset_istrncpy(dest, source, width, padding) + char *dest; /* output buffer, left NUL-terminated; NOTE(review): no byte size is passed in, only columns are counted */ + char *source; /* const */ + int width; /* column budget, compared against screencols */ + int padding; /* 1: fill the unused columns with blanks */ +{ + char *cp, *chp, *destp = dest; + int seq = 0, screencols=0; /* seq is set while a NULL result from pine_check_utf8() is pending */ + unsigned char utf_seq[10] = ""; + + for(cp = source; *cp && screencols < width; cp++){ + if((chp = pine_check_utf8(cp, utf_seq)) == NULL){ + seq = 1; /* nothing is emitted for this byte (presumably mid-sequence; confirm in check_utf8) */ + continue; + } + if(chp != cp){ /* helper handed back different text: copy that instead of *cp */ + seq = 0; + screencols++; + if(*chp == ' '){ /* a leading blank costs one more column and is skipped (presumably a double-width marker; confirm) */ + if(screencols >= width){ + sstrcpy(&destp, "\342\200\246"); + break; /* UTF-8 points... */ + } + screencols++; + chp++; + } + while(*chp) + *destp++ = *chp++; + *destp = '\0'; + continue; + } + if(seq){ /* pending bytes were never completed: show a single '?' for them */ + seq = 0; + screencols++; + *destp++ = '?'; + } + screencols++; + if(*cp && FILTER_THIS(*cp) + && !(*(cp+1) && *cp == ESCAPE && match_escapes(cp+1))){ + *destp++ = '^'; /* filtered byte is shown as '^', followed by a printable letter if a column is left */ + if(screencols < width){ + screencols++; + *destp++ = (*cp & 0x7f) + '@'; + } + } + else if(*cp == '\n' || *cp == '\t') + *destp++ = ' '; + else + *destp++ = *cp; + *destp = '\0'; + } + if(padding == 1) /* blank-fill up to width */ + while(screencols < width){ + screencols++; + *destp++ = ' '; + } + *destp = '\0'; +} + +/* + * Like istrncpy but do not remove UTF-8 sequences. + * + * The control char and escape sequence filter is also not active inside + * UTF-8 sequences because UTF-8 requires bytes in the range from 0x80 + * to 0x9f to be processed. If a series of not recognized characters in + * the range of 0x80 to 0xff is encountered, '?' is copied.
+ */ +static char * +utf8_istrncpy(dest, cp, length) + char *dest; + char *cp; /* const */ + int length; /* budget, counted down as bytes are stored */ +{ + char *chp, *destp = dest; + int seq = 0; /* set while a NULL result from check_utf8() is pending */ + unsigned char utf_seq[7] = ""; + + *destp = '\0'; + for(; length > 0 && *cp; cp++){ + if((chp = check_utf8(cp, utf_seq)) == NULL) { + seq = 1; + continue; + } + if(chp != cp){ /* helper handed back different text: copy that instead of *cp */ + seq = 0; + if(*chp == ' ') + chp++; + if(strlen(chp) < length){ + while(*chp && length--) + *destp++ = *chp++; + *destp = '\0'; + continue; + } + while(length--) /* text does not fit: spend the rest of the budget on dots */ + *destp++ = '.'; + *destp = '\0'; + break; + } + if(seq){ /* pending bytes were never completed: show a single '?' */ + *destp++ = '?'; + length--; + seq = 0; + } + if(*cp && FILTER_THIS(*cp) + && !(*(cp+1) && *cp == ESCAPE && match_escapes(cp+1))){ + if(length-- > 0){ + *destp++ = '^'; + + if(length-- > 0) + *destp++ = (*cp & 0x7f) + '@'; + } + } + else if(length-- > 0) + *destp++ = *cp; + *destp = '\0'; + } + + return dest; +} + /*---------------------------------------------------------------------- copy at most n chars of the source string onto the destination string @@ -685,6 +832,10 @@ if(!d || !s) return(NULL); + if(!ps_global->pass_ctrl_chars && ps_global->VAR_CHAR_SET + && !strucmp(ps_global->VAR_CHAR_SET, "UTF-8")) + return utf8_istrncpy(d, s, n); + do if(*s && FILTER_THIS(*s) && !(*(s+1) && *s == ESCAPE && match_escapes(s+1))){ @@ -709,6 +860,204 @@ /* + * * * * * * * Character set translation helpers * * * * * * * * + */ + +#ifdef HAVE_ICONV +static iconv_t /* Opens a descriptor converting fromset to toset; yields NULL rather than (iconv_t)-1 when iconv_open() fails. */ +make_iconv_d(toset, fromset) + char *toset; + char *fromset; +{ + iconv_t iconv_d; + char * tocode = NULL; + + /* make private copy of toset and append //TRANSLIT if feasible */ + if(strucmp(toset, "UTF-8")){ + tocode = (char *)fs_get((size_t)(strlen(toset)) + 11); /* 10 bytes of suffix plus the NUL */ + strcpy(tocode, toset); + strcat(tocode, "//TRANSLIT"); + } + + if((iconv_d = iconv_open(tocode?tocode:toset, fromset)) == (iconv_t)-1){ + dprint(7, (debugfile,"iconv open failed:")); + iconv_d = NULL; + } + dprint(7, (debugfile, "from %s to %s\n", fromset, toset)); + + /* free local copy for //TRANSLIT */
+ if(tocode) + fs_give((void **) &tocode); + + return iconv_d; +} + +static /* Descriptor lookup: with local_iconvd set, a fresh descriptor is stored there and NULL is returned; otherwise one descriptor cached in statics is reused or reopened and returned (NULL when no conversion applies). */ +iconv_t +get_iconv_d(tocset, fromcset, local_iconvd) + char *tocset; + char *fromcset; + iconv_t *local_iconvd; +{ + static char *s_fromcset = NULL, *s_tocset = NULL; /* names the cached descriptor was opened for */ + static iconv_t s_iconv_d = 0; + + /* no conversion if charset missing, from=ASCII or charsets are equal */ + if(!tocset || (fromcset && tocset && !strucmp(fromcset, tocset))) + return NULL; + + dprint(6, (debugfile,"charsets %s -> %s\n", fromcset, tocset)); + + fromcset = resolve_charset_alias(fromcset, + ps_global->VAR_CHAR_SET_ALIASES); + fromcset = resolve_charset_alias(fromcset, + ps_global->VAR_ICONV_ALIASES); + tocset = resolve_charset_alias(tocset, + ps_global->VAR_CHAR_SET_ALIASES); + tocset = resolve_charset_alias(tocset, + ps_global->VAR_ICONV_ALIASES); + + if(local_iconvd){ /* caller wants its own descriptor: the static cache is not touched */ + *local_iconvd = make_iconv_d(tocset, fromcset); + return NULL; + } + + if(s_iconv_d && !strucmp(fromcset, US_ASCII_CHARSET) + && s_tocset && !strucmp(s_tocset, tocset)) { + dprint(6, (debugfile,"use charsets %s -> %s\n", s_fromcset, tocset)); + iconv(s_iconv_d, NULL, NULL, NULL, NULL); /* put the cached descriptor back into its initial state */ + } else { + + if (ps_global->VAR_ASSUMED_CHAR_SET + && (!fromcset || !*fromcset || !strucmp(UNKNOWN_CHARSET, fromcset))) + fromcset = ps_global->VAR_ASSUMED_CHAR_SET; + + if(!strucmp(fromcset, US_ASCII_CHARSET)) + return NULL; + + if(s_fromcset && strucmp(s_fromcset, fromcset)) + fs_give((void **)&s_fromcset); + + if(s_tocset && strucmp(s_tocset, tocset)) + fs_give((void **)&s_tocset); + + if(!s_fromcset || !s_tocset) { /* at least one cached name was released or never set: reopen */ + if (s_iconv_d) + iconv_close(s_iconv_d); + if(!s_fromcset) s_fromcset = cpystr(fromcset); /* fill only an empty slot, so a retained name is not overwritten and leaked */ + if(!s_tocset) s_tocset = cpystr(tocset); + s_iconv_d = make_iconv_d(tocset, fromcset); + } + else if(s_iconv_d) + iconv(s_iconv_d, NULL, NULL, NULL, NULL); /* same pair as last time: just reset the descriptor */ + } + + return s_iconv_d; +} +#endif + +/* + * Like sstrncpy, but with charset conversion(if possible) and null termination. + * *dest is left pointing at the terminating zero byte. It will not write + * more than length bytes.
To copy the whole string, the output buffer and + * the length passed must be strlen(source)+1 in order to get a full copy. + * + * fromcset -- charset to convert from + * tocset -- charset to convert to + * **dest -- address of a pointer which points to the destination buffer + * *src -- address of the start of the rfc2047-decoded source buffer + * len -- maximum number of bytes to write at **dest and increase *dest + * __including__ the terminating null. A length of 0 writes nothing. + */ +void +conv_sstrncpy(fromcset, tocset, dest, src, length) + char *fromcset; + char *tocset; + char **dest; + char *src; + size_t length; +{ +#ifdef HAVE_ICONV + iconv_t iconv_desc = NULL; + + if(!length || ((!fromcset || !*fromcset) && (!tocset || !*tocset))) /* a zero length must not reach the length-- below, which would wrap around */ + goto noconv; + + fromcset = (fromcset && *fromcset) ? fromcset : ps_global->VAR_CHAR_SET; /* a missing side falls back to VAR_CHAR_SET */ + tocset = (tocset && *tocset) ? tocset : ps_global->VAR_CHAR_SET; + + iconv_desc = get_iconv_d(tocset, fromcset, NULL); + + if(iconv_desc){ + size_t inbytesleft = strlen(src); + char * buf = *dest; int ret; + + length--; /* reserve a byte for '\0' */ + ret = iconv(iconv_desc, &src, &inbytesleft, dest, &length); + **dest = '\0'; /* terminate the output string */ + dprint(9, (debugfile, "iconv ret=%3d: >%s<\n", ret, buf)); + return; + } +#endif +noconv: + dprint(9, (debugfile,"no convert: >%s<(%d)\n", src, (int) length)); + sstrncpy(dest, src, length ? length - 1 : 0); /* copy one byte less so the terminator below stays within length, as in the iconv path */ + if(length) **dest = '\0'; /* ensure that the output string is terminated */ +} + +unsigned char* /* Looks cs up in a list of "name:target" strings; returns the text after ':' of the first entry whose name matches, or cs itself when nothing matches or the target is empty. The result points into the alias entry, it is not a copy. */ +resolve_charset_alias(cs, aliases) + char *cs; + char **aliases; +{ + int i; + char *bdry; + + if(!aliases) + return cs; + for(i=0; aliases[i] && *(aliases[i]); i++) + if(bdry=strchr(aliases[i],':')){ + *bdry='\0'; /* cut the entry at ':' for the comparison; put back on every path below */ + if (!strucmp(aliases[i], cs)) { + *bdry=':'; + return *(bdry+1) ? bdry+1 : cs; + } + *bdry=':'; + } + return cs; +} + +#ifdef HAVE_ICONV +/* + * Converts the source string in fromcset to tocset and copy the result + * into allocated space. + * Caller is responsible for freeing the result.
+ */ +unsigned char * +trans_with_iconv(src, fromcset, tocset) + unsigned char *src; /* NUL-terminated input; NULL yields NULL */ + char *fromcset; + char *tocset; +{ + size_t len; + unsigned char *rv, *pstr; /* rv keeps the buffer start; pstr is the cursor handed to conv_sstrncpy() */ + if (!src) + return NULL; + + dprint(5, (debugfile, "translating from %s to %s\n",fromcset, tocset)); + + /* + * XXX: multiplier of 5 should be sufficient for virtually all + * cases (EUC-JP -> ISO-2022-JP) + */ + len = strlen((char *) src) * 5 + 1; + pstr = rv = (unsigned char *) fs_get(sizeof(char) * len); + conv_sstrncpy(fromcset, tocset, (char **) &pstr, src, len); + return rv; +} +#endif + +/* * Copies the source string into allocated space with the 8-bit EUC codes * (on Unix) or the Shift-JIS (on PC) converted into ISO-2022-JP. * Caller is responsible for freeing the result. @@ -3052,12 +3401,35 @@ char **)); int rfc1522_valtok PROTO((int)); int rfc1522_valenc PROTO((int)); -int rfc1522_valid PROTO((char *, char **, char **, char **, +int rfc1522_valid PROTO((char *, int, char **, char **, char **, char **)); char *rfc1522_8bit PROTO((void *, int)); char *rfc1522_binary PROTO((void *, int)); unsigned char *rfc1522_encoded_word PROTO((unsigned char *, int, char *)); +unsigned char * /* Wrapper around rfc1522_decode(): under HAVE_ICONV a non-empty VAR_ASSUMED_CHAR_SET is replaced by UNKNOWN_CHARSET for the duration of the call and restored afterwards; the decoder's return value is passed through. */ +rfc2047_decode(d, len, s, charset) + unsigned char *d; + size_t len; /* length of d */ + char *s; + char **charset; +{ + unsigned char *t; + char *assumed_charset = NULL; +#ifdef HAVE_ICONV + /* + * reset VAR_ASSUMED_CHAR_SET temporarily to avoid double conversions: + */ + assumed_charset = ps_global->VAR_ASSUMED_CHAR_SET; + if (ps_global->VAR_ASSUMED_CHAR_SET && *(ps_global->VAR_ASSUMED_CHAR_SET)) + ps_global->VAR_ASSUMED_CHAR_SET = UNKNOWN_CHARSET; +#endif + t = rfc1522_decode(d, len, s, charset); +#ifdef HAVE_ICONV + ps_global->VAR_ASSUMED_CHAR_SET = assumed_charset; /* put the saved value back, whatever it was */ +#endif + return t; +} /* * rfc1522_decode - try to decode the given source string ala RFC 2047 @@ -3100,6 +3472,7 @@ unsigned long l; int i, described_charset_once = 0; int translate_2022_jp = 0; + unsigned char *cset_r; /* cset with alias resolution */
*d = '\0'; /* init destination */ if(charset) @@ -3107,7 +3480,7 @@ while(s && (sw = strstr(s, RFC1522_INIT))){ /* validate the rest of the encoded-word */ - if(rfc1522_valid(sw, &cset, &enc, &txt, &ew)){ + if(rfc1522_valid(sw, 1, &cset, &enc, &txt, &ew)){ if(!rv) rv = d; /* remember start of dest */ @@ -3126,6 +3499,8 @@ if(lang = strchr(cset, '*')) *lang++ = '\0'; + cset_r =resolve_charset_alias(cset,ps_global->VAR_CHAR_SET_ALIASES); +#ifndef HAVE_ICONV /* Insert text explaining charset if we don't know what it is */ if(F_OFF(F_DISABLE_2022_JP_CONVERSIONS, ps_global) && !strucmp((char *) cset, "iso-2022-jp")){ @@ -3138,7 +3513,7 @@ if(!*charset) /* only write first charset */ *charset = cpystr(cset); } - else if(!described_charset_once++){ + else if(!ps_global->VAR_CHAR_SET && !described_charset_once++){ if(d-rvVAR_CHAR_SET + else +#endif + if((!ps_global->VAR_CHAR_SET || strucmp((char *) cset, ps_global->VAR_CHAR_SET)) && strucmp((char *) cset, "US-ASCII")){ dprint(5, (debugfile, "RFC1522_decode: charset mismatch: %s\n", @@ -3198,12 +3575,8 @@ q = NULL; if(p = rfc822_qprint((unsigned char *)txt, strlen(txt), &l)){ - strncpy((char *) d, (char *) p, len-1-(d-rv)); - d[len-1-(d-rv)] = '\0'; + conv_sstrncpy(cset_r, NULL, &d, p, len-(d-rv)); fs_give((void **)&p); /* free encoded buf */ - d += l; /* advance dest ptr to EOL */ - if(d-rv > len-1) - d = rv+len-1; } else{ if(q) @@ -3224,12 +3597,8 @@ case 'B' : /* 'B' encoding */ case 'b' : if(p = rfc822_base64((unsigned char *) txt, strlen(txt), &l)){ - strncpy((char *) d, (char *) p, len-1-(d-rv)); - d[len-1-(d-rv)] = '\0'; + conv_sstrncpy(cset_r, NULL, &d, p, len-(d-rv)); fs_give((void **)&p); /* free encoded buf */ - d += l; /* advance dest ptr to EOL */ - if(d-rv > len-1) - d = rv+len-1; } else goto bogus; @@ -3254,30 +3623,31 @@ lang[-1] = '*'; } else{ - - /* - * Found intro, but bogus data followed, treat it as normal text. 
- */ - + /* Found intro, but bogus data followed, copy it and continue */ +#ifdef HAVE_ICONV + if (!rv) + rv=d; /* remember start of dest */ +#endif + l = min(len-(d-rv),(sw-s)+ RFC1522_INIT_L); /* data to copy */ /* if already copying to destn, copy it */ - if(rv){ - strncpy((char *) d, s, - (int) min((l = (sw - s) + RFC1522_INIT_L), - len-1-(d-rv))); - d += l; /* advance d, tie off text */ - if(d-rv > len-1) - d = rv+len-1; - *d = '\0'; - s += l; /* advance s beyond intro */ - } - else - s += ((sw - s) + RFC1522_INIT_L); + if(rv) + conv_sstrncpy(ps_global->VAR_ASSUMED_CHAR_SET, NULL, + (char **)&d, s, (int) l); + s += l; /* advance s beyond intro */ } } - if(rv && *s) /* copy remaining text */ - strncat((char *) rv, s, len - 1 - strlen((char *) rv)); + if (s && *s) { /* copy remaining text */ +#ifdef HAVE_ICONV + if (!rv) + rv=d; /* remember start of dest */ +#endif + if (rv) + conv_sstrncpy(ps_global->VAR_ASSUMED_CHAR_SET, NULL, + (char **)&d, s, len - strlen((char *)rv)); + } +#ifndef HAVE_ICONV /* with iconv, we are done, we have converted during copy */ if(translate_2022_jp){ unsigned char *trans; @@ -3339,6 +3709,7 @@ } } } +#endif if(cs) fs_give((void **) &cs); @@ -3402,16 +3773,20 @@ rfc1522_valenc(c) int c; { - return(!(c == '?' || c == SPACE) && isprint((unsigned char)c)); + return(!(c == '?') && isprint((unsigned char)c)); } /* * rfc1522_valid - validate the given string as to it's rfc1522-ness + * if relaxchk is true, double the maximum length of an encoded word. + * this is necessary to decode overlong encoded words generated by + * numerous incompliant implementations of RFC 2047 (1522). 
*/ int -rfc1522_valid(s, charset, enc, txt, endp) +rfc1522_valid(s, relaxchk, charset, enc, txt, endp) char *s; + int relaxchk; char **charset; char **enc; char **txt; @@ -3423,7 +3798,11 @@ rv = rfc1522_token(c = s+RFC1522_INIT_L, rfc1522_valtok, RFC1522_DLIM, &e) && rfc1522_token(++e, rfc1522_valtok, RFC1522_DLIM, &t) && rfc1522_token(++t, rfc1522_valenc, RFC1522_TERM, &p) - && p - s <= RFC1522_MAXW; + && p - s <= RFC1522_MAXW * (relaxchk ? 2 : 1); + /* + * relax the length condition by doubling the max length of an + * encoded word. It's is needed for some longer encoded words. + */ if(charset) *charset = c; @@ -3474,7 +3853,7 @@ } else if(*p == RFC1522_INIT[0] && !strncmp((char *) p, RFC1522_INIT, RFC1522_INIT_L)){ - if(rfc1522_valid((char *) p, NULL, NULL, NULL, (char **) &q)) + if(rfc1522_valid((char *) p, 0, NULL, NULL, NULL, (char **) &q)) p = q + RFC1522_TERM_L - 1; /* advance past encoded gunk */ } else if(*p == ESCAPE && match_escapes((char *)(p+1))){ @@ -3651,6 +4030,27 @@ CHARSET *from, *to; static CONV_TABLE null_tab; +#ifndef HAVE_ICONV + /* + * Another idea would be to check if the subject had charset tags + * and use this charset (we could use the last charset variable from + * conv_sstrcpy() in mailview.c) + */ + if (ps_global->VAR_ASSUMED_CHAR_SET + && (!from_cs || !*from_cs || !strucmp(UNKNOWN_CHARSET, from_cs) + || !strucmp(US_ASCII_CHARSET, from_cs))) + from_cs = ps_global->VAR_ASSUMED_CHAR_SET; + + /* + * Lets do user-specified charset aliasing before starting work: + */ + from_cs = resolve_charset_alias(from_cs, ps_global->VAR_CHAR_SET_ALIASES); + to_cs = resolve_charset_alias(to_cs, ps_global->VAR_CHAR_SET_ALIASES); +#endif + + /* + * Check if we need conversion for this pair. 
If not, it's easy: + */ if(!(from_cs && *from_cs && to_cs && *to_cs) || !strucmp(from_cs, to_cs)){ memset(&null_tab, 0, sizeof(null_tab)); null_tab.quality = CV_NO_TRANSLATE_NEEDED; @@ -3682,6 +4082,10 @@ if(ct){ if(ct->table && (ct->convert != gf_convert_utf8_charset)) fs_give((void **) &ct->table); + if(ct->table && (ct->convert == gf_convert_utf8_charset)) { + iconv_close((iconv_t)ct->table); + ct->table = NULL; + } if(ct->from_charset) fs_give((void **) &ct->from_charset); @@ -3696,6 +4100,16 @@ ct->from_charset = cpystr(from_cs); ct->to_charset = cpystr(to_cs); +#ifdef HAVE_ICONV + ct->quality = CV_LOSES_SPECIAL_CHARS; + ct->convert = gf_convert_utf8_charset; + get_iconv_d(to_cs, from_cs, (iconv_t *)&ct->table); +// if (ct->table) +// return(ct); +// Fall thru if iconv fails for some reason, +// but need own convert_iconv_charset function handled everywhere +//#endif +#else ct->quality = CV_NO_TRANSLATE_POSSIBLE; /* @@ -3829,6 +4243,7 @@ } } } +#endif return(ct); }