]>
Commit | Line | Data |
---|---|---|
a34c4fcd JB |
1 | --- rpm-4.4.8/rpmio/strtolocale.c.orig 2005-10-28 04:53:34.000000000 +0200 |
2 | +++ rpm-4.4.8/rpmio/strtolocale.c 2007-02-12 19:16:36.000000000 +0100 | |
3 | @@ -3,130 +3,63 @@ | |
50281fa4 | 4 | */ |
5 | ||
6 | #include "system.h" | |
7 | -#include <wchar.h> | |
8 | +#include <langinfo.h> | |
9 | +#include <iconv.h> | |
10 | #include "debug.h" | |
11 | ||
12 | -/*@access mbstate_t @*/ | |
13 | - | |
14 | -/** | |
15 | - * Wrapper to free(3), hides const compilation noise, permit NULL, return NULL. | |
16 | - * @param p memory to free | |
17 | - * @retval NULL always | |
18 | - */ | |
19 | -/*@unused@*/ static inline /*@null@*/ void * | |
20 | -_free(/*@only@*/ /*@null@*/ const void * p) /*@modifies p@*/ | |
21 | -{ | |
22 | - if (p != NULL) free((void *)p); | |
23 | - return NULL; | |
24 | -} | |
25 | +static char *locale_encoding = NULL; | |
26 | +static int locale_encoding_is_utf8; | |
27 | ||
28 | const char * xstrtolocale(const char *str) | |
29 | { | |
30 | - wchar_t *wstr, *wp; | |
31 | - const unsigned char *cp; | |
32 | - char *cc; | |
33 | - unsigned state = 0; | |
34 | - int c; | |
35 | - int ccl, cca, mb_cur_max; | |
36 | - size_t l; | |
37 | - mbstate_t ps; | |
38 | - int strisutf8 = 1; | |
39 | - int locisutf8 = 1; | |
40 | - | |
41 | - if (!str) | |
42 | - return 0; | |
43 | - if (!*str) | |
44 | - return str; | |
45 | - wstr = (wchar_t *)xmalloc((strlen(str) + 1) * sizeof(*wstr)); | |
46 | - wp = wstr; | |
47 | - cp = (const unsigned char *)str; | |
48 | - while ((c = *cp++) != 0) { | |
49 | - if (state) { | |
50 | - if ((c & 0xc0) != 0x80) { | |
51 | - /* encoding error */ | |
52 | - break; | |
53 | - } | |
54 | - c = (c & 0x3f) | (state << 6); | |
55 | - if (!(state & 0x40000000)) { | |
56 | - /* check for overlong sequences */ | |
57 | - if ((c & 0x820823e0) == 0x80000000) | |
58 | - c = 0xfdffffff; | |
59 | - else if ((c & 0x020821f0) == 0x02000000) | |
60 | - c = 0xfff7ffff; | |
61 | - else if ((c & 0x000820f8) == 0x00080000) | |
62 | - c = 0xffffd000; | |
63 | - else if ((c & 0x0000207c) == 0x00002000) | |
64 | - c = 0xffffff70; | |
65 | - } | |
66 | - } else { | |
67 | - /* new sequence */ | |
68 | - if (c >= 0xfe) | |
69 | - c = 0xfffd; | |
70 | - else if (c >= 0xfc) | |
71 | - c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */ | |
72 | - else if (c >= 0xf8) | |
73 | - c = (c & 0x03) | 0xbfffff00; /* 4 */ | |
74 | - else if (c >= 0xf0) | |
75 | - c = (c & 0x07) | 0xbfffc000; /* 3 */ | |
76 | - else if (c >= 0xe0) | |
77 | - c = (c & 0x0f) | 0xbff00000; /* 2 */ | |
78 | - else if (c >= 0xc2) | |
79 | - c = (c & 0x1f) | 0xfc000000; /* 1 */ | |
80 | - else if (c >= 0xc0) | |
81 | - c = 0xfdffffff; /* overlong */ | |
82 | - else if (c >= 0x80) | |
83 | - c = 0xfffd; | |
84 | - } | |
85 | - state = (c & 0x80000000) ? c : 0; | |
86 | - if (state) | |
87 | - continue; | |
88 | - *wp++ = (wchar_t)c; | |
89 | - } | |
90 | -/*@-branchstate@*/ | |
91 | - if (state) { | |
92 | - /* encoding error, assume latin1 */ | |
93 | - strisutf8 = 0; | |
94 | - cp = (const unsigned char *)str; | |
95 | - wp = wstr; | |
96 | - while ((c = *cp++) != 0) { | |
97 | - *wp++ = (wchar_t)c; | |
98 | - } | |
99 | + iconv_t cd; | |
100 | + size_t src_size, dest_size; | |
101 | + char *result, *src, *dest; | |
102 | + | |
103 | + if (locale_encoding == NULL) { | |
104 | + const char *encoding = nl_langinfo(CODESET); | |
105 | + locale_encoding = xmalloc(strlen(encoding) + 11); | |
106 | + sprintf(locale_encoding, "%s//TRANSLIT", encoding); | |
107 | + locale_encoding_is_utf8 = strcasecmp(encoding, "UTF-8") == 0; | |
108 | } | |
109 | -/*@=branchstate@*/ | |
110 | - *wp = 0; | |
111 | - mb_cur_max = MB_CUR_MAX; | |
112 | - memset(&ps, 0, sizeof(ps)); | |
113 | - cc = xmalloc(mb_cur_max); | |
114 | - /* test locale encoding */ | |
115 | - if (wcrtomb(cc, 0x20ac, &ps) != 3 || memcmp(cc, "\342\202\254", 3)) | |
116 | - locisutf8 = 0; | |
117 | - if (locisutf8 == strisutf8) { | |
118 | - wstr = _free(wstr); | |
a34c4fcd | 119 | - cc = _free(cc); /* XXX memory leak plugged. */ |
50281fa4 | 120 | + |
121 | + if (!str || !*str || locale_encoding_is_utf8) | |
122 | return str; | |
123 | + | |
124 | + cd = iconv_open(locale_encoding, "UTF-8"); | |
125 | + if (cd == (iconv_t)-1) | |
126 | + return str; | |
127 | + | |
128 | + src_size = strlen(str); | |
129 | + dest_size = src_size + 1; | |
130 | + result = xmalloc(dest_size); | |
131 | + src = (char *)str; | |
132 | + dest = result; | |
133 | + for(;;) { | |
134 | + size_t status = iconv(cd, &src, &src_size, &dest, &dest_size); | |
135 | + if (status == (size_t)-1) { | |
136 | + size_t dest_offset; | |
137 | + if (errno != E2BIG) { | |
138 | + free(result); | |
139 | + iconv_close(cd); | |
140 | + return str; | |
141 | + } | |
142 | + dest_offset = dest - result; | |
143 | + dest_size += 16; | |
144 | + result = xrealloc(result, dest_offset + dest_size); | |
145 | + dest = result + dest_offset; | |
146 | + } else if (src_size == 0) { | |
147 | + if (src == NULL) break; | |
148 | + src = NULL; | |
149 | + } | |
150 | } | |
151 | - str = _free(str); | |
152 | - memset(&ps, 0, sizeof(ps)); | |
153 | - ccl = cca = 0; | |
154 | - for (wp = wstr; ; wp++) { | |
155 | - l = wcrtomb(cc + ccl, *wp, &ps); | |
156 | - if (*wp == 0) | |
157 | - break; | |
158 | - if (l == (size_t)-1) { | |
159 | - if (*wp < (wchar_t)256 && mbsinit(&ps)) { | |
160 | - cc[ccl] = *wp; | |
161 | - l = 1; | |
162 | - } else | |
163 | - l = wcrtomb(cc + ccl, (wchar_t)'?', &ps); | |
164 | - } | |
165 | - if (l == 0 || l == (size_t)-1) | |
166 | - continue; | |
167 | - ccl += l; | |
168 | - if (ccl > cca) { | |
169 | - cca = ccl + 16; | |
170 | - cc = xrealloc(cc, cca + mb_cur_max); | |
171 | - } | |
172 | + iconv_close(cd); | |
173 | + free((void *)str); | |
174 | + if (dest_size == 0) { | |
175 | + size_t dest_offset = dest - result; | |
176 | + result = xrealloc(result, dest_offset + 1); | |
177 | + dest = result + dest_offset; | |
178 | } | |
179 | - wstr = _free(wstr); | |
180 | - return (const char *)cc; | |
181 | + *dest = '\0'; | |
182 | + return result; | |
183 | } |