]>
Commit | Line | Data |
---|---|---|
11774a5d AF |
1 | #!/usr/bin/python |
2 | ||
c143293b AF |
# When true, parse_spec() interleaves its parser state (in_desc,
# in_changelog) with the converted output for troubleshooting.
debug = False
4 | ||
57250e28 | 5 | import os, os.path, re, sys, locale, StringIO, filecmp |
11774a5d AF |
6 | |
# Legacy (pre-UTF-8) character set assumed for each language code when a
# Summary(...) or %description -l tag does not name an encoding explicitly.
langs = {
    'bg': 'windows-1251',
    'br': 'iso8859-1',
    'ca': 'iso8859-1',
    'cs': 'iso8859-2',
    'da': 'iso8859-1',
    'de': 'iso8859-1',
    'en': 'iso8859-1',
    'eo': 'iso8859-3',
    'es': 'iso8859-1',
    'et': 'iso8859-15',
    'fi': 'iso8859-1',
    'fo': 'iso8859-1',
    'fr': 'iso8859-1',
    'gl': 'iso8859-1',
    'he': 'iso8859-8',
    'id': 'iso8859-1',
    'is': 'iso8859-1',
    'it': 'iso8859-1',
    'ja': 'euc-jp',
    'ko': 'euc-kr',
    'nb': 'iso8859-1',
    'nl': 'iso8859-1',
    'pl': 'iso8859-2',
    'pt': 'iso8859-1',
    'pt_BR': 'iso8859-1',
    'ro': 'iso8859-2',
    'ru': 'KOI8-R',
    'se': 'UTF-8',
    'sk': 'iso8859-2',
    'sl': 'iso8859-2',
    'sv': 'iso8859-1',
    'tr': 'iso8859-9',
    'uk': 'KOI8-U',
    'wa': 'iso8859-1',
    'zh_CN': 'GB2312',
    'zh_HK': 'BIG5-HKSCS',
    'zh_TW': 'BIG5',
}
46 | ||
fb3b78fc AF |
def find_encoding(lang):
    """Split a locale tag into its encoding and bare language code.

    *lang* is the text found inside ``Summary(...)`` or after
    ``%description -l``; it is either a plain language code ("pl") or a
    code with an explicit encoding suffix ("pl.UTF-8").

    Returns a tuple ``(enc, pure_lang)``:

    * with an explicit suffix, ``enc`` is the suffix without its leading dot;
    * without one, ``enc`` is looked up in ``langs`` and is ``None`` when the
      language is not listed there;
    * when the tag cannot be parsed at all (empty, or a ``@modifier`` after
      the encoding), ``(None, lang)`` is returned so that callers report it
      as an unknown language code instead of crashing.
    """
    r = re.match(r"^([^.]+)(\.[^@]+)?$", lang)
    if r is None:
        # Unparseable tag: treat it like an unknown language.
        return (None, lang)
    pure_lang = r.group(1)
    if r.group(2) is None:
        # No explicit encoding: fall back to the per-language default.
        enc = langs.get(lang)
    else:
        # strip dot
        enc = r.group(2)[1:]
    return (enc, pure_lang)
59 | ||
def parse_spec(infile, outfile):
    """Copy a spec file from *infile* to *outfile*, converting it to UTF-8.

    *infile* is iterated line by line; every line is written to *outfile*,
    possibly rewritten:

    * ``%description ... -l LANG`` headers whose encoding is not UTF-8 get
      the tag rewritten to ``LANG.UTF-8`` and the lines of that section are
      transcoded until the next ``%`` directive (``%if``/``%endif`` and
      ``%{...}`` macros do not end the section);
    * ``Summary(LANG):`` lines whose encoding is not UTF-8 are transcoded
      and retagged ``Summary(LANG.UTF-8):``;
    * everything after ``%changelog`` is kept if it is valid UTF-8 and is
      otherwise decoded as ISO-8859-2.

    Problems are reported in-band as ``#spec_utf8: ...`` comment lines
    written just before the offending line, which is then copied unchanged.

    Returns True when no problem was reported, False otherwise.

    Note: this is Python 2 code; it relies on the ``unicode`` built-in.
    """
    success = True
    re_summary = re.compile(r"^Summary\(([^\)]+)\):[ \t]+(.*)$")
    re_utf = re.compile(r"^utf-8$", re.I)
    re_desc = re.compile(r"^(%description.*\s)-l\s+([\S]+)($|\s.*$)")
    re_proc = re.compile(r"^%[^{]")
    re_ignore_proc = re.compile(r"^%(if|endif)")
    re_changelog = re.compile(r"^%changelog")
    in_desc = False
    in_changelog = False
    # Encoding / language of the description section being transcoded.
    enc = None
    pure_lang = None

    for line in infile:
        outline = line
        if debug:
            outfile.write("%s, %s, %s" % (in_desc, in_changelog, line))

        # %description start
        r = re_desc.match(line)
        if r:
            lang = r.group(2)
            (enc, pure_lang) = find_encoding(lang)
            # A new section always replaces the previous one; without this
            # reset an unknown language following a transcoded section left
            # in_desc set with enc == None and crashed on the next line.
            in_desc = False
            if enc is None:
                outfile.write("#spec_utf8: unknown lang code in %%description -l %s\n" % (lang))
                success = False
            elif not re_utf.search(enc):
                in_desc = True
                outline = "%s-l %s.UTF-8%s\n" % (r.group(1), pure_lang, r.group(3))
        elif in_desc:
            if re_proc.search(line) and not re_ignore_proc.search(line):
                # Any other directive ends the description section.
                in_desc = False
                if re_changelog.match(line):
                    # The terminating directive may itself open the changelog.
                    in_changelog = True
            else:
                # %description continues; in_desc implies enc is a known,
                # non-UTF-8 encoding name.
                try:
                    outline = unicode(line, enc).encode("UTF-8")
                except (UnicodeDecodeError, LookupError):
                    # LookupError: explicit encoding suffix names no codec.
                    outfile.write("#spec_utf8: transcoding error %%description -l %s\n" % (pure_lang))
                    success = False
        elif in_changelog:
            try:
                # Round-trip only to verify the line already is valid UTF-8.
                outline = unicode(line, "UTF-8").encode("UTF-8")
            except UnicodeDecodeError:
                try:
                    outline = unicode(line, "ISO-8859-2").encode("UTF-8")
                except UnicodeDecodeError:
                    outfile.write("#spec_utf8: transcoding next line from Latin2 failed\n")
                    success = False
        else:
            # Summary
            r = re_summary.match(line)
            if r:
                lang = r.group(1)
                (enc, pure_lang) = find_encoding(lang)
                if enc is None:
                    outfile.write("#spec_utf8: unknown lang code in Summary(%s)\n" % (lang))
                    success = False
                elif not re_utf.search(enc):
                    try:
                        desc = unicode(r.group(2), enc).encode("UTF-8")
                        outline = "Summary(%s.UTF-8):\t%s\n" % (pure_lang, desc)
                    except (UnicodeDecodeError, LookupError):
                        outfile.write("#spec_utf8: transcoding error Summary(%s)\n" % (lang))
                        success = False
            elif re_changelog.match(line):
                # %changelog start
                in_changelog = True

        if debug:
            outfile.write("%s, %s\n" % (in_desc, in_changelog))
        outfile.write("%s" % (outline, ))
    return success
11774a5d | 130 | |
def main(spec):
    """Convert the spec file at path *spec* to UTF-8 in place.

    The converted text is written to ``spec + '.tmp'``.  If it is identical
    to the original the temporary file is discarded, otherwise it replaces
    the original.  A note is written to stderr when parse_spec() reports
    problems; the (partially converted) result is still installed.

    Both files are closed, and the temporary file is removed, even when the
    conversion raises.
    """
    tmp = spec + '.tmp'
    sys.stdout.write("Converting %s ...\n" % spec)
    src = open(spec, 'r')
    try:
        dst = open(tmp, 'w')
        finished = False
        try:
            # Output goes to a separate file, so the input can be streamed
            # directly instead of being buffered in memory first.
            if not parse_spec(src, dst):
                sys.stderr.write(" Problems while converting %s.\n" % spec)
            finished = True
        finally:
            dst.close()
            if not finished:
                # Do not leave a half-written temporary file behind.
                os.remove(tmp)
    finally:
        src.close()
    # shallow=False: compare contents, not just os.stat() signatures.
    if filecmp.cmp(spec, tmp, False):
        os.remove(tmp)
    else:
        os.rename(tmp, spec)
11774a5d AF |
147 | |
if __name__ == "__main__":
    # Each command-line argument is the path of a spec file to convert.
    if len(sys.argv) != 1:
        for spec in sys.argv[1:]:
            main(spec)
    else:
        sys.stderr.write("Usage: %s <spec-files>\n" % sys.argv[0])
        sys.exit(1)