#!/usr/bin/python
"""Transcode localized text in RPM spec files to UTF-8.

For every spec file named on the command line the script rewrites, in place:

* ``Summary(<lang>):`` lines,
* the bodies of ``%description -l <lang>`` sections,
* everything after ``%changelog`` (tried as UTF-8 first, then ISO-8859-2).

The source encoding is taken from an explicit ``<lang>.<encoding>`` tag or,
failing that, from the ``langs`` table below.  Problems are reported as
``#spec_utf8: ...`` comment lines written into the output just before the
offending line.

All processing is done on byte strings, so the same code runs on Python 2
(2.6+) and Python 3 (3.5+, which is needed for ``%`` formatting of bytes).
"""

import filecmp
import io
import locale
import os
import os.path
import re
import sys

try:
    import StringIO  # Python 2 module from the original import list
except ImportError:
    # Python 3 has no StringIO module; io offers the same class name.
    import io as StringIO

debug = False

# Default legacy encoding for each language code.
langs = {
    'bg': 'windows-1251',
    'br': 'iso8859-1',
    'ca': 'iso8859-1',
    'cs': 'iso8859-2',
    'da': 'iso8859-1',
    'de': 'iso8859-1',
    'en': 'iso8859-1',
    'eo': 'iso8859-3',
    'es': 'iso8859-1',
    'et': 'iso8859-15',
    'fi': 'iso8859-1',
    'fo': 'iso8859-1',
    'fr': 'iso8859-1',
    'gl': 'iso8859-1',
    'he': 'iso8859-8',
    'id': 'iso8859-1',
    'is': 'iso8859-1',
    'it': 'iso8859-1',
    'ja': 'euc-jp',
    'ko': 'euc-kr',
    'nb': 'iso8859-1',
    'nl': 'iso8859-1',
    'pl': 'iso8859-2',
    'pt': 'iso8859-1',
    'pt_BR': 'iso8859-1',
    'ro': 'iso8859-2',
    'ru': 'KOI8-R',
    'se': 'UTF-8',
    'sk': 'iso8859-2',
    'sl': 'iso8859-2',
    'sv': 'iso8859-1',
    'tr': 'iso8859-9',
    'uk': 'KOI8-U',
    'wa': 'iso8859-1',
    'zh_CN': 'GB2312',
    'zh_HK': 'BIG5-HKSCS',
    'zh_TW': 'BIG5',
    # Placeholder entry kept from the original table; it is an int key and
    # therefore never matches a language tag.
    0: 0,
}

# "<lang>" optionally followed by ".<encoding>".
_RE_LANG = re.compile(r"^([^.]+)(\.[^@]+)?$")
# Encoding names are native strings, hence a text pattern here.
_RE_UTF = re.compile(r"^utf-8$", re.I)

# Spec lines are handled as bytes, hence byte patterns below.
_RE_SUMMARY = re.compile(br"^Summary\(([^\)]+)\):[ \t]+(.*)$")
_RE_DESC = re.compile(br"^(%description.*\s)-l\s+([\S]+)($|\s.*$)")
# A line starting with '%' that is not a '%{...}' macro expansion.
_RE_PROC = re.compile(br"^%[^{]")
# Conditionals do not terminate a %description body.
_RE_IGNORE_PROC = re.compile(br"^%(if|endif)")
_RE_CHANGELOG = re.compile(br"^%changelog")


def _native(value):
    """Return *value* as the interpreter's native ``str`` type."""
    if isinstance(value, str):
        return value
    # latin-1 maps every byte to one character, so this never fails.
    return value.decode("latin-1")


def find_encoding(lang):
    """Split a language tag into its encoding and bare language code.

    *lang* is either ``"<lang>"`` or ``"<lang>.<encoding>"`` and may be a
    native string or a byte string.

    Returns a tuple ``(enc, pure_lang)``.  ``enc`` is a native string: the
    explicit encoding if the tag carries one, otherwise the default from
    ``langs``, or ``None`` when the language is unknown.  ``pure_lang`` is
    the language code without the encoding suffix, in the same string type
    as *lang*.

    A tag that does not have the expected shape (for example ``"pl."``) is
    reported as ``(None, lang)`` instead of raising.
    """
    text = _native(lang)
    m = _RE_LANG.match(text)
    if m is None:
        return (None, lang)

    suffix = m.group(2)
    if suffix is None:
        enc = langs.get(text)
    else:
        # strip dot
        enc = suffix[1:]

    pure_lang = m.group(1)
    if not isinstance(lang, str):
        # Hand back the same string type the caller gave us.
        pure_lang = pure_lang.encode("latin-1")
    return (enc, pure_lang)


def parse_spec(infile, outfile):
    """Copy a spec file from *infile* to *outfile*, transcoding to UTF-8.

    *infile* is an iterable of lines as byte strings (plain ``str`` on
    Python 2); *outfile* needs a ``write()`` method accepting the same type.

    Returns ``True`` if every line was handled cleanly, ``False`` if at
    least one ``#spec_utf8:`` diagnostic was written to *outfile*.  A line
    that could not be transcoded is still copied through unchanged after its
    diagnostic.
    """
    success = True
    in_desc = False
    in_changelog = False
    # Encoding and language of the description currently being transcoded.
    desc_enc = None
    desc_lang = None

    for line in infile:
        outline = line
        if debug:
            outfile.write(
                ("%s, %s, " % (in_desc, in_changelog)).encode("ascii") + line)

        r = _RE_DESC.match(line)
        if r:
            # %description start
            lang = r.group(2)
            (enc, pure_lang) = find_encoding(lang)
            # A new localized description always ends the previous one.
            # Without this reset, an unknown language following a transcoded
            # description left the state active with no usable encoding.
            in_desc = False
            if enc is None:
                outfile.write(
                    b"#spec_utf8: unknown lang code in %%description -l %s\n"
                    % (lang,))
                success = False
            elif not _RE_UTF.search(enc):
                in_desc = True
                desc_enc = enc
                desc_lang = pure_lang
                outline = b"%s-l %s.UTF-8%s\n" % (
                    r.group(1), pure_lang, r.group(3))
        elif in_desc:
            if _RE_PROC.search(line) and not _RE_IGNORE_PROC.search(line):
                in_desc = False
                # The directive ending the description may itself be
                # %changelog; it must not be missed.
                if _RE_CHANGELOG.match(line):
                    in_changelog = True
            else:
                # %description continues
                try:
                    outline = line.decode(desc_enc).encode("UTF-8")
                except (UnicodeError, LookupError):
                    # LookupError: the tag named a codec Python lacks.
                    outfile.write(
                        b"#spec_utf8: transcoding error %%description -l %s\n"
                        % (desc_lang,))
                    success = False
        elif in_changelog:
            try:
                outline = line.decode("UTF-8").encode("UTF-8")
            except UnicodeDecodeError:
                try:
                    outline = line.decode("ISO-8859-2").encode("UTF-8")
                except UnicodeDecodeError:
                    outfile.write(
                        b"#spec_utf8: transcoding next line from Latin2 failed\n")
                    success = False
        else:
            # Summary
            r = _RE_SUMMARY.match(line)
            if r:
                lang = r.group(1)
                (enc, pure_lang) = find_encoding(lang)
                if enc is None:
                    outfile.write(
                        b"#spec_utf8: unknown lang code in Summary(%s)\n"
                        % (lang,))
                    success = False
                elif not _RE_UTF.search(enc):
                    try:
                        desc = r.group(2).decode(enc).encode("UTF-8")
                        outline = b"Summary(%s.UTF-8):\t%s\n" % (pure_lang, desc)
                    except (UnicodeError, LookupError):
                        outfile.write(
                            b"#spec_utf8: transcoding error Summary(%s)\n"
                            % (lang,))
                        success = False
            elif _RE_CHANGELOG.match(line):
                # %changelog start
                in_changelog = True

        if debug:
            outfile.write(
                ("%s, %s\n" % (in_desc, in_changelog)).encode("ascii"))
        outfile.write(outline)

    return success


def main(spec):
    """Convert the spec file at path *spec* in place.

    The result is first written to ``<spec>.tmp``.  If it is identical to
    the original the temporary file is removed, otherwise it replaces the
    original.  Conversion problems are reported on stderr but the converted
    file is still installed.
    """
    tmp = spec + '.tmp'
    print("Converting %s ..." % spec)

    # Binary mode: the file mixes encodings and must be treated as bytes.
    with open(spec, 'rb') as f:
        data = f.read()

    try:
        with open(tmp, 'wb') as out:
            converted_ok = parse_spec(io.BytesIO(data), out)
    except Exception:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(tmp):
            os.remove(tmp)
        raise

    if not converted_ok:
        sys.stderr.write(" Problems while converting %s.\n" % spec)

    if filecmp.cmp(spec, tmp, False):
        os.remove(tmp)
    else:
        os.rename(tmp, spec)


if __name__ == "__main__":
    if len(sys.argv) == 1:
        sys.stderr.write("Usage: %s \n" % sys.argv[0])
        sys.exit(1)

    for spec in sys.argv[1:]:
        main(spec)