X-Git-Url: http://git.pld-linux.org/?a=blobdiff_plain;f=spec_utf8;h=d2c564b6a0be4e28792560485a61f2c1c92397ed;hb=9a67ccc2a2c52b36b0bdd0e88c3a99e61be9ddc5;hp=9dc13c23459ccc3e0ef52401cce509fb22e2768f;hpb=fb3b78fc43a1196909ae889256a98a387ef80b74;p=packages%2Frpm-build-tools.git diff --git a/spec_utf8 b/spec_utf8 old mode 100644 new mode 100755 index 9dc13c2..d2c564b --- a/spec_utf8 +++ b/spec_utf8 @@ -1,6 +1,8 @@ #!/usr/bin/python -import os, os.path, re, sys, locale +debug = False + +import os, os.path, re, sys, locale, StringIO, filecmp langs={ 'bg':'windows-1251', @@ -12,6 +14,7 @@ langs={ 'en':'iso8859-1', 'eo':'iso8859-3', 'es':'iso8859-1', + 'et':'iso8859-15', 'fi':'iso8859-1', 'fo':'iso8859-1', 'fr':'iso8859-1', @@ -55,49 +58,96 @@ def find_encoding(lang): return (enc, pure_lang) def parse_spec(infile, outfile): - re_summary = re.compile("^Summary\(([^\)]+)\):\t+(.*)$") + success = True + re_summary = re.compile("^Summary\(([^\)]+)\):[ \t]+(.*)$") re_utf = re.compile("^utf-8$", re.I) re_desc = re.compile("^(%description.*\s)-l\s+([\S]+)($|\s.*$)") - re_proc = re.compile("^%") + re_proc = re.compile("^%[^{]") + re_ignore_proc = re.compile("^%(if|endif)") + re_changelog = re.compile("^%changelog") in_desc = False + in_changelog = False for l in infile: - outline = l + outline = l + if debug: outfile.write("%s, %s, %s" % (in_desc, in_changelog, l)) + + # %description start + r = re_desc.match(l) + if r: + lang = r.group(2) + (enc, pure_lang) = find_encoding(lang) + if enc == None: + outfile.write("#spec_utf8: unknown lang code in %%description -l %s\n" % (lang)) + success = False + elif not re_utf.search(enc): + in_desc = True + outline = "%s-l %s.UTF-8%s\n" % (r.group(1), pure_lang, r.group(3)) + elif in_desc: + if re_proc.search(l) and not re_ignore_proc.search(l): + in_desc = False + else: + # %description continues + if not re_utf.search(enc): + try: + outline = unicode(l, enc).encode("UTF-8") + except UnicodeDecodeError: + outfile.write("#spec_utf8: transcoding error %%description -l %s\n" % (pure_lang)) + success = False + elif in_changelog: + try: + outline = unicode(l, "UTF-8").encode("UTF-8") + except UnicodeDecodeError: + try: + outline = unicode(l, "ISO-8859-2").encode("UTF-8") + except UnicodeDecodeError: + outfile.write("#spec_utf8: transcoding next line from Latin2 failed\n") + success = False + else: + # Summary r = re_summary.match(l) if r: - (enc, pure_lang) = find_encoding(r.group(1)) + lang = r.group(1) + (enc, pure_lang) = find_encoding(lang) if enc == None: - outfile.write("#unknow lang code Summary(%s)\n" % (lang)) + outfile.write("#spec_utf8: unknown lang code in Summary(%s)\n" % (lang)) + success = False elif not re_utf.search(enc): try: desc = unicode(r.group(2), enc).encode("UTF-8") - l = "Summary(%s.UTF-8): %s\n" % (pure_lang, desc) + outline = "Summary(%s.UTF-8):\t%s\n" % (pure_lang, desc) except UnicodeDecodeError: - outfile.write("#transcoding error Summary(%s)\n" % (lang)) - if in_desc: - if re_proc.search(l): - in_desc = False - else: - if not re_utf.search(enc): - try: - l = unicode(l, enc).encode("UTF-8") - except UnicodeDecodeError: - outfile.write("#transcoding error %%description -l %s\n" % (lang)) + outfile.write("#spec_utf8: transcoding error Summary(%s)\n" % (lang)) + success = False + elif re_changelog.match(l): + # %changelog start + in_changelog = True + + + if debug: outfile.write("%s, %s\n"% (in_desc, in_changelog)) + outfile.write("%s"% (outline, )) + return success - r = re_desc.match(l) - if r: - (enc, pure_lang) = find_encoding(r.group(2)) - if enc == None: - outfile.write("#unknow lang code in %%description -l %s\n" % (lang)) - elif not re_utf.search(enc): - in_desc = True - l = "%s-l %s.UTF-8%s\n" % (r.group(1), pure_lang, r.group(3)) - - outfile.write(l) - -def main(): - parse_spec(sys.stdin, sys.stdout) +def main(spec): + tmp = spec + '.tmp' + print "Converting %s ..." % spec + f = open(spec, 'r') + sio = StringIO.StringIO() + sio.write(f.read()) + f.close() + sio.seek(0) + f = open(tmp, 'w') + if not parse_spec(sio, f): + sys.stderr.write(" Problems while converting %s.\n" % spec) + f.close() + if filecmp.cmp(spec, tmp, False): + os.remove(tmp) + else: + os.rename(tmp, spec) if __name__ == "__main__": - main() - + if len(sys.argv) == 1: + sys.stderr.write("Usage: %s \n" % sys.argv[0]) + sys.exit(1) + for spec in sys.argv[1:]: + main(spec)