#!/usr/bin/python
-import os, os.path, re, sys, locale
+debug = False
+
+import os, os.path, re, sys, locale, StringIO
langs={
'bg':'windows-1251',
return (enc, pure_lang)
def parse_spec(infile, outfile):
    """Copy an RPM spec from infile to outfile, transcoding localized text to UTF-8.

    Three kinds of content are rewritten; every other line is copied verbatim:

    * ``Summary(<lang>):`` lines whose language maps to a non-UTF-8 encoding
      are transcoded and retagged as ``Summary(<lang>.UTF-8):``.
    * ``%description ... -l <lang>`` headers are retagged the same way, and
      the lines that follow are transcoded until the next ``%`` directive
      (``%if``/``%endif`` and ``%{...}`` macros do not end the section).
    * Lines after ``%changelog`` are kept when they already decode as UTF-8
      and are otherwise decoded as ISO-8859-2.

    Problems are reported in-band: a ``#spec_utf8: ...`` comment line is
    written just before the offending line, which is then copied unchanged.

    Uses the module-level ``debug`` flag and ``find_encoding()``, which is
    called with a language tag and unpacked as ``(encoding, bare_lang)``;
    an encoding of None is treated as "unknown language".

    infile  -- iterable yielding the spec's lines (byte strings).
    outfile -- object with a write() method receiving the converted lines.

    Returns True when every line was handled without problems, else False.
    """
    re_summary = re.compile(r"^Summary\(([^\)]+)\):\t+(.*)$")
    re_utf = re.compile(r"^utf-8$", re.I)
    re_desc = re.compile(r"^(%description.*\s)-l\s+([\S]+)($|\s.*$)")
    # A line starting with "%{" is a macro expansion, not a section directive.
    re_proc = re.compile(r"^%[^{]")
    re_ignore_proc = re.compile(r"^%(if|endif)")
    re_changelog = re.compile(r"^%changelog")

    success = True
    in_desc = False
    in_changelog = False
    enc = None
    pure_lang = None

    for line in infile:
        outline = line
        if debug:
            outfile.write("%s, %s, %s" % (in_desc, in_changelog, line))

        header = re_desc.match(line)
        if header:
            # %description start.  Any previous description ends here; without
            # this reset an unknown or UTF-8 header following a transcoded
            # section would leave in_desc set with a stale/None encoding.
            in_desc = False
            lang = header.group(2)
            (enc, pure_lang) = find_encoding(lang)
            if enc is None:
                outfile.write("#spec_utf8: unknown lang code in %%description -l %s\n" % (lang))
                success = False
            elif not re_utf.search(enc):
                in_desc = True
                outline = "%s-l %s.UTF-8%s\n" % (header.group(1), pure_lang, header.group(3))
        else:
            # A real directive closes the description.  The same line is then
            # still examined below, so a %changelog that directly follows a
            # description is not missed.
            if in_desc and re_proc.search(line) and not re_ignore_proc.search(line):
                in_desc = False

            if in_desc:
                # %description continues.  in_desc is only ever set for a
                # non-UTF-8 encoding, so the line always needs transcoding.
                try:
                    outline = unicode(line, enc).encode("UTF-8")
                except UnicodeDecodeError:
                    outfile.write("#spec_utf8: transcoding error %%description -l %s\n" % (pure_lang))
                    success = False
            elif in_changelog:
                # Changelog entries carry no language tag: accept valid UTF-8
                # as is, otherwise fall back to Latin2.
                try:
                    outline = unicode(line, "UTF-8").encode("UTF-8")
                except UnicodeDecodeError:
                    try:
                        outline = unicode(line, "ISO-8859-2").encode("UTF-8")
                    except UnicodeDecodeError:
                        outfile.write("#spec_utf8: transcoding next line from Latin2 failed\n")
                        success = False
            else:
                summary = re_summary.match(line)
                if summary:
                    lang = summary.group(1)
                    (enc, pure_lang) = find_encoding(lang)
                    if enc is None:
                        outfile.write("#spec_utf8: unknown lang code in Summary(%s)\n" % (lang))
                        success = False
                    elif not re_utf.search(enc):
                        try:
                            desc = unicode(summary.group(2), enc).encode("UTF-8")
                            outline = "Summary(%s.UTF-8): %s\n" % (pure_lang, desc)
                        except UnicodeDecodeError:
                            outfile.write("#spec_utf8: transcoding error Summary(%s)\n" % (lang))
                            success = False
                elif re_changelog.match(line):
                    # %changelog start
                    in_changelog = True

        if debug:
            outfile.write("%s, %s\n" % (in_desc, in_changelog))
        outfile.write(outline)

    return success
def main(argv):
    """Convert the spec file named by argv[1] to UTF-8, replacing it in place.

    The converted text is written to ``<spec>.tmp`` first and then renamed
    over the original.  When parse_spec() reports problems a note is printed
    on stderr, but the converted file still replaces the original.

    argv -- argument list in sys.argv layout; argv[1] is the spec file path.

    If the conversion itself raises, both files are closed, the partial
    ``.tmp`` file is removed and the original spec is left untouched.
    """
    spec_path = argv[1]
    tmp_path = spec_path + '.tmp'
    sys.stdout.write("Converting %s ...\n" % spec_path)

    src = open(spec_path, 'r')
    try:
        dst = open(tmp_path, 'w')
        try:
            try:
                converted_cleanly = parse_spec(src, dst)
            finally:
                dst.close()
        except Exception:
            # Do not leave a half-written temporary file behind.
            os.remove(tmp_path)
            raise
    finally:
        src.close()

    if not converted_cleanly:
        sys.stderr.write(" Problems while converting %s.\n" % spec_path)
    os.rename(tmp_path, spec_path)
if __name__ == "__main__":
    # Exactly one argument is expected: the spec file to convert.
    cli_args = sys.argv
    if len(cli_args) == 2:
        main(cli_args)
    else:
        sys.stderr.write("Usage: %s <spec file>\n" % cli_args[0])
        sys.exit(1)