[packages/subconv.git] / subconv.py

#!/usr/bin/env python
#
# subconv
# divx subtitles converter by Pawel Stolowski, Julien Lerouge
# mpl2 by Grzegorz Zyla
#
# Maintained at http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/subconv/
#
# Released under terms of GNU GPL
#

import re
import sys
import getopt
import string
import os
import subprocess
import urllib2

from xml.dom import minidom

try:
    from hashlib import md5 as md5
except ImportError:
    from md5 import md5

def usage():
    """
    Print the command line help.
    The text is written to standard error, nothing is returned.
    """
    help_text = """
 subconv -- DivX subtitles converter by Pawel Stolowski, Julien Lerouge

 Usage: subconv [-i fmt|-o fmt|-a sec|-s sec|-S h:m:s[,h:m:s,...]] input [output1, output2, ...]

     -i fmt        input format (one of: srt, tmp, mdvd, sub2, mpl2, auto; auto by default)
     -o fmt        output format (one of: tmp, srt; srt by default)
     -f fps        adjust fps rate for microdvd input subtitles (auto by default)
     -a sec        adjust subtitle delay in seconds (add)
     -s sec        adjust subtitle delay in seconds (sub)
     -S h:m:s,...  split subtitles in selected position(s); additional output file names must be specified
     -h            this help

     """
    sys.stderr.write(help_text)


def detect_file_fps(file):
    """
    Detect the FPS of the media file that belongs to a subtitle file.

    Every regular file in the subtitle's directory whose name starts with
    the subtitle name minus its last four characters is probed, in turn,
    with the `mediainfo` tool, the `file` tool and finally the
    napiprojekt.pl web API (which receives the md5 of the first 10 MiB of
    the candidate file). The first non-zero answer wins.

    Progress messages go to standard error so they cannot end up inside
    subtitles that are being written to standard output.

    input: file name of the subtitle file
    returns: FPS as float, or False if it could not be determined
    """
    def command_output(argv):
        # Returns the stripped stdout of a command, '' if it cannot be run.
        try:
            proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError:
            return ''
        out = proc.communicate()[0]
        # communicate() hands back bytes on Python 3; the parsing below wants text
        if out and not isinstance(out, str):
            out = out.decode('utf-8', 'replace')
        return (out or '').strip()

    def mediainfo_fps(path):
        out = command_output(['mediainfo', '--Inform=Video;%FrameRate%', path])
        if not out:
            return False
        try:
            return float(out)
        except ValueError:
            # unexpected output (not a single number)
            return False

    def file_fps(path):
        out = command_output(['file', path])
        m = re.match(r'^.*, (\d+\.{0,1}\d{0,}) fps,.*', out)
        if not m:
            return False
        fps = m.group(1)
        if fps == '23.98':
            # `file` rounds NTSC film rate; use the exact value
            return 24/1.001
        return float(fps)

    def napiprojekt_fps(path):
        # Best effort only: any I/O, network or parse problem means "unknown".
        try:
            d = md5()
            src = open(path, 'rb')
            try:
                d.update(src.read(10485760))
            finally:
                src.close()
            digest = d.hexdigest()
            url = "http://napiprojekt.pl/api/api.php?mode=file_info&client=pynapi&id=%s" % (urllib2.quote(digest))
            f = urllib2.urlopen(url)
            try:
                fps_xml = f.read()
            finally:
                f.close()
            xml = minidom.parseString(fps_xml)
            name = xml.getElementsByTagName("fps")
            fps = " ".join(t.nodeValue for t in name[0].childNodes if t.nodeType == t.TEXT_NODE)
            if fps:
                return float(fps)
        except Exception:
            return False
        return False

    sys.stderr.write("Guessing fps ")
    dirname = os.path.dirname(file) or '.'
    basename = os.path.basename(file)
    if len(basename) > 4:
        # drop the ".ext" part; plain prefix comparison, so characters that
        # are special in regular expressions are harmless
        stem = basename[:-4]
        for name in os.listdir(dirname):
            if not name.startswith(stem):
                continue
            # listdir() yields bare names, the tools need the real path
            path = os.path.join(dirname, name)
            if not os.path.isfile(path):
                continue
            fps = mediainfo_fps(path) or file_fps(path) or napiprojekt_fps(path)
            if fps:
                sys.stderr.write("from file %s: %.3f\n" % (name, fps))
                return fps
    sys.stderr.write(" .. unknown\n")
    return False

def detect_fps(list):
    """
    Ask the user for the FPS of a micro-dvd input file.

    The last frame number found in the subtitles is converted to a
    running time for each common frame rate; the list is shown on
    standard error and the index of the chosen rate is read from
    standard input.

    input: contents of a file as list
    returns: FPS
    exits with status 1 if no micro-dvd line is found or the answer is invalid
    """
    sys.stderr.write("FPS guessing, here are approximate length of file for several FPS :\n")
    most_current = [24/1.001, 25.0, 30/1.001]

    re_mdvd = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")
    last = None
    for line in reversed(list):
        m = re_mdvd.match(line)
        if m:
            # the end frame may be empty ("{123}{}text"); fall back to the start frame
            last = int(m.group(2) or m.group(1))
            break
    if last is None:
        sys.stderr.write("No micro-dvd subtitles found, cannot guess FPS\n")
        sys.exit(1)

    for i in range(len(most_current)):
        sys.stderr.write("%s %.3f Fps -> " % (str(i), most_current[i]))
        tot_sec = int(last / most_current[i])
        minutes = tot_sec // 60
        seconds = tot_sec % 60
        sys.stderr.write(str(minutes)+" min "+str(seconds)+"sec\n")
    sys.stderr.write("Choice : ")
    try:
        choice = int(sys.stdin.readline().strip())
    except ValueError:
        # empty or non-numeric answer is handled like an out of range one
        choice = -1
    if 0 <= choice < len(most_current):
        return most_current[choice]
    sys.stderr.write("Bad choice\n")
    sys.exit(1)


def detect_format(list):
    """
    Guess the format of an input subtitles file.
    input: contents of a file as list
    returns: format (mdvd, srt, tmp, sub2, mpl2) of the first line that
             looks like a subtitle, or "" if no line is recognized
    """
    # For every line the patterns are tried in exactly this order.
    signatures = (
        ("mdvd", re.compile("^\{(\d+)\}\{(\d*)\}\s*(.*)")),
        ("srt", re.compile("^(\d+):(\d+):(\d+),\d+\s*-->.*")),
        ("tmp", re.compile("^(\d+):(\d+):(\d+):(.*)")),
        ("sub2", re.compile("^(\d+):(\d+):(\d+)\.\d+\s*\,.*")),
        ("mpl2", re.compile("^\[(\d+)\]\[(\d+)\]\s*(.*)")),
    )
    for candidate in list:
        for fmt_name, signature in signatures:
            if signature.match(candidate):
                return fmt_name
    return ""


def read_mdvd(list, fps):
    """
    Read micro-dvd subtitles.

    Lines look like "{start_frame}{end_frame}text|text"; an empty end
    frame means "20 frames after the start". Leading {c:...} and {y:...}
    control tags are removed from every text line. Lines that do not
    match are skipped.

    input: contents of a file as list (emptied by this call), frames per second
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    re1 = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")
    # float() so that an integer fps does not truncate times on Python 2
    fps = float(fps)

    def strip_tags(text):
        # Several tags may be stacked ("{y:i}{c:$0000ff}text"); a tag
        # without closing brace is left alone instead of raising.
        while True:
            lowered = text.lower()
            if not (lowered.startswith('{c:') or lowered.startswith('{y:')):
                return text
            end_marker = text.find('}')
            if end_marker < 0:
                return text
            text = text[end_marker + 1:]

    subtitles = []
    for line in list:
        m = re1.match(line)
        if not m:
            continue
        time1 = int(m.group(1))
        if m.group(2) == '':
            time2 = time1 + 20
        else:
            time2 = int(m.group(2))
        subt = [time1 / fps, time2 / fps]
        subt.extend([strip_tags(text) for text in m.group(3).strip().split("|")])
        subtitles.append(subt)
    # the input list has always been consumed by this reader; keep that
    del list[:]
    return subtitles

def read_mpl2(list):
    """
    Read mpl2 subtitles, i.e. lines of the form "[start][end]text|text"
    with both times given in tenths of a second.
    input: contents of a file as list (consumed line by line)
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],.....]
    """
    pattern = re.compile("^\[(\d+)\]\[(\d+)\]\s*(.*)")
    parsed = []
    while list:
        hit = pattern.match(list.pop(0))
        if hit is None:
            # not a subtitle line
            continue
        begin, finish, body = hit.groups()
        parsed.append([int(begin)*0.1, int(finish)*0.1] + body.strip().split("|"))
    return parsed

def read_sub2(list):
    """
    Read subviewer 2.0 subtitles: a line with both timestamps followed
    by one line of text in which "[br]" separates the displayed lines, e.g.
        00:01:54.75,00:01:58.54
        You shall not pass!
    A warning is written to standard error when the input ends right
    after a timestamp line.
    input: contents of a file as list (consumed line by line)
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    stamp = re.compile("^(\d+):(\d+):(\d+)\.(\d+)\s*\,\s*(\d+):(\d+):(\d+)\.(\d+).*$")

    def seconds(hh, mm, ss, frac):
        return int(hh)*3600 + int(mm)*60 + int(ss) + int(frac)/100.0

    result = []
    try:
        while list:
            hit = stamp.match(list.pop(0))
            if not hit:
                continue
            fields = hit.groups()
            entry = [seconds(*fields[:4]), seconds(*fields[4:])]
            # the text is expected on the very next line
            entry.extend(list.pop(0).strip().split("[br]"))
            result.append(entry)
    except IndexError:
        sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
    return result

def read_srt(list):
    """
    Reads srt subtitles.

    A subtitle is a counter line, a "h:m:s,ms --> h:m:s,ms" line and text
    lines up to the next blank line or the end of the input, so the last
    subtitle is kept even when the file has no trailing blank line.
    A warning is written to standard error only when the input stops
    right after a counter line or right after a timestamp line.

    input: contents of a file as list (consumed line by line)
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re_counter = re.compile(r"^(\d+)\s*$")
    re_times = re.compile(r"^(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+).*$")
    re_blank = re.compile(r"^\s*$")

    def seconds(h, m, s, ms):
        return int(h)*3600 + int(m)*60 + int(s) + int(ms)/1000.0

    subtitles = []
    truncated = False
    while list:
        if not re_counter.match(list.pop(0)):
            continue
        if not list:
            # counter line without anything after it
            truncated = True
            break
        m = re_times.match(list.pop(0))
        if not m:
            continue
        fields = m.groups()
        subt = [seconds(*fields[:4]), seconds(*fields[4:])]
        terminated = False
        while list:
            l = list.pop(0)
            if re_blank.match(l):
                terminated = True
                break
            subt.append(l.strip())
        if not terminated and len(subt) == 2:
            # input ended directly after the timestamps: nothing to show
            truncated = True
            break
        subtitles.append(subt)
    if truncated:
        sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
    return subtitles

def read_tmp(list):
    """
    Reads tmplayer (tmp) subtitles.

    Lines look like "h:m:s:text|text". The format has no end time, so a
    subtitle is shown until the next second that has a subtitle of its
    own, but for no more than 4 seconds. Texts given for the same second
    are merged into one subtitle.

    input: contents of a file as list (consumed line by line)
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+):(.*)")
    subs = {}
    while list:
        m = re1.match(list.pop(0))
        if m:
            start = int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3))
            subs.setdefault(start, []).extend(m.group(4).strip().split("|"))

    subtitles = []
    for start in sorted(subs):
        duration = 1
        while duration < 4 and (start + duration) not in subs:
            duration += 1
        subtitles.append([start, start + duration] + subs[start])
    return subtitles

def to_tmp(list):
    """
    Render subtitles (internal format) as tmp lines.
    Only the start time, cut to whole seconds, is written; the text
    lines of a subtitle are joined with "|".
    returns: list of output lines
    """
    def render(entry):
        start = entry[0]
        hours = int(start/3600)
        minutes = int(int(start%3600)/60)
        seconds = int(start%60)
        return "%.2d:%.2d:%.2d:%s\n" % (hours, minutes, seconds, "|".join(entry[2:]))

    return [render(entry) for entry in list]


def to_srt(list):
    """
    Converts list of subtitles (internal format) to srt format

    Times are rounded to the nearest millisecond before they are split
    into fields. Cutting the fraction off instead turns e.g. 2.3 s into
    ",299", because 2.3 is not exactly representable as a float.

    returns: list of strings, one complete srt entry (counter, times,
             text lines, blank line) per subtitle
    """
    def stamp(secs):
        total_ms = int(round(secs * 1000))
        total_secs, ms = divmod(total_ms, 1000)
        hours, rest = divmod(total_secs, 3600)
        minutes, seconds = divmod(rest, 60)
        return "%.2d:%.2d:%.2d,%.3d" % (hours, minutes, seconds, ms)

    outl = []
    count = 1
    for l in list:
        outl.append("%d\n%s --> %s\n%s\n\n" % (count, stamp(l[0]), stamp(l[1]), "\n".join(l[2:])))
        count += 1
    return outl


def sub_add_offset(list, off):
    """
    Shift all subtitles by an offset (in seconds, may be negative).
    The subtitles are changed in place; a time that would become
    negative is set to 0 and a warning is written to standard error.
    input: subtitles (internal format), offset
    returns: list holding the same, shifted subtitles (internal format)
    """
    shifted = []
    for entry in list:
        entry[0] += off
        entry[1] += off
        # first the start time, then the end time
        for idx in (0, 1):
            if entry[idx] < 0:
                sys.stderr.write("Warning, negative offset too high, subs beginning at 00:00:00\n")
                entry[idx] = 0
        shifted.append(entry)
    return shifted

def sub_split(sub, times):
    """
    Splits subtitles

    Subtitles starting before the first split position form the first
    part and keep their times. Every later part holds the subtitles that
    start at or after one split position and before the next one (the
    last part is open ended); their times are made relative to the split
    position that opens the part, never below 0.

    input: subtitles (internal format) and split positions (in seconds,
           expected in ascending order); the positions are not modified
    returns: a list of lists with new subtitles, one more than split
             positions given
    """
    if not times:
        # nothing to split at: a single part with everything
        return [sub[:]]

    pos = 0
    num = len(sub)

    while pos < num and sub[pos][0] < times[0]:
        pos += 1

    lists = [sub[:pos]]    # [subtitles1, subtitles2, ...]

    # upper bound of every part; None marks the open ended last one
    bounds = [t for t in times[1:]] + [None]
    for minussec, second in zip(times, bounds):
        outl = []
        while pos < num and (second is None or sub[pos][0] < second):
            subline = [sub[pos][0] - minussec, sub[pos][1] - minussec] + sub[pos][2:]
            if subline[0] < 0:
                subline[0] = 0
            if subline[1] < 0:
                subline[1] = 0
            outl.append(subline)
            pos += 1
        lists.append(outl)
    return lists

def sub_fix_times(sub):
    """
    Repairs implausible end times, in place.

    A subtitle is repaired when it ends before it starts, ends after the
    next subtitle starts, or is displayed longer than 20 seconds. Its new
    end is start + estimated reading time (1 second plus 1 second per 10
    characters) if that still ends before the next subtitle starts, else
    0.1 second before the start of the next subtitle.
    The last subtitle has no successor and is left untouched.

    input: subtitles (internal format)
    returns: the same list
    """
    # every subtitle that has a successor, including the second to last
    for i in range(len(sub) - 1):
        current = sub[i]
        next_start = sub[i + 1][0]
        approx = 1 + len(" ".join(current[2:])) // 10    # 10 char per second
        # end < start or end > start++ or displayed longer than 20s
        if current[1] < current[0] or current[1] > next_start or current[1] - current[0] > 20:
            if current[0] + approx < next_start:
                current[1] = current[0] + approx
            else:
                current[1] = next_start - 1.0 / 10
    return sub

def get_split_times(str):
    """
    Parse the split positions given on the command line.
    input: string of comma-separated xx:yy:zz (hours:minutes:seconds) positions
    returns: list of times in seconds; an empty list, after an error
             message on standard error, if any position cannot be parsed
    """
    pattern = re.compile("^(\d+):(\d+):(\d+)")
    seconds = []
    for chunk in str.split(","):
        found = pattern.match(chunk)
        if found is None:
            sys.stderr.write("Unknown time format\n")
            return []
        hh, mm, ss = [int(part) for part in found.groups()]
        seconds.append(hh*3600 + mm*60 + ss)
    return seconds


def read_subs(file,fmt,fps):
    """
    Reads subtitles from file, using format fmt

    With fmt "auto" the format is guessed from the contents and reported
    on standard error. For micro-dvd input with fps == -1 the frame rate
    is first looked up from a matching media file and, failing that,
    asked from the user.

    input : file name, format (srt,mdvd,tmp,sub2,mpl2,auto), fps (-1 = detect)
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    exits with status 1 if the format is unknown or cannot be guessed
    """
    src = open(file,'r')
    try:
        subs = src.readlines()
    finally:
        src.close()

    if fmt == "auto":
        # resolve the format on the lines already read; no need to read the file again
        fmt = detect_format(subs)
        sys.stderr.write("Guessing subs format .. %s\n" % fmt )

    if fmt == "tmp":
        return read_tmp(subs)
    elif fmt == "srt":
        return read_srt(subs)
    elif fmt == "mdvd":
        if fps == -1:
            fps = detect_file_fps(file)
            if not fps:
                fps = detect_fps(subs)
        return read_mdvd(subs, fps)
    elif fmt == "sub2":
        return read_sub2(subs)
    elif fmt == "mpl2":
        return read_mpl2(subs)
    else:
        sys.stderr.write("Input format not specified/recognized\n")
        sys.exit(1)


#
#-----------------------------------------------------------------------------------------

def main(argv=sys.argv):
    """
    Command line entry point: parse options, read the input subtitles,
    apply delay and split, write the result.

    input: argument vector, program name first (defaults to sys.argv)
    returns: None; exits with status 2 on unknown options and with
             status 1 on other usage errors (and after -h)
    """

    outfunc = {
      "srt":to_srt,
      "tmp":to_tmp}

    infmt = "auto"
    outfmt = "srt"
    subdelay = 0
    fps = -1
    #out_to_file == 1 => output to a file, 0 => output stdout, -1 => Split, output to stdout not allowed
    out_to_file = 0

    try:
        # honour the argv that was passed in, not the global one
        opts, args = getopt.getopt(argv[1:], 'i:o:a:s:S:f:h')
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    splittimes = []

    for opt, arg in opts:
        if opt == '-o':
            if arg in outfunc:
                outfmt = arg
            else:
                sys.stderr.write("Unknown output format.\n")
                sys.exit(1)
        elif opt == '-i':
            infmt = arg
        elif opt == '-a':
            subdelay = float(arg)
        elif opt == '-s':
            subdelay = -float(arg)
        elif opt == '-S':
            out_to_file = -1
            splittimes = get_split_times(arg)
            if not splittimes:
                # get_split_times() has already reported the problem;
                # do not carry on as if no split had been requested
                sys.exit(1)
        elif opt == '-f':
            fps = float(arg)
        elif opt == '-h':
            usage()
            sys.exit(1)

    # number of file names must be 2 + number of split-points
    if len(args) == len(splittimes)+2:
        out_to_file = 1
    elif len(args) == len(splittimes)+1 and out_to_file != -1:
        out_to_file = 0
    else:
        sys.stderr.write("Too few file names given!\n")
        usage()
        sys.exit(1)

    # read file
    sub = read_subs(args.pop(0),infmt,fps)

    # apply DELAY
    if subdelay != 0:
        sub = sub_add_offset(sub, subdelay)

    # apply SPLIT
    if len(splittimes) == 0:
        sub_list = [sub]
    else:
        sub_list = sub_split(sub, splittimes)

    # save file(S)
    for nsub in sub_list:
        s = outfunc[outfmt](nsub)
        if out_to_file == 1:
            dst = open(args.pop(0), 'w')
            try:
                dst.writelines(s)
            finally:
                dst.close()
        else:
            sys.stdout.writelines(s)

if __name__ == "__main__":
    # Only a real interrupt is reported as such. SystemExit raised by
    # sys.exit() inside main() is left alone so that its exit status
    # reaches the caller.
    try:
        ret = main()
    except KeyboardInterrupt:
        sys.stderr.write("%s: Interrupted, aborting.\n" % os.path.basename(sys.argv[0]))
        ret = 1
    sys.exit(ret)
Commit	Line	Data
6e604697 AM	1	#!/usr/bin/env python
6e604697 AM	2	#
109e065c AM	3	# subconv
	4	# divx subtitles converter by Pawel Stolowski, Julien Lerouge
	5	# mpl2 by Grzegorz Zyla
	6	#
	7	# Maintained at http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/subconv/
6e604697 AM	8	#
	9	# Released under terms of GNU GPL
	10	#
	11
d1b6ba95 AM	12	import re
	13	import sys
	14	import getopt
	15	import string
	16	import os
	17	import subprocess
	18	import urllib2
	19
	20	from xml.dom import minidom
	21
	22	try:
	23	from hashlib import md5 as md5
	24	except ImportError:
	25	from md5 import md5
6e604697 AM	26
6e604697 AM	27	def usage():
7f429bde	28	sys.stderr.write("""
109e065c	29	subconv -- DivX subtitles converter by Pawel Stolowski, Julien Lerouge
6e604697 AM	30
6e604697 AM	31	Usage: subconv [-i fmt\|-o fmt\|-a sec\|-s sec\|-S h:m:s[,h:m:s,...]] input [output1, output2, ...]
7f429bde	32
4fa28d7b	33	-i fmt input format (one of: srt, tmp, mdvd, sub2, mpl2, auto; auto by default)
6e604697 AM	34	-o fmt output format (one of: tmp, srt; srt by default)
	35	-f fps adjust fps rate for microdvd input subtitles (auto by default)
	36	-a sec adjust subtitle delay in seconds (add)
	37	-s sec adjust subtitle delay in seconds (sub)
	38	-S h:m:s,... split subtitles in selected position(s); additional output file names must be specified
	39	-h this help
	40
7f429bde	41	""")
6e604697 AM	42
6e604697 AM	43
ec06e0aa AM	44	def detect_file_fps(file):
	45	"""
	46	Detect the FPS for a given media file
	47	input: file name
	48	returns: FPS
	49	"""
	50	def mediainfo_fps(file):
4e5dbeb3 AM	51	f = None
	52	try:
	53	f = subprocess.Popen(['mediainfo', '--Inform=Video;%FrameRate%', file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	54	except OSError, e:
	55	return False
ec06e0aa	56	(out, err) = f.communicate()
a9d17b42 AM	57	if out:
a9d17b42 AM	58	out = out.strip()
ec06e0aa AM	59	if not out:
	60	return False
	61	return float(out)
	62
	63	def file_fps(file):
4e5dbeb3 AM	64	f = None
	65	try:
	66	f = subprocess.Popen(['file', file], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	67	except OSError, e:
	68	return False
ec06e0aa	69	(out, err) = f.communicate()
a9d17b42 AM	70	if out:
a9d17b42 AM	71	out = out.strip()
ec06e0aa AM	72	if not out:
	73	return False
	74	re_fps = re.compile(r'^., (\d+\.{0,1}\d{0,}) fps,.')
	75	m = re_fps.match(out)
	76	if m:
21acc1c6 AM	77	fps = m.group(1)
	78	if fps == '23.98':
	79	fps = 24/1.001
	80	return float(fps)
ec06e0aa AM	81	return False
ec06e0aa AM	82
d1b6ba95 AM	83	def napiprojekt_fps(file):
	84	try:
	85	d = md5()
	86	d.update(open(file).read(10485760))
	87	digest = d.hexdigest()
	88	url = "http://napiprojekt.pl/api/api.php?mode=file_info&client=pynapi&id=%s" % (urllib2.quote(digest))
	89	f = urllib2.urlopen(url)
	90	fps_xml = f.read()
	91	f.close()
	92	xml = minidom.parseString(fps_xml)
	93	name = xml.getElementsByTagName("fps")
	94	fps = " ".join(t.nodeValue for t in name[0].childNodes if t.nodeType == t.TEXT_NODE)
	95	except Exception, e:
	96	return False
	97	if fps:
	98	return float(fps)
	99	return False
	100
ec06e0aa	101	print "Guessing fps",
ec06e0aa AM	102	dir = os.path.dirname(file)
	103	if not dir:
	104	dir = '.'
b5bbe496 AM	105	file = os.path.basename(file)
	106	if len(file) <= 4:
	107	return False
ec06e0aa AM	108	mfile = file[:-4]
	109	ref = re.compile(r'^' + mfile + '.*')
	110	for file in os.listdir(dir):
	111	if not ref.match(file):
	112	continue
	113	fps = mediainfo_fps(file)
	114	if not fps:
	115	fps = file_fps(file)
d1b6ba95 AM	116	if not fps:
d1b6ba95 AM	117	fps = napiprojekt_fps(file)
ec06e0aa AM	118	if fps:
	119	print "from file %s: %.3f" % (file, fps)
	120	return fps
	121	print " .. unknown"
	122	return False
6e604697 AM	123
6e604697 AM	124	def detect_fps(list):
7f429bde AM	125	"""
	126	Detect the FPS for a given input file
	127	input: contents of a file as list
	128	returns: FPS
	129	"""
	130	sys.stderr.write("FPS guessing, here are approximate length of file for several FPS :\n")
e13fc614	131	most_current = [24/1.001, 25.0, 30/1.001 ]
7f429bde	132
109e065c	133	re_mdvd = re.compile("^\{(\d+)\}\{(\d)\}\s(.*)")
7f429bde AM	134	count = len(list) - 1
	135	m = re_mdvd.match(list[count])
	136	while not m:
	137	count = count - 1
	138	m = re_mdvd.match(list[count])
	139	last = int(m.group(2))
	140
	141	for i in range(0,len(most_current)):
ec06e0aa	142	sys.stderr.write("%s %.3f Fps -> " % (str(i), most_current[i]))
7f429bde AM	143	tot_sec = int(last / most_current[i])
	144	min = tot_sec / 60
	145	sec = tot_sec % 60
	146	sys.stderr.write(str(min)+" min "+str(sec)+"sec\n")
	147	sys.stderr.write("Choice : ")
	148	choice=int(sys.stdin.readline().strip())
	149	if choice>=0 and choice<len(most_current):
	150	return most_current[choice]
	151	else:
	152	sys.stderr.write("Bad choice\n")
	153	sys.exit(1)
6e604697 AM	154
	155
	156	def detect_format(list):
7f429bde AM	157	"""
	158	Detect the format of input subtitles file.
	159	input: contents of a file as list
	160	returns: format (srt, tmp, mdvd) or "" if unknown
	161	"""
109e065c	162	re_mdvd = re.compile("^\{(\d+)\}\{(\d)\}\s(.*)")
7f429bde AM	163	re_srt = re.compile("^(\d+):(\d+):(\d+),\d+\s-->.")
	164	re_tmp = re.compile("^(\d+):(\d+):(\d+):(.*)")
	165	re_sub2 = re.compile("^(\d+):(\d+):(\d+)\.\d+\s\,.")
	166	re_mpl2 = re.compile("^\[(\d+)\]\[(\d+)\]\s(.)")
ab0ac020	167	for line in list:
7f429bde	168	if re_mdvd.match(line):
7f429bde AM	169	return "mdvd"
7f429bde AM	170	elif re_srt.match(line):
7f429bde AM	171	return "srt"
7f429bde AM	172	elif re_tmp.match(line):
7f429bde AM	173	return "tmp"
7f429bde AM	174	elif re_sub2.match(line):
7f429bde AM	175	return "sub2"
7f429bde AM	176	elif re_mpl2.match(line):
7f429bde AM	177	return "mpl2"
7f429bde AM	178	return ""
6e604697 AM	179
	180
	181	def read_mdvd(list, fps):
7f429bde AM	182	"""
	183	Read micro-dvd subtitles.
	184	input: contents of a file as list
	185	returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
	186	"""
109e065c	187	re1 = re.compile("^\{(\d+)\}\{(\d)\}\s(.*)")
7f429bde AM	188
	189	subtitles = []
	190	while len(list)>0:
	191	x = list.pop(0)
	192	m = re1.match(x, 0)
	193	if m:
	194	time1 = int(m.group(1))
e13fc614	195	subt = [ time1 / fps ]
7f429bde AM	196	time2 = m.group(2)
	197	if time2 == '':
	198	time2 = int(time1) + 20
e13fc614	199	subt.append(int(time2) / fps)
7f429bde AM	200	texts = m.group(3).strip().split("\|")
	201	for i in range(len(texts)):
	202	text = texts[i]
	203	if text.lower().startswith('{c:') or text.lower().startswith('{y:'):
	204	end_marker = text.index('}')
	205	if end_marker:
	206	text = text[end_marker + 1:]
	207	texts[i] = text
	208	subt.extend(texts)
	209	subtitles.append(subt)
	210	return subtitles
6e604697	211
4fa28d7b	212	def read_mpl2(list):
7f429bde AM	213	"""
	214	Read mpl2 subtitles
	215	input: contents of a file as list
	216	returns: list of subtitles in form: [[time_start in secs, time_end is secs, line1, ...],.....]
	217	"""
	218	re1 = re.compile("^\[(\d+)\]\[(\d+)\]\s(.)")
	219	subtitles = []
	220	while len(list)>0:
	221	m = re1.match(list.pop(0),0);
	222	if m:
	223	subt = [int(m.group(1))*0.1]
	224	subt.append(int(m.group(2))*0.1)
	225	subt.extend(m.group(3).strip().split("\|"))
	226	subtitles.append(subt)
	227	return subtitles
	228
6e604697	229	def read_sub2(list):
7f429bde AM	230	"""
	231	Reads subviewer 2.0 format subtitles, e.g. :
	232	00:01:54.75,00:01:58.54
	233	You shall not pass!
	234	input: contents of a file as list
	235	returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
	236	"""
	237	re1 = re.compile("^(\d+):(\d+):(\d+)\.(\d+)\s\,\s(\d+):(\d+):(\d+)\.(\d+).*$")
	238	subtitles = []
	239	try:
	240	while len(list)>0:
	241	m = re1.match(list.pop(0), 0)
	242	if m:
	243	subt = [int(m.group(1))3600 + int(m.group(2))60 + int(m.group(3)) + int(m.group(4))/100.0]
	244	subt.append(int(m.group(5))3600 + int(m.group(6))60 + int(m.group(7)) + int(m.group(8))/100.0)
	245	l = list.pop(0).strip()
	246	lines = l.split("[br]")
	247	for i in range(0,len(lines)):
	248	subt.append(lines[i])
	249	subtitles.append(subt)
	250	except IndexError:
	251	sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
	252	return subtitles
6e604697 AM	253
6e604697 AM	254	def read_srt(list):
7f429bde AM	255	"""
	256	Reads srt subtitles.
	257	input: contents of a file as list
	258	returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
	259	"""
	260	re1 = re.compile("^(\d+)\s*$")
	261	re2 = re.compile("^(\d+):(\d+):(\d+),(\d+)\s-->\s(\d+):(\d+):(\d+),(\d+).*$")
	262	re3 = re.compile("^\s*$")
	263	subtitles = []
	264	try:
	265	while len(list)>0:
	266	if re1.match(list.pop(0), 0):
	267	m = re2.match(list.pop(0), 0)
	268	if m:
	269	subt = [int(m.group(1))3600 + int(m.group(2))60 + int(m.group(3)) + int(m.group(4))/1000.0]
	270	subt.append(int(m.group(5))3600 + int(m.group(6))60 + int(m.group(7)) + int(m.group(8))/1000.0)
	271	l = list.pop(0)
	272	while not re3.match(l, 0):
	273	#subt.append(string.replace(l[:-1], "\r", ""))
	274	subt.append(l.strip())
	275	l = list.pop(0)
	276	subtitles.append(subt)
	277	except IndexError:
	278	sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
	279	return subtitles
6e604697 AM	280
6e604697 AM	281	def read_tmp(list):
7f429bde AM	282	"""
	283	Reads tmplayer (tmp) subtitles.
	284	input: contents of a file as list
	285	returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
	286	"""
	287	re1 = re.compile("^(\d+):(\d+):(\d+):(.*)")
	288	subtitles = []
	289	subs={}
	290	while len(list)>0:
	291	m = re1.match(list.pop(0), 0)
	292	if m:
	293	time = int(m.group(1))3600 + int(m.group(2))60 + int(m.group(3))
	294	if subs.has_key(time) :
	295	subs[time].extend(m.group(4).strip().split("\|"))
	296	else:
	297	subs[time] = m.group(4).strip().split("\|")
	298
	299	times = subs.keys()
	300	times.sort()
	301	for i in range(0,len(times)):
	302	next_time = 1;
	303	while not subs.has_key(times[i]+next_time) and next_time < 4 :
	304	next_time = next_time + 1
	305	subt = [ times[i] , times[i] + next_time]
	306	subt.extend(subs[times[i]])
	307	subtitles.append(subt)
	308	return subtitles
6e604697 AM	309
6e604697 AM	310	def to_tmp(list):
7f429bde AM	311	"""
	312	Converts list of subtitles (internal format) to tmp format
	313	"""
	314	outl = []
	315	for l in list:
	316	secs = l[0]
	317	h = int(secs/3600)
	318	m = int(int(secs%3600)/60)
	319	s = int(secs%60)
	320	outl.append("%.2d:%.2d:%.2d:%s\n" % (h,m,s,"\|".join(l[2:])))
	321	return outl
6e604697 AM	322
	323
	324	def to_srt(list):
7f429bde AM	325	"""
	326	Converts list of subtitles (internal format) to srt format
	327	"""
	328	outl = []
	329	count = 1
	330	for l in list:
	331	secs1 = l[0]
	332	h1 = int(secs1/3600)
	333	m1 = int(int(secs1%3600)/60)
	334	s1 = int(secs1%60)
	335	f1 = (secs1 - int(secs1))*1000
	336	secs2 = l[1]
	337	h2 = int(secs2/3600)
	338	m2 = int(int(secs2%3600)/60)
	339	s2 = int(secs2%60)
	340	f2 = (secs2 - int(secs2))*1000
	341	outl.append("%d\n%.2d:%.2d:%.2d,%.3d --> %.2d:%.2d:%.2d,%.3d\n%s\n\n" % (count,h1,m1,s1,f1,h2,m2,s2,f2,"\n".join(l[2:])))
	342	count = count + 1
	343	return outl
6e604697 AM	344
	345
	346	def sub_add_offset(list, off):
7f429bde AM	347	"""
	348	Adds an offset (in seconds, may be negative) to all subtitles in the list
	349	input: subtitles (internal format)
	350	returns: new subtitles (internal format)
	351	"""
	352	outl = []
	353	for l in list:
	354	l[0] += off
	355	l[1] += off
	356	if l[0] < 0:
	357	sys.stderr.write("Warning, negative offset too high, subs beginning at 00:00:00\n")
	358	l[0] = 0
	359	if l[1] < 0:
	360	sys.stderr.write("Warning, negative offset too high, subs beginning at 00:00:00\n")
	361	l[1] = 0
	362	outl.append(l)
	363	return outl
6e604697 AM	364
6e604697 AM	365	def sub_split(sub, times):
7f429bde AM	366	"""
	367	Splits subtitles
	368	input: subtitles (internal format) and split positions (in seconds)
	369	returns: a list of lists with new subtitles
	370	"""
	371	pos = 0
	372	num = len(sub)
	373
	374	while pos<num and sub[pos][0]<times[0]:
	375	pos += 1
	376
	377	lists = [ sub[:pos] ] # [subtitles1, subtitles2, ...]
	378
	379	times.append(99999999)
	380	minussec = times.pop(0)
	381
	382	for second in times:
	383	outl = []
	384	while pos<num and sub[pos][0]<second:
	385	subline = [sub[pos][0]-minussec] + [sub[pos][1]-minussec] + sub[pos][2:]
	386	if subline[0] < 0:
	387	subline[0] = 0
	388	if subline[1] < 0:
	389	subline[1] = 0
	390	outl.append(subline)
	391	pos += 1
	392	lists.append(outl)
	393	minussec = second
	394	return lists
6e604697	395
ad54e218	396	def sub_fix_times(sub):
	397	for i in range( len(sub) - 2 ):
	398	approx = 1 + ( len(" ".join(sub[i][2:])) / 10 ) # 10 char per second
	399	# end < start or end > start++ or displayed longer then 20s
	400	if (sub[i][1] < sub[i][0]) or (sub[i][1] > sub[i + 1][0]) or ( sub[i][1] - sub[i][0] > 20):
	401	if ( sub[i][0] + approx ) < sub[i + 1][0]:
	402	sub[i][1] = sub[i][0] + approx
	403	else:
	404	sub[i][1] = sub[i + 1][0] - 1.0 / 10
	405	return sub
	406
6e604697	407	def get_split_times(str):
7f429bde AM	408	"""
	409	Converts comma-separated string of "xx:yy:zz,xx:yy:zz,..." times to list of times (in seconds)
	410	input: string of comma-separated xx:yy:zz time positions
	411	returns: list of times
	412	"""
	413	tlist = str.split(",")
	414	re1 = re.compile("^(\d+):(\d+):(\d+)")
	415	times = []
	416	for t in tlist:
	417	m = re1.match(t, 0)
	418	if not m:
	419	sys.stderr.write("Unknown time format\n")
	420	return []
	421	times.append(int(m.group(1))3600 + int(m.group(2))60 + int(m.group(3)))
	422	return times
6e604697 AM	423
	424
	425	def read_subs(file,fmt,fps):
7f429bde AM	426	"""
	427	Reads subtitles fomr file, using format fmt
	428	input : file name, format (srt,mdvd,tmp,auto)
	429	returns: list of subtitles in form: [[time in secs, line1, ...],[time in secs, line1, ...],....]
	430	"""
	431	src = open(file,'r')
	432	subs = src.readlines()
	433	src.close()
	434	if fmt == "tmp":
	435	return read_tmp(subs)
	436	elif fmt == "srt":
	437	return read_srt(subs)
	438	elif fmt == "mdvd":
	439	if fps == -1:
64053442	440	fps = detect_file_fps(file)
ec06e0aa AM	441	if not fps:
ec06e0aa AM	442	fps = detect_fps(subs)
7f429bde AM	443	return read_mdvd(subs, fps)
7f429bde AM	444	elif fmt == "auto":
fe7ee42a	445	fmt = detect_format(subs)
	446	sys.stderr.write("Guessing subs format .. %s\n" % fmt )
	447	return read_subs(file,fmt,fps)
7f429bde AM	448	elif fmt == "sub2":
	449	return read_sub2(subs)
	450	elif fmt == "mpl2":
	451	return read_mpl2(subs)
	452	else:
	453	sys.stderr.write("Input format not specified/recognized\n")
	454	sys.exit(1)
6e604697 AM	455
	456
	457	#
	458	#-----------------------------------------------------------------------------------------
	459
fe7ee42a	460	def main(argv=sys.argv):
6e604697	461
fe7ee42a	462	outfunc = {
	463	"srt":to_srt,
	464	"tmp":to_tmp}
7f429bde	465
fe7ee42a	466	infmt = "auto"
	467	outfmt = "srt"
	468	subdelay = 0
	469	fps = -1
	470	#out_to_file == 1 => output to a file, 0 => output stdout, -1 => Split, output to stdout not allowed
7f429bde	471	out_to_file = 0
6e604697	472
fe7ee42a	473	try:
	474	opts, args = getopt.getopt(sys.argv[1:], 'i:o:a:s:S:f:h')
	475	except getopt.GetoptError:
	476	usage()
	477	sys.exit(2)
	478
	479	splittimes = []
	480
	481	for opt, arg in opts:
	482	if opt == '-o':
	483	if outfunc.has_key(arg):
	484	outfmt = arg
	485	else:
	486	sys.stderr.write("Unknown output format.\n")
	487	sys.exit(1)
	488	elif opt == '-i':
	489	infmt = arg
	490	elif opt == '-a':
	491	subdelay = float(arg)
	492	elif opt == '-s':
	493	subdelay = -float(arg)
	494	elif opt == '-S':
	495	out_to_file = -1
	496	splittimes = get_split_times(arg)
	497	elif opt == '-f':
	498	fps = float(arg)
	499	elif opt == '-h':
	500	usage()
	501	sys.exit(1)
	502
	503	# number of file names must be 2 + number of split-points
	504	if len(args) == len(splittimes)+2:
	505	out_to_file = 1
	506	elif len(args) == len(splittimes)+1 and out_to_file != -1:
	507	out_to_file = 0
	508	else:
	509	sys.stderr.write("Too few file names given!\n")
	510	usage()
	511	sys.exit(1)
6e604697	512
fe7ee42a	513	# read file
fe7ee42a	514	sub = read_subs(args.pop(0),infmt,fps)
6e604697	515
fe7ee42a	516	# apply DELAY
	517	if subdelay != 0:
	518	sub = sub_add_offset(sub, subdelay)
6e604697	519
fe7ee42a	520	# apply SPLIT
	521	if len(splittimes) == 0:
	522	sub_list = [sub]
7f429bde	523	else:
fe7ee42a	524	sub_list = sub_split(sub, splittimes)
	525
	526	# save file(S)
	527	for nsub in sub_list:
	528	s = outfunc[outfmt](nsub)
	529	if out_to_file == 1:
	530	dst = open(args.pop(0), 'w')
	531	dst.writelines(s)
	532	dst.close()
	533	else:
	534	sys.stdout.writelines(s)
	535
	536	if __name__ == "__main__":
	537	ret = None
	538	try:
	539	ret = main()
	540	except (KeyboardInterrupt, SystemExit):
	541	print >> sys.stderr, "%s: Interrupted, aborting." % os.path.basename(sys.argv[0])
	542	sys.exit(ret)