]> git.pld-linux.org Git - packages/subconv.git/blame - subconv.py
- rel 3; get dirname from whole path
[packages/subconv.git] / subconv.py
CommitLineData
6e604697
AM
1#!/usr/bin/env python
2#
109e065c
AM
3# subconv
4# divx subtitles converter by Pawel Stolowski, Julien Lerouge
5# mpl2 by Grzegorz Zyla
6#
7# Maintained at http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/subconv/
6e604697
AM
8#
9# Released under terms of GNU GPL
10#
11
d1b6ba95
AM
12import re
13import sys
14import getopt
15import string
16import os
17import subprocess
18import urllib2
19
20from xml.dom import minidom
21
22try:
23 from hashlib import md5 as md5
24except ImportError:
25 from md5 import md5
6e604697
AM
26
def usage():
    """
    Write the command line help to standard error.
    The synopsis lists every option accepted by main(), including -f.
    """
    sys.stderr.write("""
 subconv -- DivX subtitles converter by Pawel Stolowski, Julien Lerouge

 Usage: subconv [-i fmt|-o fmt|-f fps|-a sec|-s sec|-S h:m:s[,h:m:s,...]] input [output1, output2, ...]

 -i fmt        input format (one of: srt, tmp, mdvd, sub2, mpl2, auto; auto by default)
 -o fmt        output format (one of: tmp, srt; srt by default)
 -f fps        adjust fps rate for microdvd input subtitles (auto by default)
 -a sec        adjust subtitle delay in seconds (add)
 -s sec        adjust subtitle delay in seconds (sub)
 -S h:m:s,...  split subtitles in selected position(s); additional output file names must be specified
 -h            this help

""")
6e604697
AM
42
43
ec06e0aa
AM
def detect_file_fps(file):
    """
    Detect the FPS of the media file that accompanies a subtitle file.

    Every other entry of the subtitle's directory whose name starts with the
    subtitle name minus its last four characters (the ".ext" part) is probed
    with `mediainfo`, then with `file`, then through the napiprojekt.pl web
    API; the first usable answer wins.

    input: subtitle file name (may include a directory part)
    returns: FPS as float, or False when it could not be determined
    """
    def tool_output(argv):
        """Run an external command, return its stripped stdout ('' on failure)."""
        try:
            proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError:
            # the tool is not installed
            return ''
        out, _err = proc.communicate()
        if not out:
            return ''
        if not isinstance(out, str):
            # Python 3 hands back bytes; the regexp below needs text
            out = out.decode('utf-8', 'replace')
        return out.strip()

    def mediainfo_fps(path):
        """Ask mediainfo for the video frame rate."""
        out = tool_output(['mediainfo', '--Inform=Video;%FrameRate%', path])
        if not out:
            return False
        try:
            return float(out)
        except ValueError:
            # unexpected output, treat as "unknown"
            return False

    def file_fps(path):
        """Look for '<number> fps,' in the output of file(1)."""
        out = tool_output(['file', path])
        m = re.match(r'^.*, (\d+\.{0,1}\d{0,}) fps,.*', out)
        if not m:
            return False
        fps = m.group(1)
        if fps == '23.98':
            # file(1) rounds NTSC film rate, use the exact value
            return 24/1.001
        return float(fps)

    def napiprojekt_fps(path):
        """Query napiprojekt.pl using the md5 of the first 10MiB of the file."""
        try:
            try:
                from urllib2 import quote, urlopen
            except ImportError:
                # Python 3 location of the same functions
                from urllib.parse import quote
                from urllib.request import urlopen
            d = md5()
            # binary mode: the digest must be computed over raw bytes
            with open(path, 'rb') as media:
                d.update(media.read(10485760))
            url = "http://napiprojekt.pl/api/api.php?mode=file_info&client=pynapi&id=%s" % (quote(d.hexdigest()))
            reply = urlopen(url)
            try:
                fps_xml = reply.read()
            finally:
                reply.close()
            nodes = minidom.parseString(fps_xml).getElementsByTagName("fps")
            fps = " ".join(t.nodeValue for t in nodes[0].childNodes if t.nodeType == t.TEXT_NODE)
            if fps:
                return float(fps)
        except Exception:
            # best effort lookup: network, XML or parse problems mean "unknown"
            return False
        return False

    sys.stdout.write("Guessing fps")
    directory = os.path.dirname(file) or '.'
    subname = os.path.basename(file)
    if len(subname) > 4:
        prefix = subname[:-4]
        for name in os.listdir(directory):
            # plain prefix test: the name may contain regexp special characters;
            # the subtitle file itself is never a media file
            if name == subname or not name.startswith(prefix):
                continue
            # listdir() returns bare names, probe the file inside its directory
            path = os.path.join(directory, name)
            fps = mediainfo_fps(path) or file_fps(path) or napiprojekt_fps(path)
            if fps:
                sys.stdout.write(" from file %s: %.3f\n" % (name, fps))
                return fps
    sys.stdout.write("  .. unknown\n")
    return False
6e604697
AM
123
def detect_fps(list):
    """
    Ask the user for the FPS of a micro-dvd subtitles file.
    For each common frame rate the approximate length of the movie (based on
    the last subtitle frame) is written to stderr, then the choice is read
    from stdin.
    input: contents of a file as list
    returns: FPS; exits with status 1 on bad choice or when the list holds
             no micro-dvd line
    """
    most_current = [24/1.001, 25.0, 30/1.001]
    re_mdvd = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")

    # last frame number mentioned in the file (searching backwards)
    last = None
    for line in reversed(list):
        m = re_mdvd.match(line)
        if m:
            # the end frame may be empty, use the start frame then
            last = int(m.group(2) or m.group(1))
            break
    if last is None:
        sys.stderr.write("No micro-dvd subtitles found, unable to guess FPS\n")
        sys.exit(1)

    sys.stderr.write("FPS guessing, here are approximate length of file for several FPS :\n")
    for i, fps in enumerate(most_current):
        minutes, seconds = divmod(int(last / fps), 60)
        sys.stderr.write("%s %.3f Fps -> " % (str(i), fps))
        sys.stderr.write(str(minutes)+" min "+str(seconds)+"sec\n")
    sys.stderr.write("Choice : ")
    try:
        choice = int(sys.stdin.readline().strip())
    except ValueError:
        # not a number: handled as any other bad choice
        choice = -1
    if 0 <= choice < len(most_current):
        return most_current[choice]
    sys.stderr.write("Bad choice\n")
    sys.exit(1)
6e604697
AM
154
155
def detect_format(list):
    """
    Guess the subtitles format from the first recognizable line.
    input: contents of a file as list
    returns: format name (mdvd, srt, tmp, sub2, mpl2) or "" if no line matches
    """
    # checked in this order for every line
    signatures = (
        ("mdvd", re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")),
        ("srt", re.compile(r"^(\d+):(\d+):(\d+),\d+\s*-->.*")),
        ("tmp", re.compile(r"^(\d+):(\d+):(\d+):(.*)")),
        ("sub2", re.compile(r"^(\d+):(\d+):(\d+)\.\d+\s*\,.*")),
        ("mpl2", re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")),
    )
    for line in list:
        for name, pattern in signatures:
            if pattern.match(line):
                return name
    return ""
6e604697
AM
179
180
def read_mdvd(list, fps):
    """
    Read micro-dvd subtitles.
    Leading {c:...} (colour) and {y:...} (style) tags are removed from every
    text line; a tag without closing brace is left untouched.
    The input list is only read, it is not consumed.
    input: contents of a file as list, frames per second
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    re1 = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")
    # float: avoid integer division when fps is passed as int
    fps = float(fps)

    subtitles = []
    for x in list:
        m = re1.match(x)
        if not m:
            continue
        time1 = int(m.group(1))
        if m.group(2) == '':
            # no end frame given: show the line for 20 frames
            time2 = time1 + 20
        else:
            time2 = int(m.group(2))
        subt = [time1 / fps, time2 / fps]
        for text in m.group(3).strip().split("|"):
            while text[:3].lower() in ('{c:', '{y:'):
                end_marker = text.find('}')
                if end_marker == -1:
                    # unterminated tag, keep the text as is
                    break
                text = text[end_marker + 1:]
            subt.append(text)
        subtitles.append(subt)
    return subtitles
6e604697 211
def read_mpl2(list):
    """
    Read mpl2 subtitles ([start][end]text, times in tenths of a second).
    The input list is consumed (left empty).
    input: contents of a file as list
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],.....]
    """
    pattern = re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")
    subtitles = []
    while list:
        found = pattern.match(list.pop(0))
        if found is None:
            continue
        begin, end, text = found.groups()
        subtitles.append([int(begin)*0.1, int(end)*0.1] + text.strip().split("|"))
    return subtitles
228
def read_sub2(list):
    """
    Reads subviewer 2.0 format subtitles, e.g. :
        00:01:54.75,00:01:58.54
        You shall not pass!
    The line after the timing line holds the text, "[br]" separates text
    lines. A timing line without following text line ends the reading with
    a warning on stderr. The input list is consumed.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    stamp = re.compile(r"^(\d+):(\d+):(\d+)\.(\d+)\s*\,\s*(\d+):(\d+):(\d+)\.(\d+).*$")

    def seconds(h, m, s, cs):
        # fraction is taken as hundredths of a second
        return int(h)*3600 + int(m)*60 + int(s) + int(cs)/100.0

    subtitles = []
    while list:
        hit = stamp.match(list.pop(0))
        if hit is None:
            continue
        if not list:
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        fields = hit.groups()
        text = list.pop(0).strip()
        subtitles.append([seconds(*fields[:4]), seconds(*fields[4:])] + text.split("[br]"))
    return subtitles
6e604697
AM
253
def read_srt(list):
    """
    Reads srt subtitles.
    A subtitle is: index line, timing line, text lines up to an empty line.
    The last subtitle is kept also when the file does not end with an empty
    line. An index line at the very end of input (no timing line) gives
    a warning on stderr. The input list is only read, it is not consumed.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+)\s*$")
    re2 = re.compile(r"^(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+).*$")
    re3 = re.compile(r"^\s*$")
    subtitles = []
    # one shared iterator: the inner loop continues where the outer one stopped
    lines = iter(list)
    for line in lines:
        if not re1.match(line):
            continue
        timing = next(lines, None)
        if timing is None:
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        m = re2.match(timing)
        if not m:
            continue
        subt = [int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3)) + int(m.group(4))/1000.0]
        subt.append(int(m.group(5))*3600 + int(m.group(6))*60 + int(m.group(7)) + int(m.group(8))/1000.0)
        for l in lines:
            if re3.match(l):
                break
            subt.append(l.strip())
        # reached also at end of input, so the final subtitle is not lost
        subtitles.append(subt)
    return subtitles
6e604697
AM
280
def read_tmp(list):
    """
    Reads tmplayer (tmp) subtitles.
    Lines with the same start time are merged. The format has no end time:
    a subtitle ends when another one starts, but not later than after
    4 seconds. The input list is only read, it is not consumed.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+):(.*)")
    subs = {}
    for line in list:
        m = re1.match(line)
        if not m:
            continue
        time = int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3))
        subs.setdefault(time, []).extend(m.group(4).strip().split("|"))

    subtitles = []
    for time in sorted(subs):
        next_time = 1
        while next_time < 4 and (time + next_time) not in subs:
            next_time = next_time + 1
        subtitles.append([time, time + next_time] + subs[time])
    return subtitles
6e604697
AM
309
def to_tmp(list):
    """
    Converts list of subtitles (internal format) to tmp format.
    Only the start time is written (whole seconds); text lines are joined
    with "|".
    returns: list of output lines
    """
    def render(entry):
        start = entry[0]
        hours = int(start/3600)
        minutes = int(start%3600)//60
        seconds = int(start%60)
        return "%.2d:%.2d:%.2d:%s\n" % (hours, minutes, seconds, "|".join(entry[2:]))

    return [render(entry) for entry in list]
6e604697
AM
322
323
def to_srt(list):
    """
    Converts list of subtitles (internal format) to srt format.
    Times are rounded to whole milliseconds; negative times are written
    as 00:00:00,000.
    returns: list of srt entries (index, timing line, text lines, empty line)
    """
    def stamp(secs):
        # round first: cutting the float fraction made ",000" out of 1.001s
        total_ms = max(0, int(round(secs * 1000)))
        rest, ms = divmod(total_ms, 1000)
        h, rest = divmod(rest, 3600)
        m, s = divmod(rest, 60)
        return "%.2d:%.2d:%.2d,%.3d" % (h, m, s, ms)

    outl = []
    count = 1
    for l in list:
        outl.append("%d\n%s --> %s\n%s\n\n" % (count, stamp(l[0]), stamp(l[1]), "\n".join(l[2:])))
        count = count + 1
    return outl
6e604697
AM
344
345
def sub_add_offset(list, off):
    """
    Shifts start and end time of all subtitles by an offset (in seconds,
    may be negative). Times that would become negative are set to 0 and
    a warning is written to stderr. The entries are modified in place.
    input: subtitles (internal format), offset
    returns: new list holding the (same) shifted entries
    """
    shifted = []
    for entry in list:
        for slot in (0, 1):
            entry[slot] += off
        for slot in (0, 1):
            if entry[slot] < 0:
                sys.stderr.write("Warning, negative offset too high, subs beginning at 00:00:00\n")
                entry[slot] = 0
        shifted.append(entry)
    return shifted
6e604697
AM
364
def sub_split(sub, times):
    """
    Splits subtitles.
    The first part holds the subtitles starting before times[0], unchanged.
    Every next part holds the subtitles up to the next split position, with
    times counted from the split position that opens the part (negative
    results are set to 0). Both sub and times are expected to be sorted;
    neither of them is modified.
    input: subtitles (internal format) and split positions (in seconds)
    returns: a list of lists with new subtitles (len(times) + 1 parts)
    """
    if not times:
        # nothing to split
        return [sub[:]]

    pos = 0
    num = len(sub)

    while pos < num and sub[pos][0] < times[0]:
        pos += 1

    lists = [sub[:pos]]  # [subtitles1, subtitles2, ...]

    # upper bounds of the remaining parts; the last part is open-ended
    bounds = [t for t in times[1:]] + [float("inf")]
    minussec = times[0]

    for second in bounds:
        outl = []
        while pos < num and sub[pos][0] < second:
            start = sub[pos][0] - minussec
            end = sub[pos][1] - minussec
            outl.append([max(start, 0), max(end, 0)] + sub[pos][2:])
            pos += 1
        lists.append(outl)
        minussec = second
    return lists
6e604697 395
def sub_fix_times(sub):
    """
    Fixes suspicious end times. The end time of a subtitle is recomputed
    when it lies before its start, after the start of the next subtitle or
    more than 20s after its start. The new end is start + estimated reading
    time (1s + 1s per 10 characters) or, if that would reach the next
    subtitle, 0.1s before the next start.
    Every subtitle that has a successor is checked; the last one is left
    as is. Entries are modified in place.
    input: subtitles (internal format)
    returns: the same list
    """
    # i + 1 must stay inside the list, so the last usable index is len - 2
    for i in range(len(sub) - 1):
        cur = sub[i]
        next_start = sub[i + 1][0]
        approx = 1 + len(" ".join(cur[2:])) // 10  # 10 char per second
        # end < start or end > start++ or displayed longer than 20s
        if cur[1] < cur[0] or cur[1] > next_start or cur[1] - cur[0] > 20:
            if cur[0] + approx < next_start:
                cur[1] = cur[0] + approx
            else:
                cur[1] = next_start - 1.0 / 10
    return sub
406
def get_split_times(str):
    """
    Parses the argument of the split option.
    input: string of comma-separated xx:yy:zz (hours:minutes:seconds) positions
    returns: list of times in seconds; empty list (and a message on stderr)
             if any position does not start with xx:yy:zz
    """
    stamp = re.compile(r"^(\d+):(\d+):(\d+)")
    times = []
    for piece in str.split(","):
        found = stamp.match(piece)
        if found is None:
            sys.stderr.write("Unknown time format\n")
            return []
        hours, minutes, seconds = [int(g) for g in found.groups()]
        times.append(hours*3600 + minutes*60 + seconds)
    return times
6e604697
AM
423
424
def read_subs(file,fmt,fps):
    """
    Reads subtitles from file, using format fmt.
    The file is read once; with fmt "auto" the format is detected from its
    contents. For mdvd with fps == -1 the frame rate is taken from the
    accompanying media file or, failing that, the user is asked.
    input : file name, format (srt,mdvd,tmp,sub2,mpl2,auto), fps (-1 = detect)
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
             exits with status 1 when the format is not recognized
    """
    # "with" closes the file also when reading fails
    with open(file,'r') as src:
        subs = src.readlines()

    if fmt == "auto":
        fmt = detect_format(subs)
        sys.stderr.write("Guessing subs format .. %s\n" % fmt )

    if fmt == "tmp":
        return read_tmp(subs)
    elif fmt == "srt":
        return read_srt(subs)
    elif fmt == "mdvd":
        if fps == -1:
            fps = detect_file_fps(file)
            if not fps:
                fps = detect_fps(subs)
        return read_mdvd(subs, fps)
    elif fmt == "sub2":
        return read_sub2(subs)
    elif fmt == "mpl2":
        return read_mpl2(subs)
    else:
        sys.stderr.write("Input format not specified/recognized\n")
        sys.exit(1)
6e604697
AM
455
456
457#
458#-----------------------------------------------------------------------------------------
459
def main(argv=sys.argv):
    """
    Command line entry point: parse options, read the input subtitles,
    apply delay and split, write the result to file(s) or stdout.
    input: argument list, program name first (sys.argv by default)
    returns: None; exits with status 1 or 2 on usage errors
    """
    outfunc = {
        "srt":to_srt,
        "tmp":to_tmp}

    infmt = "auto"
    outfmt = "srt"
    subdelay = 0
    fps = -1
    #out_to_file == 1 => output to a file, 0 => output stdout, -1 => Split, output to stdout not allowed
    out_to_file = 0

    try:
        # parse the list we were given, not the global sys.argv
        opts, args = getopt.getopt(argv[1:], 'i:o:a:s:S:f:h')
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    splittimes = []

    for opt, arg in opts:
        if opt == '-o':
            if arg in outfunc:
                outfmt = arg
            else:
                sys.stderr.write("Unknown output format.\n")
                sys.exit(1)
        elif opt == '-i':
            infmt = arg
        elif opt in ('-a', '-s', '-f'):
            try:
                value = float(arg)
            except ValueError:
                sys.stderr.write("Invalid number for %s: %s\n" % (opt, arg))
                sys.exit(1)
            if opt == '-a':
                subdelay = value
            elif opt == '-s':
                subdelay = -value
            else:
                fps = value
        elif opt == '-S':
            out_to_file = -1
            splittimes = get_split_times(arg)
            if not splittimes:
                # already reported by get_split_times; do not go on unsplit
                sys.exit(1)
        elif opt == '-h':
            usage()
            sys.exit(1)

    # number of file names must be 2 + number of split-points
    if len(args) == len(splittimes)+2:
        out_to_file = 1
    elif len(args) == len(splittimes)+1 and out_to_file != -1:
        out_to_file = 0
    else:
        sys.stderr.write("Too few file names given!\n")
        usage()
        sys.exit(1)

    # read file
    sub = read_subs(args.pop(0),infmt,fps)

    # apply DELAY
    if subdelay != 0:
        sub = sub_add_offset(sub, subdelay)

    # apply SPLIT
    if len(splittimes) == 0:
        sub_list = [sub]
    else:
        sub_list = sub_split(sub, splittimes)

    # save file(S)
    for nsub in sub_list:
        s = outfunc[outfmt](nsub)
        if out_to_file == 1:
            # one output file name per part, in order
            with open(args.pop(0), 'w') as dst:
                dst.writelines(s)
        else:
            sys.stdout.writelines(s)
535
if __name__ == "__main__":
    # SystemExit is deliberately not caught here: catching it reported every
    # sys.exit() of main() as "Interrupted" and lost the exit status.
    try:
        ret = main()
    except KeyboardInterrupt:
        sys.stderr.write("%s: Interrupted, aborting.\n" % os.path.basename(sys.argv[0]))
        sys.exit(1)
    sys.exit(ret)
This page took 0.135798 seconds and 4 git commands to generate.