]> git.pld-linux.org Git - packages/subconv.git/blame - subconv.py
- mediainfo, file based fps detection
[packages/subconv.git] / subconv.py
CommitLineData
6e604697
AM
1#!/usr/bin/env python
2#
109e065c
AM
3# subconv
4# divx subtitles converter by Pawel Stolowski, Julien Lerouge
5# mpl2 by Grzegorz Zyla
6#
7# Maintained at http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/subconv/
6e604697
AM
8#
9# Released under terms of GNU GPL
10#
11
ec06e0aa 12import re, sys, getopt, string, os, subprocess
6e604697
AM
13
def usage():
    """
    Print the command-line help text to standard error.

    The synopsis line lists every option that the option table below it
    documents (including -f and -h).
    """
    sys.stderr.write("""
    subconv -- DivX subtitles converter by Pawel Stolowski, Julien Lerouge

    Usage: subconv [-i fmt|-o fmt|-f fps|-a sec|-s sec|-S h:m:s[,h:m:s,...]|-h] input [output1, output2, ...]

    -i fmt        input format (one of: srt, tmp, mdvd, sub2, mpl2, auto; auto by default)
    -o fmt        output format (one of: tmp, srt; srt by default)
    -f fps        adjust fps rate for microdvd input subtitles (auto by default)
    -a sec        adjust subtitle delay in seconds (add)
    -s sec        adjust subtitle delay in seconds (sub)
    -S h:m:s,...  split subtitles in selected position(s); additional output file names must be specified
    -h            this help

    """)
6e604697
AM
29
30
ec06e0aa
AM
def detect_file_fps(file):
    """
    Detect the FPS of a media file that accompanies a subtitle file.

    Every entry of the subtitle's directory whose name starts with the
    subtitle's base name minus its last four characters (a ".xxx"
    extension) is probed, first with `mediainfo`, then with `file`.
    Progress messages go to stderr so they never mix with subtitles
    written to stdout.

    input: subtitle file name (may contain a directory part)
    returns: FPS as float, or False when it cannot be determined
    """
    def run(cmd):
        # A missing helper program must not abort the whole conversion.
        try:
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out = proc.communicate()[0]
        except OSError:
            return ""
        if not isinstance(out, str):
            # Python 3 returns bytes from the pipe
            out = out.decode("utf-8", "replace")
        return out

    def mediainfo_fps(path):
        fields = run(['mediainfo', '--Inform=Video;%FrameRate%', path]).split()
        if not fields:
            return False
        try:
            return float(fields[0])
        except ValueError:
            return False

    def file_fps(path):
        m = re.match(r'^.*, (\d+\.{0,1}\d{0,}) fps,.*', run(['file', path]))
        if m:
            return float(m.group(1))
        return False

    # Split off the directory *before* reducing to the base name; taking
    # dirname() of a base name always yields '' and loses the directory.
    dir, name = os.path.split(file)
    if len(name) <= 4:
        return False
    if not dir:
        dir = '.'
    prefix = name[:-4]

    sys.stderr.write("Guessing fps")
    try:
        candidates = sorted(os.listdir(dir))
    except OSError:
        candidates = []
    for candidate in candidates:
        # Plain prefix test: the file name is not a regular expression.
        # The subtitle file itself can never provide a frame rate.
        if candidate == name or not candidate.startswith(prefix):
            continue
        path = os.path.join(dir, candidate)
        fps = mediainfo_fps(path)
        if not fps:
            fps = file_fps(path)
        if fps:
            sys.stderr.write(" from file %s: %.3f\n" % (candidate, fps))
            return fps
    sys.stderr.write(" .. unknown\n")
    return False
6e604697
AM
75
def detect_fps(list):
    """
    Ask the user to pick the FPS of a micro-dvd subtitle file.

    The last frame number found in the file is converted to a running
    time for each common frame rate; the user chooses the plausible one
    by typing its index on stdin.

    input: contents of a file as list
    returns: FPS
    Exits with status 1 when no micro-dvd line exists or the choice is
    not a valid index.
    """
    sys.stderr.write("FPS guessing, here are approximate length of file for several FPS :\n")
    most_current = [24/1.001, 25.0, 30/1.001]

    re_mdvd = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")

    # Scan backwards for the last subtitle line; never index past the start.
    last = None
    for line in reversed(list):
        m = re_mdvd.match(line)
        if m:
            # The end frame may be empty ("{123}{}text"); use the start frame then.
            last = int(m.group(2) or m.group(1))
            break
    if last is None:
        sys.stderr.write("No micro-dvd subtitle line found\n")
        sys.exit(1)

    for i, rate in enumerate(most_current):
        sys.stderr.write("%s %.3f Fps -> " % (str(i), rate))
        minutes, seconds = divmod(int(last / rate), 60)
        sys.stderr.write(str(minutes)+" min "+str(seconds)+"sec\n")
    sys.stderr.write("Choice : ")
    try:
        choice = int(sys.stdin.readline().strip())
    except ValueError:
        # non-numeric or empty answer is simply a bad choice
        choice = -1
    if 0 <= choice < len(most_current):
        return most_current[choice]
    sys.stderr.write("Bad choice\n")
    sys.exit(1)
6e604697
AM
106
107
def detect_format(list):
    """
    Guess the subtitle format from the first recognisable line.

    Lines are removed from the front of the list one at a time until one
    of them matches a known format; progress is reported on stderr.

    input: contents of a file as list (consumed up to the matching line)
    returns: format (mdvd, srt, tmp, sub2, mpl2) or "" if unknown
    """
    # (format name, message for stderr, line pattern) -- tried in this order
    probes = (
        ("mdvd", " mdvd\n", re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")),
        ("srt", " srt\n", re.compile(r"^(\d+):(\d+):(\d+),\d+\s*-->.*")),
        ("tmp", " tmp\n", re.compile(r"^(\d+):(\d+):(\d+):(.*)")),
        ("sub2", " subviewer 2 format\n", re.compile(r"^(\d+):(\d+):(\d+)\.\d+\s*\,.*")),
        ("mpl2", " mpl2\n", re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")),
    )
    sys.stderr.write("Guessing subs format .")
    while list:
        sys.stderr.write(".")
        candidate = list.pop(0)
        for name, banner, pattern in probes:
            if pattern.match(candidate):
                sys.stderr.write(banner)
                return name
    return ""
6e604697
AM
139
140
def read_mdvd(list, fps):
    """
    Read micro-dvd subtitles.

    Lines look like "{start_frame}{end_frame}text|more text"; a missing
    end frame defaults to start + 20 frames.  Leading {c:...} / {y:...}
    style tags are removed from every text line; a tag without a closing
    brace is left untouched instead of aborting the conversion.

    input: contents of a file as list (consumed), frames per second
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    re_line = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")
    # one or more complete colour/style tags at the start of a text line
    re_tags = re.compile(r"^(?:\{[cy]:[^}]*\})+", re.IGNORECASE)
    fps = float(fps)  # guard against integer division on Python 2

    subtitles = []
    while list:
        m = re_line.match(list.pop(0))
        if not m:
            continue
        start = int(m.group(1))
        if m.group(2) == '':
            end = start + 20
        else:
            end = int(m.group(2))
        subt = [start / fps, end / fps]
        for text in m.group(3).strip().split("|"):
            subt.append(re_tags.sub("", text))
        subtitles.append(subt)
    return subtitles
6e604697 171
def read_mpl2(list):
    """
    Parse mpl2 subtitles ("[start][end]text|more text", times in tenths
    of a second).

    input: contents of a file as list (consumed)
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],.....]
    """
    pattern = re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")
    parsed = []
    while list:
        hit = pattern.match(list.pop(0))
        if hit is None:
            continue
        start, end, text = hit.groups()
        parsed.append([int(start)*0.1, int(end)*0.1] + text.strip().split("|"))
    return parsed
188
def read_sub2(list):
    """
    Reads subviewer 2.0 format subtitles, e.g. :
        00:01:54.75,00:01:58.54
        You shall not pass!
    The text line follows the timing line; "[br]" separates display lines.
    The fractional part of a time is scaled by its number of digits, so
    both ".75" and ".750" mean three quarters of a second.

    input: contents of a file as list (consumed)
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re_time = re.compile(r"^(\d+):(\d+):(\d+)\.(\d+)\s*\,\s*(\d+):(\d+):(\d+)\.(\d+).*$")

    def seconds(h, m, s, frac):
        return int(h)*3600 + int(m)*60 + int(s) + int(frac) / float(10 ** len(frac))

    subtitles = []
    while list:
        m = re_time.match(list.pop(0))
        if not m:
            continue
        if not list:
            # timing line without its text line
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        fields = m.groups()
        subt = [seconds(*fields[:4]), seconds(*fields[4:])]
        subt.extend(list.pop(0).strip().split("[br]"))
        subtitles.append(subt)
    return subtitles
6e604697
AM
213
def read_srt(list):
    """
    Reads srt subtitles.

    Each subtitle is an index line, a "h:m:s,ms --> h:m:s,ms" timing
    line and text lines terminated by a blank line.  The final subtitle
    is kept even when the file does not end with a blank line.

    input: contents of a file as list (consumed)
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re_index = re.compile(r"^(\d+)\s*$")
    re_timing = re.compile(r"^(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+).*$")
    re_blank = re.compile(r"^\s*$")
    truncated = "Warning: it seems like input file is damaged or too short.\n"

    subtitles = []
    while list:
        if not re_index.match(list.pop(0)):
            continue
        if not list:
            # index line with nothing after it
            sys.stderr.write(truncated)
            break
        m = re_timing.match(list.pop(0))
        if not m:
            continue
        if not list:
            # timing line without any text
            sys.stderr.write(truncated)
            break
        subt = [int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3)) + int(m.group(4))/1000.0]
        subt.append(int(m.group(5))*3600 + int(m.group(6))*60 + int(m.group(7)) + int(m.group(8))/1000.0)
        # Collect text until a blank line OR the end of the input.
        while list:
            l = list.pop(0)
            if re_blank.match(l):
                break
            subt.append(l.strip())
        subtitles.append(subt)
    return subtitles
6e604697
AM
240
def read_tmp(list):
    """
    Parse tmplayer (tmp) subtitles ("h:m:s:text|more text").

    Lines sharing the same second are merged.  A subtitle ends when the
    next one starts if that happens within three seconds, otherwise four
    seconds after its start.

    input: contents of a file as list (consumed)
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    pattern = re.compile(r"^(\d+):(\d+):(\d+):(.*)")
    by_second = {}
    while list:
        hit = pattern.match(list.pop(0))
        if not hit:
            continue
        hours, minutes, seconds, text = hit.groups()
        stamp = int(hours)*3600 + int(minutes)*60 + int(seconds)
        by_second.setdefault(stamp, []).extend(text.strip().split("|"))

    result = []
    for stamp in sorted(by_second):
        gap = 1
        while gap < 4 and (stamp + gap) not in by_second:
            gap += 1
        result.append([stamp, stamp + gap] + by_second[stamp])
    return result
6e604697
AM
269
def to_tmp(list):
    """
    Render subtitles (internal format) as tmplayer lines.

    Only the start time is written, truncated to whole seconds; the text
    lines of a subtitle are joined with "|".
    """
    def clock(secs):
        # (hours, minutes, seconds) of a start time
        return (int(secs/3600), int(int(secs % 3600)/60), int(secs % 60))

    return ["%.2d:%.2d:%.2d:%s\n" % (clock(entry[0]) + ("|".join(entry[2:]),))
            for entry in list]
6e604697
AM
282
283
def to_srt(list):
    """
    Converts list of subtitles (internal format) to srt format.

    Times are rounded to the nearest millisecond before being split into
    h:m:s,ms.  Truncating the float remainder instead turns e.g. 1.001 s
    into "00:00:01,000" because 1.001 is not exactly representable.

    input: subtitles in form [[time_start, time_end, line1, ...], ...]
    returns: list of strings, one complete srt entry per subtitle
    """
    def stamp(secs):
        # Work on whole milliseconds so rounding carries into s/m/h.
        total_ms = int(round(secs * 1000))
        total_s, ms = divmod(total_ms, 1000)
        total_m, s = divmod(total_s, 60)
        h, m = divmod(total_m, 60)
        return "%.2d:%.2d:%.2d,%.3d" % (h, m, s, ms)

    outl = []
    count = 1
    for l in list:
        outl.append("%d\n%s --> %s\n%s\n\n" % (count, stamp(l[0]), stamp(l[1]), "\n".join(l[2:])))
        count = count + 1
    return outl
6e604697
AM
304
305
def sub_add_offset(list, off):
    """
    Shift every subtitle by an offset given in seconds (may be negative).

    The start and end times of each subtitle are updated in place; a time
    that would become negative is set to 0 and a warning is written to
    stderr.

    input: subtitles (internal format), offset in seconds
    returns: new list holding the (modified) subtitles
    """
    warning = "Warning, negative offset too high, subs beginning at 00:00:00\n"
    shifted = []
    for entry in list:
        entry[0] += off
        entry[1] += off
        for idx in (0, 1):
            if entry[idx] < 0:
                sys.stderr.write(warning)
                entry[idx] = 0
        shifted.append(entry)
    return shifted
6e604697
AM
324
def sub_split(sub, times):
    """
    Splits subtitles at the given positions.

    The first part keeps its original timing; in every later part the
    times are made relative to the split position that starts it (and
    clamped at 0).  The caller's `times` list is left unmodified.

    input: subtitles (internal format) sorted by start time, and split
           positions (in seconds)
    returns: a list of lists with new subtitles, len(times) + 1 parts
             (a single part holding everything when `times` is empty)
    """
    if not times:
        return [sub[:]]

    pos = 0
    num = len(sub)

    # everything before the first split point goes out unchanged
    while pos < num and sub[pos][0] < times[0]:
        pos += 1
    lists = [sub[:pos]]  # [subtitles1, subtitles2, ...]

    # Upper bound of each remaining part; the last part is open-ended.
    bounds = times[1:] + [float("inf")]
    minussec = times[0]

    for second in bounds:
        outl = []
        while pos < num and sub[pos][0] < second:
            subline = [sub[pos][0]-minussec, sub[pos][1]-minussec] + sub[pos][2:]
            if subline[0] < 0:
                subline[0] = 0
            if subline[1] < 0:
                subline[1] = 0
            outl.append(subline)
            pos += 1
        lists.append(outl)
        minussec = second
    return lists
6e604697
AM
355
def get_split_times(str):
    """
    Parse a comma-separated string of "h:m:s" positions into seconds.

    input: string of comma-separated xx:yy:zz time positions
    returns: list of times in seconds; an empty list (after a message on
             stderr) as soon as one position does not start with h:m:s
    """
    stamp = re.compile(r"^(\d+):(\d+):(\d+)")
    result = []
    for chunk in str.split(","):
        hit = stamp.match(chunk)
        if hit is None:
            sys.stderr.write("Unknown time format\n")
            return []
        hours, minutes, seconds = [int(field) for field in hit.groups()]
        result.append(hours*3600 + minutes*60 + seconds)
    return result
6e604697
AM
372
373
def read_subs(file, fmt, fps):
    """
    Reads subtitles from file, using format fmt.

    For micro-dvd input with fps == -1 the frame rate is first looked up
    from a media file next to the subtitles and, failing that, asked from
    the user.  With fmt "auto" the format is detected and the file is
    read again, because detection consumes the line list.

    input : file name, format (srt, mdvd, tmp, sub2, mpl2, auto), fps (-1 = detect)
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    Exits with status 1 when the format is not recognized.
    """
    src = open(file, 'r')
    try:
        subs = src.readlines()
    finally:
        src.close()

    if fmt == "tmp":
        return read_tmp(subs)
    elif fmt == "srt":
        return read_srt(subs)
    elif fmt == "mdvd":
        if fps == -1:
            # The result must land in `fps`: otherwise the -1 sentinel
            # survives, the interactive fallback is skipped and all times
            # come out negative.
            fps = detect_file_fps(file)
            if not fps:
                fps = detect_fps(subs)
        return read_mdvd(subs, fps)
    elif fmt == "auto":
        return read_subs(file, detect_format(subs), fps)
    elif fmt == "sub2":
        return read_sub2(subs)
    elif fmt == "mpl2":
        return read_mpl2(subs)
    else:
        sys.stderr.write("Input format not specified/recognized\n")
        sys.exit(1)
6e604697
AM
402
403
404#
405#-----------------------------------------------------------------------------------------
406
407
# Output writers, keyed by the format name accepted by -o.
outfunc = {
    "srt": to_srt,
    "tmp": to_tmp}

infmt = "auto"
outfmt = "srt"
subdelay = 0
fps = -1  # -1 => detect the frame rate for micro-dvd input
#out_to_file == 1 => output to a file, 0 => output stdout, -1 => Split, output to stdout not allowed
out_to_file = 0


def _float_arg(opt, arg):
    """Parse the numeric argument of option opt; exit with a message if it is not a number."""
    try:
        return float(arg)
    except ValueError:
        sys.stderr.write("Invalid numeric value for %s: %s\n" % (opt, arg))
        sys.exit(1)


try:
    opts, args = getopt.getopt(sys.argv[1:], 'i:o:a:s:S:f:h')
except getopt.GetoptError:
    usage()
    sys.exit(2)

splittimes = []

for opt, arg in opts:
    if opt == '-o':
        if arg in outfunc:
            outfmt = arg
        else:
            sys.stderr.write("Unknown output format.\n")
            sys.exit(1)
    elif opt == '-i':
        infmt = arg
    elif opt == '-a':
        subdelay = _float_arg(opt, arg)
    elif opt == '-s':
        subdelay = -_float_arg(opt, arg)
    elif opt == '-S':
        out_to_file = -1
        splittimes = get_split_times(arg)
        if not splittimes:
            # get_split_times() has already reported the bad position;
            # do not carry on as if no split had been requested.
            sys.exit(1)
    elif opt == '-f':
        fps = _float_arg(opt, arg)
        if fps <= 0:
            sys.stderr.write("fps must be a positive number\n")
            sys.exit(1)
    elif opt == '-h':
        usage()
        sys.exit(1)

#
# number of file names must be 2 + number of split-points
if len(args) == len(splittimes)+2:
    out_to_file = 1
elif len(args) == len(splittimes)+1 and out_to_file != -1:
    out_to_file = 0
else:
    # reached for too few as well as too many names
    sys.stderr.write("Wrong number of file names given!\n")
    usage()
    sys.exit(1)

#
# read file
sub = read_subs(args.pop(0), infmt, fps)

#
# apply DELAY
if subdelay != 0:
    sub = sub_add_offset(sub, subdelay)

#
# apply SPLIT
if len(splittimes) == 0:
    sub_list = [sub]
else:
    sub_list = sub_split(sub, splittimes)

#
# save file(S)
for nsub in sub_list:
    s = outfunc[outfmt](nsub)
    if out_to_file == 1:
        dst = open(args.pop(0), 'w')
        try:
            dst.writelines(s)
        finally:
            dst.close()
    else:
        sys.stdout.writelines(s)
6e604697 486
This page took 0.114982 seconds and 4 git commands to generate.