]>
Commit | Line | Data |
---|---|---|
6e604697 AM |
1 | #!/usr/bin/env python |
2 | # | |
109e065c AM |
3 | # subconv |
4 | # divx subtitles converter by Pawel Stolowski, Julien Lerouge | |
5 | # mpl2 by Grzegorz Zyla | |
6 | # | |
7 | # Maintained at http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/subconv/ | |
6e604697 AM |
8 | # |
9 | # Released under terms of GNU GPL | |
10 | # | |
11 | ||
ec06e0aa | 12 | import re, sys, getopt, string, os, subprocess |
6e604697 AM |
13 | |
def usage():
    """
    Print the command-line help text to standard error.

    The synopsis lists every option the getopt specification of this
    script accepts (-i, -o, -f, -a, -s, -S, -h).
    """
    sys.stderr.write("""
subconv -- DivX subtitles converter by Pawel Stolowski, Julien Lerouge

Usage: subconv [-i fmt|-o fmt|-f fps|-a sec|-s sec|-S h:m:s[,h:m:s,...]|-h] input [output1, output2, ...]

    -i fmt        input format (one of: srt, tmp, mdvd, sub2, mpl2, auto; auto by default)
    -o fmt        output format (one of: tmp, srt; srt by default)
    -f fps        adjust fps rate for microdvd input subtitles (auto by default)
    -a sec        adjust subtitle delay in seconds (add)
    -s sec        adjust subtitle delay in seconds (sub)
    -S h:m:s,...  split subtitles in selected position(s); additional output file names must be specified
    -h            this help

""")
6e604697 AM |
29 | |
30 | ||
ec06e0aa AM |
def detect_file_fps(file):
    """
    Detect the FPS of the media file that accompanies a subtitle file.

    Every file in the subtitle's own directory whose name starts with the
    subtitle name minus its last four characters (the extension) is probed,
    first with the `mediainfo` tool and then with `file`.  Progress messages
    go to stderr so they never mix with subtitles written to stdout.

    input: subtitle file name
    returns: FPS as float, or False when it could not be determined
    """
    def run(cmd):
        # Return the tool's stdout as text; '' when the tool is missing.
        try:
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out = proc.communicate()[0]
        except OSError:
            return ''
        if not isinstance(out, str):
            # Python 3 hands back bytes.
            out = out.decode('utf-8', 'replace')
        return out

    def mediainfo_fps(path):
        # mediainfo prints a blank line for files without a video stream.
        try:
            return float(run(['mediainfo', '--Inform=Video;%FrameRate%', path]).strip())
        except ValueError:
            return False

    def file_fps(path):
        m = re.match(r'^.*, (\d+\.{0,1}\d{0,}) fps,.*', run(['file', path]))
        if m:
            return float(m.group(1))
        return False

    sys.stderr.write("Guessing fps ")
    # Split BEFORE reducing to the base name, otherwise the directory is lost.
    directory, name = os.path.split(file)
    if not directory:
        directory = '.'
    if len(name) > 4:
        prefix = name[:-4]
        try:
            candidates = sorted(os.listdir(directory))
        except OSError:
            candidates = []
        for candidate in candidates:
            # Plain prefix test: file names may contain regex metacharacters.
            if candidate == name or not candidate.startswith(prefix):
                continue
            path = os.path.join(directory, candidate)
            fps = mediainfo_fps(path)
            if not fps:
                fps = file_fps(path)
            if fps:
                sys.stderr.write("from file %s: %.3f\n" % (path, fps))
                return fps
    sys.stderr.write(" .. unknown\n")
    return False
6e604697 AM |
75 | |
def detect_fps(list):
    """
    Ask the user to pick the FPS of a micro-dvd subtitle file.

    The last frame number found in the file is converted to a running time
    for each common frame rate; the choices are shown on stderr and the
    answer is read from stdin.  The process exits with status 1 when the
    file has no micro-dvd line or the answer is not a valid choice.

    input: contents of a file as list
    returns: FPS
    """
    sys.stderr.write("FPS guessing, here are approximate length of file for several FPS :\n")
    most_current = [24/1.001, 25.0, 30/1.001]

    re_mdvd = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")
    last = None
    for line in reversed(list):
        m = re_mdvd.match(line)
        if m:
            # The end frame may be empty ("{123}{}"); use the start frame then.
            last = int(m.group(2) or m.group(1))
            break
    if last is None:
        sys.stderr.write("No micro-dvd subtitle lines found\n")
        sys.exit(1)

    for i, rate in enumerate(most_current):
        sys.stderr.write("%s %.3f Fps -> " % (str(i), rate))
        tot_sec = int(last / rate)
        # Floor division keeps whole minutes on Python 3 as well.
        sys.stderr.write(str(tot_sec // 60)+" min "+str(tot_sec % 60)+"sec\n")
    sys.stderr.write("Choice : ")
    try:
        choice = int(sys.stdin.readline().strip())
    except ValueError:
        # Non-numeric answer: report it as a bad choice, not a traceback.
        choice = -1
    if 0 <= choice < len(most_current):
        return most_current[choice]
    sys.stderr.write("Bad choice\n")
    sys.exit(1)
6e604697 AM |
106 | |
107 | ||
def detect_format(list):
    """
    Detect the format of input subtitles file.

    Lines are tested in order against one signature per format; the first
    line that matches decides.  One progress dot per inspected line is
    written to stderr.  The input list is only read, never modified.

    input: contents of a file as list
    returns: format (mdvd, srt, tmp, sub2, mpl2) or "" if unknown
    """
    # (format name, message for stderr, signature); order matters.
    signatures = (
        ("mdvd", " mdvd\n", re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")),
        ("srt", " srt\n", re.compile(r"^(\d+):(\d+):(\d+),\d+\s*-->.*")),
        ("tmp", " tmp\n", re.compile(r"^(\d+):(\d+):(\d+):(.*)")),
        ("sub2", " subviewer 2 format\n", re.compile(r"^(\d+):(\d+):(\d+)\.\d+\s*\,.*")),
        ("mpl2", " mpl2\n", re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")),
    )
    sys.stderr.write("Guessing subs format .")
    for line in list:
        sys.stderr.write(".")
        for fmt, message, signature in signatures:
            if signature.match(line):
                sys.stderr.write(message)
                return fmt
    # Terminate the progress line so the next message starts on its own line.
    sys.stderr.write(" unknown\n")
    return ""
6e604697 AM |
139 | |
140 | ||
def read_mdvd(list, fps):
    """
    Read micro-dvd subtitles.

    Each subtitle line looks like "{start_frame}{end_frame}text|text".  A
    missing end frame defaults to start + 20 frames.  A leading "{c:...}" or
    "{y:...}" control tag is removed from each text line; a tag without its
    closing brace is left untouched.  The input list is only read.

    input: contents of a file as list, frames per second
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    re1 = re.compile(r"^\{(\d+)\}\{(\d*)\}\s*(.*)")
    # Force true division even if the caller passes an integer rate.
    fps = float(fps)

    subtitles = []
    for line in list:
        m = re1.match(line)
        if not m:
            continue
        time1 = int(m.group(1))
        if m.group(2) == '':
            time2 = time1 + 20
        else:
            time2 = int(m.group(2))
        subt = [time1 / fps, time2 / fps]
        for text in m.group(3).strip().split("|"):
            if text[:3].lower() in ('{c:', '{y:'):
                # find() instead of index(): a missing '}' must not raise.
                end_marker = text.find('}')
                if end_marker != -1:
                    text = text[end_marker + 1:]
            subt.append(text)
        subtitles.append(subt)
    return subtitles
6e604697 | 171 | |
def read_mpl2(list):
    """
    Read mpl2 subtitles, whose lines look like "[start][end]text|text" with
    both times multiplied by 0.1 to obtain seconds.
    input: contents of a file as list (emptied while it is read)
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],.....]
    """
    line_re = re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")
    parsed = []
    for raw in list:
        hit = line_re.match(raw, 0)
        if hit is None:
            continue
        begin, end, body = hit.groups()
        parsed.append([int(begin)*0.1, int(end)*0.1] + body.strip().split("|"))
    # Drain the caller's list: this reader consumes its input.
    del list[:]
    return parsed
188 | ||
def read_sub2(list):
    """
    Reads subviewer 2.0 format subtitles, e.g. :
        00:01:54.75,00:01:58.54
        You shall not pass!
    The single text line that follows a timing line is split on "[br]".
    A timing line at the very end of the input has no text; a warning is
    written to stderr and that entry is skipped.  The input list is only read.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+)\.(\d+)\s*\,\s*(\d+):(\d+):(\d+)\.(\d+).*$")

    def seconds(hours, minutes, secs, fraction):
        # The digits after the dot are a decimal fraction of any width
        # (".5", ".75", ".750"), not necessarily hundredths.
        return int(hours)*3600 + int(minutes)*60 + int(secs) + float("0." + fraction)

    subtitles = []
    lines = iter(list)
    for line in lines:
        m = re1.match(line)
        if not m:
            continue
        text = next(lines, None)
        if text is None:
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        fields = m.groups()
        subt = [seconds(*fields[:4]), seconds(*fields[4:])]
        subt.extend(text.strip().split("[br]"))
        subtitles.append(subt)
    return subtitles
6e604697 AM |
213 | |
def read_srt(list):
    """
    Reads srt subtitles.

    A subtitle is a counter line, a "h:m:s,ms --> h:m:s,ms" timing line and
    the text lines up to the next blank line or the end of the input, so
    the last subtitle is kept even when the file lacks a trailing blank
    line.  A counter line with nothing after it triggers a warning on
    stderr.  The input list is only read.

    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+)\s*$")
    re2 = re.compile(r"^(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+).*$")
    re3 = re.compile(r"^\s*$")

    def seconds(hours, minutes, secs, millis):
        return int(hours)*3600 + int(minutes)*60 + int(secs) + int(millis)/1000.0

    subtitles = []
    lines = iter(list)
    for line in lines:
        if not re1.match(line):
            continue
        timing = next(lines, None)
        if timing is None:
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        m = re2.match(timing)
        if not m:
            continue
        fields = m.groups()
        subt = [seconds(*fields[:4]), seconds(*fields[4:])]
        # Shares the iterator with the outer loop: text lines are consumed
        # here, and running out of input simply ends the subtitle.
        for text in lines:
            if re3.match(text):
                break
            subt.append(text.strip())
        subtitles.append(subt)
    return subtitles
6e604697 AM |
240 | |
def read_tmp(list):
    """
    Reads tmplayer (tmp) subtitles, whose lines look like "h:m:s:text|text".
    Texts sharing one start second are merged.  A subtitle ends where the
    next one starts when that happens 1 to 3 seconds later, otherwise
    4 seconds after its start.
    input: contents of a file as list (emptied while it is read)
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    line_re = re.compile(r"^(\d+):(\d+):(\d+):(.*)")
    by_second = {}
    for raw in list:
        hit = line_re.match(raw, 0)
        if not hit:
            continue
        hours, minutes, secs, body = hit.groups()
        stamp = int(hours)*3600 + int(minutes)*60 + int(secs)
        by_second.setdefault(stamp, []).extend(body.strip().split("|"))
    # Drain the caller's list: this reader consumes its input.
    del list[:]

    subtitles = []
    for stamp in sorted(by_second):
        length = 1
        while length < 4 and (stamp + length) not in by_second:
            length += 1
        subtitles.append([stamp, stamp + length] + by_second[stamp])
    return subtitles
6e604697 AM |
269 | |
def to_tmp(list):
    """
    Render subtitles (internal format) as tmp lines: "hh:mm:ss:" followed
    by the text lines joined with "|".  Only the start time is written.
    """
    def render(entry):
        start = entry[0]
        hours = int(start/3600)
        minutes = int(int(start%3600)/60)
        seconds = int(start%60)
        return "%.2d:%.2d:%.2d:%s\n" % (hours, minutes, seconds, "|".join(entry[2:]))

    return [render(entry) for entry in list]
6e604697 AM |
282 | |
283 | ||
def to_srt(list):
    """
    Converts list of subtitles (internal format) to srt format.

    input: subtitles in form [[time_start in secs, time_end in secs, line1, ...], ...]
    returns: list of strings, one numbered srt block per subtitle
    """
    def stamp(secs):
        # Round once to whole milliseconds: plain truncation turns a float
        # such as 2.3 (stored as 2.2999...) into ",299", and rounding the
        # fraction alone could not carry into the seconds field.
        total_ms = int(round(secs * 1000))
        whole, millis = divmod(total_ms, 1000)
        hours, rest = divmod(whole, 3600)
        minutes, seconds = divmod(rest, 60)
        return "%.2d:%.2d:%.2d,%.3d" % (hours, minutes, seconds, millis)

    outl = []
    count = 1
    for l in list:
        outl.append("%d\n%s --> %s\n%s\n\n" % (count, stamp(l[0]), stamp(l[1]), "\n".join(l[2:])))
        count = count + 1
    return outl
6e604697 AM |
304 | |
305 | ||
def sub_add_offset(list, off):
    """
    Shift the start and end time of every subtitle by off seconds (off may
    be negative).  A time that would become negative is set to 0 and a
    warning is written to stderr.
    input: subtitles (internal format); the entries are updated in place
    returns: a new list holding the same, shifted entries
    """
    warning = "Warning, negative offset too high, subs beginning at 00:00:00\n"
    shifted = []
    for entry in list:
        # Shift both times first, then clamp, start time before end time.
        for idx in (0, 1):
            entry[idx] = entry[idx] + off
        for idx in (0, 1):
            if entry[idx] < 0:
                sys.stderr.write(warning)
                entry[idx] = 0
        shifted.append(entry)
    return shifted
6e604697 AM |
324 | |
def sub_split(sub, times):
    """
    Splits subtitles at the given positions.

    The first part holds the subtitles that start before times[0],
    unchanged.  Every later part has its times shifted so that the part
    starts at 0; times that would become negative are set to 0.  Both
    arguments are left unmodified.  Subtitles and split positions are
    assumed to be in ascending order.

    input: subtitles (internal format) and split positions (in seconds)
    returns: a list of lists with new subtitles, len(times) + 1 parts
             ([sub] when times is empty)
    """
    if not times:
        return [sub]

    pos = 0
    num = len(sub)

    while pos < num and sub[pos][0] < times[0]:
        pos += 1

    lists = [sub[:pos]]  # [subtitles1, subtitles2, ...]

    # Upper bound of each remaining part; infinity closes the last one.
    # Built as a fresh list so the caller's split positions stay intact.
    bounds = list(times[1:]) + [float("inf")]
    minussec = times[0]

    for second in bounds:
        outl = []
        while pos < num and sub[pos][0] < second:
            entry = sub[pos]
            outl.append([max(entry[0] - minussec, 0), max(entry[1] - minussec, 0)] + entry[2:])
            pos += 1
        lists.append(outl)
        minussec = second
    return lists
6e604697 AM |
355 | |
def get_split_times(str):
    """
    Turn a comma-separated "h:m:s,h:m:s,..." string into split positions.
    input: string of comma-separated h:m:s time positions
    returns: list of times in seconds; an empty list (after a message on
             stderr) as soon as one position does not parse
    """
    stamp_re = re.compile(r"^(\d+):(\d+):(\d+)")
    seconds = []
    for piece in str.split(","):
        hit = stamp_re.match(piece, 0)
        if hit is None:
            sys.stderr.write("Unknown time format\n")
            return []
        hours, minutes, secs = [int(group) for group in hit.groups()]
        seconds.append(hours*3600 + minutes*60 + secs)
    return seconds
6e604697 AM |
372 | |
373 | ||
def read_subs(file,fmt,fps):
    """
    Reads subtitles from file, using format fmt.

    With fmt "auto" the format is detected from the file contents first.
    For micro-dvd input with fps == -1 the frame rate is taken from the
    accompanying media file and, failing that, asked from the user.
    Exits with status 1 when the format is unknown.

    input : file name, format (srt,mdvd,tmp,sub2,mpl2,auto), fps (-1 = detect)
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    src = open(file,'r')
    try:
        subs = src.readlines()
    finally:
        # Close the handle even when reading fails.
        src.close()

    if fmt == "auto":
        # Detect on a copy: detect_format may consume the list it is given.
        fmt = detect_format(subs[:])

    if fmt == "tmp":
        return read_tmp(subs)
    elif fmt == "srt":
        return read_srt(subs)
    elif fmt == "mdvd":
        if fps == -1:
            # The result must land in fps itself, otherwise the -1
            # placeholder would be used as the frame rate.
            fps = detect_file_fps(file)
            if not fps:
                fps = detect_fps(subs)
        return read_mdvd(subs, fps)
    elif fmt == "sub2":
        return read_sub2(subs)
    elif fmt == "mpl2":
        return read_mpl2(subs)
    else:
        sys.stderr.write("Input format not specified/recognized\n")
        sys.exit(1)
6e604697 AM |
402 | |
403 | ||
404 | # | |
405 | #----------------------------------------------------------------------------------------- | |
406 | ||
407 | ||
# Output format name -> converter from the internal subtitle list to lines.
outfunc = {
    "srt":to_srt,
    "tmp":to_tmp}

infmt = "auto"
outfmt = "srt"
subdelay = 0
fps = -1
#out_to_file == 1 => output to a file, 0 => output stdout, -1 => Split, output to stdout not allowed
out_to_file = 0

try:
    opts, args = getopt.getopt(sys.argv[1:], 'i:o:a:s:S:f:h')
except getopt.GetoptError:
    usage()
    sys.exit(2)

splittimes = []

for opt, arg in opts:
    if opt == '-o':
        if arg in outfunc:
            outfmt = arg
        else:
            sys.stderr.write("Unknown output format.\n")
            sys.exit(1)
    elif opt == '-i':
        infmt = arg
    elif opt in ('-a', '-s', '-f'):
        # Report a malformed number instead of dying with a traceback.
        try:
            number = float(arg)
        except ValueError:
            sys.stderr.write("Option %s needs a number, got '%s'\n" % (opt, arg))
            sys.exit(1)
        if opt == '-a':
            subdelay = number
        elif opt == '-s':
            subdelay = -number
        else:
            fps = number
    elif opt == '-S':
        out_to_file = -1
        splittimes = get_split_times(arg)
        if not splittimes:
            # get_split_times has already reported the problem; going on
            # would silently write everything unsplit into the first output.
            sys.exit(1)
    elif opt == '-h':
        usage()
        sys.exit(1)

#
# number of file names must be 2 + number of split-points
if len(args) == len(splittimes)+2:
    out_to_file = 1
elif len(args) == len(splittimes)+1 and out_to_file != -1:
    out_to_file = 0
else:
    sys.stderr.write("Too few file names given!\n")
    usage()
    sys.exit(1)

#
# read file
sub = read_subs(args.pop(0),infmt,fps)

#
# apply DELAY
if subdelay != 0:
    sub = sub_add_offset(sub, subdelay)

#
# apply SPLIT
if len(splittimes) == 0:
    sub_list = [sub]
else:
    sub_list = sub_split(sub, splittimes)

#
# save file(S)
for nsub in sub_list:
    s = outfunc[outfmt](nsub)
    if out_to_file == 1:
        dst = open(args.pop(0), 'w')
        try:
            dst.writelines(s)
        finally:
            # Close the output even when writing fails.
            dst.close()
    else:
        sys.stdout.writelines(s)
6e604697 | 486 |