]>
Commit | Line | Data |
---|---|---|
6e604697 AM |
1 | #!/usr/bin/env python |
2 | # | |
3 | # subconv v0.2.2 -- divx subtitles converter | |
4 | # (w)by Pawel Stolowski | |
5 | # Julien Lerouge | |
6 | # | |
7 | # Released under terms of GNU GPL | |
8 | # | |
4fa28d7b | 9 | # mpl2 (w) by Grzegorz Zyla |
6e604697 AM |
10 | |
11 | import re, sys, getopt, string | |
12 | ||
def usage():
    """
    Print the program banner and the command line help on stderr.
    """
    help_lines = [
        "",
        "subconv v0.2.2 -- DivX subtitles converter",
        "(w)by Pawel Stolowski <yogin@linux.bydg.org>",
        "Julien Lerouge <julien.lerouge@free.fr>",
        "",
        "Usage: subconv [-i fmt|-o fmt|-a sec|-s sec|-S h:m:s[,h:m:s,...]] input [output1, output2, ...]",
        "",
        "-i fmt input format (one of: srt, tmp, mdvd, sub2, mpl2, auto; auto by default)",
        "-o fmt output format (one of: tmp, srt; srt by default)",
        "-f fps adjust fps rate for microdvd input subtitles (auto by default)",
        "-a sec adjust subtitle delay in seconds (add)",
        "-s sec adjust subtitle delay in seconds (sub)",
        "-S h:m:s,... split subtitles in selected position(s); additional output file names must be specified",
        "-h this help",
        "",
        "",
    ]
    sys.stderr.write("\n".join(help_lines))
6e604697 AM |
30 | |
31 | ||
32 | ||
def detect_fps(list):
    """
    Ask the user which frame rate a micro-dvd subtitle file was made for.

    The last micro-dvd line of the file is located, its end frame is
    converted to a running time for each common frame rate, the candidates
    are printed on stderr and the user picks one by typing its index on stdin.

    input: contents of a file as list
    returns: FPS selected by the user
    Exits with status 1 when the file holds no micro-dvd line or when the
    answer read from stdin is not a valid index.
    """
    sys.stderr.write("FPS guessing, here are approximate length of file for several FPS :\n")
    most_current = [23.976, 25.0, 29.97]

    re_mdvd = re.compile(r"^[\{\[](\d+)[\}\]][\{\[](\d*)[\}\]]\s*(.*)")

    # Walk backwards to the last timed line; trailing junk lines are skipped.
    last = None
    for line in reversed(list):
        m = re_mdvd.match(line)
        if m:
            # The end frame may be empty ("{100}{}text"); use the start frame then.
            last = int(m.group(2) or m.group(1))
            break
    if last is None:
        sys.stderr.write("No micro-dvd subtitles found, cannot guess FPS\n")
        sys.exit(1)

    for i, rate in enumerate(most_current):
        tot_sec = int(last / rate)
        # divmod keeps minutes integral on Python 3 as well.
        minutes, sec = divmod(tot_sec, 60)
        sys.stderr.write(str(i)+" "+str(rate)+" Fps -> ")
        sys.stderr.write(str(minutes)+" min "+str(sec)+"sec\n")
    sys.stderr.write("Choice : ")
    try:
        choice = int(sys.stdin.readline().strip())
    except ValueError:
        # Non-numeric answer: treat it like any other out-of-range choice.
        choice = -1
    if 0 <= choice < len(most_current):
        return most_current[choice]
    sys.stderr.write("Bad choice\n")
    sys.exit(1)
6e604697 AM |
63 | |
64 | ||
def detect_format(list):
    """
    Detect the format of input subtitles file.

    Every line is tried against the known formats until one matches; a
    progress dot is written on stderr for each inspected line and the name
    of the detected format follows.  The input list is left untouched.

    input: contents of a file as list
    returns: format (srt, tmp, mdvd, sub2, mpl2) or "" if unknown
    """
    sys.stderr.write("Guessing subs format .")
    # Order matters: the mdvd pattern also accepts square brackets, so the
    # stricter mpl2 pattern ("[n][n]") has to be tried first or it could
    # never be reached.
    detectors = (
        ("mpl2", " mpl2\n", re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")),
        ("mdvd", " mdvd\n", re.compile(r"^[\{\[](\d+)[\}\]][\{\[](\d*)[\}\]]\s*(.*)")),
        ("srt", " srt\n", re.compile(r"^(\d+):(\d+):(\d+),\d+\s*-->.*")),
        ("tmp", " tmp\n", re.compile(r"^(\d+):(\d+):(\d+):(.*)")),
        ("sub2", " subviewer 2 format\n", re.compile(r"^(\d+):(\d+):(\d+)\.\d+\s*\,.*")),
    )
    for line in list:
        sys.stderr.write(".")
        for fmt, message, pattern in detectors:
            if pattern.match(line):
                sys.stderr.write(message)
                return fmt
    return ""
6e604697 AM |
96 | |
97 | ||
def read_mdvd(list, fps):
    """
    Read micro-dvd subtitles.

    Lines that do not look like "{start}{end}text" are ignored.  A missing
    end frame is replaced by start + 20 frames.  Leading "{c:...}" and
    "{y:...}" control tags are removed from every text line; a tag without
    a closing brace is kept as plain text.

    input: contents of a file as list, frames per second
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    re1 = re.compile(r"^[\{\[](\d+)[\}\]][\{\[](\d*)[\}\]]\s*(.*)")
    fps = float(fps)

    subtitles = []
    for line in list:
        m = re1.match(line)
        if not m:
            continue
        start = int(m.group(1))
        end = m.group(2)
        if end == '':
            # No end frame given: show the subtitle for 20 frames.
            end = start + 20
        subt = [start / fps, int(end) / fps]
        for text in m.group(3).strip().split("|"):
            # Tags may be stacked ("{y:i}{c:$0000ff}text"), so strip them all.
            while text.lower().startswith(('{c:', '{y:')):
                close = text.find('}')
                if close == -1:
                    break
                text = text[close + 1:]
            subt.append(text)
        subtitles.append(subt)
    return subtitles
6e604697 | 128 | |
def read_mpl2(list):
    """
    Read mpl2 subtitles

    Times in "[start][end]text" lines are tenths of a second; lines in any
    other shape are ignored.  The input list is left untouched.

    input: contents of a file as list
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],.....]
    """
    re1 = re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")
    subtitles = []
    for line in list:
        m = re1.match(line)
        if m:
            # Divide instead of multiplying by 0.1: 3 * 0.1 is not 0.3 in
            # floating point, while 3 / 10.0 is the closest float to 0.3.
            subt = [int(m.group(1)) / 10.0, int(m.group(2)) / 10.0]
            subt.extend(m.group(3).strip().split("|"))
            subtitles.append(subt)
    return subtitles
145 | ||
def read_sub2(list):
    """
    Reads subviewer 2.0 format subtitles, e.g. :
        00:01:54.75,00:01:58.54
        You shall not pass!
    The line following a timing line holds the text; "[br]" separates the
    displayed lines.  The fractional part of a time may have any number of
    digits.  A timing line at the very end of the input has no text, so it
    is dropped with a warning on stderr.

    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+)\.(\d+)\s*\,\s*(\d+):(\d+):(\d+)\.(\d+).*$")

    def to_seconds(hours, minutes, seconds, fraction):
        # "0." + digits handles hundredths ("75") as well as other widths ("500").
        return int(hours)*3600 + int(minutes)*60 + int(seconds) + float("0." + fraction)

    subtitles = []
    total = len(list)
    pos = 0
    while pos < total:
        m = re1.match(list[pos])
        pos += 1
        if not m:
            continue
        if pos >= total:
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        fields = m.groups()
        subt = [to_seconds(*fields[:4]), to_seconds(*fields[4:])]
        subt.extend(list[pos].strip().split("[br]"))
        pos += 1
        subtitles.append(subt)
    return subtitles
6e604697 AM |
170 | |
def read_srt(list):
    """
    Reads srt subtitles.

    A subtitle is a counter line, a "h:m:s,ms --> h:m:s,ms" timing line and
    the text lines up to the next blank line or the end of the input, so the
    last subtitle is kept even when the file does not end with a blank line.
    A counter line at the very end of the input triggers a warning on stderr.

    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+)\s*$")
    re2 = re.compile(r"^(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+).*$")
    re3 = re.compile(r"^\s*$")
    subtitles = []
    total = len(list)
    pos = 0
    while pos < total:
        is_counter = re1.match(list[pos])
        pos += 1
        if not is_counter:
            continue
        if pos >= total:
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        # The line after a counter is consumed whether or not it is a timing line.
        m = re2.match(list[pos])
        pos += 1
        if not m:
            continue
        subt = [int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3)) + int(m.group(4))/1000.0]
        subt.append(int(m.group(5))*3600 + int(m.group(6))*60 + int(m.group(7)) + int(m.group(8))/1000.0)
        while pos < total and not re3.match(list[pos]):
            subt.append(list[pos].strip())
            pos += 1
        pos += 1  # step over the blank separator line
        subtitles.append(subt)
    return subtitles
6e604697 AM |
197 | |
def read_tmp(list):
    """
    Reads tmplayer (tmp) subtitles.

    Lines sharing the same start time are merged into one subtitle.  The
    format has no end time, so a subtitle is shown until the next subtitle
    starts, for at most 4 seconds.

    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+):(.*)")
    subs = {}
    for line in list:
        m = re1.match(line)
        if m:
            time = int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3))
            subs.setdefault(time, []).extend(m.group(4).strip().split("|"))

    subtitles = []
    for start in sorted(subs):
        # Look for a subtitle starting within the next 3 seconds; otherwise
        # the duration is capped at 4 seconds.
        duration = 1
        while duration < 4 and (start + duration) not in subs:
            duration += 1
        subtitles.append([start, start + duration] + subs[start])
    return subtitles
6e604697 AM |
226 | |
def to_tmp(list):
    """
    Renders subtitles (internal format) as tmp lines: "hh:mm:ss:text|text",
    one line per subtitle.  Only the start time is written.
    """
    def render(entry):
        start = entry[0]
        hours = int(start/3600)
        minutes = int(int(start%3600)/60)
        seconds = int(start%60)
        text = "|".join(entry[2:])
        return "%.2d:%.2d:%.2d:%s\n" % (hours, minutes, seconds, text)

    return [render(entry) for entry in list]
6e604697 AM |
239 | |
240 | ||
def to_srt(list):
    """
    Converts list of subtitles (internal format) to srt format

    Every subtitle becomes one string holding the counter (starting at 1),
    the "hh:mm:ss,mmm --> hh:mm:ss,mmm" line, the text lines and a blank
    separator line.

    input: subtitles (internal format)
    returns: list of strings ready for writelines()
    """
    def stamp(secs):
        # Round to whole milliseconds first: truncating the float remainder
        # would turn 1.001 into ",000" and 2.999 into ",998".
        rest, millis = divmod(int(round(secs * 1000)), 1000)
        rest, seconds = divmod(rest, 60)
        hours, minutes = divmod(rest, 60)
        return "%.2d:%.2d:%.2d,%.3d" % (hours, minutes, seconds, millis)

    outl = []
    for index, l in enumerate(list):
        outl.append("%d\n%s --> %s\n%s\n\n" % (index + 1, stamp(l[0]), stamp(l[1]), "\n".join(l[2:])))
    return outl
6e604697 AM |
261 | |
262 | ||
def sub_add_offset(list, off):
    """
    Adds an offset (in seconds, may be negative) to all subtitles in the list

    Times that would become negative are clamped to 0 and a warning is
    written on stderr for each of them.  The input subtitles are not modified.

    input: subtitles (internal format), offset in seconds
    returns: new subtitles (internal format)
    """
    outl = []
    for l in list:
        start = l[0] + off
        end = l[1] + off
        if start < 0:
            sys.stderr.write("Warning, negative offset too high, subs beginning at 00:00:00\n")
            start = 0
        if end < 0:
            sys.stderr.write("Warning, negative offset too high, subs beginning at 00:00:00\n")
            end = 0
        # Build a fresh entry instead of shifting the caller's one in place.
        outl.append([start, end] + [text for text in l[2:]])
    return outl
6e604697 AM |
281 | |
def sub_split(sub, times):
    """
    Splits subtitles

    The first part keeps its original times; in every following part the
    times are made relative to the split position that starts the part
    (never below 0).  Neither `sub` nor `times` is modified.

    input: subtitles (internal format) and split positions (in seconds)
    returns: a list of lists with new subtitles, one more than split positions
    """
    if not times:
        # Nothing to split on: everything belongs to a single part.
        return [sub[:]]

    num = len(sub)
    pos = 0
    while pos < num and sub[pos][0] < times[0]:
        pos += 1

    lists = [sub[:pos]]  # [subtitles1, subtitles2, ...]

    # Upper bound of every remaining part; the last part is open-ended.
    boundaries = [t for t in times[1:]] + [float("inf")]
    minussec = times[0]

    for second in boundaries:
        outl = []
        while pos < num and sub[pos][0] < second:
            entry = sub[pos]
            outl.append([max(entry[0] - minussec, 0), max(entry[1] - minussec, 0)] + entry[2:])
            pos += 1
        lists.append(outl)
        minussec = second
    return lists
6e604697 AM |
312 | |
def get_split_times(str):
    """
    Converts comma-separated string of "xx:yy:zz,xx:yy:zz,..." times to list of times (in seconds)

    Whitespace around the individual positions is ignored.  When any
    position cannot be parsed, a message is written on stderr and an empty
    list is returned.

    input: string of comma-separated xx:yy:zz time positions
    returns: list of times
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+)")
    times = []
    for t in str.split(","):
        m = re1.match(t.strip())
        if not m:
            sys.stderr.write("Unknown time format\n")
            return []
        times.append(int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3)))
    return times
6e604697 AM |
329 | |
330 | ||
def read_subs(file,fmt,fps):
    """
    Reads subtitles from file, using format fmt

    With fmt "auto" the format is guessed from the file contents.  For
    micro-dvd input an fps of -1 means that the user is asked for the frame
    rate.  Exits with status 1 when the format is unknown.

    input : file name, format (srt,mdvd,tmp,sub2,mpl2,auto), fps
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    # "with" closes the file even when reading fails.
    with open(file,'r') as src:
        subs = src.readlines()

    if fmt == "auto":
        # Detect on a copy so the lines stay available to the reader below
        # and the file does not have to be read a second time.
        fmt = detect_format(subs[:])

    if fmt == "tmp":
        return read_tmp(subs)
    elif fmt == "srt":
        return read_srt(subs)
    elif fmt == "mdvd":
        if fps == -1:
            fps = detect_fps(subs)
        return read_mdvd(subs, fps)
    elif fmt == "sub2":
        return read_sub2(subs)
    elif fmt == "mpl2":
        return read_mpl2(subs)
    else:
        sys.stderr.write("Input format not specified/recognized\n")
        sys.exit(1)
6e604697 AM |
357 | |
358 | ||
359 | # | |
360 | #----------------------------------------------------------------------------------------- | |
361 | ||
362 | ||
# Output format name -> function rendering the internal subtitle list.
outfunc = {
    "srt":to_srt,
    "tmp":to_tmp}

infmt = "auto"
outfmt = "srt"
subdelay = 0
fps = -1
#out_to_file == 1 => output to a file, 0 => output stdout, -1 => Split, output to stdout not allowed
out_to_file = 0

try:
    opts, args = getopt.getopt(sys.argv[1:], 'i:o:a:s:S:f:h')
except getopt.GetoptError:
    usage()
    sys.exit(2)

splittimes = []

for opt, arg in opts:
    if opt == '-o':
        if arg in outfunc:
            outfmt = arg
        else:
            sys.stderr.write("Unknown output format.\n")
            sys.exit(1)
    elif opt == '-i':
        infmt = arg
    elif opt == '-a':
        subdelay = float(arg)
    elif opt == '-s':
        subdelay = -float(arg)
    elif opt == '-S':
        out_to_file = -1
        splittimes = get_split_times(arg)
        if not splittimes:
            # get_split_times has already reported the bad position; going
            # on would silently convert without splitting.
            sys.exit(1)
    elif opt == '-f':
        fps = float(arg)
    elif opt == '-h':
        usage()
        sys.exit(1)

#
# number of file names must be 2 + number of split-points
# (or 1 + number of split-points when writing to stdout, which is only
# possible without splitting)
if len(args) == len(splittimes)+2:
    out_to_file = 1
elif len(args) == len(splittimes)+1 and out_to_file != -1:
    out_to_file = 0
else:
    sys.stderr.write("Wrong number of file names given!\n")
    usage()
    sys.exit(1)

#
# read file
sub = read_subs(args.pop(0),infmt,fps)

#
# apply DELAY
if subdelay != 0:
    sub = sub_add_offset(sub, subdelay)

#
# apply SPLIT
if len(splittimes) == 0:
    sub_list = [sub]
else:
    sub_list = sub_split(sub, splittimes)

#
# save file(S)
for nsub in sub_list:
    s = outfunc[outfmt](nsub)
    if out_to_file == 1:
        # One output file name is consumed per part, in order.
        with open(args.pop(0), 'w') as dst:
            dst.writelines(s)
    else:
        sys.stdout.writelines(s)
6e604697 | 441 |