]> git.pld-linux.org Git - packages/subconv.git/blame - subconv.py
- reformat
[packages/subconv.git] / subconv.py
CommitLineData
6e604697
AM
1#!/usr/bin/env python
2#
3# subconv v0.2.2 -- divx subtitles converter
4# (w)by Pawel Stolowski
5# Julien Lerouge
6#
7# Released under terms of GNU GPL
8#
4fa28d7b 9# mpl2 (w) by Grzegorz Zyla
6e604697
AM
10
11import re, sys, getopt, string
12
def usage():
    """
    Print the command-line help text to standard error.
    input: nothing
    returns: nothing
    """
    # The synopsis lists every option that takes part in a conversion,
    # including -f, which the option parser accepts.
    sys.stderr.write("""
 subconv v0.2.2 -- DivX subtitles converter
 (w)by Pawel Stolowski <yogin@linux.bydg.org>
       Julien Lerouge <julien.lerouge@free.fr>

 Usage: subconv [-i fmt|-o fmt|-f fps|-a sec|-s sec|-S h:m:s[,h:m:s,...]] input [output1, output2, ...]

 -i fmt        input format (one of: srt, tmp, mdvd, sub2, mpl2, auto; auto by default)
 -o fmt        output format (one of: tmp, srt; srt by default)
 -f fps        adjust fps rate for microdvd input subtitles (auto by default)
 -a sec        adjust subtitle delay in seconds (add)
 -s sec        adjust subtitle delay in seconds (sub)
 -S h:m:s,...  split subtitles in selected position(s); additional output file names must be specified
 -h            this help

""")
6e604697
AM
30
31
32
def detect_fps(list):
    """
    Detect the FPS for a given input file by asking the user.
    The frame number of the last micro-dvd line is converted to a running
    time for each common frame rate; the user picks one by index on stdin.
    input: contents of a file as list
    returns: FPS (one entry of the candidate list); the program exits with
             status 1 when no micro-dvd line exists or the answer is invalid
    """
    sys.stderr.write("FPS guessing, here are approximate length of file for several FPS :\n")
    most_current = [23.976, 25.0, 29.97]

    re_mdvd = re.compile(r"^[\{\[](\d+)[\}\]][\{\[](\d*)[\}\]]\s*(.*)")

    # Walk backwards to the last line that carries frame numbers.
    last = None
    for line in reversed(list):
        m = re_mdvd.match(line)
        if m:
            # The end frame may be empty ("{100}{}text"); use the start frame then.
            last = int(m.group(2) or m.group(1))
            break
    if last is None:
        sys.stderr.write("No micro-dvd subtitle line found\n")
        sys.exit(1)

    for i, rate in enumerate(most_current):
        sys.stderr.write(str(i) + " " + str(rate) + " Fps -> ")
        tot_sec = int(last / rate)
        # divmod keeps the minutes integral regardless of the division mode.
        minutes, seconds = divmod(tot_sec, 60)
        sys.stderr.write(str(minutes) + " min " + str(seconds) + "sec\n")
    sys.stderr.write("Choice : ")
    try:
        choice = int(sys.stdin.readline().strip())
    except ValueError:
        # A non-numeric answer is handled like an out-of-range one.
        choice = -1
    if 0 <= choice < len(most_current):
        return most_current[choice]
    sys.stderr.write("Bad choice\n")
    sys.exit(1)
6e604697
AM
63
64
def detect_format(list):
    """
    Detect the format of input subtitles file.
    The lines are examined in order and the first line that matches one of
    the known patterns decides the format. The list is not modified.
    input: contents of a file as list
    returns: format (srt, tmp, mdvd, sub2, mpl2) or "" if unknown
    """
    sys.stderr.write("Guessing subs format .")
    # mpl2 is tested before mdvd: the micro-dvd pattern accepts square
    # brackets as well, so in the opposite order mpl2 could never be reported.
    detectors = (
        ("mpl2", " mpl2\n", re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")),
        ("mdvd", " mdvd\n", re.compile(r"^[\{\[](\d+)[\}\]][\{\[](\d*)[\}\]]\s*(.*)")),
        ("srt", " srt\n", re.compile(r"^(\d+):(\d+):(\d+),\d+\s*-->.*")),
        ("tmp", " tmp\n", re.compile(r"^(\d+):(\d+):(\d+):(.*)")),
        ("sub2", " subviewer 2 format\n", re.compile(r"^(\d+):(\d+):(\d+)\.\d+\s*\,.*")),
    )
    for line in list:
        # One progress dot per inspected line.
        sys.stderr.write(".")
        for fmt, message, pattern in detectors:
            if pattern.match(line):
                sys.stderr.write(message)
                return fmt
    return ""
6e604697
AM
96
97
def read_mdvd(list, fps):
    """
    Read micro-dvd subtitles.
    Frame numbers are divided by fps to obtain seconds. Leading {c:...} and
    {y:...} control tags are removed from every text line. Lines that do not
    match the micro-dvd pattern are skipped. The input list is not modified.
    input: contents of a file as list, frames per second
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],....]
    """
    re1 = re.compile(r"^[\{\[](\d+)[\}\]][\{\[](\d*)[\}\]]\s*(.*)")
    rate = float(fps)

    subtitles = []
    for line in list:
        m = re1.match(line)
        if not m:
            continue
        start = int(m.group(1))
        # A missing end frame defaults to 20 frames after the start frame.
        end = int(m.group(2)) if m.group(2) else start + 20
        subt = [start / rate, end / rate]
        for text in m.group(3).strip().split("|"):
            # Strip every leading colour/style tag; an unterminated tag is
            # kept as text instead of raising ValueError.
            while text.lower().startswith(('{c:', '{y:')):
                end_marker = text.find('}')
                if end_marker < 0:
                    break
                text = text[end_marker + 1:]
            subt.append(text)
        subtitles.append(subt)
    return subtitles
6e604697 128
def read_mpl2(list):
    """
    Read mpl2 subtitles
    The two bracketed numbers of each line are tenths of a second; text lines
    are separated by "|". Lines that do not match are skipped. The input list
    is not modified.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_start in secs, time_end in secs, line1, ...],.....]
    """
    re1 = re.compile(r"^\[(\d+)\]\[(\d+)\]\s*(.*)")
    subtitles = []
    for line in list:
        m = re1.match(line)
        if not m:
            continue
        # Dividing by 10.0 yields the closest float to the decimal value
        # (3 / 10.0 == 0.3), whereas 3 * 0.1 does not.
        subt = [int(m.group(1)) / 10.0, int(m.group(2)) / 10.0]
        subt.extend(m.group(3).strip().split("|"))
        subtitles.append(subt)
    return subtitles
145
def read_sub2(list):
    """
    Reads subviewer 2.0 format subtitles, e.g. :
        00:01:54.75,00:01:58.54
        You shall not pass!
    The line following a timing line is taken as the text; "[br]" separates
    the text lines of one subtitle. A timing line at the very end of the
    input produces a warning and is dropped. The input list is not modified.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+)\.(\d+)\s*\,\s*(\d+):(\d+):(\d+)\.(\d+).*$")

    def to_seconds(hours, minutes, seconds, fraction):
        # The digits after the dot are a decimal fraction of a second, so
        # ".5" is half a second whatever the number of digits written.
        return int(hours) * 3600 + int(minutes) * 60 + int(seconds) + float("0." + fraction)

    subtitles = []
    lines = iter(list)
    for line in lines:
        m = re1.match(line)
        if not m:
            continue
        text = next(lines, None)
        if text is None:
            sys.stderr.write("Warning: it seems like input file is damaged or too short.\n")
            break
        fields = m.groups()
        subt = [to_seconds(*fields[:4]), to_seconds(*fields[4:])]
        subt.extend(text.strip().split("[br]"))
        subtitles.append(subt)
    return subtitles
6e604697
AM
170
def read_srt(list):
    """
    Reads srt subtitles.
    A subtitle is a numeric index line, a timing line and the text lines up
    to the next blank line. The last subtitle is kept even when the input
    ends without a trailing blank line. The input list is not modified.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re_index = re.compile(r"^(\d+)\s*$")
    re_timing = re.compile(r"^(\d+):(\d+):(\d+),(\d+)\s*-->\s*(\d+):(\d+):(\d+),(\d+).*$")
    re_blank = re.compile(r"^\s*$")
    truncated = "Warning: it seems like input file is damaged or too short.\n"

    subtitles = []
    lines = iter(list)
    for line in lines:
        if not re_index.match(line):
            continue
        timing = next(lines, None)
        if timing is None:
            sys.stderr.write(truncated)
            break
        m = re_timing.match(timing)
        if not m:
            continue
        subt = [int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3)) + int(m.group(4))/1000.0]
        subt.append(int(m.group(5))*3600 + int(m.group(6))*60 + int(m.group(7)) + int(m.group(8))/1000.0)

        texts = []
        terminated = False
        for text in lines:
            if re_blank.match(text):
                terminated = True
                break
            texts.append(text.strip())
        if not terminated and not texts:
            # The input stopped right after a timing line: nothing to show.
            sys.stderr.write(truncated)
            break
        subt.extend(texts)
        subtitles.append(subt)
    return subtitles
6e604697
AM
197
def read_tmp(list):
    """
    Reads tmplayer (tmp) subtitles.
    Lines sharing the same start second are merged into one subtitle. The
    format has no end time: a subtitle ends at the next second that has a
    subtitle, or after 4 seconds, whichever comes first. The input list is
    not modified.
    input: contents of a file as list
    returns: list of subtitles in form: [[time_dep, time_end, line1, ...],[time_dep, time_end, line1, ...],....]
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+):(.*)")
    subs = {}
    for line in list:
        m = re1.match(line)
        if m:
            time = int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3))
            subs.setdefault(time, []).extend(m.group(4).strip().split("|"))

    subtitles = []
    # sorted() and "in" behave the same on Python 2 and 3, unlike
    # dict.has_key() and keys().sort().
    for start in sorted(subs):
        duration = 1
        while duration < 4 and (start + duration) not in subs:
            duration += 1
        subtitles.append([start, start + duration] + subs[start])
    return subtitles
6e604697
AM
226
def to_tmp(list):
    """
    Renders subtitles (internal format) as tmplayer (tmp) lines.
    Only the start time is written, cut down to whole seconds; the text
    lines of one subtitle are joined with "|".
    input: subtitles (internal format)
    returns: list of output lines, each terminated by a newline
    """
    def render(entry):
        start = entry[0]
        hours = int(start / 3600)
        minutes = int(int(start % 3600) / 60)
        seconds = int(start % 60)
        text = "|".join(entry[2:])
        return "%.2d:%.2d:%.2d:%s\n" % (hours, minutes, seconds, text)

    return [render(entry) for entry in list]
6e604697
AM
239
240
def to_srt(list):
    """
    Converts list of subtitles (internal format) to srt format
    Each subtitle becomes one string holding the running index (from 1),
    the "start --> end" timing line, the text lines and an empty line.
    input: subtitles (internal format)
    returns: list of strings, one per subtitle
    """
    def stamp(secs):
        # Round to whole milliseconds first: taking the fractional part of a
        # float and truncating it turns e.g. 2.8 s into ",799".
        total_ms = int(round(secs * 1000))
        rest, millis = divmod(total_ms, 1000)
        rest, seconds = divmod(rest, 60)
        hours, minutes = divmod(rest, 60)
        return "%.2d:%.2d:%.2d,%.3d" % (hours, minutes, seconds, millis)

    outl = []
    for count, entry in enumerate(list, 1):
        outl.append("%d\n%s --> %s\n%s\n\n" % (
            count, stamp(entry[0]), stamp(entry[1]), "\n".join(entry[2:])))
    return outl
6e604697
AM
261
262
def sub_add_offset(list, off):
    """
    Adds an offset (in seconds, may be negative) to all subtitles in the list
    Times that would become negative are set to 0 and a warning is written
    to stderr for each of them. The subtitles passed in are left untouched.
    input: subtitles (internal format), offset in seconds
    returns: new subtitles (internal format)
    """
    outl = []
    for entry in list:
        # Work on a copy so that the caller's subtitles keep their times.
        shifted = entry[:]
        shifted[0] += off
        shifted[1] += off
        for idx in (0, 1):
            if shifted[idx] < 0:
                sys.stderr.write("Warning, negative offset too high, subs beginning at 00:00:00\n")
                shifted[idx] = 0
        outl.append(shifted)
    return outl
6e604697
AM
281
def sub_split(sub, times):
    """
    Splits subtitles
    The first part holds the subtitles starting before times[0], unchanged.
    Every following part starts at a split position; its times are shifted
    so that the split position becomes 0 (negative results are set to 0).
    The subtitles are expected in ascending start order. Neither argument
    is modified.
    input: subtitles (internal format) and split positions (in seconds)
    returns: a list of lists with new subtitles (one more than split positions)
    """
    if not times:
        # Nothing to split at: a single part with all subtitles.
        return [sub[:]]

    pos = 0
    num = len(sub)

    while pos < num and sub[pos][0] < times[0]:
        pos += 1

    lists = [sub[:pos]]     # [subtitles1, subtitles2, ...]

    # Upper bounds of the remaining parts; infinity closes the last part so
    # that no subtitle can fall behind it.
    bounds = list(times[1:]) + [float("inf")]
    minussec = times[0]

    for second in bounds:
        outl = []
        while pos < num and sub[pos][0] < second:
            entry = sub[pos]
            subline = [entry[0] - minussec, entry[1] - minussec] + entry[2:]
            if subline[0] < 0:
                subline[0] = 0
            if subline[1] < 0:
                subline[1] = 0
            outl.append(subline)
            pos += 1
        lists.append(outl)
        minussec = second
    return lists
6e604697
AM
312
def get_split_times(str):
    """
    Converts comma-separated string of "xx:yy:zz,xx:yy:zz,..." times to list of times (in seconds)
    Whitespace around the individual positions is ignored. When any position
    is malformed a message is written to stderr and an empty list returned.
    input: string of comma-separated xx:yy:zz time positions
    returns: list of times
    """
    re1 = re.compile(r"^(\d+):(\d+):(\d+)")
    times = []
    for t in str.split(","):
        m = re1.match(t.strip())
        if not m:
            sys.stderr.write("Unknown time format\n")
            return []
        times.append(int(m.group(1))*3600 + int(m.group(2))*60 + int(m.group(3)))
    return times
6e604697
AM
329
330
def read_subs(file, fmt, fps):
    """
    Reads subtitles from file, using format fmt
    With fmt "auto" the format is detected from the file contents first.
    For "mdvd" an fps of -1 means the frame rate is asked from the user.
    The program exits with status 1 when the format is not recognized.
    input : file name, format (srt,mdvd,tmp,sub2,mpl2,auto), fps
    returns: list of subtitles in form: [[time_start, time_end, line1, ...],[time_start, time_end, line1, ...],....]
    """
    # "with" closes the file even when reading fails.
    with open(file, 'r') as src:
        subs = src.readlines()

    if fmt == "auto":
        # Detection gets its own copy of the lines, so the file does not
        # have to be read a second time for the actual conversion.
        fmt = detect_format(subs[:])

    if fmt == "tmp":
        return read_tmp(subs)
    elif fmt == "srt":
        return read_srt(subs)
    elif fmt == "mdvd":
        if fps == -1:
            fps = detect_fps(subs)
        return read_mdvd(subs, fps)
    elif fmt == "sub2":
        return read_sub2(subs)
    elif fmt == "mpl2":
        return read_mpl2(subs)
    else:
        sys.stderr.write("Input format not specified/recognized\n")
        sys.exit(1)
6e604697
AM
357
358
359#
360#-----------------------------------------------------------------------------------------
361
362
# Output writers, keyed by the format name accepted by -o.
outfunc = {
    "srt": to_srt,
    "tmp": to_tmp}

infmt = "auto"
outfmt = "srt"
subdelay = 0
# -1 means: no frame rate given with -f
fps = -1
#out_to_file == 1 => output to a file, 0 => output stdout, -1 => Split, output to stdout not allowed
out_to_file = 0

try:
    opts, args = getopt.getopt(sys.argv[1:], 'i:o:a:s:S:f:h')
except getopt.GetoptError:
    usage()
    sys.exit(2)

splittimes = []

try:
    for opt, arg in opts:
        if opt == '-o':
            # "in" works on Python 2 and 3; dict.has_key() is Python 2 only.
            if arg in outfunc:
                outfmt = arg
            else:
                sys.stderr.write("Unknown output format.\n")
                sys.exit(1)
        elif opt == '-i':
            infmt = arg
        elif opt == '-a':
            subdelay = float(arg)
        elif opt == '-s':
            subdelay = -float(arg)
        elif opt == '-S':
            out_to_file = -1
            splittimes = get_split_times(arg)
            if not splittimes:
                # get_split_times has already reported the problem; going
                # on would silently produce unsplit output.
                sys.exit(1)
        elif opt == '-f':
            fps = float(arg)
        elif opt == '-h':
            usage()
            sys.exit(1)
except ValueError:
    # float() rejected the value given to -a, -s or -f.
    sys.stderr.write("Invalid numeric value given for an option.\n")
    sys.exit(1)

#
# file names: the input, plus either one output per part (number of
# split-points + 1) or, without splitting, no output at all (stdout)
if len(args) == len(splittimes)+2:
    out_to_file = 1
elif len(args) == len(splittimes)+1 and out_to_file != -1:
    out_to_file = 0
else:
    sys.stderr.write("Too few file names given!\n")
    usage()
    sys.exit(1)

#
# read file
sub = read_subs(args.pop(0), infmt, fps)

#
# apply DELAY
if subdelay != 0:
    sub = sub_add_offset(sub, subdelay)

#
# apply SPLIT
if len(splittimes) == 0:
    sub_list = [sub]
else:
    sub_list = sub_split(sub, splittimes)

#
# save file(S)
for nsub in sub_list:
    s = outfunc[outfmt](nsub)
    if out_to_file == 1:
        # One output file name is consumed per part, in order.
        with open(args.pop(0), 'w') as dst:
            dst.writelines(s)
    else:
        sys.stdout.writelines(s)
6e604697 441
This page took 0.099261 seconds and 4 git commands to generate.