]>
Commit | Line | Data |
---|---|---|
57811e09 AM |
1 | #!/usr/bin/python |
2 | # -*- coding: UTF-8 -*- | |
036ce345 AM |
3 | # |
4 | # Copyright (C) 2009 Arkadiusz Miśkiewicz <arekm@pld-linux.org> | |
5 | # | |
6 | # This program is free software: you can redistribute it and/or modify | |
7 | # it under the terms of the GNU General Public License as published by | |
8 | # the Free Software Foundation, either version 3 of the License, or | |
9 | # (at your option) any later version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, | |
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | # GNU General Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License | |
17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
ae92e79c | 18 | # |
2f5b3e87 AM |
19 | # napiprojekt.pl API is used with the consent of the napiprojekt administration |
20 | # (given by Marek <kontakt@napiprojekt.pl> at Wed, 24 Feb 2010 14:43:00 +0100) | |
2855f830 AM |
21 | # |
22 | # napisy24.pl API access granted by napisy24 admins at 15 Feb 2015 | |
23 | # | |
57811e09 | 24 | |
2855f830 | 25 | import StringIO |
4ea7498e | 26 | import re |
57811e09 | 27 | import sys |
8a53d3e2 | 28 | import mimetypes |
2855f830 | 29 | import urllib |
8a53d3e2 | 30 | import urllib2 |
da0dfee4 | 31 | import time |
57811e09 | 32 | import os |
1afc25b2 | 33 | import getopt |
03a8e2fc | 34 | import socket |
2855f830 AM |
35 | import struct |
36 | import zipfile | |
57811e09 | 37 | |
a5884ecd AM |
38 | try: |
39 | from hashlib import md5 as md5 | |
40 | except ImportError: | |
41 | from md5 import md5 | |
42 | ||
57811e09 AM |
# Name this script was invoked as; used as the prefix of its messages.
prog = os.path.basename(sys.argv[0])

# Extensions (lower case, without the dot) of files treated as videos.
video_files = [
    'asf', 'avi', 'divx', 'm2ts', 'mkv', 'mp4',
    'mpeg', 'mpg', 'ogm', 'rm', 'rmvb', 'wmv',
]

# Maps the --lang option value to the language code passed to napiprojekt.
languages = {
    'pl': 'PL',
    'en': 'ENG',
}
78cca783 | 47 | |
2855f830 AM |
def calculate_digest(filename):
    """Return the hex MD5 digest of the first 10485760 bytes of `filename`.

    Raises Exception when the file cannot be opened or read.
    """
    d = md5()
    try:
        f = open(filename, "rb")
        try:
            # Only the leading 10 MiB are hashed, not the whole file.
            d.update(f.read(10485760))
        finally:
            # Close explicitly instead of leaving the handle to the
            # garbage collector.
            f.close()
    except (IOError, OSError):
        # sys.exc_info() (already used elsewhere in this script) avoids
        # version-specific "except ..., e" / "except ... as e" syntax.
        raise Exception('Hashing video file failed: %s' % (sys.exc_info()[1],))
    return d.hexdigest()
55 | ||
def napiprojekt_hash(z):
    """Derive the five-character token sent to napiprojekt alongside `z`.

    `z` is a string of hex digits (callers pass an MD5 hex digest).  Each
    output character comes from one (index, multiplier, offset) triple:
    the digit at `index` plus `offset` selects a two-digit slice of `z`,
    whose value times `multiplier` contributes its last hex digit.
    """
    steps = zip(
        [0xe, 0x3, 0x6, 0x8, 0x2],    # index of the selector digit
        [2, 2, 5, 4, 3],              # multiplier
        [0, 0xd, 0x10, 0xb, 0x5],     # offset added to the selector digit
    )

    chars = []
    for pos, factor, offset in steps:
        start = offset + int(z[pos], 16)
        value = int(z[start:start + 2], 16)
        chars.append(("%x" % (value * factor))[-1])

    return ''.join(chars)
57811e09 | 72 | |
2855f830 AM |
def napisy24_hash(filename):
    """Return the napisy24 hash of `filename` as 16 lower-case hex digits.

    The hash is the file size plus every little-endian signed 64-bit word
    found in the first and in the last 65536 bytes of the file, reduced
    modulo 2**64.

    Raises Exception when the file is smaller than 2 * 65536 bytes or when
    it cannot be read.
    """
    longlongformat = '<q'  # little-endian long long
    bytesize = struct.calcsize(longlongformat)
    chunksize = 65536
    mask = 0xFFFFFFFFFFFFFFFF  # keeps the running sum a 64-bit number

    try:
        filesize = os.path.getsize(filename)
        if filesize < chunksize * 2:
            # Plain Exception: deliberately not caught by the handler below.
            raise Exception('Hashing (napisy24) video file failed: `%s\': File too small' % (filename,))

        f = open(filename, "rb")
        try:
            total = filesize
            # First the leading chunk, then the trailing one.
            for offset in (0, max(0, filesize - chunksize)):
                f.seek(offset, 0)
                # Floor division keeps the count an integer on every Python.
                for x in range(chunksize // bytesize):
                    (l_value,) = struct.unpack(longlongformat, f.read(bytesize))
                    total = (total + l_value) & mask
        finally:
            # Close the file on every path, including errors.
            f.close()
    except (IOError, OSError, struct.error):
        # OSError covers getsize(); struct.error covers a short read.
        raise Exception('Hashing (napisy24) video file failed: %s' % (sys.exc_info()[1],))

    return "%016x" % total
106 | ||
1afc25b2 | 107 | def usage(): |
ac961430 | 108 | print >> sys.stderr, "Usage: %s [OPTIONS]... [FILE|DIR]..." % prog |
2855f830 | 109 | print >> sys.stderr, "Find video files and download matching subtitles from napiprojekt/napisy24 server." |
ac961430 AM |
110 | print >> sys.stderr |
111 | print >> sys.stderr, "Supported options:" | |
112 | print >> sys.stderr, " -h, --help display this help and exit" | |
113 | print >> sys.stderr, " -l, --lang=LANG subtitles language" | |
8a53d3e2 | 114 | print >> sys.stderr, " -n, --nobackup make no subtitle backup when in update mode" |
3e98b779 | 115 | print >> sys.stderr, " -c, --nocover do not download cover images" |
ac961430 | 116 | print >> sys.stderr, " -u, --update fetch new and also update existing subtitles" |
25036fc8 | 117 | print >> sys.stderr, " -d, --dest=DIR destination directory" |
ac961430 | 118 | print >> sys.stderr |
50529db1 | 119 | print >> sys.stderr, "Report bugs to <arekm@pld-linux.org>." |
1afc25b2 | 120 | |
4ea7498e AM |
def get_desc_links(digest, file=None):
    """Return the external links found on the napiprojekt description page.

    digest -- value sent as the `id` query parameter (callers pass the
              MD5 hex digest of the video file)
    file   -- value sent as the `film` query parameter; None is sent as
              an empty string

    Returns a list of http:// URLs taken from <a href=...> attributes,
    with site main pages and blacklisted sites left out, or False when
    the page could not be fetched.
    """
    # improve me
    re_link = re.compile(r'<a.*?href=[\'"](http://.*?)[ >\'"]', re.IGNORECASE)
    ignore = [re.compile(pattern, re.IGNORECASE) for pattern in
              (r'.*dobreprogramy\.pl', r'.*napiprojekt\.pl.*', r'.*nokaut\.pl.*', r'.*rodisite\.com.*')]

    try:
        # quote(None) raises, which used to make the default argument
        # always end in "return False".
        url = "http://www.napiprojekt.pl/index.php3?www=opis.php3&id=%s&film=%s" % (urllib2.quote(digest), urllib2.quote(file or ""))
        f = urllib2.urlopen(url)
        try:
            d = f.read()
        finally:
            f.close()
    except Exception:
        # Description links are optional; any failure means "none".
        return False

    links = []
    for l in re_link.findall(d):
        # main pages are useless
        if l.count('/') < 3:
            continue
        # blacklisted sites; stop at the first matching pattern so a link
        # matching several patterns is dropped exactly once
        blacklisted = False
        for pattern in ignore:
            if pattern.match(l):
                blacklisted = True
                break
        if not blacklisted:
            links.append(l)
    return links
4ea7498e | 148 | |
8a53d3e2 AM |
def get_cover(digest):
    """Download the cover image napiprojekt serves for `digest`.

    Returns a (data, extension) tuple, where `extension` (leading dot
    included) is guessed from the response Content-Type header, or False
    when the download fails or no extension can be guessed.
    """
    try:
        url = "http://www.napiprojekt.pl/okladka_pobierz.php?id=%s&oceny=-1" % (urllib2.quote(digest))
        f = urllib2.urlopen(url)
        try:
            cover = f.read()
            content_type = f.info()['Content-Type']
        finally:
            f.close()
        # Drop parameters such as "; charset=..."; mimetypes only knows
        # the bare "type/subtype" form.
        content_type = content_type.split(';')[0].strip()
        extension = mimetypes.guess_all_extensions(content_type)[-1]
    except Exception:
        # Covers are optional; any failure simply means "no cover".
        return False
    return (cover, extension)
4ea7498e | 161 | |
2855f830 AM |
def get_subtitle_napisy24(filename, digest=False, lang="pl"):
    """Download the subtitle for the video `filename` from napisy24.pl.

    filename -- path of the video file; its name, size and napisy24 hash
                are sent to the server
    digest   -- optional value sent as the `md5` field (callers pass the
                MD5 hex digest of the video file)
    lang     -- value sent as the `nl` field

    The request is tried up to three times when the connection fails or
    the HTTP status is not 200.  Returns the concatenated contents of all
    members of the zip archive sent by the server.

    Raises Exception when the server reports no subtitle, the reply is
    malformed, or every attempt failed.
    """
    url = "http://napisy24.pl/run/CheckSubAgent.php"

    pdata = [
        ('postAction', 'CheckSub'),
        ('ua', 'pynapi'),
        ('ap', 'XaA!29OkF5Pe'),
        ('nl', lang),
        ('fn', filename),
        ('fh', napisy24_hash(filename)),
        ('fs', os.path.getsize(filename)),
    ]
    if digest:
        pdata.append(('md5', digest))

    error = "Fetching subtitle (napisy24) failed:"
    # Both are initialised up front, as in get_subtitle_napiprojekt, so a
    # failed attempt can neither leave them unbound nor leak an error
    # body into the return value.
    sub = None
    http_code = 200  # assumed when the response offers no getcode()

    for attempt in range(3):
        try:
            response = urllib2.urlopen(url, data=urllib.urlencode(pdata))
            try:
                if hasattr(response, 'getcode'):
                    http_code = response.getcode()
                body = response.read()
            finally:
                response.close()
        except (IOError, OSError):
            error = error + " %s" % (sys.exc_info()[1],)
            time.sleep(0.5)
            continue

        if http_code != 200:
            error = error + ",HTTP code: %s" % (str(http_code))
            time.sleep(0.5)
            continue

        if not body.startswith('OK-2|'):
            if body.startswith('OK-'):
                raise Exception('Subtitle NOT FOUND')
            raise Exception('Subtitle NOT FOUND (unknown error)')

        # The zip archive follows the first "||" separator.
        pos = body.find('||')
        if pos < 2 or len(body) <= pos + 2:
            raise Exception('Subtitle NOT FOUND (subtitle too short)')

        try:
            subzip = zipfile.ZipFile(StringIO.StringIO(body[pos + 2:]))
            sub = ''.join([subzip.read(name) for name in subzip.namelist()])
        except Exception:
            # The reason used to be glued to "FOUND" without a separator.
            raise Exception('Subtitle NOT FOUND (%s)' % (sys.exc_info()[1],))
        break

    if sub is None or sub == "":
        raise Exception(error)

    return sub
221 | ||
def get_subtitle_napiprojekt(digest, lang="PL"):
    """Download the subtitle matching `digest` from napiprojekt.pl.

    digest -- MD5 hex digest of the video file (see calculate_digest)
    lang   -- language code sent as the `l` query parameter

    The request is tried up to three times when the connection fails or
    the HTTP status is not 200.  Returns the response body.

    Raises Exception when the reply has fewer than 20 lines (treated as
    "no subtitle") or when every attempt failed.
    """
    url = "http://napiprojekt.pl/unit_napisy/dl.php?l=%s&f=%s&t=%s&v=pynapi&kolejka=false&nick=&pass=&napios=%s" % \
        (lang, digest, napiprojekt_hash(digest), os.name)
    error = "Fetching subtitle (napiprojekt) failed:"
    http_code = 200  # assumed when the response offers no getcode()
    # Set only by a fully successful attempt, so neither an unread
    # response object nor a non-200 body can be returned as a subtitle.
    sub = None

    for attempt in range(3):
        try:
            response = urllib2.urlopen(url)
            try:
                if hasattr(response, 'getcode'):
                    http_code = response.getcode()
                body = response.read()
            finally:
                response.close()
        except (IOError, OSError):
            error = error + " %s" % (sys.exc_info()[1],)
            time.sleep(0.5)
            continue

        if http_code != 200:
            error = error + ",HTTP code: %s" % (str(http_code))
            time.sleep(0.5)
            continue

        err_add = ''
        if not body.startswith('NPc'):
            err_add = " (unknown error)"
        if len(body.split('\n')) < 20:
            raise Exception('Subtitle NOT FOUND%s' % err_add)

        sub = body
        break

    if sub is None or sub == "":
        raise Exception(error)

    return sub
258 | ||
505068dc AM |
def _err(message):
    """Write `message` followed by a newline to standard error."""
    sys.stderr.write("%s\n" % (message,))


def _find_video_files(args):
    """Expand command-line arguments into a sorted list of files.

    Directories are walked recursively and contribute only files whose
    extension is listed in video_files; any other argument is taken as
    given.
    """
    files = []
    for arg in args:
        if os.path.isdir(arg):
            for dirpath, dirnames, filenames in os.walk(arg, topdown=False):
                for name in filenames:
                    # splitext handles extensions of any length; slicing
                    # the last three characters missed divx/m2ts/mpeg/rmvb.
                    if os.path.splitext(name)[1][1:].lower() in video_files:
                        files.append(os.path.join(dirpath, name))
        else:
            files.append(arg)
    files.sort()
    return files


def main(argv=sys.argv):
    """Download subtitles (and covers) for the videos named in `argv`.

    argv -- full argument vector, program name first

    Returns the process exit status: 0 on success or after --help,
    1 for an unsupported option value, 2 for a usage error.
    """
    try:
        # "dest=" and "lang=": long options that take a value need the
        # trailing "=", otherwise --lang=pl / --dest=DIR are rejected.
        opts, args = getopt.getopt(argv[1:], "d:hl:nuc",
                                   ["dest=", "help", "lang=", "nobackup", "update", "nocover"])
    except getopt.GetoptError:
        _err(str(sys.exc_info()[1]))
        usage()
        return 2

    nobackup = False
    nocover = False
    update = False
    lang = 'pl'
    dest = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            return 0
        elif o in ("-l", "--lang"):
            if a in languages:
                lang = a
            else:
                _err("%s: unsupported language `%s'. Supported languages: %s" % (prog, a, str(languages.keys())))
                return 1
        elif o in ("-n", "--nobackup"):
            nobackup = True
        elif o in ("-u", "--update"):
            update = True
        elif o in ("-c", "--nocover"):
            nocover = True
        elif o in ("-d", "--dest"):
            dest = a
        else:
            _err("%s: unhandled option" % prog)
            return 1

    if not args:
        usage()
        return 2

    _err("%s: Subtitles language `%s'. Finding video files..." % (prog, lang))

    socket.setdefaulttimeout(180)

    files = _find_video_files(args)

    i_total = len(files)
    i = 0

    for file in files:
        i += 1

        # Subtitle name: the video name with its extension (of whatever
        # length) replaced by ".txt".
        basefile = os.path.splitext(file)[0]
        vfile = basefile + '.txt'
        if dest:
            vfile = os.path.join(dest, os.path.split(vfile)[1])

        if not update and os.path.exists(vfile):
            continue

        if not nobackup and os.path.exists(vfile):
            vfile_bak = vfile + '-bak'
            try:
                os.rename(vfile, vfile_bak)
            except (IOError, OSError):
                _err("%s: Skipping due to backup of `%s' as `%s' failure: %s" % (prog, vfile, vfile_bak, sys.exc_info()[1]))
                continue
            else:
                _err("%s: Old subtitle backed up as `%s'" % (prog, vfile_bak))

        _err("%s: %d/%d: Processing subtitle for %s" % (prog, i, i_total, file))

        # "except Exception" rather than a bare except, so Ctrl-C reaches
        # the KeyboardInterrupt handler at the bottom of the script.
        try:
            digest = calculate_digest(file)
        except Exception:
            _err("%s: %d/%d: %s" % (prog, i, i_total, sys.exc_info()[1]))
            continue

        # napiprojekt first, napisy24 as the fallback.  A stray bare
        # "raise" used to make the napiprojekt call unreachable.
        try:
            sub = get_subtitle_napiprojekt(digest, languages[lang])
        except Exception:
            try:
                sub = get_subtitle_napisy24(file, digest, lang)
            except Exception:
                _err("%s: %d/%d: %s" % (prog, i, i_total, sys.exc_info()[1]))
                continue

        fp = open(vfile, 'wb')
        try:
            fp.write(sub)
        finally:
            fp.close()

        desc = get_desc_links(digest, file)
        if desc:
            _err("%s: %d/%d: Description: " % (prog, i, i_total))
            for desc_i in desc:
                _err("\t\t%s" % desc_i)

        cover_stored = ""
        if not nocover:
            cover_data = get_cover(digest)
            if cover_data:
                cover, extension = cover_data
                fp = open(basefile + extension, 'wb')
                try:
                    fp.write(cover)
                finally:
                    fp.close()
                cover_stored = ", %s COVER STORED (%d bytes)" % (extension, len(cover))

        _err("%s: %d/%d: SUBTITLE STORED (%d bytes)%s" % (prog, i, i_total, len(sub), cover_stored))

    return 0
57811e09 | 387 | |
505068dc AM |
if __name__ == "__main__":
    # Non-zero until main() returns, so an interrupted run is not
    # reported to the caller as a success.
    ret = 1
    try:
        ret = main()
    except (KeyboardInterrupt, SystemExit):
        sys.stderr.write("%s: Interrupted, aborting.\n" % prog)
    sys.exit(ret)