]> git.pld-linux.org Git - packages/pynapi.git/blame - pynapi.py
- up to 0.20; crude way of accessing napisy 24 API and pynapi would welcome rewrite
[packages/pynapi.git] / pynapi.py
CommitLineData
57811e09
AM
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Copyright (C) 2009 Arkadiusz Miśkiewicz <arekm@pld-linux.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# The napiprojekt.pl API is used with the consent of the napiprojekt
# administration (given by Marek <kontakt@napiprojekt.pl> on
# Wed, 24 Feb 2010 14:43:00 +0100).
#
# napisy24.pl API access was granted by the napisy24 admins on 15 Feb 2015.
#
57811e09 24
2855f830 25import StringIO
4ea7498e 26import re
57811e09 27import sys
8a53d3e2 28import mimetypes
2855f830 29import urllib
8a53d3e2 30import urllib2
da0dfee4 31import time
57811e09 32import os
1afc25b2 33import getopt
03a8e2fc 34import socket
2855f830
AM
35import struct
36import zipfile
57811e09 37
a5884ecd
AM
38try:
39 from hashlib import md5 as md5
40except ImportError:
41 from md5 import md5
42
57811e09
AM
# Base name of the running script; prefixes the messages printed to stderr.
prog = os.path.basename(sys.argv[0])

# Extensions (lower case, without the dot) recognised as video files.
video_files = [
    'asf', 'avi', 'divx', 'm2ts', 'mkv', 'mp4',
    'mpeg', 'mpg', 'ogm', 'rm', 'rmvb', 'wmv',
]

# Accepted --lang values mapped to the language code passed to napiprojekt.
languages = {
    'pl': 'PL',
    'en': 'ENG',
}
78cca783 47
2855f830
AM
def calculate_digest(filename):
    """Return the hex MD5 digest of the first 10 MiB of ``filename``.

    Args:
        filename: path of the file to hash.

    Returns:
        The digest as a 32-character lower-case hex string.

    Raises:
        Exception: if the file cannot be opened or read; the message embeds
            the underlying I/O error.
    """
    d = md5()
    try:
        # The context manager closes the handle even when read() fails;
        # previously the handle was left for the garbage collector.
        with open(filename, "rb") as f:
            d.update(f.read(10485760))
    except (IOError, OSError) as e:
        raise Exception('Hashing video file failed: %s' % (e))
    return d.hexdigest()
55
def napiprojekt_hash(z):
    """Derive the five-hex-digit check token napiprojekt expects for a digest.

    For each of five fixed positions, the hex digit found there is added to
    an offset to select a two-digit slice of ``z``; the slice value is
    multiplied by a per-position factor and the last hex digit of the
    product is kept.

    Args:
        z: hex digest string. The largest index touched is 0x10 + 15 = 31,
            so a 32-character MD5 hex digest is presumably expected.

    Returns:
        A string of five lower-case hex digits.

    Raises:
        ValueError: if a selected character or slice is not valid hex (for
            example when ``z`` is too short and the slice is empty).
    """
    idx = (0xe, 0x3, 0x6, 0x8, 0x2)
    mul = (2, 2, 5, 4, 3)
    add = (0, 0xd, 0x10, 0xb, 0x5)

    b = []
    # Walk the three tables in lockstep instead of indexing them with a
    # counter that the loop body then overwrites.
    for position, factor, offset in zip(idx, mul, add):
        t = offset + int(z[position], 16)
        v = int(z[t:t + 2], 16)
        b.append(("%x" % (v * factor))[-1])

    return ''.join(b)
57811e09 72
2855f830
AM
def napisy24_hash(filename):
    """Return the 64-bit napisy24 file hash of ``filename`` as 16 hex digits.

    The hash is the file size plus the sum of all little-endian 64-bit words
    in the first 64 KiB and in the last 64 KiB of the file, reduced modulo
    2**64.

    Args:
        filename: path of the video file.

    Returns:
        The hash formatted with ``%016x``.

    Raises:
        Exception: if the file is smaller than 128 KiB or cannot be
            opened, sized or read.
    """
    longlongformat = '<q'  # little-endian long long
    bytesize = struct.calcsize(longlongformat)
    chunksize = 65536
    # One format for a whole chunk; floor division keeps the count an
    # integer on every Python version.
    chunkformat = '<%dq' % (chunksize // bytesize)

    try:
        # "with" guarantees the handle is closed on every exit path,
        # including the "File too small" error below.
        with open(filename, "rb") as f:
            filesize = os.path.getsize(filename)
            if filesize < chunksize * 2:
                raise Exception('Hashing (napisy24) video file failed: `%s\': File too small' % (filename))

            head = f.read(chunksize)
            f.seek(max(0, filesize - chunksize), 0)
            tail = f.read(chunksize)
    except (IOError, OSError) as e:
        raise Exception('Hashing (napisy24) video file failed: %s' % (e))

    # Summing first and masking once equals masking after every addition.
    total = filesize
    total += sum(struct.unpack(chunkformat, head))
    total += sum(struct.unpack(chunkformat, tail))
    return "%016x" % (total & 0xFFFFFFFFFFFFFFFF)
106
def usage():
    """Write the command-line help text to standard error."""
    help_lines = (
        "Usage: %s [OPTIONS]... [FILE|DIR]..." % prog,
        "Find video files and download matching subtitles from napiprojekt/napisy24 server.",
        "",
        "Supported options:",
        " -h, --help display this help and exit",
        " -l, --lang=LANG subtitles language",
        " -n, --nobackup make no subtitle backup when in update mode",
        " -c, --nocover do not download cover images",
        " -u, --update fetch new and also update existing subtitles",
        " -d, --dest=DIR destination directory",
        "",
        "Report bugs to <arekm@pld-linux.org>.",
    )
    # One line per entry; an empty entry produces a blank line.
    for text in help_lines:
        sys.stderr.write(text + "\n")
1afc25b2 120
4ea7498e
AM
def get_desc_links(digest, file=None):
    """Fetch the napiprojekt description page and return its external links.

    Args:
        digest: video digest, sent as the ``id`` query parameter.
        file: video file name, sent as the ``film`` query parameter.
            ``None`` is sent as an empty string.

    Returns:
        A list of the ``http://`` URLs found in anchors of the page,
        excluding bare site roots and blacklisted sites, in page order.
        ``False`` when the page could not be fetched (best effort: every
        fetch error is swallowed).
    """
    # improve me
    re_link = re.compile(r'<a.*?href=[\'"](http://.*?)[ >\'"]', re.IGNORECASE)
    ignore = [
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'.*dobreprogramy\.pl',
            r'.*napiprojekt\.pl.*',
            r'.*nokaut\.pl.*',
            r'.*rodisite\.com.*',
        )
    ]

    try:
        # quote(None) raises, which used to turn every call that relied on
        # the default argument into an unconditional "return False".
        url = "http://www.napiprojekt.pl/index.php3?www=opis.php3&id=%s&film=%s" % (urllib2.quote(digest), urllib2.quote(file or ""))
        f = urllib2.urlopen(url)
        try:
            d = f.read()
        finally:
            f.close()
    except Exception:
        return False

    # Build the result instead of removing from the list being inspected:
    # a link matching two blacklist patterns used to be removed twice,
    # which raised ValueError (or dropped an unrelated duplicate).
    links = []
    for l in re_link.findall(d):
        # main pages are useless
        if l.count('/') < 3:
            continue
        # blacklisted sites
        if any(i.match(l) for i in ignore):
            continue
        links.append(l)
    return links
4ea7498e 148
8a53d3e2
AM
def get_cover(digest):
    """Download the cover image napiprojekt has for a video digest.

    Args:
        digest: video digest, sent as the ``id`` query parameter.

    Returns:
        A ``(cover, extension)`` tuple: the raw response body and a file
        extension (with leading dot) guessed from the response
        Content-Type. ``False`` when the download fails or no extension
        can be guessed (best effort: every error is swallowed).
    """
    try:
        url = "http://www.napiprojekt.pl/okladka_pobierz.php?id=%s&oceny=-1" % (urllib2.quote(digest))
        f = urllib2.urlopen(url)
        try:
            cover = f.read()
            # Read the headers while the response is still open.
            content_type = f.info()['Content-Type']
        finally:
            # Release the connection even if the read above fails.
            f.close()
        # An unknown type yields an empty list; the IndexError is caught
        # below and reported as "no cover".
        extension = mimetypes.guess_all_extensions(content_type)[-1]
    except Exception:
        return False
    return (cover, extension)
4ea7498e 161
2855f830
AM
def get_subtitle_napisy24(filename, digest=False, lang="pl"):
    """Download subtitles for ``filename`` from the napisy24 service.

    The request is attempted up to three times; transport errors and
    non-200 answers are collected into the final error message.

    Args:
        filename: path of the video file; its name, size and napisy24 hash
            are sent in the request.
        digest: optional MD5 digest sent as ``md5`` when truthy.
        lang: language code sent as ``nl``.

    Returns:
        The concatenated contents of all members of the ZIP archive
        returned by the service.

    Raises:
        Exception: when the service reports no subtitle, the reply cannot
            be unpacked, the unpacked subtitle is empty, or every attempt
            failed. Errors from hashing/sizing the file propagate as well.
    """
    url = "http://napisy24.pl/run/CheckSubAgent.php"

    pdata = [
        ('postAction', 'CheckSub'),
        ('ua', 'pynapi'),
        ('ap', 'XaA!29OkF5Pe'),
        ('nl', lang),
        ('fn', filename),
        ('fh', napisy24_hash(filename)),
        ('fs', os.path.getsize(filename)),
    ]
    if digest:
        pdata.append(('md5', digest))
    postdata = urllib.urlencode(pdata)

    error = "Fetching subtitle (napisy24) failed:"
    # Stays None unless an attempt succeeds, so a failed run can never
    # return a response object or the body of a non-200 reply.
    sub = None
    for _attempt in range(3):
        # Responses without getcode() are treated as successful.
        http_code = 200
        try:
            response = urllib2.urlopen(url, data=postdata)
            try:
                if hasattr(response, 'getcode'):
                    http_code = response.getcode()
                reply = response.read()
            finally:
                response.close()
        except (IOError, OSError) as e:
            error = error + " %s" % (e)
            time.sleep(0.5)
            continue

        if http_code != 200:
            error = error + ",HTTP code: %s" % (str(http_code))
            time.sleep(0.5)
            continue

        if not reply.startswith('OK-2|'):
            if reply.startswith('OK-'):
                raise Exception('Subtitle NOT FOUND')
            raise Exception('Subtitle NOT FOUND (unknown error)')

        # The ZIP payload follows the first '||' separator.
        pos = reply.find('||')
        if pos < 2 or len(reply) <= pos + 2:
            raise Exception('Subtitle NOT FOUND (subtitle too short)')

        try:
            subzip = zipfile.ZipFile(StringIO.StringIO(reply[pos + 2:]))
            sub = ''.join([subzip.read(name) for name in subzip.namelist()])
        except Exception as e:
            raise Exception('Subtitle NOT FOUND (%s)' % e)
        break

    if sub is None or sub == "":
        raise Exception(error)

    return sub
221
222def get_subtitle_napiprojekt(digest, lang="PL"):
2f5b3e87 223 url = "http://napiprojekt.pl/unit_napisy/dl.php?l=%s&f=%s&t=%s&v=pynapi&kolejka=false&nick=&pass=&napios=%s" % \
2855f830 224 (lang, digest, napiprojekt_hash(digest), os.name)
0578010f 225 repeat = 3
226 sub = None
227 http_code = 200
2855f830 228 error = "Fetching subtitle (napiprojekt) failed:"
0578010f 229 while repeat > 0:
230 repeat = repeat - 1
231 try:
232 sub = urllib2.urlopen(url)
233 if hasattr(sub, 'getcode'):
234 http_code = sub.getcode()
235 sub = sub.read()
236 except (IOError, OSError), e:
2e5f9fd7 237 error = error + " %s" % (e)
0578010f 238 time.sleep(0.5)
239 continue
240
241 if http_code != 200:
2e5f9fd7 242 error = error + ",HTTP code: %s" % (str(http_code))
0578010f 243 time.sleep(0.5)
244 continue
cc30ce30
AM
245
246 err_add = ''
7d05389f 247 if not sub.startswith('NPc'):
cc30ce30 248 err_add = " (unknown error)"
33a62f95 249 if len(sub.split('\n')) < 20:
cc30ce30 250 raise Exception('Subtitle NOT FOUND%s' % err_add)
fc516ed9
AM
251
252 repeat = 0
0578010f 253
2e5f9fd7 254 if sub is None or sub == "":
fc516ed9
AM
255 raise Exception(error)
256
0578010f 257 return sub
258
505068dc
AM
def _warn(text):
    """Write one message line to standard error."""
    sys.stderr.write(text + "\n")


def _find_video_files(args):
    """Expand file/directory arguments into a sorted list of files.

    Directories are walked recursively and contribute only files whose
    extension is listed in ``video_files``; other arguments are taken as-is.
    """
    files = []
    for arg in args:
        if not os.path.isdir(arg):
            files.append(arg)
            continue
        for dirpath, dirnames, filenames in os.walk(arg, topdown=False):
            for name in filenames:
                # splitext handles extensions of any length; the former
                # fixed "-4" slice never matched divx/m2ts/mpeg/rmvb/rm.
                extension = os.path.splitext(name)[1][1:].lower()
                if extension in video_files:
                    files.append(os.path.join(dirpath, name))
    files.sort()
    return files


def main(argv=sys.argv):
    """Command-line entry point: fetch subtitles for the given files/dirs.

    Args:
        argv: full argument vector, program name first.

    Returns:
        0 on completion or after ``--help``, 1 for an unsupported language
        or unhandled option, 2 for a command-line syntax error or when no
        file/directory argument is given.
    """
    try:
        # "dest=" and "lang=" take a value; without the "=" getopt
        # rejects the documented --dest=DIR and --lang=LANG forms.
        opts, args = getopt.getopt(argv[1:], "d:hl:nuc", ["dest=", "help", "lang=", "nobackup", "update", "nocover"])
    except getopt.GetoptError as err:
        _warn(str(err))
        usage()
        return 2

    nobackup = False
    nocover = False
    update = False
    lang = 'pl'
    dest = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            return 0
        elif o in ("-l", "--lang"):
            if a in languages:
                lang = a
            else:
                _warn("%s: unsupported language `%s'. Supported languages: %s" % (prog, a, str(sorted(languages))))
                return 1
        elif o in ("-n", "--nobackup"):
            nobackup = True
        elif o in ("-u", "--update"):
            update = True
        elif o in ("-c", "--nocover"):
            nocover = True
        elif o in ("-d", "--dest"):
            dest = a
        else:
            _warn("%s: unhandled option" % prog)
            return 1

    if not args:
        usage()
        return 2

    _warn("%s: Subtitles language `%s'. Finding video files..." % (prog, lang))

    socket.setdefaulttimeout(180)

    files = _find_video_files(args)
    i_total = len(files)

    for i, path in enumerate(files, 1):
        # Strip the real extension, whatever its length.
        basefile = os.path.splitext(path)[0]
        vfile = basefile + '.txt'
        if dest:
            vfile = os.path.join(dest, os.path.split(vfile)[1])

        if os.path.exists(vfile):
            if not update:
                continue
            if not nobackup:
                vfile_bak = vfile + '-bak'
                try:
                    os.rename(vfile, vfile_bak)
                except (IOError, OSError) as e:
                    _warn("%s: Skipping due to backup of `%s' as `%s' failure: %s" % (prog, vfile, vfile_bak, e))
                    continue
                else:
                    _warn("%s: Old subtitle backed up as `%s'" % (prog, vfile_bak))

        _warn("%s: %d/%d: Processing subtitle for %s" % (prog, i, i_total, path))

        # "except Exception" (not a bare except) lets Ctrl-C reach the
        # top-level handler instead of being reported as a failed file.
        try:
            digest = calculate_digest(path)
        except Exception as e:
            _warn("%s: %d/%d: %s" % (prog, i, i_total, e))
            continue

        # napiprojekt first, napisy24 as the fallback. A stray bare
        # "raise" used to make the napiprojekt call unreachable.
        try:
            sub = get_subtitle_napiprojekt(digest, languages[lang])
        except Exception:
            try:
                sub = get_subtitle_napisy24(path, digest, lang)
            except Exception as e:
                _warn("%s: %d/%d: %s" % (prog, i, i_total, e))
                continue

        with open(vfile, 'wb') as fp:
            fp.write(sub)

        desc = get_desc_links(digest, path)
        if desc:
            _warn("%s: %d/%d: Description: " % (prog, i, i_total))
            for desc_i in desc:
                _warn("\t\t%s" % desc_i)

        cover_stored = ""
        if not nocover:
            cover_data = get_cover(digest)
            if cover_data:
                cover, extension = cover_data
                # NOTE(review): the cover is stored next to the video even
                # when --dest is given; confirm whether that is intended.
                with open(basefile + extension, 'wb') as fp:
                    fp.write(cover)
                cover_stored = ", %s COVER STORED (%d bytes)" % (extension, len(cover))

        _warn("%s: %d/%d: SUBTITLE STORED (%d bytes)%s" % (prog, i, i_total, len(sub), cover_stored))

    return 0
57811e09 387
505068dc
AM
if __name__ == "__main__":
    # Script entry point: run main() and use its return value as the
    # process exit status.
    ret = None
    try:
        ret = main()
    except (KeyboardInterrupt, SystemExit):
        sys.stderr.write("%s: Interrupted, aborting.\n" % prog)
        # main() did not return; exit non-zero instead of sys.exit(None),
        # which would report success.
        ret = 1
    sys.exit(ret)
This page took 0.130666 seconds and 4 git commands to generate.