]>
Commit | Line | Data |
---|---|---|
57811e09 AM |
1 | #!/usr/bin/python |
2 | # -*- coding: UTF-8 -*- | |
036ce345 AM |
3 | # |
4 | # Copyright (C) 2009 Arkadiusz Miśkiewicz <arekm@pld-linux.org> | |
5 | # | |
6 | # This program is free software: you can redistribute it and/or modify | |
7 | # it under the terms of the GNU General Public License as published by | |
8 | # the Free Software Foundation, either version 3 of the License, or | |
9 | # (at your option) any later version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, | |
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | # GNU General Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License | |
17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
ae92e79c | 18 | # |
2f5b3e87 AM |
19 | # napiprojekt.pl API is used with napiproject administration consent |
20 | # (given by Marek <kontakt@napiprojekt.pl> at Wed, 24 Feb 2010 14:43:00 +0100) | |
57811e09 | 21 | |
4ea7498e | 22 | import re |
57811e09 | 23 | import sys |
8a53d3e2 AM |
24 | import mimetypes |
25 | import urllib2 | |
da0dfee4 | 26 | import time |
57811e09 | 27 | import os |
1afc25b2 | 28 | import getopt |
57811e09 | 29 | |
a5884ecd AM |
30 | try: |
31 | from hashlib import md5 as md5 | |
32 | except ImportError: | |
33 | from md5 import md5 | |
34 | ||
# Name this script was invoked as; used to prefix the messages it prints.
prog = os.path.basename(sys.argv[0])

# File extensions (lower case, without the leading dot) treated as video files
# when scanning directories.
video_files = [
    'asf', 'avi', 'divx', 'm2ts', 'mkv', 'mp4',
    'mpeg', 'mpg', 'ogm', 'rm', 'rmvb', 'wmv',
]

# Maps the value accepted by --lang to the language code sent to the server.
languages = {
    'pl': 'PL',
    'en': 'ENG',
}
78cca783 | 39 | |
def f(z):
    """Derive a five-character hexadecimal token from a hex digest string.

    get_subtitle() sends the result as the ``t`` query parameter next to the
    digest itself.

    For each of five (index, multiplier, offset) triples:
      * the hex digit at ``z[index]`` is added to ``offset`` to get a position,
      * the (up to) two hex digits starting at that position are read as a
        number and multiplied by ``multiplier``,
      * the last hex digit of the product becomes one output character.

    z -- string of hexadecimal digits (callers pass a 32-character MD5 hex
         digest).

    Returns a 5-character lower-case hex string.
    Raises ValueError if a consulted character is not a hex digit or the
    string is too short for a computed position, IndexError if it is shorter
    than the largest fixed index.
    """
    idx = (0xe, 0x3, 0x6, 0x8, 0x2)
    mul = (2, 2, 5, 4, 3)
    add = (0, 0xd, 0x10, 0xb, 0x5)

    b = []
    # Walk the three parallel tables in lockstep instead of indexing them
    # by hand (the index loop used to overwrite its own loop variable).
    for i, m, a in zip(idx, mul, add):
        t = a + int(z[i], 16)
        # For a 32-character digest t can reach 31, in which case the slice
        # holds a single digit; that is still a valid hex number.
        v = int(z[t:t+2], 16)
        b.append(("%x" % (v * m))[-1])

    return ''.join(b)
57811e09 | 56 | |
def usage():
    """Write the command-line help text to stderr, one line per entry."""
    help_lines = (
        "Usage: %s [OPTIONS]... [FILE|DIR]..." % prog,
        "Find video files and download matching subtitles from napiprojekt server.",
        "",
        "Supported options:",
        " -h, --help display this help and exit",
        " -l, --lang=LANG subtitles language",
        " -n, --nobackup make no subtitle backup when in update mode",
        " -c, --nocover do not download cover images",
        " -u, --update fetch new and also update existing subtitles",
        " -d, --dest=DIR destination directory",
        "",
        "pynapi $Revision$",
        "",
        "Report bugs to <arekm@pld-linux.org>.",
    )
    # An empty entry produces a blank separator line.
    for line in help_lines:
        print >> sys.stderr, line
1afc25b2 | 72 | |
def get_desc_links(digest, file=None):
    """Fetch the description page for a video and return its external links.

    digest -- hex digest identifying the video (sent as the ``id`` parameter).
    file   -- file name sent as the ``film`` parameter; None sends it empty.

    Returns a list of http:// URLs found in ``<a href=...>`` tags of the page,
    excluding links that point at napiprojekt.pl, nokaut.pl or rodisite.com.
    The list may be empty.  Returns False if the page could not be fetched.
    """
    # improve me
    re_link = re.compile(r'<a.*?href=[\'"](http://.*?)[ >\'"]', re.IGNORECASE)
    # One combined pattern: a link is dropped once, however many of the
    # ignored sites it mentions.  (Removing it from the list once per matching
    # pattern raised ValueError or deleted an unrelated duplicate.)
    re_ignore = re.compile(r'napiprojekt\.pl|nokaut\.pl|rodisite\.com', re.IGNORECASE)

    if file is None:
        # quote(None) raises, which made the default argument always fail.
        file = ""

    try:
        url = "http://www.napiprojekt.pl/index.php3?www=opis.php3&id=%s&film=%s" % (urllib2.quote(digest), urllib2.quote(file))
        page = urllib2.urlopen(url)
        try:
            d = page.read()
        finally:
            # Release the connection even when read() fails.
            page.close()
    except Exception:
        # Best effort: the description is optional, so any failure just
        # means "no links".
        return False

    links = []
    for link in re_link.findall(d):
        if not re_ignore.search(link):
            links.append(link)
    return links
4ea7498e | 95 | |
def get_cover(digest):
    """Download the cover image for the video identified by ``digest``.

    Returns a ``(data, extension)`` tuple, where ``data`` is the raw response
    body and ``extension`` (including the leading dot) is guessed from the
    response's Content-Type header.  Returns False when the download fails,
    the header is missing, or no extension is known for the reported type.
    """
    try:
        url = "http://www.napiprojekt.pl/okladka_pobierz.php?id=%s&oceny=-1" % (urllib2.quote(digest))
        response = urllib2.urlopen(url)
        try:
            cover = response.read()
            content_type = response.info()['Content-Type']
        finally:
            # Release the connection even when read() fails.
            response.close()
        # Drop parameters such as "; charset=..." - mimetypes only knows
        # bare "type/subtype" strings and would return no extension at all.
        mime_type = content_type.split(';')[0].strip()
        extension = mimetypes.guess_all_extensions(mime_type)[-1]
    except Exception:
        # Best effort: a cover is optional, so any failure means "no cover".
        return False
    return (cover, extension)
4ea7498e | 108 | |
def calculate_digest(file):
    """Return the MD5 hex digest of the first 10 MiB of ``file``.

    file -- path of the file to hash.

    Raises Exception('Hashing video file failed: ...') if the file cannot be
    opened or read.
    """
    d = md5()
    try:
        fp = open(file, "rb")
        try:
            # Only the first 10485760 bytes (10 MiB) are hashed.
            d.update(fp.read(10485760))
        finally:
            # Close explicitly instead of relying on garbage collection.
            fp.close()
    except (IOError, OSError):
        e = sys.exc_info()[1]
        raise Exception('Hashing video file failed: %s' % (e,))
    return d.hexdigest()
116 | ||
def get_subtitle(digest, lang="PL"):
    """Download the subtitle for the video identified by ``digest``.

    digest -- hex digest of the video (see calculate_digest).
    lang   -- language code sent as the ``l`` parameter.

    The request is tried up to three times, pausing half a second after each
    failed attempt.

    Returns the response body of the first successful request.
    Raises Exception('Subtitle NOT FOUND...') when the server answers with
    fewer than 20 lines, and Exception('Fetching subtitle failed: ...')
    listing the collected errors when every attempt fails.
    """
    url = "http://napiprojekt.pl/unit_napisy/dl.php?l=%s&f=%s&t=%s&v=pynapi&kolejka=false&nick=&pass=&napios=%s" % \
        (lang, digest, f(digest), os.name)
    error = "Fetching subtitle failed:"

    for attempt in (1, 2, 3):
        http_code = 200
        try:
            response = urllib2.urlopen(url)
            try:
                # getcode() is not available on every response object.
                if hasattr(response, 'getcode'):
                    http_code = response.getcode()
                sub = response.read()
            finally:
                response.close()
        except (IOError, OSError):
            error = error + " %s" % (sys.exc_info()[1],)
            time.sleep(0.5)
            continue

        if http_code != 200:
            error = error + ",HTTP code: %s" % (str(http_code))
            time.sleep(0.5)
            continue

        # A body that is too short to be a subtitle means "not found"; when it
        # does not even start with 'NPc' the reason is reported as unknown.
        err_add = ''
        if not sub.startswith('NPc'):
            err_add = " (unknown error)"
        if len(sub.split('\n')) < 20:
            raise Exception('Subtitle NOT FOUND%s' % err_add)

        return sub

    # Every attempt failed.  Raise instead of handing back whatever the last
    # attempt left behind (an error page or an unread response object).
    raise Exception(error)
153 | ||
def main(argv=sys.argv):
    """Command-line entry point: find video files and fetch their subtitles.

    argv -- full argument vector, program name first (defaults to sys.argv).

    Every FILE argument is processed as given; every DIR argument is walked
    recursively and files whose extension is listed in ``video_files`` are
    processed.  For each video the subtitle is stored as ``<name>.txt`` (in
    the --dest directory when given) and, unless --nocover is used, the cover
    image is stored next to the video.

    Returns the process exit status: 0 on completion, 1 for an invalid option
    value, 2 for a usage error.
    """

    try:
        # The trailing '=' marks long options that take a value; without it
        # getopt rejects the documented --dest=DIR and --lang=LANG forms.
        opts, args = getopt.getopt(argv[1:], "d:hl:nuc", ["dest=", "help", "lang=", "nobackup", "update", "nocover"])
    except getopt.GetoptError:
        print >> sys.stderr, str(sys.exc_info()[1])
        usage()
        return 2

    nobackup = False
    nocover = False
    update = False
    lang = 'pl'
    dest = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            return 0
        elif o in ("-l", "--lang"):
            if a in languages:
                lang = a
            else:
                print >> sys.stderr, "%s: unsupported language `%s'. Supported languages: %s" % (prog, a, str(languages.keys()))
                return 1
        elif o in ("-n", "--nobackup"):
            nobackup = True
        elif o in ("-u", "--update"):
            update = True
        elif o in ("-c", "--nocover"):
            nocover = True
        elif o in ("-d", "--dest"):
            dest = a
        else:
            print >> sys.stderr, "%s: unhandled option" % prog
            return 1

    if not args:
        usage()
        return 2

    print >> sys.stderr, "%s: Subtitles language `%s'. Finding video files..." % (prog, lang)

    files = []
    for arg in args:
        if os.path.isdir(arg):
            for dirpath, dirnames, filenames in os.walk(arg, topdown=False):
                for name in filenames:
                    # splitext copes with extensions of any length
                    # ('rm', 'avi', 'mpeg', ...).
                    extension = os.path.splitext(name)[1][1:].lower()
                    if extension in video_files:
                        files.append(os.path.join(dirpath, name))
        else:
            # Explicitly named files are processed whatever their extension.
            files.append(arg)

    files.sort()

    i_total = len(files)
    i = 0

    for file in files:
        i += 1

        # Video path without its extension; base of subtitle and cover names.
        basefile = os.path.splitext(file)[0]
        vfile = basefile + '.txt'
        if dest:
            vfile = os.path.join(dest, os.path.split(vfile)[1])

        if not update and os.path.exists(vfile):
            continue

        print >> sys.stderr, "%s: %d/%d: Processing subtitle for %s" % (prog, i, i_total, file)

        try:
            digest = calculate_digest(file)
            sub = get_subtitle(digest, languages[lang])
        except Exception:
            # 'except Exception' rather than a bare except, so that Ctrl-C
            # aborts the run instead of merely skipping the current file.
            print >> sys.stderr, "%s: %d/%d: %s" % (prog, i, i_total, sys.exc_info()[1])
            continue

        # Move the old subtitle aside only once the new one has been
        # downloaded; a failed update must not take the working file away.
        if not nobackup and os.path.exists(vfile):
            vfile_bak = vfile + '-bak'
            try:
                os.rename(vfile, vfile_bak)
            except (IOError, OSError):
                print >> sys.stderr, "%s: Skipping due to backup of `%s' as `%s' failure: %s" % (prog, vfile, vfile_bak, sys.exc_info()[1])
                continue
            else:
                print >> sys.stderr, "%s: Old subtitle backed up as `%s'" % (prog, vfile_bak)

        fp = open(vfile, 'wb')
        try:
            fp.write(sub)
        finally:
            fp.close()

        desc = get_desc_links(digest, file)
        if desc:
            print >> sys.stderr, "%s: %d/%d: Description: " % (prog, i, i_total)
            for desc_i in desc:
                print >> sys.stderr, "\t\t%s" % desc_i

        cover_stored = ""
        if not nocover:
            cover_data = get_cover(digest)
            if cover_data:
                cover, extension = cover_data
                # NOTE(review): the cover is written next to the video even
                # when --dest is given - confirm this is intended.
                fp = open(basefile + extension, 'wb')
                try:
                    fp.write(cover)
                finally:
                    fp.close()
                cover_stored = ", %s COVER STORED (%d bytes)" % (extension, len(cover))

        print >> sys.stderr, "%s: %d/%d: SUBTITLE STORED (%d bytes)%s" % (prog, i, i_total, len(sub), cover_stored)

    return 0
57811e09 | 271 | |
if __name__ == "__main__":
    # Assume failure until main() hands back a status, so an interrupted run
    # does not exit with status 0.
    ret = 1
    try:
        ret = main()
    except (KeyboardInterrupt, SystemExit):
        print >> sys.stderr, "%s: Interrupted, aborting." % prog
    sys.exit(ret)