pynapi.py

   1 #!/usr/bin/python
   2 # -*- coding: UTF-8 -*-
   3 #
   4 #  Copyright (C) 2009 Arkadiusz Miśkiewicz <arekm@pld-linux.org>
   5 #
   6 #  This program is free software: you can redistribute it and/or modify
   7 #  it under the terms of the GNU General Public License as published by
   8 #  the Free Software Foundation, either version 3 of the License, or
   9 #  (at your option) any later version.
  10 #
  11 #  This program is distributed in the hope that it will be useful,
  12 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 #  GNU General Public License for more details.
  15 #
  16 #  You should have received a copy of the GNU General Public License
  17 #  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19 # napiprojekt.pl API is used with napiproject administration consent
  20 # (given by Marek <kontakt@napiprojekt.pl> at Wed, 24 Feb 2010 14:43:00 +0100)
  21
  22 import re
  23 import sys
  24 import mimetypes
  25 import urllib2
  26 import time
  27 import os
  28 import getopt
  29 import socket
  30
  31 try:
  32     from hashlib import md5 as md5
  33 except ImportError:
  34     from md5 import md5
  35
  36 prog = os.path.basename(sys.argv[0])
  37
  38 video_files = [ 'asf', 'avi', 'divx', 'm2ts', 'mkv', 'mp4', 'mpeg', 'mpg', 'ogm', 'rm', 'rmvb', 'wmv' ]
  39 languages = { 'pl': 'PL', 'en': 'ENG' }
  40
  41 def f(z):
  42     idx = [ 0xe, 0x3,  0x6, 0x8, 0x2 ]
  43     mul = [   2,   2,    5,   4,   3 ]
  44     add = [   0, 0xd, 0x10, 0xb, 0x5 ]
  45
  46     b = []
  47     for i in xrange(len(idx)):
  48         a = add[i]
  49         m = mul[i]
  50         i = idx[i]
  51
  52         t = a + int(z[i], 16)
  53         v = int(z[t:t+2], 16)
  54         b.append( ("%x" % (v*m))[-1] )
  55
  56     return ''.join(b)
  57
  58 def usage():
  59     print >> sys.stderr, "Usage: %s [OPTIONS]... [FILE|DIR]..." % prog
  60     print >> sys.stderr, "Find video files and download matching subtitles from napiprojekt server."
  61     print >> sys.stderr
  62     print >> sys.stderr, "Supported options:"
  63     print >> sys.stderr, "     -h, --help            display this help and exit"
  64     print >> sys.stderr, "     -l, --lang=LANG       subtitles language"
  65     print >> sys.stderr, "     -n, --nobackup        make no subtitle backup when in update mode"
  66     print >> sys.stderr, "     -c, --nocover         do not download cover images"
  67     print >> sys.stderr, "     -u, --update          fetch new and also update existing subtitles"
  68     print >> sys.stderr, "     -d, --dest=DIR        destination directory"
  69     print >> sys.stderr
  70     print >> sys.stderr, "pynapi $Revision$"
  71     print >> sys.stderr
  72     print >> sys.stderr, "Report bugs to <arekm@pld-linux.org>."
  73
  74 def get_desc_links(digest, file=None):
  75     # improve me
  76     re_link = re.compile(r'<a.*?href=[\'"](http://.*?)[ >\'"]', re.IGNORECASE)
  77     d = ""
  78
  79     try:
  80         url = "http://www.napiprojekt.pl/index.php3?www=opis.php3&id=%s&film=%s" % (urllib2.quote(digest), urllib2.quote(file))
  81         f = urllib2.urlopen(url)
  82         d = f.read()
  83         f.close()
  84     except Exception, e:
  85         return False
  86     links = re_link.findall(d)
  87     ignore = [ r'.*dobreprogramy\.pl', r'.*napiprojekt\.pl.*', r'.*nokaut\.pl.*', r'.*rodisite\.com.*' ]
  88     for i in range(0, len(ignore)):
  89         ignore[i] = re.compile(ignore[i], re.IGNORECASE)
  90     ilinks = links[:]
  91     for l in ilinks:
  92         # main pages are useless
  93         if l.count('/') < 3:
  94             links.remove(l)
  95             continue
  96         # blacklisted sites
  97         for i in ignore:
  98             if i.match(l):
  99                 links.remove(l)
 100     return links
 101
 102 def get_cover(digest):
 103     cover = ""
 104     try:
 105         url = "http://www.napiprojekt.pl/okladka_pobierz.php?id=%s&oceny=-1" % (urllib2.quote(digest))
 106         f = urllib2.urlopen(url)
 107         cover = f.read()
 108         f.close()
 109         content_type = f.info()['Content-Type']
 110         extension = mimetypes.guess_all_extensions(content_type)[-1]
 111     except Exception, e:
 112         return False
 113     return (cover, extension)
 114
 115 def calculate_digest(file):
 116     d = md5()
 117     try:
 118         d.update(open(file, "rb").read(10485760))
 119     except (IOError, OSError), e:
 120         raise Exception('Hashing video file failed: %s' % ( e ))
 121     return d.hexdigest()
 122
 123 def get_subtitle(digest, lang="PL"):
 124     url = "http://napiprojekt.pl/unit_napisy/dl.php?l=%s&f=%s&t=%s&v=pynapi&kolejka=false&nick=&pass=&napios=%s" % \
 125         (lang, digest, f(digest), os.name)
 126     repeat = 3
 127     sub = None
 128     http_code = 200
 129     error = "Fetching subtitle failed:"
 130     while repeat > 0:
 131         repeat = repeat - 1
 132         try:
 133             sub = urllib2.urlopen(url)
 134             if hasattr(sub, 'getcode'):
 135                 http_code = sub.getcode()
 136             sub = sub.read()
 137         except (IOError, OSError), e:
 138             error = error + " %s" % (e)
 139             time.sleep(0.5)
 140             continue
 141
 142         if http_code != 200:
 143             error = error + ",HTTP code: %s" % (str(http_code))
 144             time.sleep(0.5)
 145             continue
 146
 147         err_add = ''
 148         if not sub.startswith('NPc'):
 149             err_add = " (unknown error)"
 150         if len(sub.split('\n')) < 20:
 151             raise Exception('Subtitle NOT FOUND%s' % err_add)
 152
 153         repeat = 0
 154
 155     if sub is None or sub == "":
 156         raise Exception(error)
 157
 158     return sub
 159
 160 def main(argv=sys.argv):
 161
 162     try:
 163         opts, args = getopt.getopt(argv[1:], "d:hl:nuc", ["dest", "help", "lang", "nobackup", "update", "nocover"])
 164     except getopt.GetoptError, err:
 165         print str(err)
 166         usage()
 167         return 2
 168
 169     output = None
 170     verbose = False
 171     nobackup = False
 172     nocover = False
 173     update = False
 174     lang = 'pl'
 175     dest = None
 176     for o, a in opts:
 177         if o == "-v":
 178             verbose = True
 179         elif o in ("-h", "--help"):
 180             usage()
 181             return 0
 182         elif o in ("-l", "--lang"):
 183             if a in languages:
 184                 lang = a
 185             else:
 186                 print >> sys.stderr, "%s: unsupported language `%s'. Supported languages: %s" % (prog, a, str(languages.keys()))
 187                 return 1
 188         elif o in ("-n", "--nobackup"):
 189             nobackup = True
 190         elif o in ("-u", "--update"):
 191             update = True
 192         elif o in ("-c", "--nocover"):
 193             nocover = True
 194         elif o in ("-d", "--dest"):
 195             dest = a
 196         else:
 197             print >> sys.stderr, "%s: unhandled option" % prog
 198             return 1
 199
 200     if not args:
 201         usage()
 202         return 2
 203
 204     print >> sys.stderr, "%s: Subtitles language `%s'. Finding video files..." % (prog, lang)
 205
 206     socket.setdefaulttimeout(180)
 207
 208     files = []
 209     for arg in args:
 210         if os.path.isdir(arg):
 211             for dirpath, dirnames, filenames in os.walk(arg, topdown=False):
 212                 for file in filenames:
 213                     if file[-4:-3] == '.' and file.lower()[-3:] in video_files:
 214                         files.append(os.path.join(dirpath, file))
 215         else:
 216             files.append(arg)
 217
 218     files.sort()
 219
 220     i_total = len(files)
 221     i = 0
 222
 223     for file in files:
 224         i += 1
 225
 226         vfile = file + '.txt'
 227         basefile = file
 228         if len(file) > 4:
 229             basefile = file[:-4]
 230             vfile = basefile + '.txt'
 231         if dest:
 232             vfile = os.path.join(dest, os.path.split(vfile)[1])
 233
 234         if not update and os.path.exists(vfile):
 235             continue
 236
 237         if not nobackup and os.path.exists(vfile):
 238             vfile_bak = vfile + '-bak'
 239             try:
 240                 os.rename(vfile, vfile_bak)
 241             except (IOError, OSError), e:
 242                 print >> sys.stderr, "%s: Skipping due to backup of `%s' as `%s' failure: %s" % (prog, vfile, vfile_bak, e)
 243                 continue
 244             else:
 245                 print >> sys.stderr, "%s: Old subtitle backed up as `%s'" % (prog, vfile_bak)
 246
 247         print >> sys.stderr, "%s: %d/%d: Processing subtitle for %s" % (prog, i, i_total, file)
 248
 249         try:
 250             digest = calculate_digest(file)
 251             sub = get_subtitle(digest, languages[lang])
 252         except:
 253             print >> sys.stderr, "%s: %d/%d: %s" % (prog, i, i_total, sys.exc_info()[1])
 254             continue
 255
 256         fp = open(vfile, 'wb')
 257         fp.write(sub)
 258         fp.close()
 259
 260         desc = get_desc_links(digest, file)
 261         if desc:
 262             print >> sys.stderr, "%s: %d/%d: Description: " % (prog, i, i_total)
 263             for desc_i in desc:
 264                 print >> sys.stderr, "\t\t%s" % desc_i
 265
 266         cover_stored = ""
 267         if not nocover:
 268             cover_data = get_cover(digest)
 269             if cover_data:
 270                 cover, extension = cover_data
 271                 fp = open(basefile + extension, 'wb')
 272                 fp.write(cover)
 273                 fp.close()
 274                 cover_stored = ", %s COVER STORED (%d bytes)" % (extension, len(cover))
 275
 276         print >> sys.stderr, "%s: %d/%d: SUBTITLE STORED (%d bytes)%s" % (prog, i, i_total, len(sub), cover_stored)
 277
 278     return 0
 279
 280 if __name__ == "__main__":
 281     ret = None
 282     try:
 283         ret = main()
 284     except (KeyboardInterrupt, SystemExit):
 285         print >> sys.stderr, "%s: Interrupted, aborting." % prog
 286     sys.exit(ret)