]> git.pld-linux.org Git - packages/youtube-dl.git/blame - 10291.diff
- up to 2018.05.26
[packages/youtube-dl.git] / 10291.diff
CommitLineData
b9191cfa
ER
1commit ea80d6ff9bc9fc8c35ba0795126cc55e2fa81a21
2Author: zmobbie <ottoxas@hotmail.com>
3Date: Thu Aug 11 06:07:45 2016 +0300
4
5 Kanal2 Add new extractor
6
7diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
8index 387230be0..9c9d0f829 100644
9--- a/youtube_dl/extractor/extractors.py
10+++ b/youtube_dl/extractor/extractors.py
56eb6cdc
AM
11@@ -491,6 +491,7 @@ from .jpopsukitv import JpopsukiIE
12 from .kakao import KakaoIE
b9191cfa 13 from .kaltura import KalturaIE
b9191cfa 14 from .kanalplay import KanalPlayIE
56eb6cdc 15+from .kanal2 import Kanal2IE
b9191cfa
ER
16 from .kankan import KankanIE
17 from .karaoketv import KaraoketvIE
56eb6cdc 18 from .karrierevideos import KarriereVideosIE
b9191cfa
ER
19diff --git a/youtube_dl/extractor/kanal2.py b/youtube_dl/extractor/kanal2.py
20new file mode 100644
21index 000000000..97ce8b5ea
22--- /dev/null
23+++ b/youtube_dl/extractor/kanal2.py
24@@ -0,0 +1,160 @@
25+from __future__ import unicode_literals
26+from datetime import (
27+ datetime,
28+ timedelta,
29+)
30+
31+from .common import InfoExtractor
32+from ..utils import (
33+ update_url_query,
34+ xpath_text,
35+ str_to_int,
36+ int_or_none,
37+ HEADRequest,
38+)
39+
40+import re
41+
42+
43+class Kanal2IE(InfoExtractor):
44+ _VALID_URL = r'(?P<base>.+\.postimees\.ee)[a-zA-Z0-9\/._-]+\?[a-zA-Z0-9=&._-]*id=(?P<id>[0-9]+)[^ ]*'
45+ _TESTS = [{
46+ # The most ordinary case
47+ 'url': 'http://kanal2.postimees.ee/pluss/video/?id=40792',
48+ 'md5': '173e29daea5f5fab49390bddd78aaaf0',
49+ 'info_dict': {
50+ 'id': '40792',
51+ 'ext': 'flv',
52+ 'title': 'Aedniku aabits (06.08.2016 10:30)',
53+ 'thumbnail': 'http://kanal2.postimees.ee/imagecache/http_img.cdn.mind.ee/kanal2//14/100/00033/0053_4468c974c1010a21817c1ee37f3e7902.jpeg',
54+ 'description': 'Aedniku aabits" on saade, mis pakub kaasaelamist ja teadmisi nii algajatele, kui juba kogenud rohenäppudele. Kõik alates vajalikest näpunäidetest, nutikatest lahendustest, uudistoodetest kuni taimede hingeeluni ning aias kasutatava tehnikani välja.',
55+ 'upload_date': '20160805',
56+ 'timestamp': 1470434400,
57+ }
58+ }, {
 59+        # Embed player; also needs login in reality, but all the streams are accessible without logging in
60+ 'url': 'http://kanal2.postimees.ee/video/lonelyPlayer?videoid=28848',
61+ 'md5': '18edb2fd235c06a60b81b3590a357ace',
62+ 'info_dict': {
63+ 'id': '28848',
64+ 'ext': 'flv',
65+ 'title': 'Viimane võmm - Rita, ära jama (24.11.2015 21:30)',
66+ 'thumbnail': 'http://kanal2.postimees.ee/imagecache/http_img.cdn.mind.ee/kanal2//14/100/00002/0050_4468c974c1010a21817c1ee37f3e7902.jpeg',
67+ 'description': 'Kinnisvaraomanik Villem Meius leitakse oma korterist tapetuna. Turvakaamera video paljastab surnukeha kõrvalt lahkumas ühe Meiuse üürniku - ei kellegi muu, kui politseinike kaitseingli Rita! Rita võetakse vahi alla ning kogu jaoskond näeb vaeva selle nimel, et teda vabastada ning tema kinniistumise ajal Rita baari käigus hoida. Uurimise käigus paljastub ulatuslik ja häbitu kinnisvarahangeldamine Kalamajas, mille niidid ulatuvad ka justiitsmaailma ladvikusse. Vastasleeri moodustavad Kalamaja põliselanikud. Organisatsiooni peakorter asub kellegi Mort Pärgi matusebüroos. Sealt hakkabki asi lõpuks hargnema.'
68+ }
69+ }, {
70+ # Other ordinary case
71+ 'url': 'http://kanal2.postimees.ee/pluss/preview?id=40744',
72+ 'md5': '2579cdbf16013d7e7a7361a832bc818e',
73+ 'info_dict': {
74+ 'id': '40744',
75+ 'ext': 'flv',
76+ 'title': 'Kaunis Dila (10.08.2016 19:00)',
77+ 'thumbnail': 'http://kanal2.postimees.ee/imagecache/http_img.cdn.mind.ee/kanal2//16/300/00208/0050_4468c974c1010a21817c1ee37f3e7902.jpeg',
78+ }
79+ }, {
 80+        # Not on the kanal2 subdomain like the others; the site has a different layout, so a lot of data can't be accessed, but the APIs are the same. Also has a rating
81+ 'url': 'http://kanal12.postimees.ee/vaatasaateid/Punkri-joulueri?videoid=248',
82+ 'md5': '4633c310980201e4d8195d22b948ad10',
83+ 'info_dict': {
84+ 'id': '248',
85+ 'ext': 'flv',
86+ 'title': 'Punkri jõulueri',
87+ 'thumbnail': 'http://img.cdn.mind.ee/kanal2/clips/KANAL 12/punkri joulueri.jpeg',
88+ 'description': 'Eestlaste lemmik-krõbesaade lõpetab aasta loodetavasti südamliku pühade-eriga! Hapukapsad ninast välja! Jeesuse sündi on tulnud tähistama Ivo Linna, pastor, saatan ja paljud teised. Saadet juhivad Marge Tava, Aleksander Ots ja Marek Reinaas.',
89+ 'average_rating': int,
90+ }
91+ }]
92+
93+ def _real_extract(self, url):
94+ video_id = self._match_id(url)
 95+        # base url, e.g. kanal2.postimees.ee (the host part of the address, shown in bold in Chrome's address bar)
96+ base = re.compile(self._VALID_URL).match(url).group('base')
97+
 98+        # Acquire the video's address, where we can search for website data (needed in the case of the embed player)
99+ if "pluss" not in url and "kanal2" in base:
100+ # Generic url for all the kanal2 videos, may redirect
101+ url = base + '/pluss/video/?id=' + video_id
102+ # This part copied from generic.py, bypasses redirects
103+ head_response = self._request_webpage(HEADRequest(url), video_id)
104+ if head_response is not False:
105+ new_url = head_response.geturl()
106+ if url != new_url:
107+ self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
108+ return self.url_result(new_url)
109+ # copied until here
110+
111+ xmlfile = self._download_xml(update_url_query(base + '/video/playerPlaylistApi', {'id': video_id}), video_id)
112+
113+        # Remove stacked URLs (e.g. http://test.comhttp://test2.com; removes everything before the second http)
114+ thumbnail = re.compile('[^\0]*(?P<realurl>https?:\/\/[^"]+)[^\0]*').match(base + xpath_text(xmlfile, './playlist/video/thumbUrl')).group('realurl')
115+ average_rating = int_or_none(xpath_text(xmlfile, './playlist/video/rating/value'))
116+
117+ # Determine, whether the stream is high or low quality and act accordingly
118+ for stream in xmlfile.findall('./playlist/video/streamItems/streamItem'):
119+            # Found a low-quality stream, but keep iterating streamItems in the hope of finding an HQ stream
120+ if "k2lq" in stream.get('streamName'):
121+ streamname = stream.get('streamName')
122+ width = str_to_int(stream.get('width'))
123+ height = str_to_int(stream.get('height'))
124+ continue
125+ # Found high quality stream, looping no longer necessary
126+ if "k2hq" in stream.get('streamName'):
127+ streamname = stream.get('streamName')
128+ width = str_to_int(stream.get('width'))
129+ height = str_to_int(stream.get('height'))
130+ break
131+
132+ webpage = self._download_webpage(url, video_id)
133+        # Is the following info on the website? If the div player-container is present, the info is too
134+ if 'player-container' in webpage:
135+ # Find description
136+ description = self._search_regex(r'[^\0]*<p class="full"[^>]*>([^<]*)<\/p>[^\0]*', webpage, 'description', default=None)
137+ if description is not None:
138+                # Remove a lot of trailing spaces that were added to position the text correctly on the webpage
139+ description = description.strip()
140+ # Episode and season
141+ epandseason = self._search_regex('[^\0]*(Osa *[0-9]+ *Hooaeg *[0-9]+)[^\0]*', webpage, 'epandseason', default=None)
142+ if epandseason is not None:
143+ episode = int_or_none(re.compile('Osa *(?P<episode>[0-9]+) *Hooaeg *[0-9]+').match(epandseason).group('episode'))
144+ season = int_or_none(re.compile('Osa *[0-9]+ *Hooaeg *(?P<season>[0-9]+)').match(epandseason).group('season'))
145+ # Timestamp generation
146+ dateandtime = self._search_regex('[^\0]*(eetris[^\0]*<\/span>[^\0]*[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,}[^0-9]*[0-9]{1,2}:[0-9]{1,2})[^\0]*', webpage, 'dateandtime', default=None)
147+ if dateandtime is not None:
148+ date = re.compile('[^\0]*eetris[^\0]*<\/span>[^\0]*(?P<date>[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,})[^0-9]*(?P<time>[0-9]{1,2}:[0-9]{1,2})[^\0]*').match(dateandtime).group('date')
149+ time = re.compile('[^\0]*eetris[^\0]*<\/span>[^\0]*(?P<date>[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,})[^0-9]*(?P<time>[0-9]{1,2}:[0-9]{1,2})[^\0]*').match(dateandtime).group('time')
150+ timestamp = int_or_none((datetime.strptime(date + " " + time, '%d.%m.%Y %H:%M') - datetime(1970, 1, 1) + timedelta(seconds=60 * 60 * 2)).total_seconds()) # No dst support, but added the 2 default hours of estonia
151+ player_url = self._search_regex('[^\0]embedSWF\("([^"]+)[^\0]', webpage, 'player_url', default=None)
152+
153+        # There are videos that can only be seen when logged in, so some data can't be accessed (but we can still download the video)
154+ else:
155+            # Try to get the description from the API (which mostly returns an empty result) or, failing that, from the og meta tag.
156+ description = xpath_text(xmlfile, './playlist/video/description') or self._search_regex('[^\0]og:description"[^\0]*content="(.*)\" \/>', webpage, 'description', default=None)
157+            # Basic character parsing to turn character references into real characters. Also remove excessive whitespace
158+ if description is not None:
159+ description = description.strip().replace("&otilde;", "õ").replace("&Otilde;", "Õ").replace("&auml;", "ä").replace("&Auml;", "Ä").replace("&ouml;", "ö").replace("&Ouml;", "Ö").replace("&uuml;", "ü").replace("&Uuml;", "Ü").replace("&amp;", "&")
160+
161+ player_url = None
162+ episode = int_or_none(xpath_text(xmlfile, './playlist/video/episode')) or None
163+ season = None # Episode is mostly empty in the xml but season does not even appear there
164+ timestamp = None
165+ return {
166+ 'app': "kanal2vod",
167+ 'average_rating': average_rating,
168+ 'description': description,
169+ 'episode_number': episode,
170+ 'ext': "flv",
171+ 'height': height,
172+ 'id': video_id,
173+ 'page_url': url,
174+ 'player_url': player_url,
175+ 'play_path': "mp4:" + streamname,
176+ 'protocol': "rtmp",
177+ 'rtmp_real_time': True,
178+ 'season_number': season,
179+ 'timestamp': timestamp,
180+ 'title': xpath_text(xmlfile, './playlist/video/name'),
181+ 'thumbnail': thumbnail,
182+ 'url': xmlfile.find('./playlist/video/streamItems').get('host') + streamname,
183+ 'width': width,
184+ }
185
186commit 0817510dfba8a7de1b8e46f7755994510f82366e
187Author: zmobbie <ottoxas@hotmail.com>
188Date: Fri Aug 12 00:55:34 2016 +0300
189
190 Kanal2 Add new extractor
191
192diff --git a/youtube_dl/extractor/kanal2.py b/youtube_dl/extractor/kanal2.py
193index 97ce8b5ea..b42b3f7a2 100644
194--- a/youtube_dl/extractor/kanal2.py
195+++ b/youtube_dl/extractor/kanal2.py
196@@ -11,13 +11,14 @@ from ..utils import (
197 str_to_int,
198 int_or_none,
199 HEADRequest,
200+ unescapeHTML,
201 )
202
203 import re
204
205
206 class Kanal2IE(InfoExtractor):
207- _VALID_URL = r'(?P<base>.+\.postimees\.ee)[a-zA-Z0-9\/._-]+\?[a-zA-Z0-9=&._-]*id=(?P<id>[0-9]+)[^ ]*'
208+ _VALID_URL = r'(?P<base>https?:\/\/.+\.postimees\.ee)[a-zA-Z0-9\/._-]+\?[a-zA-Z0-9=&._-]*id=(?P<id>[a-zA-Z0-9_-]+)[^ ]*'
209 _TESTS = [{
210 # The most ordinary case
211 'url': 'http://kanal2.postimees.ee/pluss/video/?id=40792',
212@@ -68,12 +69,8 @@ class Kanal2IE(InfoExtractor):
213
214 def _real_extract(self, url):
215 video_id = self._match_id(url)
216- # base url, e.g. kanal2.postimees.ee (in chrome, the black part of the address)
217 base = re.compile(self._VALID_URL).match(url).group('base')
218-
219- # Acquire the video's address, where we can search for website data(needed in case of embed player)
220 if "pluss" not in url and "kanal2" in base:
221- # Generic url for all the kanal2 videos, may redirect
222 url = base + '/pluss/video/?id=' + video_id
223 # This part copied from generic.py, bypasses redirects
224 head_response = self._request_webpage(HEADRequest(url), video_id)
225@@ -82,79 +79,73 @@ class Kanal2IE(InfoExtractor):
226 if url != new_url:
227 self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
228 return self.url_result(new_url)
229- # copied until here
230
231 xmlfile = self._download_xml(update_url_query(base + '/video/playerPlaylistApi', {'id': video_id}), video_id)
232+ host = xmlfile.find('./playlist/video/streamItems').get('host')
233+
234+ formats = [{
235+ 'protocol': re.compile('(?P<protocol>.+):\/\/[^\0]*').match(host).group('protocol') or 'rtmp',
236+ 'app': re.compile(((re.compile('(?P<protocol>.+):\/\/[^\0]*').match(host).group('protocol') or 'rtmp') + ':\/\/[^\0]*\/(?P<app>.+\/)')).match(host).group('app') or 'kanal2vod',
237+ 'url': host + stream.get('streamName'),
238+ 'play_path': 'mp4:' + stream.get('streamName'),
239+ 'ext': 'flv',
240+ 'height': str_to_int(stream.get('height')),
241+ 'width': str_to_int(stream.get('width')),
242+ 'rtmp_real_time': True,
243+ } for stream in xmlfile.findall('./playlist/video/streamItems/streamItem')]
244+ self._sort_formats(formats)
245
246- # Remove stacked urls(e.g. http://test.comhttp://test2.com, removes everything before second http)
247+        # Remove stacked URLs (e.g. http://test.comhttp://test2.com; removes everything before the second http) (kanal12 fix)
248 thumbnail = re.compile('[^\0]*(?P<realurl>https?:\/\/[^"]+)[^\0]*').match(base + xpath_text(xmlfile, './playlist/video/thumbUrl')).group('realurl')
249 average_rating = int_or_none(xpath_text(xmlfile, './playlist/video/rating/value'))
250
251- # Determine, whether the stream is high or low quality and act accordingly
252- for stream in xmlfile.findall('./playlist/video/streamItems/streamItem'):
253- # Found low quality stream, but keep iterating streamItems in hope of finding hq stream
254- if "k2lq" in stream.get('streamName'):
255- streamname = stream.get('streamName')
256- width = str_to_int(stream.get('width'))
257- height = str_to_int(stream.get('height'))
258- continue
259- # Found high quality stream, looping no longer necessary
260- if "k2hq" in stream.get('streamName'):
261- streamname = stream.get('streamName')
262- width = str_to_int(stream.get('width'))
263- height = str_to_int(stream.get('height'))
264- break
265-
266 webpage = self._download_webpage(url, video_id)
267- # Is the following info on website? if div player-container is present, info also is
268 if 'player-container' in webpage:
269- # Find description
270 description = self._search_regex(r'[^\0]*<p class="full"[^>]*>([^<]*)<\/p>[^\0]*', webpage, 'description', default=None)
271 if description is not None:
272- # Remove a lot of trailing spaces, that were added to get the text to be in the right place on webpage
273 description = description.strip()
274- # Episode and season
275- epandseason = self._search_regex('[^\0]*(Osa *[0-9]+ *Hooaeg *[0-9]+)[^\0]*', webpage, 'epandseason', default=None)
276- if epandseason is not None:
277- episode = int_or_none(re.compile('Osa *(?P<episode>[0-9]+) *Hooaeg *[0-9]+').match(epandseason).group('episode'))
278- season = int_or_none(re.compile('Osa *[0-9]+ *Hooaeg *(?P<season>[0-9]+)').match(epandseason).group('season'))
279- # Timestamp generation
280- dateandtime = self._search_regex('[^\0]*(eetris[^\0]*<\/span>[^\0]*[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,}[^0-9]*[0-9]{1,2}:[0-9]{1,2})[^\0]*', webpage, 'dateandtime', default=None)
281- if dateandtime is not None:
282- date = re.compile('[^\0]*eetris[^\0]*<\/span>[^\0]*(?P<date>[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,})[^0-9]*(?P<time>[0-9]{1,2}:[0-9]{1,2})[^\0]*').match(dateandtime).group('date')
283- time = re.compile('[^\0]*eetris[^\0]*<\/span>[^\0]*(?P<date>[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,})[^0-9]*(?P<time>[0-9]{1,2}:[0-9]{1,2})[^\0]*').match(dateandtime).group('time')
284+
285+ epandseasonregex = re.compile('Osa *(?P<episode>[0-9]+) *Hooaeg *(?P<season>[0-9]+)').match(self._search_regex('[^\0]*(Osa *[0-9]+ *Hooaeg *[0-9]+)[^\0]*', webpage, 'epandseason', default=None))
286+ if epandseasonregex is not None:
287+ episode = int_or_none(epandseasonregex.group('episode'))
288+ season = int_or_none(epandseasonregex.group('season'))
289+
290+ dateandtimeregex = re.compile('[^\0]*eetris[^\0]*<\/span>[^\0]*(?P<date>[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,})[^0-9]*(?P<time>[0-9]{1,2}:[0-9]{1,2})[^\0]*').match(self._search_regex('[^\0]*(eetris[^\0]*<\/span>[^\0]*[0-9]{1,2}.[0-9]{1,2}.[0-9]{4,}[^0-9]*[0-9]{1,2}:[0-9]{1,2})[^\0]*', webpage, 'dateandtime', default=None))
291+ if dateandtimeregex is not None:
292+ date = dateandtimeregex.group('date')
293+ time = dateandtimeregex.group('time')
294 timestamp = int_or_none((datetime.strptime(date + " " + time, '%d.%m.%Y %H:%M') - datetime(1970, 1, 1) + timedelta(seconds=60 * 60 * 2)).total_seconds()) # No dst support, but added the 2 default hours of estonia
295 player_url = self._search_regex('[^\0]embedSWF\("([^"]+)[^\0]', webpage, 'player_url', default=None)
296
297- # There are videos that can only be seen when logged in, so some data can't be accessed(but we can still download the video)
298 else:
299- # Try to get description from api(which is mostly empty result) or in other case from og meta tag.
300- description = xpath_text(xmlfile, './playlist/video/description') or self._search_regex('[^\0]og:description"[^\0]*content="(.*)\" \/>', webpage, 'description', default=None)
301- # Basic character parsing to turn character references into real characters. also remove excessive whitespace
302- if description is not None:
303- description = description.strip().replace("&otilde;", "õ").replace("&Otilde;", "Õ").replace("&auml;", "ä").replace("&Auml;", "Ä").replace("&ouml;", "ö").replace("&Ouml;", "Ö").replace("&uuml;", "ü").replace("&Uuml;", "Ü").replace("&amp;", "&")
304-
305+ description = None
306 player_url = None
307- episode = int_or_none(xpath_text(xmlfile, './playlist/video/episode')) or None
308- season = None # Episode is mostly empty in the xml but season does not even appear there
309+ season = None
310+ episode = None
311 timestamp = None
312+
313+ if description is None:
314+ description = xpath_text(xmlfile, './playlist/video/description') or self._search_regex('[^\0]og:description" *content="(.*)\" *\/>', webpage, 'description', default=None)
315+ if description is not None:
316+ description = unescapeHTML(description).strip()
317+
318+ if episode is None:
319+ episode = int_or_none(xpath_text(xmlfile, './playlist/video/episode'))
320+
321+ title = xpath_text(xmlfile, './playlist/video/name')
322+ if title is None:
323+ title = self._search_regex('[^\0]og:title" *content="(.*)\" *\/>', webpage, 'title', default=None) or self._search_regex('[^\0]<title>(.*)<\/title>[^\0]', webpage, 'description', default=None)
324+
325 return {
326- 'app': "kanal2vod",
327 'average_rating': average_rating,
328 'description': description,
329 'episode_number': episode,
330- 'ext': "flv",
331- 'height': height,
332+ 'formats': formats,
333 'id': video_id,
334 'page_url': url,
335 'player_url': player_url,
336- 'play_path': "mp4:" + streamname,
337- 'protocol': "rtmp",
338- 'rtmp_real_time': True,
339 'season_number': season,
340 'timestamp': timestamp,
341- 'title': xpath_text(xmlfile, './playlist/video/name'),
342+ 'title': title,
343 'thumbnail': thumbnail,
344- 'url': xmlfile.find('./playlist/video/streamItems').get('host') + streamname,
345- 'width': width,
346 }
347
348commit 04dd3cb5811bd498a141743c8c558e9e0f2a1088
349Author: zmobbie <ottoxas@hotmail.com>
350Date: Fri Aug 12 01:02:12 2016 +0300
351
352 Update kanal2.py
353
354 added character encoding system to be used in this file and tried to make the extractor better overall
355
356diff --git a/youtube_dl/extractor/kanal2.py b/youtube_dl/extractor/kanal2.py
357index b42b3f7a2..7a405e561 100644
358--- a/youtube_dl/extractor/kanal2.py
359+++ b/youtube_dl/extractor/kanal2.py
360@@ -1,3 +1,4 @@
361+# coding: ISO-8859-15
362 from __future__ import unicode_literals
363 from datetime import (
364 datetime,
This page took 0.125135 seconds and 4 git commands to generate.