12 files changed, 162 insertions, 147 deletions
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py
deleted file mode 100644
index 5766b4fe8..000000000
--- a/youtube_dl/extractor/aftonbladet.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
-        'info_dict': {
-            'id': '36015',
-            'ext': 'mp4',
-            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
-            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
-            'timestamp': 1394142732,
-            'upload_date': '20140306',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        # find internal video meta data
-        meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
-        player_config = self._parse_json(self._html_search_regex(
-            r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
-        internal_meta_id = player_config['aptomaVideoId']
-        internal_meta_url = meta_url % internal_meta_id
-        internal_meta_json = self._download_json(
-            internal_meta_url, video_id, 'Downloading video meta data')
-
-        # find internal video formats
-        format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
-        internal_video_id = internal_meta_json['videoId']
-        internal_formats_url = format_url % internal_video_id
-        internal_formats_json = self._download_json(
-            internal_formats_url, video_id, 'Downloading video formats')
-
-        formats = []
-        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
-            p = fmt['paths'][0]
-            formats.append({
-                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
-                'ext': 'mp4',
-                'width': int_or_none(fmt.get('width')),
-                'height': int_or_none(fmt.get('height')),
-                'tbr': int_or_none(fmt.get('bitrate')),
-                'protocol': 'http',
-            })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': internal_meta_json['title'],
-            'formats': formats,
-            'thumbnail': internal_meta_json.get('imageUrl'),
-            'description': internal_meta_json.get('shortPreamble'),
-            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
-            'duration': int_or_none(internal_meta_json.get('duration')),
-            'view_count': int_or_none(internal_meta_json.get('views')),
-        }
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py
index 2fba93543..f7ee3a8f8 100644
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -1,9 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
         player_page = self._download_webpage(player_url, video_id)
 
-        config_json = self._search_regex(
+        config = self._parse_json(self._search_regex(
             r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
-            'configuration')
-        config = json.loads(config_json)
+            'configuration'), video_id)
 
         video_info = config['videoInfo']
         sources = config['sources']
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
index a47e04993..14ba88715 100644
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@@ -1,61 +1,54 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..utils import unified_strdate
 
 
 class DctpTvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+        'md5': '174dd4a8a6225cf5655952f969cfbe24',
         'info_dict': {
-            'id': '1324',
+            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
-            'ext': 'flv',
-            'title': 'Videoinstallation für eine Kaufhausfassade'
+            'ext': 'mp4',
+            'title': 'Videoinstallation für eine Kaufhausfassade',
+            'description': 'Kurzfilm',
+            'upload_date': '20110407',
+            'thumbnail': r're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
-        version_json = self._download_json(
-            base_url + 'version.json',
-            video_id, note='Determining file version')
-        version = version_json['version_name']
-        info_json = self._download_json(
-            '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
-            video_id, note='Fetching object ID')
-        object_id = compat_str(info_json['object_id'])
-        meta_json = self._download_json(
-            '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
-            video_id, note='Downloading metadata')
-        uuid = meta_json['uuid']
-        title = meta_json['title']
-        wide = meta_json['is_wide']
-        if wide:
-            ratio = '16x9'
-        else:
-            ratio = '4x3'
-        play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+        webpage = self._download_webpage(url, video_id)
+
+        object_id = self._html_search_meta('DC.identifier', webpage)
 
         servers_json = self._download_json(
-            'http://www.dctp.tv/streaming_servers/',
+            'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
             video_id, note='Downloading server list')
-        url = servers_json[0]['endpoint']
+        server = servers_json[0]['server']
+        m3u8_path = self._search_regex(
+            r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
+        formats = self._extract_m3u8_formats(
+            'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
+            entry_protocol='m3u8_native')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('DC.description', webpage)
+        upload_date = unified_strdate(
+            self._html_search_meta('DC.date.created', webpage))
+        thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': object_id,
             'title': title,
-            'format': 'rtmp',
-            'url': url,
-            'play_path': play_path,
-            'rtmp_real_time': True,
-            'ext': 'flv',
-            'display_id': video_id
+            'formats': formats,
+            'display_id': video_id,
+            'description': description,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
         }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 23fd2a308..09b3b4942 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -31,7 +31,6 @@ from .aenetworks import (
     HistoryTopicIE,
 )
 from .afreecatv import AfreecaTVIE
-from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 8f7f232be..196407b06 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Naomi Leonor Phan-Quang',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
     }, {
         # missing description
@@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Britney Spears',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
         'params': {
             'skip_download': True,
@@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         (video_url, description, thumbnail, timestamp, uploader,
-         uploader_id, like_count, comment_count) = [None] * 8
+         uploader_id, like_count, comment_count, comments, height, width) = [None] * 11
 
         shared_data = self._parse_json(
             self._search_regex(
@@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
                 shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
             if media:
                 video_url = media.get('video_url')
+                height = int_or_none(media.get('dimensions', {}).get('height'))
+                width = int_or_none(media.get('dimensions', {}).get('width'))
                 description = media.get('caption')
                 thumbnail = media.get('display_src')
                 timestamp = int_or_none(media.get('date'))
@@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
                 uploader_id = media.get('owner', {}).get('username')
                 like_count = int_or_none(media.get('likes', {}).get('count'))
                 comment_count = int_or_none(media.get('comments', {}).get('count'))
+                comments = [{
+                    'author': comment.get('user', {}).get('username'),
+                    'author_id': comment.get('user', {}).get('id'),
+                    'id': comment.get('id'),
+                    'text': comment.get('text'),
+                    'timestamp': int_or_none(comment.get('created_at')),
+                } for comment in media.get(
+                    'comments', {}).get('nodes', []) if comment.get('text')]
 
         if not video_url:
             video_url = self._og_search_video_url(webpage, secure=False)
 
+        formats = [{
+            'url': video_url,
+            'width': width,
+            'height': height,
+        }]
+
         if not uploader_id:
             uploader_id = self._search_regex(
                 r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
@@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
+            'formats': formats,
             'ext': 'mp4',
             'title': 'Video by %s' % uploader_id,
             'description': description,
@@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
             'uploader': uploader,
             'like_count': like_count,
             'comment_count': comment_count,
+            'comments': comments,
         }
 
 
diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py
index aaf3f807a..eb0a16008 100644
--- a/youtube_dl/extractor/ketnet.py
+++ b/youtube_dl/extractor/ketnet.py
@@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor):
     }, {
         'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
         'only_matching': True,
+    }, {
+        # mzsource, geo restricted to Belgium
+        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor):
 
         title = config['title']
 
-        formats = self._extract_m3u8_formats(
-            config['source']['hls'], video_id, 'mp4',
-            entry_protocol='m3u8_native', m3u8_id='hls')
+        formats = []
+        for source_key in ('', 'mz'):
+            source = config.get('%ssource' % source_key)
+            if not isinstance(source, dict):
+                continue
+            for format_id, format_url in source.items():
+                if format_id == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id=format_id,
+                        fatal=False))
+                elif format_id == 'hds':
+                    formats.extend(self._extract_f4m_formats(
+                        format_url, video_id, f4m_id=format_id, fatal=False))
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                    })
         self._sort_formats(formats)
 
         return {
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py
index e9cc9aa59..c48a5aad1 100644
--- a/youtube_dl/extractor/leeco.py
+++ b/youtube_dl/extractor/leeco.py
@@ -29,7 +29,7 @@ from ..utils import (
 
 class LeIE(InfoExtractor):
     IE_DESC = '乐视网'
-    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
 
     _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
 
@@ -73,6 +73,12 @@ class LeIE(InfoExtractor):
     }, {
         'url': 'http://sports.le.com/video/25737697.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.lesports.com/match/1023203003.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://sports.le.com/match/1023203003.html',
+        'only_matching': True,
     }]
 
     # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index 6752ffee2..b7bfa7a6d 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor):
                     format_id = 'rtmp'
                     if stream.get('videoBitRate'):
                         format_id += '-%d' % int_or_none(stream['videoBitRate'])
-                    http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
+                    http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
                     urls.append(http_url)
                     http_fmt = fmt.copy()
                     http_fmt.update({
diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py
index cb76a2a58..957cf1ea2 100644
--- a/youtube_dl/extractor/tvland.py
+++ b/youtube_dl/extractor/tvland.py
@@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class TVLandIE(MTVServicesInfoExtractor):
     IE_NAME = 'tvland.com'
-    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
+    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
     _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
     _TESTS = [{
         # Geo-restricted. Without a proxy metadata are still there. With a
@@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor):
             'upload_date': '20151228',
             'timestamp': 1451289600,
         },
+    }, {
+        'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
+        'only_matching': True,
     }]
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index bc352391e..46c2cfe7b 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE):
             # m3u8 download
             'skip_download': True,
         },
+        'skip': 'HTTP Error 404: Not Found',
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 185756301..3b38ac700 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -22,6 +22,7 @@ class VGTVIE(XstreamIE):
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
         'ap.vgtv.no/webtv': 'aptv',
+        'tv.aftonbladet.se/abtv': 'abtv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -30,6 +31,7 @@ class VGTVIE(XstreamIE):
         'satv': 'sa',
         'fvntv': 'fvn',
         'aptv': 'ap',
+        'abtv': 'ab',
     }
 
     _VALID_URL = r'''(?x)
@@ -40,7 +42,8 @@ class VGTVIE(XstreamIE):
                     /?
                     (?:
                         \#!/(?:video|live)/|
-                        embed?.*id=
+                        embed?.*id=|
+                        articles/
                     )|
                     (?P<appname>
                         %s
@@ -135,6 +138,14 @@ class VGTVIE(XstreamIE):
             'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
             'only_matching': True,
         },
+        {
+            'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
+            'only_matching': True,
+        },
+        {
+            'url': 'abtv:140026',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index f26e0732c..58799d413 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -20,7 +20,7 @@ from ..utils import (
     remove_start,
     str_to_int,
     unescapeHTML,
-    unified_strdate,
+    unified_timestamp,
     urlencode_postdata,
 )
 from .dailymotion import DailymotionIE
@@ -106,6 +106,7 @@ class VKIE(VKBaseIE):
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
+                'timestamp': 1329060660,
                 'upload_date': '20120212',
                 'view_count': int,
             },
@@ -119,6 +120,7 @@ class VKIE(VKBaseIE):
                 'uploader': 'Tom Cruise',
                 'title': 'No name',
                 'duration': 9,
+                'timestamp': 1374374880,
                 'upload_date': '20130721',
                 'view_count': int,
             }
@@ -195,6 +197,7 @@ class VKIE(VKBaseIE):
                 'upload_date': '20150709',
                 'view_count': int,
             },
+            'skip': 'Removed',
         },
         {
             # youtube embed
@@ -226,7 +229,7 @@ class VKIE(VKBaseIE):
             },
             'params': {
                 'skip_download': True,
-            }
+            },
         },
         {
             # video key is extra_data not url\d+
@@ -237,11 +240,31 @@ class VKIE(VKBaseIE):
                 'ext': 'mp4',
                 'title': 'S-Dance, репетиции к The way show',
                 'uploader': 'THE WAY SHOW | 17 апреля',
+                'timestamp': 1454870100,
                 'upload_date': '20160207',
                 'view_count': int,
             },
         },
         {
+            # finished live stream, live_mp4
+            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+            'md5': '90d22d051fccbbe9becfccc615be6791',
+            'info_dict': {
+                'id': '456242764',
+                'ext': 'mp4',
+                'title': 'ИгроМир 2016 — день 1',
+                'uploader': 'Игромания',
+                'duration': 5239,
+                'view_count': int,
+            },
+        },
+        {
+            # live stream, hls and rtmp links, most likely already finished live
+            # stream by the time you are reading this comment
+            'url': 'https://vk.com/video-140332_456239111',
+            'only_matching': True,
+        },
+        {
             # removed video, just testing that we match the pattern
             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
             'only_matching': True,
@@ -349,42 +372,51 @@ class VKIE(VKBaseIE):
         data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
         data = json.loads(data_json)
 
-        # Extract upload date
-        upload_date = None
-        mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
-        if mobj is not None:
-            mobj.group(1) + ' ' + mobj.group(2)
-            upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
-
-        view_count = None
-        views = self._html_search_regex(
-            r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
-            info_page, 'view count', default=None)
-        if views:
-            view_count = str_to_int(self._search_regex(
-                r'([\d,.]+)', views, 'view count', fatal=False))
+        title = unescapeHTML(data['md_title'])
+
+        if data.get('live') == 2:
+            title = self._live_title(title)
+
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+            'upload date', fatal=False))
+
+        view_count = str_to_int(self._search_regex(
+            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+            info_page, 'view count', fatal=False))
 
         formats = []
-        for k, v in data.items():
-            if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v:
+        for format_id, format_url in data.items():
+            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
                 continue
-            height = int_or_none(self._search_regex(
-                r'^(?:url|cache)(\d+)', k, 'height', default=None))
-            formats.append({
-                'format_id': k,
-                'url': v,
-                'height': height,
-            })
+            if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'):
+                height = int_or_none(self._search_regex(
+                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'height': height,
+                })
+            elif format_id == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id,
+                    fatal=False, live=True))
+            elif format_id == 'rtmp':
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'ext': 'flv',
+                })
         self._sort_formats(formats)
 
         return {
-            'id': compat_str(data['vid']),
+            'id': compat_str(data.get('vid') or video_id),
             'formats': formats,
-            'title': unescapeHTML(data['md_title']),
+            'title': title,
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
             'duration': data.get('duration'),
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'view_count': view_count,
         }