10 files changed, 116 insertions, 79 deletions
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 6dab226af..1c3c41d26 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -6,11 +6,13 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
+    dict_get,
     ExtractorError,
     HEADRequest,
-    unified_strdate,
-    qualities,
     int_or_none,
+    qualities,
+    remove_end,
+    unified_strdate,
 )
 
 
@@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
-        'md5': '41f438a4904f7664b91b4ed0dec969dc',
         'info_dict': {
-            'id': '1192814',
+            'id': '1405510',
+            'display_id': 'pid1830-c-zapping',
             'ext': 'mp4',
-            'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
-            'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
-            'upload_date': '20150105',
+            'title': 'Zapping - 02/07/2016',
+            'description': 'Le meilleur de toutes les chaînes, tous les jours',
+            'upload_date': '20160702',
         },
     }, {
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
         'info_dict': {
             'id': '1108190',
-            'ext': 'flv',
-            'title': 'Le labyrinthe - Boing super ranger',
+            'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
+            'ext': 'mp4',
+            'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
             'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
             'upload_date': '20140724',
         },
         'skip': 'Only works from France',
     }, {
-        'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
+        'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
+        'md5': '4b47b12b4ee43002626b97fad8fb1de5',
         'info_dict': {
-            'id': '1390231',
+            'id': '1420213',
+            'display_id': 'pid6318-videos-integrales',
             'ext': 'mp4',
-            'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
-            'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
-            'upload_date': '20160512',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
+            'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
+            'upload_date': '20161014',
         },
+        'skip': 'Only works from France',
     }, {
-        'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
+        'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
         'info_dict': {
-            'id': '1398334',
+            'id': '1420176',
+            'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
             'ext': 'mp4',
-            'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
-            'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
-            'upload_date': '20160607',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
+            'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
+            'upload_date': '20161014',
         },
     }, {
         'url': 'http://m.canalplus.fr/?vid=1398231',
@@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
 
         site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
 
         # Beware, some subclasses do not define an id group
-        display_id = mobj.group('display_id') or video_id
+        display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
 
-        if video_id is None:
-            webpage = self._download_webpage(url, display_id)
-            video_id = self._search_regex(
-                [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'],
-                webpage, 'video id', group='id')
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
+             r'id=["\']canal_video_player(?P<id>\d+)'],
+            webpage, 'video id', group='id')
 
         info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
         video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py
index 5797fb951..66c0f900a 100644
--- a/youtube_dl/extractor/carambatv.py
+++ b/youtube_dl/extractor/carambatv.py
@@ -9,6 +9,8 @@ from ..utils import (
     try_get,
 )
 
+from .videomore import VideomoreIE
+
 
 class CarambaTVIE(InfoExtractor):
     _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
@@ -62,14 +64,16 @@ class CarambaTVPageIE(InfoExtractor):
     _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
-        'md5': '',
+        'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
         'info_dict': {
-            'id': '191910501',
-            'ext': 'mp4',
+            'id': '475222',
+            'ext': 'flv',
             'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'duration': 2678.31,
+            'thumbnail': r're:^https?://.*\.jpg',
+            # duration reported by videomore is incorrect
+            'duration': int,
         },
+        'add_ie': [VideomoreIE.ie_key()],
     }
 
     def _real_extract(self, url):
@@ -77,6 +81,16 @@ class CarambaTVPageIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        videomore_url = VideomoreIE._extract_url(webpage)
+        if videomore_url:
+            title = self._og_search_title(webpage)
+            return {
+                '_type': 'url_transparent',
+                'url': videomore_url,
+                'ie_key': VideomoreIE.ie_key(),
+                'title': title,
+            }
+
         video_url = self._og_search_property('video:iframe', webpage, default=None)
 
         if not video_url:
diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py
index 821db20b2..57b18e81d 100644
--- a/youtube_dl/extractor/cbsinteractive.py
+++ b/youtube_dl/extractor/cbsinteractive.py
@@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE):
         webpage = self._download_webpage(url, display_id)
 
         data_json = self._html_search_regex(
-            r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
+            r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
             webpage, 'data json')
         data = self._parse_json(data_json, display_id)
         vdata = data.get('video') or data['videos'][0]
diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py
index 61aed0167..f35df143a 100644
--- a/youtube_dl/extractor/chirbit.py
+++ b/youtube_dl/extractor/chirbit.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import base64
+import re
 
 from .common import InfoExtractor
 from ..utils import parse_duration
@@ -70,7 +71,6 @@ class ChirbitProfileIE(InfoExtractor):
         'url': 'http://chirbit.com/ScarletBeauty',
         'info_dict': {
             'id': 'ScarletBeauty',
-            'title': 'Chirbits by ScarletBeauty',
         },
         'playlist_mincount': 3,
     }
@@ -78,13 +78,10 @@ class ChirbitProfileIE(InfoExtractor):
     def _real_extract(self, url):
         profile_id = self._match_id(url)
 
-        rss = self._download_xml(
-            'http://chirbit.com/rss/%s' % profile_id, profile_id)
+        webpage = self._download_webpage(url, profile_id)
 
         entries = [
-            self.url_result(audio_url.text, 'Chirbit')
-            for audio_url in rss.findall('./channel/item/link')]
+            self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
+            for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
 
-        title = rss.find('./channel/title').text
-
-        return self.playlist_result(entries, profile_id, title)
+        return self.playlist_result(entries, profile_id)
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
index 3a47f6fa4..bb52e0c6f 100644
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
@@ -10,15 +11,15 @@ from ..utils import (
 class ClipfishIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
-        'md5': '79bc922f3e8a9097b3d68a93780fd475',
+        'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
+        'md5': '720563e467b86374c194bdead08d207d',
         'info_dict': {
-            'id': '3966754',
+            'id': '4343170',
             'ext': 'mp4',
-            'title': 'FIFA 14 - E3 2013 Trailer',
-            'description': 'Video zu FIFA 14: E3 2013 Trailer',
-            'upload_date': '20130611',
-            'duration': 82,
+            'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
+            'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
+            'upload_date': '20161005',
+            'duration': 1291,
             'view_count': int,
         }
     }
@@ -50,10 +51,14 @@ class ClipfishIE(InfoExtractor):
                 'tbr': int_or_none(video_info.get('bitrate')),
             })
 
+        descr = video_info.get('descr')
+        if descr:
+            descr = descr.strip()
+
         return {
             'id': video_id,
             'title': video_info['title'],
-            'description': video_info.get('descr'),
+            'description': descr,
             'formats': formats,
             'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
             'duration': int_or_none(video_info.get('media_length')),
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py
index ac3bdfe8f..7d3e9b0c9 100644
--- a/youtube_dl/extractor/cmt.py
+++ b/youtube_dl/extractor/cmt.py
@@ -26,7 +26,7 @@ class CMTIE(MTVIE):
             'id': '1504699',
             'ext': 'mp4',
             'title': 'Still The King Ep. 109 in 3 Minutes',
-            'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
+            'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
             'timestamp': 1469421000.0,
             'upload_date': '20160725',
         },
@@ -42,3 +42,8 @@ class CMTIE(MTVIE):
                 '%s said: video is not available' % cls.IE_NAME, expected=True)
 
         return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
+
+    def _extract_mgid(self, webpage):  # returns the quoted value assigned to MTVN.VIDEO.contentUri in webpage
+        return self._search_regex(
+            r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
+            webpage, 'mgid', group='mgid')
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index c38fd095a..cc141f68e 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -150,6 +150,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # rtmp
             'skip_download': True,
         },
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
         'info_dict': {
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 6ae30679a..c7b107572 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -1,28 +1,28 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
 import re
 import calendar
 import datetime
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     HEADRequest,
     unified_strdate,
-    ExtractorError,
     strip_jsonp,
     int_or_none,
     float_or_none,
     determine_ext,
     remove_end,
+    unescapeHTML,
 )
 
 
 class ORFTVthekIE(InfoExtractor):
     IE_NAME = 'orf:tvthek'
     IE_DESC = 'ORF TVthek'
-    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
@@ -51,26 +51,23 @@ class ORFTVthekIE(InfoExtractor):
             'skip_download': True,  # rtsp downloads
         },
         '_skip': 'Blocked outside of Austria / Germany',
+    }, {
+        'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
+        'only_matching': True,
+    }, {
+        'url': 'http://tvthek.orf.at/profile/Universum/35429',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
         webpage = self._download_webpage(url, playlist_id)
 
-        data_json = self._search_regex(
-            r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
-        all_data = json.loads(data_json)
-
-        def get_segments(all_data):
-            for data in all_data:
-                if data['name'] in (
-                        'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
-                        'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
-                    return data['values']['segments']
-
-        sdata = get_segments(all_data)
-        if not sdata:
-            raise ExtractorError('Unable to extract segments')
+        data_jsb = self._parse_json(
+            self._search_regex(
+                r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
+                webpage, 'playlist', group='json'),
+            playlist_id, transform_source=unescapeHTML)['playlist']['videos']
 
         def quality_to_int(s):
             m = re.search('([0-9]+)', s)
@@ -79,8 +76,11 @@ class ORFTVthekIE(InfoExtractor):
             return int(m.group(1))
 
         entries = []
-        for sd in sdata:
-            video_id = sd['id']
+        for sd in data_jsb:
+            video_id, title = sd.get('id'), sd.get('title')
+            if not video_id or not title:
+                continue
+            video_id = compat_str(video_id)
             formats = [{
                 'preference': -10 if fd['delivery'] == 'hls' else None,
                 'format_id': '%s-%s-%s' % (
@@ -88,7 +88,7 @@ class ORFTVthekIE(InfoExtractor):
                 'url': fd['src'],
                 'protocol': fd['protocol'],
                 'quality': quality_to_int(fd['quality']),
-            } for fd in sd['playlist_item_array']['sources']]
+            } for fd in sd['sources']]
 
             # Check for geoblocking.
             # There is a property is_geoprotection, but that's always false
@@ -115,14 +115,14 @@ class ORFTVthekIE(InfoExtractor):
             self._check_formats(formats, video_id)
             self._sort_formats(formats)
 
-            upload_date = unified_strdate(sd['created_date'])
+            upload_date = unified_strdate(sd.get('created_date'))
             entries.append({
                 '_type': 'video',
                 'id': video_id,
-                'title': sd['header'],
+                'title': title,
                 'formats': formats,
                 'description': sd.get('description'),
-                'duration': int(sd['duration_in_seconds']),
+                'duration': int_or_none(sd.get('duration_in_seconds')),
                 'upload_date': upload_date,
                 'thumbnail': sd.get('image_full_url'),
             })
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 8b35fd244..c3aec1edd 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -157,7 +157,14 @@ class SafariCourseIE(SafariBaseIE):
     IE_NAME = 'safari:course'
     IE_DESC = 'safaribooksonline.com online courses'
 
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
+                            techbus\.safaribooksonline\.com
+                        )
+                        /(?P<id>[^/]+)/?(?:[#?]|$)
+                    '''
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
@@ -170,6 +177,9 @@ class SafariCourseIE(SafariBaseIE):
     }, {
         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
         'only_matching': True,
+    }, {
+        'url': 'http://techbus.safaribooksonline.com/9780134426365',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py
index 8a11ff848..7f2566586 100644
--- a/youtube_dl/extractor/videomore.py
+++ b/youtube_dl/extractor/videomore.py
@@ -86,6 +86,11 @@ class VideomoreIE(InfoExtractor):
         mobj = re.search(
             r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
             webpage)
+        if not mobj:
+            mobj = re.search(
+                r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
+                webpage)
+
         if mobj:
             return mobj.group('url')