diff options
Diffstat (limited to 'youtube_dl/extractor/canalplus.py')
| -rw-r--r-- | youtube_dl/extractor/canalplus.py | 102 | 
1 files changed, 64 insertions, 38 deletions
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 25b2d4efe..1c3c41d26 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,86 +1,112 @@ -# encoding: utf-8 +# coding: utf-8  from __future__ import unicode_literals  import re  from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse  from ..utils import ( +    dict_get,      ExtractorError,      HEADRequest, -    unified_strdate, -    url_basename, -    qualities,      int_or_none, +    qualities, +    remove_end, +    unified_strdate,  )  class CanalplusIE(InfoExtractor):      IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' -    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' +    _VALID_URL = r'''(?x) +                        https?:// +                            (?: +                                (?: +                                    (?:(?:www|m)\.)?canalplus\.fr| +                                    (?:www\.)?piwiplus\.fr| +                                    (?:www\.)?d8\.tv| +                                    (?:www\.)?c8\.fr| +                                    (?:www\.)?d17\.tv| +                                    (?:www\.)?itele\.fr +                                )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?| +                                player\.canalplus\.fr/#/(?P<id>\d+) +                            ) + +                    '''      _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'      _SITE_ID_MAP = { -        'canalplus.fr': 'cplus', -        'piwiplus.fr': 'teletoon', -        'd8.tv': 'd8', -        'itele.fr': 'itele', +        'canalplus': 'cplus', +        'piwiplus': 'teletoon', +        'd8': 'd8', +        'c8': 'd8', +        'd17': 'd17', +        'itele': 'itele',      }      _TESTS = [{ -        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', -        'md5': '12164a6f14ff6df8bd628e8ba9b10b78', +        'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',          'info_dict': { -            'id': '1263092', +            'id': '1405510', +            'display_id': 'pid1830-c-zapping',              'ext': 'mp4', -            'title': 'Le Zapping - 13/05/15', -            'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', -            'upload_date': '20150513', +            'title': 'Zapping - 02/07/2016', +            'description': 'Le meilleur de toutes les chaînes, tous les jours', +            'upload_date': '20160702',          },      }, {          'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',          'info_dict': {              'id': '1108190', -            'ext': 'flv', -            'title': 'Le labyrinthe - Boing super ranger', +            'display_id': 'pid1405-le-labyrinthe-boing-super-ranger', +            'ext': 'mp4', +            'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',              'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',              'upload_date': '20140724',          },          'skip': 'Only works from France',      }, { -        'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', +        'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html', +        'md5': '4b47b12b4ee43002626b97fad8fb1de5',          'info_dict': { -            'id': '966289', -            'ext': 'flv', -            'title': 'Campagne intime - Documentaire exceptionnel', -            'description': 'md5:d2643b799fb190846ae09c61e59a859f', -            'upload_date': '20131108', +            'id': '1420213', +            'display_id': 'pid6318-videos-integrales', +            'ext': 'mp4', +            'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016', +            'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799', +            'upload_date': '20161014',          }, -        'skip': 'videos get deleted after a while', +        'skip': 'Only works from France',      }, { -        'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', -        'md5': '38b8f7934def74f0d6f3ba6c036a5f82', +        'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',          'info_dict': { -            'id': '1213714', +            'id': '1420176', +            'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',              'ext': 'mp4', -            'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', -            'description': 'md5:8216206ec53426ea6321321f3b3c16db', -            'upload_date': '20150211', +            'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ', +            'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.', +            'upload_date': '20161014',          }, +    }, { +        'url': 'http://m.canalplus.fr/?vid=1398231', +        'only_matching': True, +    }, { +        'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061', +        'only_matching': True,      }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        video_id = mobj.groupdict().get('id') -        site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal'] +        site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]          # Beware, some subclasses do not define an id group -        display_id = url_basename(mobj.group('path')) +        display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html') -        if video_id is None: -            webpage = self._download_webpage(url, display_id) -            video_id = self._search_regex( -                [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'], -                webpage, 'video id', group='id') +        webpage = self._download_webpage(url, display_id) +        video_id = self._search_regex( +            [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', +             r'id=["\']canal_video_player(?P<id>\d+)'], +            webpage, 'video id', group='id')          info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)          video_data = self._download_json(info_url, video_id, 'Downloading video JSON')  | 
