diff options
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/vice.py | 272 | ||||
| -rw-r--r-- | youtube_dl/extractor/viceland.py | 38 | 
4 files changed, 175 insertions, 142 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 46ca7bec3..a0779e164 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1210,7 +1210,6 @@ from .vice import (      ViceArticleIE,      ViceShowIE,  ) -from .viceland import VicelandIE  from .vidbit import VidbitIE  from .viddler import ViddlerIE  from .videa import VideaIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bc350bcf7..a98f3636a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -103,6 +103,7 @@ from .vshare import VShareIE  from .mediasite import MediasiteIE  from .springboardplatform import SpringboardPlatformIE  from .yapfiles import YapFilesIE +from .vice import ViceIE  class GenericIE(InfoExtractor): @@ -2965,6 +2966,11 @@ class GenericIE(InfoExtractor):              return self.playlist_from_matches(                  yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key()) +        vice_urls = ViceIE._extract_urls(webpage) +        if vice_urls: +            return self.playlist_from_matches( +                vice_urls, video_id, video_title, ie=ViceIE.ie_key()) +          def merge_dicts(dict1, dict2):              merged = {}              for k, v in dict1.items(): diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index bcc28693a..538258617 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -5,56 +5,169 @@ import re  import time  import hashlib  import json +import random  from .adobepass import AdobePassIE  from .youtube import YoutubeIE  from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( +    compat_HTTPError, +    compat_str, +)  from ..utils import ( +    ExtractorError,      int_or_none,      parse_age_limit,      str_or_none, -    parse_duration, -    ExtractorError, -    extract_attributes, +    try_get,  ) -class ViceBaseIE(AdobePassIE): -    def _extract_preplay_video(self, url, locale, webpage): -        watch_hub_data = extract_attributes(self._search_regex( -            r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub')) -        video_id = watch_hub_data['vms-id'] -        title = watch_hub_data['video-title'] +class ViceIE(AdobePassIE): +    IE_NAME = 'vice' +    _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)' +    _TESTS = [{ +        'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7', +        'info_dict': { +            'id': '5e647f0125e145c9aef2069412c0cbde', +            'ext': 'mp4', +            'title': '10 Questions You Always Wanted To Ask: Pet Cremator', +            'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5', +            'uploader': 'vice', +            'uploader_id': '57a204088cb727dec794c67b', +            'timestamp': 1489664942, +            'upload_date': '20170316', +            'age_limit': 14, +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +        'add_ie': ['UplynkPreplay'], +    }, { +        # geo restricted to US +        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', +        'info_dict': { +            'id': '930c0ad1f47141cc955087eecaddb0e2', +            'ext': 'mp4', +            'uploader': 'waypoint', +            'title': 'The Signal From Tölva', +            'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', +            'uploader_id': '57f7d621e05ca860fa9ccaf9', +            'timestamp': 1477941983, +            'upload_date': '20161031', +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +        'add_ie': ['UplynkPreplay'], +    }, { +        'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', +        'info_dict': { +            'id': '581b12b60a0e1f4c0fb6ea2f', +            'ext': 'mp4', +            'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', +            'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>', +            'uploader': 'VICE', +            'uploader_id': '57a204088cb727dec794c67b', +            'timestamp': 1485368119, +            'upload_date': '20170125', +            'age_limit': 14, +        }, +        'params': { +            # AES-encrypted m3u8 +            'skip_download': True, +            'proxy': '127.0.0.1:8118', +        }, +        'add_ie': ['UplynkPreplay'], +    }, { +        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', +        'only_matching': True, +    }, { +        'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060', +        'only_matching': True, +    }, { +        'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7', +        'only_matching': True, +    }, { +        'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1', +        'only_matching': True, +    }] +    _PREPLAY_HOST = 'vms.vice' + +    @staticmethod +    def _extract_urls(webpage): +        return re.findall( +            r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)', +            webpage) + +    @staticmethod +    def _extract_url(webpage): +        urls = ViceIE._extract_urls(webpage) +        return urls[0] if urls else None + +    def _real_extract(self, url): +        locale, video_id = re.match(self._VALID_URL, url).groups() + +        webpage = self._download_webpage( +            'https://video.vice.com/%s/embed/%s' % (locale, video_id), +            video_id) + +        video = self._parse_json( +            self._search_regex( +                r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage, +                'app state'), video_id)['video'] +        video_id = video.get('vms_id') or video.get('id') or video_id +        title = video['title'] +        is_locked = video.get('locked') +        rating = video.get('rating') +        thumbnail = video.get('thumbnail_url') +        duration = int_or_none(video.get('duration')) +        series = try_get( +            video, lambda x: x['episode']['season']['show']['title'], +            compat_str) +        episode_number = try_get( +            video, lambda x: x['episode']['episode_number']) +        season_number = try_get( +            video, lambda x: x['episode']['season']['season_number']) +        uploader = None          query = {} -        is_locked = watch_hub_data.get('video-locked') == '1'          if is_locked:              resource = self._get_mvpd_resource( -                'VICELAND', title, video_id, -                watch_hub_data.get('video-rating')) +                'VICELAND', title, video_id, rating)              query['tvetoken'] = self._extract_mvpd_auth(                  url, video_id, 'VICELAND', resource)          # signature generation algorithm is reverse engineered from signatureGenerator in          # webpack:///../shared/~/vice-player/dist/js/vice-player.js in          # https://www.viceland.com/assets/common/js/web.vendor.bundle.js -        exp = int(time.time()) + 14400 +        # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js +        exp = int(time.time()) + 1440 +          query.update({              'exp': exp,              'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), +            '_ad_blocked': None, +            '_ad_unit': '', +            '_debug': '', +            'platform': 'desktop', +            'rn': random.randint(10000, 100000), +            'fbprebidtoken': '',          })          try:              host = 'www.viceland' if is_locked else self._PREPLAY_HOST              preplay = self._download_json( -                'https://%s.com/%s/preplay/%s' % (host, locale, video_id), +                'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),                  video_id, query=query)          except ExtractorError as e: -            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: +            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):                  error = json.loads(e.cause.read().decode()) +                error_message = error.get('error_description') or error['details']                  raise ExtractorError('%s said: %s' % ( -                    self.IE_NAME, error['details']), expected=True) +                    self.IE_NAME, error_message), expected=True)              raise          video_data = preplay['video'] @@ -76,92 +189,22 @@ class ViceBaseIE(AdobePassIE):              'id': video_id,              'title': title,              'description': base.get('body') or base.get('display_body'), -            'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), -            'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), +            'thumbnail': thumbnail, +            'duration': int_or_none(video_data.get('video_duration')) or duration,              'timestamp': int_or_none(video_data.get('created_at'), 1000),              'age_limit': parse_age_limit(video_data.get('video_rating')), -            'series': video_data.get('show_title') or watch_hub_data.get('show-title'), -            'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), +            'series': video_data.get('show_title') or series, +            'episode_number': int_or_none(episode.get('episode_number') or episode_number),              'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), -            'season_number': int_or_none(watch_hub_data.get('season')), +            'season_number': int_or_none(season_number),              'season_id': str_or_none(episode.get('season_id')), -            'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'), +            'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader,              'uploader_id': str_or_none(channel.get('id')),              'subtitles': subtitles,              'ie_key': 'UplynkPreplay',          } -class ViceIE(ViceBaseIE): -    IE_NAME = 'vice' -    _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)' - -    _TESTS = [{ -        'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', -        'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2', -        'info_dict': { -            'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj', -            'ext': 'flv', -            'title': 'Monkey Labs of Holland', -            'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149', -        }, -        'add_ie': ['Ooyala'], -    }, { -        'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56', -        'info_dict': { -            'id': '5816510690b70e6c5fd39a56', -            'ext': 'mp4', -            'uploader': 'Waypoint', -            'title': 'The Signal From Tölva', -            'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5', -            'uploader_id': '57f7d621e05ca860fa9ccaf9', -            'timestamp': 1477941983, -            'upload_date': '20161031', -        }, -        'params': { -            # m3u8 download -            'skip_download': True, -        }, -        'add_ie': ['UplynkPreplay'], -    }, { -        'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f', -        'info_dict': { -            'id': '581b12b60a0e1f4c0fb6ea2f', -            'ext': 'mp4', -            'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1', -            'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>', -            'uploader': 'VICE', -            'uploader_id': '57a204088cb727dec794c67b', -            'timestamp': 1485368119, -            'upload_date': '20170125', -            'age_limit': 14, -        }, -        'params': { -            # AES-encrypted m3u8 -            'skip_download': True, -        }, -        'add_ie': ['UplynkPreplay'], -    }, { -        'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', -        'only_matching': True, -    }] -    _PREPLAY_HOST = 'video.vice' - -    def _real_extract(self, url): -        locale, video_id = re.match(self._VALID_URL, url).groups() -        webpage, urlh = self._download_webpage_handle(url, video_id) -        embed_code = self._search_regex( -            r'embedCode=([^&\'"]+)', webpage, -            'ooyala embed code', default=None) -        if embed_code: -            return self.url_result('ooyala:%s' % embed_code, 'Ooyala') -        youtube_id = self._search_regex( -            r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None) -        if youtube_id: -            return self.url_result(youtube_id, 'Youtube') -        return self._extract_preplay_video(urlh.geturl(), locale, webpage) - -  class ViceShowIE(InfoExtractor):      IE_NAME = 'vice:show'      _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)' @@ -203,14 +246,15 @@ class ViceArticleIE(InfoExtractor):      _TESTS = [{          'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',          'info_dict': { -            'id': '58dc0a3dee202d2a0ccfcbd8', +            'id': '41eae2a47b174a1398357cec55f1f6fc',              'ext': 'mp4',              'title': 'Mormon War on Porn ', -            'description': 'md5:ad396a2481e7f8afb5ed486878421090', -            'uploader': 'VICE', -            'uploader_id': '57a204088cb727dec794c693', -            'timestamp': 1489160690, -            'upload_date': '20170310', +            'description': 'md5:6394a8398506581d0346b9ab89093fef', +            'uploader': 'vice', +            'uploader_id': '57a204088cb727dec794c67b', +            'timestamp': 1491883129, +            'upload_date': '20170411', +            'age_limit': 17,          },          'params': {              # AES-encrypted m3u8 @@ -219,18 +263,36 @@ class ViceArticleIE(InfoExtractor):          'add_ie': ['UplynkPreplay'],      }, {          'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', -        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', +        'md5': '7fe8ebc4fa3323efafc127b82bd821d9',          'info_dict': {              'id': '3jstaBeXgAs',              'ext': 'mp4',              'title': 'How to Hack a Car: Phreaked Out (Episode 2)',              'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30', -            'uploader_id': 'MotherboardTV',              'uploader': 'Motherboard', +            'uploader_id': 'MotherboardTV',              'upload_date': '20140529',          },          'add_ie': ['Youtube'],      }, { +        'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded', +        'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', +        'info_dict': { +            'id': 'e2ed435eb67e43efb66e6ef9a6930a88', +            'ext': 'mp4', +            'title': "Making The World's First Male Sex Doll", +            'description': 'md5:916078ef0e032d76343116208b6cc2c4', +            'uploader': 'vice', +            'uploader_id': '57a204088cb727dec794c67b', +            'timestamp': 1476919911, +            'upload_date': '20161019', +            'age_limit': 17, +        }, +        'params': { +            'skip_download': True, +        }, +        'add_ie': [ViceIE.ie_key()], +    }, {          'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',          'only_matching': True,      }, { @@ -244,8 +306,8 @@ class ViceArticleIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          prefetch_data = self._parse_json(self._search_regex( -            r'window\.__PREFETCH_DATA\s*=\s*({.*});', -            webpage, 'prefetch data'), display_id) +            r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n', +            webpage, 'app state'), display_id)['pageData']          body = prefetch_data['body']          def _url_res(video_url, ie_key): @@ -256,6 +318,10 @@ class ViceArticleIE(InfoExtractor):                  'ie_key': ie_key,              } +        vice_url = ViceIE._extract_url(webpage) +        if vice_url: +            return _url_res(vice_url, ViceIE.ie_key()) +          embed_code = self._search_regex(              r'embedCode=([^&\'"]+)', body,              'ooyala embed code', default=None) diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py deleted file mode 100644 index bd60235c8..000000000 --- a/youtube_dl/extractor/viceland.py +++ /dev/null @@ -1,38 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .vice import ViceBaseIE - - -class VicelandIE(ViceBaseIE): -    _VALID_URL = r'https?://(?:www\.)?viceland\.com/(?P<locale>[^/]+)/video/[^/]+/(?P<id>[a-f0-9]+)' -    _TEST = { -        'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316', -        'info_dict': { -            'id': '588a70d0dba8a16007de7316', -            'ext': 'mp4', -            'title': 'TRAPPED (Series Trailer)', -            'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf', -            'age_limit': 14, -            'timestamp': 1485474122, -            'upload_date': '20170126', -            'uploader_id': '57a204098cb727dec794c6a3', -            'uploader': 'Viceland', -        }, -        'params': { -            # m3u8 download -            'skip_download': True, -        }, -        'add_ie': ['UplynkPreplay'], -        'skip': '404', -    } -    _PREPLAY_HOST = 'www.viceland' - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        locale = mobj.group('locale') -        webpage = self._download_webpage(url, video_id) -        return self._extract_preplay_video(url, locale, webpage)  | 
