diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-08-22 01:14:47 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-08-22 01:14:47 +0700 | 
| commit | 52007de8ca6fcc2b72cbfb3fe026ea63cda88f25 (patch) | |
| tree | 1f7a5b677355e2353f2c8c52ceab082f1ee21a50 | |
| parent | 28f96cf407cecbac21fe2c7d3417928dae3ab3b5 (diff) | |
[go] Add support for disneynow.go.com (closes #16299, closes #17264)
| -rw-r--r-- | youtube_dl/extractor/go.py | 30 | 
1 files changed, 24 insertions, 6 deletions
| diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index e781405f2..ec9dd6e3a 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,7 +36,8 @@ class GoIE(AdobePassIE):              'requestor_id': 'DisneyXD',          }      } -    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) +    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\ +                 % '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])      _TESTS = [{          'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',          'info_dict': { @@ -62,6 +63,14 @@ class GoIE(AdobePassIE):      }, {          'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',          'only_matching': True, +    }, { +        # brand 004 +        'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915', +        'only_matching': True, +    }, { +        # brand 008 +        'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', +        'only_matching': True,      }]      def _extract_videos(self, brand, video_id='-1', show_id='-1'): @@ -72,14 +81,23 @@ class GoIE(AdobePassIE):      def _real_extract(self, url):          sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() -        site_info = self._SITE_INFO[sub_domain] -        brand = site_info['brand'] -        if not video_id: -            webpage = self._download_webpage(url, display_id) +        site_info = self._SITE_INFO.get(sub_domain, {}) +        brand = site_info.get('brand') +        if not video_id or not site_info: +            webpage = self._download_webpage(url, display_id or video_id)              video_id = self._search_regex(                  # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"                  # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood -                r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None) +                r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', +                default=None) +            if not site_info: +                brand = self._search_regex( +                    (r'data-brand=\s*["\']\s*(\d+)', +                     r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand', +                    default='004') +                site_info = next( +                    si for _, si in self._SITE_INFO.items() +                    if si.get('brand') == brand)              if not video_id:                  # show extraction works for Disney, DisneyJunior and DisneyXD                  # ABC and Freeform has different layout | 
