diff options
| author | Remita Amine <remitamine@gmail.com> | 2017-02-13 11:43:20 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2017-02-13 11:43:20 +0100 | 
| commit | 9dad94185367cdfde0de21cd8e595094cbe31acc (patch) | |
| tree | 43d18d168bf8b3eac7a90ff2af19fd7c35abaf4e | |
| parent | 0dac7cbb092c804f1548c4a60f15ac29a7db06b9 (diff) | |
[disney] improve extraction
- add support for more URLs
- detect expired videos
- skip Adobe Flash Access protected videos
closes #4975
closes #11000
closes #11882
closes #11936
| -rw-r--r-- | youtube_dl/extractor/disney.py | 60 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 13 |

2 files changed, 52 insertions, 21 deletions
| diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py index 396873c6d..939d1338c 100644 --- a/youtube_dl/extractor/disney.py +++ b/youtube_dl/extractor/disney.py @@ -9,13 +9,15 @@ from ..utils import (      unified_strdate,      compat_str,      determine_ext, +    ExtractorError,  )  class DisneyIE(InfoExtractor):      _VALID_URL = r'''(?x) -        https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})''' +        https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''      _TESTS = [{ +        # Disney.EmbedVideo          'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',          'info_dict': {              'id': '545ed1857afee5a0ec239977', @@ -29,6 +31,20 @@ class DisneyIE(InfoExtractor):              'skip_download': True,          }      }, { +        # Grill.burger +        'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette', +        'info_dict': { +            'id': '5454e9f4e9804a552e3524c8', +            'ext': 'mp4', +            'title': '"Intro" Featurette: Rogue One: A Star Wars Story', +            'upload_date': '20170104', +            'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.', +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        } +    }, {          'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',          'only_matching': True,      }, { @@ -43,31 +59,55 @@ class DisneyIE(InfoExtractor):      }, {          'url': 
'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',          'only_matching': True, +    }, { +        'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677', +        'only_matching': True, +    }, { +        'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1', +        'only_matching': True, +    }, { +        'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo', +        'only_matching': True, +    }, { +        'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue', +        'only_matching': True,      }]      def _real_extract(self, url): -        domain, video_id = re.match(self._VALID_URL, url).groups() -        webpage = self._download_webpage( -            'http://%s/embed/%s' % (domain, video_id), video_id) -        video_data = self._parse_json(self._search_regex( -            r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] +        domain, video_id, display_id = re.match(self._VALID_URL, url).groups() +        if not video_id: +            webpage = self._download_webpage(url, display_id) +            grill = re.sub(r'"\s*\+\s*"', '', self._search_regex( +                r'Grill\.burger\s*=\s*({.+})\s*:', +                webpage, 'grill data')) +            page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video') +            video_data = page_data['data'][0] +        else: +            webpage = self._download_webpage( +                'http://%s/embed/%s' % (domain, video_id), video_id) +            page_data = self._parse_json(self._search_regex( +                r'Disney\.EmbedVideo\s*=\s*({.+});', +                webpage, 'embed data'), video_id) +            video_data = page_data['video']          for external in video_data.get('externals', []):              if external.get('source') == 'vevo':                  return self.url_result('vevo:' + 
external['data_id'], 'Vevo') +        video_id = video_data['id']          title = video_data['title']          formats = []          for flavor in video_data.get('flavors', []):              flavor_format = flavor.get('format')              flavor_url = flavor.get('url') -            if not flavor_url or not re.match(r'https?://', flavor_url): +            if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':                  continue              tbr = int_or_none(flavor.get('bitrate'))              if tbr == 99999:                  formats.extend(self._extract_m3u8_formats( -                    flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) +                    flavor_url, video_id, 'mp4', +                    m3u8_id=flavor_format, fatal=False))                  continue              format_id = []              if flavor_format: @@ -88,6 +128,10 @@ class DisneyIE(InfoExtractor):                  'ext': ext,                  'vcodec': 'none' if (width == 0 and height == 0) else None,              }) +        if not formats and video_data.get('expired'): +            raise ExtractorError( +                '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']), +                expected=True)          self._sort_formats(formats)          subtitles = {} diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1c233f038..494cc3c84 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -991,19 +991,6 @@ class GenericIE(InfoExtractor):                  'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? 
Conference 2014',              },          }, -        # Kaltura embed protected with referrer -        { -            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', -            'info_dict': { -                'id': '1_g4fbemnq', -                'ext': 'mp4', -                'title': 'Violetta - Achter De Schermen - Ruggero', -                'description': 'Achter de schermen met Ruggero', -                'timestamp': 1435133761, -                'upload_date': '20150624', -                'uploader_id': 'echojecka', -            }, -        },          # Kaltura embed with single quotes          {              'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', | 
