diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-24 17:01:18 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-24 17:01:18 +0800 |
commit | 2a7c38831cc8f789cdf4ee63f8d4450a46f45017 (patch) | |
tree | e95a61be39940cbfdb880565b27cd72016817548 | |
parent | 949b6497cc2be1f21cd439c6fc6e4047eddb0d66 (diff) |
[yahoo] Extend _VALID_URL and fix extraction
Closes #9271
-rw-r--r-- | youtube_dl/extractor/yahoo.py | 15 |
1 file changed, 13 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index b2d8f4b48..e2613659c 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -24,7 +24,7 @@ from .nbc import NBCSportsVPlayerIE
 
 class YahooIE(InfoExtractor):
     IE_DESC = 'Yahoo screen and movies'
-    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
+    _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?)'
     _TESTS = [
         {
             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -166,6 +166,17 @@ class YahooIE(InfoExtractor):
                 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
             },
         },
+        {
+            # config['models']['applet_model']['data']['sapi'] has no query
+            'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016',
+            'md5': 'dac0c72d502bc5facda80c9e6d5c98db',
+            'info_dict': {
+                'id': 'a6015640-e9e5-3efb-bb60-05589a183919',
+                'ext': 'mp4',
+                'description': 'Galactic',
+                'title': 'Dolla Diva (feat. Maggie Koerner)',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -202,7 +213,7 @@ class YahooIE(InfoExtractor):
             config = self._parse_json(config_json, display_id, fatal=False)
             if config:
                 sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
-                if sapi:
+                if sapi and 'query' in sapi:
                     return self._extract_info(display_id, sapi, webpage)
 
         items_json = self._search_regex(
```