diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-03-31 02:47:18 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-03-31 02:47:18 +0800 |
commit | a2a4d5fa313d5244d24fa70d5db91971a7583d79 (patch) | |
tree | 463c5335f1b10b8f744ea5f4ff8315e3691ae5e7 /youtube_dl/extractor/nbc.py | |
parent | a28ccbabc60c81016c851ae46365be377ea83795 (diff) |
[Yahoo/NBCSports] Generalize NBC sports info extractor
Diffstat (limited to 'youtube_dl/extractor/nbc.py')
-rw-r--r-- | youtube_dl/extractor/nbc.py | 31 |
1 files changed, 30 insertions, 1 deletions
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 033bf71f0..c8dd72ab4 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -50,7 +50,7 @@ class NBCIE(InfoExtractor): return self.url_result(theplatform_url) -class NBCSportsIE(InfoExtractor): +class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' _TEST = { @@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor): } } + @staticmethod + def _extract_url(webpage): + iframe_m = re.search( + r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) + if iframe_m: + return iframe_m.group('url') + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor): return self.url_result(theplatform_url, 'ThePlatform') +class NBCSportsIE(InfoExtractor): + # Does not include https becuase its certificate is invalid + _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + + _TEST = { + 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', + 'md5': 'ba6c93f96b67bf05344f78bd523dac0f', + 'info_dict': { + 'id': 'PHJSaFWbrTY9', + 'ext': 'flv', + 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke', + 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + return self.url_result( + NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') + + class NBCNewsIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P<id>\d+)| |