aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
author: Yen Chi Hsuan <yan12125@gmail.com> 2015-03-31 02:47:18 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com> 2015-03-31 02:47:18 +0800
commit: a2a4d5fa313d5244d24fa70d5db91971a7583d79 (patch)
tree: 463c5335f1b10b8f744ea5f4ff8315e3691ae5e7 /youtube_dl
parent: a28ccbabc60c81016c851ae46365be377ea83795 (diff)
download: youtube-dl-a2a4d5fa313d5244d24fa70d5db91971a7583d79.tar.xz
[Yahoo/NBCSports] Generalize NBC sports info extractor
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/nbc.py 31
-rw-r--r-- youtube_dl/extractor/yahoo.py 10
3 files changed, 37 insertions, 5 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 5d0d2a9bc..b113aaec6 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -310,6 +310,7 @@ from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
NBCIE,
+ NBCSportsVPlayerIE,
NBCSportsIE,
NBCNewsIE,
)
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 033bf71f0..c8dd72ab4 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -50,7 +50,7 @@ class NBCIE(InfoExtractor):
return self.url_result(theplatform_url)
-class NBCSportsIE(InfoExtractor):
+class NBCSportsVPlayerIE(InfoExtractor):
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
_TEST = {
@@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor):
}
}
+ @staticmethod
+ def _extract_url(webpage):
+ iframe_m = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+ if iframe_m:
+ return iframe_m.group('url')
+
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor):
return self.url_result(theplatform_url, 'ThePlatform')
+class NBCSportsIE(InfoExtractor):
+ # Does not include https because its certificate is invalid
+ _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+ 'md5': 'ba6c93f96b67bf05344f78bd523dac0f',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'flv',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 6e72f1e55..43776d1e6 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -17,6 +17,8 @@ from ..utils import (
int_or_none,
)
+from .nbc import NBCSportsVPlayerIE
+
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
@@ -132,6 +134,7 @@ class YahooIE(InfoExtractor):
}, {
'note': 'NBC Sports embeds',
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5',
'info_dict': {
'id': '9CsDKds0kvHI',
'ext': 'flv',
@@ -161,10 +164,9 @@ class YahooIE(InfoExtractor):
video_id = items[0]['id']
return self._get_info(video_id, display_id, webpage)
# Look for NBCSports iframes
- iframe_m = re.search(
- r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
- if iframe_m:
- return self.url_result(iframe_m.group('url'), 'NBCSports')
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,