[Yahoo/NBCSports] Generalize NBC sports info extractor

author: Yen Chi Hsuan <yan12125@gmail.com> 2015-03-31 02:47:18 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com> 2015-03-31 02:47:18 +0800
commit: a2a4d5fa313d5244d24fa70d5db91971a7583d79 (patch)
tree: 463c5335f1b10b8f744ea5f4ff8315e3691ae5e7 /youtube_dl/extractor/nbc.py
parent: a28ccbabc60c81016c851ae46365be377ea83795 (diff)
1 files changed, 30 insertions, 1 deletions
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 033bf71f0..c8dd72ab4 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -50,7 +50,7 @@ class NBCIE(InfoExtractor):
         return self.url_result(theplatform_url)
 
 
-class NBCSportsIE(InfoExtractor):
+class NBCSportsVPlayerIE(InfoExtractor):
     _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
 
     _TEST = {
@@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor):
         }
     }
 
+    @staticmethod
+    def _extract_url(webpage):
+        iframe_m = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+        if iframe_m:
+            return iframe_m.group('url')
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
@@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor):
         return self.url_result(theplatform_url, 'ThePlatform')
 
 
+class NBCSportsIE(InfoExtractor):
+    # Does not include https because its certificate is invalid
+    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+    _TEST = {
+        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+        'md5': 'ba6c93f96b67bf05344f78bd523dac0f',
+        'info_dict': {
+            'id': 'PHJSaFWbrTY9',
+            'ext': 'flv',
+            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return self.url_result(
+            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
 class NBCNewsIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
         (?:video/.+?/(?P<id>\d+)|
author	Yen Chi Hsuan <yan12125@gmail.com>	2015-03-31 02:47:18 +0800
committer	Yen Chi Hsuan <yan12125@gmail.com>	2015-03-31 02:47:18 +0800
commit	a2a4d5fa313d5244d24fa70d5db91971a7583d79 (patch)
tree	463c5335f1b10b8f744ea5f4ff8315e3691ae5e7 /youtube_dl/extractor/nbc.py
parent	a28ccbabc60c81016c851ae46365be377ea83795 (diff)