diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/nbc.py | 31 | ||||
| -rw-r--r-- | youtube_dl/extractor/yahoo.py | 10 | 
3 files changed, 37 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5d0d2a9bc..b113aaec6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -310,6 +310,7 @@ from .naver import NaverIE  from .nba import NBAIE  from .nbc import (      NBCIE, +    NBCSportsVPlayerIE,      NBCSportsIE,      NBCNewsIE,  ) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 033bf71f0..c8dd72ab4 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -50,7 +50,7 @@ class NBCIE(InfoExtractor):          return self.url_result(theplatform_url) -class NBCSportsIE(InfoExtractor): +class NBCSportsVPlayerIE(InfoExtractor):      _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'      _TEST = { @@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor):          }      } +    @staticmethod +    def _extract_url(webpage): +        iframe_m = re.search( +            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) +        if iframe_m: +            return iframe_m.group('url') +      def _real_extract(self, url):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) @@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor):          return self.url_result(theplatform_url, 'ThePlatform') +class NBCSportsIE(InfoExtractor): +    # Does not include https because its certificate is invalid +    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + +    _TEST = { +        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', +        'md5': 'ba6c93f96b67bf05344f78bd523dac0f', +        'info_dict': { +            'id': 'PHJSaFWbrTY9', +            'ext': 'flv', +            'title': 'Tom Izzo, Michigan St. 
has \'so much respect\' for Duke', +            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        return self.url_result( +            NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') + +  class NBCNewsIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/          (?:video/.+?/(?P<id>\d+)| diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 6e72f1e55..43776d1e6 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,6 +17,8 @@ from ..utils import (      int_or_none,  ) +from .nbc import NBCSportsVPlayerIE +  class YahooIE(InfoExtractor):      IE_DESC = 'Yahoo screen and movies' @@ -132,6 +134,7 @@ class YahooIE(InfoExtractor):          }, {              'note': 'NBC Sports embeds',              'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', +            'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5',              'info_dict': {                  'id': '9CsDKds0kvHI',                  'ext': 'flv', @@ -161,10 +164,9 @@ class YahooIE(InfoExtractor):                  video_id = items[0]['id']                  return self._get_info(video_id, display_id, webpage)          # Look for NBCSports iframes -        iframe_m = re.search( -            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) -        if iframe_m: -            return self.url_result(iframe_m.group('url'), 'NBCSports') +        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) +        if nbc_sports_url: +            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')          items_json = self._search_regex(              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, | 
