diff options
Diffstat (limited to 'youtube_dl/extractor/nbc.py')
| -rw-r--r-- | youtube_dl/extractor/nbc.py | 73 | 
1 files changed, 55 insertions, 18 deletions
| diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index e75ab7c39..f840f6532 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -4,32 +4,48 @@ import re  import json  from .common import InfoExtractor -from ..utils import ( +from ..compat import (      compat_str, +    compat_HTTPError, +) +from ..utils import (      ExtractorError,      find_xpath_attr,  )  class NBCIE(InfoExtractor): -    _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)' - -    _TEST = { -        'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', -        # md5 checksum is not stable -        'info_dict': { -            'id': 'bTmnLCvIbaaH', -            'ext': 'flv', -            'title': 'I Am a Firefighter', -            'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', +    _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' + +    _TESTS = [ +        { +            'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', +            # md5 checksum is not stable +            'info_dict': { +                'id': 'bTmnLCvIbaaH', +                'ext': 'flv', +                'title': 'I Am a Firefighter', +                'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', +            }, +        }, +        { +            'url': 'http://www.nbc.com/the-tonight-show/episodes/176', +            'info_dict': { +                'id': 'XwU9KZkp98TH', +                'ext': 'flv', +                'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen', +                'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.', +            }, +            'skip': 'Only works from US',          }, -    } +    ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url') +        theplatform_url = self._search_regex( +            '(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', +            webpage, 'theplatform url').replace('_no_endcard', '')          if theplatform_url.startswith('//'):              theplatform_url = 'http:' + theplatform_url          return self.url_result(theplatform_url) @@ -57,12 +73,22 @@ class NBCNewsIE(InfoExtractor):              'md5': 'b2421750c9f260783721d898f4c42063',              'info_dict': {                  'id': 'I1wpAI_zmhsQ', -                'ext': 'flv', +                'ext': 'mp4',                  'title': 'How Twitter Reacted To The Snowden Interview',                  'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',              },              'add_ie': ['ThePlatform'],          }, +        { +            'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156', +            'md5': 'fdbf39ab73a72df5896b6234ff98518a', +            'info_dict': { +                'id': 'Wjf9EDR3A_60', +                'ext': 'mp4', +                'title': 'FULL EPISODE: Family Business', +                'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', +            }, +        },      ]      def _real_extract(self, url): @@ -97,11 +123,22 @@ class NBCNewsIE(InfoExtractor):              ]              for base_url in base_urls: +                if not base_url: +                    continue                  playlist_url = base_url + '?form=MPXNBCNewsAPI' -                all_videos = self._download_json(playlist_url, title)['videos']                  try: -                    info = next(v for v in all_videos if v['mpxId'] == mpxid) +                    all_videos = self._download_json(playlist_url, title) +                except ExtractorError as ee: +                    if isinstance(ee.cause, compat_HTTPError): +                        continue +                    raise + +                if not all_videos or 'videos' not in all_videos: +                    continue + +                try: +                    info = next(v for v in all_videos['videos'] if v['mpxId'] == mpxid)                      break                  except StopIteration:                      continue | 
