diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-06-10 13:31:55 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-06-10 13:31:55 +0800 | 
| commit | 5de008e8c3e4058c20956d19f69ac3347a2722e0 (patch) | |
| tree | 9dbb3a05077aced27ea3afa7685a50cdb00b1b43 | |
| parent | 3e74b444e7324fdda956aa816240b938eabf9c93 (diff) | |
[nbcnews] Support embed widgets
Used in some Vulture videos
| -rw-r--r-- | youtube_dl/extractor/nbc.py | 22 | 
1 file changed, 13 insertions, 9 deletions
```diff
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index f27c7f139..6b7da1149 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -266,6 +266,11 @@ class NBCNewsIE(ThePlatformIE):
             'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
             'only_matching': True,
         },
+        {
+            # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
+            'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -289,18 +294,17 @@ class NBCNewsIE(ThePlatformIE):
             webpage = self._download_webpage(url, display_id)
             info = None
             bootstrap_json = self._search_regex(
-                r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+                [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
+                 r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
                 webpage, 'bootstrap json', default=None)
-            if bootstrap_json:
-                bootstrap = self._parse_json(bootstrap_json, display_id)
+            bootstrap = self._parse_json(
+                bootstrap_json, display_id, transform_source=unescapeHTML)
+            if 'results' in bootstrap:
                 info = bootstrap['results'][0]['video']
+            elif 'video' in bootstrap:
+                info = bootstrap['video']
             else:
-                player_instance_json = self._search_regex(
-                    r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None)
-                if not player_instance_json:
-                    player_instance_json = self._html_search_regex(
-                        r'data-video="([^"]+)"', webpage, 'video json')
-                info = self._parse_json(player_instance_json, display_id)
+                info = bootstrap
             video_id = info['mpxId']
             title = info['title']
```
