diff options
Diffstat (limited to 'youtube_dl/extractor/imdb.py')
| -rw-r--r-- | youtube_dl/extractor/imdb.py | 37 | 
1 files changed, 27 insertions, 10 deletions
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 8bed8ccd0..3a2b7cec5 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
+    mimetype2ext,
     qualities,
 )
 
@@ -12,9 +12,9 @@ from ..utils import (
 class ImdbIE(InfoExtractor):
     IE_NAME = 'imdb'
     IE_DESC = 'Internet Movie Database trailers'
-    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
         'info_dict': {
             'id': '2524815897',
@@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor):
             'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
             'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
         }
-    }
+    }, {
+        'url': 'http://www.imdb.com/video/_/vi2524815897',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -48,13 +51,27 @@ class ImdbIE(InfoExtractor):
             json_data = self._search_regex(
                 r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
                 format_page, 'json data', flags=re.DOTALL)
-            info = json.loads(json_data)
-            format_info = info['videoPlayerObject']['video']
-            f_id = format_info['ffname']
+            info = self._parse_json(json_data, video_id, fatal=False)
+            if not info:
+                continue
+            format_info = info.get('videoPlayerObject', {}).get('video', {})
+            if not format_info:
+                continue
+            video_info_list = format_info.get('videoInfoList')
+            if not video_info_list or not isinstance(video_info_list, list):
+                continue
+            video_info = video_info_list[0]
+            if not video_info or not isinstance(video_info, dict):
+                continue
+            video_url = video_info.get('videoUrl')
+            if not video_url:
+                continue
+            format_id = format_info.get('ffname')
             formats.append({
-                'format_id': f_id,
-                'url': format_info['videoInfoList'][0]['videoUrl'],
-                'quality': quality(f_id),
+                'format_id': format_id,
+                'url': video_url,
+                'ext': mimetype2ext(video_info.get('videoMimeType')),
+                'quality': quality(format_id),
             })
         self._sort_formats(formats)
 
