diff options
Diffstat (limited to 'youtube_dl/extractor/gamespot.py')
| -rw-r--r-- | youtube_dl/extractor/gamespot.py | 91 | 
1 files changed, 68 insertions, 23 deletions
| diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 4ffdd7515..621257c9f 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -1,19 +1,19 @@  from __future__ import unicode_literals  import re -import json -from .common import InfoExtractor +from .once import OnceIE  from ..compat import (      compat_urllib_parse_unquote, -    compat_urlparse,  )  from ..utils import (      unescapeHTML, +    url_basename, +    dict_get,  ) -class GameSpotIE(InfoExtractor): +class GameSpotIE(OnceIE):      _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'      _TESTS = [{          'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', @@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor):          webpage = self._download_webpage(url, page_id)          data_video_json = self._search_regex(              r'data-video=["\'](.*?)["\']', webpage, 'data video') -        data_video = json.loads(unescapeHTML(data_video_json)) +        data_video = self._parse_json(unescapeHTML(data_video_json), page_id)          streams = data_video['videoStreams'] +        manifest_url = None          formats = []          f4m_url = streams.get('f4m_stream') -        if f4m_url is not None: -            # Transform the manifest url to a link to the mp4 files -            # they are used in mobile devices. -            f4m_path = compat_urlparse.urlparse(f4m_url).path -            QUALITIES_RE = r'((,\d+)+,?)' -            qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') -            http_path = f4m_path[1:].split('/', 1)[1] -            http_template = re.sub(QUALITIES_RE, r'%s', http_path) -            http_template = http_template.replace('.csmil/manifest.f4m', '') -            http_template = compat_urlparse.urljoin( -                'http://video.gamespotcdn.com/', http_template) -            for q in qualities: -                formats.append({ -                    'url': http_template % q, -                    'ext': 'mp4', -                    'format_id': q, -                }) -        else: +        if f4m_url: +            manifest_url = f4m_url +            formats.extend(self._extract_f4m_formats( +                f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False)) +        m3u8_url = streams.get('m3u8_stream') +        if m3u8_url: +            manifest_url = m3u8_url +            m3u8_formats = self._extract_m3u8_formats( +                m3u8_url, page_id, 'mp4', 'm3u8_native', +                m3u8_id='hls', fatal=False) +            formats.extend(m3u8_formats) +        progressive_url = dict_get( +            streams, ('progressive_hd', 'progressive_high', 'progressive_low')) +        if progressive_url and manifest_url: +            qualities_basename = self._search_regex( +                '/([^/]+)\.csmil/', +                manifest_url, 'qualities basename', default=None) +            if qualities_basename: +                QUALITIES_RE = r'((,\d+)+,?)' +                qualities = self._search_regex( +                    QUALITIES_RE, qualities_basename, +                    'qualities', default=None) +                if qualities: +                    qualities = list(map(lambda q: int(q), qualities.strip(',').split(','))) +                    qualities.sort() +                    http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename) +                    http_url_basename = url_basename(progressive_url) +                    if m3u8_formats: +                        self._sort_formats(m3u8_formats) +                        m3u8_formats = list(filter( +                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', +                            m3u8_formats)) +                    if len(qualities) == len(m3u8_formats): +                        for q, m3u8_format in zip(qualities, m3u8_formats): +                            f = m3u8_format.copy() +                            f.update({ +                                'url': progressive_url.replace( +                                    http_url_basename, http_template % q), +                                'format_id': f['format_id'].replace('hls', 'http'), +                                'protocol': 'http', +                            }) +                            formats.append(f) +                    else: +                        for q in qualities: +                            formats.append({ +                                'url': progressive_url.replace( +                                    http_url_basename, http_template % q), +                                'ext': 'mp4', +                                'format_id': 'http-%d' % q, +                                'tbr': q, +                            }) + +        onceux_json = self._search_regex( +            r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None) +        if onceux_json: +            onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') +            if onceux_url: +                formats.extend(self._extract_once_formats(re.sub( +                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) + +        if not formats:              for quality in ['sd', 'hd']:                  # It's actually a link to a flv file                  flv_url = streams.get('f4m_{0}'.format(quality)) @@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor):                          'ext': 'flv',                          'format_id': quality,                      }) +        self._sort_formats(formats)          return {              'id': data_video['guid'], | 
