diff options
| author | Remita Amine <remitamine@gmail.com> | 2017-10-11 17:49:48 +0000 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2017-10-11 17:50:08 +0000 | 
| commit | 7fee3377dced16cd16660aeb78d18d81508aa28b (patch) | |
| tree | b1fd84a05b5213ea6a77d6141da59ae88e2b9332 | |
| parent | ff3f1a62f087332fa6409b5cbc39871d49e74f37 (diff) | |
[steam] fix extraction (fixes #14067)
| -rw-r--r-- | youtube_dl/extractor/steam.py | 129 | 
1 file changed, 76 insertions, 53 deletions
| diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py index 1a831ef6d..e5ac586a7 100644 --- a/youtube_dl/extractor/steam.py +++ b/youtube_dl/extractor/steam.py @@ -4,8 +4,10 @@ import re  from .common import InfoExtractor  from ..utils import ( +    extract_attributes,      ExtractorError, -    unescapeHTML, +    get_element_by_class, +    js_to_json,  ) @@ -25,35 +27,39 @@ class SteamIE(InfoExtractor):          'url': 'http://store.steampowered.com/video/105600/',          'playlist': [              { -                'md5': 'f870007cee7065d7c76b88f0a45ecc07', +                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',                  'info_dict': { -                    'id': '81300', -                    'ext': 'flv', -                    'title': 'Terraria 1.1 Trailer', +                    'id': '2040428', +                    'ext': 'mp4', +                    'title': 'Terraria 1.3 Trailer',                      'playlist_index': 1,                  }              },              { -                'md5': '61aaf31a5c5c3041afb58fb83cbb5751', +                'md5': '911672b20064ca3263fa89650ba5a7aa',                  'info_dict': { -                    'id': '80859', -                    'ext': 'flv', -                    'title': 'Terraria Trailer', +                    'id': '2029566', +                    'ext': 'mp4', +                    'title': 'Terraria 1.2 Trailer',                      'playlist_index': 2,                  }              }          ], +        'info_dict': { +            'id': '105600', +            'title': 'Terraria', +        },          'params': {              'playlistend': 2,          }      }, {          'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',          'info_dict': { -            'id': 'WB5DvDOOvAY', +            'id': 'X8kpJBlzD2E',              'ext': 'mp4', -            'upload_date': '20140329', -            'title': 'FRONTIERS - Final Greenlight Trailer', -        
    'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9', +            'upload_date': '20140617', +            'title': 'FRONTIERS - Trapping', +            'description': 'md5:bf6f7f773def614054089e5769c12a6e',              'uploader': 'AAD Productions',              'uploader_id': 'AtomicAgeDogGames',          } @@ -76,48 +82,65 @@ class SteamIE(InfoExtractor):              self.report_age_confirmation()              webpage = self._download_webpage(videourl, playlist_id) +        flash_vars = self._parse_json(self._search_regex( +            r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage, +            'flash vars'), playlist_id, js_to_json) + +        playlist_title = None +        entries = []          if fileID: -            playlist_title = self._html_search_regex( -                r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title') -            mweb = re.finditer(r'''(?x) -                'movie_(?P<videoID>[0-9]+)':\s*\{\s* -                YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)", -                ''', webpage) -            videos = [{ -                '_type': 'url', -                'url': vid.group('youtube_id'), -                'ie_key': 'Youtube', -            } for vid in mweb] +            playlist_title = get_element_by_class('workshopItemTitle', webpage) +            for movie in flash_vars.values(): +                if not movie: +                    continue +                youtube_id = movie.get('YOUTUBE_VIDEO_ID') +                if not youtube_id: +                    continue +                entries.append({ +                    '_type': 'url', +                    'url': youtube_id, +                    'ie_key': 'Youtube', +                })          else: -            playlist_title = self._html_search_regex( -                r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title') - -            mweb = re.finditer(r'''(?x) -                'movie_(?P<videoID>[0-9]+)':\s*\{\s* -                
FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)" -                (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}, -                ''', webpage) -            titles = re.finditer( -                r'<span class="title">(?P<videoName>.+?)</span>', webpage) -            thumbs = re.finditer( -                r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage) -            videos = [] - -            for vid, vtitle, thumb in zip(mweb, titles, thumbs): -                video_id = vid.group('videoID') -                title = vtitle.group('videoName') -                video_url = vid.group('videoURL') -                video_thumb = thumb.group('thumbnail') -                if not video_url: -                    raise ExtractorError('Cannot find video url for %s' % video_id) -                videos.append({ +            playlist_title = get_element_by_class('apphub_AppName', webpage) +            for movie_id, movie in flash_vars.items(): +                if not movie: +                    continue +                video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False) +                title = movie.get('MOVIE_NAME') +                if not title or not video_id: +                    continue +                entry = {                      'id': video_id, -                    'url': video_url, -                    'ext': 'flv', -                    'title': unescapeHTML(title), -                    'thumbnail': video_thumb -                }) -        if not videos: +                    'title': title.replace('+', ' '), +                } +                formats = [] +                flv_url = movie.get('FILENAME') +                if flv_url: +                    formats.append({ +                        'format_id': 'flv', +                        'url': flv_url, +                    }) +                highlight_element = self._search_regex( +                    r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id, +      
              webpage, 'highlight element', fatal=False) +                if highlight_element: +                    highlight_attribs = extract_attributes(highlight_element) +                    if highlight_attribs: +                        entry['thumbnail'] = highlight_attribs.get('data-poster') +                        for quality in ('', '-hd'): +                            for ext in ('webm', 'mp4'): +                                video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality)) +                                if video_url: +                                    formats.append({ +                                        'format_id': ext + quality, +                                        'url': video_url, +                                    }) +                if not formats: +                    continue +                entry['formats'] = formats +                entries.append(entry) +        if not entries:              raise ExtractorError('Could not find any videos') -        return self.playlist_result(videos, playlist_id, playlist_title) +        return self.playlist_result(entries, playlist_id, playlist_title) | 
