about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/steam.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/steam.py')
-rw-r--r-- youtube_dl/extractor/steam.py 158
1 file changed, 98 insertions, 60 deletions
diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py
index 91658f892..1d8d57224 100644
--- a/youtube_dl/extractor/steam.py
+++ b/youtube_dl/extractor/steam.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -8,78 +10,114 @@ from ..utils import (
class SteamIE(InfoExtractor):
- _VALID_URL = r"""http://store\.steampowered\.com/
- (agecheck/)?
- (?P<urltype>video|app)/ #If the page is only for videos or for a game
- (?P<gameID>\d+)/?
- (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
- """
+ _VALID_URL = r"""(?x)
+ https?://store\.steampowered\.com/
+ (agecheck/)?
+ (?P<urltype>video|app)/ #If the page is only for videos or for a game
+ (?P<gameID>\d+)/?
+ (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
+ |
+ https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
+ """
_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
- _TEST = {
- u"url": u"http://store.steampowered.com/video/105600/",
- u"playlist": [
+ _TESTS = [{
+ "url": "http://store.steampowered.com/video/105600/",
+ "playlist": [
{
- u"file": u"81300.flv",
- u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
- u"info_dict": {
- u"title": u"Terraria 1.1 Trailer",
- u'playlist_index': 1,
+ "md5": "f870007cee7065d7c76b88f0a45ecc07",
+ "info_dict": {
+ 'id': '81300',
+ 'ext': 'flv',
+ "title": "Terraria 1.1 Trailer",
+ 'playlist_index': 1,
}
},
{
- u"file": u"80859.flv",
- u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
- u"info_dict": {
- u"title": u"Terraria Trailer",
- u'playlist_index': 2,
+ "md5": "61aaf31a5c5c3041afb58fb83cbb5751",
+ "info_dict": {
+ 'id': '80859',
+ 'ext': 'flv',
+ "title": "Terraria Trailer",
+ 'playlist_index': 2,
}
}
- ]
- }
-
-
- @classmethod
- def suitable(cls, url):
- """Receives a URL and returns True if suitable for this IE."""
- return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+ ],
+ 'params': {
+ 'playlistend': 2,
+ }
+ }, {
+ 'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
+ 'info_dict': {
+ 'id': 'WB5DvDOOvAY',
+ 'ext': 'mp4',
+ 'upload_date': '20140329',
+ 'title': 'FRONTIERS - Final Greenlight Trailer',
+ 'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
+ 'uploader': 'AAD Productions',
+ 'uploader_id': 'AtomicAgeDogGames',
+ }
+ }]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url, re.VERBOSE)
- gameID = m.group('gameID')
-
- videourl = self._VIDEO_PAGE_TEMPLATE % gameID
- webpage = self._download_webpage(videourl, gameID)
+ m = re.match(self._VALID_URL, url)
+ fileID = m.group('fileID')
+ if fileID:
+ videourl = url
+ playlist_id = fileID
+ else:
+ gameID = m.group('gameID')
+ playlist_id = gameID
+ videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+ webpage = self._download_webpage(videourl, playlist_id)
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
- videourl = self._AGECHECK_TEMPLATE % gameID
+ videourl = self._AGECHECK_TEMPLATE % playlist_id
self.report_age_confirmation()
- webpage = self._download_webpage(videourl, gameID)
+ webpage = self._download_webpage(videourl, playlist_id)
+
+ if fileID:
+ playlist_title = self._html_search_regex(
+ r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
+ mweb = re.finditer(r'''(?x)
+ 'movie_(?P<videoID>[0-9]+)':\s*\{\s*
+ YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
+ ''', webpage)
+ videos = [{
+ '_type': 'url',
+ 'url': vid.group('youtube_id'),
+ 'ie_key': 'Youtube',
+ } for vid in mweb]
+ else:
+ playlist_title = self._html_search_regex(
+ r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
+
+ mweb = re.finditer(r'''(?x)
+ 'movie_(?P<videoID>[0-9]+)':\s*\{\s*
+ FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
+ (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
+ ''', webpage)
+ titles = re.finditer(
+ r'<span class="title">(?P<videoName>.+?)</span>', webpage)
+ thumbs = re.finditer(
+ r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
+ videos = []
- self.report_extraction(gameID)
- game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
- webpage, 'game title')
+ for vid, vtitle, thumb in zip(mweb, titles, thumbs):
+ video_id = vid.group('videoID')
+ title = vtitle.group('videoName')
+ video_url = vid.group('videoURL')
+ video_thumb = thumb.group('thumbnail')
+ if not video_url:
+ raise ExtractorError('Cannot find video url for %s' % video_id)
+ videos.append({
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'title': unescapeHTML(title),
+ 'thumbnail': video_thumb
+ })
+ if not videos:
+ raise ExtractorError('Could not find any videos')
- urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
- mweb = re.finditer(urlRE, webpage)
- namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
- titles = re.finditer(namesRE, webpage)
- thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
- thumbs = re.finditer(thumbsRE, webpage)
- videos = []
- for vid,vtitle,thumb in zip(mweb,titles,thumbs):
- video_id = vid.group('videoID')
- title = vtitle.group('videoName')
- video_url = vid.group('videoURL')
- video_thumb = thumb.group('thumbnail')
- if not video_url:
- raise ExtractorError(u'Cannot find video url for %s' % video_id)
- info = {
- 'id':video_id,
- 'url':video_url,
- 'ext': 'flv',
- 'title': unescapeHTML(title),
- 'thumbnail': video_thumb
- }
- videos.append(info)
- return [self.playlist_result(videos, gameID, game_title)]
+ return self.playlist_result(videos, playlist_id, playlist_title)