diff options
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 94 | ||||
-rw-r--r-- | youtube_dl/extractor/steam.py | 63 |
2 files changed, 65 insertions, 92 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 31f5254cf..25cc044d0 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -41,8 +41,10 @@ from .extractor.statigram import StatigramIE from .extractor.photobucket import PhotobucketIE from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE from .extractor.stanfordoc import StanfordOpenClassroomIE +from .extractor.steam import SteamIE from .extractor.ted import TEDIE from .extractor.vimeo import VimeoIE +from .extractor.xnxx import XNXXIE from .extractor.xvideos import XVideosIE from .extractor.yahoo import YahooIE, YahooSearchIE from .extractor.youku import YoukuIE @@ -57,44 +59,6 @@ from .extractor.zdf import ZDFIE -class XNXXIE(InfoExtractor): - """Information extractor for xnxx.com""" - - _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)' - IE_NAME = u'xnxx' - VIDEO_URL_RE = r'flv_url=(.*?)&' - VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' - VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - video_id = mobj.group(1) - - # Get webpage content - webpage = self._download_webpage(url, video_id) - - video_url = self._search_regex(self.VIDEO_URL_RE, - webpage, u'video URL') - video_url = compat_urllib_parse.unquote(video_url) - - video_title = self._html_search_regex(self.VIDEO_TITLE_RE, - webpage, u'title') - - video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE, - webpage, u'thumbnail', fatal=False) - - return [{ - 'id': video_id, - 'url': video_url, - 'uploader': None, - 'upload_date': None, - 'title': video_title, - 'ext': 'flv', - 'thumbnail': video_thumbnail, - 'description': None, - }] @@ -261,60 +225,6 @@ class FunnyOrDieIE(InfoExtractor): } return [info] -class SteamIE(InfoExtractor): - _VALID_URL = r"""http://store\.steampowered\.com/ - (agecheck/)? - (?P<urltype>video|app)/ #If the page is only for videos or for a game - (?P<gameID>\d+)/? - (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID - """ - _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' - _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' - - @classmethod - def suitable(cls, url): - """Receives a URL and returns True if suitable for this IE.""" - return re.match(cls._VALID_URL, url, re.VERBOSE) is not None - - def _real_extract(self, url): - m = re.match(self._VALID_URL, url, re.VERBOSE) - gameID = m.group('gameID') - - videourl = self._VIDEO_PAGE_TEMPLATE % gameID - webpage = self._download_webpage(videourl, gameID) - - if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: - videourl = self._AGECHECK_TEMPLATE % gameID - self.report_age_confirmation() - webpage = self._download_webpage(videourl, gameID) - - self.report_extraction(gameID) - game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>', - webpage, 'game title') - - urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}," - mweb = re.finditer(urlRE, webpage) - namesRE = r'<span class="title">(?P<videoName>.+?)</span>' - titles = re.finditer(namesRE, webpage) - thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">' - thumbs = re.finditer(thumbsRE, webpage) - videos = [] - for vid,vtitle,thumb in zip(mweb,titles,thumbs): - video_id = vid.group('videoID') - title = vtitle.group('videoName') - video_url = vid.group('videoURL') - video_thumb = thumb.group('thumbnail') - if not video_url: - raise ExtractorError(u'Cannot find video url for %s' % video_id) - info = { - 'id':video_id, - 'url':video_url, - 'ext': 'flv', - 'title': unescapeHTML(title), - 'thumbnail': video_thumb - } - videos.append(info) - return [self.playlist_result(videos, gameID, game_title)] class UstreamIE(InfoExtractor): _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py new file mode 100644 index 000000000..30cb83208 --- /dev/null +++ b/youtube_dl/extractor/steam.py @@ -0,0 +1,63 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + unescapeHTML, +) + + +class SteamIE(InfoExtractor): + _VALID_URL = r"""http://store\.steampowered\.com/ + (agecheck/)? + (?P<urltype>video|app)/ #If the page is only for videos or for a game + (?P<gameID>\d+)/? + (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID + """ + _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' + _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url, re.VERBOSE) + gameID = m.group('gameID') + + videourl = self._VIDEO_PAGE_TEMPLATE % gameID + webpage = self._download_webpage(videourl, gameID) + + if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: + videourl = self._AGECHECK_TEMPLATE % gameID + self.report_age_confirmation() + webpage = self._download_webpage(videourl, gameID) + + self.report_extraction(gameID) + game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>', + webpage, 'game title') + + urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}," + mweb = re.finditer(urlRE, webpage) + namesRE = r'<span class="title">(?P<videoName>.+?)</span>' + titles = re.finditer(namesRE, webpage) + thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">' + thumbs = re.finditer(thumbsRE, webpage) + videos = [] + for vid,vtitle,thumb in zip(mweb,titles,thumbs): + video_id = vid.group('videoID') + title = vtitle.group('videoName') + video_url = vid.group('videoURL') + video_thumb = thumb.group('thumbnail') + if not video_url: + raise ExtractorError(u'Cannot find video url for %s' % video_id) + info = { + 'id':video_id, + 'url':video_url, + 'ext': 'flv', + 'title': unescapeHTML(title), + 'thumbnail': video_thumb + } + videos.append(info) + return [self.playlist_result(videos, gameID, game_title)] |