diff options
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 94 | ||||
| -rw-r--r-- | youtube_dl/extractor/steam.py | 63 | 
2 files changed, 65 insertions, 92 deletions
| diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 31f5254cf..25cc044d0 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -41,8 +41,10 @@ from .extractor.statigram import StatigramIE  from .extractor.photobucket import PhotobucketIE  from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE  from .extractor.stanfordoc import StanfordOpenClassroomIE +from .extractor.steam import SteamIE  from .extractor.ted import TEDIE  from .extractor.vimeo import VimeoIE +from .extractor.xnxx import XNXXIE  from .extractor.xvideos import XVideosIE  from .extractor.yahoo import YahooIE, YahooSearchIE  from .extractor.youku import YoukuIE @@ -57,44 +59,6 @@ from .extractor.zdf import ZDFIE -class XNXXIE(InfoExtractor): -    """Information extractor for xnxx.com""" - -    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)' -    IE_NAME = u'xnxx' -    VIDEO_URL_RE = r'flv_url=(.*?)&' -    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' -    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) -        video_id = mobj.group(1) - -        # Get webpage content -        webpage = self._download_webpage(url, video_id) - -        video_url = self._search_regex(self.VIDEO_URL_RE, -            webpage, u'video URL') -        video_url = compat_urllib_parse.unquote(video_url) - -        video_title = self._html_search_regex(self.VIDEO_TITLE_RE, -            webpage, u'title') - -        video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE, -            webpage, u'thumbnail', fatal=False) - -        return [{ -            'id': video_id, -            'url': video_url, -            'uploader': None, -            'upload_date': None, -            'title': video_title, -            'ext': 'flv', -            'thumbnail': video_thumbnail, -            'description': None, -        }] @@ -261,60 +225,6 @@ class FunnyOrDieIE(InfoExtractor):          }          return [info] -class SteamIE(InfoExtractor): -    _VALID_URL = r"""http://store\.steampowered\.com/ -                (agecheck/)? -                (?P<urltype>video|app)/ #If the page is only for videos or for a game -                (?P<gameID>\d+)/? -                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID -                """ -    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' -    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' - -    @classmethod -    def suitable(cls, url): -        """Receives a URL and returns True if suitable for this IE.""" -        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None - -    def _real_extract(self, url): -        m = re.match(self._VALID_URL, url, re.VERBOSE) -        gameID = m.group('gameID') - -        videourl = self._VIDEO_PAGE_TEMPLATE % gameID -        webpage = self._download_webpage(videourl, gameID) - -        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: -            videourl = self._AGECHECK_TEMPLATE % gameID -            self.report_age_confirmation() -            webpage = self._download_webpage(videourl, gameID) - -        self.report_extraction(gameID) -        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>', -                                             webpage, 'game title') - -        urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}," -        mweb = re.finditer(urlRE, webpage) -        namesRE = r'<span class="title">(?P<videoName>.+?)</span>' -        titles = re.finditer(namesRE, webpage) -        thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">' -        thumbs = re.finditer(thumbsRE, webpage) -        videos = [] -        for vid,vtitle,thumb in zip(mweb,titles,thumbs): -            video_id = vid.group('videoID') -            title = vtitle.group('videoName') -            video_url = vid.group('videoURL') -            video_thumb = thumb.group('thumbnail') -            if not video_url: -                raise ExtractorError(u'Cannot find video url for %s' % video_id) -            info = { -                'id':video_id, -                'url':video_url, -                'ext': 'flv', -                'title': unescapeHTML(title), -                'thumbnail': video_thumb -                  } -            videos.append(info) -        return [self.playlist_result(videos, gameID, game_title)]  class UstreamIE(InfoExtractor):      _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py new file mode 100644 index 000000000..30cb83208 --- /dev/null +++ b/youtube_dl/extractor/steam.py @@ -0,0 +1,63 @@ +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    unescapeHTML, +) + + +class SteamIE(InfoExtractor): +    _VALID_URL = r"""http://store\.steampowered\.com/ +                (agecheck/)? +                (?P<urltype>video|app)/ #If the page is only for videos or for a game +                (?P<gameID>\d+)/? +                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID +                """ +    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' +    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' + +    @classmethod +    def suitable(cls, url): +        """Receives a URL and returns True if suitable for this IE.""" +        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + +    def _real_extract(self, url): +        m = re.match(self._VALID_URL, url, re.VERBOSE) +        gameID = m.group('gameID') + +        videourl = self._VIDEO_PAGE_TEMPLATE % gameID +        webpage = self._download_webpage(videourl, gameID) + +        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None: +            videourl = self._AGECHECK_TEMPLATE % gameID +            self.report_age_confirmation() +            webpage = self._download_webpage(videourl, gameID) + +        self.report_extraction(gameID) +        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>', +                                             webpage, 'game title') + +        urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\}," +        mweb = re.finditer(urlRE, webpage) +        namesRE = r'<span class="title">(?P<videoName>.+?)</span>' +        titles = re.finditer(namesRE, webpage) +        thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">' +        thumbs = re.finditer(thumbsRE, webpage) +        videos = [] +        for vid,vtitle,thumb in zip(mweb,titles,thumbs): +            video_id = vid.group('videoID') +            title = vtitle.group('videoName') +            video_url = vid.group('videoURL') +            video_thumb = thumb.group('thumbnail') +            if not video_url: +                raise ExtractorError(u'Cannot find video url for %s' % video_id) +            info = { +                'id':video_id, +                'url':video_url, +                'ext': 'flv', +                'title': unescapeHTML(title), +                'thumbnail': video_thumb +                  } +            videos.append(info) +        return [self.playlist_result(videos, gameID, game_title)] | 
