diff options
Diffstat (limited to 'youtube_dl/InfoExtractors.py')
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 75 | 
1 files changed, 69 insertions, 6 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index af11333d1..39d2ef9d4 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -431,7 +431,7 @@ class YoutubeIE(InfoExtractor):
     def _request_automatic_caption(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang')
+        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
         sub_format = self._downloader.params.get('subtitlesformat')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
@@ -710,14 +710,14 @@ class YoutubeIE(InfoExtractor):
                         pass
                     else:
                         # We report the original error
-                        self._downloader.report_error(sub_error)
+                        self._downloader.report_warning(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
             for video_subtitle in video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitle
                 if sub_error:
-                    self._downloader.report_error(sub_error)
+                    self._downloader.report_warning(sub_error)
 
         if self._downloader.params.get('listsubtitles', False):
             sub_lang_list = self._list_available_subtitles(video_id)
@@ -1121,6 +1121,25 @@ class VimeoIE(InfoExtractor):
     _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
     IE_NAME = u'vimeo'
 
+    def _verify_video_password(self, url, video_id, webpage):
+        password = self._downloader.params.get('password', None)
+        if password is None:
+            raise ExtractorError(u'This video is protected by a password, use the --password option')
+        token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1)
+        data = compat_urllib_parse.urlencode({'password': password,
+                                              'token': token})
+        # I didn't manage to use the password with https
+        if url.startswith('https'):
+            pass_url = url.replace('https','http')
+        else:
+            pass_url = url
+        password_request = compat_urllib_request.Request(pass_url+'/password', data)
+        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        password_request.add_header('Cookie', 'xsrft=%s' % token)
+        pass_web = self._download_webpage(password_request, video_id,
+                                          u'Verifying the password',
+                                          u'Wrong password')
+
     def _real_extract(self, url, new_video=True):
         # Extract ID from URL
         mobj = re.match(self._VALID_URL, url)
@@ -1149,6 +1168,10 @@ class VimeoIE(InfoExtractor):
         except:
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                 raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
+
+            if re.search('If so please provide the correct password.', webpage):
+                self._verify_video_password(url, video_id, webpage)
+                return self._real_extract(url)
             else:
                 raise ExtractorError(u'Unable to extract info section')
 
@@ -1435,6 +1458,13 @@ class GenericIE(InfoExtractor):
             # Try to find twitter cards info
             mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
         if mobj is None:
+            # We look for Open Graph info:
+            # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
+            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+            if m_video_type is not None:
+                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+        if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
         # It's possible that one of the regexes
@@ -1634,9 +1664,10 @@ class YoutubePlaylistIE(InfoExtractor):
                 # Number of videos is a multiple of self._MAX_RESULTS
                 break
 
-            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
-                        for entry in response['feed']['entry']
-                        if 'content' in entry ]
+            for entry in response['feed']['entry']:
+                index = entry['yt$position']['$t']
+                if 'media$group' in entry and 'media$player' in entry['media$group']:
+                    videos.append((index, entry['media$group']['media$player']['url']))
 
             if len(response['feed']['entry']) < self._MAX_RESULTS:
                 break
@@ -4569,6 +4600,37 @@ class GametrailersIE(InfoExtractor):
                 'description': video_description,
                 }
 
+class StatigramIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_regex(
+            r'<meta property="og:video:secure_url" content="(.+?)">',
+            webpage, u'video URL')
+        thumbnail_url = self._html_search_regex(
+            r'<meta property="og:image" content="(.+?)" />',
+            webpage, u'thumbnail URL', fatal=False)
+        html_title = self._html_search_regex(
+            r'<title>(.+?)</title>',
+            webpage, u'title')
+        title = html_title.rpartition(u' | Statigram')[0]
+        uploader_id = self._html_search_regex(
+            r'@([^ ]+)', title, u'uploader name', fatal=False)
+        ext = 'mp4'
+
+        return [{
+            'id':        video_id,
+            'url':       video_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
+            'uploader_id' : uploader_id
+        }]
+
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
@@ -4635,6 +4697,7 @@ def gen_extractors():
         HypemIE(),
         Vbox7IE(),
         GametrailersIE(),
+        StatigramIE(),
         GenericIE()
     ]
 
