diff options
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | test/test_download.py | 2 | ||||
-rw-r--r-- | test/tests.json | 10 | ||||
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 75 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 4 |
5 files changed, 83 insertions, 10 deletions
@@ -1,7 +1,7 @@ % YOUTUBE-DL(1) # NAME -youtube-dl +youtube-dl - download videos from youtube.com or other video platforms # SYNOPSIS **youtube-dl** [OPTIONS] URL [URL...] diff --git a/test/test_download.py b/test/test_download.py index 577bcdbf2..84b3204fe 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -129,7 +129,7 @@ def generator(test_case): if isinstance(value, compat_str) and value.startswith('md5:'): self.assertEqual(value, 'md5:' + md5(info_dict.get(info_field))) else: - self.assertEqual(value, info_dict.get(info_field)) + self.assertEqual(value, info_dict.get(info_field), u'invalid value for field ' + info_field) # If checkable fields are missing from the test case, print the info_dict test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) diff --git a/test/tests.json b/test/tests.json index 3e0db297d..b112e6318 100644 --- a/test/tests.json +++ b/test/tests.json @@ -630,5 +630,15 @@ "title": "E3 2013: Debut Trailer" }, "skip": "Requires rtmpdump" + }, + { + "name": "Statigram", + "url": "http://statigr.am/p/484091715184808010_284179915", + "file": "484091715184808010_284179915.mp4", + "md5": "deda4ff333abe2e118740321e992605b", + "info_dict": { + "uploader": "videoseconds", + "title": "Instagram photo by @videoseconds (Videos)" + } } ] diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index af11333d1..39d2ef9d4 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -431,7 +431,7 @@ class YoutubeIE(InfoExtractor): def _request_automatic_caption(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" - sub_lang = self._downloader.params.get('subtitleslang') + sub_lang = self._downloader.params.get('subtitleslang') or 'en' sub_format = self._downloader.params.get('subtitlesformat') self.to_screen(u'%s: Looking for automatic captions' % video_id) mobj = re.search(r';ytplayer.config = ({.*?});', webpage) @@ -710,14 +710,14 @@ class YoutubeIE(InfoExtractor): pass else: # We report the original error - self._downloader.report_error(sub_error) + self._downloader.report_warning(sub_error) if self._downloader.params.get('allsubtitles', False): video_subtitles = self._extract_all_subtitles(video_id) for video_subtitle in video_subtitles: (sub_error, sub_lang, sub) = video_subtitle if sub_error: - self._downloader.report_error(sub_error) + self._downloader.report_warning(sub_error) if self._downloader.params.get('listsubtitles', False): sub_lang_list = self._list_available_subtitles(video_id) @@ -1121,6 +1121,25 @@ class VimeoIE(InfoExtractor): _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)' IE_NAME = u'vimeo' + def _verify_video_password(self, url, video_id, webpage): + password = self._downloader.params.get('password', None) + if password is None: + raise ExtractorError(u'This video is protected by a password, use the --password option') + token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1) + data = compat_urllib_parse.urlencode({'password': password, + 'token': token}) + # I didn't manage to use the password with https + if url.startswith('https'): + pass_url = url.replace('https','http') + else: + pass_url = url + password_request = compat_urllib_request.Request(pass_url+'/password', data) + password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') + password_request.add_header('Cookie', 'xsrft=%s' % token) + pass_web = self._download_webpage(password_request, video_id, + u'Verifying the password', + u'Wrong password') + def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1149,6 +1168,10 @@ class VimeoIE(InfoExtractor): except: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') + + if re.search('If so please provide the correct password.', webpage): + self._verify_video_password(url, video_id, webpage) + return self._real_extract(url) else: raise ExtractorError(u'Unable to extract info section') @@ -1435,6 +1458,13 @@ class GenericIE(InfoExtractor): # Try to find twitter cards info mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) if mobj is None: + # We look for Open Graph info: + # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am) + m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) + # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: + if m_video_type is not None: + mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) + if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) # It's possible that one of the regexes @@ -1634,9 +1664,10 @@ class YoutubePlaylistIE(InfoExtractor): # Number of videos is a multiple of self._MAX_RESULTS break - videos += [ (entry['yt$position']['$t'], entry['content']['src']) - for entry in response['feed']['entry'] - if 'content' in entry ] + for entry in response['feed']['entry']: + index = entry['yt$position']['$t'] + if 'media$group' in entry and 'media$player' in entry['media$group']: + videos.append((index, entry['media$group']['media$player']['url'])) if len(response['feed']['entry']) < self._MAX_RESULTS: break @@ -4569,6 +4600,37 @@ class GametrailersIE(InfoExtractor): 'description': video_description, } +class StatigramIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + video_url = self._html_search_regex( + r'<meta property="og:video:secure_url" content="(.+?)">', + webpage, u'video URL') + thumbnail_url = self._html_search_regex( + r'<meta property="og:image" content="(.+?)" />', + webpage, u'thumbnail URL', fatal=False) + html_title = self._html_search_regex( + r'<title>(.+?)</title>', + webpage, u'title') + title = html_title.rpartition(u' | Statigram')[0] + uploader_id = self._html_search_regex( + r'@([^ ]+)', title, u'uploader name', fatal=False) + ext = 'mp4' + + return [{ + 'id': video_id, + 'url': video_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + 'uploader_id' : uploader_id + }] + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. @@ -4635,6 +4697,7 @@ def gen_extractors(): HypemIE(), Vbox7IE(), GametrailersIE(), + StatigramIE(), GenericIE() ] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9279ce776..6f9ffba1e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -200,7 +200,7 @@ def parseOpts(overrideArguments=None): action='store_true', dest='listsubtitles', help='lists all available subtitles for the video (currently youtube only)', default=False) video_format.add_option('--sub-format', - action='store', dest='subtitlesformat', metavar='LANG', + action='store', dest='subtitlesformat', metavar='FORMAT', help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') video_format.add_option('--sub-lang', '--srt-lang', action='store', dest='subtitleslang', metavar='LANG', @@ -423,7 +423,7 @@ def _real_main(argv=None): if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: - parser.error(u'account username missing') + print(u'WARNING: account username missing') if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): parser.error(u'using output template conflicts with using title, video ID or auto number') if opts.usetitle and opts.useid: |