diff options
-rw-r--r-- | README.md | 3 | ||||
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 109 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 3 |
3 files changed, 17 insertions, 98 deletions
@@ -27,7 +27,8 @@ which means you can modify it, redistribute it or use it however you like. from an initial value of SIZE. --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent - --referer REF specify a custom referer + --referer REF specify a custom referer, use if the video access + is restricted to one domain --list-extractors List all supported extractors and the URLs they would handle diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index d5876ad34..3450f0d17 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -114,8 +114,8 @@ class InfoExtractor(object): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): """ Returns the response handle """ if note is None: - note = u'Downloading video webpage' - if note is not False: + self.report_download_webpage(video_id) + elif note is not False: self.to_screen(u'%s: %s' % (video_id, note)) try: return compat_urllib_request.urlopen(url_or_request) @@ -152,6 +152,10 @@ class InfoExtractor(object): """Report information extraction.""" self.to_screen(u'%s: Extracting information' % id_or_name) + def report_download_webpage(self, video_id): + """Report webpage download.""" + self.to_screen(u'%s: Downloading webpage' % video_id) + def report_age_confirmation(self): """Report attempt to confirm age.""" self.to_screen(u'Confirming age') @@ -684,17 +688,10 @@ class MetacafeIE(InfoExtractor): _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = u'metacafe' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_disclaimer(self): """Report disclaimer retrieval.""" self.to_screen(u'Retrieving disclaimer') - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_initialize(self): # Retrieve disclaimer request = compat_urllib_request.Request(self._DISCLAIMER) @@ -795,9 +792,6 @@ class DailymotionIE(InfoExtractor): IE_NAME = u'dailymotion' _WORKING = False - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def _real_extract(self, url): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) @@ -879,13 +873,6 @@ class PhotobucketIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' IE_NAME = u'photobucket' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url): # Extract id from URL mobj = re.match(self._VALID_URL, url) @@ -944,13 +931,6 @@ class YahooIE(InfoExtractor): _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' IE_NAME = u'video.yahoo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1080,13 +1060,6 @@ class VimeoIE(InfoExtractor): _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)' IE_NAME = u'vimeo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1120,7 +1093,10 @@ class VimeoIE(InfoExtractor): config = webpage.split(' = {config:')[1].split(',assets:')[0] config = json.loads(config) except: - self._downloader.report_error(u'unable to extract info section') + if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): + self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option') + else: + self._downloader.report_error(u'unable to extract info section') return # Extract title @@ -1197,13 +1173,6 @@ class ArteTvIE(InfoExtractor): IE_NAME = u'arte.tv' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def fetch_webpage(self, url): request = compat_urllib_request.Request(url) try: @@ -1327,14 +1296,11 @@ class GenericIE(InfoExtractor): _VALID_URL = r'.*' IE_NAME = u'generic' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_webpage(self, video_id): """Report webpage download.""" if not self._downloader.params.get('test', False): self._downloader.report_warning(u'Falling back on generic information extractor.') - self.to_screen(u'%s: Downloading webpage' % video_id) + super(GenericIE, self).report_download_webpage(video_id) def report_following_redirect(self, new_url): """Report information extraction.""" @@ -1469,9 +1435,6 @@ class YoutubeSearchIE(InfoExtractor): _max_youtube_results = 1000 IE_NAME = u'youtube:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download search page with given number.""" query = query.decode(preferredencoding()) @@ -1546,9 +1509,6 @@ class GoogleSearchIE(InfoExtractor): _max_google_results = 1000 IE_NAME = u'video.google:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -1630,9 +1590,6 @@ class YahooSearchIE(InfoExtractor): _max_yahoo_results = 1000 IE_NAME = u'video.yahoo:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -1726,9 +1683,6 @@ class YoutubePlaylistIE(InfoExtractor): _MAX_RESULTS = 50 IE_NAME = u'youtube:playlist' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" @@ -1873,9 +1827,6 @@ class YoutubeUserIE(InfoExtractor): _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' IE_NAME = u'youtube:user' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % @@ -1942,9 +1893,6 @@ class BlipTVUserIE(InfoExtractor): _PAGE_SIZE = 12 IE_NAME = u'blip.tv:user' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, username, pagenum): """Report attempt to download user page.""" self.to_screen(u'user %s: Downloading video ids from page %d' % @@ -2020,10 +1968,6 @@ class DepositFilesIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' - def report_download_webpage(self, file_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % file_id) - def _real_extract(self, url): file_id = url.split('/')[-1] # Rebuild url in english locale @@ -2274,9 +2218,6 @@ class MyVideoIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -2712,9 +2653,6 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'soundcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_resolve(self, video_id): """Report information extraction.""" self.to_screen(u'%s: Resolving id' % video_id) @@ -2781,9 +2719,6 @@ class SoundcloudSetIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' IE_NAME = u'soundcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_resolve(self, video_id): """Report information extraction.""" self.to_screen(u'%s: Resolving id' % video_id) @@ -2861,7 +2796,7 @@ class InfoQIE(InfoExtractor): self.report_extraction(url) # Extract video URL - mobj = re.search(r"jsclassref='([^']*)'", webpage) + mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage) if mobj is None: self._downloader.report_error(u'unable to extract video url') return @@ -2904,9 +2839,6 @@ class MixcloudIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'mixcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_json(self, file_id): """Report JSON download.""" self.to_screen(u'Downloading json') @@ -3014,10 +2946,6 @@ class StanfordOpenClassroomIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' IE_NAME = u'stanfordoc' - def report_download_webpage(self, objid): - """Report information extraction.""" - self.to_screen(u'%s: Downloading webpage' % objid) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3196,10 +3124,6 @@ class MTVIE(InfoExtractor): class YoukuIE(InfoExtractor): _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html' - def report_download_webpage(self, file_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % file_id) - def _gen_sid(self): nowTime = int(time.time() * 1000) random1 = random.randint(1000,1998) @@ -3309,10 +3233,6 @@ class XNXXIE(InfoExtractor): VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' - def report_webpage(self, video_id): - """Report information extraction""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3320,7 +3240,7 @@ class XNXXIE(InfoExtractor): return video_id = mobj.group(1) - self.report_webpage(video_id) + self.report_download_webpage(video_id) # Get webpage content try: @@ -3366,9 +3286,6 @@ class GooglePlusIE(InfoExtractor): _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)' IE_NAME = u'plus.google' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_extract_entry(self, url): """Report downloading extry""" self.to_screen(u'Downloading entry: %s' % url) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 16e74785f..d491402c6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -141,7 +141,8 @@ def parseOpts(overrideArguments=None): general.add_option('--user-agent', dest='user_agent', help='specify a custom user agent', metavar='UA') general.add_option('--referer', - dest='referer', help='specify a custom referer', metavar='REF', default=None) + dest='referer', help='specify a custom referer, use if the video access is restricted to one domain', + metavar='REF', default=None) general.add_option('--list-extractors', action='store_true', dest='list_extractors', help='List all supported extractors and the URLs they would handle', default=False) |