diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/YoutubeDL.py | 6
-rw-r--r-- | youtube_dl/__init__.py | 4
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1
-rw-r--r-- | youtube_dl/extractor/common.py | 10
-rw-r--r-- | youtube_dl/extractor/generic.py | 6
-rw-r--r-- | youtube_dl/extractor/pornotube.py | 4
-rw-r--r-- | youtube_dl/extractor/viddler.py | 64
-rw-r--r-- | youtube_dl/extractor/youporn.py | 4
-rw-r--r-- | youtube_dl/extractor/youtube.py | 3
-rw-r--r-- | youtube_dl/utils.py | 2
-rw-r--r-- | youtube_dl/version.py | 2
11 files changed, 98 insertions, 8 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 856e9ac92..073a3837c 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -85,6 +85,8 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. downloadarchive: File name of a file where all downloads are recorded. Videos already present in the file are not downloaded again. @@ -313,6 +315,10 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + age_limit = self.params.get('age_limit') + if age_limit is not None: + if age_limit < info_dict.get('age_limit', 0): + return u'Skipping "' + title + '" because it is age restricted' if self.in_download_archive(info_dict): return (u'%(title)s has already been recorded in archive' % info_dict) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a680d7c55..ba5206387 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None): selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) + selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', + help='download only videos suitable for the given age', + default=None, type=int) selection.add_option('--download-archive', metavar='FILE', 
dest='download_archive', help='Download only videos not present in the archive file. Record all downloaded videos in it.') @@ -634,6 +637,7 @@ def _real_main(argv=None): 'daterange': date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, + 'age_limit': opts.age_limit, 'download_archive': opts.download_archive, }) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d1b7e5f99..2b054e1c9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -117,6 +117,7 @@ from .veehd import VeeHDIE from .veoh import VeohIE from .vevo import VevoIE from .vice import ViceIE +from .viddler import ViddlerIE from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 69cdcdc1b..2a5a85dc6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -54,6 +54,7 @@ class InfoExtractor(object): view_count: How many users have watched the video on the platform. urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen + age_limit: Age restriction for the video, as an integer (years) formats: A list of dictionaries for each format available, it must be ordered from worst to best quality. Potential fields: * url Mandatory. The URL of the video file @@ -318,6 +319,15 @@ class InfoExtractor(object): self._og_regex('video')], html, name, **kargs) + def _rta_search(self, html): + # See http://www.rtalabel.org/index.php?content=howtofaq#single + if re.search(r'(?ix)<meta\s+name="rating"\s+' + r' content="RTA-5042-1996-1400-1577-RTA"', + html): + return 18 + return 0 + + class SearchInfoExtractor(InfoExtractor): """ Base class for paged search queries extractors. 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 764070635..7060c6f92 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -117,7 +117,7 @@ class GenericIE(InfoExtractor): except ValueError: # since this is the last-resort InfoExtractor, if # this error is thrown, it'll be thrown here - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError(u'Failed to download URL: %s' % url) self.report_extraction(video_id) # Look for BrightCove: @@ -149,12 +149,12 @@ class GenericIE(InfoExtractor): # HTML5 video mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError(u'Unsupported URL: %s' % url) # It's possible that one of the regexes # matched, but returned an empty group: if mobj.group(1) is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError(u'Did not find a valid video URL at %s' % url) video_url = mobj.group(1) video_url = compat_urlparse.urljoin(url, video_url) diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index add76a11e..5d770ec28 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor): VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by' upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False) if upload_date: upload_date = unified_strdate(upload_date) + age_limit = self._rta_search(webpage) info = {'id': video_id, 'url': video_url, @@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor): 'upload_date': upload_date, 'title': video_title, 'ext': 'flv', - 'format': 'flv'} + 'format': 'flv', + 'age_limit': age_limit} return [info] diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py new file mode 100644 index 000000000..12c84a985 --- 
/dev/null +++ b/youtube_dl/extractor/viddler.py @@ -0,0 +1,64 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + + +class ViddlerIE(InfoExtractor): + _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)' + _TEST = { + u"url": u"http://www.viddler.com/v/43903784", + u'file': u'43903784.mp4', + u'md5': u'fbbaedf7813e514eb7ca30410f439ac9', + u'info_dict': { + u"title": u"Video Made Easy", + u"uploader": u"viddler", + u"duration": 100.89, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + embed_url = mobj.group('domain') + u'/embed/' + video_id + webpage = self._download_webpage(embed_url, video_id) + + video_sources_code = self._search_regex( + r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs') + video_sources = json.loads(video_sources_code.replace("'", '"')) + + formats = [{ + 'url': video_url, + 'format': format_id, + } for video_url, format_id in video_sources.items()] + + title = self._html_search_regex( + r"title\s*:\s*'([^']*)'", webpage, u'title') + uploader = self._html_search_regex( + r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False) + duration_s = self._html_search_regex( + r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False) + duration = float(duration_s) if duration_s else None + thumbnail = self._html_search_regex( + r"thumbnail\s*:\s*'([^']*)'", + webpage, u'thumbnail', fatal=False) + + info = { + '_type': 'video', + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'duration': duration, + 'formats': formats, + } + + # TODO: Remove when #980 has been merged + info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) + info.update(info['formats'][-1]) + + return info diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index c85fd4b5a..b1f93dd1b 100644 --- a/youtube_dl/extractor/youporn.py 
+++ b/youtube_dl/extractor/youporn.py @@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor): req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) + age_limit = self._rta_search(webpage) # Get JSON parameters json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters') @@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor): 'ext': extension, 'format': format, 'thumbnail': thumbnail, - 'description': video_description + 'description': video_description, + 'age_limit': age_limit, }) if self._downloader.params.get('listformats', None): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1101011ea..b02ae2572 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'description': video_description, 'player_url': player_url, 'subtitles': video_subtitles, - 'duration': video_duration + 'duration': video_duration, + 'age_limit': 18 if age_gate else 0, }) return results diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a463049a4..de2654762 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -175,7 +175,7 @@ def compat_ord(c): compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e773e82da..08eda2197 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.04' +__version__ = '2013.10.06' |