about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/YoutubeDL.py | 6
-rw-r--r-- youtube_dl/__init__.py | 4
-rw-r--r-- youtube_dl/extractor/__init__.py | 1
-rw-r--r-- youtube_dl/extractor/common.py | 10
-rw-r--r-- youtube_dl/extractor/generic.py | 6
-rw-r--r-- youtube_dl/extractor/pornotube.py | 4
-rw-r--r-- youtube_dl/extractor/viddler.py | 64
-rw-r--r-- youtube_dl/extractor/youporn.py | 4
-rw-r--r-- youtube_dl/extractor/youtube.py | 3
-rw-r--r-- youtube_dl/utils.py | 2
-rw-r--r-- youtube_dl/version.py | 2
11 files changed, 98 insertions, 8 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 856e9ac92..073a3837c 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -85,6 +85,8 @@ class YoutubeDL(object):
cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache.
noplaylist: Download single video instead of a playlist if in doubt.
+ age_limit: An integer representing the user's age in years.
+ Unsuitable videos for the given age are skipped.
downloadarchive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
@@ -313,6 +315,10 @@ class YoutubeDL(object):
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ age_limit = self.params.get('age_limit')
+ if age_limit is not None:
+ if age_limit < info_dict.get('age_limit', 0):
+ return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
return (u'%(title)s has already been recorded in archive'
% info_dict)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index a680d7c55..ba5206387 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+ selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+ help='download only videos suitable for the given age',
+ default=None, type=int)
selection.add_option('--download-archive', metavar='FILE',
dest='download_archive',
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
@@ -634,6 +637,7 @@ def _real_main(argv=None):
'daterange': date,
'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,
+ 'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
})
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d1b7e5f99..2b054e1c9 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -117,6 +117,7 @@ from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
from .vice import ViceIE
+from .viddler import ViddlerIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 69cdcdc1b..2a5a85dc6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -54,6 +54,7 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
+ age_limit: Age restriction for the video, as an integer (years)
formats: A list of dictionaries for each format available, it must
be ordered from worst to best quality. Potential fields:
* url Mandatory. The URL of the video file
@@ -318,6 +319,15 @@ class InfoExtractor(object):
self._og_regex('video')],
html, name, **kargs)
+ def _rta_search(self, html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix)<meta\s+name="rating"\s+'
+ r' content="RTA-5042-1996-1400-1577-RTA"',
+ html):
+ return 18
+ return 0
+
+
class SearchInfoExtractor(InfoExtractor):
"""
Base class for paged search queries extractors.
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 764070635..7060c6f92 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -117,7 +117,7 @@ class GenericIE(InfoExtractor):
except ValueError:
# since this is the last-resort InfoExtractor, if
# this error is thrown, it'll be thrown here
- raise ExtractorError(u'Invalid URL: %s' % url)
+ raise ExtractorError(u'Failed to download URL: %s' % url)
self.report_extraction(video_id)
# Look for BrightCove:
@@ -149,12 +149,12 @@ class GenericIE(InfoExtractor):
# HTML5 video
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
+ raise ExtractorError(u'Unsupported URL: %s' % url)
# It's possible that one of the regexes
# matched, but returned an empty group:
if mobj.group(1) is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
+ raise ExtractorError(u'Did not find a valid video URL at %s' % url)
video_url = mobj.group(1)
video_url = compat_urlparse.urljoin(url, video_url)
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py
index add76a11e..5d770ec28 100644
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
if upload_date: upload_date = unified_strdate(upload_date)
+ age_limit = self._rta_search(webpage)
info = {'id': video_id,
'url': video_url,
@@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):
'upload_date': upload_date,
'title': video_title,
'ext': 'flv',
- 'format': 'flv'}
+ 'format': 'flv',
+ 'age_limit': age_limit}
return [info]
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
new file mode 100644
index 000000000..12c84a985
--- /dev/null
+++ b/youtube_dl/extractor/viddler.py
@@ -0,0 +1,64 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+)
+
+
class ViddlerIE(InfoExtractor):
    """Information extractor for videos hosted on viddler.com.

    All metadata is scraped from the JavaScript player configuration found
    on the video's ``/embed/`` page.
    """

    # The dot of the host name is escaped so that only "viddler.com" itself
    # is accepted, not look-alike hosts such as "viddlerxcom".
    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
    _TEST = {
        u"url": u"http://www.viddler.com/v/43903784",
        u'file': u'43903784.mp4',
        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
        u'info_dict': {
            u"title": u"Video Made Easy",
            u"uploader": u"viddler",
            u"duration": 100.89,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the Viddler video at *url*.

        *url* is expected to match ``_VALID_URL``. Whatever spelling was
        given (``/v/``, ``/embed/`` or ``/player/``), the ``/embed/`` page
        of the same domain is the one that gets downloaded and parsed.

        Returns a dict with ``_type`` ``'video'`` carrying id, title,
        thumbnail, uploader, duration and the list of formats; the fields
        of the last format are additionally copied to the top level.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        embed_url = mobj.group('domain') + u'/embed/' + video_id
        webpage = self._download_webpage(embed_url, video_id)

        # The "sources" entry is a JavaScript object literal; swapping the
        # single quotes for double quotes lets json parse it.
        video_sources_code = self._search_regex(
            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
        video_sources = json.loads(video_sources_code.replace("'", '"'))

        # Each key of the sources object is used as the format's URL and
        # each value as its format identifier.
        formats = [{
            'url': video_url,
            'format': format_id,
        } for video_url, format_id in video_sources.items()]

        title = self._html_search_regex(
            r"title\s*:\s*'([^']*)'", webpage, u'title')
        # The remaining fields are requested with fatal=False; presumably
        # the helper then yields None instead of raising when the pattern
        # is absent (verify against InfoExtractor).
        uploader = self._html_search_regex(
            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
        duration_s = self._html_search_regex(
            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
        # The pattern also accepts text float() cannot parse (e.g. "." or
        # "1.2.3"). The duration is optional, so a malformed value is
        # treated as unknown rather than aborting the whole extraction.
        try:
            duration = float(duration_s) if duration_s else None
        except ValueError:
            duration = None
        thumbnail = self._html_search_regex(
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)

        info = {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
        }

        # TODO: Remove when #980 has been merged
        # Until then the last format's fields ('url', 'format', 'ext') are
        # mirrored at the top level of the info dictionary.
        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
        info.update(info['formats'][-1])

        return info
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index c85fd4b5a..b1f93dd1b 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
+ age_limit = self._rta_search(webpage)
# Get JSON parameters
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
@@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):
'ext': extension,
'format': format,
'thumbnail': thumbnail,
- 'description': video_description
+ 'description': video_description,
+ 'age_limit': age_limit,
})
if self._downloader.params.get('listformats', None):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1101011ea..b02ae2572 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
- 'duration': video_duration
+ 'duration': video_duration,
+ 'age_limit': 18 if age_gate else 0,
})
return results
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a463049a4..de2654762 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -175,7 +175,7 @@ def compat_ord(c):
compiled_regex_type = type(re.compile(''))
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index e773e82da..08eda2197 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.10.04'
+__version__ = '2013.10.06'